remdb 0.3.146__py3-none-any.whl → 0.3.181__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (57)
  1. rem/agentic/agents/__init__.py +16 -0
  2. rem/agentic/agents/agent_manager.py +311 -0
  3. rem/agentic/context.py +81 -3
  4. rem/agentic/context_builder.py +36 -9
  5. rem/agentic/mcp/tool_wrapper.py +43 -14
  6. rem/agentic/providers/pydantic_ai.py +76 -34
  7. rem/agentic/schema.py +4 -3
  8. rem/agentic/tools/rem_tools.py +11 -0
  9. rem/api/deps.py +3 -5
  10. rem/api/main.py +22 -3
  11. rem/api/mcp_router/resources.py +75 -14
  12. rem/api/mcp_router/server.py +28 -23
  13. rem/api/mcp_router/tools.py +177 -2
  14. rem/api/middleware/tracking.py +5 -5
  15. rem/api/routers/auth.py +352 -6
  16. rem/api/routers/chat/completions.py +5 -3
  17. rem/api/routers/chat/streaming.py +95 -22
  18. rem/api/routers/messages.py +24 -15
  19. rem/auth/__init__.py +13 -3
  20. rem/auth/jwt.py +352 -0
  21. rem/auth/middleware.py +70 -30
  22. rem/auth/providers/__init__.py +4 -1
  23. rem/auth/providers/email.py +215 -0
  24. rem/cli/commands/ask.py +1 -1
  25. rem/cli/commands/db.py +118 -54
  26. rem/models/entities/__init__.py +4 -0
  27. rem/models/entities/ontology.py +93 -101
  28. rem/models/entities/subscriber.py +175 -0
  29. rem/models/entities/user.py +1 -0
  30. rem/schemas/agents/core/agent-builder.yaml +235 -0
  31. rem/services/__init__.py +3 -1
  32. rem/services/content/service.py +4 -3
  33. rem/services/email/__init__.py +10 -0
  34. rem/services/email/service.py +522 -0
  35. rem/services/email/templates.py +360 -0
  36. rem/services/embeddings/worker.py +26 -12
  37. rem/services/postgres/README.md +38 -0
  38. rem/services/postgres/diff_service.py +19 -3
  39. rem/services/postgres/pydantic_to_sqlalchemy.py +37 -2
  40. rem/services/postgres/register_type.py +1 -1
  41. rem/services/postgres/repository.py +37 -25
  42. rem/services/postgres/schema_generator.py +5 -5
  43. rem/services/postgres/sql_builder.py +6 -5
  44. rem/services/session/compression.py +113 -50
  45. rem/services/session/reload.py +14 -7
  46. rem/services/user_service.py +41 -9
  47. rem/settings.py +182 -1
  48. rem/sql/background_indexes.sql +5 -0
  49. rem/sql/migrations/001_install.sql +33 -4
  50. rem/sql/migrations/002_install_models.sql +204 -186
  51. rem/sql/migrations/005_schema_update.sql +145 -0
  52. rem/utils/model_helpers.py +101 -0
  53. rem/utils/schema_loader.py +45 -7
  54. {remdb-0.3.146.dist-info → remdb-0.3.181.dist-info}/METADATA +1 -1
  55. {remdb-0.3.146.dist-info → remdb-0.3.181.dist-info}/RECORD +57 -48
  56. {remdb-0.3.146.dist-info → remdb-0.3.181.dist-info}/WHEEL +0 -0
  57. {remdb-0.3.146.dist-info → remdb-0.3.181.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,215 @@
1
+ """
2
+ Email Authentication Provider.
3
+
4
+ Passwordless authentication using email verification codes.
5
+ Unlike OAuth providers, this handles the full flow internally.
6
+
7
+ Flow:
8
+ 1. User requests login with email address
9
+ 2. System generates code, upserts user, sends email
10
+ 3. User enters code
11
+ 4. System verifies code and creates session
12
+
13
+ Design:
14
+ - Uses EmailService for sending codes
15
+ - Creates users with deterministic UUID from email hash
16
+ - Stores challenge in user metadata
17
+ - No external OAuth dependencies
18
+ """
19
+
20
+ from typing import TYPE_CHECKING
21
+ from pydantic import BaseModel, Field
22
+ from loguru import logger
23
+
24
+ from ...services.email import EmailService
25
+
26
+ if TYPE_CHECKING:
27
+ from ...services.postgres import PostgresService
28
+
29
+
30
class EmailAuthResult(BaseModel):
    """Outcome of a single email-authentication step (send or verify).

    Combines a machine-readable status (``success``/``error``) with a
    human-friendly ``message`` that can be shown directly in a UI.
    """

    success: bool = Field(description="Whether operation succeeded")
    email: str = Field(description="Email address")
    user_id: str | None = Field(default=None, description="User ID if authenticated")
    error: str | None = Field(default=None, description="Error message if failed")
    message: str | None = Field(default=None, description="User-friendly message")
38
+
39
+
40
class EmailAuthProvider:
    """
    Email-based passwordless authentication provider.

    Handles the complete email login flow:
    1. send_code() - Generate and send verification code
    2. verify_code() - Verify code and return user info

    All persistence and delivery is delegated to EmailService; this class
    only adapts its dict results into EmailAuthResult objects and shapes
    user-facing messages.
    """

    # Maps EmailService error strings to user-friendly messages shown in the UI.
    _VERIFY_ERROR_MESSAGES = {
        "Login code expired": "Your code has expired. Please request a new one.",
        "Invalid login code": "Invalid code. Please check and try again.",
        "No login code requested": "No code was requested for this email. Please request a new code.",
        "User not found": "Email not found. Please request a login code first.",
    }

    def __init__(
        self,
        email_service: EmailService | None = None,
        template_kwargs: dict | None = None,
    ):
        """
        Initialize EmailAuthProvider.

        Args:
            email_service: EmailService instance (creates new one if not provided)
            template_kwargs: Customization for email templates (colors, branding, etc.)
        """
        self._email_service = email_service or EmailService()
        self._template_kwargs = template_kwargs or {}

    @property
    def is_configured(self) -> bool:
        """Check if email auth is properly configured (delegates to EmailService)."""
        return self._email_service.is_configured

    async def send_code(
        self,
        email: str,
        db: "PostgresService",
        tenant_id: str = "default",
    ) -> EmailAuthResult:
        """
        Send a verification code to an email address.

        Creates user if not exists (using deterministic UUID from email).
        Stores code in user metadata.

        Args:
            email: Email address to send code to
            db: PostgresService instance
            tenant_id: Tenant identifier

        Returns:
            EmailAuthResult with success status (never raises; errors are
            captured into the result).
        """
        if not self.is_configured:
            return EmailAuthResult(
                success=False,
                email=email,
                error="Email service not configured",
                message="Email login is not available. Please try another method.",
            )

        try:
            result = await self._email_service.send_login_code(
                email=email,
                db=db,
                tenant_id=tenant_id,
                template_kwargs=self._template_kwargs,
            )

            if result["success"]:
                return EmailAuthResult(
                    success=True,
                    email=email,
                    user_id=result["user_id"],
                    message=f"Verification code sent to {email}. Check your inbox.",
                )
            return EmailAuthResult(
                success=False,
                email=email,
                error=result.get("error", "Failed to send code"),
                message="Failed to send verification code. Please try again.",
            )

        except Exception as e:
            # Broad catch is intentional: auth endpoints must degrade to a
            # friendly error result rather than surface internal exceptions.
            logger.error(f"Error sending login code: {e}")
            return EmailAuthResult(
                success=False,
                email=email,
                error=str(e),
                message="An error occurred. Please try again.",
            )

    async def verify_code(
        self,
        email: str,
        code: str,
        db: "PostgresService",
        tenant_id: str = "default",
    ) -> EmailAuthResult:
        """
        Verify a login code and authenticate user.

        Args:
            email: Email address
            code: 6-digit verification code
            db: PostgresService instance
            tenant_id: Tenant identifier

        Returns:
            EmailAuthResult with user_id if successful (never raises; errors
            are captured into the result).
        """
        try:
            result = await self._email_service.verify_login_code(
                email=email,
                code=code,
                db=db,
                tenant_id=tenant_id,
            )

            if result["valid"]:
                return EmailAuthResult(
                    success=True,
                    email=email,
                    user_id=result["user_id"],
                    message="Successfully authenticated!",
                )

            error = result.get("error", "Invalid code")
            # Translate known service errors into user-friendly messages.
            message = self._VERIFY_ERROR_MESSAGES.get(
                error, "Verification failed. Please try again."
            )
            return EmailAuthResult(
                success=False,
                email=email,
                error=error,
                message=message,
            )

        except Exception as e:
            # Same rationale as send_code: never leak internals to callers.
            logger.error(f"Error verifying login code: {e}")
            return EmailAuthResult(
                success=False,
                email=email,
                error=str(e),
                message="An error occurred. Please try again.",
            )

    def get_user_dict(self, email: str, user_id: str, tenant_id: str = "default") -> dict:
        """
        Create a user dict for session storage.

        Compatible with OAuth user format for consistent session handling.

        Args:
            email: User's email
            user_id: User's UUID
            tenant_id: Tenant identifier (defaults to "default", matching the
                defaults of send_code/verify_code; pass the same tenant_id
                used during verification for multi-tenant deployments)

        Returns:
            User dict for session
        """
        return {
            "id": user_id,
            "email": email,
            "email_verified": True,  # Email is verified through code
            "name": email.split("@")[0],  # Use email prefix as name
            "provider": "email",
            "tenant_id": tenant_id,
            "tier": "free",  # Email users start at free tier
            "roles": ["user"],
        }
rem/cli/commands/ask.py CHANGED
@@ -75,7 +75,7 @@ async def run_agent_streaming(
75
75
  """
76
76
  Run agent in streaming mode using agent.iter() with usage limits.
77
77
 
78
- Design Pattern (from carrier):
78
+ Design Pattern:
79
79
  - Use agent.iter() for complete execution with tool call visibility
80
80
  - run_stream() stops after first output, missing tool calls
81
81
  - Stream tool call markers: [Calling: tool_name]
rem/cli/commands/db.py CHANGED
@@ -333,64 +333,120 @@ def rebuild_cache(connection: str | None):
333
333
 
334
334
  @click.command()
335
335
  @click.argument("file_path", type=click.Path(exists=True, path_type=Path))
336
+ @click.option("--table", "-t", default=None, help="Target table name (required for non-YAML formats)")
336
337
  @click.option("--user-id", default=None, help="User ID to scope data privately (default: public/shared)")
337
338
  @click.option("--dry-run", is_flag=True, help="Show what would be loaded without loading")
338
- def load(file_path: Path, user_id: str | None, dry_run: bool):
339
+ def load(file_path: Path, table: str | None, user_id: str | None, dry_run: bool):
339
340
  """
340
- Load data from YAML file into database.
341
+ Load data from file into database.
341
342
 
342
- File format:
343
- - table: resources
344
- key_field: name
345
- rows:
346
- - name: Example
347
- content: Test data...
343
+ Supports YAML with embedded metadata, or any tabular format via Polars
344
+ (jsonl, parquet, csv, json, arrow, etc.). For non-YAML formats, use --table.
348
345
 
349
346
  Examples:
350
- rem db load rem/tests/data/graph_seed.yaml
351
- rem db load data.yaml --user-id my-user # Private to user
352
- rem db load data.yaml --dry-run
347
+ rem db load data.yaml # YAML with metadata
348
+ rem db load data.jsonl -t resources # Any Polars-supported format
353
349
  """
354
- asyncio.run(_load_async(file_path, user_id, dry_run))
350
+ asyncio.run(_load_async(file_path, table, user_id, dry_run))
355
351
 
356
352
 
357
- async def _load_async(file_path: Path, user_id: str | None, dry_run: bool):
353
def _load_dataframe_from_file(file_path: Path) -> "pl.DataFrame":
    """Load any Polars-supported file format into a DataFrame."""
    import polars as pl

    # Dispatch on file extension; aliases map to the same reader.
    readers = {
        ".jsonl": pl.read_ndjson,
        ".ndjson": pl.read_ndjson,
        ".parquet": pl.read_parquet,
        ".pq": pl.read_parquet,
        ".csv": pl.read_csv,
        ".json": pl.read_json,
        ".ipc": pl.read_ipc,
        ".arrow": pl.read_ipc,
    }

    suffix = file_path.suffix.lower()
    reader = readers.get(suffix)
    if reader is None:
        raise ValueError(f"Unsupported file format: {suffix}. Use any Polars-supported format.")
    return reader(file_path)
371
+
372
+
373
+ async def _load_async(file_path: Path, table: str | None, user_id: str | None, dry_run: bool):
358
374
  """Async implementation of load command."""
375
+ import polars as pl
359
376
  import yaml
360
377
  from ...models.core.inline_edge import InlineEdge
361
- from ...models.entities import Resource, Moment, User, Message, SharedSession, Schema
378
+ from ...models.entities import SharedSession
362
379
  from ...services.postgres import get_postgres_service
380
+ from ...utils.model_helpers import get_table_name
381
+ from ... import get_model_registry
363
382
 
364
383
  logger.info(f"Loading data from: {file_path}")
365
384
  scope_msg = f"user: {user_id}" if user_id else "public"
366
385
  logger.info(f"Scope: {scope_msg}")
367
386
 
368
- # Load YAML file
369
- with open(file_path) as f:
370
- data = yaml.safe_load(f)
387
+ suffix = file_path.suffix.lower()
388
+ is_yaml = suffix in {".yaml", ".yml"}
371
389
 
372
- if not isinstance(data, list):
373
- logger.error("YAML must be a list of table definitions")
374
- raise click.Abort()
375
-
376
- if dry_run:
377
- logger.info("DRY RUN - Would load:")
378
- logger.info(yaml.dump(data, default_flow_style=False))
379
- return
380
-
381
- # Map table names to model classes
382
- # CoreModel subclasses use Repository.upsert()
390
+ # Build MODEL_MAP dynamically from registry
391
+ registry = get_model_registry()
392
+ registry.register_core_models()
383
393
  MODEL_MAP = {
384
- "users": User,
385
- "moments": Moment,
386
- "resources": Resource,
387
- "messages": Message,
388
- "schemas": Schema,
394
+ get_table_name(model): model
395
+ for model in registry.get_model_classes().values()
389
396
  }
390
397
 
391
398
  # Non-CoreModel tables that need direct SQL insertion
392
399
  DIRECT_INSERT_TABLES = {"shared_sessions"}
393
400
 
401
+ # Parse file based on format
402
+ if is_yaml:
403
+ # YAML with embedded metadata
404
+ with open(file_path) as f:
405
+ data = yaml.safe_load(f)
406
+
407
+ if not isinstance(data, list):
408
+ logger.error("YAML must be a list of table definitions")
409
+ raise click.Abort()
410
+
411
+ if dry_run:
412
+ logger.info("DRY RUN - Would load:")
413
+ logger.info(yaml.dump(data, default_flow_style=False))
414
+ return
415
+
416
+ table_defs = data
417
+ else:
418
+ # Polars-supported format - require --table
419
+ if not table:
420
+ logger.error(f"For {suffix} files, --table is required. Example: rem db load {file_path.name} -t resources")
421
+ raise click.Abort()
422
+
423
+ try:
424
+ df = _load_dataframe_from_file(file_path)
425
+ except Exception as e:
426
+ logger.error(f"Failed to load file: {e}")
427
+ raise click.Abort()
428
+
429
+ rows = df.to_dicts()
430
+
431
+ if dry_run:
432
+ logger.info(f"DRY RUN - Would load {len(rows)} rows to table '{table}':")
433
+ logger.info(f"Columns: {list(df.columns)}")
434
+
435
+ # Validate first row against model if table is known
436
+ if table in MODEL_MAP and rows:
437
+ from ...utils.model_helpers import validate_data_for_model
438
+ result = validate_data_for_model(MODEL_MAP[table], rows[0])
439
+ if result.extra_fields:
440
+ logger.warning(f"Unknown fields (ignored): {result.extra_fields}")
441
+ if result.valid:
442
+ logger.success(f"Sample row validates OK. Required: {result.required_fields or '(none)'}")
443
+ else:
444
+ result.log_errors("Sample row")
445
+ return
446
+
447
+ # Wrap as single table definition
448
+ table_defs = [{"table": table, "rows": rows}]
449
+
394
450
  # Connect to database
395
451
  pg = get_postgres_service()
396
452
  if not pg:
@@ -399,23 +455,24 @@ async def _load_async(file_path: Path, user_id: str | None, dry_run: bool):
399
455
 
400
456
  await pg.connect()
401
457
 
458
+ # Start embedding worker for generating embeddings
459
+ if pg.embedding_worker:
460
+ await pg.embedding_worker.start()
461
+
402
462
  try:
403
463
  total_loaded = 0
404
464
 
405
- for table_def in data:
465
+ for table_def in table_defs:
406
466
  table_name = table_def["table"]
407
- key_field = table_def.get("key_field", "id")
408
467
  rows = table_def.get("rows", [])
409
468
 
410
469
  # Handle direct insert tables (non-CoreModel)
411
470
  if table_name in DIRECT_INSERT_TABLES:
412
471
  for row_data in rows:
413
- # Add tenant_id if not present
414
472
  if "tenant_id" not in row_data:
415
473
  row_data["tenant_id"] = "default"
416
474
 
417
475
  if table_name == "shared_sessions":
418
- # Insert shared_session directly
419
476
  await pg.fetch(
420
477
  """INSERT INTO shared_sessions
421
478
  (session_id, owner_user_id, shared_with_user_id, tenant_id)
@@ -434,16 +491,13 @@ async def _load_async(file_path: Path, user_id: str | None, dry_run: bool):
434
491
  logger.warning(f"Unknown table: {table_name}, skipping")
435
492
  continue
436
493
 
437
- model_class = MODEL_MAP[table_name] # Type is inferred from MODEL_MAP
494
+ model_class = MODEL_MAP[table_name]
438
495
 
439
- for row_data in rows:
440
- # Add user_id and tenant_id only if explicitly provided
441
- # Default is public (None) - data is shared/visible to all
442
- # Pass --user-id to scope data privately to a specific user
443
- if "user_id" not in row_data and user_id is not None:
444
- row_data["user_id"] = user_id
496
+ for row_idx, row_data in enumerate(rows):
497
+ # user_id stays NULL for public data (accessible by any user)
498
+ # Only set tenant_id for scoping - the --user-id flag controls tenant scope
445
499
  if "tenant_id" not in row_data and user_id is not None:
446
- row_data["tenant_id"] = row_data.get("user_id", user_id)
500
+ row_data["tenant_id"] = user_id
447
501
 
448
502
  # Convert graph_edges to InlineEdge format if present
449
503
  if "graph_edges" in row_data:
@@ -452,30 +506,40 @@ async def _load_async(file_path: Path, user_id: str | None, dry_run: bool):
452
506
  for edge in row_data["graph_edges"]
453
507
  ]
454
508
 
455
- # Convert any ISO timestamp strings with Z suffix to naive datetime
456
- # This handles fields like starts_timestamp, ends_timestamp, etc.
509
+ # Convert ISO timestamp strings
457
510
  from ...utils.date_utils import parse_iso
458
511
  for key, value in list(row_data.items()):
459
512
  if isinstance(value, str) and (key.endswith("_timestamp") or key.endswith("_at")):
460
513
  try:
461
514
  row_data[key] = parse_iso(value)
462
515
  except (ValueError, TypeError):
463
- pass # Not a valid datetime string, leave as-is
516
+ pass
464
517
 
465
- # Create model instance and upsert via repository
466
518
  from ...services.postgres.repository import Repository
519
+ from ...utils.model_helpers import validate_data_for_model
520
+
521
+ result = validate_data_for_model(model_class, row_data)
522
+ if not result.valid:
523
+ result.log_errors(f"Row {row_idx + 1} ({table_name})")
524
+ raise click.Abort()
467
525
 
468
- instance = model_class(**row_data)
469
- repo = Repository(model_class, table_name, pg) # Type inferred from MODEL_MAP
470
- await repo.upsert(instance) # type: ignore[arg-type]
526
+ repo = Repository(model_class, table_name, pg)
527
+ await repo.upsert(result.instance) # type: ignore[arg-type]
471
528
  total_loaded += 1
472
529
 
473
- # Log based on model type
474
- name = getattr(instance, 'name', getattr(instance, 'id', '?'))
530
+ name = getattr(result.instance, 'name', getattr(result.instance, 'id', '?'))
475
531
  logger.success(f"Loaded {table_name[:-1]}: {name}")
476
532
 
477
533
  logger.success(f"Data loaded successfully! Total rows: {total_loaded}")
478
534
 
535
+ # Wait for embeddings to complete
536
+ if pg.embedding_worker and pg.embedding_worker.running:
537
+ queue_size = pg.embedding_worker.task_queue.qsize()
538
+ if queue_size > 0:
539
+ logger.info(f"Waiting for {queue_size} embeddings to complete...")
540
+ await pg.embedding_worker.stop()
541
+ logger.success("Embeddings generated successfully")
542
+
479
543
  finally:
480
544
  await pg.disconnect()
481
545
 
@@ -39,6 +39,7 @@ from .shared_session import (
39
39
  SharedWithMeResponse,
40
40
  SharedWithMeSummary,
41
41
  )
42
+ from .subscriber import Subscriber, SubscriberOrigin, SubscriberStatus
42
43
  from .user import User, UserTier
43
44
 
44
45
  __all__ = [
@@ -56,6 +57,9 @@ __all__ = [
56
57
  "FeedbackCategory",
57
58
  "User",
58
59
  "UserTier",
60
+ "Subscriber",
61
+ "SubscriberStatus",
62
+ "SubscriberOrigin",
59
63
  "File",
60
64
  "Moment",
61
65
  "Schema",