remdb 0.3.0__py3-none-any.whl → 0.3.114__py3-none-any.whl

This diff shows the changes between publicly available package versions as released to a supported registry. It is provided for informational purposes only.

Potentially problematic release.

Files changed (98)
  1. rem/__init__.py +129 -2
  2. rem/agentic/README.md +76 -0
  3. rem/agentic/__init__.py +15 -0
  4. rem/agentic/agents/__init__.py +16 -2
  5. rem/agentic/agents/sse_simulator.py +500 -0
  6. rem/agentic/context.py +28 -22
  7. rem/agentic/llm_provider_models.py +301 -0
  8. rem/agentic/otel/setup.py +92 -4
  9. rem/agentic/providers/phoenix.py +32 -43
  10. rem/agentic/providers/pydantic_ai.py +142 -22
  11. rem/agentic/schema.py +358 -21
  12. rem/agentic/tools/rem_tools.py +3 -3
  13. rem/api/README.md +238 -1
  14. rem/api/deps.py +255 -0
  15. rem/api/main.py +151 -37
  16. rem/api/mcp_router/resources.py +1 -1
  17. rem/api/mcp_router/server.py +17 -2
  18. rem/api/mcp_router/tools.py +143 -7
  19. rem/api/middleware/tracking.py +172 -0
  20. rem/api/routers/admin.py +277 -0
  21. rem/api/routers/auth.py +124 -0
  22. rem/api/routers/chat/completions.py +152 -16
  23. rem/api/routers/chat/models.py +7 -3
  24. rem/api/routers/chat/sse_events.py +526 -0
  25. rem/api/routers/chat/streaming.py +608 -45
  26. rem/api/routers/dev.py +81 -0
  27. rem/api/routers/feedback.py +148 -0
  28. rem/api/routers/messages.py +473 -0
  29. rem/api/routers/models.py +78 -0
  30. rem/api/routers/query.py +357 -0
  31. rem/api/routers/shared_sessions.py +406 -0
  32. rem/auth/middleware.py +126 -27
  33. rem/cli/commands/README.md +201 -70
  34. rem/cli/commands/ask.py +13 -10
  35. rem/cli/commands/cluster.py +1359 -0
  36. rem/cli/commands/configure.py +4 -3
  37. rem/cli/commands/db.py +350 -137
  38. rem/cli/commands/experiments.py +76 -72
  39. rem/cli/commands/process.py +22 -15
  40. rem/cli/commands/scaffold.py +47 -0
  41. rem/cli/commands/schema.py +95 -49
  42. rem/cli/main.py +29 -6
  43. rem/config.py +2 -2
  44. rem/models/core/core_model.py +7 -1
  45. rem/models/core/rem_query.py +5 -2
  46. rem/models/entities/__init__.py +21 -0
  47. rem/models/entities/domain_resource.py +38 -0
  48. rem/models/entities/feedback.py +123 -0
  49. rem/models/entities/message.py +30 -1
  50. rem/models/entities/session.py +83 -0
  51. rem/models/entities/shared_session.py +180 -0
  52. rem/models/entities/user.py +10 -3
  53. rem/registry.py +373 -0
  54. rem/schemas/agents/rem.yaml +7 -3
  55. rem/services/content/providers.py +94 -140
  56. rem/services/content/service.py +92 -20
  57. rem/services/dreaming/affinity_service.py +2 -16
  58. rem/services/dreaming/moment_service.py +2 -15
  59. rem/services/embeddings/api.py +24 -17
  60. rem/services/embeddings/worker.py +16 -16
  61. rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
  62. rem/services/phoenix/client.py +252 -19
  63. rem/services/postgres/README.md +159 -15
  64. rem/services/postgres/__init__.py +2 -1
  65. rem/services/postgres/diff_service.py +426 -0
  66. rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
  67. rem/services/postgres/repository.py +132 -0
  68. rem/services/postgres/schema_generator.py +86 -5
  69. rem/services/postgres/service.py +6 -6
  70. rem/services/rate_limit.py +113 -0
  71. rem/services/rem/README.md +14 -0
  72. rem/services/rem/parser.py +44 -9
  73. rem/services/rem/service.py +36 -2
  74. rem/services/session/compression.py +17 -1
  75. rem/services/session/reload.py +1 -1
  76. rem/services/user_service.py +98 -0
  77. rem/settings.py +169 -17
  78. rem/sql/background_indexes.sql +21 -16
  79. rem/sql/migrations/001_install.sql +231 -54
  80. rem/sql/migrations/002_install_models.sql +457 -393
  81. rem/sql/migrations/003_optional_extensions.sql +326 -0
  82. rem/utils/constants.py +97 -0
  83. rem/utils/date_utils.py +228 -0
  84. rem/utils/embeddings.py +17 -4
  85. rem/utils/files.py +167 -0
  86. rem/utils/mime_types.py +158 -0
  87. rem/utils/model_helpers.py +156 -1
  88. rem/utils/schema_loader.py +191 -35
  89. rem/utils/sql_types.py +3 -1
  90. rem/utils/vision.py +9 -14
  91. rem/workers/README.md +14 -14
  92. rem/workers/db_maintainer.py +74 -0
  93. {remdb-0.3.0.dist-info → remdb-0.3.114.dist-info}/METADATA +303 -164
  94. {remdb-0.3.0.dist-info → remdb-0.3.114.dist-info}/RECORD +96 -70
  95. {remdb-0.3.0.dist-info → remdb-0.3.114.dist-info}/WHEEL +1 -1
  96. rem/sql/002_install_models.sql +0 -1068
  97. rem/sql/install_models.sql +0 -1038
  98. {remdb-0.3.0.dist-info → remdb-0.3.114.dist-info}/entry_points.txt +0 -0
@@ -578,8 +578,7 @@ def run(
     from rem.services.git import GitService
     from rem.services.phoenix import PhoenixClient
     from rem.agentic.providers.phoenix import create_evaluator_from_schema
-    from datetime import datetime
-    import pandas as pd
+    from rem.utils.date_utils import utc_now, to_iso, format_timestamp_for_experiment
     import os

     try:
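
Note: utc_now, to_iso, and format_timestamp_for_experiment come from the new rem/utils/date_utils.py (+228 lines, not shown in this diff). Judging only from the call sites in these hunks, a minimal sketch of the helpers might look like this; the released module may differ:

    # Hypothetical sketch of rem/utils/date_utils.py, inferred from call sites
    # in this diff; not the actual implementation.
    from datetime import datetime, timedelta, timezone

    def utc_now() -> datetime:
        # Timezone-aware "now" in UTC; replaces naive datetime.now().
        return datetime.now(timezone.utc)

    def to_iso(dt: datetime) -> str:
        # ISO-8601 string, e.g. "2024-01-15T12:30:00+00:00".
        return dt.isoformat()

    def format_timestamp_for_experiment(dt: datetime | None = None) -> str:
        # Compact timestamp for experiment names, matching the removed
        # datetime.now().strftime('%Y%m%d-%H%M%S') call, e.g. "20240115-123000".
        return (dt or utc_now()).strftime("%Y%m%d-%H%M%S")

    def days_ago(days: int) -> datetime:
        # UTC datetime `days` days in the past (used by trace_list below).
        return utc_now() - timedelta(days=days)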
@@ -615,36 +614,22 @@ def run(
         click.echo(f" Mode: DRY RUN (no data will be saved)")
         click.echo()

-        # Load agent schema from Git or filesystem
+        # Load agent schema using centralized schema loader
         agent_name = config.agent_schema_ref.name
         agent_version = config.agent_schema_ref.version

         click.echo(f"Loading agent schema: {agent_name} (version: {agent_version or 'latest'})")

-        # Try Git first, fallback to filesystem
-        agent_schema = None
-        try:
-            git_svc = GitService()
-            agent_schema = git_svc.load_schema(agent_name, version=agent_version)
-            click.echo(f"✓ Loaded agent schema from Git")
-        except Exception as e:
-            logger.debug(f"Git not available, trying filesystem: {e}")
+        from rem.utils.schema_loader import load_agent_schema

-            # Fallback to local filesystem
-            from rem.services.fs import FS
-            fs = FS()
-
-            schema_path = f"schemas/agents/{agent_name}.yaml"
-            try:
-                agent_schema = fs.read(schema_path)
-                click.echo(f"✓ Loaded agent schema from filesystem")
-            except Exception as fs_error:
-                logger.error(f"Failed to load agent schema: Git: {e}, FS: {fs_error}")
-                click.echo(f"Error: Could not load agent schema '{agent_name}'")
-                click.echo(f" Tried Git: {e}")
-                click.echo(f" Tried filesystem: {schema_path}")
-                click.echo(f" Make sure the schema exists")
-                raise click.Abort()
+        try:
+            agent_schema = load_agent_schema(agent_name)
+            click.echo(f"✓ Loaded agent schema: {agent_name}")
+        except FileNotFoundError as e:
+            logger.error(f"Failed to load agent schema: {e}")
+            click.echo(f"Error: Could not load agent schema '{agent_name}'")
+            click.echo(f" {e}")
+            raise click.Abort()

         # Create agent function from schema
         from rem.agentic.providers.pydantic_ai import create_agent
@@ -683,73 +668,85 @@ def run(
                 return {"output": serialized}
             return serialized if isinstance(serialized, dict) else {"output": str(serialized)}

-        # Load evaluator schema
+        # Load evaluator schema using centralized schema loader
         evaluator_name = config.evaluator_schema_ref.name
         evaluator_version = config.evaluator_schema_ref.version

-        # Resolve evaluator path (evaluators are organized by agent name)
-        evaluator_schema_path = f"rem/schemas/evaluators/{agent_name}/{evaluator_name}.yaml"
-
         click.echo(f"Loading evaluator: {evaluator_name} for agent {agent_name}")

-        try:
-            evaluator_fn = create_evaluator_from_schema(
-                evaluator_schema_path=evaluator_schema_path,
-                model_name=None,  # Use default from schema
-            )
-            click.echo(f"✓ Loaded evaluator schema")
-        except Exception as e:
-            logger.warning(f"Failed to load evaluator: {e}")
-            click.echo(f"Error: Could not load evaluator schema")
-            click.echo(f" Path: {evaluator_schema_path}")
-            click.echo(f" Make sure the schema exists")
+        # Try multiple evaluator path patterns (agent-specific, then generic)
+        evaluator_paths_to_try = [
+            f"{agent_name}/{evaluator_name}",  # e.g., hello-world/default
+            f"{agent_name}-{evaluator_name}",  # e.g., hello-world-default
+            evaluator_name,  # e.g., default (generic)
+        ]
+
+        evaluator_fn = None
+        evaluator_load_error = None
+
+        for evaluator_path in evaluator_paths_to_try:
+            try:
+                evaluator_fn = create_evaluator_from_schema(
+                    evaluator_schema_path=evaluator_path,
+                    model_name=None,  # Use default from schema
+                )
+                click.echo(f"✓ Loaded evaluator schema: {evaluator_path}")
+                break
+            except FileNotFoundError as e:
+                evaluator_load_error = e
+                logger.debug(f"Evaluator not found at {evaluator_path}: {e}")
+                continue
+            except Exception as e:
+                evaluator_load_error = e
+                logger.warning(f"Failed to load evaluator from {evaluator_path}: {e}")
+                continue
+
+        if evaluator_fn is None:
+            click.echo(f"Error: Could not load evaluator schema '{evaluator_name}'")
+            click.echo(f" Tried paths: {evaluator_paths_to_try}")
+            if evaluator_load_error:
+                click.echo(f" Last error: {evaluator_load_error}")
             raise click.Abort()

-        # Load dataset
+        # Load dataset using Polars
+        import polars as pl
+
         click.echo(f"Loading dataset: {list(config.datasets.keys())[0]}")
         dataset_ref = list(config.datasets.values())[0]

         if dataset_ref.location.value == "git":
-            # Load from Git
+            # Load from Git (local filesystem)
             dataset_path = Path(base_path) / name / dataset_ref.path
             if not dataset_path.exists():
                 click.echo(f"Error: Dataset not found: {dataset_path}")
                 raise click.Abort()

             if dataset_ref.format == "csv":
-                dataset_df = pd.read_csv(dataset_path)
+                dataset_df = pl.read_csv(dataset_path)
             elif dataset_ref.format == "parquet":
-                dataset_df = pd.read_parquet(dataset_path)
+                dataset_df = pl.read_parquet(dataset_path)
             elif dataset_ref.format == "jsonl":
-                dataset_df = pd.read_json(dataset_path, lines=True)
+                dataset_df = pl.read_ndjson(dataset_path)
             else:
                 click.echo(f"Error: Format '{dataset_ref.format}' not yet supported")
                 raise click.Abort()
         elif dataset_ref.location.value in ["s3", "hybrid"]:
             # Load from S3 using FS provider
             from rem.services.fs import FS
+            from io import BytesIO

             fs = FS()

             try:
                 if dataset_ref.format == "csv":
                     content = fs.read(dataset_ref.path)
-                    from io import StringIO
-                    dataset_df = pd.read_csv(StringIO(content))
+                    dataset_df = pl.read_csv(BytesIO(content.encode() if isinstance(content, str) else content))
                 elif dataset_ref.format == "parquet":
-                    # For parquet, we need binary read
-                    import tempfile
-                    with tempfile.NamedTemporaryFile(suffix=".parquet", delete=False) as tmp:
-                        tmp_path = tmp.name
-                        # Download via FS
-                        content_bytes = fs.read(dataset_ref.path)
-                        tmp.write(content_bytes)
-                    dataset_df = pd.read_parquet(tmp_path)
-                    Path(tmp_path).unlink()  # Clean up temp file
+                    content_bytes = fs.read(dataset_ref.path)
+                    dataset_df = pl.read_parquet(BytesIO(content_bytes if isinstance(content_bytes, bytes) else content_bytes.encode()))
                 elif dataset_ref.format == "jsonl":
                     content = fs.read(dataset_ref.path)
-                    from io import StringIO
-                    dataset_df = pd.read_json(StringIO(content), lines=True)
+                    dataset_df = pl.read_ndjson(BytesIO(content.encode() if isinstance(content, str) else content))
                 else:
                     click.echo(f"Error: Format '{dataset_ref.format}' not yet supported")
                     raise click.Abort()
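
Note: the dataset loader swaps pandas for Polars. The replacements used above are standard Polars readers: pl.read_csv, pl.read_parquet, and pl.read_ndjson (which covers pandas' read_json(..., lines=True)). Each accepts a path or an in-memory buffer, which is why the S3 branch can drop the tempfile workaround. A small self-contained illustration of the mapping, with invented file names:

    # Illustration of the pandas -> Polars mapping above; paths are invented.
    from io import BytesIO
    import polars as pl

    df_csv = pl.read_csv("examples.csv")              # was pd.read_csv
    df_parquet = pl.read_parquet("examples.parquet")  # was pd.read_parquet
    df_jsonl = pl.read_ndjson("examples.jsonl")       # was pd.read_json(..., lines=True)

    # The S3 branch wraps raw bytes in BytesIO instead of writing a temp file:
    raw = b"a,b\n1,2\n"
    df_mem = pl.read_csv(BytesIO(raw))
    assert df_mem.columns == ["a", "b"]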
@@ -793,13 +790,13 @@ def run(

         client = PhoenixClient(config=phoenix_config)

-        experiment_name = f"{config.name}-{datetime.now().strftime('%Y%m%d-%H%M%S')}"
+        experiment_name = f"{config.name}-{format_timestamp_for_experiment()}"

         click.echo(f"\n⏳ Running experiment: {experiment_name}")
         click.echo(f" This may take several minutes...")

         experiment = client.run_experiment(
-            dataset=dataset_df,  # type: ignore[arg-type]
+            dataset=dataset_df,
             task=task_fn,
             evaluators=[evaluator_fn],
             experiment_name=experiment_name,
@@ -809,12 +806,15 @@ def run(
                 "evaluator": config.evaluator_schema_ref.name,
                 "experiment_config": config.name,
                 **config.metadata
-            }
+            },
+            # Smart column detection for DataFrame -> Phoenix Dataset conversion
+            input_keys=["input"] if "input" in dataset_df.columns else None,
+            output_keys=["expected_output"] if "expected_output" in dataset_df.columns else None,
         )

         # Update experiment status
         config.status = ExperimentStatus.COMPLETED
-        config.last_run_at = datetime.now()
+        config.last_run_at = utc_now()
         if not version:  # Only save if not loading from Git
             config.save(base_path)

@@ -835,7 +835,7 @@ def run(
             "agent": config.agent_schema_ref.name,
             "evaluator": config.evaluator_schema_ref.name,
             "dataset_size": len(dataset_df),
-            "completed_at": datetime.now().isoformat(),
+            "completed_at": to_iso(utc_now()),
             "phoenix_url": getattr(experiment, "url", None),
             "task_runs": len(exp_data.get("task_runs", [])),
         }
@@ -1015,20 +1015,24 @@ def dataset_add(
         --output-keys expected_label,expected_type
     """
     from rem.services.phoenix import PhoenixClient
-    import pandas as pd
+    import polars as pl

     try:
         client = PhoenixClient()

-        # Load CSV
-        df = pd.read_csv(from_csv)
+        # Load CSV with Polars
+        df = pl.read_csv(from_csv)
+        records = df.to_dicts()

         # Extract data
-        inputs = cast(list[dict[str, Any]], df[input_keys.split(",")].to_dict("records"))
-        outputs = cast(list[dict[str, Any]], df[output_keys.split(",")].to_dict("records"))
+        input_cols = input_keys.split(",")
+        output_cols = output_keys.split(",")
+        inputs = [{k: row.get(k) for k in input_cols} for row in records]
+        outputs = [{k: row.get(k) for k in output_cols} for row in records]
         metadata = None
         if metadata_keys:
-            metadata = cast(list[dict[str, Any]], df[metadata_keys.split(",")].to_dict("records"))
+            meta_cols = metadata_keys.split(",")
+            metadata = [{k: row.get(k) for k in meta_cols} for row in records]

         # Add to dataset
         dataset = client.add_examples_to_dataset(
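
Note: Polars DataFrames have no to_dict("records"); df.to_dicts() returns one plain dict per row, and the comprehensions above project the requested key columns from each row. The behavior, with invented column names:

    # How the projection above behaves; column names invented for the example.
    import polars as pl

    df = pl.DataFrame({"question": ["2+2?"], "expected_label": ["4"]})
    records = df.to_dicts()  # [{"question": "2+2?", "expected_label": "4"}]

    input_cols = "question".split(",")
    inputs = [{k: row.get(k) for k in input_cols} for row in records]
    # inputs == [{"question": "2+2?"}]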
@@ -1269,12 +1273,12 @@ def trace_list(
         rem experiments trace list --project rem-agents --days 7 --limit 50
     """
     from rem.services.phoenix import PhoenixClient
-    from datetime import datetime, timedelta
+    from rem.utils.date_utils import days_ago

     try:
         client = PhoenixClient()

-        start_time = datetime.now() - timedelta(days=days)
+        start_time = days_ago(days)

         traces_df = client.get_traces(
             project_name=project,
@@ -12,12 +12,12 @@ from rem.services.content import ContentService

 @click.command(name="ingest")
 @click.argument("file_path", type=click.Path(exists=True))
-@click.option("--user-id", required=True, help="User ID to own the file")
+@click.option("--user-id", default=None, help="User ID to scope file privately (default: public/shared)")
 @click.option("--category", help="Optional file category")
 @click.option("--tags", help="Optional comma-separated tags")
 def process_ingest(
     file_path: str,
-    user_id: str,
+    user_id: str | None,
     category: str | None,
     tags: str | None,
 ):
@@ -32,8 +32,9 @@ def process_ingest(
     5. Creates a File entity record.

     Examples:
-        rem process ingest sample.pdf --user-id user-123
-        rem process ingest contract.docx --user-id user-123 --category legal --tags contract,2023
+        rem process ingest sample.pdf
+        rem process ingest contract.docx --category legal --tags contract,2023
+        rem process ingest agent.yaml  # Auto-detects kind=agent, saves to schemas table
     """
     import asyncio
     from ...services.content import ContentService
@@ -56,7 +57,8 @@ def process_ingest(

         tag_list = tags.split(",") if tags else None

-        logger.info(f"Ingesting file: {file_path} for user: {user_id}")
+        scope_msg = f"user: {user_id}" if user_id else "public"
+        logger.info(f"Ingesting file: {file_path} ({scope_msg})")
         result = await service.ingest_file(
             file_uri=file_path,
             user_id=user_id,
@@ -65,11 +67,15 @@ def process_ingest(
             is_local_server=True,  # CLI is local
         )

-        if result.get("processing_status") == "completed":
-            logger.success(f"File ingested successfully: {result['file_name']}")
+        # Handle schema ingestion (agents/evaluators)
+        if result.get("schema_name"):
+            logger.success(f"Schema ingested: {result['schema_name']} (kind={result.get('kind', 'agent')})")
+            logger.info(f"Version: {result.get('version', '1.0.0')}")
+        # Handle file ingestion
+        elif result.get("processing_status") == "completed":
+            logger.success(f"File ingested: {result['file_name']}")
             logger.info(f"File ID: {result['file_id']}")
             logger.info(f"Resources created: {result['resources_created']}")
-            logger.info(f"Status: {result['processing_status']}")
         else:
             logger.error(f"Ingestion failed: {result.get('message', 'Unknown error')}")
             sys.exit(1)
@@ -192,15 +198,13 @@ def process_uri(uri: str, output: str, save: str | None):


 @click.command(name="files")
-@click.option("--tenant-id", required=True, help="Tenant ID")
-@click.option("--user-id", help="Filter by user ID")
+@click.option("--user-id", default=None, help="User ID (default: from settings)")
 @click.option("--status", type=click.Choice(["pending", "processing", "completed", "failed"]), help="Filter by status")
 @click.option("--extractor", help="Run files through custom extractor (e.g., cv-parser-v1)")
 @click.option("--limit", type=int, help="Max files to process")
 @click.option("--provider", help="Optional LLM provider override")
 @click.option("--model", help="Optional model override")
 def process_files(
-    tenant_id: str,
     user_id: Optional[str],
     status: Optional[str],
     extractor: Optional[str],
@@ -217,19 +221,22 @@ def process_files(

     \b
     # List completed files
-    rem process files --tenant-id acme-corp --status completed
+    rem process files --status completed

     \b
     # Extract from CV files
-    rem process files --tenant-id acme-corp --extractor cv-parser-v1 --limit 10
+    rem process files --extractor cv-parser-v1 --limit 10

     \b
     # Extract with provider override
-    rem process files --tenant-id acme-corp --extractor contract-analyzer-v1 \\
+    rem process files --extractor contract-analyzer-v1 \\
         --provider anthropic --model claude-sonnet-4-5
     """
+    from ...settings import settings
+    effective_user_id = user_id or settings.test.effective_user_id
+
     logger.warning("Not implemented yet")
-    logger.info(f"Would process files for tenant: {tenant_id}")
+    logger.info(f"Would process files for user: {effective_user_id}")

     if user_id:
         logger.info(f"Filter: user_id={user_id}")
@@ -0,0 +1,47 @@
+"""
+Scaffold command - generate project structure for REM-based applications.
+
+TODO: Implement this command to generate:
+- my_app/main.py (entry point with create_app)
+- my_app/models.py (example CoreModel subclass)
+- my_app/routers/ (example FastAPI router)
+- schemas/agents/ (example agent schema)
+- schemas/evaluators/ (example evaluator)
+- sql/migrations/ (empty migrations directory)
+- pyproject.toml (with remdb dependency)
+- README.md (basic usage instructions)
+
+Usage:
+    rem scaffold my-app
+    rem scaffold my-app --with-examples  # Include example models/routers/tools
+"""
+
+import click
+
+
+@click.command()
+@click.argument("name")
+@click.option("--with-examples", is_flag=True, help="Include example code")
+def scaffold(name: str, with_examples: bool) -> None:
+    """
+    Generate a new REM-based project structure.
+
+    NAME is the project directory name to create.
+    """
+    click.echo(f"TODO: Scaffold command not yet implemented")
+    click.echo(f"Would create project: {name}")
+    click.echo(f"With examples: {with_examples}")
+    click.echo()
+    click.echo("For now, manually create this structure:")
+    click.echo(f"""
+    {name}/
+    ├── {name.replace('-', '_')}/
+    │   ├── main.py        # Entry point (create_app + extensions)
+    │   ├── models.py      # Custom models (inherit CoreModel)
+    │   └── routers/       # Custom FastAPI routers
+    ├── schemas/
+    │   ├── agents/        # Custom agent YAML schemas
+    │   └── evaluators/    # Custom evaluator schemas
+    ├── sql/migrations/    # Custom SQL migrations
+    └── pyproject.toml
+    """)
@@ -8,6 +8,7 @@ Usage:
 """

 import asyncio
+import importlib
 from pathlib import Path

 import click
@@ -17,66 +18,113 @@ from ...settings import settings
 from ...services.postgres.schema_generator import SchemaGenerator


+def _import_model_modules() -> list[str]:
+    """
+    Import modules specified in MODELS__IMPORT_MODULES setting.
+
+    This ensures downstream models decorated with @rem.register_model
+    are registered before schema generation.
+
+    Returns:
+        List of successfully imported module names
+    """
+    imported = []
+    for module_name in settings.models.module_list:
+        try:
+            importlib.import_module(module_name)
+            imported.append(module_name)
+            logger.debug(f"Imported model module: {module_name}")
+        except ImportError as e:
+            logger.warning(f"Failed to import model module '{module_name}': {e}")
+            click.echo(
+                click.style(f" ⚠ Could not import '{module_name}': {e}", fg="yellow"),
+                err=True,
+            )
+    return imported
+
+
 @click.command()
-@click.option(
-    "--models",
-    "-m",
-    required=True,
-    type=click.Path(exists=True, path_type=Path),
-    help="Directory containing Pydantic models",
-)
 @click.option(
     "--output",
     "-o",
     type=click.Path(path_type=Path),
-    default="install_models.sql",
-    help="Output SQL file (default: install_models.sql)",
+    default="002_install_models.sql",
+    help="Output SQL file (default: 002_install_models.sql)",
 )
 @click.option(
     "--output-dir",
     type=click.Path(path_type=Path),
     default=None,
-    help=f"Base output directory (default: {settings.sql_dir})",
+    help=f"Base output directory (default: {settings.sql_dir}/migrations)",
 )
-def generate(models: Path, output: Path, output_dir: Path | None):
+def generate(output: Path, output_dir: Path | None):
     """
-    Generate database schema from Pydantic models.
+    Generate database schema from registered Pydantic models.

-    Scans the specified directory for Pydantic models and generates:
+    Uses the model registry (core models + user-registered models) to generate:
     - CREATE TABLE statements
     - Embeddings tables (embeddings_<table>)
     - KV_STORE triggers for cache maintenance
     - Indexes (foreground only)

-    Output is written to src/rem/sql/install_models.sql by default.
+    Output is written to src/rem/sql/migrations/002_install_models.sql by default.

     Example:
-        rem db schema generate --models src/rem/models/entities
+        rem db schema generate
+
+    To register custom models in downstream apps:
+
+    1. Create models with @rem.register_model decorator:
+
+        # models/__init__.py
+        import rem
+        from rem.models.core import CoreModel
+
+        @rem.register_model
+        class MyEntity(CoreModel):
+            name: str
+
+    2. Set MODELS__IMPORT_MODULES in your .env:
+
+        MODELS__IMPORT_MODULES=models
+
+    3. Run schema generation:
+
+        rem db schema generate

     This creates:
-    - src/rem/sql/install_models.sql - Entity tables and triggers
+    - src/rem/sql/migrations/002_install_models.sql - Entity tables and triggers
     - src/rem/sql/background_indexes.sql - HNSW indexes (apply after data load)

-    After generation, apply with:
-        rem db migrate
+    After generation, verify with:
+        rem db diff
     """
-    click.echo(f"Discovering models in {models}")
+    from ...registry import get_model_registry
+
+    # Import downstream model modules to trigger @rem.register_model decorators
+    imported_modules = _import_model_modules()
+    if imported_modules:
+        click.echo(f"Imported model modules: {', '.join(imported_modules)}")
+
+    registry = get_model_registry()
+    models = registry.get_models(include_core=True)
+    click.echo(f"Generating schema from {len(models)} registered models")

-    # Use settings.sql_dir if not provided
-    actual_output_dir = output_dir or Path(settings.sql_dir)
+    # Default to migrations directory
+    actual_output_dir = output_dir or Path(settings.sql_dir) / "migrations"
     generator = SchemaGenerator(output_dir=actual_output_dir)

-    # Generate schema
+    # Generate schema from registry
     try:
-        schema_sql = asyncio.run(generator.generate_from_directory(models, output_file=output.name))
+        schema_sql = asyncio.run(generator.generate_from_registry(output_file=output.name))

         click.echo(f"✓ Schema generated: {len(generator.schemas)} tables")
         click.echo(f"✓ Written to: {actual_output_dir / output.name}")

-        # Generate background indexes
+        # Generate background indexes in parent sql dir
         background_indexes = generator.generate_background_indexes()
         if background_indexes:
-            bg_file = actual_output_dir / "background_indexes.sql"
+            bg_file = Path(settings.sql_dir) / "background_indexes.sql"
             bg_file.write_text(background_indexes)
             click.echo(f"✓ Background indexes: {bg_file}")

@@ -94,48 +142,46 @@ def generate(models: Path, output: Path, output_dir: Path | None):


 @click.command()
-@click.option(
-    "--models",
-    "-m",
-    required=True,
-    type=click.Path(exists=True, path_type=Path),
-    help="Directory containing Pydantic models",
-)
-def validate(models: Path):
+def validate():
     """
-    Validate Pydantic models for schema generation.
+    Validate registered Pydantic models for schema generation.

     Checks:
-    - Models can be loaded
+    - Models can be loaded from registry
     - Models have suitable entity_key fields
     - Fields with embeddings are properly configured
+
+    Set MODELS__IMPORT_MODULES to include custom models from downstream apps.
     """
-    click.echo(f"Validating models in {models}")
+    from ...registry import get_model_registry

-    generator = SchemaGenerator()
-    discovered = generator.discover_models(models)
+    # Import downstream model modules to trigger @rem.register_model decorators
+    imported_modules = _import_model_modules()
+    if imported_modules:
+        click.echo(f"Imported model modules: {', '.join(imported_modules)}")

-    if not discovered:
-        click.echo("✗ No models found", err=True)
-        raise click.Abort()
+    registry = get_model_registry()
+    models = registry.get_models(include_core=True)
+
+    click.echo(f"Validating {len(models)} registered models")

-    click.echo(f"✓ Discovered {len(discovered)} models")
+    if not models:
+        click.echo("✗ No models found in registry", err=True)
+        raise click.Abort()

+    generator = SchemaGenerator()
     errors: list[str] = []
     warnings: list[str] = []

-    for model_name, model in discovered.items():
-        table_name = generator.infer_table_name(model)
-        entity_key = generator.infer_entity_key_field(model)
+    for model_name, ext in models.items():
+        model = ext.model
+        table_name = ext.table_name or generator.infer_table_name(model)
+        entity_key = ext.entity_key_field or generator.infer_entity_key_field(model)

         # Check for entity_key
         if entity_key == "id":
             warnings.append(f"{model_name}: No natural key field, using 'id'")

-        # Check for embeddable fields
-        # TODO: Implement should_embed_field check
-        embeddable: list[str] = []  # Placeholder - needs implementation
-
         click.echo(f" {model_name} -> {table_name} (key: {entity_key})")

     if warnings:
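
Note: both commands now consume model extensions from the new rem/registry.py (+373 lines, not included in this section). Based only on the attributes used above (ext.model, ext.table_name, ext.entity_key_field) and the @rem.register_model decorator shown in the generate docstring, a rough sketch of the registry contract might look like this; the actual implementation may differ:

    # Hypothetical sketch of the registry contract; not the real rem/registry.py.
    from dataclasses import dataclass

    from pydantic import BaseModel


    @dataclass
    class ModelExtension:
        model: type[BaseModel]
        table_name: str | None = None        # optional override, else inferred
        entity_key_field: str | None = None  # optional override, else inferred


    _MODELS: dict[str, ModelExtension] = {}


    def register_model(cls: type[BaseModel]) -> type[BaseModel]:
        # Decorator: record the model so schema generation/validation sees it.
        _MODELS[cls.__name__] = ModelExtension(model=cls)
        return cls


    class ModelRegistry:
        def get_models(self, include_core: bool = True) -> dict[str, ModelExtension]:
            # Core REM entities would be merged in when include_core=True;
            # this sketch returns user-registered models only.
            return dict(_MODELS)


    def get_model_registry() -> ModelRegistry:
        return ModelRegistry()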