remdb-0.3.0-py3-none-any.whl → remdb-0.3.114-py3-none-any.whl
- rem/__init__.py +129 -2
- rem/agentic/README.md +76 -0
- rem/agentic/__init__.py +15 -0
- rem/agentic/agents/__init__.py +16 -2
- rem/agentic/agents/sse_simulator.py +500 -0
- rem/agentic/context.py +28 -22
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/otel/setup.py +92 -4
- rem/agentic/providers/phoenix.py +32 -43
- rem/agentic/providers/pydantic_ai.py +142 -22
- rem/agentic/schema.py +358 -21
- rem/agentic/tools/rem_tools.py +3 -3
- rem/api/README.md +238 -1
- rem/api/deps.py +255 -0
- rem/api/main.py +151 -37
- rem/api/mcp_router/resources.py +1 -1
- rem/api/mcp_router/server.py +17 -2
- rem/api/mcp_router/tools.py +143 -7
- rem/api/middleware/tracking.py +172 -0
- rem/api/routers/admin.py +277 -0
- rem/api/routers/auth.py +124 -0
- rem/api/routers/chat/completions.py +152 -16
- rem/api/routers/chat/models.py +7 -3
- rem/api/routers/chat/sse_events.py +526 -0
- rem/api/routers/chat/streaming.py +608 -45
- rem/api/routers/dev.py +81 -0
- rem/api/routers/feedback.py +148 -0
- rem/api/routers/messages.py +473 -0
- rem/api/routers/models.py +78 -0
- rem/api/routers/query.py +357 -0
- rem/api/routers/shared_sessions.py +406 -0
- rem/auth/middleware.py +126 -27
- rem/cli/commands/README.md +201 -70
- rem/cli/commands/ask.py +13 -10
- rem/cli/commands/cluster.py +1359 -0
- rem/cli/commands/configure.py +4 -3
- rem/cli/commands/db.py +350 -137
- rem/cli/commands/experiments.py +76 -72
- rem/cli/commands/process.py +22 -15
- rem/cli/commands/scaffold.py +47 -0
- rem/cli/commands/schema.py +95 -49
- rem/cli/main.py +29 -6
- rem/config.py +2 -2
- rem/models/core/core_model.py +7 -1
- rem/models/core/rem_query.py +5 -2
- rem/models/entities/__init__.py +21 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/message.py +30 -1
- rem/models/entities/session.py +83 -0
- rem/models/entities/shared_session.py +180 -0
- rem/models/entities/user.py +10 -3
- rem/registry.py +373 -0
- rem/schemas/agents/rem.yaml +7 -3
- rem/services/content/providers.py +94 -140
- rem/services/content/service.py +92 -20
- rem/services/dreaming/affinity_service.py +2 -16
- rem/services/dreaming/moment_service.py +2 -15
- rem/services/embeddings/api.py +24 -17
- rem/services/embeddings/worker.py +16 -16
- rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
- rem/services/phoenix/client.py +252 -19
- rem/services/postgres/README.md +159 -15
- rem/services/postgres/__init__.py +2 -1
- rem/services/postgres/diff_service.py +426 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
- rem/services/postgres/repository.py +132 -0
- rem/services/postgres/schema_generator.py +86 -5
- rem/services/postgres/service.py +6 -6
- rem/services/rate_limit.py +113 -0
- rem/services/rem/README.md +14 -0
- rem/services/rem/parser.py +44 -9
- rem/services/rem/service.py +36 -2
- rem/services/session/compression.py +17 -1
- rem/services/session/reload.py +1 -1
- rem/services/user_service.py +98 -0
- rem/settings.py +169 -17
- rem/sql/background_indexes.sql +21 -16
- rem/sql/migrations/001_install.sql +231 -54
- rem/sql/migrations/002_install_models.sql +457 -393
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/utils/constants.py +97 -0
- rem/utils/date_utils.py +228 -0
- rem/utils/embeddings.py +17 -4
- rem/utils/files.py +167 -0
- rem/utils/mime_types.py +158 -0
- rem/utils/model_helpers.py +156 -1
- rem/utils/schema_loader.py +191 -35
- rem/utils/sql_types.py +3 -1
- rem/utils/vision.py +9 -14
- rem/workers/README.md +14 -14
- rem/workers/db_maintainer.py +74 -0
- {remdb-0.3.0.dist-info → remdb-0.3.114.dist-info}/METADATA +303 -164
- {remdb-0.3.0.dist-info → remdb-0.3.114.dist-info}/RECORD +96 -70
- {remdb-0.3.0.dist-info → remdb-0.3.114.dist-info}/WHEEL +1 -1
- rem/sql/002_install_models.sql +0 -1068
- rem/sql/install_models.sql +0 -1038
- {remdb-0.3.0.dist-info → remdb-0.3.114.dist-info}/entry_points.txt +0 -0
rem/cli/commands/experiments.py
CHANGED
```diff
@@ -578,8 +578,7 @@ def run(
     from rem.services.git import GitService
     from rem.services.phoenix import PhoenixClient
     from rem.agentic.providers.phoenix import create_evaluator_from_schema
-    from
-    import pandas as pd
+    from rem.utils.date_utils import utc_now, to_iso, format_timestamp_for_experiment
     import os
 
     try:
@@ -615,36 +614,22 @@ def run(
     click.echo(f"  Mode: DRY RUN (no data will be saved)")
     click.echo()
 
-    # Load agent schema
+    # Load agent schema using centralized schema loader
     agent_name = config.agent_schema_ref.name
     agent_version = config.agent_schema_ref.version
 
     click.echo(f"Loading agent schema: {agent_name} (version: {agent_version or 'latest'})")
 
-
-    agent_schema = None
-    try:
-        git_svc = GitService()
-        agent_schema = git_svc.load_schema(agent_name, version=agent_version)
-        click.echo(f"✓ Loaded agent schema from Git")
-    except Exception as e:
-        logger.debug(f"Git not available, trying filesystem: {e}")
+    from rem.utils.schema_loader import load_agent_schema
 
-
-
-
-
-
-
-
-
-        except Exception as fs_error:
-            logger.error(f"Failed to load agent schema: Git: {e}, FS: {fs_error}")
-            click.echo(f"Error: Could not load agent schema '{agent_name}'")
-            click.echo(f"  Tried Git: {e}")
-            click.echo(f"  Tried filesystem: {schema_path}")
-            click.echo(f"  Make sure the schema exists")
-            raise click.Abort()
+    try:
+        agent_schema = load_agent_schema(agent_name)
+        click.echo(f"✓ Loaded agent schema: {agent_name}")
+    except FileNotFoundError as e:
+        logger.error(f"Failed to load agent schema: {e}")
+        click.echo(f"Error: Could not load agent schema '{agent_name}'")
+        click.echo(f"  {e}")
+        raise click.Abort()
 
     # Create agent function from schema
     from rem.agentic.providers.pydantic_ai import create_agent
@@ -683,73 +668,85 @@ def run(
             return {"output": serialized}
         return serialized if isinstance(serialized, dict) else {"output": str(serialized)}
 
-    # Load evaluator schema
+    # Load evaluator schema using centralized schema loader
     evaluator_name = config.evaluator_schema_ref.name
     evaluator_version = config.evaluator_schema_ref.version
 
-    # Resolve evaluator path (evaluators are organized by agent name)
-    evaluator_schema_path = f"rem/schemas/evaluators/{agent_name}/{evaluator_name}.yaml"
-
     click.echo(f"Loading evaluator: {evaluator_name} for agent {agent_name}")
 
-
-
-
-
-    )
-
-
-
-
-
+    # Try multiple evaluator path patterns (agent-specific, then generic)
+    evaluator_paths_to_try = [
+        f"{agent_name}/{evaluator_name}",  # e.g., hello-world/default
+        f"{agent_name}-{evaluator_name}",  # e.g., hello-world-default
+        evaluator_name,  # e.g., default (generic)
+    ]
+
+    evaluator_fn = None
+    evaluator_load_error = None
+
+    for evaluator_path in evaluator_paths_to_try:
+        try:
+            evaluator_fn = create_evaluator_from_schema(
+                evaluator_schema_path=evaluator_path,
+                model_name=None,  # Use default from schema
+            )
+            click.echo(f"✓ Loaded evaluator schema: {evaluator_path}")
+            break
+        except FileNotFoundError as e:
+            evaluator_load_error = e
+            logger.debug(f"Evaluator not found at {evaluator_path}: {e}")
+            continue
+        except Exception as e:
+            evaluator_load_error = e
+            logger.warning(f"Failed to load evaluator from {evaluator_path}: {e}")
+            continue
+
+    if evaluator_fn is None:
+        click.echo(f"Error: Could not load evaluator schema '{evaluator_name}'")
+        click.echo(f"  Tried paths: {evaluator_paths_to_try}")
+        if evaluator_load_error:
+            click.echo(f"  Last error: {evaluator_load_error}")
         raise click.Abort()
 
-    # Load dataset
+    # Load dataset using Polars
+    import polars as pl
+
     click.echo(f"Loading dataset: {list(config.datasets.keys())[0]}")
     dataset_ref = list(config.datasets.values())[0]
 
     if dataset_ref.location.value == "git":
-        # Load from Git
+        # Load from Git (local filesystem)
        dataset_path = Path(base_path) / name / dataset_ref.path
        if not dataset_path.exists():
            click.echo(f"Error: Dataset not found: {dataset_path}")
            raise click.Abort()
 
        if dataset_ref.format == "csv":
-            dataset_df =
+            dataset_df = pl.read_csv(dataset_path)
        elif dataset_ref.format == "parquet":
-            dataset_df =
+            dataset_df = pl.read_parquet(dataset_path)
        elif dataset_ref.format == "jsonl":
-            dataset_df =
+            dataset_df = pl.read_ndjson(dataset_path)
        else:
            click.echo(f"Error: Format '{dataset_ref.format}' not yet supported")
            raise click.Abort()
    elif dataset_ref.location.value in ["s3", "hybrid"]:
        # Load from S3 using FS provider
        from rem.services.fs import FS
+        from io import BytesIO
 
        fs = FS()
 
        try:
            if dataset_ref.format == "csv":
                content = fs.read(dataset_ref.path)
-
-                dataset_df = pd.read_csv(StringIO(content))
+                dataset_df = pl.read_csv(BytesIO(content.encode() if isinstance(content, str) else content))
            elif dataset_ref.format == "parquet":
-
-
-                with tempfile.NamedTemporaryFile(suffix=".parquet", delete=False) as tmp:
-                    tmp_path = tmp.name
-                    # Download via FS
-                    content_bytes = fs.read(dataset_ref.path)
-                    tmp.write(content_bytes)
-                dataset_df = pd.read_parquet(tmp_path)
-                Path(tmp_path).unlink()  # Clean up temp file
+                content_bytes = fs.read(dataset_ref.path)
+                dataset_df = pl.read_parquet(BytesIO(content_bytes if isinstance(content_bytes, bytes) else content_bytes.encode()))
            elif dataset_ref.format == "jsonl":
                content = fs.read(dataset_ref.path)
-
-                dataset_df = pd.read_json(StringIO(content), lines=True)
+                dataset_df = pl.read_ndjson(BytesIO(content.encode() if isinstance(content, str) else content))
            else:
                click.echo(f"Error: Format '{dataset_ref.format}' not yet supported")
                raise click.Abort()
@@ -793,13 +790,13 @@ def run(
 
     client = PhoenixClient(config=phoenix_config)
 
-    experiment_name = f"{config.name}-{
+    experiment_name = f"{config.name}-{format_timestamp_for_experiment()}"
 
     click.echo(f"\n⏳ Running experiment: {experiment_name}")
     click.echo(f"  This may take several minutes...")
 
     experiment = client.run_experiment(
-        dataset=dataset_df,
+        dataset=dataset_df,
         task=task_fn,
         evaluators=[evaluator_fn],
         experiment_name=experiment_name,
@@ -809,12 +806,15 @@ def run(
             "evaluator": config.evaluator_schema_ref.name,
             "experiment_config": config.name,
             **config.metadata
-        }
+        },
+        # Smart column detection for DataFrame -> Phoenix Dataset conversion
+        input_keys=["input"] if "input" in dataset_df.columns else None,
+        output_keys=["expected_output"] if "expected_output" in dataset_df.columns else None,
     )
 
     # Update experiment status
     config.status = ExperimentStatus.COMPLETED
-    config.last_run_at =
+    config.last_run_at = utc_now()
     if not version:  # Only save if not loading from Git
         config.save(base_path)
 
@@ -835,7 +835,7 @@ def run(
         "agent": config.agent_schema_ref.name,
         "evaluator": config.evaluator_schema_ref.name,
         "dataset_size": len(dataset_df),
-        "completed_at":
+        "completed_at": to_iso(utc_now()),
         "phoenix_url": getattr(experiment, "url", None),
         "task_runs": len(exp_data.get("task_runs", [])),
     }
@@ -1015,20 +1015,24 @@ def dataset_add(
         --output-keys expected_label,expected_type
     """
     from rem.services.phoenix import PhoenixClient
-    import
+    import polars as pl
 
     try:
         client = PhoenixClient()
 
-        # Load CSV
-        df =
+        # Load CSV with Polars
+        df = pl.read_csv(from_csv)
+        records = df.to_dicts()
 
         # Extract data
-
-
+        input_cols = input_keys.split(",")
+        output_cols = output_keys.split(",")
+        inputs = [{k: row.get(k) for k in input_cols} for row in records]
+        outputs = [{k: row.get(k) for k in output_cols} for row in records]
         metadata = None
         if metadata_keys:
-
+            meta_cols = metadata_keys.split(",")
+            metadata = [{k: row.get(k) for k in meta_cols} for row in records]
 
         # Add to dataset
         dataset = client.add_examples_to_dataset(
@@ -1269,12 +1273,12 @@ def trace_list(
         rem experiments trace list --project rem-agents --days 7 --limit 50
     """
     from rem.services.phoenix import PhoenixClient
-    from
+    from rem.utils.date_utils import days_ago
 
     try:
         client = PhoenixClient()
 
-        start_time =
+        start_time = days_ago(days)
 
         traces_df = client.get_traces(
             project_name=project,
```
rem/cli/commands/process.py
CHANGED
```diff
@@ -12,12 +12,12 @@ from rem.services.content import ContentService
 
 @click.command(name="ingest")
 @click.argument("file_path", type=click.Path(exists=True))
-@click.option("--user-id",
+@click.option("--user-id", default=None, help="User ID to scope file privately (default: public/shared)")
 @click.option("--category", help="Optional file category")
 @click.option("--tags", help="Optional comma-separated tags")
 def process_ingest(
     file_path: str,
-    user_id: str,
+    user_id: str | None,
     category: str | None,
     tags: str | None,
 ):
@@ -32,8 +32,9 @@ def process_ingest(
     5. Creates a File entity record.
 
     Examples:
-        rem process ingest sample.pdf
-        rem process ingest contract.docx --
+        rem process ingest sample.pdf
+        rem process ingest contract.docx --category legal --tags contract,2023
+        rem process ingest agent.yaml  # Auto-detects kind=agent, saves to schemas table
     """
     import asyncio
     from ...services.content import ContentService
@@ -56,7 +57,8 @@ def process_ingest(
 
         tag_list = tags.split(",") if tags else None
 
-
+        scope_msg = f"user: {user_id}" if user_id else "public"
+        logger.info(f"Ingesting file: {file_path} ({scope_msg})")
         result = await service.ingest_file(
             file_uri=file_path,
             user_id=user_id,
@@ -65,11 +67,15 @@ def process_ingest(
             is_local_server=True,  # CLI is local
         )
 
-
-
+        # Handle schema ingestion (agents/evaluators)
+        if result.get("schema_name"):
+            logger.success(f"Schema ingested: {result['schema_name']} (kind={result.get('kind', 'agent')})")
+            logger.info(f"Version: {result.get('version', '1.0.0')}")
+        # Handle file ingestion
+        elif result.get("processing_status") == "completed":
+            logger.success(f"File ingested: {result['file_name']}")
             logger.info(f"File ID: {result['file_id']}")
             logger.info(f"Resources created: {result['resources_created']}")
-            logger.info(f"Status: {result['processing_status']}")
         else:
             logger.error(f"Ingestion failed: {result.get('message', 'Unknown error')}")
             sys.exit(1)
@@ -192,15 +198,13 @@ def process_uri(uri: str, output: str, save: str | None):
 
 
 @click.command(name="files")
-@click.option("--
-@click.option("--user-id", help="Filter by user ID")
+@click.option("--user-id", default=None, help="User ID (default: from settings)")
 @click.option("--status", type=click.Choice(["pending", "processing", "completed", "failed"]), help="Filter by status")
 @click.option("--extractor", help="Run files through custom extractor (e.g., cv-parser-v1)")
 @click.option("--limit", type=int, help="Max files to process")
 @click.option("--provider", help="Optional LLM provider override")
 @click.option("--model", help="Optional model override")
 def process_files(
-    tenant_id: str,
     user_id: Optional[str],
     status: Optional[str],
     extractor: Optional[str],
@@ -217,19 +221,22 @@ def process_files(
 
     \b
     # List completed files
-    rem process files --
+    rem process files --status completed
 
     \b
     # Extract from CV files
-    rem process files --
+    rem process files --extractor cv-parser-v1 --limit 10
 
     \b
     # Extract with provider override
-    rem process files --
+    rem process files --extractor contract-analyzer-v1 \\
         --provider anthropic --model claude-sonnet-4-5
     """
+    from ...settings import settings
+    effective_user_id = user_id or settings.test.effective_user_id
+
     logger.warning("Not implemented yet")
-    logger.info(f"Would process files for
+    logger.info(f"Would process files for user: {effective_user_id}")
 
     if user_id:
         logger.info(f"Filter: user_id={user_id}")
```
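
`process_ingest` now branches on the shape of the dict returned by `ContentService.ingest_file`, which is not shown in this diff. Judging only from the keys the CLI reads, the two result shapes look roughly like this (key names come from the branch logic above; values and exact shapes are invented for illustration):

```python
# Illustrative only: key names come from the CLI branches above,
# values are guesses, not the service's actual output.
schema_result = {
    "schema_name": "hello-world",  # present when a YAML agent/evaluator was ingested
    "kind": "agent",
    "version": "1.0.0",
}
file_result = {
    "processing_status": "completed",
    "file_name": "sample.pdf",
    "file_id": "<uuid>",
    "resources_created": 3,
}
```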
rem/cli/commands/scaffold.py
ADDED
```diff
@@ -0,0 +1,47 @@
+"""
+Scaffold command - generate project structure for REM-based applications.
+
+TODO: Implement this command to generate:
+- my_app/main.py (entry point with create_app)
+- my_app/models.py (example CoreModel subclass)
+- my_app/routers/ (example FastAPI router)
+- schemas/agents/ (example agent schema)
+- schemas/evaluators/ (example evaluator)
+- sql/migrations/ (empty migrations directory)
+- pyproject.toml (with remdb dependency)
+- README.md (basic usage instructions)
+
+Usage:
+    rem scaffold my-app
+    rem scaffold my-app --with-examples  # Include example models/routers/tools
+"""
+
+import click
+
+
+@click.command()
+@click.argument("name")
+@click.option("--with-examples", is_flag=True, help="Include example code")
+def scaffold(name: str, with_examples: bool) -> None:
+    """
+    Generate a new REM-based project structure.
+
+    NAME is the project directory name to create.
+    """
+    click.echo(f"TODO: Scaffold command not yet implemented")
+    click.echo(f"Would create project: {name}")
+    click.echo(f"With examples: {with_examples}")
+    click.echo()
+    click.echo("For now, manually create this structure:")
+    click.echo(f"""
+{name}/
+├── {name.replace('-', '_')}/
+│   ├── main.py          # Entry point (create_app + extensions)
+│   ├── models.py        # Custom models (inherit CoreModel)
+│   └── routers/         # Custom FastAPI routers
+├── schemas/
+│   ├── agents/          # Custom agent YAML schemas
+│   └── evaluators/      # Custom evaluator schemas
+├── sql/migrations/      # Custom SQL migrations
+└── pyproject.toml
+""")
```
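
The new command is defined here but its registration is not part of this hunk (`rem/cli/main.py` also changed, +29 -6). A plausible wiring, assuming `main.py` exposes a click group — the group name `cli` is a guess:

```python
# Hypothetical registration in rem/cli/main.py; `cli` is an assumed group name.
from rem.cli.commands.scaffold import scaffold

cli.add_command(scaffold)
```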
rem/cli/commands/schema.py
CHANGED
```diff
@@ -8,6 +8,7 @@ Usage:
 """
 
 import asyncio
+import importlib
 from pathlib import Path
 
 import click
@@ -17,66 +18,113 @@ from ...settings import settings
 from ...services.postgres.schema_generator import SchemaGenerator
 
 
+def _import_model_modules() -> list[str]:
+    """
+    Import modules specified in MODELS__IMPORT_MODULES setting.
+
+    This ensures downstream models decorated with @rem.register_model
+    are registered before schema generation.
+
+    Returns:
+        List of successfully imported module names
+    """
+    imported = []
+    for module_name in settings.models.module_list:
+        try:
+            importlib.import_module(module_name)
+            imported.append(module_name)
+            logger.debug(f"Imported model module: {module_name}")
+        except ImportError as e:
+            logger.warning(f"Failed to import model module '{module_name}': {e}")
+            click.echo(
+                click.style(f"  ⚠ Could not import '{module_name}': {e}", fg="yellow"),
+                err=True,
+            )
+    return imported
+
+
 @click.command()
-@click.option(
-    "--models",
-    "-m",
-    required=True,
-    type=click.Path(exists=True, path_type=Path),
-    help="Directory containing Pydantic models",
-)
 @click.option(
     "--output",
     "-o",
     type=click.Path(path_type=Path),
-    default="
-    help="Output SQL file (default:
+    default="002_install_models.sql",
+    help="Output SQL file (default: 002_install_models.sql)",
 )
 @click.option(
     "--output-dir",
     type=click.Path(path_type=Path),
     default=None,
-    help=f"Base output directory (default: {settings.sql_dir})",
+    help=f"Base output directory (default: {settings.sql_dir}/migrations)",
 )
-def generate(
+def generate(output: Path, output_dir: Path | None):
     """
-    Generate database schema from Pydantic models.
+    Generate database schema from registered Pydantic models.
 
-
+    Uses the model registry (core models + user-registered models) to generate:
     - CREATE TABLE statements
     - Embeddings tables (embeddings_<table>)
     - KV_STORE triggers for cache maintenance
     - Indexes (foreground only)
 
-    Output is written to src/rem/sql/
+    Output is written to src/rem/sql/migrations/002_install_models.sql by default.
 
     Example:
-        rem db schema generate
+        rem db schema generate
+
+    To register custom models in downstream apps:
+
+    1. Create models with @rem.register_model decorator:
+
+        # models/__init__.py
+        import rem
+        from rem.models.core import CoreModel
+
+        @rem.register_model
+        class MyEntity(CoreModel):
+            name: str
+
+    2. Set MODELS__IMPORT_MODULES in your .env:
+
+        MODELS__IMPORT_MODULES=models
+
+    3. Run schema generation:
+
+        rem db schema generate
 
     This creates:
-    - src/rem/sql/
+    - src/rem/sql/migrations/002_install_models.sql - Entity tables and triggers
     - src/rem/sql/background_indexes.sql - HNSW indexes (apply after data load)
 
-    After generation,
-    rem db
+    After generation, verify with:
+        rem db diff
     """
-
+    from ...registry import get_model_registry
+
+    # Import downstream model modules to trigger @rem.register_model decorators
+    imported_modules = _import_model_modules()
+    if imported_modules:
+        click.echo(f"Imported model modules: {', '.join(imported_modules)}")
+
+    registry = get_model_registry()
+    models = registry.get_models(include_core=True)
+    click.echo(f"Generating schema from {len(models)} registered models")
 
-    #
-    actual_output_dir = output_dir or Path(settings.sql_dir)
+    # Default to migrations directory
+    actual_output_dir = output_dir or Path(settings.sql_dir) / "migrations"
     generator = SchemaGenerator(output_dir=actual_output_dir)
 
-    # Generate schema
+    # Generate schema from registry
     try:
-        schema_sql = asyncio.run(generator.
+        schema_sql = asyncio.run(generator.generate_from_registry(output_file=output.name))
 
         click.echo(f"✓ Schema generated: {len(generator.schemas)} tables")
         click.echo(f"✓ Written to: {actual_output_dir / output.name}")
 
-        # Generate background indexes
+        # Generate background indexes in parent sql dir
         background_indexes = generator.generate_background_indexes()
         if background_indexes:
-            bg_file =
+            bg_file = Path(settings.sql_dir) / "background_indexes.sql"
             bg_file.write_text(background_indexes)
             click.echo(f"✓ Background indexes: {bg_file}")
 
@@ -94,48 +142,46 @@ def generate(models: Path, output: Path, output_dir: Path | None):
 
 
 @click.command()
-@click.option(
-    "--models",
-    "-m",
-    required=True,
-    type=click.Path(exists=True, path_type=Path),
-    help="Directory containing Pydantic models",
-)
-def validate(models: Path):
+def validate():
     """
-    Validate Pydantic models for schema generation.
+    Validate registered Pydantic models for schema generation.
 
     Checks:
-    - Models can be loaded
+    - Models can be loaded from registry
     - Models have suitable entity_key fields
     - Fields with embeddings are properly configured
+
+    Set MODELS__IMPORT_MODULES to include custom models from downstream apps.
     """
-
+    from ...registry import get_model_registry
 
-
-
+    # Import downstream model modules to trigger @rem.register_model decorators
+    imported_modules = _import_model_modules()
+    if imported_modules:
+        click.echo(f"Imported model modules: {', '.join(imported_modules)}")
 
-
-
-
+    registry = get_model_registry()
+    models = registry.get_models(include_core=True)
+
+    click.echo(f"Validating {len(models)} registered models")
 
-
+    if not models:
+        click.echo("✗ No models found in registry", err=True)
+        raise click.Abort()
 
+    generator = SchemaGenerator()
     errors: list[str] = []
     warnings: list[str] = []
 
-    for model_name,
-
-
+    for model_name, ext in models.items():
+        model = ext.model
+        table_name = ext.table_name or generator.infer_table_name(model)
+        entity_key = ext.entity_key_field or generator.infer_entity_key_field(model)
 
         # Check for entity_key
         if entity_key == "id":
             warnings.append(f"{model_name}: No natural key field, using 'id'")
 
-        # Check for embeddable fields
-        # TODO: Implement should_embed_field check
-        embeddable: list[str] = []  # Placeholder - needs implementation
-
         click.echo(f"  {model_name} -> {table_name} (key: {entity_key})")
 
     if warnings:
```