remdb 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +2 -0
- rem/agentic/README.md +650 -0
- rem/agentic/__init__.py +39 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +8 -0
- rem/agentic/context.py +148 -0
- rem/agentic/context_builder.py +329 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +107 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +151 -0
- rem/agentic/providers/phoenix.py +674 -0
- rem/agentic/providers/pydantic_ai.py +572 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +396 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +231 -0
- rem/api/README.md +420 -0
- rem/api/main.py +324 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +536 -0
- rem/api/mcp_router/server.py +213 -0
- rem/api/mcp_router/tools.py +584 -0
- rem/api/routers/auth.py +229 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/completions.py +281 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +124 -0
- rem/api/routers/chat/streaming.py +185 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +26 -0
- rem/auth/middleware.py +100 -0
- rem/auth/providers/__init__.py +13 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +455 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +126 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +566 -0
- rem/cli/commands/configure.py +497 -0
- rem/cli/commands/db.py +493 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1302 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +245 -0
- rem/cli/commands/schema.py +183 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +96 -0
- rem/config.py +237 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +64 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +628 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +243 -0
- rem/models/entities/__init__.py +43 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +35 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +191 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/user.py +85 -0
- rem/py.typed +0 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +128 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +16 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +806 -0
- rem/services/content/service.py +676 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +336 -0
- rem/services/dreaming/moment_service.py +264 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +120 -0
- rem/services/embeddings/worker.py +421 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +686 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +575 -0
- rem/services/postgres/__init__.py +23 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
- rem/services/postgres/register_type.py +352 -0
- rem/services/postgres/repository.py +337 -0
- rem/services/postgres/schema_generator.py +379 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +354 -0
- rem/services/rem/README.md +304 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +145 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +527 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +6 -0
- rem/services/session/compression.py +360 -0
- rem/services/session/reload.py +77 -0
- rem/settings.py +1235 -0
- rem/sql/002_install_models.sql +1068 -0
- rem/sql/background_indexes.sql +42 -0
- rem/sql/install_models.sql +1038 -0
- rem/sql/migrations/001_install.sql +503 -0
- rem/sql/migrations/002_install_models.sql +1202 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +583 -0
- rem/utils/__init__.py +43 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +423 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/markdown.py +16 -0
- rem/utils/model_helpers.py +236 -0
- rem/utils/schema_loader.py +336 -0
- rem/utils/sql_types.py +348 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +330 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +5 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- remdb-0.3.0.dist-info/METADATA +1455 -0
- remdb-0.3.0.dist-info/RECORD +187 -0
- remdb-0.3.0.dist-info/WHEEL +4 -0
- remdb-0.3.0.dist-info/entry_points.txt +2 -0
rem/cli/commands/mcp.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""
|
|
2
|
+
REM MCP server command.
|
|
3
|
+
|
|
4
|
+
Run the FastMCP server in standalone mode (stdio transport for Claude Desktop).
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
rem mcp # Run MCP server in stdio mode
|
|
8
|
+
rem mcp --http # Run in HTTP mode (for testing)
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import click
|
|
12
|
+
from loguru import logger
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@click.command("mcp")
@click.option("--http", is_flag=True, help="Run in HTTP mode instead of stdio (for testing)")
@click.option("--host", default="0.0.0.0", help="Host to bind to (HTTP mode only)")
@click.option("--port", default=8001, type=int, help="Port to listen on (HTTP mode only)")
def mcp_command(http: bool, host: str, port: int):
    """
    Run the REM MCP server.

    By default, runs in stdio mode for Claude Desktop integration.
    Use --http for testing in HTTP mode.

    Examples:
        # Stdio mode (for Claude Desktop)
        rem mcp

        # HTTP mode (for testing)
        rem mcp --http --port 8001
    """
    # NOTE: the docstring above is also what click prints for `rem mcp --help`.
    # Imported lazily so the server module is only loaded when the command runs.
    from rem.api.mcp_router.server import create_mcp_server

    # Stdio mode is local (allows local file paths); HTTP mode is remote
    # (s3/https only), hence the inverted flag.
    server = create_mcp_server(is_local=not http)

    if not http:
        # Default path: stdio transport for Claude Desktop.
        logger.info("Starting REM MCP server in stdio mode (Claude Desktop)")
        server.run()
        return

    # HTTP transport, intended for testing; --host/--port only apply here.
    logger.info(f"Starting REM MCP server in HTTP mode at http://{host}:{port}")
    import uvicorn

    asgi_app = server.get_asgi_app()
    uvicorn.run(asgi_app, host=host, port=port)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def register_command(cli_group):
    """Attach the ``mcp`` command to *cli_group* via ``add_command``."""
    cli_group.add_command(mcp_command)
|
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
"""File processing CLI commands."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import sys
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
import click
|
|
8
|
+
from loguru import logger
|
|
9
|
+
|
|
10
|
+
from rem.services.content import ContentService
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@click.command(name="ingest")
@click.argument("file_path", type=click.Path(exists=True))
@click.option("--user-id", required=True, help="User ID to own the file")
@click.option("--category", help="Optional file category")
@click.option("--tags", help="Optional comma-separated tags")
def process_ingest(
    file_path: str,
    user_id: str,
    category: str | None,
    tags: str | None,
):
    """
    Ingest a file into REM (storage + parsing + embedding).

    This command performs the full ingestion pipeline:
    1. Reads the file from the local path.
    2. Stores it in the configured storage (local/S3).
    3. Parses the content.
    4. Chunks and embeds the content into Resources.
    5. Creates a File entity record.

    Examples:
        rem process ingest sample.pdf --user-id user-123
        rem process ingest contract.docx --user-id user-123 --category legal --tags contract,2023
    """
    # NOTE: the docstring above is also the `--help` text shown by click.
    # Exits the process with status 1 when ingestion fails or raises.
    import asyncio

    # Normalise "--tags a, b,,c" into ["a", "b", "c"]. Input that contains no
    # real tag (None, "", " , ") collapses to None so blank tags are never sent.
    tag_list = [part.strip() for part in tags.split(",") if part.strip()] if tags else None
    tag_list = tag_list or None

    async def _ingest():
        # Initialize ContentService with repositories for proper resource saving
        from rem.services.postgres import get_postgres_service
        from rem.services.postgres.repository import Repository
        from rem.models.entities import File, Resource

        db = get_postgres_service()
        if not db:
            raise RuntimeError("PostgreSQL service not available")
        await db.connect()

        try:
            file_repo = Repository(File, "files", db=db)
            resource_repo = Repository(Resource, "resources", db=db)
            service = ContentService(file_repo=file_repo, resource_repo=resource_repo)

            logger.info(f"Ingesting file: {file_path} for user: {user_id}")
            result = await service.ingest_file(
                file_uri=file_path,
                user_id=user_id,
                category=category,
                tags=tag_list,
                is_local_server=True,  # CLI is local
            )

            if result.get("processing_status") == "completed":
                logger.success(f"File ingested successfully: {result['file_name']}")
                logger.info(f"File ID: {result['file_id']}")
                logger.info(f"Resources created: {result['resources_created']}")
                logger.info(f"Status: {result['processing_status']}")
            else:
                logger.error(f"Ingestion failed: {result.get('message', 'Unknown error')}")
                # SystemExit is not an Exception subclass, so it bypasses the
                # handler below but still runs the cleanup in `finally`.
                sys.exit(1)

        except Exception as e:
            logger.error(f"Error during ingestion: {e}")
            sys.exit(1)
        finally:
            # The nested try/finally guarantees the database connection is
            # released even if draining the embedding worker fails unexpectedly.
            try:
                # Wait for global embedding worker to finish queued tasks
                from rem.services.embeddings.worker import get_global_embedding_worker

                try:
                    worker = get_global_embedding_worker()
                    if worker and worker.running and not worker.task_queue.empty():
                        logger.info(f"Waiting for {worker.task_queue.qsize()} embedding tasks to complete...")
                        # Per the original author's note, Worker.stop() waits
                        # for the queue to drain (see worker.py line ~148).
                        await worker.stop()
                except RuntimeError:
                    # Worker doesn't exist yet - no tasks queued
                    pass
            finally:
                await db.disconnect()

    asyncio.run(_ingest())
|
|
96
|
+
|
|
97
|
+
def register_commands(group: click.Group):
    """Add the ``uri``, ``files`` and ``ingest`` commands to *group*, in that order."""
    for command in (process_uri, process_files, process_ingest):
        group.add_command(command)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
@click.command(name="uri")
@click.argument("uri", type=str)
@click.option(
    "--output",
    "-o",
    type=click.Choice(["json", "text"]),
    default="json",
    help="Output format (json or text)",
)
@click.option(
    "--save",
    "-s",
    type=click.Path(),
    help="Save extracted content to file",
)
def process_uri(uri: str, output: str, save: str | None):
    """
    Process a file URI and extract content (READ-ONLY, no storage).

    **ARCHITECTURE NOTE - Code Path Comparison**:

    This CLI command provides READ-ONLY file processing:
    - Uses ContentService.process_uri() directly (no file storage, no DB writes)
    - Returns extracted content to stdout or saves to local file
    - No File entity created, no Resource chunks stored in database
    - Useful for testing file parsing without side effects

    Compare with MCP tool 'parse_and_ingest_file' (api/mcp_router/tools.py):
    - WRITES file to internal storage (~/.rem/fs/ or S3)
    - Creates File entity in database
    - Creates Resource chunks via ContentService.process_and_save()
    - Full ingestion pipeline for searchable content

    **SHARED CODE**: Both use ContentService for file parsing:
    - CLI: ContentService.process_uri() → extract only
    - MCP: ContentService.process_and_save() → extract + store chunks

    URI can be:
    - S3 URI: s3://bucket/key
    - Local file: /path/to/file.md or ./file.md

    Examples:

        \b
        # Process local markdown file
        rem process uri ./README.md

        \b
        # Process S3 file
        rem process uri s3://rem/uploads/document.md

        \b
        # Save to file
        rem process uri s3://rem/uploads/doc.md -s output.json

        \b
        # Text-only output
        rem process uri ./file.md -o text
    """
    # NOTE: the docstring above is also the `--help` text; the "\b" markers are
    # backspace characters that tell click not to re-wrap the following paragraph.
    # The command always terminates the process: status 0 on success, 1 on error.
    try:
        service = ContentService()
        result = service.process_uri(uri)

        if output == "json":
            # default=str stringifies any value json cannot serialise natively.
            output_data = json.dumps(result, indent=2, default=str)
        else:
            # Text-only output
            output_data = result["content"]

        # Save to file or print to stdout
        if save:
            # Explicit UTF-8 so extracted text with non-ASCII characters is
            # written identically regardless of the platform's default encoding.
            with open(save, "w", encoding="utf-8") as f:
                f.write(output_data)
            logger.info(f"Saved to {save}")
        else:
            click.echo(output_data)

        # SystemExit is not an Exception subclass, so the handlers below do not
        # intercept this successful exit.
        sys.exit(0)

    except FileNotFoundError as e:
        logger.error(f"File not found: {e}")
        sys.exit(1)
    except RuntimeError as e:
        logger.error(f"Processing error: {e}")
        sys.exit(1)
    except Exception as e:
        # Anything unexpected is logged with its traceback.
        logger.exception(f"Unexpected error: {e}")
        sys.exit(1)
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
@click.command(name="files")
@click.option("--tenant-id", required=True, help="Tenant ID")
@click.option("--user-id", help="Filter by user ID")
@click.option(
    "--status",
    type=click.Choice(["pending", "processing", "completed", "failed"]),
    help="Filter by status",
)
@click.option("--extractor", help="Run files through custom extractor (e.g., cv-parser-v1)")
@click.option("--limit", type=int, help="Max files to process")
@click.option("--provider", help="Optional LLM provider override")
@click.option("--model", help="Optional model override")
def process_files(
    tenant_id: str,
    user_id: Optional[str],
    status: Optional[str],
    extractor: Optional[str],
    limit: Optional[int],
    provider: Optional[str],
    model: Optional[str],
):
    """Process files with optional custom extractor.

    Query files from the database and optionally run them through
    a custom extractor to extract domain-specific knowledge.

    Examples:

        \b
        # List completed files
        rem process files --tenant-id acme-corp --status completed

        \b
        # Extract from CV files
        rem process files --tenant-id acme-corp --extractor cv-parser-v1 --limit 10

        \b
        # Extract with provider override
        rem process files --tenant-id acme-corp --extractor contract-analyzer-v1 \\
            --provider anthropic --model claude-sonnet-4-5
    """
    # NOTE: the docstring above is also the `--help` text shown by click.
    # This command is a stub: it only logs what it would do.
    logger.warning("Not implemented yet")
    logger.info(f"Would process files for tenant: {tenant_id}")

    # (value, message) pairs in reporting order; a message is logged only when
    # its option value is truthy.
    optional_notes = (
        (user_id, f"Filter: user_id={user_id}"),
        (status, f"Filter: status={status}"),
        (extractor, f"Extractor: {extractor}"),
        (limit, f"Limit: {limit} files"),
        (provider, f"Provider override: {provider}"),
        (model, f"Model override: {model}"),
    )
    for value, note in optional_notes:
        if value:
            logger.info(note)
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Schema generation commands.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
rem db schema generate --models src/rem/models/entities
|
|
6
|
+
rem db schema validate
|
|
7
|
+
rem db schema indexes --background
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import asyncio
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
import click
|
|
14
|
+
from loguru import logger
|
|
15
|
+
|
|
16
|
+
from ...settings import settings
|
|
17
|
+
from ...services.postgres.schema_generator import SchemaGenerator
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@click.command()
@click.option(
    "--models",
    "-m",
    required=True,
    type=click.Path(exists=True, path_type=Path),
    help="Directory containing Pydantic models",
)
@click.option(
    "--output",
    "-o",
    type=click.Path(path_type=Path),
    default="install_models.sql",
    help="Output SQL file (default: install_models.sql)",
)
@click.option(
    "--output-dir",
    type=click.Path(path_type=Path),
    default=None,
    help=f"Base output directory (default: {settings.sql_dir})",
)
def generate(models: Path, output: Path, output_dir: Path | None):
    """
    Generate database schema from Pydantic models.

    Scans the specified directory for Pydantic models and generates:
    - CREATE TABLE statements
    - Embeddings tables (embeddings_<table>)
    - KV_STORE triggers for cache maintenance
    - Indexes (foreground only)

    Output is written to src/rem/sql/install_models.sql by default.

    Example:
        rem db schema generate --models src/rem/models/entities

    This creates:
    - src/rem/sql/install_models.sql - Entity tables and triggers
    - src/rem/sql/background_indexes.sql - HNSW indexes (apply after data load)

    After generation, apply with:
        rem db migrate
    """
    # NOTE: the docstring above is also the `--help` text shown by click.
    click.echo(f"Discovering models in {models}")

    # Use settings.sql_dir if not provided
    actual_output_dir = output_dir or Path(settings.sql_dir)
    generator = SchemaGenerator(output_dir=actual_output_dir)

    # Generate schema
    try:
        # Only the file name of --output is used; the directory always comes
        # from --output-dir / settings.sql_dir. The returned SQL is not needed
        # here, so the result is not bound to a name.
        asyncio.run(generator.generate_from_directory(models, output_file=output.name))

        click.echo(f"✓ Schema generated: {len(generator.schemas)} tables")
        click.echo(f"✓ Written to: {actual_output_dir / output.name}")

        # Generate background indexes
        background_indexes = generator.generate_background_indexes()
        if background_indexes:
            bg_file = actual_output_dir / "background_indexes.sql"
            # Explicit UTF-8 keeps the generated SQL independent of the
            # platform's default encoding.
            bg_file.write_text(background_indexes, encoding="utf-8")
            click.echo(f"✓ Background indexes: {bg_file}")

        # Summary
        click.echo("\nGenerated tables:")
        for table_name, schema in generator.schemas.items():
            embeddable = len(schema.get("embeddable_fields", []))
            embed_status = f"({embeddable} embeddable fields)" if embeddable else "(no embeddings)"
            click.echo(f" - {table_name} {embed_status}")

    except Exception as e:
        # Log the full traceback, show a short message on stderr, then abort
        # while preserving the original exception as the cause.
        logger.exception("Schema generation failed")
        click.echo(f"✗ Error: {e}", err=True)
        raise click.Abort() from e
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
@click.command()
@click.option(
    "--models",
    "-m",
    required=True,
    type=click.Path(exists=True, path_type=Path),
    help="Directory containing Pydantic models",
)
def validate(models: Path):
    """
    Validate Pydantic models for schema generation.

    Checks:
    - Models can be loaded
    - Models have suitable entity_key fields
    - Fields with embeddings are properly configured
    """
    # NOTE: the docstring above is also the `--help` text shown by click.
    click.echo(f"Validating models in {models}")

    schema_gen = SchemaGenerator()
    found = schema_gen.discover_models(models)

    # Nothing to validate: report on stderr and abort.
    if not found:
        click.echo("✗ No models found", err=True)
        raise click.Abort()

    click.echo(f"✓ Discovered {len(found)} models")

    # `problems` is never populated by the current checks; it is kept so the
    # error-reporting path below stays in place for future checks.
    problems: list[str] = []
    notices: list[str] = []

    for name, model_cls in found.items():
        table = schema_gen.infer_table_name(model_cls)
        key_field = schema_gen.infer_entity_key_field(model_cls)

        # A key of "id" is reported as "no natural key field".
        if key_field == "id":
            notices.append(f"{name}: No natural key field, using 'id'")

        # Check for embeddable fields
        # TODO: Implement should_embed_field check
        embeddable: list[str] = []  # Placeholder - needs implementation

        click.echo(f" {name} -> {table} (key: {key_field})")

    if notices:
        click.echo("\nWarnings:")
        for notice in notices:
            click.echo(click.style(f" ⚠ {notice}", fg="yellow"))

    if problems:
        click.echo("\nErrors:")
        for problem in problems:
            click.echo(click.style(f" ✗ {problem}", fg="red"))
        raise click.Abort()

    click.echo("\n✓ All models valid")
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
@click.command()
@click.option(
    "--output",
    "-o",
    type=click.Path(path_type=Path),
    default=None,
    help=f"Output file for background indexes (default: {settings.sql_dir}/background_indexes.sql)",
)
def indexes(output: Path | None):
    """
    Generate SQL for background index creation.

    Creates HNSW vector indexes that should be run CONCURRENTLY
    after initial data load to avoid blocking writes.
    """
    # NOTE: the docstring above is also the `--help` text shown by click.
    # `output` is None when --output is omitted (the option's default), hence
    # the `Path | None` annotation. This command is currently a stub: it
    # writes no file and does not read `output`.
    click.echo("Generating background index SQL")

    # Constructed but not used further; kept in case construction has side
    # effects. NOTE(review): confirm and drop if it has none.
    generator = SchemaGenerator()

    # Load existing schemas (would need to be persisted or regenerated)
    click.echo(click.style("⚠ Note: This requires schemas to be generated first", fg="yellow"))
    click.echo(click.style("⚠ Run 'rem db schema generate' before 'rem db schema indexes'", fg="yellow"))
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def register_commands(schema_group):
    """Add the ``generate``, ``validate`` and ``indexes`` commands to *schema_group*, in that order."""
    for command in (generate, validate, indexes):
        schema_group.add_command(command)
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""
|
|
2
|
+
REM API server command.
|
|
3
|
+
|
|
4
|
+
Start the FastAPI server with uvicorn.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
rem serve # Start API server with default settings
|
|
8
|
+
rem serve --host 0.0.0.0 # Bind to all interfaces
|
|
9
|
+
rem serve --port 8080 # Use custom port
|
|
10
|
+
rem serve --reload # Enable auto-reload (development)
|
|
11
|
+
rem serve --workers 4 # Production mode with workers
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import click
|
|
15
|
+
from loguru import logger
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@click.command("serve")
@click.option(
    "--host",
    default=None,
    help="Host to bind to (overrides config/env)",
)
@click.option(
    "--port",
    default=None,
    type=int,
    help="Port to listen on (overrides config/env)",
)
@click.option(
    "--reload",
    is_flag=True,
    default=None,
    help="Enable auto-reload for development",
)
@click.option(
    "--workers",
    default=None,
    type=int,
    help="Number of worker processes (production mode)",
)
@click.option(
    "--log-level",
    default=None,
    type=click.Choice(["critical", "error", "warning", "info", "debug", "trace"]),
    help="Logging level",
)
def serve_command(
    host: str | None,
    port: int | None,
    reload: bool | None,
    workers: int | None,
    log_level: str | None,
):
    """
    Start the REM API server.

    The server will load configuration from:
    1. Environment variables (highest priority)
    2. ~/.rem/config.yaml (user configuration)
    3. Default values (lowest priority)

    Examples:
        rem serve                    # Use settings from config
        rem serve --host 0.0.0.0     # Bind to all interfaces
        rem serve --reload           # Development mode
        rem serve --workers 4        # Production mode (4 workers)
    """
    # NOTE: the docstring above is also the `--help` text shown by click.
    import uvicorn

    # Import settings to trigger config loading
    from rem.settings import settings

    # Determine reload/workers (mutually exclusive). An explicit CLI value
    # wins; otherwise the configured value is used unless the other option
    # was given on the command line.
    use_reload = reload if reload is not None else (settings.api.reload if workers is None else False)
    use_workers = workers if workers is not None else (settings.api.workers if not use_reload else None)

    if use_reload and workers is not None:
        # Both flags were supplied; reload takes precedence in the branch below,
        # so tell the user instead of silently dropping --workers.
        logger.warning("--workers is ignored when auto-reload is enabled")

    # Resolve bind address and logging, CLI first, then settings.
    final_host = host or settings.api.host
    # `is not None` (rather than `or`) so an explicit `--port 0` is honoured
    # instead of being replaced by the configured port.
    final_port = port if port is not None else settings.api.port
    final_log_level = log_level or settings.api.log_level

    logger.info(f"Starting REM API server at http://{final_host}:{final_port}")

    # Call uvicorn.run with explicit parameters to satisfy type checker
    if use_reload:
        uvicorn.run(
            "rem.api.main:app",
            host=final_host,
            port=final_port,
            log_level=final_log_level,
            reload=True,
        )
    else:
        uvicorn.run(
            "rem.api.main:app",
            host=final_host,
            port=final_port,
            log_level=final_log_level,
            workers=use_workers,
        )
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def register_command(cli_group):
    """Attach the ``serve`` command to *cli_group* via ``add_command``."""
    cli_group.add_command(serve_command)
|