remdb 0.3.242__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +129 -0
- rem/agentic/README.md +760 -0
- rem/agentic/__init__.py +54 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +38 -0
- rem/agentic/agents/agent_manager.py +311 -0
- rem/agentic/agents/sse_simulator.py +502 -0
- rem/agentic/context.py +425 -0
- rem/agentic/context_builder.py +360 -0
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +273 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +240 -0
- rem/agentic/providers/phoenix.py +926 -0
- rem/agentic/providers/pydantic_ai.py +854 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +737 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +242 -0
- rem/api/README.md +657 -0
- rem/api/deps.py +253 -0
- rem/api/main.py +460 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +820 -0
- rem/api/mcp_router/server.py +243 -0
- rem/api/mcp_router/tools.py +1605 -0
- rem/api/middleware/tracking.py +172 -0
- rem/api/routers/admin.py +520 -0
- rem/api/routers/auth.py +898 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/child_streaming.py +394 -0
- rem/api/routers/chat/completions.py +702 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +202 -0
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +546 -0
- rem/api/routers/chat/streaming.py +950 -0
- rem/api/routers/chat/streaming_utils.py +327 -0
- rem/api/routers/common.py +18 -0
- rem/api/routers/dev.py +87 -0
- rem/api/routers/feedback.py +276 -0
- rem/api/routers/messages.py +620 -0
- rem/api/routers/models.py +86 -0
- rem/api/routers/query.py +362 -0
- rem/api/routers/shared_sessions.py +422 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +36 -0
- rem/auth/jwt.py +367 -0
- rem/auth/middleware.py +318 -0
- rem/auth/providers/__init__.py +16 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/email.py +215 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +517 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +299 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +549 -0
- rem/cli/commands/cluster.py +1808 -0
- rem/cli/commands/configure.py +495 -0
- rem/cli/commands/db.py +828 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1698 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +388 -0
- rem/cli/commands/query.py +109 -0
- rem/cli/commands/scaffold.py +47 -0
- rem/cli/commands/schema.py +230 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/commands/session.py +453 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +123 -0
- rem/config.py +244 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +70 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +672 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +246 -0
- rem/models/entities/__init__.py +68 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +64 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +181 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/session.py +84 -0
- rem/models/entities/shared_session.py +180 -0
- rem/models/entities/subscriber.py +175 -0
- rem/models/entities/user.py +93 -0
- rem/py.typed +0 -0
- rem/registry.py +373 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/agent-builder.yaml +235 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +132 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +18 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +760 -0
- rem/services/content/service.py +762 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +322 -0
- rem/services/dreaming/moment_service.py +251 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/email/__init__.py +10 -0
- rem/services/email/service.py +522 -0
- rem/services/email/templates.py +360 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +127 -0
- rem/services/embeddings/worker.py +435 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +960 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +757 -0
- rem/services/postgres/__init__.py +49 -0
- rem/services/postgres/diff_service.py +599 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/programmable_diff_service.py +635 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
- rem/services/postgres/register_type.py +353 -0
- rem/services/postgres/repository.py +481 -0
- rem/services/postgres/schema_generator.py +661 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +355 -0
- rem/services/rate_limit.py +113 -0
- rem/services/rem/README.md +318 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +180 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +608 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +13 -0
- rem/services/session/compression.py +488 -0
- rem/services/session/pydantic_messages.py +310 -0
- rem/services/session/reload.py +85 -0
- rem/services/user_service.py +130 -0
- rem/settings.py +1877 -0
- rem/sql/background_indexes.sql +52 -0
- rem/sql/migrations/001_install.sql +983 -0
- rem/sql/migrations/002_install_models.sql +3157 -0
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +282 -0
- rem/sql/migrations/005_schema_update.sql +145 -0
- rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +628 -0
- rem/utils/__init__.py +61 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/constants.py +97 -0
- rem/utils/date_utils.py +228 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +436 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/files.py +323 -0
- rem/utils/markdown.py +16 -0
- rem/utils/mime_types.py +158 -0
- rem/utils/model_helpers.py +492 -0
- rem/utils/schema_loader.py +649 -0
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +350 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +325 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +7 -0
- rem/workers/db_listener.py +579 -0
- rem/workers/db_maintainer.py +74 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- rem/workers/unlogged_maintainer.py +463 -0
- remdb-0.3.242.dist-info/METADATA +1632 -0
- remdb-0.3.242.dist-info/RECORD +235 -0
- remdb-0.3.242.dist-info/WHEEL +4 -0
- remdb-0.3.242.dist-info/entry_points.txt +2 -0
rem/cli/commands/mcp.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""
|
|
2
|
+
REM MCP server command.
|
|
3
|
+
|
|
4
|
+
Run the FastMCP server in standalone mode (stdio transport for Claude Desktop).
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
rem mcp # Run MCP server in stdio mode
|
|
8
|
+
rem mcp --http # Run in HTTP mode (for testing)
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import click
|
|
12
|
+
from loguru import logger
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@click.command("mcp")
@click.option("--http", is_flag=True, help="Run in HTTP mode instead of stdio (for testing)")
@click.option("--host", default="0.0.0.0", help="Host to bind to (HTTP mode only)")
@click.option("--port", default=8001, type=int, help="Port to listen on (HTTP mode only)")
def mcp_command(http: bool, host: str, port: int):
    """
    Run the REM MCP server.

    By default, runs in stdio mode for Claude Desktop integration.
    Use --http for testing in HTTP mode.

    Examples:
        # Stdio mode (for Claude Desktop)
        rem mcp

        # HTTP mode (for testing)
        rem mcp --http --port 8001
    """
    # Imported inside the command so the server module is only loaded when
    # `rem mcp` is actually invoked.
    from rem.api.mcp_router.server import create_mcp_server

    # The server is built as "local" for stdio and "remote" for HTTP; per the
    # original note, local mode allows local file paths while remote mode is
    # restricted to s3/https.
    server = create_mcp_server(is_local=not http)

    if not http:
        # Default path: stdio transport for Claude Desktop.
        logger.info("Starting REM MCP server in stdio mode (Claude Desktop)")
        server.run()
        return

    # HTTP path (testing): serve the MCP ASGI application with uvicorn.
    logger.info(f"Starting REM MCP server in HTTP mode at http://{host}:{port}")
    import uvicorn

    # NOTE(review): confirm the object returned by create_mcp_server exposes
    # get_asgi_app(); its definition is not visible from this module.
    uvicorn.run(server.get_asgi_app(), host=host, port=port)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def register_command(cli_group: click.Group) -> None:
    """Register the mcp command.

    Args:
        cli_group: CLI group that receives ``mcp_command`` through its
            ``add_command`` method.
    """
    cli_group.add_command(mcp_command)
|
|
@@ -0,0 +1,388 @@
|
|
|
1
|
+
"""File processing CLI commands."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import sys
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
import click
|
|
8
|
+
from loguru import logger
|
|
9
|
+
|
|
10
|
+
from rem.services.content import ContentService
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@click.command(name="ingest")
@click.argument("path", type=click.Path(exists=True))
@click.option("--table", "-t", default=None, help="Target table (e.g., ontologies, resources). Auto-detected for schemas.")
@click.option("--make-private", is_flag=True, help="Make data private to a specific user. RARELY NEEDED - most data should be public/shared.")
@click.option("--user-id", default=None, help="User ID for private data. REQUIRES --make-private flag.")
@click.option("--category", help="Optional file category")
@click.option("--tags", help="Optional comma-separated tags")
@click.option("--pattern", "-p", default="**/*.md", help="Glob pattern for directory ingestion (default: **/*.md)")
@click.option("--dry-run", is_flag=True, help="Show what would be ingested without making changes")
def process_ingest(
    path: str,
    table: str | None,
    make_private: bool,
    user_id: str | None,
    category: str | None,
    tags: str | None,
    pattern: str,
    dry_run: bool,
):
    """
    Ingest files into REM (storage + parsing + embedding).

    Supports both single files and directories. For directories, recursively
    processes files matching the pattern (default: **/*.md).

    **IMPORTANT: Data is PUBLIC by default.** This is the correct behavior for
    shared knowledge bases (ontologies, procedures, reference data). Private
    user-scoped data is rarely needed and requires explicit --make-private flag.

    Target table is auto-detected for schemas (agent.yaml → schemas table).
    Use --table to explicitly set the target (e.g., ontologies for clinical knowledge).

    Examples:
        rem process ingest sample.pdf
        rem process ingest contract.docx --category legal --tags contract,2023
        rem process ingest agent.yaml # Auto-detects kind=agent, saves to schemas table

        # Directory ingestion into ontologies table (PUBLIC - no user-id needed)
        rem process ingest ontology/procedures/scid-5/ --table ontologies
        rem process ingest ontology/ --table ontologies --pattern "**/*.md"

        # Preview what would be ingested
        rem process ingest ontology/ --table ontologies --dry-run

        # RARE: Private user-scoped data (requires --make-private)
        rem process ingest private-notes.md --make-private --user-id user-123
    """
    import asyncio
    from pathlib import Path

    # --user-id only makes sense for private data, so reject it on its own.
    if user_id and not make_private:
        raise click.UsageError(
            "Setting --user-id requires the --make-private flag.\n\n"
            "Data should be PUBLIC by default (no user-id). Private user-scoped data\n"
            "is rarely needed - only use --make-private for truly personal content.\n\n"
            "Example: rem process ingest file.md --make-private --user-id user-123"
        )

    # Conversely, private data must name its owner.
    if make_private and not user_id:
        raise click.UsageError(
            "--make-private requires --user-id to specify which user owns the data.\n\n"
            "Example: rem process ingest file.md --make-private --user-id user-123"
        )

    # Public data is always stored with user_id=None.
    effective_user_id = user_id if make_private else None

    # Strip whitespace around each tag and drop empty entries so that
    # "a, b," yields ["a", "b"] instead of ["a", " b", ""].
    tag_list = None
    if tags:
        tag_list = [tag.strip() for tag in tags.split(",") if tag.strip()] or None

    def _entity_key_for(file_path: Path) -> str:
        """Return the entity key used when *file_path* is ingested into a table.

        README files are keyed by their parent directory name (for example
        ``drugs`` for ``drugs/README.md``); every other file is keyed by its
        filename without the extension.
        """
        if file_path.stem.lower() == "readme":
            return file_path.parent.name
        return file_path.stem

    async def _ingest():
        """Collect the target files and ingest them (or preview them on --dry-run)."""
        from rem.services.postgres import get_postgres_service
        from rem.services.postgres.repository import Repository
        from rem.models.entities import File, Resource

        input_path = Path(path)

        # Collect files to process
        if input_path.is_dir():
            # A glob can also match directories; only regular files can be
            # ingested. Sorting makes the processing order (and the dry-run
            # preview) deterministic.
            files_to_process = sorted(p for p in input_path.glob(pattern) if p.is_file())
            if not files_to_process:
                logger.error(f"No files matching '{pattern}' found in {input_path}")
                sys.exit(1)
            logger.info(f"Found {len(files_to_process)} files matching '{pattern}'")
        else:
            files_to_process = [input_path]

        # Dry run: just show what would be processed
        if dry_run:
            logger.info("DRY RUN - Would ingest:")
            for candidate in files_to_process[:20]:
                # For table ingestion, preview the same key that will actually
                # be written (README files use their parent directory name).
                entity_key = _entity_key_for(candidate) if table else candidate.stem
                logger.info(f" {candidate} → {table or 'auto-detect'} (key: {entity_key})")
            if len(files_to_process) > 20:
                logger.info(f" ... and {len(files_to_process) - 20} more files")
            return

        db = get_postgres_service()
        if not db:
            raise RuntimeError("PostgreSQL service not available")
        await db.connect()

        try:
            if table:
                # Direct table ingestion (ontologies, etc.)
                await _ingest_to_table(
                    db=db,
                    files=files_to_process,
                    table_name=table,
                    user_id=effective_user_id,
                    category=category,
                    tag_list=tag_list,
                )
            else:
                # Standard file ingestion via ContentService
                file_repo = Repository(File, "files", db=db)
                resource_repo = Repository(Resource, "resources", db=db)
                service = ContentService(file_repo=file_repo, resource_repo=resource_repo)
                scope_msg = f"user: {effective_user_id}" if effective_user_id else "public"

                for file_path in files_to_process:
                    logger.info(f"Ingesting: {file_path} ({scope_msg})")

                    result = await service.ingest_file(
                        file_uri=str(file_path),
                        user_id=effective_user_id,
                        category=category,
                        tags=tag_list,
                        is_local_server=True,
                    )

                    # Handle schema ingestion (agents/evaluators)
                    if result.get("schema_name"):
                        logger.success(f"Schema: {result['schema_name']} (kind={result.get('kind', 'agent')})")
                    elif result.get("processing_status") == "completed":
                        logger.success(f"File: {result['file_name']} ({result['resources_created']} resources)")
                    else:
                        logger.error(f"Failed: {result.get('message', 'Unknown error')}")

        except Exception as e:
            logger.error(f"Error during ingestion: {e}")
            sys.exit(1)
        finally:
            # Always release the database connection, even if waiting for the
            # embedding worker fails unexpectedly.
            try:
                # Wait for embedding worker to finish
                from rem.services.embeddings.worker import get_global_embedding_worker

                try:
                    worker = get_global_embedding_worker()
                    if worker and worker.running and not worker.task_queue.empty():
                        logger.info(f"Waiting for {worker.task_queue.qsize()} embedding tasks...")
                        await worker.stop()
                except RuntimeError:
                    # Best effort: presumably raised when no worker is
                    # available — TODO confirm against the worker module.
                    pass
            finally:
                await db.disconnect()

    async def _ingest_to_table(db, files, table_name, user_id, category, tag_list):
        """Direct ingestion of files to a specific table (ontologies, etc.)."""
        from rem.services.postgres.repository import Repository
        from rem import get_model_registry
        from rem.utils.model_helpers import get_table_name

        # Find the registered model whose table name matches the target table.
        registry = get_model_registry()
        registry.register_core_models()
        model_class = next(
            (
                model
                for model in registry.get_model_classes().values()
                if get_table_name(model) == table_name
            ),
            None,
        )

        if model_class is None:
            logger.error(f"Unknown table: {table_name}")
            sys.exit(1)

        repo = Repository(model_class, table_name, db=db)
        processed = 0
        failed = 0

        for file_path in files:
            try:
                content = file_path.read_text(encoding="utf-8")
                entity_key = _entity_key_for(file_path)

                entity_data = {
                    "name": entity_key,
                    "content": content,
                    "tags": tag_list or [],
                }

                if category:
                    entity_data["category"] = category

                # Scoping: user_id for private data, "public" for shared.
                # tenant_id="public" is the default for shared knowledge bases.
                entity_data["tenant_id"] = user_id or "public"
                entity_data["user_id"] = user_id  # None = public/shared

                # For ontologies, add URI
                if table_name == "ontologies":
                    entity_data["uri"] = f"file://{file_path.absolute()}"

                entity = model_class(**entity_data)
                await repo.upsert(entity, embeddable_fields=["content"], generate_embeddings=True)
                processed += 1
                logger.success(f" ✓ {entity_key}")

            except Exception as e:
                # One bad file must not abort the whole batch; count and continue.
                failed += 1
                logger.error(f" ✗ {file_path.name}: {e}")

        logger.info(f"Completed: {processed} succeeded, {failed} failed")

    asyncio.run(_ingest())
|
|
238
|
+
|
|
239
|
+
def register_commands(group: click.Group):
    """Attach the ``uri``, ``files`` and ``ingest`` process commands to *group*."""
    for subcommand in (process_uri, process_files, process_ingest):
        group.add_command(subcommand)
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
@click.command(name="uri")
@click.argument("uri", type=str)
@click.option(
    "--output",
    "-o",
    type=click.Choice(["json", "text"]),
    default="json",
    help="Output format (json or text)",
)
@click.option(
    "--save",
    "-s",
    type=click.Path(),
    help="Save extracted content to file",
)
def process_uri(uri: str, output: str, save: str | None):
    """
    Process a file URI and extract content (READ-ONLY, no storage).

    **ARCHITECTURE NOTE - Code Path Comparison**:

    This CLI command provides READ-ONLY file processing:
    - Uses ContentService.process_uri() directly (no file storage, no DB writes)
    - Returns extracted content to stdout or saves to local file
    - No File entity created, no Resource chunks stored in database
    - Useful for testing file parsing without side effects

    Compare with MCP tool 'parse_and_ingest_file' (api/mcp_router/tools.py):
    - WRITES file to internal storage (~/.rem/fs/ or S3)
    - Creates File entity in database
    - Creates Resource chunks via ContentService.process_and_save()
    - Full ingestion pipeline for searchable content

    **SHARED CODE**: Both use ContentService for file parsing:
    - CLI: ContentService.process_uri() → extract only
    - MCP: ContentService.process_and_save() → extract + store chunks

    URI can be:
    - S3 URI: s3://bucket/key
    - Local file: /path/to/file.md or ./file.md

    Examples:

    \b
    # Process local markdown file
    rem process uri ./README.md

    \b
    # Process S3 file
    rem process uri s3://rem/uploads/document.md

    \b
    # Save to file
    rem process uri s3://rem/uploads/doc.md -s output.json

    \b
    # Text-only output
    rem process uri ./file.md -o text
    """
    try:
        service = ContentService()
        result = service.process_uri(uri)

        if output == "json":
            # default=str stringifies any value json cannot serialize natively.
            output_data = json.dumps(result, indent=2, default=str)
        else:
            # Text-only output
            output_data = result["content"]

        # Save to file or print to stdout
        if save:
            # Write UTF-8 explicitly: extracted text may contain characters the
            # platform's default encoding cannot represent.
            with open(save, "w", encoding="utf-8") as f:
                f.write(output_data)
            logger.info(f"Saved to {save}")
        else:
            click.echo(output_data)

        # SystemExit is not an Exception subclass, so it passes through the
        # handlers below untouched.
        sys.exit(0)

    except FileNotFoundError as e:
        logger.error(f"File not found: {e}")
        sys.exit(1)
    except RuntimeError as e:
        logger.error(f"Processing error: {e}")
        sys.exit(1)
    except Exception as e:
        # Anything else is unexpected: include the traceback in the log.
        logger.exception(f"Unexpected error: {e}")
        sys.exit(1)
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
@click.command(name="files")
@click.option("--user-id", default=None, help="User ID (default: from settings)")
@click.option("--status", type=click.Choice(["pending", "processing", "completed", "failed"]), help="Filter by status")
@click.option("--extractor", help="Run files through custom extractor (e.g., cv-parser-v1)")
@click.option("--limit", type=int, help="Max files to process")
@click.option("--provider", help="Optional LLM provider override")
@click.option("--model", help="Optional model override")
def process_files(
    user_id: Optional[str],
    status: Optional[str],
    extractor: Optional[str],
    limit: Optional[int],
    provider: Optional[str],
    model: Optional[str],
):
    """Process files with optional custom extractor.

    Query files from the database and optionally run them through
    a custom extractor to extract domain-specific knowledge.

    Examples:

    \b
    # List completed files
    rem process files --status completed

    \b
    # Extract from CV files
    rem process files --extractor cv-parser-v1 --limit 10

    \b
    # Extract with provider override
    rem process files --extractor contract-analyzer-v1 \\
    --provider anthropic --model claude-sonnet-4-5
    """
    from ...settings import settings

    # Fall back to the user configured in settings when --user-id is omitted.
    target_user = user_id or settings.test.effective_user_id

    # Placeholder implementation: report what would happen, do no work.
    logger.warning("Not implemented yet")
    logger.info(f"Would process files for user: {target_user}")

    # Echo each option that was supplied, in a fixed order; falsy values
    # (None, empty string, 0) are skipped.
    supplied_options = (
        (user_id, f"Filter: user_id={user_id}"),
        (status, f"Filter: status={status}"),
        (extractor, f"Extractor: {extractor}"),
        (limit, f"Limit: {limit} files"),
        (provider, f"Provider override: {provider}"),
        (model, f"Model override: {model}"),
    )
    for value, message in supplied_options:
        if value:
            logger.info(message)
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
"""
|
|
2
|
+
REM query command.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
rem query --sql 'LOOKUP "Sarah Chen"'
|
|
6
|
+
rem query --sql 'SEARCH resources "API design" LIMIT 10'
|
|
7
|
+
rem query --sql "SELECT * FROM resources LIMIT 5"
|
|
8
|
+
rem query --file queries/my_query.sql
|
|
9
|
+
|
|
10
|
+
This tool connects to the configured PostgreSQL instance and executes the
|
|
11
|
+
provided REM dialect query, printing results as JSON (default) or plain dicts.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import asyncio
|
|
17
|
+
import json
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import List
|
|
20
|
+
|
|
21
|
+
import click
|
|
22
|
+
from loguru import logger
|
|
23
|
+
|
|
24
|
+
from ...services.rem import QueryExecutionError
|
|
25
|
+
from ...services.rem.service import RemService
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@click.command("query")
@click.option("--sql", "-s", default=None, help="REM query string (LOOKUP, SEARCH, FUZZY, TRAVERSE, or SQL)")
@click.option(
    "--file",
    "-f",
    "sql_file",
    type=click.Path(exists=True, path_type=Path),
    default=None,
    help="Path to file containing REM query",
)
@click.option("--no-json", is_flag=True, default=False, help="Print rows as Python dicts instead of JSON")
@click.option("--user-id", "-u", default=None, help="Scope query to a specific user")
def query_command(sql: str | None, sql_file: Path | None, no_json: bool, user_id: str | None):
    """
    Execute a REM query against the database.

    Supports REM dialect queries (LOOKUP, SEARCH, FUZZY, TRAVERSE) and raw SQL.
    Either --sql or --file must be provided.
    """
    if not sql and not sql_file:
        click.secho("Error: either --sql or --file is required", fg="red")
        raise click.Abort()

    # Read query from file if provided; --file takes precedence over --sql.
    if sql_file:
        query_text = sql_file.read_text(encoding="utf-8")
    else:
        query_text = sql  # type: ignore[assignment]

    try:
        asyncio.run(_run_query_async(query_text, not no_json, user_id))
    except click.Abort:
        # _run_query_async prints its own message before aborting. Abort
        # derives from RuntimeError, so without this clause the generic handler
        # below would log a traceback and report the failure a second time.
        raise
    except Exception as exc:  # pragma: no cover - CLI error path
        logger.exception("Query failed")
        click.secho(f"✗ Query failed: {exc}", fg="red")
        raise click.Abort() from exc
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
async def _run_query_async(query_text: str, as_json: bool, user_id: str | None) -> None:
    """
    Execute the query using RemService.execute_query_string().

    Args:
        query_text: REM dialect or SQL query string to execute.
        as_json: When true, print all rows as one indented JSON document;
            otherwise print each row with ``str()`` on its own line.
        user_id: Optional user id forwarded to the query execution.

    Raises:
        click.Abort: If no PostgreSQL service is available or the query fails;
            a message is printed before aborting.
    """
    from ...services.postgres import get_postgres_service

    db = get_postgres_service()
    if not db:
        click.secho("✗ PostgreSQL is disabled in settings. Enable with POSTGRES__ENABLED=true", fg="red")
        raise click.Abort()

    # Only open (and later close) the connection if nobody else has already
    # connected this service instance.
    opened_here = db.pool is None
    if opened_here:
        await db.connect()

    try:
        rem_service = RemService(db)

        try:
            # Use the unified execute_query_string method
            result = await rem_service.execute_query_string(query_text, user_id=user_id)
            output_rows = result.get("results", [])
        except QueryExecutionError as qe:
            logger.exception("Query execution failed")
            click.secho(f"✗ Query execution failed: {qe}. Please check the query you provided and try again.", fg="red")
            raise click.Abort()
        except ValueError as ve:
            # Parse errors from the query parser
            click.secho(f"✗ Invalid query: {ve}", fg="red")
            raise click.Abort()
        except Exception:  # pragma: no cover - CLI error path
            logger.exception("Unexpected error during query execution")
            click.secho("✗ An unexpected error occurred while executing the query. Please check the query you provided and try again.", fg="red")
            raise click.Abort()

        if as_json:
            # default=str stringifies values json cannot serialize natively.
            click.echo(json.dumps(output_rows, default=str, indent=2))
        else:
            for row in output_rows:
                click.echo(str(row))
    finally:
        # Release the connection this function opened so it is not left open
        # when the event loop shuts down.
        if opened_here:
            await db.disconnect()
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def register_command(cli_group: click.Group) -> None:
    """Register the query command on the given CLI group (top-level).

    Args:
        cli_group: CLI group that receives ``query_command`` through its
            ``add_command`` method.
    """
    cli_group.add_command(query_command)
|
|
108
|
+
|
|
109
|
+
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Scaffold command - generate project structure for REM-based applications.
|
|
3
|
+
|
|
4
|
+
TODO: Implement this command to generate:
|
|
5
|
+
- my_app/main.py (entry point with create_app)
|
|
6
|
+
- my_app/models.py (example CoreModel subclass)
|
|
7
|
+
- my_app/routers/ (example FastAPI router)
|
|
8
|
+
- schemas/agents/ (example agent schema)
|
|
9
|
+
- schemas/evaluators/ (example evaluator)
|
|
10
|
+
- sql/migrations/ (empty migrations directory)
|
|
11
|
+
- pyproject.toml (with remdb dependency)
|
|
12
|
+
- README.md (basic usage instructions)
|
|
13
|
+
|
|
14
|
+
Usage:
|
|
15
|
+
rem scaffold my-app
|
|
16
|
+
rem scaffold my-app --with-examples # Include example models/routers/tools
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import click
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@click.command()
@click.argument("name")
@click.option("--with-examples", is_flag=True, help="Include example code")
def scaffold(name: str, with_examples: bool) -> None:
    """
    Generate a new REM-based project structure.

    NAME is the project directory name to create.
    """
    # The package directory shown in the layout is the project name with
    # dashes replaced by underscores.
    package_dir = name.replace('-', '_')

    # Placeholder implementation: nothing is written to disk; the command only
    # prints what it would do and the layout to create by hand.
    status_lines = (
        "TODO: Scaffold command not yet implemented",
        f"Would create project: {name}",
        f"With examples: {with_examples}",
    )
    for line in status_lines:
        click.echo(line)
    click.echo()
    click.echo("For now, manually create this structure:")
    click.echo(f"""
{name}/
├── {package_dir}/
│ ├── main.py # Entry point (create_app + extensions)
│ ├── models.py # Custom models (inherit CoreModel)
│ └── routers/ # Custom FastAPI routers
├── schemas/
│ ├── agents/ # Custom agent YAML schemas
│ └── evaluators/ # Custom evaluator schemas
├── sql/migrations/ # Custom SQL migrations
└── pyproject.toml
""")
|