remdb 0.3.242__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (235) hide show
  1. rem/__init__.py +129 -0
  2. rem/agentic/README.md +760 -0
  3. rem/agentic/__init__.py +54 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +38 -0
  6. rem/agentic/agents/agent_manager.py +311 -0
  7. rem/agentic/agents/sse_simulator.py +502 -0
  8. rem/agentic/context.py +425 -0
  9. rem/agentic/context_builder.py +360 -0
  10. rem/agentic/llm_provider_models.py +301 -0
  11. rem/agentic/mcp/__init__.py +0 -0
  12. rem/agentic/mcp/tool_wrapper.py +273 -0
  13. rem/agentic/otel/__init__.py +5 -0
  14. rem/agentic/otel/setup.py +240 -0
  15. rem/agentic/providers/phoenix.py +926 -0
  16. rem/agentic/providers/pydantic_ai.py +854 -0
  17. rem/agentic/query.py +117 -0
  18. rem/agentic/query_helper.py +89 -0
  19. rem/agentic/schema.py +737 -0
  20. rem/agentic/serialization.py +245 -0
  21. rem/agentic/tools/__init__.py +5 -0
  22. rem/agentic/tools/rem_tools.py +242 -0
  23. rem/api/README.md +657 -0
  24. rem/api/deps.py +253 -0
  25. rem/api/main.py +460 -0
  26. rem/api/mcp_router/prompts.py +182 -0
  27. rem/api/mcp_router/resources.py +820 -0
  28. rem/api/mcp_router/server.py +243 -0
  29. rem/api/mcp_router/tools.py +1605 -0
  30. rem/api/middleware/tracking.py +172 -0
  31. rem/api/routers/admin.py +520 -0
  32. rem/api/routers/auth.py +898 -0
  33. rem/api/routers/chat/__init__.py +5 -0
  34. rem/api/routers/chat/child_streaming.py +394 -0
  35. rem/api/routers/chat/completions.py +702 -0
  36. rem/api/routers/chat/json_utils.py +76 -0
  37. rem/api/routers/chat/models.py +202 -0
  38. rem/api/routers/chat/otel_utils.py +33 -0
  39. rem/api/routers/chat/sse_events.py +546 -0
  40. rem/api/routers/chat/streaming.py +950 -0
  41. rem/api/routers/chat/streaming_utils.py +327 -0
  42. rem/api/routers/common.py +18 -0
  43. rem/api/routers/dev.py +87 -0
  44. rem/api/routers/feedback.py +276 -0
  45. rem/api/routers/messages.py +620 -0
  46. rem/api/routers/models.py +86 -0
  47. rem/api/routers/query.py +362 -0
  48. rem/api/routers/shared_sessions.py +422 -0
  49. rem/auth/README.md +258 -0
  50. rem/auth/__init__.py +36 -0
  51. rem/auth/jwt.py +367 -0
  52. rem/auth/middleware.py +318 -0
  53. rem/auth/providers/__init__.py +16 -0
  54. rem/auth/providers/base.py +376 -0
  55. rem/auth/providers/email.py +215 -0
  56. rem/auth/providers/google.py +163 -0
  57. rem/auth/providers/microsoft.py +237 -0
  58. rem/cli/README.md +517 -0
  59. rem/cli/__init__.py +8 -0
  60. rem/cli/commands/README.md +299 -0
  61. rem/cli/commands/__init__.py +3 -0
  62. rem/cli/commands/ask.py +549 -0
  63. rem/cli/commands/cluster.py +1808 -0
  64. rem/cli/commands/configure.py +495 -0
  65. rem/cli/commands/db.py +828 -0
  66. rem/cli/commands/dreaming.py +324 -0
  67. rem/cli/commands/experiments.py +1698 -0
  68. rem/cli/commands/mcp.py +66 -0
  69. rem/cli/commands/process.py +388 -0
  70. rem/cli/commands/query.py +109 -0
  71. rem/cli/commands/scaffold.py +47 -0
  72. rem/cli/commands/schema.py +230 -0
  73. rem/cli/commands/serve.py +106 -0
  74. rem/cli/commands/session.py +453 -0
  75. rem/cli/dreaming.py +363 -0
  76. rem/cli/main.py +123 -0
  77. rem/config.py +244 -0
  78. rem/mcp_server.py +41 -0
  79. rem/models/core/__init__.py +49 -0
  80. rem/models/core/core_model.py +70 -0
  81. rem/models/core/engram.py +333 -0
  82. rem/models/core/experiment.py +672 -0
  83. rem/models/core/inline_edge.py +132 -0
  84. rem/models/core/rem_query.py +246 -0
  85. rem/models/entities/__init__.py +68 -0
  86. rem/models/entities/domain_resource.py +38 -0
  87. rem/models/entities/feedback.py +123 -0
  88. rem/models/entities/file.py +57 -0
  89. rem/models/entities/image_resource.py +88 -0
  90. rem/models/entities/message.py +64 -0
  91. rem/models/entities/moment.py +123 -0
  92. rem/models/entities/ontology.py +181 -0
  93. rem/models/entities/ontology_config.py +131 -0
  94. rem/models/entities/resource.py +95 -0
  95. rem/models/entities/schema.py +87 -0
  96. rem/models/entities/session.py +84 -0
  97. rem/models/entities/shared_session.py +180 -0
  98. rem/models/entities/subscriber.py +175 -0
  99. rem/models/entities/user.py +93 -0
  100. rem/py.typed +0 -0
  101. rem/registry.py +373 -0
  102. rem/schemas/README.md +507 -0
  103. rem/schemas/__init__.py +6 -0
  104. rem/schemas/agents/README.md +92 -0
  105. rem/schemas/agents/core/agent-builder.yaml +235 -0
  106. rem/schemas/agents/core/moment-builder.yaml +178 -0
  107. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  108. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  109. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  110. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  111. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  112. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  113. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  114. rem/schemas/agents/examples/hello-world.yaml +37 -0
  115. rem/schemas/agents/examples/query.yaml +54 -0
  116. rem/schemas/agents/examples/simple.yaml +21 -0
  117. rem/schemas/agents/examples/test.yaml +29 -0
  118. rem/schemas/agents/rem.yaml +132 -0
  119. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  120. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  121. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  122. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  123. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  124. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  125. rem/services/__init__.py +18 -0
  126. rem/services/audio/INTEGRATION.md +308 -0
  127. rem/services/audio/README.md +376 -0
  128. rem/services/audio/__init__.py +15 -0
  129. rem/services/audio/chunker.py +354 -0
  130. rem/services/audio/transcriber.py +259 -0
  131. rem/services/content/README.md +1269 -0
  132. rem/services/content/__init__.py +5 -0
  133. rem/services/content/providers.py +760 -0
  134. rem/services/content/service.py +762 -0
  135. rem/services/dreaming/README.md +230 -0
  136. rem/services/dreaming/__init__.py +53 -0
  137. rem/services/dreaming/affinity_service.py +322 -0
  138. rem/services/dreaming/moment_service.py +251 -0
  139. rem/services/dreaming/ontology_service.py +54 -0
  140. rem/services/dreaming/user_model_service.py +297 -0
  141. rem/services/dreaming/utils.py +39 -0
  142. rem/services/email/__init__.py +10 -0
  143. rem/services/email/service.py +522 -0
  144. rem/services/email/templates.py +360 -0
  145. rem/services/embeddings/__init__.py +11 -0
  146. rem/services/embeddings/api.py +127 -0
  147. rem/services/embeddings/worker.py +435 -0
  148. rem/services/fs/README.md +662 -0
  149. rem/services/fs/__init__.py +62 -0
  150. rem/services/fs/examples.py +206 -0
  151. rem/services/fs/examples_paths.py +204 -0
  152. rem/services/fs/git_provider.py +935 -0
  153. rem/services/fs/local_provider.py +760 -0
  154. rem/services/fs/parsing-hooks-examples.md +172 -0
  155. rem/services/fs/paths.py +276 -0
  156. rem/services/fs/provider.py +460 -0
  157. rem/services/fs/s3_provider.py +1042 -0
  158. rem/services/fs/service.py +186 -0
  159. rem/services/git/README.md +1075 -0
  160. rem/services/git/__init__.py +17 -0
  161. rem/services/git/service.py +469 -0
  162. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  163. rem/services/phoenix/README.md +453 -0
  164. rem/services/phoenix/__init__.py +46 -0
  165. rem/services/phoenix/client.py +960 -0
  166. rem/services/phoenix/config.py +88 -0
  167. rem/services/phoenix/prompt_labels.py +477 -0
  168. rem/services/postgres/README.md +757 -0
  169. rem/services/postgres/__init__.py +49 -0
  170. rem/services/postgres/diff_service.py +599 -0
  171. rem/services/postgres/migration_service.py +427 -0
  172. rem/services/postgres/programmable_diff_service.py +635 -0
  173. rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
  174. rem/services/postgres/register_type.py +353 -0
  175. rem/services/postgres/repository.py +481 -0
  176. rem/services/postgres/schema_generator.py +661 -0
  177. rem/services/postgres/service.py +802 -0
  178. rem/services/postgres/sql_builder.py +355 -0
  179. rem/services/rate_limit.py +113 -0
  180. rem/services/rem/README.md +318 -0
  181. rem/services/rem/__init__.py +23 -0
  182. rem/services/rem/exceptions.py +71 -0
  183. rem/services/rem/executor.py +293 -0
  184. rem/services/rem/parser.py +180 -0
  185. rem/services/rem/queries.py +196 -0
  186. rem/services/rem/query.py +371 -0
  187. rem/services/rem/service.py +608 -0
  188. rem/services/session/README.md +374 -0
  189. rem/services/session/__init__.py +13 -0
  190. rem/services/session/compression.py +488 -0
  191. rem/services/session/pydantic_messages.py +310 -0
  192. rem/services/session/reload.py +85 -0
  193. rem/services/user_service.py +130 -0
  194. rem/settings.py +1877 -0
  195. rem/sql/background_indexes.sql +52 -0
  196. rem/sql/migrations/001_install.sql +983 -0
  197. rem/sql/migrations/002_install_models.sql +3157 -0
  198. rem/sql/migrations/003_optional_extensions.sql +326 -0
  199. rem/sql/migrations/004_cache_system.sql +282 -0
  200. rem/sql/migrations/005_schema_update.sql +145 -0
  201. rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
  202. rem/utils/AGENTIC_CHUNKING.md +597 -0
  203. rem/utils/README.md +628 -0
  204. rem/utils/__init__.py +61 -0
  205. rem/utils/agentic_chunking.py +622 -0
  206. rem/utils/batch_ops.py +343 -0
  207. rem/utils/chunking.py +108 -0
  208. rem/utils/clip_embeddings.py +276 -0
  209. rem/utils/constants.py +97 -0
  210. rem/utils/date_utils.py +228 -0
  211. rem/utils/dict_utils.py +98 -0
  212. rem/utils/embeddings.py +436 -0
  213. rem/utils/examples/embeddings_example.py +305 -0
  214. rem/utils/examples/sql_types_example.py +202 -0
  215. rem/utils/files.py +323 -0
  216. rem/utils/markdown.py +16 -0
  217. rem/utils/mime_types.py +158 -0
  218. rem/utils/model_helpers.py +492 -0
  219. rem/utils/schema_loader.py +649 -0
  220. rem/utils/sql_paths.py +146 -0
  221. rem/utils/sql_types.py +350 -0
  222. rem/utils/user_id.py +81 -0
  223. rem/utils/vision.py +325 -0
  224. rem/workers/README.md +506 -0
  225. rem/workers/__init__.py +7 -0
  226. rem/workers/db_listener.py +579 -0
  227. rem/workers/db_maintainer.py +74 -0
  228. rem/workers/dreaming.py +502 -0
  229. rem/workers/engram_processor.py +312 -0
  230. rem/workers/sqs_file_processor.py +193 -0
  231. rem/workers/unlogged_maintainer.py +463 -0
  232. remdb-0.3.242.dist-info/METADATA +1632 -0
  233. remdb-0.3.242.dist-info/RECORD +235 -0
  234. remdb-0.3.242.dist-info/WHEEL +4 -0
  235. remdb-0.3.242.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,66 @@
1
+ """
2
+ REM MCP server command.
3
+
4
+ Run the FastMCP server in standalone mode (stdio transport for Claude Desktop).
5
+
6
+ Usage:
7
+ rem mcp # Run MCP server in stdio mode
8
+ rem mcp --http # Run in HTTP mode (for testing)
9
+ """
10
+
11
+ import click
12
+ from loguru import logger
13
+
14
+
15
@click.command("mcp")
@click.option("--http", is_flag=True, help="Run in HTTP mode instead of stdio (for testing)")
@click.option("--host", default="0.0.0.0", help="Host to bind to (HTTP mode only)")
@click.option("--port", default=8001, type=int, help="Port to listen on (HTTP mode only)")
def mcp_command(http: bool, host: str, port: int):
    """
    Run the REM MCP server.

    By default, runs in stdio mode for Claude Desktop integration.
    Use --http for testing in HTTP mode.

    Examples:
        # Stdio mode (for Claude Desktop)
        rem mcp

        # HTTP mode (for testing)
        rem mcp --http --port 8001
    """
    # Imported lazily so that loading the CLI does not pull in the server stack.
    from rem.api.mcp_router.server import create_mcp_server

    # Stdio mode is local (allows local file paths); HTTP mode is remote
    # (s3/https only), hence is_local is the inverse of --http.
    server = create_mcp_server(is_local=not http)

    if not http:
        # Default path: stdio transport for Claude Desktop. --host/--port are ignored.
        logger.info("Starting REM MCP server in stdio mode (Claude Desktop)")
        server.run()
        return

    # HTTP mode for testing: serve the server's ASGI app with uvicorn.
    logger.info(f"Starting REM MCP server in HTTP mode at http://{host}:{port}")
    import uvicorn

    uvicorn.run(server.get_asgi_app(), host=host, port=port)
62
+
63
+
64
def register_command(cli_group):
    """Attach the ``mcp`` command to *cli_group* via ``add_command``."""
    cli_group.add_command(mcp_command)
@@ -0,0 +1,388 @@
1
+ """File processing CLI commands."""
2
+
3
+ import json
4
+ import sys
5
+ from typing import Optional
6
+
7
+ import click
8
+ from loguru import logger
9
+
10
+ from rem.services.content import ContentService
11
+
12
+
13
@click.command(name="ingest")
@click.argument("path", type=click.Path(exists=True))
@click.option("--table", "-t", default=None, help="Target table (e.g., ontologies, resources). Auto-detected for schemas.")
@click.option("--make-private", is_flag=True, help="Make data private to a specific user. RARELY NEEDED - most data should be public/shared.")
@click.option("--user-id", default=None, help="User ID for private data. REQUIRES --make-private flag.")
@click.option("--category", help="Optional file category")
@click.option("--tags", help="Optional comma-separated tags")
@click.option("--pattern", "-p", default="**/*.md", help="Glob pattern for directory ingestion (default: **/*.md)")
@click.option("--dry-run", is_flag=True, help="Show what would be ingested without making changes")
def process_ingest(
    path: str,
    table: str | None,
    make_private: bool,
    user_id: str | None,
    category: str | None,
    tags: str | None,
    pattern: str,
    dry_run: bool,
):
    """
    Ingest files into REM (storage + parsing + embedding).

    Supports both single files and directories. For directories, recursively
    processes files matching the pattern (default: **/*.md).

    **IMPORTANT: Data is PUBLIC by default.** This is the correct behavior for
    shared knowledge bases (ontologies, procedures, reference data). Private
    user-scoped data is rarely needed and requires explicit --make-private flag.

    Target table is auto-detected for schemas (agent.yaml → schemas table).
    Use --table to explicitly set the target (e.g., ontologies for clinical knowledge).

    Examples:
        rem process ingest sample.pdf
        rem process ingest contract.docx --category legal --tags contract,2023
        rem process ingest agent.yaml # Auto-detects kind=agent, saves to schemas table

        # Directory ingestion into ontologies table (PUBLIC - no user-id needed)
        rem process ingest ontology/procedures/scid-5/ --table ontologies
        rem process ingest ontology/ --table ontologies --pattern "**/*.md"

        # Preview what would be ingested
        rem process ingest ontology/ --table ontologies --dry-run

        # RARE: Private user-scoped data (requires --make-private)
        rem process ingest private-notes.md --make-private --user-id user-123
    """
    import asyncio
    from pathlib import Path

    # --user-id and --make-private are only valid together; reject either one alone.
    if user_id and not make_private:
        raise click.UsageError(
            "Setting --user-id requires the --make-private flag.\n\n"
            "Data should be PUBLIC by default (no user-id). Private user-scoped data\n"
            "is rarely needed - only use --make-private for truly personal content.\n\n"
            "Example: rem process ingest file.md --make-private --user-id user-123"
        )

    if make_private and not user_id:
        raise click.UsageError(
            "--make-private requires --user-id to specify which user owns the data.\n\n"
            "Example: rem process ingest file.md --make-private --user-id user-123"
        )

    # None means public/shared data; only private ingestion carries a user id.
    effective_user_id = user_id if make_private else None

    async def _ingest():
        """Collect the target files, then ingest them (or just list them on --dry-run)."""
        from rem.models.entities import File, Resource
        from rem.services.postgres import get_postgres_service
        from rem.services.postgres.repository import Repository

        input_path = Path(path)

        # Trim whitespace around each tag and drop empty entries, so that
        # "--tags 'contract, 2023,'" yields ["contract", "2023"].
        tag_list = None
        if tags:
            tag_list = [tag.strip() for tag in tags.split(",") if tag.strip()] or None

        # Collect files to process. A glob can also match directories, which
        # cannot be read as files, so keep regular files only; sort so the
        # processing order (and the dry-run preview) is deterministic.
        if input_path.is_dir():
            files_to_process = sorted(p for p in input_path.glob(pattern) if p.is_file())
            if not files_to_process:
                logger.error(f"No files matching '{pattern}' found in {input_path}")
                sys.exit(1)
            logger.info(f"Found {len(files_to_process)} files matching '{pattern}'")
        else:
            files_to_process = [input_path]

        # Dry run: just show what would be processed (first 20 entries), no DB access.
        if dry_run:
            logger.info("DRY RUN - Would ingest:")
            for f in files_to_process[:20]:
                entity_key = f.stem  # filename without extension
                logger.info(f" {f} → {table or 'auto-detect'} (key: {entity_key})")
            if len(files_to_process) > 20:
                logger.info(f" ... and {len(files_to_process) - 20} more files")
            return

        db = get_postgres_service()
        if not db:
            raise RuntimeError("PostgreSQL service not available")
        await db.connect()

        try:
            if table:
                # Direct table ingestion (ontologies, etc.)
                await _ingest_to_table(
                    db=db,
                    files=files_to_process,
                    table_name=table,
                    user_id=effective_user_id,
                    category=category,
                    tag_list=tag_list,
                )
            else:
                # Standard file ingestion via ContentService
                file_repo = Repository(File, "files", db=db)
                resource_repo = Repository(Resource, "resources", db=db)
                service = ContentService(file_repo=file_repo, resource_repo=resource_repo)

                for file_path in files_to_process:
                    scope_msg = f"user: {effective_user_id}" if effective_user_id else "public"
                    logger.info(f"Ingesting: {file_path} ({scope_msg})")

                    result = await service.ingest_file(
                        file_uri=str(file_path),
                        user_id=effective_user_id,
                        category=category,
                        tags=tag_list,
                        is_local_server=True,
                    )

                    # Schema ingestion (agents/evaluators) reports a schema_name;
                    # regular files report a processing_status.
                    if result.get("schema_name"):
                        logger.success(f"Schema: {result['schema_name']} (kind={result.get('kind', 'agent')})")
                    elif result.get("processing_status") == "completed":
                        logger.success(f"File: {result['file_name']} ({result['resources_created']} resources)")
                    else:
                        logger.error(f"Failed: {result.get('message', 'Unknown error')}")

        except Exception as e:
            logger.error(f"Error during ingestion: {e}")
            sys.exit(1)
        finally:
            # Let queued embedding work finish, then always release the DB
            # connection, even if stopping the worker fails unexpectedly.
            try:
                from rem.services.embeddings.worker import get_global_embedding_worker

                worker = get_global_embedding_worker()
                if worker and worker.running and not worker.task_queue.empty():
                    logger.info(f"Waiting for {worker.task_queue.qsize()} embedding tasks...")
                    await worker.stop()
            except RuntimeError:
                # Best-effort: presumably raised when no worker is available.
                # TODO confirm against get_global_embedding_worker.
                pass
            finally:
                await db.disconnect()

    async def _ingest_to_table(db, files, table_name, user_id, category, tag_list):
        """Direct ingestion of files to a specific table (ontologies, etc.)."""
        from rem import get_model_registry
        from rem.services.postgres.repository import Repository
        from rem.utils.model_helpers import get_table_name

        # Resolve the model class whose table name matches the requested table.
        registry = get_model_registry()
        registry.register_core_models()
        model_class = next(
            (
                model
                for model in registry.get_model_classes().values()
                if get_table_name(model) == table_name
            ),
            None,
        )

        if not model_class:
            logger.error(f"Unknown table: {table_name}")
            sys.exit(1)

        repo = Repository(model_class, table_name, db=db)
        processed = 0
        failed = 0

        for file_path in files:
            # One bad file must not abort the batch: failures are counted and logged.
            try:
                content = file_path.read_text(encoding="utf-8")

                # Entity key comes from the filename. README files use the parent
                # directory as the section name instead, e.g. "drugs" for
                # drugs/README.md and "anxiety" for disorders/anxiety/README.md.
                if file_path.stem.lower() == "readme":
                    entity_key = file_path.parent.name
                else:
                    entity_key = file_path.stem  # filename without extension

                entity_data = {
                    "name": entity_key,
                    "content": content,
                    "tags": tag_list or [],
                }

                if category:
                    entity_data["category"] = category

                # Scoping: user_id for private data, "public" for shared.
                # tenant_id="public" is the default for shared knowledge bases.
                entity_data["tenant_id"] = user_id or "public"
                entity_data["user_id"] = user_id  # None = public/shared

                # For ontologies, record where the content came from.
                if table_name == "ontologies":
                    entity_data["uri"] = f"file://{file_path.absolute()}"

                entity = model_class(**entity_data)
                await repo.upsert(entity, embeddable_fields=["content"], generate_embeddings=True)
                processed += 1
                logger.success(f" ✓ {entity_key}")

            except Exception as e:
                failed += 1
                logger.error(f" ✗ {file_path.name}: {e}")

        logger.info(f"Completed: {processed} succeeded, {failed} failed")

    asyncio.run(_ingest())
238
+
239
def register_commands(group: click.Group):
    """Add the ``uri``, ``files`` and ``ingest`` commands to *group*, in that order."""
    for command in (process_uri, process_files, process_ingest):
        group.add_command(command)
244
+
245
+
246
@click.command(name="uri")
@click.argument("uri", type=str)
@click.option(
    "--output",
    "-o",
    type=click.Choice(["json", "text"]),
    default="json",
    help="Output format (json or text)",
)
@click.option(
    "--save",
    "-s",
    type=click.Path(),
    help="Save extracted content to file",
)
def process_uri(uri: str, output: str, save: str | None):
    """
    Process a file URI and extract content (READ-ONLY, no storage).

    **ARCHITECTURE NOTE - Code Path Comparison**:

    This CLI command provides READ-ONLY file processing:
    - Uses ContentService.process_uri() directly (no file storage, no DB writes)
    - Returns extracted content to stdout or saves to local file
    - No File entity created, no Resource chunks stored in database
    - Useful for testing file parsing without side effects

    Compare with MCP tool 'parse_and_ingest_file' (api/mcp_router/tools.py):
    - WRITES file to internal storage (~/.rem/fs/ or S3)
    - Creates File entity in database
    - Creates Resource chunks via ContentService.process_and_save()
    - Full ingestion pipeline for searchable content

    **SHARED CODE**: Both use ContentService for file parsing:
    - CLI: ContentService.process_uri() → extract only
    - MCP: ContentService.process_and_save() → extract + store chunks

    URI can be:
    - S3 URI: s3://bucket/key
    - Local file: /path/to/file.md or ./file.md

    Examples:

    \b
    # Process local markdown file
    rem process uri ./README.md

    \b
    # Process S3 file
    rem process uri s3://rem/uploads/document.md

    \b
    # Save to file
    rem process uri s3://rem/uploads/doc.md -s output.json

    \b
    # Text-only output
    rem process uri ./file.md -o text
    """
    try:
        service = ContentService()
        result = service.process_uri(uri)

        if output == "json":
            # default=str stringifies any value json cannot encode natively.
            output_data = json.dumps(result, indent=2, default=str)
        else:
            # Text-only output
            output_data = result["content"]

        # Save to file or print to stdout
        if save:
            # Write UTF-8 explicitly: the platform default encoding may be
            # unable to represent non-ASCII characters in extracted content.
            with open(save, "w", encoding="utf-8") as f:
                f.write(output_data)
            logger.info(f"Saved to {save}")
        else:
            click.echo(output_data)

        # SystemExit is not an Exception subclass, so the handlers below do
        # not intercept this successful exit.
        sys.exit(0)

    except FileNotFoundError as e:
        logger.error(f"File not found: {e}")
        sys.exit(1)
    except RuntimeError as e:
        logger.error(f"Processing error: {e}")
        sys.exit(1)
    except Exception as e:
        logger.exception(f"Unexpected error: {e}")
        sys.exit(1)
334
+
335
+
336
@click.command(name="files")
@click.option("--user-id", default=None, help="User ID (default: from settings)")
@click.option(
    "--status",
    type=click.Choice(["pending", "processing", "completed", "failed"]),
    help="Filter by status",
)
@click.option("--extractor", help="Run files through custom extractor (e.g., cv-parser-v1)")
@click.option("--limit", type=int, help="Max files to process")
@click.option("--provider", help="Optional LLM provider override")
@click.option("--model", help="Optional model override")
def process_files(
    user_id: Optional[str],
    status: Optional[str],
    extractor: Optional[str],
    limit: Optional[int],
    provider: Optional[str],
    model: Optional[str],
):
    """Process files with optional custom extractor.

    Query files from the database and optionally run them through
    a custom extractor to extract domain-specific knowledge.

    Examples:

    \b
    # List completed files
    rem process files --status completed

    \b
    # Extract from CV files
    rem process files --extractor cv-parser-v1 --limit 10

    \b
    # Extract with provider override
    rem process files --extractor contract-analyzer-v1 \\
    --provider anthropic --model claude-sonnet-4-5
    """
    from ...settings import settings

    # An explicit --user-id wins; otherwise use the user configured in settings.
    target_user = user_id or settings.test.effective_user_id

    # Placeholder implementation: only reports what would be done.
    logger.warning("Not implemented yet")
    logger.info(f"Would process files for user: {target_user}")

    # Echo every option that was supplied (truthy), in a fixed order.
    supplied_options = (
        (user_id, f"Filter: user_id={user_id}"),
        (status, f"Filter: status={status}"),
        (extractor, f"Extractor: {extractor}"),
        (limit, f"Limit: {limit} files"),
        (provider, f"Provider override: {provider}"),
        (model, f"Model override: {model}"),
    )
    for value, message in supplied_options:
        if value:
            logger.info(message)
@@ -0,0 +1,109 @@
1
+ """
2
+ REM query command.
3
+
4
+ Usage:
5
+ rem query --sql 'LOOKUP "Sarah Chen"'
6
+ rem query --sql 'SEARCH resources "API design" LIMIT 10'
7
+ rem query --sql "SELECT * FROM resources LIMIT 5"
8
+ rem query --file queries/my_query.sql
9
+
10
+ This tool connects to the configured PostgreSQL instance and executes the
11
+ provided REM dialect query, printing results as JSON (default) or plain dicts.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import asyncio
17
+ import json
18
+ from pathlib import Path
19
+ from typing import List
20
+
21
+ import click
22
+ from loguru import logger
23
+
24
+ from ...services.rem import QueryExecutionError
25
+ from ...services.rem.service import RemService
26
+
27
+
28
@click.command("query")
@click.option("--sql", "-s", default=None, help="REM query string (LOOKUP, SEARCH, FUZZY, TRAVERSE, or SQL)")
@click.option(
    "--file",
    "-f",
    "sql_file",
    type=click.Path(exists=True, path_type=Path),
    default=None,
    help="Path to file containing REM query",
)
@click.option("--no-json", is_flag=True, default=False, help="Print rows as Python dicts instead of JSON")
@click.option("--user-id", "-u", default=None, help="Scope query to a specific user")
def query_command(sql: str | None, sql_file: Path | None, no_json: bool, user_id: str | None):
    """
    Execute a REM query against the database.

    Supports REM dialect queries (LOOKUP, SEARCH, FUZZY, TRAVERSE) and raw SQL.
    Either --sql or --file must be provided.
    """
    if not sql and not sql_file:
        click.secho("Error: either --sql or --file is required", fg="red")
        raise click.Abort()

    # Read query from file if provided; --file takes precedence when both are given.
    if sql_file:
        query_text = sql_file.read_text(encoding="utf-8")
    else:
        query_text = sql  # type: ignore[assignment]

    try:
        asyncio.run(_run_query_async(query_text, not no_json, user_id))
    except click.Abort:
        # click.Abort subclasses RuntimeError, so without this clause the
        # generic handler below would catch an abort that _run_query_async has
        # already reported, logging a second traceback and printing an empty
        # "Query failed:" message.
        raise
    except Exception as exc:  # pragma: no cover - CLI error path
        logger.exception("Query failed")
        click.secho(f"✗ Query failed: {exc}", fg="red")
        raise click.Abort()
63
+
64
+
65
async def _run_query_async(query_text: str, as_json: bool, user_id: str | None) -> None:
    """
    Execute the query using RemService.execute_query_string().

    Prints the rows under the result's "results" key, either as indented JSON
    (``as_json``) or one ``str(row)`` per line. Raises ``click.Abort`` after
    printing a message when PostgreSQL is unavailable or the query fails.
    """
    from ...services.postgres import get_postgres_service

    db = get_postgres_service()
    if not db:
        click.secho("✗ PostgreSQL is disabled in settings. Enable with POSTGRES__ENABLED=true", fg="red")
        raise click.Abort()

    # Only open a connection if none exists yet, and remember that we did so
    # that only a pool opened here is closed again below.
    opened_here = db.pool is None
    if opened_here:
        await db.connect()

    try:
        rem_service = RemService(db)

        try:
            # Use the unified execute_query_string method
            result = await rem_service.execute_query_string(query_text, user_id=user_id)
            output_rows = result.get("results", [])
        except QueryExecutionError as qe:
            logger.exception("Query execution failed")
            click.secho(f"✗ Query execution failed: {qe}. Please check the query you provided and try again.", fg="red")
            raise click.Abort()
        except ValueError as ve:
            # Parse errors from the query parser
            click.secho(f"✗ Invalid query: {ve}", fg="red")
            raise click.Abort()
        except Exception as exc:  # pragma: no cover - CLI error path
            logger.exception("Unexpected error during query execution")
            click.secho("✗ An unexpected error occurred while executing the query. Please check the query you provided and try again.", fg="red")
            raise click.Abort()
    finally:
        # Release the connection on success and on every failure path.
        if opened_here:
            await db.disconnect()

    if as_json:
        # default=str stringifies any value json cannot encode natively.
        click.echo(json.dumps(output_rows, default=str, indent=2))
    else:
        for r in output_rows:
            click.echo(str(r))
103
+
104
+
105
def register_command(cli_group):
    """Attach the top-level ``query`` command to *cli_group* via ``add_command``."""
    cli_group.add_command(query_command)
108
+
109
+
@@ -0,0 +1,47 @@
1
+ """
2
+ Scaffold command - generate project structure for REM-based applications.
3
+
4
+ TODO: Implement this command to generate:
5
+ - my_app/main.py (entry point with create_app)
6
+ - my_app/models.py (example CoreModel subclass)
7
+ - my_app/routers/ (example FastAPI router)
8
+ - schemas/agents/ (example agent schema)
9
+ - schemas/evaluators/ (example evaluator)
10
+ - sql/migrations/ (empty migrations directory)
11
+ - pyproject.toml (with remdb dependency)
12
+ - README.md (basic usage instructions)
13
+
14
+ Usage:
15
+ rem scaffold my-app
16
+ rem scaffold my-app --with-examples # Include example models/routers/tools
17
+ """
18
+
19
+ import click
20
+
21
+
22
@click.command()
@click.argument("name")
@click.option("--with-examples", is_flag=True, help="Include example code")
def scaffold(name: str, with_examples: bool) -> None:
    """
    Generate a new REM-based project structure.

    NAME is the project directory name to create.
    """
    # The Python package directory uses underscores where the project name has hyphens.
    package_dir = name.replace('-', '_')

    # Not implemented yet: report the request and print the layout to create by hand.
    click.echo("TODO: Scaffold command not yet implemented")
    click.echo(f"Would create project: {name}")
    click.echo(f"With examples: {with_examples}")
    click.echo()
    click.echo("For now, manually create this structure:")
    # NOTE(review): spacing inside the tree below is reproduced as received;
    # confirm column alignment against the rendered output.
    click.echo(f"""
{name}/
├── {package_dir}/
│ ├── main.py # Entry point (create_app + extensions)
│ ├── models.py # Custom models (inherit CoreModel)
│ └── routers/ # Custom FastAPI routers
├── schemas/
│ ├── agents/ # Custom agent YAML schemas
│ └── evaluators/ # Custom evaluator schemas
├── sql/migrations/ # Custom SQL migrations
└── pyproject.toml
""")