remdb 0.3.230__py3-none-any.whl → 0.3.258__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. rem/agentic/__init__.py +10 -1
  2. rem/agentic/context.py +13 -2
  3. rem/agentic/context_builder.py +45 -34
  4. rem/agentic/providers/pydantic_ai.py +302 -110
  5. rem/api/mcp_router/resources.py +223 -0
  6. rem/api/mcp_router/tools.py +76 -10
  7. rem/api/routers/auth.py +113 -10
  8. rem/api/routers/chat/child_streaming.py +22 -8
  9. rem/api/routers/chat/completions.py +3 -3
  10. rem/api/routers/chat/sse_events.py +3 -3
  11. rem/api/routers/chat/streaming.py +40 -45
  12. rem/api/routers/chat/streaming_utils.py +5 -7
  13. rem/api/routers/feedback.py +2 -2
  14. rem/api/routers/query.py +5 -14
  15. rem/cli/commands/ask.py +144 -33
  16. rem/cli/commands/experiments.py +1 -1
  17. rem/cli/commands/process.py +9 -1
  18. rem/cli/commands/query.py +109 -0
  19. rem/cli/commands/session.py +117 -0
  20. rem/cli/main.py +2 -0
  21. rem/models/core/experiment.py +1 -1
  22. rem/models/entities/session.py +1 -0
  23. rem/schemas/agents/core/agent-builder.yaml +1 -1
  24. rem/schemas/agents/test_orchestrator.yaml +42 -0
  25. rem/schemas/agents/test_structured_output.yaml +52 -0
  26. rem/services/content/providers.py +151 -49
  27. rem/services/postgres/repository.py +1 -0
  28. rem/services/rem/README.md +4 -3
  29. rem/services/rem/parser.py +7 -10
  30. rem/services/rem/service.py +47 -0
  31. rem/services/session/compression.py +7 -3
  32. rem/services/session/pydantic_messages.py +25 -7
  33. rem/services/session/reload.py +2 -1
  34. rem/settings.py +64 -7
  35. rem/sql/migrations/004_cache_system.sql +3 -1
  36. rem/utils/schema_loader.py +135 -103
  37. {remdb-0.3.230.dist-info → remdb-0.3.258.dist-info}/METADATA +6 -5
  38. {remdb-0.3.230.dist-info → remdb-0.3.258.dist-info}/RECORD +40 -37
  39. {remdb-0.3.230.dist-info → remdb-0.3.258.dist-info}/WHEEL +0 -0
  40. {remdb-0.3.230.dist-info → remdb-0.3.258.dist-info}/entry_points.txt +0 -0
@@ -331,6 +331,123 @@ async def _show_async(
331
331
  raise
332
332
 
333
333
 
334
+ @session.command("clone")
335
+ @click.argument("session_id")
336
+ @click.option("--to-turn", "-t", type=int, help="Clone up to turn N (counting user messages only)")
337
+ @click.option("--name", "-n", help="Name/description for the cloned session")
338
+ def clone(session_id: str, to_turn: int | None, name: str | None):
339
+ """
340
+ Clone a session for exploring alternate conversation paths.
341
+
342
+ SESSION_ID: The session ID to clone.
343
+
344
+ Examples:
345
+
346
+ # Clone entire session
347
+ rem session clone 810f1f2d-d5a1-4c02-83b6-67040b47f7c0
348
+
349
+ # Clone up to turn 3 (first 3 user messages and their responses)
350
+ rem session clone 810f1f2d-d5a1-4c02-83b6-67040b47f7c0 --to-turn 3
351
+
352
+ # Clone with a descriptive name
353
+ rem session clone 810f1f2d-d5a1-4c02-83b6-67040b47f7c0 -n "Alternate anxiety path"
354
+ """
355
+ asyncio.run(_clone_async(session_id, to_turn, name))
356
+
357
+
358
+ async def _clone_async(
359
+ session_id: str,
360
+ to_turn: int | None,
361
+ name: str | None,
362
+ ):
363
+ """Async implementation of clone command."""
364
+ from uuid import uuid4
365
+ from ...models.entities.session import Session, SessionMode
366
+
367
+ pg = get_postgres_service()
368
+ if not pg:
369
+ logger.error("PostgreSQL not available")
370
+ return
371
+
372
+ await pg.connect()
373
+
374
+ try:
375
+ # Load original session messages
376
+ message_repo = Repository(Message, "messages", db=pg)
377
+ messages = await message_repo.find(
378
+ filters={"session_id": session_id},
379
+ order_by="created_at ASC",
380
+ limit=1000,
381
+ )
382
+
383
+ if not messages:
384
+ logger.error(f"No messages found for session {session_id}")
385
+ return
386
+
387
+ # If --to-turn specified, filter messages up to that turn (user messages)
388
+ if to_turn is not None:
389
+ user_count = 0
390
+ cutoff_idx = len(messages)
391
+ for idx, msg in enumerate(messages):
392
+ if msg.message_type == "user":
393
+ user_count += 1
394
+ if user_count > to_turn:
395
+ cutoff_idx = idx
396
+ break
397
+ messages = messages[:cutoff_idx]
398
+ logger.info(f"Cloning {len(messages)} messages (up to turn {to_turn})")
399
+ else:
400
+ logger.info(f"Cloning all {len(messages)} messages")
401
+
402
+ # Generate new session ID
403
+ new_session_id = str(uuid4())
404
+
405
+ # Get user_id and tenant_id from first message
406
+ first_msg = messages[0]
407
+ user_id = first_msg.user_id
408
+ tenant_id = first_msg.tenant_id or "default"
409
+
410
+ # Create Session record with CLONE mode and lineage
411
+ session_repo = Repository(Session, "sessions", db=pg)
412
+ new_session = Session(
413
+ id=uuid4(),
414
+ name=name or f"Clone of {session_id[:8]}",
415
+ mode=SessionMode.CLONE,
416
+ original_trace_id=session_id,
417
+ description=f"Cloned from session {session_id}" + (f" at turn {to_turn}" if to_turn else ""),
418
+ user_id=user_id,
419
+ tenant_id=tenant_id,
420
+ message_count=len(messages),
421
+ )
422
+ await session_repo.upsert(new_session)
423
+ logger.info(f"Created session record: {new_session.id}")
424
+
425
+ # Copy messages with new session_id
426
+ for msg in messages:
427
+ new_msg = Message(
428
+ id=uuid4(),
429
+ user_id=msg.user_id,
430
+ tenant_id=msg.tenant_id,
431
+ session_id=str(new_session.id),
432
+ content=msg.content,
433
+ message_type=msg.message_type,
434
+ metadata=msg.metadata,
435
+ )
436
+ await message_repo.upsert(new_msg)
437
+
438
+ click.echo(f"\n✅ Cloned session successfully!")
439
+ click.echo(f" Original: {session_id}")
440
+ click.echo(f" New: {new_session.id}")
441
+ click.echo(f" Messages: {len(messages)}")
442
+ if to_turn:
443
+ click.echo(f" Turns: {to_turn}")
444
+ click.echo(f"\nContinue this session with:")
445
+ click.echo(f" rem ask <agent> \"your message\" --session-id {new_session.id}")
446
+
447
+ finally:
448
+ await pg.disconnect()
449
+
450
+
334
451
  def register_command(cli_group):
335
452
  """Register the session command group."""
336
453
  cli_group.add_command(session)
rem/cli/main.py CHANGED
@@ -97,6 +97,7 @@ from .commands.mcp import register_command as register_mcp_command
97
97
  from .commands.scaffold import scaffold as scaffold_command
98
98
  from .commands.cluster import register_commands as register_cluster_commands
99
99
  from .commands.session import register_command as register_session_command
100
+ from .commands.query import register_command as register_query_command
100
101
 
101
102
  register_schema_commands(schema)
102
103
  register_db_commands(db)
@@ -107,6 +108,7 @@ register_ask_command(cli)
107
108
  register_configure_command(cli)
108
109
  register_serve_command(cli)
109
110
  register_mcp_command(cli)
111
+ register_query_command(cli)
110
112
  cli.add_command(experiments_group)
111
113
  cli.add_command(scaffold_command)
112
114
  register_session_command(cli)
@@ -461,7 +461,7 @@ class ExperimentConfig(BaseModel):
461
461
  """
462
462
  Get the evaluator filename with task prefix.
463
463
 
464
- Returns: {agent_name}-{task}.yaml (e.g., siggy-risk-assessment.yaml)
464
+ Returns: {agent_name}-{task}.yaml (e.g., rem-risk-assessment.yaml)
465
465
  """
466
466
  return f"{self.agent_schema_ref.name}-{self.task}.yaml"
467
467
 
@@ -21,6 +21,7 @@ class SessionMode(str, Enum):
21
21
 
22
22
  NORMAL = "normal"
23
23
  EVALUATION = "evaluation"
24
+ CLONE = "clone"
24
25
 
25
26
 
26
27
  class Session(CoreModel):
@@ -229,7 +229,7 @@ json_schema_extra:
229
229
  - uri: rem://agents
230
230
  description: "List all available agent schemas with descriptions"
231
231
  - uri: rem://agents/{agent_name}
232
- description: "Load a specific agent schema by name (e.g., 'rem', 'siggy')"
232
+ description: "Load a specific agent schema by name (e.g., 'rem', 'intake')"
233
233
  tools:
234
234
  - name: save_agent
235
235
  description: "Save the agent schema. Only call when user approves the preview in Step 6."
@@ -0,0 +1,42 @@
1
+ # =============================================================================
2
+ # TEST ORCHESTRATOR AGENT
3
+ # =============================================================================
4
+ # Parent agent that delegates to test_structured_output for testing
5
+ # the structured output persistence feature.
6
+ # =============================================================================
7
+
8
+ name: test_orchestrator
9
+ version: "1.0"
10
+ description: |
11
+ You are an orchestrator that helps analyze user messages.
12
+
13
+ When the user provides a message to analyze, you MUST:
14
+ 1. Call the ask_agent tool to delegate to "test_structured_output"
15
+ 2. Return the structured result to the user
16
+
17
+ ## CRITICAL RULES
18
+
19
+ - ALWAYS call ask_agent with agent_name="test_structured_output"
20
+ - Pass the user's message as input_text
21
+ - Report back the structured result you receive
22
+
23
+ type: object
24
+ properties:
25
+ answer:
26
+ type: string
27
+ description: Response to the user
28
+
29
+ required:
30
+ - answer
31
+
32
+ json_schema_extra:
33
+ kind: agent
34
+ name: test_orchestrator
35
+ version: "1.0.0"
36
+ tags: [test, orchestrator]
37
+ tools:
38
+ - name: ask_agent
39
+ description: |
40
+ Delegate to the test_structured_output agent to analyze the message.
41
+ Always use agent_name="test_structured_output".
42
+ resources: []
@@ -0,0 +1,52 @@
1
+ # =============================================================================
2
+ # TEST STRUCTURED OUTPUT AGENT
3
+ # =============================================================================
4
+ # Simple agent for testing structured_output: true functionality
5
+ # =============================================================================
6
+
7
+ name: test_structured_output
8
+ version: "1.0"
9
+ description: |
10
+ You are a test agent that produces structured output.
11
+
12
+ Your ONLY job is to return a structured response matching the schema below.
13
+
14
+ Based on the user's input, extract:
15
+ - summary: A brief summary of what they said
16
+ - sentiment: positive, negative, or neutral
17
+ - keywords: List of key words from their message
18
+
19
+ DO NOT ask questions. Just produce the structured output.
20
+
21
+ type: object
22
+ properties:
23
+ result:
24
+ type: object
25
+ description: Structured analysis result
26
+ properties:
27
+ summary:
28
+ type: string
29
+ description: Brief summary of the input
30
+ sentiment:
31
+ type: string
32
+ enum: [positive, negative, neutral]
33
+ description: Overall sentiment
34
+ keywords:
35
+ type: array
36
+ items:
37
+ type: string
38
+ description: Key words extracted from input
39
+ required: [summary, sentiment, keywords]
40
+ additionalProperties: false
41
+
42
+ required:
43
+ - result
44
+
45
+ json_schema_extra:
46
+ kind: agent
47
+ name: test_structured_output
48
+ version: "1.0.0"
49
+ tags: [test, structured-output]
50
+ structured_output: true
51
+ tools: []
52
+ resources: []
@@ -118,15 +118,40 @@ class DocProvider(ContentProvider):
118
118
  - Images (.png, .jpg) - OCR text extraction
119
119
 
120
120
  Handles:
121
- - Text extraction with OCR fallback
121
+ - Text extraction with automatic OCR fallback for scanned documents
122
122
  - Table detection and extraction
123
123
  - Daemon process workaround for multiprocessing restrictions
124
+
125
+ Environment Variables:
126
+ EXTRACTION_OCR_FALLBACK: Enable OCR fallback (default: true)
127
+ EXTRACTION_OCR_THRESHOLD: Trigger OCR fallback when native extraction yields fewer than this many chars (default: 100)
128
+ EXTRACTION_FORCE_OCR: Always use OCR, skip native extraction (default: false)
129
+ EXTRACTION_OCR_LANGUAGE: Tesseract language codes (default: eng)
124
130
  """
125
131
 
126
132
  @property
127
133
  def name(self) -> str:
128
134
  return "doc"
129
135
 
136
+ def _get_env_bool(self, key: str, default: bool) -> bool:
137
+ """Get boolean from environment variable."""
138
+ import os
139
+ val = os.environ.get(key, "").lower()
140
+ if val in ("true", "1", "yes"):
141
+ return True
142
+ elif val in ("false", "0", "no"):
143
+ return False
144
+ return default
145
+
146
+ def _get_env_int(self, key: str, default: int) -> int:
147
+ """Get integer from environment variable."""
148
+ import os
149
+ val = os.environ.get(key, "")
150
+ try:
151
+ return int(val) if val else default
152
+ except ValueError:
153
+ return default
154
+
130
155
  def _is_daemon_process(self) -> bool:
131
156
  """Check if running in a daemon process."""
132
157
  try:
@@ -134,29 +159,34 @@ class DocProvider(ContentProvider):
134
159
  except Exception:
135
160
  return False
136
161
 
137
- def _parse_in_subprocess(self, file_path: Path) -> dict:
162
+ def _parse_in_subprocess(self, file_path: Path, force_ocr: bool = False) -> dict:
138
163
  """Run kreuzberg in a separate subprocess to bypass daemon restrictions."""
139
- script = """
164
+ import os
165
+ ocr_language = os.environ.get("EXTRACTION_OCR_LANGUAGE", "eng")
166
+
167
+ script = f"""
140
168
  import json
141
169
  import sys
142
170
  from pathlib import Path
143
- from kreuzberg import ExtractionConfig, extract_file_sync
171
+ from kreuzberg import ExtractionConfig, OcrConfig, extract_file_sync
144
172
 
145
- # Parse document with kreuzberg 3.x
146
- config = ExtractionConfig(
147
- extract_tables=True,
148
- chunk_content=False,
149
- extract_keywords=False,
150
- )
173
+ force_ocr = {force_ocr}
174
+
175
+ if force_ocr:
176
+ config = ExtractionConfig(
177
+ force_ocr=True,
178
+ ocr=OcrConfig(backend="tesseract", language="{ocr_language}")
179
+ )
180
+ else:
181
+ config = ExtractionConfig()
151
182
 
152
183
  result = extract_file_sync(Path(sys.argv[1]), config=config)
153
184
 
154
- # Serialize result to JSON
155
- output = {
185
+ output = {{
156
186
  'content': result.content,
157
- 'tables': [t.model_dump() for t in result.tables] if result.tables else [],
158
- 'metadata': result.metadata
159
- }
187
+ 'tables': [],
188
+ 'metadata': {{}}
189
+ }}
160
190
  print(json.dumps(output))
161
191
  """
162
192
 
@@ -173,9 +203,41 @@ print(json.dumps(output))
173
203
 
174
204
  return json.loads(result.stdout)
175
205
 
206
+ def _extract_with_config(self, tmp_path: Path, force_ocr: bool = False) -> tuple[str, dict]:
207
+ """Extract content with optional OCR config."""
208
+ import os
209
+ from kreuzberg import ExtractionConfig, OcrConfig, extract_file_sync
210
+
211
+ ocr_language = os.environ.get("EXTRACTION_OCR_LANGUAGE", "eng")
212
+
213
+ if force_ocr:
214
+ config = ExtractionConfig(
215
+ force_ocr=True,
216
+ ocr=OcrConfig(backend="tesseract", language=ocr_language)
217
+ )
218
+ parser_name = "kreuzberg_ocr"
219
+ else:
220
+ config = ExtractionConfig()
221
+ parser_name = "kreuzberg"
222
+
223
+ result = extract_file_sync(tmp_path, config=config)
224
+ text = result.content
225
+
226
+ extraction_metadata = {
227
+ "parser": parser_name,
228
+ "file_extension": tmp_path.suffix,
229
+ }
230
+
231
+ return text, extraction_metadata
232
+
176
233
  def extract(self, content: bytes, metadata: dict[str, Any]) -> dict[str, Any]:
177
234
  """
178
- Extract document content using Kreuzberg.
235
+ Extract document content using Kreuzberg with intelligent OCR fallback.
236
+
237
+ Process:
238
+ 1. Try native text extraction first (fast, preserves structure)
239
+ 2. If content is minimal (< threshold chars), retry with OCR
240
+ 3. Use OCR result if it yields more text than the native result
179
241
 
180
242
  Args:
181
243
  content: Document file bytes
@@ -184,49 +246,89 @@ print(json.dumps(output))
184
246
  Returns:
185
247
  dict with text and extraction metadata
186
248
  """
249
+ # Get OCR settings from environment
250
+ force_ocr = self._get_env_bool("EXTRACTION_FORCE_OCR", False)
251
+ ocr_fallback = self._get_env_bool("EXTRACTION_OCR_FALLBACK", True)
252
+ ocr_threshold = self._get_env_int("EXTRACTION_OCR_THRESHOLD", 100)
253
+
187
254
  # Write bytes to temp file for kreuzberg
188
- # Detect extension from metadata
189
255
  content_type = metadata.get("content_type", "")
190
256
  suffix = get_extension(content_type, default=".pdf")
191
257
 
192
258
  with temp_file_from_bytes(content, suffix=suffix) as tmp_path:
259
+ ocr_used = False
260
+ ocr_fallback_triggered = False
261
+ native_char_count = 0
262
+
193
263
  # Check if running in daemon process
194
264
  if self._is_daemon_process():
195
- logger.info("Daemon process detected - using subprocess workaround for document parsing")
265
+ logger.info("Daemon process detected - using subprocess workaround")
196
266
  try:
197
- result_dict = self._parse_in_subprocess(tmp_path)
198
- text = result_dict["content"]
199
- extraction_metadata = {
200
- "table_count": len(result_dict["tables"]),
201
- "parser": "kreuzberg_subprocess",
202
- "file_extension": tmp_path.suffix,
203
- }
267
+ if force_ocr:
268
+ result_dict = self._parse_in_subprocess(tmp_path, force_ocr=True)
269
+ text = result_dict["content"]
270
+ ocr_used = True
271
+ extraction_metadata = {
272
+ "parser": "kreuzberg_subprocess_ocr",
273
+ "file_extension": tmp_path.suffix,
274
+ }
275
+ else:
276
+ # Try native first
277
+ result_dict = self._parse_in_subprocess(tmp_path, force_ocr=False)
278
+ text = result_dict["content"]
279
+ native_char_count = len(text)
280
+
281
+ # OCR fallback if content is minimal
282
+ if ocr_fallback and len(text.strip()) < ocr_threshold:
283
+ logger.warning(f"Content below threshold ({len(text.strip())} < {ocr_threshold}) - trying OCR fallback")
284
+ try:
285
+ ocr_result = self._parse_in_subprocess(tmp_path, force_ocr=True)
286
+ ocr_text = ocr_result["content"]
287
+ if len(ocr_text.strip()) > len(text.strip()):
288
+ logger.info(f"OCR fallback improved result: {len(ocr_text)} chars (was {native_char_count})")
289
+ text = ocr_text
290
+ ocr_used = True
291
+ ocr_fallback_triggered = True
292
+ except Exception as e:
293
+ logger.warning(f"OCR fallback failed in subprocess: {e}")
294
+
295
+ extraction_metadata = {
296
+ "parser": "kreuzberg_subprocess" if not ocr_used else "kreuzberg_subprocess_ocr_fallback",
297
+ "file_extension": tmp_path.suffix,
298
+ }
204
299
  except Exception as e:
205
- logger.error(f"Subprocess parsing failed: {e}. Falling back to text-only.")
206
- # Fallback to simple text extraction (kreuzberg 3.x API)
207
- from kreuzberg import ExtractionConfig, extract_file_sync
208
- config = ExtractionConfig(extract_tables=False)
209
- result = extract_file_sync(tmp_path, config=config)
210
- text = result.content
211
- extraction_metadata = {
212
- "parser": "kreuzberg_fallback",
213
- "file_extension": tmp_path.suffix,
214
- }
300
+ logger.error(f"Subprocess parsing failed: {e}. Falling back to direct call.")
301
+ text, extraction_metadata = self._extract_with_config(tmp_path, force_ocr=force_ocr)
302
+ ocr_used = force_ocr
215
303
  else:
216
- # Normal execution (not in daemon) - kreuzberg 4.x with native ONNX/Rust
217
- from kreuzberg import ExtractionConfig, extract_file_sync
218
- config = ExtractionConfig(
219
- enable_quality_processing=True, # Enables table extraction with native ONNX
220
- chunk_content=False, # We handle chunking ourselves
221
- extract_tables=False, # Disable table extraction to avoid PyTorch dependency
222
- )
223
- result = extract_file_sync(tmp_path, config=config)
224
- text = result.content
225
- extraction_metadata = {
226
- "table_count": len(result.tables) if result.tables else 0,
227
- "parser": "kreuzberg",
228
- "file_extension": tmp_path.suffix,
229
- }
304
+ # Normal execution (not in daemon)
305
+ if force_ocr:
306
+ text, extraction_metadata = self._extract_with_config(tmp_path, force_ocr=True)
307
+ ocr_used = True
308
+ else:
309
+ # Try native first
310
+ text, extraction_metadata = self._extract_with_config(tmp_path, force_ocr=False)
311
+ native_char_count = len(text)
312
+
313
+ # OCR fallback if content is minimal
314
+ if ocr_fallback and len(text.strip()) < ocr_threshold:
315
+ logger.warning(f"Content below threshold ({len(text.strip())} < {ocr_threshold}) - trying OCR fallback")
316
+ try:
317
+ ocr_text, _ = self._extract_with_config(tmp_path, force_ocr=True)
318
+ if len(ocr_text.strip()) > len(text.strip()):
319
+ logger.info(f"OCR fallback improved result: {len(ocr_text)} chars (was {native_char_count})")
320
+ text = ocr_text
321
+ ocr_used = True
322
+ ocr_fallback_triggered = True
323
+ extraction_metadata["parser"] = "kreuzberg_ocr_fallback"
324
+ except Exception as e:
325
+ logger.warning(f"OCR fallback failed: {e}")
326
+
327
+ # Add OCR metadata
328
+ extraction_metadata["ocr_used"] = ocr_used
329
+ extraction_metadata["ocr_fallback_triggered"] = ocr_fallback_triggered
330
+ extraction_metadata["native_char_count"] = native_char_count
331
+ extraction_metadata["final_char_count"] = len(text)
230
332
 
231
333
  return {
232
334
  "text": text,
@@ -25,6 +25,7 @@ from .sql_builder import (
25
25
  build_select,
26
26
  build_upsert,
27
27
  )
28
+ from ...settings import settings
28
29
 
29
30
  if TYPE_CHECKING:
30
31
  from .service import PostgresService
@@ -40,15 +40,16 @@ FuzzyQuery ::= FUZZY <text:string> [THRESHOLD <t:float>] [LIMIT <n:int>]
40
40
  available : Stage 1+
41
41
  example : FUZZY "sara" THRESHOLD 0.5 LIMIT 10
42
42
 
43
- SearchQuery ::= SEARCH <text:string> [TABLE <table:string>] [WHERE <clause:string>] [LIMIT <n:int>]
43
+ SearchQuery ::= SEARCH <text:string> [IN|TABLE <table:string>] [WHERE <clause:string>] [LIMIT <n:int>]
44
44
  text : Semantic query text
45
- table : Target table (default: "resources")
45
+ table : Target table (default: "resources"). Use IN or TABLE keyword.
46
46
  clause : Optional PostgreSQL WHERE clause for hybrid filtering (combines vector + structured)
47
47
  limit : Max results (default: 10)
48
48
  performance : Indexed (pgvector)
49
49
  available : Stage 3+
50
50
  examples :
51
- - SEARCH "database migration" TABLE resources LIMIT 10
51
+ - SEARCH "database migration" IN resources LIMIT 10
52
+ - SEARCH "parcel delivery" IN ontologies
52
53
  - SEARCH "team discussion" TABLE moments WHERE "moment_type='meeting'" LIMIT 5
53
54
  - SEARCH "project updates" WHERE "created_at >= '2024-01-01'" LIMIT 20
54
55
  - SEARCH "AI research" WHERE "tags @> ARRAY['machine-learning']" LIMIT 10
@@ -64,7 +64,7 @@ class RemQueryParser:
64
64
  token_upper = token.upper()
65
65
 
66
66
  # Handle REM keywords that take a value
67
- if token_upper in ("LIMIT", "DEPTH", "THRESHOLD", "TYPE", "FROM", "WITH"):
67
+ if token_upper in ("LIMIT", "DEPTH", "THRESHOLD", "TYPE", "FROM", "WITH", "TABLE", "IN", "WHERE"):
68
68
  if i + 1 < len(tokens):
69
69
  keyword_map = {
70
70
  "LIMIT": "limit",
@@ -73,6 +73,9 @@ class RemQueryParser:
73
73
  "TYPE": "edge_types",
74
74
  "FROM": "initial_query",
75
75
  "WITH": "initial_query",
76
+ "TABLE": "table_name",
77
+ "IN": "table_name", # IN is alias for TABLE
78
+ "WHERE": "where_clause",
76
79
  }
77
80
  key = keyword_map[token_upper]
78
81
  value = tokens[i + 1]
@@ -161,15 +164,9 @@ class RemQueryParser:
161
164
  params["query_text"] = combined_value
162
165
 
163
166
  elif query_type == QueryType.SEARCH:
164
- # SEARCH expects: SEARCH <table> <query_text> [LIMIT n]
165
- # First positional arg is table name, rest is query text
166
- if len(positional_args) >= 2:
167
- params["table_name"] = positional_args[0]
168
- params["query_text"] = " ".join(positional_args[1:])
169
- elif len(positional_args) == 1:
170
- # Could be table name or query text - assume query text if no table
171
- params["query_text"] = positional_args[0]
172
- # If no positional args, params stays empty
167
+ # SEARCH expects: SEARCH <text> [IN|TABLE <table>] [WHERE <clause>] [LIMIT n]
168
+ # All positional args are query_text; IN/TABLE/WHERE/LIMIT are handled as keywords
169
+ params["query_text"] = combined_value
173
170
 
174
171
  elif query_type == QueryType.TRAVERSE:
175
172
  params["initial_query"] = combined_value
@@ -478,6 +478,53 @@ class RemService:
478
478
  parser = RemQueryParser()
479
479
  return parser.parse(query_string)
480
480
 
481
+ async def execute_query_string(
482
+ self, query_string: str, user_id: str | None = None
483
+ ) -> dict[str, Any]:
484
+ """
485
+ Execute a REM dialect query string directly.
486
+
487
+ This is the unified entry point for executing REM queries from both
488
+ the CLI and API. It handles parsing the query string, creating the
489
+ RemQuery model, and executing it.
490
+
491
+ Args:
492
+ query_string: REM dialect query (e.g., 'LOOKUP "Sarah Chen"',
493
+ 'SEARCH "API design" IN resources', 'SELECT * FROM users')
494
+ user_id: Optional user ID for query isolation
495
+
496
+ Returns:
497
+ Dict with query results and metadata:
498
+ - query_type: The type of query executed
499
+ - results: List of result rows
500
+ - count: Number of results
501
+ - Additional fields depending on query type
502
+
503
+ Raises:
504
+ ValueError: If the query string is invalid
505
+ QueryExecutionError: If query execution fails
506
+
507
+ Example:
508
+ >>> result = await rem_service.execute_query_string(
509
+ ... 'LOOKUP "Sarah Chen"',
510
+ ... user_id="user-123"
511
+ ... )
512
+ >>> print(result["count"])
513
+ 1
514
+ """
515
+ # Parse the query string into type and parameters
516
+ query_type, parameters = self._parse_query_string(query_string)
517
+
518
+ # Create and validate the RemQuery model
519
+ rem_query = RemQuery.model_validate({
520
+ "query_type": query_type,
521
+ "parameters": parameters,
522
+ "user_id": user_id,
523
+ })
524
+
525
+ # Execute and return results
526
+ return await self.execute_query(rem_query)
527
+
481
528
  async def ask_rem(
482
529
  self, natural_query: str, tenant_id: str, llm_model: str | None = None, plan_mode: bool = False
483
530
  ) -> dict[str, Any]: