remdb 0.3.242__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (235) hide show
  1. rem/__init__.py +129 -0
  2. rem/agentic/README.md +760 -0
  3. rem/agentic/__init__.py +54 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +38 -0
  6. rem/agentic/agents/agent_manager.py +311 -0
  7. rem/agentic/agents/sse_simulator.py +502 -0
  8. rem/agentic/context.py +425 -0
  9. rem/agentic/context_builder.py +360 -0
  10. rem/agentic/llm_provider_models.py +301 -0
  11. rem/agentic/mcp/__init__.py +0 -0
  12. rem/agentic/mcp/tool_wrapper.py +273 -0
  13. rem/agentic/otel/__init__.py +5 -0
  14. rem/agentic/otel/setup.py +240 -0
  15. rem/agentic/providers/phoenix.py +926 -0
  16. rem/agentic/providers/pydantic_ai.py +854 -0
  17. rem/agentic/query.py +117 -0
  18. rem/agentic/query_helper.py +89 -0
  19. rem/agentic/schema.py +737 -0
  20. rem/agentic/serialization.py +245 -0
  21. rem/agentic/tools/__init__.py +5 -0
  22. rem/agentic/tools/rem_tools.py +242 -0
  23. rem/api/README.md +657 -0
  24. rem/api/deps.py +253 -0
  25. rem/api/main.py +460 -0
  26. rem/api/mcp_router/prompts.py +182 -0
  27. rem/api/mcp_router/resources.py +820 -0
  28. rem/api/mcp_router/server.py +243 -0
  29. rem/api/mcp_router/tools.py +1605 -0
  30. rem/api/middleware/tracking.py +172 -0
  31. rem/api/routers/admin.py +520 -0
  32. rem/api/routers/auth.py +898 -0
  33. rem/api/routers/chat/__init__.py +5 -0
  34. rem/api/routers/chat/child_streaming.py +394 -0
  35. rem/api/routers/chat/completions.py +702 -0
  36. rem/api/routers/chat/json_utils.py +76 -0
  37. rem/api/routers/chat/models.py +202 -0
  38. rem/api/routers/chat/otel_utils.py +33 -0
  39. rem/api/routers/chat/sse_events.py +546 -0
  40. rem/api/routers/chat/streaming.py +950 -0
  41. rem/api/routers/chat/streaming_utils.py +327 -0
  42. rem/api/routers/common.py +18 -0
  43. rem/api/routers/dev.py +87 -0
  44. rem/api/routers/feedback.py +276 -0
  45. rem/api/routers/messages.py +620 -0
  46. rem/api/routers/models.py +86 -0
  47. rem/api/routers/query.py +362 -0
  48. rem/api/routers/shared_sessions.py +422 -0
  49. rem/auth/README.md +258 -0
  50. rem/auth/__init__.py +36 -0
  51. rem/auth/jwt.py +367 -0
  52. rem/auth/middleware.py +318 -0
  53. rem/auth/providers/__init__.py +16 -0
  54. rem/auth/providers/base.py +376 -0
  55. rem/auth/providers/email.py +215 -0
  56. rem/auth/providers/google.py +163 -0
  57. rem/auth/providers/microsoft.py +237 -0
  58. rem/cli/README.md +517 -0
  59. rem/cli/__init__.py +8 -0
  60. rem/cli/commands/README.md +299 -0
  61. rem/cli/commands/__init__.py +3 -0
  62. rem/cli/commands/ask.py +549 -0
  63. rem/cli/commands/cluster.py +1808 -0
  64. rem/cli/commands/configure.py +495 -0
  65. rem/cli/commands/db.py +828 -0
  66. rem/cli/commands/dreaming.py +324 -0
  67. rem/cli/commands/experiments.py +1698 -0
  68. rem/cli/commands/mcp.py +66 -0
  69. rem/cli/commands/process.py +388 -0
  70. rem/cli/commands/query.py +109 -0
  71. rem/cli/commands/scaffold.py +47 -0
  72. rem/cli/commands/schema.py +230 -0
  73. rem/cli/commands/serve.py +106 -0
  74. rem/cli/commands/session.py +453 -0
  75. rem/cli/dreaming.py +363 -0
  76. rem/cli/main.py +123 -0
  77. rem/config.py +244 -0
  78. rem/mcp_server.py +41 -0
  79. rem/models/core/__init__.py +49 -0
  80. rem/models/core/core_model.py +70 -0
  81. rem/models/core/engram.py +333 -0
  82. rem/models/core/experiment.py +672 -0
  83. rem/models/core/inline_edge.py +132 -0
  84. rem/models/core/rem_query.py +246 -0
  85. rem/models/entities/__init__.py +68 -0
  86. rem/models/entities/domain_resource.py +38 -0
  87. rem/models/entities/feedback.py +123 -0
  88. rem/models/entities/file.py +57 -0
  89. rem/models/entities/image_resource.py +88 -0
  90. rem/models/entities/message.py +64 -0
  91. rem/models/entities/moment.py +123 -0
  92. rem/models/entities/ontology.py +181 -0
  93. rem/models/entities/ontology_config.py +131 -0
  94. rem/models/entities/resource.py +95 -0
  95. rem/models/entities/schema.py +87 -0
  96. rem/models/entities/session.py +84 -0
  97. rem/models/entities/shared_session.py +180 -0
  98. rem/models/entities/subscriber.py +175 -0
  99. rem/models/entities/user.py +93 -0
  100. rem/py.typed +0 -0
  101. rem/registry.py +373 -0
  102. rem/schemas/README.md +507 -0
  103. rem/schemas/__init__.py +6 -0
  104. rem/schemas/agents/README.md +92 -0
  105. rem/schemas/agents/core/agent-builder.yaml +235 -0
  106. rem/schemas/agents/core/moment-builder.yaml +178 -0
  107. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  108. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  109. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  110. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  111. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  112. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  113. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  114. rem/schemas/agents/examples/hello-world.yaml +37 -0
  115. rem/schemas/agents/examples/query.yaml +54 -0
  116. rem/schemas/agents/examples/simple.yaml +21 -0
  117. rem/schemas/agents/examples/test.yaml +29 -0
  118. rem/schemas/agents/rem.yaml +132 -0
  119. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  120. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  121. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  122. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  123. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  124. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  125. rem/services/__init__.py +18 -0
  126. rem/services/audio/INTEGRATION.md +308 -0
  127. rem/services/audio/README.md +376 -0
  128. rem/services/audio/__init__.py +15 -0
  129. rem/services/audio/chunker.py +354 -0
  130. rem/services/audio/transcriber.py +259 -0
  131. rem/services/content/README.md +1269 -0
  132. rem/services/content/__init__.py +5 -0
  133. rem/services/content/providers.py +760 -0
  134. rem/services/content/service.py +762 -0
  135. rem/services/dreaming/README.md +230 -0
  136. rem/services/dreaming/__init__.py +53 -0
  137. rem/services/dreaming/affinity_service.py +322 -0
  138. rem/services/dreaming/moment_service.py +251 -0
  139. rem/services/dreaming/ontology_service.py +54 -0
  140. rem/services/dreaming/user_model_service.py +297 -0
  141. rem/services/dreaming/utils.py +39 -0
  142. rem/services/email/__init__.py +10 -0
  143. rem/services/email/service.py +522 -0
  144. rem/services/email/templates.py +360 -0
  145. rem/services/embeddings/__init__.py +11 -0
  146. rem/services/embeddings/api.py +127 -0
  147. rem/services/embeddings/worker.py +435 -0
  148. rem/services/fs/README.md +662 -0
  149. rem/services/fs/__init__.py +62 -0
  150. rem/services/fs/examples.py +206 -0
  151. rem/services/fs/examples_paths.py +204 -0
  152. rem/services/fs/git_provider.py +935 -0
  153. rem/services/fs/local_provider.py +760 -0
  154. rem/services/fs/parsing-hooks-examples.md +172 -0
  155. rem/services/fs/paths.py +276 -0
  156. rem/services/fs/provider.py +460 -0
  157. rem/services/fs/s3_provider.py +1042 -0
  158. rem/services/fs/service.py +186 -0
  159. rem/services/git/README.md +1075 -0
  160. rem/services/git/__init__.py +17 -0
  161. rem/services/git/service.py +469 -0
  162. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  163. rem/services/phoenix/README.md +453 -0
  164. rem/services/phoenix/__init__.py +46 -0
  165. rem/services/phoenix/client.py +960 -0
  166. rem/services/phoenix/config.py +88 -0
  167. rem/services/phoenix/prompt_labels.py +477 -0
  168. rem/services/postgres/README.md +757 -0
  169. rem/services/postgres/__init__.py +49 -0
  170. rem/services/postgres/diff_service.py +599 -0
  171. rem/services/postgres/migration_service.py +427 -0
  172. rem/services/postgres/programmable_diff_service.py +635 -0
  173. rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
  174. rem/services/postgres/register_type.py +353 -0
  175. rem/services/postgres/repository.py +481 -0
  176. rem/services/postgres/schema_generator.py +661 -0
  177. rem/services/postgres/service.py +802 -0
  178. rem/services/postgres/sql_builder.py +355 -0
  179. rem/services/rate_limit.py +113 -0
  180. rem/services/rem/README.md +318 -0
  181. rem/services/rem/__init__.py +23 -0
  182. rem/services/rem/exceptions.py +71 -0
  183. rem/services/rem/executor.py +293 -0
  184. rem/services/rem/parser.py +180 -0
  185. rem/services/rem/queries.py +196 -0
  186. rem/services/rem/query.py +371 -0
  187. rem/services/rem/service.py +608 -0
  188. rem/services/session/README.md +374 -0
  189. rem/services/session/__init__.py +13 -0
  190. rem/services/session/compression.py +488 -0
  191. rem/services/session/pydantic_messages.py +310 -0
  192. rem/services/session/reload.py +85 -0
  193. rem/services/user_service.py +130 -0
  194. rem/settings.py +1877 -0
  195. rem/sql/background_indexes.sql +52 -0
  196. rem/sql/migrations/001_install.sql +983 -0
  197. rem/sql/migrations/002_install_models.sql +3157 -0
  198. rem/sql/migrations/003_optional_extensions.sql +326 -0
  199. rem/sql/migrations/004_cache_system.sql +282 -0
  200. rem/sql/migrations/005_schema_update.sql +145 -0
  201. rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
  202. rem/utils/AGENTIC_CHUNKING.md +597 -0
  203. rem/utils/README.md +628 -0
  204. rem/utils/__init__.py +61 -0
  205. rem/utils/agentic_chunking.py +622 -0
  206. rem/utils/batch_ops.py +343 -0
  207. rem/utils/chunking.py +108 -0
  208. rem/utils/clip_embeddings.py +276 -0
  209. rem/utils/constants.py +97 -0
  210. rem/utils/date_utils.py +228 -0
  211. rem/utils/dict_utils.py +98 -0
  212. rem/utils/embeddings.py +436 -0
  213. rem/utils/examples/embeddings_example.py +305 -0
  214. rem/utils/examples/sql_types_example.py +202 -0
  215. rem/utils/files.py +323 -0
  216. rem/utils/markdown.py +16 -0
  217. rem/utils/mime_types.py +158 -0
  218. rem/utils/model_helpers.py +492 -0
  219. rem/utils/schema_loader.py +649 -0
  220. rem/utils/sql_paths.py +146 -0
  221. rem/utils/sql_types.py +350 -0
  222. rem/utils/user_id.py +81 -0
  223. rem/utils/vision.py +325 -0
  224. rem/workers/README.md +506 -0
  225. rem/workers/__init__.py +7 -0
  226. rem/workers/db_listener.py +579 -0
  227. rem/workers/db_maintainer.py +74 -0
  228. rem/workers/dreaming.py +502 -0
  229. rem/workers/engram_processor.py +312 -0
  230. rem/workers/sqs_file_processor.py +193 -0
  231. rem/workers/unlogged_maintainer.py +463 -0
  232. remdb-0.3.242.dist-info/METADATA +1632 -0
  233. remdb-0.3.242.dist-info/RECORD +235 -0
  234. remdb-0.3.242.dist-info/WHEEL +4 -0
  235. remdb-0.3.242.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,608 @@
1
+ """
2
+ RemService - REM query execution service (wrapper around PostgresService).
3
+
4
+ Delegates to PostgreSQL functions for performance:
5
+ - LOOKUP → rem_lookup() function (O(1) KV_STORE)
6
+ - FUZZY → rem_fuzzy() function (pg_trgm similarity)
7
+ - SEARCH → rem_search() function (vector similarity with embeddings)
8
+ - SQL → Direct PostgresService.execute() (pushed down to Postgres)
9
+ - TRAVERSE → rem_traverse() function (recursive graph traversal)
10
+
11
+ Design:
12
+ - RemService wraps PostgresService, does NOT duplicate logic
13
+ - All queries pushed down to Postgres for performance
14
+ - Model schema inspection for validation only
15
+ - Exceptions for missing fields/embeddings
16
+
17
+ TODO: Staged Plan Execution
18
+ - Implement execute_staged_plan() method for multi-stage query execution
19
+ - Each stage can be:
20
+ 1. Static query (query field): Execute REM dialect directly
21
+ 2. Dynamic query (intent field): LLM interprets intent + previous results to build query
22
+ - Flow for dynamic stages:
23
+ 1. Gather results from depends_on stages (from previous_results or current execution)
24
+ 2. Pass intent + previous results to LLM (like ask_rem but with context)
25
+ 3. LLM generates REM query based on what it learned from previous stages
26
+ 4. Execute generated query
27
+ 5. Store results in stage_results for client to use in continuation
28
+ - Multi-turn continuation:
29
+ - Client passes previous_results back from response's stage_results
30
+ - Client sets resume_from_stage to skip already-executed stages
31
+ - Server uses previous_results as context for depends_on lookups
32
+ - Use cases:
33
+ - LOOKUP "Sarah" → intent: "find her team members" (LLM sees Sarah's graph_edges, builds TRAVERSE)
34
+ - SEARCH "API docs" → intent: "get authors" (LLM extracts author refs, builds LOOKUP)
35
+ - Complex graph exploration with LLM-driven navigation
36
+ - API: POST /api/v1/query with:
37
+ - mode="staged-plan"
38
+ - plan=[{stage, query|intent, name, depends_on}]
39
+ - previous_results=[{stage, name, query_executed, results, count}] (for continuation)
40
+ - resume_from_stage=N (to skip completed stages)
41
+ """
42
+
43
+ from typing import Any
44
+
45
+ from loguru import logger
46
+
47
+ from .parser import RemQueryParser
48
+ from ...models.core import (
49
+ FuzzyParameters,
50
+ LookupParameters,
51
+ QueryType,
52
+ RemQuery,
53
+ SearchParameters,
54
+ SQLParameters,
55
+ TraverseParameters,
56
+ )
57
+ from .exceptions import (
58
+ ContentFieldNotFoundError,
59
+ EmbeddingFieldNotFoundError,
60
+ FieldNotFoundError,
61
+ InvalidParametersError,
62
+ QueryExecutionError,
63
+ )
64
+
65
+
66
+ class RemService:
67
+ """
68
+ REM query execution service.
69
+
70
+ Wraps PostgresService and delegates all queries to PostgreSQL functions.
71
+ """
72
+
73
+ def __init__(self, postgres_service: Any, model_registry: dict[str, Any] | None = None):
74
+ """
75
+ Initialize REM service.
76
+
77
+ Args:
78
+ postgres_service: PostgresService instance
79
+ model_registry: Optional dict mapping table names to Pydantic models
80
+ """
81
+ self.db = postgres_service
82
+ self.model_registry = model_registry or {}
83
+
84
def register_model(self, table_name: str, model: Any):
    """
    Add (or replace) the model associated with a table.

    Args:
        table_name: Key under which the model is stored (e.g., "resources")
        model: Model class; its ``__name__`` is used in the debug log line
    """
    registry = self.model_registry
    registry[table_name] = model
    logger.debug(f"Registered model {model.__name__} for table {table_name}")
94
+
95
+ def _get_model_fields(self, table_name: str) -> list[str]:
96
+ """Get list of field names from registered model."""
97
+ if table_name not in self.model_registry:
98
+ return []
99
+ model = self.model_registry[table_name]
100
+ return list(model.model_fields.keys())
101
+
102
+ def _get_embeddable_fields(self, table_name: str) -> list[str]:
103
+ """
104
+ Get list of fields that have embeddings.
105
+
106
+ Uses register_type conventions:
107
+ - Fields with json_schema_extra={"embed": True}
108
+ - Default embeddable fields: content, description, summary, text, body, message, notes
109
+ """
110
+ if table_name not in self.model_registry:
111
+ return []
112
+
113
+ model = self.model_registry[table_name]
114
+ embeddable = []
115
+
116
+ DEFAULT_EMBED_FIELDS = {
117
+ "content",
118
+ "description",
119
+ "summary",
120
+ "text",
121
+ "body",
122
+ "message",
123
+ "notes",
124
+ }
125
+
126
+ for field_name, field_info in model.model_fields.items():
127
+ # Check json_schema_extra for explicit embed configuration
128
+ json_extra = getattr(field_info, "json_schema_extra", None)
129
+ if json_extra and isinstance(json_extra, dict):
130
+ embed = json_extra.get("embed")
131
+ if embed is True:
132
+ embeddable.append(field_name)
133
+ continue
134
+ elif embed is False:
135
+ continue
136
+
137
+ # Default: embed if field name matches common content fields
138
+ if field_name.lower() in DEFAULT_EMBED_FIELDS:
139
+ embeddable.append(field_name)
140
+
141
+ return embeddable
142
+
143
async def execute_query(self, query: RemQuery) -> dict[str, Any]:
    """
    Execute a REM query by routing it to the handler for its query type.

    Args:
        query: RemQuery with type and parameters. ``query.user_id`` is passed
            to the handlers as ``tenant_id``.

    Returns:
        The dict produced by the selected handler (results plus metadata)

    Raises:
        InvalidParametersError: If the parameters object does not match the
            query type, or the query type is not one of the known types
        FieldNotFoundError: Re-raised unchanged from a handler
        EmbeddingFieldNotFoundError: Re-raised unchanged from a handler
        QueryExecutionError: Wraps any other exception; the original error is
            attached as the cause
    """
    try:
        # RemQuery uses user_id for isolation (mapped to tenant_id in execution)
        tenant_id = query.user_id

        # (query type, expected parameters class, handler, label used in errors)
        routes = (
            (QueryType.LOOKUP, LookupParameters, self._execute_lookup, "LOOKUP"),
            (QueryType.FUZZY, FuzzyParameters, self._execute_fuzzy, "FUZZY"),
            (QueryType.SEARCH, SearchParameters, self._execute_search, "SEARCH"),
            (QueryType.SQL, SQLParameters, self._execute_sql, "SQL"),
            (QueryType.TRAVERSE, TraverseParameters, self._execute_traverse, "TRAVERSE"),
        )

        for query_type, params_cls, handler, label in routes:
            if query.query_type == query_type:
                if isinstance(query.parameters, params_cls):
                    return await handler(query.parameters, tenant_id)
                raise InvalidParametersError(label, "Invalid parameters type")

        raise InvalidParametersError("UNKNOWN", f"Unknown query type: {query.query_type}")
    except (FieldNotFoundError, EmbeddingFieldNotFoundError, InvalidParametersError):
        # Re-raise our custom exceptions untouched.
        # NOTE(review): ContentFieldNotFoundError is not listed here; unless it
        # subclasses one of these it would be wrapped below - confirm against
        # the exceptions module.
        raise
    except Exception as e:
        logger.exception(f"REM query execution failed: {e}")
        # Chain explicitly so the original failure is recorded as the cause.
        raise QueryExecutionError(query.query_type.value, str(e), e) from e
190
+
191
async def _execute_lookup(
    self, params: LookupParameters, tenant_id: str
) -> dict[str, Any]:
    """
    Run LOOKUP_QUERY once per requested key and merge the rows.

    ``params.key`` may be a single key or a list of keys; a single key is
    wrapped in a one-element list. Each key is executed as its own query and
    the rows are concatenated in key order.

    Args:
        params: LookupParameters carrying ``key`` and an optional ``user_id``
        tenant_id: Tenant identifier; also used as the user id when
            ``params.user_id`` is falsy

    Returns:
        Dict with "query_type", "keys" (always a list), "results" and "count"
    """
    from .queries import LOOKUP_QUERY, get_lookup_params

    requested = params.key
    keys = requested if isinstance(requested, list) else [requested]

    collected = []
    for entity_key in keys:
        # Fall back to tenant_id (from query.user_id) when params.user_id is not set
        effective_user = params.user_id or tenant_id
        rows = await self.db.execute(
            LOOKUP_QUERY,
            get_lookup_params(entity_key, tenant_id, effective_user),
        )
        collected.extend(rows)

    return {
        "query_type": "LOOKUP",
        "keys": keys,  # Return list for consistency
        "results": collected,
        "count": len(collected),
    }
228
+
229
async def _execute_fuzzy(
    self, params: FuzzyParameters, tenant_id: str
) -> dict[str, Any]:
    """
    Run FUZZY_QUERY for the given text and threshold.

    Args:
        params: FuzzyParameters providing ``query_text``, ``threshold`` and
            ``limit``
        tenant_id: Tenant identifier; passed to the query both as the tenant
            and as the user id

    Returns:
        Dict with "query_type", "query_text", "threshold", "results" and
        "count"
    """
    from .queries import FUZZY_QUERY, get_fuzzy_params

    text = params.query_text
    threshold = params.threshold
    matches = await self.db.execute(
        FUZZY_QUERY,
        # The trailing tenant_id is the user_id argument (query.user_id).
        get_fuzzy_params(text, tenant_id, threshold, params.limit, tenant_id),
    )

    return {
        "query_type": "FUZZY",
        "query_text": text,
        "threshold": threshold,
        "results": matches,
        "count": len(matches),
    }
262
+
263
async def _execute_search(
    self, params: SearchParameters, tenant_id: str
) -> dict[str, Any]:
    """
    Execute a SEARCH query: embed the query text and run SEARCH_QUERY.

    The searched field is always "content" (SearchParameters carries no field
    name). Validation only happens when a model is registered for the table:
    - if the model declares fields and "content" is not one of them,
      FieldNotFoundError is raised
    - if the model has embeddable fields and "content" is not one of them,
      EmbeddingFieldNotFoundError is raised
    Tables without a registered model skip both checks.

    Args:
        params: SearchParameters with ``query_text``, ``table_name``,
            ``min_similarity`` and ``limit``
        tenant_id: Tenant identifier; passed to the query both as the tenant
            and as the user id

    Returns:
        Dict with "query_type", "query_text", "table_name", "field_name",
        "results" and "count"

    Raises:
        FieldNotFoundError: If the registered model has no "content" field
        EmbeddingFieldNotFoundError: If "content" is not embeddable on the
            registered model
    """
    table_name = params.table_name
    # SearchParameters doesn't have field_name, so the field is fixed.
    # (A previous "field_name is None" fallback could never run and was removed.)
    field_name = "content"

    # Get model fields for validation (both are empty for unregistered tables)
    available_fields = self._get_model_fields(table_name)
    embeddable_fields = self._get_embeddable_fields(table_name)

    # Validate field exists
    if available_fields and field_name not in available_fields:
        raise FieldNotFoundError(
            table_name or "UNKNOWN",
            field_name,
            available_fields,
        )

    # Validate field has embeddings
    if embeddable_fields and field_name not in embeddable_fields:
        raise EmbeddingFieldNotFoundError(
            table_name or "UNKNOWN",
            field_name,
            embeddable_fields,
        )

    # Generate embedding for query text
    from ...settings import settings
    from ..embeddings.api import generate_embedding_async
    from .queries import SEARCH_QUERY, get_search_params

    # SearchParameters doesn't have provider, use default
    provider = settings.llm.embedding_provider

    query_embedding = await generate_embedding_async(
        text=params.query_text,
        model=settings.llm.embedding_model,
        provider=provider,
    )

    # An explicit 0.0 similarity is honoured; only None falls back to 0.3.
    min_sim = params.min_similarity if params.min_similarity is not None else 0.3
    # A missing or zero limit falls back to 10.
    limit = params.limit or 10
    query_params = get_search_params(
        query_embedding,
        table_name,
        field_name,
        tenant_id,
        provider,
        min_sim,
        limit,
        tenant_id,  # Use tenant_id (query.user_id) as user_id
    )
    logger.debug(
        f"SEARCH params: table={table_name}, field={field_name}, "
        f"tenant_id={tenant_id}, provider={provider}, "
        f"min_similarity={min_sim}, limit={limit}, "
        f"embedding_dims={len(query_embedding)}"
    )
    results = await self.db.execute(SEARCH_QUERY, query_params)
    logger.debug(f"SEARCH results: {len(results)} rows")

    return {
        "query_type": "SEARCH",
        "query_text": params.query_text,
        "table_name": table_name,
        "field_name": field_name,
        "results": results,
        "count": len(results),
    }
366
+
367
async def _execute_sql(
    self, params: SQLParameters, tenant_id: str
) -> dict[str, Any]:
    """
    Execute a SQL query through ``self.db.execute``.

    Supports two modes:
    1. Raw SQL: ``params.raw_query`` holds the full statement. No tenant
       filter is added. The text is rejected if DROP, DELETE, TRUNCATE or
       ALTER appears as a whole word anywhere outside comments and quoted
       text, so the guard also covers leading comments, additional
       ``;``-separated statements and data-modifying CTEs.
    2. Structured: ``params.table_name`` plus optional ``where_clause`` and
       ``limit`` are turned into a query by ``build_sql_query`` and executed
       with ``tenant_id`` as the single bound parameter.

    The keyword guard is a best-effort guard-rail, not a SQL parser; database
    role permissions remain the real protection.

    Args:
        params: SQLParameters with raw_query OR table_name + where_clause
        tenant_id: Tenant identifier

    Returns:
        Dict with "query_type", "results", "count" and either "raw_query"
        (mode 1) or "table_name" (mode 2)

    Raises:
        ValueError: If a raw query contains a forbidden operation, or if
            neither raw_query nor table_name is provided
    """
    # Mode 1: Raw SQL query (no tenant isolation added automatically)
    if params.raw_query:
        import re

        # Allow: SELECT, INSERT, UPDATE, WITH (read + data modifications)
        # Block: DROP, DELETE, TRUNCATE, ALTER (destructive operations)
        forbidden_keywords = ["DROP", "DELETE", "TRUNCATE", "ALTER"]
        keyword_re = re.compile(
            r"\b(" + "|".join(forbidden_keywords) + r")\b", re.IGNORECASE
        )

        # Text that can contain keywords without executing them:
        # line comments, block comments, quoted identifiers, dollar-quoted strings.
        inert_text = (
            r"--[^\n]*"
            r"|/\*.*?\*/"
            r'|"(?:[^"]|"")*"'
            r"|\$(\w*)\$.*?\$\1\$"
        )
        # Single-quoted literals are read both ways (backslash as plain text,
        # and backslash as an escape) so neither reading can hide a statement.
        literal_readings = (
            r"'(?:[^']|'')*'",
            r"'(?:[^'\\]|''|\\.)*'",
        )

        for literal in literal_readings:
            executable_text = re.sub(
                f"{inert_text}|{literal}", " ", params.raw_query, flags=re.DOTALL
            )
            found = keyword_re.search(executable_text)
            if found:
                keyword = found.group(1).upper()
                raise ValueError(
                    f"Destructive SQL operation '{keyword}' is not allowed. "
                    f"Forbidden operations: {', '.join(forbidden_keywords)}"
                )

        results = await self.db.execute(params.raw_query)
        return {
            "query_type": "SQL",
            "raw_query": params.raw_query,
            "results": results,
            "count": len(results),
        }

    # Mode 2: Structured query with tenant isolation
    from .queries import build_sql_query

    if not params.table_name:
        raise ValueError("SQL query requires either raw_query or table_name")

    # NOTE(review): table_name and where_clause are handed to build_sql_query
    # as text; confirm that helper validates or escapes them.
    query = build_sql_query(
        table_name=params.table_name,
        where_clause=params.where_clause or "1=1",
        tenant_id=tenant_id,
        limit=params.limit,
    )

    results = await self.db.execute(query, (tenant_id,))

    return {
        "query_type": "SQL",
        "table_name": params.table_name,
        "results": results,
        "count": len(results),
    }
431
+
432
async def _execute_traverse(
    self, params: TraverseParameters, tenant_id: str
) -> dict[str, Any]:
    """
    Execute a TRAVERSE query by running TRAVERSE_QUERY.

    Only one relationship type is forwarded to the query: the first entry of
    ``params.edge_types``. If the list is empty/None or contains "*", no type
    filter is sent. When more than one type is supplied, the extra ones are
    ignored and a warning is logged.

    Args:
        params: TraverseParameters with ``initial_query`` (start key),
            ``max_depth`` and ``edge_types``
        tenant_id: Tenant identifier; passed to the query both as the tenant
            and as the user id

    Returns:
        Dict with "query_type", "start_key", "max_depth" (as supplied in
        params, even though a falsy value is executed as depth 1),
        "edge_types", "results" and "count"
    """
    from .queries import TRAVERSE_QUERY, get_traverse_params

    # The query takes a single relationship type, not an array:
    # use the first requested type, or None to mean "all types".
    rel_type: str | None = None
    edge_types = params.edge_types
    if edge_types and "*" not in edge_types:
        rel_type = edge_types[0]
        if len(edge_types) > 1:
            logger.warning(
                f"TRAVERSE supports a single edge type; using '{rel_type}' "
                f"and ignoring {len(edge_types) - 1} additional type(s)"
            )

    query_params = get_traverse_params(
        start_key=params.initial_query,
        tenant_id=tenant_id,
        user_id=tenant_id,  # Use tenant_id (query.user_id) as user_id
        max_depth=params.max_depth or 1,
        rel_type=rel_type,
        keys_only=False,
    )
    results = await self.db.execute(TRAVERSE_QUERY, query_params)

    return {
        "query_type": "TRAVERSE",
        "start_key": params.initial_query,
        "max_depth": params.max_depth,
        "edge_types": params.edge_types,
        "results": results,
        "count": len(results),
    }
473
+
474
def _parse_query_string(self, query_string: str) -> tuple[QueryType, dict[str, Any]]:
    """
    Convert a REM query string into a (query type, parameters) pair.

    A new RemQueryParser is created for every call and the result of its
    ``parse()`` is returned unchanged.
    """
    return RemQueryParser().parse(query_string)
480
+
481
async def execute_query_string(
    self, query_string: str, user_id: str | None = None
) -> dict[str, Any]:
    """
    Parse a REM dialect query string and execute it.

    The string is parsed with ``_parse_query_string``, the pieces are
    validated into a RemQuery, and the query is run via ``execute_query``.

    Args:
        query_string: REM dialect query (e.g., 'LOOKUP "Sarah Chen"',
            'SEARCH resources "API design"', 'SELECT * FROM users')
        user_id: Optional user ID stored on the RemQuery for isolation

    Returns:
        The dict returned by ``execute_query``:
        - query_type: The type of query executed
        - results: List of result rows
        - count: Number of results
        - Additional fields depending on query type

    Raises:
        QueryExecutionError: If query execution fails
        Errors raised by the parser or by RemQuery validation propagate
        unchanged (presumably ValueError for a malformed string - verify
        against the parser).

    Example:
        >>> result = await rem_service.execute_query_string(
        ...     'LOOKUP "Sarah Chen"',
        ...     user_id="user-123"
        ... )
        >>> print(result["count"])
        1
    """
    parsed_type, parsed_params = self._parse_query_string(query_string)

    # Let the RemQuery model validate and coerce the parsed pieces.
    payload = {
        "query_type": parsed_type,
        "parameters": parsed_params,
        "user_id": user_id,
    }
    return await self.execute_query(RemQuery.model_validate(payload))
527
+
528
async def ask_rem(
    self, natural_query: str, tenant_id: str, llm_model: str | None = None, plan_mode: bool = False
) -> dict[str, Any]:
    """
    Convert a natural language question to a REM query and optionally run it.

    The question is sent to the REM query agent, which returns a query
    string, a confidence score and optional reasoning. The query is executed
    only when confidence >= 0.7 and plan_mode is False. Parse or execution
    failures are reported in the result rather than raised.

    Args:
        natural_query: Natural language question
        tenant_id: Tenant identifier; used as the RemQuery ``user_id``
        llm_model: Optional LLM model override passed to the agent
        plan_mode: If True, only return the generated query without executing

    Returns:
        Dict with:
        - query: Generated REM query string (e.g., "LOOKUP sarah-chen")
        - confidence: Confidence score reported by the agent
        - reasoning: Agent explanation, or "" when none was given
        - natural_query: The original question
        - results: Executed query results (only if confidence >= 0.7, not
          plan_mode, and execution succeeded)
        - plan_mode: True (only when plan_mode was requested)
        - warning: Present when execution failed, or when confidence < 0.7
          and plan_mode is False

    Example:
        >>> result = await rem_service.ask_rem("Who is Sarah Chen?", tenant_id="acme")
        >>> print(result["query"])
        "LOOKUP sarah-chen"
        >>> print(result["results"]["count"])
        1

        >>> # Plan mode - show query without executing
        >>> result = await rem_service.ask_rem("Find Sarah", tenant_id="acme", plan_mode=True)
        >>> print(result["query"])
        "LOOKUP sarah"
        >>> print("results" in result)
        False
    """
    from ...agentic.agents import ask_rem as agent_ask_rem

    # Get query string from REM Query Agent
    query_output = await agent_ask_rem(
        natural_query=natural_query,
        llm_model=llm_model,
    )

    result: dict[str, Any] = {
        "query": query_output.query,
        "confidence": query_output.confidence,
        "reasoning": query_output.reasoning or "",
        "natural_query": natural_query,
    }

    # Execute query if confidence is high enough and not in plan mode
    if query_output.confidence >= 0.7 and not plan_mode:
        try:
            query_type, parameters = self._parse_query_string(query_output.query)

            # RemQuery takes user_id, which we treat as tenant_id.
            # Validation converts the dict to the correct parameter type.
            rem_query = RemQuery.model_validate({
                "query_type": query_type,
                "parameters": parameters,
                "user_id": tenant_id,
            })

            result["results"] = await self.execute_query(rem_query)

        except Exception as e:
            # Deliberate best-effort: report the failure instead of raising.
            result["warning"] = f"Failed to parse or execute query: {str(e)}"
            # Pass the error as a format argument: loguru applies str.format
            # whenever arguments are given, so braces in the error text must
            # not be part of the template. logger.exception attaches the
            # traceback.
            logger.exception("Query execution failed: {}", e)

    elif plan_mode:
        result["plan_mode"] = True
    else:
        # Low confidence - don't auto-execute
        result["warning"] = "Low confidence score. Review reasoning before executing."

    return result