remdb 0.3.242__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (235) hide show
  1. rem/__init__.py +129 -0
  2. rem/agentic/README.md +760 -0
  3. rem/agentic/__init__.py +54 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +38 -0
  6. rem/agentic/agents/agent_manager.py +311 -0
  7. rem/agentic/agents/sse_simulator.py +502 -0
  8. rem/agentic/context.py +425 -0
  9. rem/agentic/context_builder.py +360 -0
  10. rem/agentic/llm_provider_models.py +301 -0
  11. rem/agentic/mcp/__init__.py +0 -0
  12. rem/agentic/mcp/tool_wrapper.py +273 -0
  13. rem/agentic/otel/__init__.py +5 -0
  14. rem/agentic/otel/setup.py +240 -0
  15. rem/agentic/providers/phoenix.py +926 -0
  16. rem/agentic/providers/pydantic_ai.py +854 -0
  17. rem/agentic/query.py +117 -0
  18. rem/agentic/query_helper.py +89 -0
  19. rem/agentic/schema.py +737 -0
  20. rem/agentic/serialization.py +245 -0
  21. rem/agentic/tools/__init__.py +5 -0
  22. rem/agentic/tools/rem_tools.py +242 -0
  23. rem/api/README.md +657 -0
  24. rem/api/deps.py +253 -0
  25. rem/api/main.py +460 -0
  26. rem/api/mcp_router/prompts.py +182 -0
  27. rem/api/mcp_router/resources.py +820 -0
  28. rem/api/mcp_router/server.py +243 -0
  29. rem/api/mcp_router/tools.py +1605 -0
  30. rem/api/middleware/tracking.py +172 -0
  31. rem/api/routers/admin.py +520 -0
  32. rem/api/routers/auth.py +898 -0
  33. rem/api/routers/chat/__init__.py +5 -0
  34. rem/api/routers/chat/child_streaming.py +394 -0
  35. rem/api/routers/chat/completions.py +702 -0
  36. rem/api/routers/chat/json_utils.py +76 -0
  37. rem/api/routers/chat/models.py +202 -0
  38. rem/api/routers/chat/otel_utils.py +33 -0
  39. rem/api/routers/chat/sse_events.py +546 -0
  40. rem/api/routers/chat/streaming.py +950 -0
  41. rem/api/routers/chat/streaming_utils.py +327 -0
  42. rem/api/routers/common.py +18 -0
  43. rem/api/routers/dev.py +87 -0
  44. rem/api/routers/feedback.py +276 -0
  45. rem/api/routers/messages.py +620 -0
  46. rem/api/routers/models.py +86 -0
  47. rem/api/routers/query.py +362 -0
  48. rem/api/routers/shared_sessions.py +422 -0
  49. rem/auth/README.md +258 -0
  50. rem/auth/__init__.py +36 -0
  51. rem/auth/jwt.py +367 -0
  52. rem/auth/middleware.py +318 -0
  53. rem/auth/providers/__init__.py +16 -0
  54. rem/auth/providers/base.py +376 -0
  55. rem/auth/providers/email.py +215 -0
  56. rem/auth/providers/google.py +163 -0
  57. rem/auth/providers/microsoft.py +237 -0
  58. rem/cli/README.md +517 -0
  59. rem/cli/__init__.py +8 -0
  60. rem/cli/commands/README.md +299 -0
  61. rem/cli/commands/__init__.py +3 -0
  62. rem/cli/commands/ask.py +549 -0
  63. rem/cli/commands/cluster.py +1808 -0
  64. rem/cli/commands/configure.py +495 -0
  65. rem/cli/commands/db.py +828 -0
  66. rem/cli/commands/dreaming.py +324 -0
  67. rem/cli/commands/experiments.py +1698 -0
  68. rem/cli/commands/mcp.py +66 -0
  69. rem/cli/commands/process.py +388 -0
  70. rem/cli/commands/query.py +109 -0
  71. rem/cli/commands/scaffold.py +47 -0
  72. rem/cli/commands/schema.py +230 -0
  73. rem/cli/commands/serve.py +106 -0
  74. rem/cli/commands/session.py +453 -0
  75. rem/cli/dreaming.py +363 -0
  76. rem/cli/main.py +123 -0
  77. rem/config.py +244 -0
  78. rem/mcp_server.py +41 -0
  79. rem/models/core/__init__.py +49 -0
  80. rem/models/core/core_model.py +70 -0
  81. rem/models/core/engram.py +333 -0
  82. rem/models/core/experiment.py +672 -0
  83. rem/models/core/inline_edge.py +132 -0
  84. rem/models/core/rem_query.py +246 -0
  85. rem/models/entities/__init__.py +68 -0
  86. rem/models/entities/domain_resource.py +38 -0
  87. rem/models/entities/feedback.py +123 -0
  88. rem/models/entities/file.py +57 -0
  89. rem/models/entities/image_resource.py +88 -0
  90. rem/models/entities/message.py +64 -0
  91. rem/models/entities/moment.py +123 -0
  92. rem/models/entities/ontology.py +181 -0
  93. rem/models/entities/ontology_config.py +131 -0
  94. rem/models/entities/resource.py +95 -0
  95. rem/models/entities/schema.py +87 -0
  96. rem/models/entities/session.py +84 -0
  97. rem/models/entities/shared_session.py +180 -0
  98. rem/models/entities/subscriber.py +175 -0
  99. rem/models/entities/user.py +93 -0
  100. rem/py.typed +0 -0
  101. rem/registry.py +373 -0
  102. rem/schemas/README.md +507 -0
  103. rem/schemas/__init__.py +6 -0
  104. rem/schemas/agents/README.md +92 -0
  105. rem/schemas/agents/core/agent-builder.yaml +235 -0
  106. rem/schemas/agents/core/moment-builder.yaml +178 -0
  107. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  108. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  109. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  110. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  111. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  112. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  113. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  114. rem/schemas/agents/examples/hello-world.yaml +37 -0
  115. rem/schemas/agents/examples/query.yaml +54 -0
  116. rem/schemas/agents/examples/simple.yaml +21 -0
  117. rem/schemas/agents/examples/test.yaml +29 -0
  118. rem/schemas/agents/rem.yaml +132 -0
  119. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  120. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  121. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  122. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  123. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  124. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  125. rem/services/__init__.py +18 -0
  126. rem/services/audio/INTEGRATION.md +308 -0
  127. rem/services/audio/README.md +376 -0
  128. rem/services/audio/__init__.py +15 -0
  129. rem/services/audio/chunker.py +354 -0
  130. rem/services/audio/transcriber.py +259 -0
  131. rem/services/content/README.md +1269 -0
  132. rem/services/content/__init__.py +5 -0
  133. rem/services/content/providers.py +760 -0
  134. rem/services/content/service.py +762 -0
  135. rem/services/dreaming/README.md +230 -0
  136. rem/services/dreaming/__init__.py +53 -0
  137. rem/services/dreaming/affinity_service.py +322 -0
  138. rem/services/dreaming/moment_service.py +251 -0
  139. rem/services/dreaming/ontology_service.py +54 -0
  140. rem/services/dreaming/user_model_service.py +297 -0
  141. rem/services/dreaming/utils.py +39 -0
  142. rem/services/email/__init__.py +10 -0
  143. rem/services/email/service.py +522 -0
  144. rem/services/email/templates.py +360 -0
  145. rem/services/embeddings/__init__.py +11 -0
  146. rem/services/embeddings/api.py +127 -0
  147. rem/services/embeddings/worker.py +435 -0
  148. rem/services/fs/README.md +662 -0
  149. rem/services/fs/__init__.py +62 -0
  150. rem/services/fs/examples.py +206 -0
  151. rem/services/fs/examples_paths.py +204 -0
  152. rem/services/fs/git_provider.py +935 -0
  153. rem/services/fs/local_provider.py +760 -0
  154. rem/services/fs/parsing-hooks-examples.md +172 -0
  155. rem/services/fs/paths.py +276 -0
  156. rem/services/fs/provider.py +460 -0
  157. rem/services/fs/s3_provider.py +1042 -0
  158. rem/services/fs/service.py +186 -0
  159. rem/services/git/README.md +1075 -0
  160. rem/services/git/__init__.py +17 -0
  161. rem/services/git/service.py +469 -0
  162. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  163. rem/services/phoenix/README.md +453 -0
  164. rem/services/phoenix/__init__.py +46 -0
  165. rem/services/phoenix/client.py +960 -0
  166. rem/services/phoenix/config.py +88 -0
  167. rem/services/phoenix/prompt_labels.py +477 -0
  168. rem/services/postgres/README.md +757 -0
  169. rem/services/postgres/__init__.py +49 -0
  170. rem/services/postgres/diff_service.py +599 -0
  171. rem/services/postgres/migration_service.py +427 -0
  172. rem/services/postgres/programmable_diff_service.py +635 -0
  173. rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
  174. rem/services/postgres/register_type.py +353 -0
  175. rem/services/postgres/repository.py +481 -0
  176. rem/services/postgres/schema_generator.py +661 -0
  177. rem/services/postgres/service.py +802 -0
  178. rem/services/postgres/sql_builder.py +355 -0
  179. rem/services/rate_limit.py +113 -0
  180. rem/services/rem/README.md +318 -0
  181. rem/services/rem/__init__.py +23 -0
  182. rem/services/rem/exceptions.py +71 -0
  183. rem/services/rem/executor.py +293 -0
  184. rem/services/rem/parser.py +180 -0
  185. rem/services/rem/queries.py +196 -0
  186. rem/services/rem/query.py +371 -0
  187. rem/services/rem/service.py +608 -0
  188. rem/services/session/README.md +374 -0
  189. rem/services/session/__init__.py +13 -0
  190. rem/services/session/compression.py +488 -0
  191. rem/services/session/pydantic_messages.py +310 -0
  192. rem/services/session/reload.py +85 -0
  193. rem/services/user_service.py +130 -0
  194. rem/settings.py +1877 -0
  195. rem/sql/background_indexes.sql +52 -0
  196. rem/sql/migrations/001_install.sql +983 -0
  197. rem/sql/migrations/002_install_models.sql +3157 -0
  198. rem/sql/migrations/003_optional_extensions.sql +326 -0
  199. rem/sql/migrations/004_cache_system.sql +282 -0
  200. rem/sql/migrations/005_schema_update.sql +145 -0
  201. rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
  202. rem/utils/AGENTIC_CHUNKING.md +597 -0
  203. rem/utils/README.md +628 -0
  204. rem/utils/__init__.py +61 -0
  205. rem/utils/agentic_chunking.py +622 -0
  206. rem/utils/batch_ops.py +343 -0
  207. rem/utils/chunking.py +108 -0
  208. rem/utils/clip_embeddings.py +276 -0
  209. rem/utils/constants.py +97 -0
  210. rem/utils/date_utils.py +228 -0
  211. rem/utils/dict_utils.py +98 -0
  212. rem/utils/embeddings.py +436 -0
  213. rem/utils/examples/embeddings_example.py +305 -0
  214. rem/utils/examples/sql_types_example.py +202 -0
  215. rem/utils/files.py +323 -0
  216. rem/utils/markdown.py +16 -0
  217. rem/utils/mime_types.py +158 -0
  218. rem/utils/model_helpers.py +492 -0
  219. rem/utils/schema_loader.py +649 -0
  220. rem/utils/sql_paths.py +146 -0
  221. rem/utils/sql_types.py +350 -0
  222. rem/utils/user_id.py +81 -0
  223. rem/utils/vision.py +325 -0
  224. rem/workers/README.md +506 -0
  225. rem/workers/__init__.py +7 -0
  226. rem/workers/db_listener.py +579 -0
  227. rem/workers/db_maintainer.py +74 -0
  228. rem/workers/dreaming.py +502 -0
  229. rem/workers/engram_processor.py +312 -0
  230. rem/workers/sqs_file_processor.py +193 -0
  231. rem/workers/unlogged_maintainer.py +463 -0
  232. remdb-0.3.242.dist-info/METADATA +1632 -0
  233. remdb-0.3.242.dist-info/RECORD +235 -0
  234. remdb-0.3.242.dist-info/WHEEL +4 -0
  235. remdb-0.3.242.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,293 @@
1
+ """
2
+ REM Query Executor - Shared PostgreSQL function calling layer.
3
+
4
+ This module provides the single source of truth for executing REM queries
5
+ against PostgreSQL functions (rem_lookup, rem_search, rem_fuzzy, rem_traverse).
6
+
7
+ Both REMQueryService (string-based) and RemService (Pydantic-based) delegate
8
+ to these functions to avoid code duplication.
9
+
10
+ Design:
11
+ - One function per query type
12
+ - All embedding generation happens here
13
+ - Direct PostgreSQL function calls
14
+ - Type-safe parameters via Pydantic models or dicts
15
+ """
16
+
17
+ import asyncio
18
+ from collections import defaultdict
19
+ from typing import Any, Optional, cast
20
+ from loguru import logger
21
+
22
+
23
class REMQueryExecutor:
    """
    Executor for REM PostgreSQL functions.

    Provides unified backend for both string-based and Pydantic-based query services.
    Each method runs a single statement through ``postgres_service.execute``
    and reshapes the returned rows into plain dicts.
    """

    def __init__(self, postgres_service: Any):
        """
        Initialize query executor.

        Args:
            postgres_service: PostgresService instance. Only its awaitable
                ``execute(sql[, params])`` method is used by this class.
        """
        self.db = postgres_service
        logger.debug("Initialized REMQueryExecutor")

    async def execute_lookup(
        self,
        entity_key: str,
        user_id: str | None = None,
    ) -> list[dict[str, Any]]:
        """
        Execute rem_lookup() PostgreSQL function.

        Args:
            entity_key: Entity key to lookup
            user_id: Optional user filter

        Returns:
            List of entity dicts from KV_STORE. Each dict is a copy of the
            row's ``data`` payload with ``entity_key`` (copy of ``name``) and
            ``entity_id`` (copy of ``id``) added when those fields exist.
        """
        sql = """
            SELECT entity_type, data
            FROM rem_lookup($1, $2, $3)
        """

        # user_id is bound to both the second and third argument, the same
        # convention execute_search/execute_traverse use for tenant_id/user_id.
        results = await self.db.execute(sql, (entity_key, user_id, user_id))

        # Extract JSONB records from the data column and add aliases
        entities = []
        for row in results:
            entity = dict(row["data"])
            # Add entity_key as alias for name (for backward compat with tests)
            if "name" in entity:
                entity["entity_key"] = entity["name"]
            # Add entity_id as alias for id (for backward compat with tests)
            if "id" in entity:
                entity["entity_id"] = entity["id"]
            entities.append(entity)

        logger.debug(f"LOOKUP '{entity_key}': {len(entities)} results")
        return entities

    async def execute_fetch(
        self,
        entity_keys: list[str],
        entity_types: list[str],
        user_id: str | None = None,
    ) -> list[dict[str, Any]]:
        """
        Execute rem_fetch() PostgreSQL function.

        Fetches full entity records (all columns) from multiple tables by entity keys.
        Groups by table internally, fetches all records, returns unified JSONB result set.
        Returns complete entities, not just KV store metadata.

        Args:
            entity_keys: List of entity keys to fetch
            entity_types: Parallel list of entity types (table names); item i
                names the table that holds ``entity_keys[i]``
            user_id: Optional user filter

        Returns:
            List of full entity records as dicts with entity_key, entity_type, and entity_record.
            An empty ``entity_keys`` short-circuits to ``[]`` without a query.
        """
        if not entity_keys:
            return []

        import json

        # zip() stops at the shorter list, so a length mismatch would silently
        # drop keys; surface it instead of hiding the caller's mistake.
        if len(entity_keys) != len(entity_types):
            logger.warning(
                f"FETCH: {len(entity_keys)} keys but {len(entity_types)} types; "
                "unpaired items are ignored"
            )

        # Build JSONB structure: {"resources": ["key1", "key2"], "moments": ["key3"]}
        entities_by_table: defaultdict[str, list[str]] = defaultdict(list)
        for key, table in zip(entity_keys, entity_types):
            entities_by_table[table].append(key)

        entities_json = json.dumps(entities_by_table)

        sql = """
            SELECT entity_key, entity_type, entity_record
            FROM rem_fetch($1::jsonb, $2)
        """

        results = await self.db.execute(sql, (entities_json, user_id))

        logger.debug(
            f"FETCH: {len(results)}/{len(entity_keys)} records fetched from {len(entities_by_table)} tables"
        )
        return cast(list[dict[str, Any]], results)

    async def execute_fuzzy(
        self,
        query_text: str,
        user_id: str | None = None,
        threshold: float = 0.3,
        limit: int = 10,
    ) -> list[dict[str, Any]]:
        """
        Execute rem_fuzzy() PostgreSQL function.

        Args:
            query_text: Text to fuzzy match
            user_id: Optional user filter
            threshold: Similarity threshold (0.0-1.0)
            limit: Max results

        Returns:
            List of fuzzy-matched entities with similarity_score. Each dict is
            the row's ``data`` payload plus ``similarity_score`` and, when a
            ``name`` field exists, an ``entity_key`` alias for it.
        """
        sql = """
            SELECT entity_type, data, similarity_score
            FROM rem_fuzzy($1, $2, $3, $4, $5)
        """

        # user_id fills both the second and the fifth argument.
        results = await self.db.execute(
            sql, (query_text, user_id, threshold, limit, user_id)
        )

        # Extract JSONB records and add similarity_score + entity_key alias
        entities = []
        for row in results:
            entity = dict(row["data"])
            entity["similarity_score"] = row["similarity_score"]
            # Add entity_key as alias for name (for backward compat)
            if "name" in entity:
                entity["entity_key"] = entity["name"]
            entities.append(entity)

        logger.debug(f"FUZZY '{query_text}': {len(entities)} results (threshold={threshold})")
        return entities

    async def execute_search(
        self,
        query_embedding: list[float],
        table_name: str,
        field_name: str,
        provider: str,
        min_similarity: float = 0.7,
        limit: int = 10,
        user_id: str | None = None,
    ) -> list[dict[str, Any]]:
        """
        Execute rem_search() PostgreSQL function.

        Args:
            query_embedding: Embedding vector for query. The statement casts it
                to ``vector(1536)``, so the length is fixed by the SQL.
            table_name: Table to search (resources, moments, users)
            field_name: Field name to search
            provider: Embedding provider (openai, anthropic)
            min_similarity: Minimum cosine similarity
            limit: Max results
            user_id: Optional user filter

        Returns:
            List of similar entities. Each dict is the row's ``data`` payload
            plus ``similarity_score``, ``entity_type``, ``distance``
            (``1 - similarity_score``) and, when ``name`` exists, ``entity_key``.
        """
        # Convert embedding to PostgreSQL vector format
        embedding_str = "[" + ",".join(str(x) for x in query_embedding) + "]"

        sql = """
            SELECT entity_type, similarity_score, data
            FROM rem_search($1::vector(1536), $2, $3, $4, $5, $6, $7, $8)
        """

        results = await self.db.execute(
            sql,
            (
                embedding_str,
                table_name,
                field_name,
                user_id,  # tenant_id (backward compat)
                provider,
                min_similarity,
                limit,
                user_id,  # user_id
            ),
        )

        # Extract JSONB records and add similarity_score + entity_key alias
        entities = []
        for row in results:
            entity = dict(row["data"])
            entity["similarity_score"] = row["similarity_score"]
            entity["entity_type"] = row["entity_type"]
            # Add entity_key as alias for name (for backward compat)
            if "name" in entity:
                entity["entity_key"] = entity["name"]
            # Add distance as alias for similarity_score (for backward compat)
            # Note: similarity_score is cosine similarity (higher = more similar)
            # distance is inverse (lower = more similar), so: distance = 1 - similarity_score
            entity["distance"] = 1.0 - row["similarity_score"]
            entities.append(entity)

        logger.debug(
            f"SEARCH in {table_name}.{field_name}: {len(entities)} results (similarity≥{min_similarity})"
        )
        return entities

    async def execute_traverse(
        self,
        start_key: str,
        direction: str,
        max_depth: int,
        edge_types: list[str] | None,
        user_id: str | None = None,
    ) -> list[dict[str, Any]]:
        """
        Execute rem_traverse() PostgreSQL function.

        Args:
            start_key: Starting entity key
            direction: OUTBOUND, INBOUND, or BOTH (not used in current function)
            max_depth: Maximum traversal depth
            edge_types: Optional list of edge types to filter. Only the first
                entry is sent to the database; a warning is logged when more
                than one is supplied.
            user_id: Optional user filter

        Returns:
            List of traversed entities with path information. Each row dict
            additionally carries ``edge_type`` as an alias of ``rel_type``.
        """
        # Note: rem_traverse signature is (entity_key, tenant_id, user_id, max_depth, rel_type, keys_only)
        # tenant_id is for backward compat, set to user_id
        # direction parameter is not used by the current PostgreSQL function
        # edge_types is single value, not array
        edge_type_filter = edge_types[0] if edge_types else None
        if edge_types and len(edge_types) > 1:
            # Make the lossy conversion visible instead of silently narrowing
            # the caller's filter.
            logger.warning(
                f"TRAVERSE filters on a single edge type; using '{edge_type_filter}' "
                f"and ignoring {len(edge_types) - 1} other(s)"
            )

        sql = """
            SELECT depth, entity_key, entity_type, entity_id, rel_type, rel_weight, path, entity_record
            FROM rem_traverse($1, $2, $3, $4, $5, $6)
        """

        results = await self.db.execute(
            sql, (start_key, user_id, user_id, max_depth, edge_type_filter, False)
        )

        # Add edge_type alias for rel_type (backward compat)
        processed_results = []
        for row in results:
            result = dict(row)
            if "rel_type" in result:
                result["edge_type"] = result["rel_type"]
            processed_results.append(result)

        logger.debug(
            f"TRAVERSE from '{start_key}' (depth={max_depth}): {len(processed_results)} results"
        )
        return processed_results

    async def execute_sql(
        self,
        query: str,
    ) -> list[dict[str, Any]]:
        """
        Execute raw SQL query.

        The string is handed to the database service unchanged and without
        bind parameters, so callers are responsible for what it contains.

        Args:
            query: SQL query string

        Returns:
            Query results as list of dicts
        """
        results = await self.db.execute(query)
        logger.debug(f"SQL query: {len(results)} results")
        return cast(list[dict[str, Any]], results)
@@ -0,0 +1,180 @@
1
+ import shlex
2
+ from typing import Any, Dict, List, Optional, Tuple, Union
3
+
4
+ from ...models.core import QueryType
5
+
6
+
7
class RemQueryParser:
    """
    Robust parser for REM query language using shlex for proper quoting support.

    A query has the shape ``<TYPE> [positional text] [KEYWORD value] [key=value]``.
    A query whose first token is not a known QueryType is passed through
    unchanged as raw SQL.
    """

    # REM keywords that consume the following token as their value, mapped to
    # the parameter name that value is stored under.
    _KEYWORD_FIELDS = {
        "LIMIT": "limit",
        "DEPTH": "max_depth",
        "THRESHOLD": "threshold",
        "TYPE": "edge_types",
        "FROM": "initial_query",
        "WITH": "initial_query",
    }

    # Short names accepted in key=value arguments, mapped to model field names.
    _PARAMETER_ALIASES = {
        "table": "table_name",
        "field": "field_name",
        "where": "where_clause",
        "depth": "max_depth",
        "rel_type": "edge_types",
        "rel_types": "edge_types",
    }

    def parse(self, query_string: str) -> Tuple[QueryType, Dict[str, Any]]:
        """
        Parse a REM query string into a QueryType and a dictionary of parameters.

        Args:
            query_string: The raw query string (e.g., 'LOOKUP "Sarah Chen"').

        Returns:
            Tuple of (QueryType, parameters_dict).

        Raises:
            ValueError: If the query string is empty or cannot be tokenized
                (for example because of an unbalanced quote).
        """
        if not query_string or not query_string.strip():
            raise ValueError("Empty query string")

        try:
            # Use shlex to handle quoted strings correctly
            tokens = shlex.split(query_string)
        except ValueError as e:
            # Chain the cause so the tokenizer error stays in the traceback.
            raise ValueError(f"Failed to parse query string: {e}") from e

        if not tokens:
            raise ValueError("Empty query string")

        query_type_str = tokens[0].upper()

        # Try to match REM query types first
        try:
            query_type = QueryType(query_type_str)
        except ValueError:
            # If not a known REM query type, treat as raw SQL
            # This supports SELECT, INSERT, UPDATE, DELETE, WITH, DROP, CREATE, ALTER, etc.
            return QueryType.SQL, {"raw_query": query_string.strip()}

        params: Dict[str, Any] = {}
        positional_args: List[str] = []

        # For SQL queries, preserve the raw query (keywords like LIMIT are SQL keywords)
        if query_type == QueryType.SQL:
            # Everything after the leading "SQL" token is the raw SQL query.
            # Strip first: shlex ignores leading whitespace, so slicing the
            # unstripped string would cut at the wrong offset.
            stripped_query = query_string.strip()
            params["raw_query"] = stripped_query[len(tokens[0]):].strip()
            return query_type, params

        # Process remaining tokens, handling REM keywords
        i = 1
        while i < len(tokens):
            token = tokens[i]
            keyword_field = self._KEYWORD_FIELDS.get(token.upper())

            if keyword_field is not None:
                # Handle REM keywords that take a value
                if i + 1 < len(tokens):
                    params[keyword_field] = self._convert_value(keyword_field, tokens[i + 1])
                    i += 2
                    continue
                # NOTE(review): a keyword that is the last token has no value
                # to consume and is dropped, matching the existing behavior.
            elif "=" in token:
                # It's a keyword argument
                key, value = token.split("=", 1)
                # Handle parameter aliases
                mapped_key = self._map_parameter_alias(key)
                params[mapped_key] = self._convert_value(mapped_key, value)
            else:
                # It's a positional argument part
                positional_args.append(token)
            i += 1

        # Map positional arguments to specific fields based on QueryType
        self._map_positional_args(query_type, positional_args, params)

        return query_type, params

    def _map_parameter_alias(self, key: str) -> str:
        """
        Map common aliases to internal model field names.

        Unknown keys are returned unchanged.
        """
        return self._PARAMETER_ALIASES.get(key, key)

    def _convert_value(self, key: str, value: str) -> Union[str, int, float, List[str]]:
        """
        Convert string values to appropriate types based on the key name.

        Integer and float keys fall back to the original string when the text
        is not numeric, leaving rejection to later validation.
        """
        # Integer fields
        if key in ("limit", "max_depth", "depth"):
            try:
                return int(value)
            except ValueError:
                return value  # Return as string if conversion fails (validation will catch it)

        # Float fields
        if key in ("threshold", "min_similarity"):
            try:
                return float(value)
            except ValueError:
                return value

        # List fields (comma-separated)
        if key in ("edge_types", "tags"):
            return [v.strip() for v in value.split(",")]

        # Default to string
        return value

    def _map_positional_args(
        self, query_type: QueryType, positional_args: List[str], params: Dict[str, Any]
    ) -> None:
        """
        Map accumulated positional arguments to the primary field for the query type.

        Mutates ``params`` in place; does nothing when there are no positional
        arguments.
        """
        if not positional_args:
            return

        # Join positional args with space to reconstruct the text
        # This handles cases where the user didn't quote a multi-word string
        # e.g. FUZZY Sarah Chen -> "Sarah Chen"
        combined_value = " ".join(positional_args)

        if query_type == QueryType.LOOKUP:
            # LOOKUP supports list of keys, but as positional arg we treat as single key or comma-separated
            # If the user provided "key1 key2", it might be interpreted as one key "key1 key2"
            # or multiple keys. For now, let's assume it's a single key entity name unless it has commas.
            if "," in combined_value:
                params["key"] = [k.strip() for k in combined_value.split(",")]
            else:
                params["key"] = combined_value

        elif query_type == QueryType.FUZZY:
            params["query_text"] = combined_value

        elif query_type == QueryType.SEARCH:
            # SEARCH expects: SEARCH <table> <query_text> [LIMIT n]
            # First positional arg is table name, rest is query text
            if len(positional_args) >= 2:
                params["table_name"] = positional_args[0]
                params["query_text"] = " ".join(positional_args[1:])
            elif len(positional_args) == 1:
                # Could be table name or query text - assume query text if no table
                params["query_text"] = positional_args[0]

        elif query_type == QueryType.TRAVERSE:
            params["initial_query"] = combined_value

        elif query_type == QueryType.SQL:
            # SQL with positional args means "SQL SELECT * FROM ..." form
            # Treat the combined positional args as the raw SQL query
            params["raw_query"] = combined_value
@@ -0,0 +1,196 @@
1
+ """
2
+ REM Query SQL Templates.
3
+
4
+ All SQL queries for REM operations are defined here with proper parameterization.
5
+ This separates query logic from business logic and makes queries easier to maintain.
6
+
7
+ Design Pattern:
8
+ - Each query is a named constant with $1, $2, etc. placeholders
9
+ - Query parameters are documented in docstrings
10
+ - Queries delegate to PostgreSQL functions for performance
11
+ - All queries include tenant isolation
12
+ """
13
+
14
# LOOKUP Query
# Delegates to rem_lookup() PostgreSQL function
# Returns raw JSONB data for LLM consumption
# Bind values in the order produced by get_lookup_params().
LOOKUP_QUERY = """
SELECT
entity_type,
data
FROM rem_lookup($1, $2, $3)
"""
# Parameters:
# $1: entity_key (str)
# $2: tenant_id (str)
# $3: user_id (str | None)
# Returns:
# - entity_type: Table name (e.g., "resources", "users")
# - data: Complete entity record as JSONB


# FUZZY Query
# Delegates to rem_fuzzy() PostgreSQL function
# Returns raw JSONB data with similarity scores
# Bind values in the order produced by get_fuzzy_params().
FUZZY_QUERY = """
SELECT
entity_type,
similarity_score,
data
FROM rem_fuzzy($1, $2, $3, $4, $5)
"""
# Parameters:
# $1: query_text (str)
# $2: tenant_id (str)
# $3: threshold (float)
# $4: limit (int)
# $5: user_id (str | None)
# Returns:
# - entity_type: Table name (e.g., "resources", "files")
# - similarity_score: Fuzzy match score (0.0-1.0)
# - data: Complete entity record as JSONB


# SEARCH Query
# Delegates to rem_search() PostgreSQL function
# Returns raw JSONB data with similarity scores
# Bind values in the order produced by get_search_params().
SEARCH_QUERY = """
SELECT
entity_type,
similarity_score,
data
FROM rem_search($1, $2, $3, $4, $5, $6, $7, $8)
"""
# Parameters:
# $1: query_embedding (list[float]; get_search_params() sends it as text)
# $2: table_name (str)
# $3: field_name (str)
# $4: tenant_id (str)
# $5: provider (str)
# $6: min_similarity (float)
# $7: limit (int)
# $8: user_id (str | None)
# Returns:
# - entity_type: Table name (e.g., "resources", "moments")
# - similarity_score: Vector similarity (0.0-1.0)
# - data: Complete entity record as JSONB


# TRAVERSE Query
# Delegates to rem_traverse() PostgreSQL function
# Bind values in the order produced by get_traverse_params().
TRAVERSE_QUERY = """
SELECT
depth,
entity_key,
entity_type,
entity_id,
rel_type,
rel_weight,
path
FROM rem_traverse($1, $2, $3, $4, $5, $6)
"""
# Parameters:
# $1: start_key (str)
# $2: tenant_id (str)
# $3: user_id (str | None)
# $4: max_depth (int)
# $5: rel_type (str | None) - single type, not array
# $6: keys_only (bool)
# Returns:
# - one row per selected column set: depth, entity_key, entity_type,
#   entity_id, rel_type, rel_weight, path
99
+
100
+
101
+ # SQL Query Builder
102
+ # Direct SQL queries with tenant isolation
103
+ def build_sql_query(table_name: str, where_clause: str, tenant_id: str, limit: int | None = None) -> str:
104
+ """
105
+ Build SQL query with tenant isolation.
106
+
107
+ Args:
108
+ table_name: Table name (e.g., "resources", "moments")
109
+ where_clause: WHERE clause (e.g., "moment_type='meeting'")
110
+ tenant_id: Tenant identifier for isolation
111
+ limit: Optional result limit
112
+
113
+ Returns:
114
+ Parameterized SQL query string
115
+
116
+ Note:
117
+ This builds a dynamic query. Consider using prepared statements
118
+ or query builders like SQLAlchemy for production.
119
+ """
120
+ # Sanitize table name (basic validation)
121
+ allowed_tables = ["resources", "moments", "messages", "users", "files"]
122
+ if table_name not in allowed_tables:
123
+ raise ValueError(f"Invalid table name: {table_name}")
124
+
125
+ # Build query with tenant isolation
126
+ where_clause = where_clause or "1=1"
127
+ query = f"SELECT * FROM {table_name} WHERE tenant_id = $1 AND ({where_clause})"
128
+
129
+ if limit:
130
+ query += f" LIMIT {int(limit)}"
131
+
132
+ return query
133
+
134
+
135
+ # Helper: Get query parameters for LOOKUP
136
+ def get_lookup_params(entity_key: str, tenant_id: str, user_id: str | None = None) -> tuple:
137
+ """Get parameters for LOOKUP query."""
138
+ return (entity_key, tenant_id, user_id)
139
+
140
+
141
+ # Helper: Get query parameters for FUZZY
142
+ def get_fuzzy_params(
143
+ query_text: str,
144
+ tenant_id: str,
145
+ threshold: float = 0.7,
146
+ limit: int = 10,
147
+ user_id: str | None = None,
148
+ ) -> tuple:
149
+ """Get parameters for FUZZY query."""
150
+ return (query_text, tenant_id, threshold, limit, user_id)
151
+
152
+
153
+ # Helper: Get query parameters for SEARCH
154
+ def get_search_params(
155
+ query_embedding: list[float],
156
+ table_name: str,
157
+ field_name: str,
158
+ tenant_id: str,
159
+ provider: str,
160
+ min_similarity: float = 0.7,
161
+ limit: int = 10,
162
+ user_id: str | None = None,
163
+ ) -> tuple:
164
+ """
165
+ Get parameters for SEARCH query.
166
+
167
+ Note: provider parameter is required (no default) - should come from settings.
168
+ """
169
+ return (
170
+ str(query_embedding),
171
+ table_name,
172
+ field_name,
173
+ tenant_id,
174
+ provider,
175
+ min_similarity,
176
+ limit,
177
+ user_id,
178
+ )
179
+
180
+
181
+ # Helper: Get query parameters for TRAVERSE
182
+ def get_traverse_params(
183
+ start_key: str,
184
+ tenant_id: str,
185
+ user_id: str | None,
186
+ max_depth: int = 1,
187
+ rel_type: str | None = None,
188
+ keys_only: bool = False,
189
+ ) -> tuple:
190
+ """
191
+ Get parameters for TRAVERSE query.
192
+
193
+ Note: rel_type is singular (not array) - PostgreSQL function filters by single type.
194
+ If you need multiple types, call traverse multiple times or update the function.
195
+ """
196
+ return (start_key, tenant_id, user_id, max_depth, rel_type, keys_only)