remdb 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (187) hide show
  1. rem/__init__.py +2 -0
  2. rem/agentic/README.md +650 -0
  3. rem/agentic/__init__.py +39 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +8 -0
  6. rem/agentic/context.py +148 -0
  7. rem/agentic/context_builder.py +329 -0
  8. rem/agentic/mcp/__init__.py +0 -0
  9. rem/agentic/mcp/tool_wrapper.py +107 -0
  10. rem/agentic/otel/__init__.py +5 -0
  11. rem/agentic/otel/setup.py +151 -0
  12. rem/agentic/providers/phoenix.py +674 -0
  13. rem/agentic/providers/pydantic_ai.py +572 -0
  14. rem/agentic/query.py +117 -0
  15. rem/agentic/query_helper.py +89 -0
  16. rem/agentic/schema.py +396 -0
  17. rem/agentic/serialization.py +245 -0
  18. rem/agentic/tools/__init__.py +5 -0
  19. rem/agentic/tools/rem_tools.py +231 -0
  20. rem/api/README.md +420 -0
  21. rem/api/main.py +324 -0
  22. rem/api/mcp_router/prompts.py +182 -0
  23. rem/api/mcp_router/resources.py +536 -0
  24. rem/api/mcp_router/server.py +213 -0
  25. rem/api/mcp_router/tools.py +584 -0
  26. rem/api/routers/auth.py +229 -0
  27. rem/api/routers/chat/__init__.py +5 -0
  28. rem/api/routers/chat/completions.py +281 -0
  29. rem/api/routers/chat/json_utils.py +76 -0
  30. rem/api/routers/chat/models.py +124 -0
  31. rem/api/routers/chat/streaming.py +185 -0
  32. rem/auth/README.md +258 -0
  33. rem/auth/__init__.py +26 -0
  34. rem/auth/middleware.py +100 -0
  35. rem/auth/providers/__init__.py +13 -0
  36. rem/auth/providers/base.py +376 -0
  37. rem/auth/providers/google.py +163 -0
  38. rem/auth/providers/microsoft.py +237 -0
  39. rem/cli/README.md +455 -0
  40. rem/cli/__init__.py +8 -0
  41. rem/cli/commands/README.md +126 -0
  42. rem/cli/commands/__init__.py +3 -0
  43. rem/cli/commands/ask.py +566 -0
  44. rem/cli/commands/configure.py +497 -0
  45. rem/cli/commands/db.py +493 -0
  46. rem/cli/commands/dreaming.py +324 -0
  47. rem/cli/commands/experiments.py +1302 -0
  48. rem/cli/commands/mcp.py +66 -0
  49. rem/cli/commands/process.py +245 -0
  50. rem/cli/commands/schema.py +183 -0
  51. rem/cli/commands/serve.py +106 -0
  52. rem/cli/dreaming.py +363 -0
  53. rem/cli/main.py +96 -0
  54. rem/config.py +237 -0
  55. rem/mcp_server.py +41 -0
  56. rem/models/core/__init__.py +49 -0
  57. rem/models/core/core_model.py +64 -0
  58. rem/models/core/engram.py +333 -0
  59. rem/models/core/experiment.py +628 -0
  60. rem/models/core/inline_edge.py +132 -0
  61. rem/models/core/rem_query.py +243 -0
  62. rem/models/entities/__init__.py +43 -0
  63. rem/models/entities/file.py +57 -0
  64. rem/models/entities/image_resource.py +88 -0
  65. rem/models/entities/message.py +35 -0
  66. rem/models/entities/moment.py +123 -0
  67. rem/models/entities/ontology.py +191 -0
  68. rem/models/entities/ontology_config.py +131 -0
  69. rem/models/entities/resource.py +95 -0
  70. rem/models/entities/schema.py +87 -0
  71. rem/models/entities/user.py +85 -0
  72. rem/py.typed +0 -0
  73. rem/schemas/README.md +507 -0
  74. rem/schemas/__init__.py +6 -0
  75. rem/schemas/agents/README.md +92 -0
  76. rem/schemas/agents/core/moment-builder.yaml +178 -0
  77. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  78. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  79. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  80. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  81. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  82. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  83. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  84. rem/schemas/agents/examples/hello-world.yaml +37 -0
  85. rem/schemas/agents/examples/query.yaml +54 -0
  86. rem/schemas/agents/examples/simple.yaml +21 -0
  87. rem/schemas/agents/examples/test.yaml +29 -0
  88. rem/schemas/agents/rem.yaml +128 -0
  89. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  90. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  91. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  92. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  93. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  94. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  95. rem/services/__init__.py +16 -0
  96. rem/services/audio/INTEGRATION.md +308 -0
  97. rem/services/audio/README.md +376 -0
  98. rem/services/audio/__init__.py +15 -0
  99. rem/services/audio/chunker.py +354 -0
  100. rem/services/audio/transcriber.py +259 -0
  101. rem/services/content/README.md +1269 -0
  102. rem/services/content/__init__.py +5 -0
  103. rem/services/content/providers.py +806 -0
  104. rem/services/content/service.py +676 -0
  105. rem/services/dreaming/README.md +230 -0
  106. rem/services/dreaming/__init__.py +53 -0
  107. rem/services/dreaming/affinity_service.py +336 -0
  108. rem/services/dreaming/moment_service.py +264 -0
  109. rem/services/dreaming/ontology_service.py +54 -0
  110. rem/services/dreaming/user_model_service.py +297 -0
  111. rem/services/dreaming/utils.py +39 -0
  112. rem/services/embeddings/__init__.py +11 -0
  113. rem/services/embeddings/api.py +120 -0
  114. rem/services/embeddings/worker.py +421 -0
  115. rem/services/fs/README.md +662 -0
  116. rem/services/fs/__init__.py +62 -0
  117. rem/services/fs/examples.py +206 -0
  118. rem/services/fs/examples_paths.py +204 -0
  119. rem/services/fs/git_provider.py +935 -0
  120. rem/services/fs/local_provider.py +760 -0
  121. rem/services/fs/parsing-hooks-examples.md +172 -0
  122. rem/services/fs/paths.py +276 -0
  123. rem/services/fs/provider.py +460 -0
  124. rem/services/fs/s3_provider.py +1042 -0
  125. rem/services/fs/service.py +186 -0
  126. rem/services/git/README.md +1075 -0
  127. rem/services/git/__init__.py +17 -0
  128. rem/services/git/service.py +469 -0
  129. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  130. rem/services/phoenix/README.md +453 -0
  131. rem/services/phoenix/__init__.py +46 -0
  132. rem/services/phoenix/client.py +686 -0
  133. rem/services/phoenix/config.py +88 -0
  134. rem/services/phoenix/prompt_labels.py +477 -0
  135. rem/services/postgres/README.md +575 -0
  136. rem/services/postgres/__init__.py +23 -0
  137. rem/services/postgres/migration_service.py +427 -0
  138. rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
  139. rem/services/postgres/register_type.py +352 -0
  140. rem/services/postgres/repository.py +337 -0
  141. rem/services/postgres/schema_generator.py +379 -0
  142. rem/services/postgres/service.py +802 -0
  143. rem/services/postgres/sql_builder.py +354 -0
  144. rem/services/rem/README.md +304 -0
  145. rem/services/rem/__init__.py +23 -0
  146. rem/services/rem/exceptions.py +71 -0
  147. rem/services/rem/executor.py +293 -0
  148. rem/services/rem/parser.py +145 -0
  149. rem/services/rem/queries.py +196 -0
  150. rem/services/rem/query.py +371 -0
  151. rem/services/rem/service.py +527 -0
  152. rem/services/session/README.md +374 -0
  153. rem/services/session/__init__.py +6 -0
  154. rem/services/session/compression.py +360 -0
  155. rem/services/session/reload.py +77 -0
  156. rem/settings.py +1235 -0
  157. rem/sql/002_install_models.sql +1068 -0
  158. rem/sql/background_indexes.sql +42 -0
  159. rem/sql/install_models.sql +1038 -0
  160. rem/sql/migrations/001_install.sql +503 -0
  161. rem/sql/migrations/002_install_models.sql +1202 -0
  162. rem/utils/AGENTIC_CHUNKING.md +597 -0
  163. rem/utils/README.md +583 -0
  164. rem/utils/__init__.py +43 -0
  165. rem/utils/agentic_chunking.py +622 -0
  166. rem/utils/batch_ops.py +343 -0
  167. rem/utils/chunking.py +108 -0
  168. rem/utils/clip_embeddings.py +276 -0
  169. rem/utils/dict_utils.py +98 -0
  170. rem/utils/embeddings.py +423 -0
  171. rem/utils/examples/embeddings_example.py +305 -0
  172. rem/utils/examples/sql_types_example.py +202 -0
  173. rem/utils/markdown.py +16 -0
  174. rem/utils/model_helpers.py +236 -0
  175. rem/utils/schema_loader.py +336 -0
  176. rem/utils/sql_types.py +348 -0
  177. rem/utils/user_id.py +81 -0
  178. rem/utils/vision.py +330 -0
  179. rem/workers/README.md +506 -0
  180. rem/workers/__init__.py +5 -0
  181. rem/workers/dreaming.py +502 -0
  182. rem/workers/engram_processor.py +312 -0
  183. rem/workers/sqs_file_processor.py +193 -0
  184. remdb-0.3.0.dist-info/METADATA +1455 -0
  185. remdb-0.3.0.dist-info/RECORD +187 -0
  186. remdb-0.3.0.dist-info/WHEEL +4 -0
  187. remdb-0.3.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,371 @@
1
+ """
2
+ REM Query Service - REM dialect implementation.
3
+
4
+ REM Dialect Operations:
5
+ 1. SELECT/INSERT/UPDATE/WITH - Raw SQL queries (automatically detected)
6
+ 2. SEARCH <text> [FROM <table>] [LIMIT <n>] - Vector similarity search
7
+ 3. LOOKUP <key> [IN <table>] - Exact KV store lookup
8
+ 4. FUZZY <text> [IN <table>] [THRESHOLD <n>] - Trigram fuzzy search
9
+ 5. TRAVERSE <entity> <direction> [DEPTH <n>] - Graph traversal
10
+
11
+ Examples:
12
+ - SELECT * FROM resources WHERE tenant_id = 'acme-corp'
13
+ - SEARCH "getting started" FROM resources LIMIT 5
14
+ - LOOKUP "docs://getting-started.md" IN resources
15
+ - FUZZY "getting start" IN resources THRESHOLD 0.3
16
+ - TRAVERSE res:123 OUTBOUND DEPTH 2
17
+
18
+ Note: Any query not starting with a REM keyword is treated as raw SQL.
19
+ Blocked for safety: DROP, DELETE, TRUNCATE, ALTER (destructive operations).
20
+ """
21
+
22
+ import re
23
+ from typing import Any, Optional
24
+
25
+ from loguru import logger
26
+ from pydantic import BaseModel
27
+
28
+ from ...settings import settings
29
+ from ...utils.embeddings import generate_embeddings
30
+
31
+
32
class REMQueryResult(BaseModel):
    """Outcome of running a single REM query.

    Instances are built by REMQueryService, one per executed query.
    """

    # Label of the operation that produced the result
    # (the service uses "SQL", "SEARCH", "LOOKUP", "FUZZY LOOKUP", "TRAVERSE").
    operation: str

    # Rows returned by the operation, one dict per row.
    results: list[dict[str, Any]]

    # Number of rows; the service always sets this to len(results).
    count: int

    # Echo of the parsed query parameters (and, for TRAVERSE, an optional
    # "error" entry). Empty when the caller supplies nothing.
    metadata: dict[str, Any] = {}
39
+
40
+
41
class REMQueryService:
    """
    REM Query Service - Executes REM dialect queries.

    Parses the SQL-like REM dialect and delegates the database work to
    REMQueryExecutor. Every public call goes through :meth:`execute`, which
    picks one of the ``_execute_*`` handlers based on the leading keyword.
    """

    def __init__(self, postgres_service: Any):
        """
        Initialize REM query service.

        Args:
            postgres_service: PostgresService instance. It is kept on
                ``self.pg`` (used directly by TRAVERSE for the UUID lookup)
                and handed to the shared executor.
        """
        self.pg = postgres_service

        # Delegate PostgreSQL function calls to shared executor.
        # NOTE(review): imported locally rather than at module level,
        # presumably to avoid a circular import - confirm before moving it.
        from .executor import REMQueryExecutor

        self.executor = REMQueryExecutor(postgres_service)

        logger.info("Initialized REMQueryService")

    @staticmethod
    def _warn_if_trailing(operation: str, query: str, match: re.Match) -> None:
        """Log a warning when text after the parsed part of a query is ignored.

        The dialect regexes are matched from the start of the string only, so
        anything they do not consume (e.g. ``LIMIT abc``) is dropped. The
        query still runs exactly as before; the warning just makes the
        dropped text visible.
        """
        leftover = query[match.end():].strip()
        if leftover:
            logger.warning(
                f"{operation}: ignoring unparsed trailing text: {leftover!r}"
            )

    async def execute(self, query: str, user_id: Optional[str] = None) -> REMQueryResult:
        """
        Execute REM dialect query.

        The first whitespace-delimited token selects the operation
        (case-insensitive): ``SQL``, ``SEARCH``, ``LOOKUP``, ``FUZZY``
        (optionally followed by ``LOOKUP``) or ``TRAVERSE``. Anything else,
        including a bare keyword with no arguments, is passed through as
        raw SQL.

        Args:
            query: REM query string
            user_id: Optional user filter

        Returns:
            REMQueryResult with results and metadata

        Raises:
            ValueError: If query syntax is invalid
        """
        query = query.strip()
        logger.info(f"Executing REM query: {query}")

        # Split on any whitespace so that a tab or newline after the keyword
        # is recognised too (a literal-space prefix test would send such a
        # query down the raw-SQL path).
        parts = query.split(None, 1)
        if len(parts) == 2:
            keyword = parts[0].upper()
            argument = parts[1]

            if keyword == "FUZZY":
                # Support both "FUZZY LOOKUP <...>" and "FUZZY <...>"
                long_form = re.match(r"LOOKUP\s+", argument, re.IGNORECASE)
                if long_form:
                    argument = argument[long_form.end():]

            handlers = {
                "SQL": self._execute_sql,
                "SEARCH": self._execute_search,
                "LOOKUP": self._execute_lookup,
                "FUZZY": self._execute_fuzzy_lookup,
                "TRAVERSE": self._execute_traverse,
            }
            handler = handlers.get(keyword)
            if handler is not None:
                return await handler(argument, user_id)

        # Not a REM keyword: treat the whole string as raw SQL.
        return await self._execute_sql(query, user_id)

    async def _execute_sql(self, query: str, user_id: Optional[str]) -> REMQueryResult:
        """
        Execute raw SQL query.

        Args:
            query: SQL query string
            user_id: Optional user filter (accepted for signature parity with
                the other handlers; it is not applied to raw SQL here)

        Returns:
            Query results
        """
        logger.debug(f"Executing SQL: {query}")

        # NOTE(review): the string is forwarded unchanged. No statement
        # filtering (DROP/DELETE/TRUNCATE/ALTER) happens in this class, so any
        # such safeguard must live in REMQueryExecutor.execute_sql - verify.
        results = await self.executor.execute_sql(query)

        return REMQueryResult(
            operation="SQL",
            results=results,
            count=len(results),
            metadata={"query": query},
        )

    async def _execute_search(self, query: str, user_id: Optional[str]) -> REMQueryResult:
        """
        Execute vector similarity SEARCH via the executor.

        Syntax: SEARCH "<text>" [FROM <table>] [LIMIT <n>]

        The search text is embedded with ``generate_embeddings`` using the
        provider and model from ``settings.llm``; the embedding is then passed
        to ``executor.execute_search`` against the ``content`` field.

        Args:
            query: Search query string (without the SEARCH keyword)
            user_id: Optional user filter

        Returns:
            Matches returned by the executor. Table defaults to "resources",
            limit to 10.

        Raises:
            ValueError: If the query does not start with a quoted search text

        Example:
            SEARCH "getting started" FROM resources LIMIT 5
        """
        match = re.match(
            r'"([^"]+)"(?:\s+FROM\s+(\w+))?(?:\s+LIMIT\s+(\d+))?',
            query,
            re.IGNORECASE,
        )

        if not match:
            raise ValueError(f"Invalid SEARCH syntax: {query}")

        self._warn_if_trailing("SEARCH", query, match)

        search_text = match.group(1)
        table_name = match.group(2) or "resources"
        limit = int(match.group(3)) if match.group(3) else 10

        logger.debug(
            f"SEARCH: text='{search_text}', table={table_name}, limit={limit}"
        )

        # Generate embedding for search query.
        # NOTE(review): generate_embeddings is called synchronously inside a
        # coroutine; if it performs network I/O it will block the event loop.
        provider_str = f"{settings.llm.embedding_provider}:{settings.llm.embedding_model}"
        embeddings_result = generate_embeddings(provider_str, [search_text])
        # We passed a list, so result is list[list[float]], extract first element
        query_embedding: list[float] = embeddings_result[0]  # type: ignore[assignment]

        # Delegate to executor
        results = await self.executor.execute_search(
            query_embedding=query_embedding,
            table_name=table_name,
            field_name="content",
            provider=settings.llm.embedding_provider,
            min_similarity=0.0,
            limit=limit,
            user_id=user_id,
        )

        return REMQueryResult(
            operation="SEARCH",
            results=results,
            count=len(results),
            metadata={
                "search_text": search_text,
                "table": table_name,
                "limit": limit,
            },
        )

    async def _execute_lookup(self, query: str, user_id: Optional[str]) -> REMQueryResult:
        """
        Execute exact key LOOKUP via the executor.

        Syntax: LOOKUP "<key>" [IN <table>]

        Args:
            query: Lookup query string (without the LOOKUP keyword)
            user_id: Optional user filter

        Returns:
            Matches returned by ``executor.execute_lookup``

        Raises:
            ValueError: If the query does not start with a quoted key

        Example:
            LOOKUP "docs://getting-started.md" IN resources
        """
        match = re.match(r'"([^"]+)"(?:\s+IN\s+(\w+))?', query, re.IGNORECASE)

        if not match:
            raise ValueError(f"Invalid LOOKUP syntax: {query}")

        self._warn_if_trailing("LOOKUP", query, match)

        entity_key = match.group(1)
        entity_type = match.group(2) or None

        logger.debug(f"LOOKUP: key='{entity_key}', type={entity_type}")

        # Delegate to executor.
        # NOTE(review): the optional IN <table> value is parsed and echoed in
        # the metadata but is not forwarded to the executor, so it does not
        # restrict the lookup. Confirm whether execute_lookup can take it.
        results = await self.executor.execute_lookup(
            entity_key=entity_key,
            user_id=user_id,
        )

        return REMQueryResult(
            operation="LOOKUP",
            results=results,
            count=len(results),
            metadata={"entity_key": entity_key, "entity_type": entity_type},
        )

    async def _execute_fuzzy_lookup(
        self, query: str, user_id: Optional[str]
    ) -> REMQueryResult:
        """
        Execute fuzzy LOOKUP via the executor.

        Syntax: FUZZY LOOKUP "<text>" [IN <table>] [THRESHOLD <n>]

        Args:
            query: Fuzzy lookup query string (without the FUZZY [LOOKUP] prefix)
            user_id: Optional user filter

        Returns:
            Matches returned by ``executor.execute_fuzzy``. Threshold defaults
            to 0.3; the result limit is fixed at 10.

        Raises:
            ValueError: If the query does not start with a quoted text or the
                THRESHOLD value is not a valid number

        Example:
            FUZZY LOOKUP "getting start" IN resources THRESHOLD 0.3
        """
        match = re.match(
            r'"([^"]+)"(?:\s+IN\s+(\w+))?(?:\s+THRESHOLD\s+([\d.]+))?',
            query,
            re.IGNORECASE,
        )

        if not match:
            raise ValueError(f"Invalid FUZZY LOOKUP syntax: {query}")

        self._warn_if_trailing("FUZZY LOOKUP", query, match)

        search_text = match.group(1)
        entity_type = match.group(2) or None

        # The pattern accepts any run of digits and dots (e.g. "1.2.3"), so
        # report a malformed number as the same syntax error as other parse
        # failures instead of leaking float()'s own message.
        raw_threshold = match.group(3)
        try:
            threshold = float(raw_threshold) if raw_threshold else 0.3
        except ValueError as err:
            raise ValueError(f"Invalid FUZZY LOOKUP syntax: {query}") from err

        logger.debug(
            f"FUZZY LOOKUP: text='{search_text}', type={entity_type}, threshold={threshold}"
        )

        # Delegate to executor.
        # NOTE(review): as with LOOKUP, the IN <table> value is only echoed in
        # the metadata; it is not passed to the executor.
        results = await self.executor.execute_fuzzy(
            query_text=search_text,
            user_id=user_id,
            threshold=threshold,
            limit=10,
        )

        return REMQueryResult(
            operation="FUZZY LOOKUP",
            results=results,
            count=len(results),
            metadata={
                "search_text": search_text,
                "entity_type": entity_type,
                "threshold": threshold,
            },
        )

    async def _execute_traverse(self, query: str, user_id: Optional[str]) -> REMQueryResult:
        """
        Execute graph TRAVERSE via the executor.

        Syntax: TRAVERSE <entity_id_or_key> <direction> [DEPTH <n>] [TYPE "<edge_type>"]

        Directions: OUTBOUND, INBOUND, BOTH. A warning is logged for anything
        other than OUTBOUND, but the direction is still passed to the executor.
        Edge types: Optional filter (e.g., "references", "related_to")

        When the identifier looks like a 36-character lowercase UUID it is
        first resolved to an entity key through ``kv_store``; if no row is
        found, an empty result with ``metadata["error"] = "Entity not found"``
        is returned instead of calling the executor.

        Args:
            query: Traverse query string (without the TRAVERSE keyword)
            user_id: Optional user filter

        Returns:
            Connected entities returned by ``executor.execute_traverse``.
            Depth defaults to 1.

        Raises:
            ValueError: If the query does not match the TRAVERSE syntax or the
                entity identifier is blank

        Example:
            TRAVERSE "cae28bba-fa2f-5ef3-bde9-def3030db723" OUTBOUND DEPTH 2
            TRAVERSE "docs://getting-started.md" OUTBOUND DEPTH 1 TYPE "references"
        """
        match = re.match(
            r'"?([a-f0-9-]+|[^"]+)"?\s+(OUTBOUND|INBOUND|BOTH)(?:\s+DEPTH\s+(\d+))?(?:\s+TYPE\s+"([^"]+)")?',
            query,
            re.IGNORECASE,
        )

        if not match:
            raise ValueError(f"Invalid TRAVERSE syntax: {query}")

        self._warn_if_trailing("TRAVERSE", query, match)

        # The greedy [^"]+ alternative can swallow extra spaces that precede
        # the direction keyword (e.g. "res:123   OUTBOUND"); strip them so the
        # key compares cleanly.
        entity_identifier = match.group(1).strip()
        if not entity_identifier:
            raise ValueError(f"Invalid TRAVERSE syntax: {query}")

        direction = match.group(2).upper()
        depth = int(match.group(3)) if match.group(3) else 1
        edge_type = match.group(4) if match.group(4) else None

        logger.debug(
            f"TRAVERSE: entity={entity_identifier}, direction={direction}, "
            f"depth={depth}, type={edge_type}"
        )

        if direction != "OUTBOUND":
            logger.warning(
                f"Direction {direction} not yet implemented in rem_traverse - only OUTBOUND supported"
            )

        entity_key = entity_identifier
        if re.match(r"^[a-f0-9-]{36}$", entity_identifier):
            # Resolve a UUID to its entity key. The user filter is added only
            # when a user_id was supplied: "user_id = $2" with a NULL
            # parameter never matches in SQL, which would turn every
            # unfiltered traverse into "Entity not found".
            # NOTE(review): with no user_id the first matching row wins -
            # confirm entity_id is unique across users in kv_store.
            if user_id is None:
                lookup_sql = "SELECT entity_key FROM kv_store WHERE entity_id = $1"
                lookup_params: tuple[Any, ...] = (entity_identifier,)
            else:
                lookup_sql = "SELECT entity_key FROM kv_store WHERE entity_id = $1 AND user_id = $2"
                lookup_params = (entity_identifier, user_id)

            lookup_result = await self.pg.execute(lookup_sql, lookup_params)
            if lookup_result:
                entity_key = lookup_result[0]["entity_key"]
            else:
                return REMQueryResult(
                    operation="TRAVERSE",
                    results=[],
                    count=0,
                    metadata={
                        "entity_identifier": entity_identifier,
                        "direction": direction,
                        "depth": depth,
                        "edge_type": edge_type,
                        "error": "Entity not found",
                    },
                )

        # Convert single edge_type to list
        edge_types = [edge_type] if edge_type else None

        # Delegate to executor
        results = await self.executor.execute_traverse(
            start_key=entity_key,
            direction=direction,
            max_depth=depth,
            edge_types=edge_types,
            user_id=user_id,
        )

        return REMQueryResult(
            operation="TRAVERSE",
            results=results,
            count=len(results),
            metadata={
                "entity_identifier": entity_identifier,
                "entity_key": entity_key,
                "direction": direction,
                "depth": depth,
                "edge_type": edge_type,
            },
        )