remdb 0.3.7 (remdb-0.3.7-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187)
  1. rem/__init__.py +2 -0
  2. rem/agentic/README.md +650 -0
  3. rem/agentic/__init__.py +39 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +8 -0
  6. rem/agentic/context.py +148 -0
  7. rem/agentic/context_builder.py +329 -0
  8. rem/agentic/mcp/__init__.py +0 -0
  9. rem/agentic/mcp/tool_wrapper.py +107 -0
  10. rem/agentic/otel/__init__.py +5 -0
  11. rem/agentic/otel/setup.py +151 -0
  12. rem/agentic/providers/phoenix.py +674 -0
  13. rem/agentic/providers/pydantic_ai.py +572 -0
  14. rem/agentic/query.py +117 -0
  15. rem/agentic/query_helper.py +89 -0
  16. rem/agentic/schema.py +396 -0
  17. rem/agentic/serialization.py +245 -0
  18. rem/agentic/tools/__init__.py +5 -0
  19. rem/agentic/tools/rem_tools.py +231 -0
  20. rem/api/README.md +420 -0
  21. rem/api/main.py +324 -0
  22. rem/api/mcp_router/prompts.py +182 -0
  23. rem/api/mcp_router/resources.py +536 -0
  24. rem/api/mcp_router/server.py +213 -0
  25. rem/api/mcp_router/tools.py +584 -0
  26. rem/api/routers/auth.py +229 -0
  27. rem/api/routers/chat/__init__.py +5 -0
  28. rem/api/routers/chat/completions.py +281 -0
  29. rem/api/routers/chat/json_utils.py +76 -0
  30. rem/api/routers/chat/models.py +124 -0
  31. rem/api/routers/chat/streaming.py +185 -0
  32. rem/auth/README.md +258 -0
  33. rem/auth/__init__.py +26 -0
  34. rem/auth/middleware.py +100 -0
  35. rem/auth/providers/__init__.py +13 -0
  36. rem/auth/providers/base.py +376 -0
  37. rem/auth/providers/google.py +163 -0
  38. rem/auth/providers/microsoft.py +237 -0
  39. rem/cli/README.md +455 -0
  40. rem/cli/__init__.py +8 -0
  41. rem/cli/commands/README.md +126 -0
  42. rem/cli/commands/__init__.py +3 -0
  43. rem/cli/commands/ask.py +566 -0
  44. rem/cli/commands/configure.py +497 -0
  45. rem/cli/commands/db.py +493 -0
  46. rem/cli/commands/dreaming.py +324 -0
  47. rem/cli/commands/experiments.py +1302 -0
  48. rem/cli/commands/mcp.py +66 -0
  49. rem/cli/commands/process.py +245 -0
  50. rem/cli/commands/schema.py +183 -0
  51. rem/cli/commands/serve.py +106 -0
  52. rem/cli/dreaming.py +363 -0
  53. rem/cli/main.py +96 -0
  54. rem/config.py +237 -0
  55. rem/mcp_server.py +41 -0
  56. rem/models/core/__init__.py +49 -0
  57. rem/models/core/core_model.py +64 -0
  58. rem/models/core/engram.py +333 -0
  59. rem/models/core/experiment.py +628 -0
  60. rem/models/core/inline_edge.py +132 -0
  61. rem/models/core/rem_query.py +243 -0
  62. rem/models/entities/__init__.py +43 -0
  63. rem/models/entities/file.py +57 -0
  64. rem/models/entities/image_resource.py +88 -0
  65. rem/models/entities/message.py +35 -0
  66. rem/models/entities/moment.py +123 -0
  67. rem/models/entities/ontology.py +191 -0
  68. rem/models/entities/ontology_config.py +131 -0
  69. rem/models/entities/resource.py +95 -0
  70. rem/models/entities/schema.py +87 -0
  71. rem/models/entities/user.py +85 -0
  72. rem/py.typed +0 -0
  73. rem/schemas/README.md +507 -0
  74. rem/schemas/__init__.py +6 -0
  75. rem/schemas/agents/README.md +92 -0
  76. rem/schemas/agents/core/moment-builder.yaml +178 -0
  77. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  78. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  79. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  80. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  81. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  82. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  83. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  84. rem/schemas/agents/examples/hello-world.yaml +37 -0
  85. rem/schemas/agents/examples/query.yaml +54 -0
  86. rem/schemas/agents/examples/simple.yaml +21 -0
  87. rem/schemas/agents/examples/test.yaml +29 -0
  88. rem/schemas/agents/rem.yaml +128 -0
  89. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  90. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  91. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  92. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  93. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  94. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  95. rem/services/__init__.py +16 -0
  96. rem/services/audio/INTEGRATION.md +308 -0
  97. rem/services/audio/README.md +376 -0
  98. rem/services/audio/__init__.py +15 -0
  99. rem/services/audio/chunker.py +354 -0
  100. rem/services/audio/transcriber.py +259 -0
  101. rem/services/content/README.md +1269 -0
  102. rem/services/content/__init__.py +5 -0
  103. rem/services/content/providers.py +801 -0
  104. rem/services/content/service.py +676 -0
  105. rem/services/dreaming/README.md +230 -0
  106. rem/services/dreaming/__init__.py +53 -0
  107. rem/services/dreaming/affinity_service.py +336 -0
  108. rem/services/dreaming/moment_service.py +264 -0
  109. rem/services/dreaming/ontology_service.py +54 -0
  110. rem/services/dreaming/user_model_service.py +297 -0
  111. rem/services/dreaming/utils.py +39 -0
  112. rem/services/embeddings/__init__.py +11 -0
  113. rem/services/embeddings/api.py +120 -0
  114. rem/services/embeddings/worker.py +421 -0
  115. rem/services/fs/README.md +662 -0
  116. rem/services/fs/__init__.py +62 -0
  117. rem/services/fs/examples.py +206 -0
  118. rem/services/fs/examples_paths.py +204 -0
  119. rem/services/fs/git_provider.py +935 -0
  120. rem/services/fs/local_provider.py +760 -0
  121. rem/services/fs/parsing-hooks-examples.md +172 -0
  122. rem/services/fs/paths.py +276 -0
  123. rem/services/fs/provider.py +460 -0
  124. rem/services/fs/s3_provider.py +1042 -0
  125. rem/services/fs/service.py +186 -0
  126. rem/services/git/README.md +1075 -0
  127. rem/services/git/__init__.py +17 -0
  128. rem/services/git/service.py +469 -0
  129. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  130. rem/services/phoenix/README.md +453 -0
  131. rem/services/phoenix/__init__.py +46 -0
  132. rem/services/phoenix/client.py +686 -0
  133. rem/services/phoenix/config.py +88 -0
  134. rem/services/phoenix/prompt_labels.py +477 -0
  135. rem/services/postgres/README.md +575 -0
  136. rem/services/postgres/__init__.py +23 -0
  137. rem/services/postgres/migration_service.py +427 -0
  138. rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
  139. rem/services/postgres/register_type.py +352 -0
  140. rem/services/postgres/repository.py +337 -0
  141. rem/services/postgres/schema_generator.py +379 -0
  142. rem/services/postgres/service.py +802 -0
  143. rem/services/postgres/sql_builder.py +354 -0
  144. rem/services/rem/README.md +304 -0
  145. rem/services/rem/__init__.py +23 -0
  146. rem/services/rem/exceptions.py +71 -0
  147. rem/services/rem/executor.py +293 -0
  148. rem/services/rem/parser.py +145 -0
  149. rem/services/rem/queries.py +196 -0
  150. rem/services/rem/query.py +371 -0
  151. rem/services/rem/service.py +527 -0
  152. rem/services/session/README.md +374 -0
  153. rem/services/session/__init__.py +6 -0
  154. rem/services/session/compression.py +360 -0
  155. rem/services/session/reload.py +77 -0
  156. rem/settings.py +1235 -0
  157. rem/sql/002_install_models.sql +1068 -0
  158. rem/sql/background_indexes.sql +42 -0
  159. rem/sql/install_models.sql +1038 -0
  160. rem/sql/migrations/001_install.sql +503 -0
  161. rem/sql/migrations/002_install_models.sql +1202 -0
  162. rem/utils/AGENTIC_CHUNKING.md +597 -0
  163. rem/utils/README.md +583 -0
  164. rem/utils/__init__.py +43 -0
  165. rem/utils/agentic_chunking.py +622 -0
  166. rem/utils/batch_ops.py +343 -0
  167. rem/utils/chunking.py +108 -0
  168. rem/utils/clip_embeddings.py +276 -0
  169. rem/utils/dict_utils.py +98 -0
  170. rem/utils/embeddings.py +423 -0
  171. rem/utils/examples/embeddings_example.py +305 -0
  172. rem/utils/examples/sql_types_example.py +202 -0
  173. rem/utils/markdown.py +16 -0
  174. rem/utils/model_helpers.py +236 -0
  175. rem/utils/schema_loader.py +336 -0
  176. rem/utils/sql_types.py +348 -0
  177. rem/utils/user_id.py +81 -0
  178. rem/utils/vision.py +330 -0
  179. rem/workers/README.md +506 -0
  180. rem/workers/__init__.py +5 -0
  181. rem/workers/dreaming.py +502 -0
  182. rem/workers/engram_processor.py +312 -0
  183. rem/workers/sqs_file_processor.py +193 -0
  184. remdb-0.3.7.dist-info/METADATA +1473 -0
  185. remdb-0.3.7.dist-info/RECORD +187 -0
  186. remdb-0.3.7.dist-info/WHEEL +4 -0
  187. remdb-0.3.7.dist-info/entry_points.txt +2 -0
rem/services/postgres/sql_builder.py
@@ -0,0 +1,354 @@
"""SQL query builder for Pydantic models.

Generates INSERT, UPDATE, SELECT queries from Pydantic model instances.
Handles serialization and parameter binding automatically.
"""

import hashlib
import json
import uuid
from typing import Any, Type

from pydantic import BaseModel


def get_natural_key(model: BaseModel) -> str | None:
    """
    Get natural key from model following precedence: uri -> key -> name.

    Used for generating deterministic IDs from business keys.
    Does NOT include 'id' since that's what we're trying to generate.

    Args:
        model: Pydantic model instance

    Returns:
        Natural key string or None
    """
    for field in ["uri", "key", "name"]:
        if hasattr(model, field):
            value = getattr(model, field)
            if value:
                return str(value)
    return None


def get_entity_key(model: BaseModel) -> str:
    """
    Get entity key for KV store following precedence: id -> uri -> key -> name.

    For KV store lookups, we prefer globally unique identifiers first (id),
    then natural keys (uri/key/name). Always returns a value (id as fallback).

    Args:
        model: Pydantic model instance

    Returns:
        Entity key string (guaranteed to exist)
    """
    for field in ["id", "uri", "key", "name"]:
        if hasattr(model, field):
            value = getattr(model, field)
            if value:
                return str(value)
    # Should never reach here since id always exists in CoreModel
    raise ValueError(f"Model {type(model)} has no id, uri, key, or name field")


def generate_deterministic_id(user_id: str | None, entity_key: str) -> uuid.UUID:
    """
    Generate deterministic UUID from hash of (user_id, entity_key).

    Args:
        user_id: User identifier (optional)
        entity_key: Entity key field value

    Returns:
        Deterministic UUID
    """
    # Combine user_id and key for hashing
    combined = f"{user_id or 'system'}:{entity_key}"
    hash_bytes = hashlib.sha256(combined.encode()).digest()
    # Use first 16 bytes for UUID
    return uuid.UUID(bytes=hash_bytes[:16])


def model_to_dict(model: BaseModel, exclude_none: bool = True) -> dict[str, Any]:
    """
    Convert Pydantic model to dict suitable for SQL insertion.

    Generates deterministic ID if not present, based on hash(user_id, key).
    Serializes JSONB fields (list[dict], dict) to JSON strings for asyncpg.

    Args:
        model: Pydantic model instance
        exclude_none: Exclude None values (default: True)

    Returns:
        Dict of field_name -> value with JSONB fields as JSON strings
    """
    # Use python mode to preserve datetime objects
    data = model.model_dump(exclude_none=exclude_none, mode="python")

    # Generate deterministic ID if not present
    if not data.get("id"):
        natural_key = get_natural_key(model)
        if natural_key:
            user_id = data.get("user_id")
            data["id"] = generate_deterministic_id(user_id, natural_key)
        else:
            # Fallback to random UUID if no natural key (uri/key/name)
            data["id"] = uuid.uuid4()

    # Note: JSONB conversion is handled by asyncpg codec (set_type_codec in PostgresService)
    # No need to manually convert dicts/lists to JSON strings

    return data


def build_insert(
    model: BaseModel, table_name: str, return_id: bool = True
) -> tuple[str, list[Any]]:
    """
    Build INSERT query from Pydantic model.

    Args:
        model: Pydantic model instance
        table_name: Target table name
        return_id: Return the inserted ID (default: True)

    Returns:
        Tuple of (sql_query, parameters)

    Example:
        sql, params = build_insert(message, "messages")
        # INSERT INTO messages (id, content, created_at) VALUES ($1, $2, $3) RETURNING id
    """
    data = model_to_dict(model)

    fields = list(data.keys())
    # Quote field names to handle reserved words
    quoted_fields = [f'"{field}"' for field in fields]
    placeholders = [f"${i+1}" for i in range(len(fields))]
    values = [data[field] for field in fields]

    sql = f"INSERT INTO {table_name} ({', '.join(quoted_fields)}) VALUES ({', '.join(placeholders)})"

    if return_id:
        sql += " RETURNING id"

    return sql, values


def build_upsert(
    model: BaseModel,
    table_name: str,
    conflict_field: str = "id",
    return_id: bool = True,
) -> tuple[str, list[Any]]:
    """
    Build INSERT ... ON CONFLICT DO UPDATE (upsert) query from Pydantic model.

    Args:
        model: Pydantic model instance
        table_name: Target table name
        conflict_field: Field to check for conflicts (default: "id")
        return_id: Return the inserted/updated ID (default: True)

    Returns:
        Tuple of (sql_query, parameters)

    Example:
        sql, params = build_upsert(message, "messages")
        # INSERT INTO messages (...) VALUES (...)
        # ON CONFLICT (id) DO UPDATE SET field1=$1, field2=$2, ...
        # RETURNING id
    """
    data = model_to_dict(model)

    fields = list(data.keys())
    quoted_fields = [f'"{field}"' for field in fields]
    placeholders = [f"${i+1}" for i in range(len(fields))]
    values = [data[field] for field in fields]

    # Build update clause (exclude conflict field)
    update_fields = [f for f in fields if f != conflict_field]
    update_clauses = [f'"{field}" = EXCLUDED."{field}"' for field in update_fields]

    sql = f"""
    INSERT INTO {table_name} ({', '.join(quoted_fields)})
    VALUES ({', '.join(placeholders)})
    ON CONFLICT ("{conflict_field}") DO UPDATE
    SET {', '.join(update_clauses)}
    """

    if return_id:
        sql += " RETURNING id"

    return sql.strip(), values


def build_select(
    model_class: Type[BaseModel],
    table_name: str,
    filters: dict[str, Any],
    order_by: str | None = None,
    limit: int | None = None,
    offset: int | None = None,
) -> tuple[str, list[Any]]:
    """
    Build SELECT query with filters.

    Args:
        model_class: Pydantic model class (for field validation)
        table_name: Source table name
        filters: Dict of field -> value filters (AND-ed together)
        order_by: Optional ORDER BY clause
        limit: Optional LIMIT
        offset: Optional OFFSET

    Returns:
        Tuple of (sql_query, parameters)

    Example:
        sql, params = build_select(
            Message,
            "messages",
            {"session_id": "abc", "tenant_id": "xyz"},
            order_by="created_at DESC",
            limit=10
        )
        # SELECT * FROM messages
        # WHERE session_id = $1 AND tenant_id = $2 AND deleted_at IS NULL
        # ORDER BY created_at DESC
        # LIMIT 10
    """
    where_clauses = ['"deleted_at" IS NULL']  # Soft delete filter
    params = []
    param_idx = 1

    for field, value in filters.items():
        where_clauses.append(f'"{field}" = ${param_idx}')
        params.append(value)
        param_idx += 1

    sql = f"SELECT * FROM {table_name} WHERE {' AND '.join(where_clauses)}"

    if order_by:
        sql += f" ORDER BY {order_by}"

    if limit is not None:
        sql += f" LIMIT ${param_idx}"
        params.append(limit)
        param_idx += 1

    if offset is not None:
        sql += f" OFFSET ${param_idx}"
        params.append(offset)

    return sql, params


def build_update(
    model: BaseModel, table_name: str, id_value: str, tenant_id: str
) -> tuple[str, list[Any]]:
    """
    Build UPDATE query from Pydantic model.

    Args:
        model: Pydantic model instance with updated values
        table_name: Target table name
        id_value: ID of record to update
        tenant_id: Tenant ID for isolation

    Returns:
        Tuple of (sql_query, parameters)

    Example:
        sql, params = build_update(message, "messages", "msg-123", "tenant-1")
        # UPDATE messages SET field1=$1, field2=$2, updated_at=NOW()
        # WHERE id=$N AND tenant_id=$N+1 AND deleted_at IS NULL
    """
    data = model_to_dict(model, exclude_none=False)

    # Exclude id from update fields
    update_fields = [k for k in data.keys() if k != "id"]
    params = [data[field] for field in update_fields]

    # Build SET clause
    set_clauses = [f'"{field}" = ${i+1}' for i, field in enumerate(update_fields)]
    set_clauses.append('"updated_at" = NOW()')

    # Add WHERE params
    param_idx = len(params) + 1
    sql = f"""
    UPDATE {table_name}
    SET {', '.join(set_clauses)}
    WHERE "id" = ${param_idx} AND "tenant_id" = ${param_idx+1} AND "deleted_at" IS NULL
    RETURNING "id"
    """

    params.extend([id_value, tenant_id])

    return sql.strip(), params


def build_delete(
    table_name: str, id_value: str, tenant_id: str
) -> tuple[str, list[Any]]:
    """
    Build soft DELETE query (sets deleted_at).

    Args:
        table_name: Target table name
        id_value: ID of record to delete
        tenant_id: Tenant ID for isolation

    Returns:
        Tuple of (sql_query, parameters)

    Example:
        sql, params = build_delete("messages", "msg-123", "tenant-1")
        # UPDATE messages SET deleted_at=NOW(), updated_at=NOW()
        # WHERE id=$1 AND tenant_id=$2 AND deleted_at IS NULL
    """
    sql = f"""
    UPDATE {table_name}
    SET "deleted_at" = NOW(), "updated_at" = NOW()
    WHERE "id" = $1 AND "tenant_id" = $2 AND "deleted_at" IS NULL
    RETURNING "id"
    """

    return sql.strip(), [id_value, tenant_id]


def build_count(
    table_name: str, filters: dict[str, Any]
) -> tuple[str, list[Any]]:
    """
    Build COUNT query with filters.

    Args:
        table_name: Source table name
        filters: Dict of field -> value filters (AND-ed together)

    Returns:
        Tuple of (sql_query, parameters)

    Example:
        sql, params = build_count("messages", {"session_id": "abc"})
        # SELECT COUNT(*) FROM messages
        # WHERE session_id = $1 AND deleted_at IS NULL
    """
    where_clauses = ['"deleted_at" IS NULL']
    params = []
    param_idx = 1

    for field, value in filters.items():
        where_clauses.append(f'"{field}" = ${param_idx}')
        params.append(value)
        param_idx += 1

    sql = f"SELECT COUNT(*) FROM {table_name} WHERE {' AND '.join(where_clauses)}"

    return sql, params
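A minimal usage sketch of these builders. The `Message` model, column values, and the asyncpg connection `conn` are hypothetical; only the builder functions and their documented output shapes come from the module above.

```python
# Hypothetical model and table; build_insert/build_select are defined in sql_builder.py.
from pydantic import BaseModel

from rem.services.postgres.sql_builder import build_insert, build_select


class Message(BaseModel):
    name: str                      # natural key, used for the deterministic id
    user_id: str | None = None
    content: str | None = None


msg = Message(name="standup-notes", user_id="user-123", content="Discussed the TiDB migration.")

sql, params = build_insert(msg, "messages")
# sql    -> INSERT INTO messages ("name", "user_id", "content", "id") VALUES ($1, $2, $3, $4) RETURNING id
# params -> ['standup-notes', 'user-123', 'Discussed the TiDB migration.', UUID('...')]
# row_id = await conn.fetchval(sql, *params)   # assuming an asyncpg connection `conn`

sql, params = build_select(Message, "messages", {"user_id": "user-123"},
                           order_by="created_at DESC", limit=10)
# SELECT * FROM messages WHERE "deleted_at" IS NULL AND "user_id" = $1 ORDER BY created_at DESC LIMIT $2
```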
rem/services/rem/README.md
@@ -0,0 +1,304 @@
# REM Service

The `RemService` is the high-level query execution engine for REM (Resources-Entities-Moments), a bio-inspired memory infrastructure combining temporal narratives, semantic relationships, and structured knowledge.

## Architecture Overview

REM mirrors human memory systems through three complementary layers:

**Resources**: Chunked, embedded content from documents, files, and conversations. Stored with semantic embeddings for vector search, entity references, and knowledge graph edges.

**Entities**: Domain knowledge nodes with natural language labels (not UUIDs). Examples: "sarah-chen", "tidb-migration-spec". Enables conversational queries without requiring internal ID knowledge.

**Moments**: Temporal narratives (meetings, coding sessions, conversations) with time boundaries, present persons, speakers, emotion tags, and topic tags. Enable chronological memory retrieval.

Core design principle: Multi-index organization (vectors + graph + time + key-value) supporting iterated retrieval where LLMs conduct multi-turn database conversations.

## Query Dialect (AST)

REM queries follow a structured dialect with availability dependent on memory evolution stage.

### Grammar

```
Query ::= LookupQuery | FuzzyQuery | SearchQuery | SqlQuery | TraverseQuery

LookupQuery ::= LOOKUP <key:string|list[string]>
  key         : Single entity name or list of entity names (natural language labels)
  performance : O(1) per key
  available   : Stage 1+
  examples    :
    - LOOKUP "Sarah"
    - LOOKUP ["Sarah", "Mike", "Emily"]
    - LOOKUP "Project Alpha"

FuzzyQuery ::= FUZZY <text:string> [THRESHOLD <t:float>] [LIMIT <n:int>]
  text        : Search text (partial/misspelled)
  threshold   : Similarity score 0.0-1.0 (default: 0.5)
  limit       : Max results (default: 5)
  performance : Indexed (pg_trgm)
  available   : Stage 1+
  example     : FUZZY "sara" THRESHOLD 0.5 LIMIT 10

SearchQuery ::= SEARCH <text:string> [TABLE <table:string>] [WHERE <clause:string>] [LIMIT <n:int>]
  text        : Semantic query text
  table       : Target table (default: "resources")
  clause      : Optional PostgreSQL WHERE clause for hybrid filtering (combines vector + structured)
  limit       : Max results (default: 10)
  performance : Indexed (pgvector)
  available   : Stage 3+
  examples    :
    - SEARCH "database migration" TABLE resources LIMIT 10
    - SEARCH "team discussion" TABLE moments WHERE "moment_type='meeting'" LIMIT 5
    - SEARCH "project updates" WHERE "created_at >= '2024-01-01'" LIMIT 20
    - SEARCH "AI research" WHERE "tags @> ARRAY['machine-learning']" LIMIT 10

Hybrid Query Support: SEARCH combines semantic vector similarity with structured filtering.
Use WHERE clause to filter on system fields or entity-specific fields.

SqlQuery ::= SQL <table:string> [WHERE <clause:string>] [ORDER BY <order:string>] [LIMIT <n:int>]
  table       : Table name ("resources", "moments", etc.)
  clause      : PostgreSQL WHERE conditions (any valid PostgreSQL syntax)
  order       : ORDER BY clause
  limit       : Max results
  performance : O(n) with indexes
  available   : Stage 1+
  dialect     : PostgreSQL (supports all PostgreSQL features: JSONB operators, array operators, etc.)
  examples    :
    - SQL moments WHERE "moment_type='meeting'" ORDER BY starts_timestamp DESC LIMIT 10
    - SQL resources WHERE "metadata->>'status' = 'published'" LIMIT 20
    - SQL moments WHERE "tags && ARRAY['urgent', 'bug']" ORDER BY created_at DESC

PostgreSQL Dialect: SQL queries use PostgreSQL syntax with full support for:
  - JSONB operators (->>, ->, @>, etc.)
  - Array operators (&&, @>, <@, etc.)
  - Advanced filtering and aggregations

TraverseQuery ::= TRAVERSE [<edge_types:list>] WITH <initial_query:Query> [DEPTH <d:int>] [ORDER BY <order:string>] [LIMIT <n:int>]
  edge_types    : Relationship types to follow (e.g., ["manages", "reports-to"], default: all)
  initial_query : Starting query (typically LOOKUP)
  depth         : Number of hops (0=PLAN mode, 1=single hop, N=multi-hop, default: 1)
  order         : Order results (default: "edge.created_at DESC")
  limit         : Max nodes (default: 9)
  performance   : O(k) where k = visited nodes
  available     : Stage 3+
  examples      :
    - TRAVERSE manages WITH LOOKUP "Sally" DEPTH 1
    - TRAVERSE WITH LOOKUP "Sally" DEPTH 0 (PLAN mode: edge analysis only)
    - TRAVERSE manages,reports-to WITH LOOKUP "Sarah" DEPTH 2 LIMIT 5
```

### System Fields (CoreModel)

All REM entities inherit from CoreModel and have these system fields:

* **id** (UUID or string): Unique identifier
* **created_at** (timestamp): Entity creation time (RECOMMENDED for filtering)
* **updated_at** (timestamp): Last modification time (RECOMMENDED for filtering)
* **deleted_at** (timestamp): Soft deletion time (null if active)
* **tenant_id** (string): Optional, for future multi-tenant SaaS use (kept for backward compat)
* **user_id** (string): Owner user identifier (primary isolation scope, auto-filtered)
* **graph_edges** (JSONB array): Knowledge graph edges - USE IN SELECT, NOT WHERE
* **metadata** (JSONB object): Flexible metadata storage
* **tags** (array of strings): Entity tags

**CRITICAL: graph_edges Usage Rules:**

* ✓ DO: Select `graph_edges` in result sets to see relationships
* ✗ DON'T: Filter by `graph_edges` in WHERE clauses (edge names vary by entity)
* ✓ DO: Use TRAVERSE queries to follow graph edges

Example CORRECT:
```sql
SELECT id, name, created_at, graph_edges FROM resources WHERE created_at >= '2024-01-01'
```

Example WRONG:
```sql
-- Edge names are unknown and vary by entity!
SELECT * FROM resources WHERE graph_edges @> '[{"dst": "sarah"}]'
```

### Main Tables (Resources, Moments, Files)

**Resources table:**

* **name** (string): Human-readable resource name
* **uri** (string): Content URI/identifier
* **content** (text): Resource content
* **timestamp** (timestamp): Content creation time (use for temporal filtering)
* **category** (string): Resource category (document, conversation, artifact, etc.)
* **related_entities** (JSONB): Extracted entities

**Moments table:**

* **name** (string): Human-readable moment name
* **moment_type** (string): Moment classification (meeting, coding-session, conversation, etc.)
* **category** (string): Moment category
* **starts_timestamp** (timestamp): Start time (use for temporal filtering)
* **ends_timestamp** (timestamp): End time
* **present_persons** (JSONB): People present in moment
* **emotion_tags** (array): Sentiment tags (happy, frustrated, focused, etc.)
* **topic_tags** (array): Topic/concept tags
* **summary** (text): Natural language description

**Files table:**

* **name** (string): File name
* **uri** (string): File URI/path
* **mime_type** (string): File MIME type
* **size_bytes** (integer): File size
* **processing_status** (string): Processing status (pending, completed, failed)
* **category** (string): File category

### Recommended Filtering Fields

* **Temporal**: created_at, updated_at, timestamp, starts_timestamp, ends_timestamp
* **Categorical**: category, moment_type, mime_type, processing_status
* **Arrays**: tags, emotion_tags, topic_tags (use && or @> operators)
* **Text**: name, content, summary (use ILIKE for pattern matching)

Use these fields in WHERE clauses for both SEARCH (hybrid) and SQL queries.
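For instance, combining a pattern match with a temporal filter, or an array filter with a semantic search, might look like this in the dialect above (the literal values are made up):

```
SQL resources WHERE "name ILIKE '%migration%' AND created_at >= '2024-06-01'" ORDER BY created_at DESC LIMIT 20
SEARCH "post-mortem notes" TABLE moments WHERE "emotion_tags && ARRAY['frustrated']" LIMIT 5
```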

### Python API

```python
# LOOKUP - O(1) entity retrieval by natural language key
RemQuery(
    query_type=QueryType.LOOKUP,
    parameters=LookupParameters(key="Sarah")
)

# FUZZY - Trigram-based fuzzy text search
RemQuery(
    query_type=QueryType.FUZZY,
    parameters=FuzzyParameters(query_text="sara", threshold=0.5, limit=5)
)

# SEARCH - Vector similarity search using embeddings
RemQuery(
    query_type=QueryType.SEARCH,
    parameters=SearchParameters(query_text="database migration to TiDB", table_name="resources", limit=10)
)

# SQL - Direct SQL execution (tenant-isolated)
RemQuery(
    query_type=QueryType.SQL,
    parameters=SQLParameters(table_name="moments", where_clause="moment_type='meeting'", order_by="resource_timestamp DESC", limit=10)
)

# TRAVERSE - Recursive graph traversal following edges
RemQuery(
    query_type=QueryType.TRAVERSE,
    parameters=TraverseParameters(initial_query="Sally", edge_types=["manages"], max_depth=2, order_by="edge.created_at DESC", limit=9)
)
```
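These query objects are typically chained rather than issued in isolation. A rough sketch of that multi-turn pattern, assuming an initialized `rem_service` and the parameter models shown above (error handling omitted):

```python
# Sketch of iterated retrieval: each result informs the next query.
# Assumes `rem_service` is an initialized RemService; names follow the examples in this README.

# Turn 1: resolve a user-supplied name to an entity (O(1) via kv_store)
person = await rem_service.execute_query(
    RemQuery(query_type=QueryType.LOOKUP,
             parameters=LookupParameters(key="Sarah"),
             user_id="user-123")
)

# Turn 2: follow that entity's graph edges one hop
reports = await rem_service.execute_query(
    RemQuery(query_type=QueryType.TRAVERSE,
             parameters=TraverseParameters(initial_query="Sarah", edge_types=["manages"], max_depth=1),
             user_id="user-123")
)

# Turn 3: semantic search scoped by what the previous turns revealed
docs = await rem_service.execute_query(
    RemQuery(query_type=QueryType.SEARCH,
             parameters=SearchParameters(query_text="TiDB migration status",
                                         table_name="resources", limit=5),
             user_id="user-123")
)
```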

### Query Availability by Evolution Stage

| Query Type | Stage 0 | Stage 1 | Stage 2 | Stage 3 | Stage 4 |
|------------|---------|---------|---------|---------|---------|
| LOOKUP | ✗ | ✓ | ✓ | ✓ | ✓ |
| FUZZY | ✗ | ✓ | ✓ | ✓ | ✓ |
| SEARCH | ✗ | ✗ | ✗ | ✓ | ✓ |
| SQL | ✗ | ✓ | ✓ | ✓ | ✓ |
| TRAVERSE | ✗ | ✗ | ✗ | ✓ | ✓ |

**Stage 0**: No data, all queries fail.

**Stage 1** (20% answerable): Resources seeded with entity extraction. LOOKUP and FUZZY work for finding entities. SQL works for basic filtering.

**Stage 2** (50% answerable): Moments extracted. SQL temporal queries work. LOOKUP includes moment entities.

**Stage 3** (80% answerable): Affinity graph built. SEARCH and TRAVERSE become available. Multi-hop graph queries work.

**Stage 4** (100% answerable): Mature graph with rich historical data. All query types fully functional with high-quality results.

## Query Types

The service supports schema-agnostic and indexed query operations with strict performance contracts:

* **LOOKUP**: O(1) entity retrieval by natural language key (via `kv_store`).
* **FUZZY**: Trigram-based fuzzy text search (indexed).
* **SEARCH**: Vector similarity search using embeddings (requires `pgvector`).
* **SQL**: Direct SQL execution (tenant-isolated).
* **TRAVERSE**: Recursive graph traversal (O(k) where k = visited nodes).

## Graph Traversal (`TRAVERSE`)

The `TRAVERSE` operation allows agents to explore the knowledge graph by following edges between entities.

### Contract
* **Performance**: O(k) where k is the number of visited nodes.
* **Polymorphism**: Seamlessly traverses relationships between different entity types (`Resources`, `Moments`, `Users`, etc.).
* **Filtering**: Supports filtering by relationship type(s).
* **Cycle Detection**: Built-in cycle detection prevents infinite loops.

### Data Model
Graph traversal relies on the `InlineEdge` Pydantic model stored in the `graph_edges` JSONB column of every entity table.

**Expected JSON Structure (`InlineEdge`):**
```json
{
  "dst": "target-entity-key",   // Human-readable key (NOT UUID)
  "rel_type": "authored_by",    // Relationship type
  "weight": 0.8,                // Connection strength (0.0-1.0)
  "properties": { ... }         // Additional metadata
}
```

### Usage
The `TRAVERSE` query accepts the following parameters:

* `initial_query` (str): The starting entity key.
* `max_depth` (int): Maximum number of hops (default: 1).
* `edge_types` (list[str]): List of relationship types to follow. If empty or `['*']`, follows all edges.

**Example:**
```python
# Find entities connected to "Project X" via "depends_on" or "related_to" edges, up to 2 hops deep.
result = await rem_service.execute_query(
    RemQuery(
        query_type=QueryType.TRAVERSE,
        parameters=TraverseParameters(
            initial_query="Project X",
            max_depth=2,
            edge_types=["depends_on", "related_to"]
        ),
        user_id="user-123"
    )
)
```

## Memory Evolution Through Dreaming

REM improves query answerability over time through background dreaming workflows:

* **Stage 0**: Raw resources only (0% answerable)
* **Stage 1**: Entity extraction complete (20% answerable, LOOKUP works)
* **Stage 2**: Moments generated (50% answerable, temporal queries work)
* **Stage 3**: Affinity matching complete (80% answerable, semantic/graph queries work)
* **Stage 4**: Multiple dreaming cycles (100% answerable, full query capabilities)

Dreaming workers extract temporal narratives (moments) and build semantic graph edges (affinity) from resources, progressively enriching the knowledge graph.

## Testing Approach

REM testing follows a quality-driven methodology focused on query evolution:

**Critical Principle**: Test with user-known information only. Users provide natural language ("Sarah", "Project Alpha"), not internal representations ("sarah-chen", "project-alpha").

**Quality Validation**:

* Moment quality: Temporal validity, person extraction, speaker identification, tag quality, entity references, temporal coverage, type distribution
* Affinity quality: Edge existence, edge format, semantic relevance, bidirectional edges, entity connections, graph connectivity, edge distribution

**Integration Tests**: Validate progressive query answerability across memory evolution stages. Test suite includes realistic queries simulating multi-turn LLM-database conversations.

See `tests/integration/test_rem_query_evolution.py` for stage-based validation and `tests/integration/test_graph_traversal.py` for graph query testing.

## Architecture Notes

* **Unified View**: The underlying SQL function `rem_traverse` uses a view `all_graph_edges` that unions `graph_edges` from all entity tables (`resources`, `moments`, `users`, etc.). This enables polymorphic traversal without complex joins in the application layer (a sketch follows these notes).
* **KV Store**: Edge destinations (`dst`) are resolved to entity IDs using the `kv_store`. This requires that all traversable entities have an entry in the `kv_store` (handled automatically by database triggers).
* **Iterated Retrieval**: REM is architected for multi-turn retrieval where LLMs conduct conversational database exploration. Each query informs the next, enabling emergent information discovery without requiring upfront schema knowledge.
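For intuition, the `all_graph_edges` view mentioned above can be pictured roughly as a UNION over the per-table `graph_edges` columns. This is an illustrative sketch only; the actual definition shipped in the package's SQL migrations may differ in columns and tables:

```sql
-- Illustrative sketch, not the shipped definition.
CREATE OR REPLACE VIEW all_graph_edges AS
SELECT id, user_id, graph_edges, 'resources' AS source_table FROM resources WHERE deleted_at IS NULL
UNION ALL
SELECT id, user_id, graph_edges, 'moments'   AS source_table FROM moments   WHERE deleted_at IS NULL
UNION ALL
SELECT id, user_id, graph_edges, 'users'     AS source_table FROM users     WHERE deleted_at IS NULL;
```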
rem/services/rem/__init__.py
@@ -0,0 +1,23 @@
"""
REM query execution and graph operations service.
"""

from .exceptions import (
    ContentFieldNotFoundError,
    EmbeddingFieldNotFoundError,
    FieldNotFoundError,
    InvalidParametersError,
    QueryExecutionError,
    REMException,
)
from .service import RemService

__all__ = [
    "RemService",
    "REMException",
    "FieldNotFoundError",
    "EmbeddingFieldNotFoundError",
    "ContentFieldNotFoundError",
    "QueryExecutionError",
    "InvalidParametersError",
]