remdb 0.3.242__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (235) hide show
  1. rem/__init__.py +129 -0
  2. rem/agentic/README.md +760 -0
  3. rem/agentic/__init__.py +54 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +38 -0
  6. rem/agentic/agents/agent_manager.py +311 -0
  7. rem/agentic/agents/sse_simulator.py +502 -0
  8. rem/agentic/context.py +425 -0
  9. rem/agentic/context_builder.py +360 -0
  10. rem/agentic/llm_provider_models.py +301 -0
  11. rem/agentic/mcp/__init__.py +0 -0
  12. rem/agentic/mcp/tool_wrapper.py +273 -0
  13. rem/agentic/otel/__init__.py +5 -0
  14. rem/agentic/otel/setup.py +240 -0
  15. rem/agentic/providers/phoenix.py +926 -0
  16. rem/agentic/providers/pydantic_ai.py +854 -0
  17. rem/agentic/query.py +117 -0
  18. rem/agentic/query_helper.py +89 -0
  19. rem/agentic/schema.py +737 -0
  20. rem/agentic/serialization.py +245 -0
  21. rem/agentic/tools/__init__.py +5 -0
  22. rem/agentic/tools/rem_tools.py +242 -0
  23. rem/api/README.md +657 -0
  24. rem/api/deps.py +253 -0
  25. rem/api/main.py +460 -0
  26. rem/api/mcp_router/prompts.py +182 -0
  27. rem/api/mcp_router/resources.py +820 -0
  28. rem/api/mcp_router/server.py +243 -0
  29. rem/api/mcp_router/tools.py +1605 -0
  30. rem/api/middleware/tracking.py +172 -0
  31. rem/api/routers/admin.py +520 -0
  32. rem/api/routers/auth.py +898 -0
  33. rem/api/routers/chat/__init__.py +5 -0
  34. rem/api/routers/chat/child_streaming.py +394 -0
  35. rem/api/routers/chat/completions.py +702 -0
  36. rem/api/routers/chat/json_utils.py +76 -0
  37. rem/api/routers/chat/models.py +202 -0
  38. rem/api/routers/chat/otel_utils.py +33 -0
  39. rem/api/routers/chat/sse_events.py +546 -0
  40. rem/api/routers/chat/streaming.py +950 -0
  41. rem/api/routers/chat/streaming_utils.py +327 -0
  42. rem/api/routers/common.py +18 -0
  43. rem/api/routers/dev.py +87 -0
  44. rem/api/routers/feedback.py +276 -0
  45. rem/api/routers/messages.py +620 -0
  46. rem/api/routers/models.py +86 -0
  47. rem/api/routers/query.py +362 -0
  48. rem/api/routers/shared_sessions.py +422 -0
  49. rem/auth/README.md +258 -0
  50. rem/auth/__init__.py +36 -0
  51. rem/auth/jwt.py +367 -0
  52. rem/auth/middleware.py +318 -0
  53. rem/auth/providers/__init__.py +16 -0
  54. rem/auth/providers/base.py +376 -0
  55. rem/auth/providers/email.py +215 -0
  56. rem/auth/providers/google.py +163 -0
  57. rem/auth/providers/microsoft.py +237 -0
  58. rem/cli/README.md +517 -0
  59. rem/cli/__init__.py +8 -0
  60. rem/cli/commands/README.md +299 -0
  61. rem/cli/commands/__init__.py +3 -0
  62. rem/cli/commands/ask.py +549 -0
  63. rem/cli/commands/cluster.py +1808 -0
  64. rem/cli/commands/configure.py +495 -0
  65. rem/cli/commands/db.py +828 -0
  66. rem/cli/commands/dreaming.py +324 -0
  67. rem/cli/commands/experiments.py +1698 -0
  68. rem/cli/commands/mcp.py +66 -0
  69. rem/cli/commands/process.py +388 -0
  70. rem/cli/commands/query.py +109 -0
  71. rem/cli/commands/scaffold.py +47 -0
  72. rem/cli/commands/schema.py +230 -0
  73. rem/cli/commands/serve.py +106 -0
  74. rem/cli/commands/session.py +453 -0
  75. rem/cli/dreaming.py +363 -0
  76. rem/cli/main.py +123 -0
  77. rem/config.py +244 -0
  78. rem/mcp_server.py +41 -0
  79. rem/models/core/__init__.py +49 -0
  80. rem/models/core/core_model.py +70 -0
  81. rem/models/core/engram.py +333 -0
  82. rem/models/core/experiment.py +672 -0
  83. rem/models/core/inline_edge.py +132 -0
  84. rem/models/core/rem_query.py +246 -0
  85. rem/models/entities/__init__.py +68 -0
  86. rem/models/entities/domain_resource.py +38 -0
  87. rem/models/entities/feedback.py +123 -0
  88. rem/models/entities/file.py +57 -0
  89. rem/models/entities/image_resource.py +88 -0
  90. rem/models/entities/message.py +64 -0
  91. rem/models/entities/moment.py +123 -0
  92. rem/models/entities/ontology.py +181 -0
  93. rem/models/entities/ontology_config.py +131 -0
  94. rem/models/entities/resource.py +95 -0
  95. rem/models/entities/schema.py +87 -0
  96. rem/models/entities/session.py +84 -0
  97. rem/models/entities/shared_session.py +180 -0
  98. rem/models/entities/subscriber.py +175 -0
  99. rem/models/entities/user.py +93 -0
  100. rem/py.typed +0 -0
  101. rem/registry.py +373 -0
  102. rem/schemas/README.md +507 -0
  103. rem/schemas/__init__.py +6 -0
  104. rem/schemas/agents/README.md +92 -0
  105. rem/schemas/agents/core/agent-builder.yaml +235 -0
  106. rem/schemas/agents/core/moment-builder.yaml +178 -0
  107. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  108. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  109. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  110. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  111. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  112. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  113. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  114. rem/schemas/agents/examples/hello-world.yaml +37 -0
  115. rem/schemas/agents/examples/query.yaml +54 -0
  116. rem/schemas/agents/examples/simple.yaml +21 -0
  117. rem/schemas/agents/examples/test.yaml +29 -0
  118. rem/schemas/agents/rem.yaml +132 -0
  119. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  120. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  121. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  122. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  123. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  124. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  125. rem/services/__init__.py +18 -0
  126. rem/services/audio/INTEGRATION.md +308 -0
  127. rem/services/audio/README.md +376 -0
  128. rem/services/audio/__init__.py +15 -0
  129. rem/services/audio/chunker.py +354 -0
  130. rem/services/audio/transcriber.py +259 -0
  131. rem/services/content/README.md +1269 -0
  132. rem/services/content/__init__.py +5 -0
  133. rem/services/content/providers.py +760 -0
  134. rem/services/content/service.py +762 -0
  135. rem/services/dreaming/README.md +230 -0
  136. rem/services/dreaming/__init__.py +53 -0
  137. rem/services/dreaming/affinity_service.py +322 -0
  138. rem/services/dreaming/moment_service.py +251 -0
  139. rem/services/dreaming/ontology_service.py +54 -0
  140. rem/services/dreaming/user_model_service.py +297 -0
  141. rem/services/dreaming/utils.py +39 -0
  142. rem/services/email/__init__.py +10 -0
  143. rem/services/email/service.py +522 -0
  144. rem/services/email/templates.py +360 -0
  145. rem/services/embeddings/__init__.py +11 -0
  146. rem/services/embeddings/api.py +127 -0
  147. rem/services/embeddings/worker.py +435 -0
  148. rem/services/fs/README.md +662 -0
  149. rem/services/fs/__init__.py +62 -0
  150. rem/services/fs/examples.py +206 -0
  151. rem/services/fs/examples_paths.py +204 -0
  152. rem/services/fs/git_provider.py +935 -0
  153. rem/services/fs/local_provider.py +760 -0
  154. rem/services/fs/parsing-hooks-examples.md +172 -0
  155. rem/services/fs/paths.py +276 -0
  156. rem/services/fs/provider.py +460 -0
  157. rem/services/fs/s3_provider.py +1042 -0
  158. rem/services/fs/service.py +186 -0
  159. rem/services/git/README.md +1075 -0
  160. rem/services/git/__init__.py +17 -0
  161. rem/services/git/service.py +469 -0
  162. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  163. rem/services/phoenix/README.md +453 -0
  164. rem/services/phoenix/__init__.py +46 -0
  165. rem/services/phoenix/client.py +960 -0
  166. rem/services/phoenix/config.py +88 -0
  167. rem/services/phoenix/prompt_labels.py +477 -0
  168. rem/services/postgres/README.md +757 -0
  169. rem/services/postgres/__init__.py +49 -0
  170. rem/services/postgres/diff_service.py +599 -0
  171. rem/services/postgres/migration_service.py +427 -0
  172. rem/services/postgres/programmable_diff_service.py +635 -0
  173. rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
  174. rem/services/postgres/register_type.py +353 -0
  175. rem/services/postgres/repository.py +481 -0
  176. rem/services/postgres/schema_generator.py +661 -0
  177. rem/services/postgres/service.py +802 -0
  178. rem/services/postgres/sql_builder.py +355 -0
  179. rem/services/rate_limit.py +113 -0
  180. rem/services/rem/README.md +318 -0
  181. rem/services/rem/__init__.py +23 -0
  182. rem/services/rem/exceptions.py +71 -0
  183. rem/services/rem/executor.py +293 -0
  184. rem/services/rem/parser.py +180 -0
  185. rem/services/rem/queries.py +196 -0
  186. rem/services/rem/query.py +371 -0
  187. rem/services/rem/service.py +608 -0
  188. rem/services/session/README.md +374 -0
  189. rem/services/session/__init__.py +13 -0
  190. rem/services/session/compression.py +488 -0
  191. rem/services/session/pydantic_messages.py +310 -0
  192. rem/services/session/reload.py +85 -0
  193. rem/services/user_service.py +130 -0
  194. rem/settings.py +1877 -0
  195. rem/sql/background_indexes.sql +52 -0
  196. rem/sql/migrations/001_install.sql +983 -0
  197. rem/sql/migrations/002_install_models.sql +3157 -0
  198. rem/sql/migrations/003_optional_extensions.sql +326 -0
  199. rem/sql/migrations/004_cache_system.sql +282 -0
  200. rem/sql/migrations/005_schema_update.sql +145 -0
  201. rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
  202. rem/utils/AGENTIC_CHUNKING.md +597 -0
  203. rem/utils/README.md +628 -0
  204. rem/utils/__init__.py +61 -0
  205. rem/utils/agentic_chunking.py +622 -0
  206. rem/utils/batch_ops.py +343 -0
  207. rem/utils/chunking.py +108 -0
  208. rem/utils/clip_embeddings.py +276 -0
  209. rem/utils/constants.py +97 -0
  210. rem/utils/date_utils.py +228 -0
  211. rem/utils/dict_utils.py +98 -0
  212. rem/utils/embeddings.py +436 -0
  213. rem/utils/examples/embeddings_example.py +305 -0
  214. rem/utils/examples/sql_types_example.py +202 -0
  215. rem/utils/files.py +323 -0
  216. rem/utils/markdown.py +16 -0
  217. rem/utils/mime_types.py +158 -0
  218. rem/utils/model_helpers.py +492 -0
  219. rem/utils/schema_loader.py +649 -0
  220. rem/utils/sql_paths.py +146 -0
  221. rem/utils/sql_types.py +350 -0
  222. rem/utils/user_id.py +81 -0
  223. rem/utils/vision.py +325 -0
  224. rem/workers/README.md +506 -0
  225. rem/workers/__init__.py +7 -0
  226. rem/workers/db_listener.py +579 -0
  227. rem/workers/db_maintainer.py +74 -0
  228. rem/workers/dreaming.py +502 -0
  229. rem/workers/engram_processor.py +312 -0
  230. rem/workers/sqs_file_processor.py +193 -0
  231. rem/workers/unlogged_maintainer.py +463 -0
  232. remdb-0.3.242.dist-info/METADATA +1632 -0
  233. remdb-0.3.242.dist-info/RECORD +235 -0
  234. remdb-0.3.242.dist-info/WHEEL +4 -0
  235. remdb-0.3.242.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,318 @@
1
+ # REM Service
2
+
3
+ The `RemService` is the high-level query execution engine for REM (Resources-Entities-Moments), a bio-inspired memory infrastructure combining temporal narratives, semantic relationships, and structured knowledge.
4
+
5
+ ## Architecture Overview
6
+
7
+ REM mirrors human memory systems through three complementary layers:
8
+
9
+ **Resources**: Chunked, embedded content from documents, files, and conversations. Stored with semantic embeddings for vector search, entity references, and knowledge graph edges.
10
+
11
+ **Entities**: Domain knowledge nodes with natural language labels (not UUIDs). Examples: "sarah-chen", "tidb-migration-spec". Enables conversational queries without requiring internal ID knowledge.
12
+
13
+ **Moments**: Temporal narratives (meetings, coding sessions, conversations) with time boundaries, present persons, speakers, emotion tags, and topic tags. Enable chronological memory retrieval.
14
+
15
+ Core design principle: Multi-index organization (vectors + graph + time + key-value) supporting iterated retrieval where LLMs conduct multi-turn database conversations.
16
+
17
+ ## Query Dialect (AST)
18
+
19
+ REM queries follow a structured dialect. Which query types are available depends on the memory evolution stage (see "Query Availability by Evolution Stage" below).
20
+
21
+ ### Grammar
22
+
23
+ ```
24
+ Query ::= LookupQuery | FuzzyQuery | SearchQuery | SqlQuery | TraverseQuery
25
+
26
+ LookupQuery ::= LOOKUP <key:string|list[string]>
27
+ key : Single entity name or list of entity names (natural language labels)
28
+ performance : O(1) per key
29
+ available : Stage 1+
30
+ examples :
31
+ - LOOKUP "Sarah"
32
+ - LOOKUP ["Sarah", "Mike", "Emily"]
33
+ - LOOKUP "Project Alpha"
34
+
35
+ FuzzyQuery ::= FUZZY <text:string> [THRESHOLD <t:float>] [LIMIT <n:int>]
36
+ text : Search text (partial/misspelled)
37
+ threshold : Similarity score 0.0-1.0 (default: 0.5)
38
+ limit : Max results (default: 5)
39
+ performance : Indexed (pg_trgm)
40
+ available : Stage 1+
41
+ example : FUZZY "sara" THRESHOLD 0.5 LIMIT 10
42
+
43
+ SearchQuery ::= SEARCH <text:string> [TABLE <table:string>] [WHERE <clause:string>] [LIMIT <n:int>]
44
+ text : Semantic query text
45
+ table : Target table (default: "resources")
46
+ clause : Optional PostgreSQL WHERE clause for hybrid filtering (combines vector + structured)
47
+ limit : Max results (default: 10)
48
+ performance : Indexed (pgvector)
49
+ available : Stage 3+
50
+ examples :
51
+ - SEARCH "database migration" TABLE resources LIMIT 10
52
+ - SEARCH "team discussion" TABLE moments WHERE "moment_type='meeting'" LIMIT 5
53
+ - SEARCH "project updates" WHERE "created_at >= '2024-01-01'" LIMIT 20
54
+ - SEARCH "AI research" WHERE "tags @> ARRAY['machine-learning']" LIMIT 10
55
+
56
+ Hybrid Query Support: SEARCH combines semantic vector similarity with structured filtering.
57
+ Use WHERE clause to filter on system fields or entity-specific fields.
58
+
59
+ SqlQuery ::= SQL <table:string> [WHERE <clause:string>] [ORDER BY <order:string>] [LIMIT <n:int>]
60
+ table : Table name ("resources", "moments", etc.)
61
+ clause : PostgreSQL WHERE conditions (any valid PostgreSQL syntax)
62
+ order : ORDER BY clause
63
+ limit : Max results
64
+ performance : O(n) with indexes
65
+ available : Stage 1+
66
+ dialect : PostgreSQL (supports all PostgreSQL features: JSONB operators, array operators, etc.)
67
+ examples :
68
+ - SQL moments WHERE "moment_type='meeting'" ORDER BY starts_timestamp DESC LIMIT 10
69
+ - SQL resources WHERE "metadata->>'status' = 'published'" LIMIT 20
70
+ - SQL moments WHERE "tags && ARRAY['urgent', 'bug']" ORDER BY created_at DESC
71
+
72
+ PostgreSQL Dialect: SQL queries use PostgreSQL syntax with full support for:
73
+ - JSONB operators (->>, ->, @>, etc.)
74
+ - Array operators (&&, @>, <@, etc.)
75
+ - Advanced filtering and aggregations
76
+
77
+ TraverseQuery ::= TRAVERSE [<edge_types:list>] WITH <initial_query:Query> [DEPTH <d:int>] [ORDER BY <order:string>] [LIMIT <n:int>]
78
+ edge_types : Relationship types to follow (e.g., ["manages", "reports-to"], default: all)
79
+ initial_query : Starting query (typically LOOKUP)
80
+ depth : Number of hops (0=PLAN mode, 1=single hop, N=multi-hop, default: 1)
81
+ order : Order results (default: "edge.created_at DESC")
82
+ limit : Max nodes (default: 9)
83
+ performance : O(k) where k = visited nodes
84
+ available : Stage 3+
85
+ examples :
86
+ - TRAVERSE manages WITH LOOKUP "Sally" DEPTH 1
87
+ - TRAVERSE WITH LOOKUP "Sally" DEPTH 0 (PLAN mode: edge analysis only)
88
+ - TRAVERSE manages,reports-to WITH LOOKUP "Sarah" DEPTH 2 LIMIT 5
89
+ ```
90
+
91
+ ### System Fields (CoreModel)
92
+
93
+ All REM entities inherit from CoreModel and have these system fields:
94
+
95
+ * **id** (UUID or string): Unique identifier
96
+ * **created_at** (timestamp): Entity creation time (RECOMMENDED for filtering)
97
+ * **updated_at** (timestamp): Last modification time (RECOMMENDED for filtering)
98
+ * **deleted_at** (timestamp): Soft deletion time (null if active)
99
+ * **tenant_id** (string): Optional; reserved for future multi-tenant SaaS use (kept for backward compatibility)
100
+ * **user_id** (string): Owner user identifier (primary isolation scope, auto-filtered)
101
+ * **graph_edges** (JSONB array): Knowledge graph edges - USE IN SELECT, NOT WHERE
102
+ * **metadata** (JSONB object): Flexible metadata storage
103
+ * **tags** (array of strings): Entity tags
104
+
105
+ **CRITICAL: graph_edges Usage Rules:**
106
+
107
+ * ✓ DO: Select `graph_edges` in result sets to see relationships
108
+ * ✗ DON'T: Filter by `graph_edges` in WHERE clauses (edge names vary by entity)
109
+ * ✓ DO: Use TRAVERSE queries to follow graph edges
110
+
111
+ Example CORRECT:
112
+ ```sql
113
+ SELECT id, name, created_at, graph_edges FROM resources WHERE created_at >= '2024-01-01'
114
+ ```
115
+
116
+ Example WRONG:
117
+ ```sql
118
+ -- Edge names are unknown and vary by entity!
119
+ SELECT * FROM resources WHERE graph_edges @> '[{"dst": "sarah"}]'
120
+ ```
121
+
122
+ ### Main Tables (Resources, Moments, Files)
123
+
124
+ **Resources table:**
125
+
126
+ * **name** (string): Human-readable resource name
127
+ * **uri** (string): Content URI/identifier
128
+ * **content** (text): Resource content
129
+ * **timestamp** (timestamp): Content creation time (use for temporal filtering)
130
+ * **category** (string): Resource category (document, conversation, artifact, etc.)
131
+ * **related_entities** (JSONB): Extracted entities
132
+
133
+ **Moments table:**
134
+
135
+ * **name** (string): Human-readable moment name
136
+ * **moment_type** (string): Moment classification (meeting, coding-session, conversation, etc.)
137
+ * **category** (string): Moment category
138
+ * **starts_timestamp** (timestamp): Start time (use for temporal filtering)
139
+ * **ends_timestamp** (timestamp): End time
140
+ * **present_persons** (JSONB): People present in moment
141
+ * **emotion_tags** (array): Sentiment tags (happy, frustrated, focused, etc.)
142
+ * **topic_tags** (array): Topic/concept tags
143
+ * **summary** (text): Natural language description
144
+
145
+ **Files table:**
146
+
147
+ * **name** (string): File name
148
+ * **uri** (string): File URI/path
149
+ * **mime_type** (string): File MIME type
150
+ * **size_bytes** (integer): File size
151
+ * **processing_status** (string): Processing status (pending, completed, failed)
152
+ * **category** (string): File category
153
+
154
+ ### Recommended Filtering Fields
155
+
156
+ * **Temporal**: created_at, updated_at, timestamp, starts_timestamp, ends_timestamp
157
+ * **Categorical**: category, moment_type, mime_type, processing_status
158
+ * **Arrays**: tags, emotion_tags, topic_tags (use && or @> operators)
159
+ * **Text**: name, content, summary (use ILIKE for pattern matching)
160
+
161
+ Use these fields in WHERE clauses for both SEARCH (hybrid) and SQL queries.
162
+
163
+ ### Python API
164
+
165
+ ```python
166
+ # LOOKUP - O(1) entity retrieval by natural language key
167
+ RemQuery(
168
+ query_type=QueryType.LOOKUP,
169
+ parameters=LookupParameters(key="Sarah")
170
+ )
171
+
172
+ # FUZZY - Trigram-based fuzzy text search
173
+ RemQuery(
174
+ query_type=QueryType.FUZZY,
175
+ parameters=FuzzyParameters(query_text="sara", threshold=0.5, limit=5)
176
+ )
177
+
178
+ # SEARCH - Vector similarity search using embeddings
179
+ RemQuery(
180
+ query_type=QueryType.SEARCH,
181
+ parameters=SearchParameters(query_text="database migration to TiDB", table_name="resources", limit=10)
182
+ )
183
+
184
+ # SQL - Direct SQL execution (tenant-isolated)
185
+ RemQuery(
186
+ query_type=QueryType.SQL,
187
+ parameters=SQLParameters(table_name="moments", where_clause="moment_type='meeting'", order_by="starts_timestamp DESC", limit=10)
188
+ )
189
+
190
+ # TRAVERSE - Recursive graph traversal following edges
191
+ RemQuery(
192
+ query_type=QueryType.TRAVERSE,
193
+ parameters=TraverseParameters(initial_query="Sally", edge_types=["manages"], max_depth=2, order_by="edge.created_at DESC", limit=9)
194
+ )
195
+ ```
196
+
197
+ ### Query Availability by Evolution Stage
198
+
199
+ | Query Type | Stage 0 | Stage 1 | Stage 2 | Stage 3 | Stage 4 |
200
+ |------------|---------|---------|---------|---------|---------|
201
+ | LOOKUP | ✗ | ✓ | ✓ | ✓ | ✓ |
202
+ | FUZZY | ✗ | ✓ | ✓ | ✓ | ✓ |
203
+ | SEARCH | ✗ | ✗ | ✗ | ✓ | ✓ |
204
+ | SQL | ✗ | ✓ | ✓ | ✓ | ✓ |
205
+ | TRAVERSE | ✗ | ✗ | ✗ | ✓ | ✓ |
206
+
207
+ **Stage 0** (0% answerable): Raw resources only, nothing extracted or indexed yet; all queries fail.
208
+
209
+ **Stage 1** (20% answerable): Resources seeded with entity extraction. LOOKUP and FUZZY work for finding entities. SQL works for basic filtering.
210
+
211
+ **Stage 2** (50% answerable): Moments extracted. SQL temporal queries work. LOOKUP includes moment entities.
212
+
213
+ **Stage 3** (80% answerable): Affinity graph built. SEARCH and TRAVERSE become available. Multi-hop graph queries work.
214
+
215
+ **Stage 4** (100% answerable): Mature graph with rich historical data. All query types fully functional with high-quality results.
216
+
217
+ ## Query Types
218
+
219
+ The service supports schema-agnostic and indexed query operations with strict performance contracts:
220
+
221
+ * **LOOKUP**: O(1) entity retrieval by natural language key (via `kv_store`).
222
+ * **FUZZY**: Trigram-based fuzzy text search (indexed).
223
+ * **SEARCH**: Vector similarity search using embeddings (requires `pgvector`).
224
+ * **SQL**: Direct SQL execution (tenant-isolated).
225
+ * **TRAVERSE**: Recursive graph traversal (O(k) where k = visited nodes).
226
+
227
+ ## Graph Traversal (`TRAVERSE`)
228
+
229
+ The `TRAVERSE` operation allows agents to explore the knowledge graph by following edges between entities.
230
+
231
+ ### Contract
232
+ * **Performance**: O(k) where k is the number of visited nodes.
233
+ * **Polymorphism**: Seamlessly traverses relationships between different entity types (`Resources`, `Moments`, `Users`, etc.).
234
+ * **Filtering**: Supports filtering by relationship type(s).
235
+ * **Cycle Detection**: Built-in cycle detection prevents infinite loops.
236
+
237
+ ### Data Model
238
+ Graph traversal relies on the `InlineEdge` Pydantic model stored in the `graph_edges` JSONB column of every entity table.
239
+
240
+ **Expected JSON Structure (`InlineEdge`):**
241
+ ```json
242
+ {
243
+ "dst": "target-entity-key", // Human-readable key (NOT UUID)
244
+ "rel_type": "authored_by", // Relationship type
245
+ "weight": 0.8, // Connection strength (0.0-1.0)
246
+ "properties": { ... } // Additional metadata
247
+ }
248
+ ```
249
+
250
+ ### Usage
251
+ The `TRAVERSE` query accepts the following parameters:
252
+
253
+ * `initial_query` (str): The starting entity key.
254
+ * `max_depth` (int): Maximum number of hops (default: 1).
255
+ * `edge_types` (list[str]): List of relationship types to follow. If empty or `['*']`, follows all edges.
256
+
257
+ **Example:**
258
+ ```python
259
+ # Find entities connected to "Project X" via "depends_on" or "related_to" edges, up to 2 hops deep.
260
+ result = await rem_service.execute_query(
261
+ RemQuery(
262
+ query_type=QueryType.TRAVERSE,
263
+ parameters=TraverseParameters(
264
+ initial_query="Project X",
265
+ max_depth=2,
266
+ edge_types=["depends_on", "related_to"]
267
+ ),
268
+ user_id="user-123"
269
+ )
270
+ )
271
+ ```
272
+
273
+ ## Memory Evolution Through Dreaming
274
+
275
+ REM improves query answerability over time through background dreaming workflows:
276
+
277
+ * **Stage 0**: Raw resources only (0% answerable)
278
+ * **Stage 1**: Entity extraction complete (20% answerable; LOOKUP, FUZZY, and SQL work)
279
+ * **Stage 2**: Moments generated (50% answerable, temporal queries work)
280
+ * **Stage 3**: Affinity matching complete (80% answerable, semantic/graph queries work)
281
+ * **Stage 4**: Multiple dreaming cycles (100% answerable, full query capabilities)
282
+
283
+ Dreaming workers extract temporal narratives (moments) and build semantic graph edges (affinity) from resources, progressively enriching the knowledge graph.
284
+
285
+ ## Testing Approach
286
+
287
+ REM testing follows a quality-driven methodology focused on query evolution:
288
+
289
+ **Critical Principle**: Test with user-known information only. Users provide natural language ("Sarah", "Project Alpha"), not internal representations ("sarah-chen", "project-alpha").
290
+
291
+ **Quality Validation**:
292
+
293
+ * Moment quality: Temporal validity, person extraction, speaker identification, tag quality, entity references, temporal coverage, type distribution
294
+ * Affinity quality: Edge existence, edge format, semantic relevance, bidirectional edges, entity connections, graph connectivity, edge distribution
295
+
296
+ **Integration Tests**: Validate progressive query answerability across memory evolution stages. Test suite includes realistic queries simulating multi-turn LLM-database conversations.
297
+
298
+ See `tests/integration/test_rem_query_evolution.py` for stage-based validation and `tests/integration/test_graph_traversal.py` for graph query testing.
299
+
300
+ ## Architecture Notes
301
+
302
+ * **Unified View**: The underlying SQL function `rem_traverse` uses a view `all_graph_edges` that unions `graph_edges` from all entity tables (`resources`, `moments`, `users`, etc.). This enables polymorphic traversal without complex joins in the application layer.
303
+ * **KV Store**: Edge destinations (`dst`) are resolved to entity IDs using the `kv_store`. This requires that all traversable entities have an entry in the `kv_store` (handled automatically by database triggers).
304
+ * **Iterated Retrieval**: REM is architected for multi-turn retrieval where LLMs conduct conversational database exploration. Each query informs the next, enabling emergent information discovery without requiring upfront schema knowledge.
305
+
306
+ ## Scaling & Architectural Decisions
307
+
308
+ ### 1. Hybrid Adjacency List
309
+ REM implements a **Hybrid Adjacency List** pattern to balance strict relational guarantees with graph flexibility:
310
+ * **Primary Storage (Source of Truth):** Standard PostgreSQL tables (`resources`, `moments`, etc.) enforce schema validation, constraints, and type safety.
311
+ * **Graph Overlay:** Relationships are stored as "inline edges" within a JSONB column (`graph_edges`) on each entity.
312
+ * **Performance Layer:** A denormalized `UNLOGGED` table (`kv_store`) acts as a high-speed cache, mapping human-readable keys to internal UUIDs and edges. This sidesteps the traditional "join bomb" of traversing normalized SQL tables without taking on the operational complexity of a separate graph database (e.g., Neo4j).
313
+
314
+ ### 2. The Pareto Principle in Graph Algorithms
315
+ We explicitly choose **Simplicity over Full-Scale Graph Analytics**.
316
+ * **Hypothesis:** For LLM Agent workloads, 80% of the value is derived from **local context retrieval** (1-3 hops via `LOOKUP` and `TRAVERSE`).
317
+ * **Diminishing Returns:** Global graph algorithms (PageRank, Community Detection) offer diminishing returns for real-time agentic retrieval tasks. Agents typically need to answer specific questions ("Who worked on file X?"), which is a local neighborhood problem, not a global cluster analysis problem.
318
+ * **Future Scaling:** If deeper analysis is needed, we prefer **Graph + Vector (RAG)** approaches (using semantic similarity to find implicit links) over complex explicit graph algorithms.
@@ -0,0 +1,23 @@
1
+ """
2
+ REM query execution and graph operations service.
3
+ """
4
+
5
+ from .exceptions import (
6
+ ContentFieldNotFoundError,
7
+ EmbeddingFieldNotFoundError,
8
+ FieldNotFoundError,
9
+ InvalidParametersError,
10
+ QueryExecutionError,
11
+ REMException,
12
+ )
13
+ from .service import RemService
14
+
15
+ __all__ = [
16
+ "RemService",
17
+ "REMException",
18
+ "FieldNotFoundError",
19
+ "EmbeddingFieldNotFoundError",
20
+ "ContentFieldNotFoundError",
21
+ "QueryExecutionError",
22
+ "InvalidParametersError",
23
+ ]
@@ -0,0 +1,71 @@
1
+ """
2
+ REM service exceptions.
3
+
4
+ Custom exceptions for REM query execution errors.
5
+ """
6
+
7
+
8
class REMException(Exception):
    """Common root of the REM service exception hierarchy.

    The other exception classes in this module derive from this type, so
    catching it handles any of them.
    """
12
+
13
+
14
class FieldNotFoundError(REMException):
    """Raised when a field does not exist in the model.

    Attributes:
        model_name: Name of the model that was inspected.
        field_name: The requested field that is missing.
        available_fields: Field names reported as available; listed in the
            error message.
    """

    def __init__(self, model_name: str, field_name: str, available_fields: list[str]):
        self.model_name = model_name
        self.field_name = field_name
        self.available_fields = available_fields
        super().__init__(
            f"Field '{field_name}' not found in model '{model_name}'. "
            f"Available fields: {', '.join(available_fields)}"
        )

    def __reduce__(self):
        """Rebuild from the constructor arguments when pickled or copied.

        The default ``BaseException`` reduction replays ``self.args`` (only
        the formatted message) into ``__init__``, which requires three
        arguments and would raise ``TypeError`` on ``pickle.loads`` or
        ``copy.copy``.
        """
        return (
            type(self),
            (self.model_name, self.field_name, self.available_fields),
        )
25
+
26
+
27
class EmbeddingFieldNotFoundError(REMException):
    """Raised when trying to search on a field that has no embeddings.

    Attributes:
        model_name: Name of the model that was searched.
        field_name: The field that lacks embeddings.
        embeddable_fields: Fields that do have embeddings; may be empty, in
            which case the message says none are configured.
    """

    def __init__(self, model_name: str, field_name: str, embeddable_fields: list[str]):
        self.model_name = model_name
        self.field_name = field_name
        self.embeddable_fields = embeddable_fields
        msg = (
            f"Field '{field_name}' in model '{model_name}' does not have embeddings. "
        )
        if embeddable_fields:
            msg += f"Embeddable fields: {', '.join(embeddable_fields)}"
        else:
            msg += "No embeddable fields configured for this model."
        super().__init__(msg)

    def __reduce__(self):
        """Rebuild from the constructor arguments when pickled or copied.

        The default ``BaseException`` reduction replays ``self.args`` (only
        the formatted message) into ``__init__``, which requires three
        arguments and would raise ``TypeError`` on ``pickle.loads`` or
        ``copy.copy``.
        """
        return (
            type(self),
            (self.model_name, self.field_name, self.embeddable_fields),
        )
42
+
43
+
44
class ContentFieldNotFoundError(REMException):
    """Raised when model has no 'content' field for default embedding search.

    Attributes:
        model_name: Name of the model that was searched.
        available_fields: Field names reported as available; listed in the
            error message.
    """

    def __init__(self, model_name: str, available_fields: list[str]):
        self.model_name = model_name
        self.available_fields = available_fields
        super().__init__(
            f"Model '{model_name}' has no 'content' field. "
            f"Available fields: {', '.join(available_fields)}. "
            # Plain literal: this fragment has no placeholders to interpolate.
            "Specify field_name explicitly in SearchParameters."
        )

    def __reduce__(self):
        """Rebuild from the constructor arguments when pickled or copied.

        The default ``BaseException`` reduction replays ``self.args`` (only
        the formatted message) into ``__init__``, which requires two
        arguments and would raise ``TypeError`` on ``pickle.loads`` or
        ``copy.copy``.
        """
        return (type(self), (self.model_name, self.available_fields))
55
+
56
+
57
class QueryExecutionError(REMException):
    """Raised when REM query execution fails.

    Attributes:
        query_type: Label of the query type that failed; prefixes the message.
        message: The caller-supplied failure description, without the prefix.
        original_error: The underlying exception, if any. It is only stored
            here, not chained; use ``raise ... from err`` at the raise site
            if the traceback should show the cause.
    """

    def __init__(self, query_type: str, message: str, original_error: Exception | None = None):
        self.query_type = query_type
        # Kept so the instance can be rebuilt from its constructor arguments.
        self.message = message
        self.original_error = original_error
        super().__init__(f"{query_type} query failed: {message}")

    def __reduce__(self):
        """Rebuild from the constructor arguments when pickled or copied.

        The default ``BaseException`` reduction replays ``self.args`` (only
        the formatted message) into ``__init__``, which requires at least two
        arguments and would raise ``TypeError`` on ``pickle.loads`` or
        ``copy.copy``.
        """
        return (
            type(self),
            (self.query_type, self.message, self.original_error),
        )
64
+
65
+
66
class InvalidParametersError(REMException):
    """Raised when query parameters are invalid.

    Attributes:
        query_type: Label of the query type whose parameters were rejected.
        message: The caller-supplied description, without the prefix.
    """

    def __init__(self, query_type: str, message: str):
        self.query_type = query_type
        # Kept so the instance can be rebuilt from its constructor arguments.
        self.message = message
        super().__init__(f"Invalid {query_type} parameters: {message}")

    def __reduce__(self):
        """Rebuild from the constructor arguments when pickled or copied.

        The default ``BaseException`` reduction replays ``self.args`` (only
        the formatted message) into ``__init__``, which requires two
        arguments and would raise ``TypeError`` on ``pickle.loads`` or
        ``copy.copy``.
        """
        return (type(self), (self.query_type, self.message))