remdb 0.3.242__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +129 -0
- rem/agentic/README.md +760 -0
- rem/agentic/__init__.py +54 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +38 -0
- rem/agentic/agents/agent_manager.py +311 -0
- rem/agentic/agents/sse_simulator.py +502 -0
- rem/agentic/context.py +425 -0
- rem/agentic/context_builder.py +360 -0
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +273 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +240 -0
- rem/agentic/providers/phoenix.py +926 -0
- rem/agentic/providers/pydantic_ai.py +854 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +737 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +242 -0
- rem/api/README.md +657 -0
- rem/api/deps.py +253 -0
- rem/api/main.py +460 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +820 -0
- rem/api/mcp_router/server.py +243 -0
- rem/api/mcp_router/tools.py +1605 -0
- rem/api/middleware/tracking.py +172 -0
- rem/api/routers/admin.py +520 -0
- rem/api/routers/auth.py +898 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/child_streaming.py +394 -0
- rem/api/routers/chat/completions.py +702 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +202 -0
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +546 -0
- rem/api/routers/chat/streaming.py +950 -0
- rem/api/routers/chat/streaming_utils.py +327 -0
- rem/api/routers/common.py +18 -0
- rem/api/routers/dev.py +87 -0
- rem/api/routers/feedback.py +276 -0
- rem/api/routers/messages.py +620 -0
- rem/api/routers/models.py +86 -0
- rem/api/routers/query.py +362 -0
- rem/api/routers/shared_sessions.py +422 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +36 -0
- rem/auth/jwt.py +367 -0
- rem/auth/middleware.py +318 -0
- rem/auth/providers/__init__.py +16 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/email.py +215 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +517 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +299 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +549 -0
- rem/cli/commands/cluster.py +1808 -0
- rem/cli/commands/configure.py +495 -0
- rem/cli/commands/db.py +828 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1698 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +388 -0
- rem/cli/commands/query.py +109 -0
- rem/cli/commands/scaffold.py +47 -0
- rem/cli/commands/schema.py +230 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/commands/session.py +453 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +123 -0
- rem/config.py +244 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +70 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +672 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +246 -0
- rem/models/entities/__init__.py +68 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +64 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +181 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/session.py +84 -0
- rem/models/entities/shared_session.py +180 -0
- rem/models/entities/subscriber.py +175 -0
- rem/models/entities/user.py +93 -0
- rem/py.typed +0 -0
- rem/registry.py +373 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/agent-builder.yaml +235 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +132 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +18 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +760 -0
- rem/services/content/service.py +762 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +322 -0
- rem/services/dreaming/moment_service.py +251 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/email/__init__.py +10 -0
- rem/services/email/service.py +522 -0
- rem/services/email/templates.py +360 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +127 -0
- rem/services/embeddings/worker.py +435 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +960 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +757 -0
- rem/services/postgres/__init__.py +49 -0
- rem/services/postgres/diff_service.py +599 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/programmable_diff_service.py +635 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
- rem/services/postgres/register_type.py +353 -0
- rem/services/postgres/repository.py +481 -0
- rem/services/postgres/schema_generator.py +661 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +355 -0
- rem/services/rate_limit.py +113 -0
- rem/services/rem/README.md +318 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +180 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +608 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +13 -0
- rem/services/session/compression.py +488 -0
- rem/services/session/pydantic_messages.py +310 -0
- rem/services/session/reload.py +85 -0
- rem/services/user_service.py +130 -0
- rem/settings.py +1877 -0
- rem/sql/background_indexes.sql +52 -0
- rem/sql/migrations/001_install.sql +983 -0
- rem/sql/migrations/002_install_models.sql +3157 -0
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +282 -0
- rem/sql/migrations/005_schema_update.sql +145 -0
- rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +628 -0
- rem/utils/__init__.py +61 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/constants.py +97 -0
- rem/utils/date_utils.py +228 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +436 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/files.py +323 -0
- rem/utils/markdown.py +16 -0
- rem/utils/mime_types.py +158 -0
- rem/utils/model_helpers.py +492 -0
- rem/utils/schema_loader.py +649 -0
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +350 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +325 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +7 -0
- rem/workers/db_listener.py +579 -0
- rem/workers/db_maintainer.py +74 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- rem/workers/unlogged_maintainer.py +463 -0
- remdb-0.3.242.dist-info/METADATA +1632 -0
- remdb-0.3.242.dist-info/RECORD +235 -0
- remdb-0.3.242.dist-info/WHEEL +4 -0
- remdb-0.3.242.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
# REM Service
|
|
2
|
+
|
|
3
|
+
The `RemService` is the high-level query execution engine for REM (Resources-Entities-Moments), a bio-inspired memory infrastructure combining temporal narratives, semantic relationships, and structured knowledge.
|
|
4
|
+
|
|
5
|
+
## Architecture Overview
|
|
6
|
+
|
|
7
|
+
REM mirrors human memory systems through three complementary layers:
|
|
8
|
+
|
|
9
|
+
**Resources**: Chunked, embedded content from documents, files, and conversations. Stored with semantic embeddings for vector search, entity references, and knowledge graph edges.
|
|
10
|
+
|
|
11
|
+
**Entities**: Domain knowledge nodes with natural language labels (not UUIDs). Examples: "sarah-chen", "tidb-migration-spec". Enables conversational queries without requiring internal ID knowledge.
|
|
12
|
+
|
|
13
|
+
**Moments**: Temporal narratives (meetings, coding sessions, conversations) with time boundaries, present persons, speakers, emotion tags, and topic tags. Enable chronological memory retrieval.
|
|
14
|
+
|
|
15
|
+
Core design principle: Multi-index organization (vectors + graph + time + key-value) supporting iterated retrieval where LLMs conduct multi-turn database conversations.
|
|
16
|
+
|
|
17
|
+
## Query Dialect (AST)
|
|
18
|
+
|
|
19
|
+
REM queries follow a structured dialect with availability dependent on memory evolution stage.
|
|
20
|
+
|
|
21
|
+
### Grammar
|
|
22
|
+
|
|
23
|
+
```
|
|
24
|
+
Query ::= LookupQuery | FuzzyQuery | SearchQuery | SqlQuery | TraverseQuery
|
|
25
|
+
|
|
26
|
+
LookupQuery ::= LOOKUP <key:string|list[string]>
|
|
27
|
+
key : Single entity name or list of entity names (natural language labels)
|
|
28
|
+
performance : O(1) per key
|
|
29
|
+
available : Stage 1+
|
|
30
|
+
examples :
|
|
31
|
+
- LOOKUP "Sarah"
|
|
32
|
+
- LOOKUP ["Sarah", "Mike", "Emily"]
|
|
33
|
+
- LOOKUP "Project Alpha"
|
|
34
|
+
|
|
35
|
+
FuzzyQuery ::= FUZZY <text:string> [THRESHOLD <t:float>] [LIMIT <n:int>]
|
|
36
|
+
text : Search text (partial/misspelled)
|
|
37
|
+
threshold : Similarity score 0.0-1.0 (default: 0.5)
|
|
38
|
+
limit : Max results (default: 5)
|
|
39
|
+
performance : Indexed (pg_trgm)
|
|
40
|
+
available : Stage 1+
|
|
41
|
+
example : FUZZY "sara" THRESHOLD 0.5 LIMIT 10
|
|
42
|
+
|
|
43
|
+
SearchQuery ::= SEARCH <text:string> [TABLE <table:string>] [WHERE <clause:string>] [LIMIT <n:int>]
|
|
44
|
+
text : Semantic query text
|
|
45
|
+
table : Target table (default: "resources")
|
|
46
|
+
clause : Optional PostgreSQL WHERE clause for hybrid filtering (combines vector + structured)
|
|
47
|
+
limit : Max results (default: 10)
|
|
48
|
+
performance : Indexed (pgvector)
|
|
49
|
+
available : Stage 3+
|
|
50
|
+
examples :
|
|
51
|
+
- SEARCH "database migration" TABLE resources LIMIT 10
|
|
52
|
+
- SEARCH "team discussion" TABLE moments WHERE "moment_type='meeting'" LIMIT 5
|
|
53
|
+
- SEARCH "project updates" WHERE "created_at >= '2024-01-01'" LIMIT 20
|
|
54
|
+
- SEARCH "AI research" WHERE "tags @> ARRAY['machine-learning']" LIMIT 10
|
|
55
|
+
|
|
56
|
+
Hybrid Query Support: SEARCH combines semantic vector similarity with structured filtering.
|
|
57
|
+
Use WHERE clause to filter on system fields or entity-specific fields.
|
|
58
|
+
|
|
59
|
+
SqlQuery ::= SQL <table:string> [WHERE <clause:string>] [ORDER BY <order:string>] [LIMIT <n:int>]
|
|
60
|
+
table : Table name ("resources", "moments", etc.)
|
|
61
|
+
clause : PostgreSQL WHERE conditions (any valid PostgreSQL syntax)
|
|
62
|
+
order : ORDER BY clause
|
|
63
|
+
limit : Max results
|
|
64
|
+
performance : O(n) with indexes
|
|
65
|
+
available : Stage 1+
|
|
66
|
+
dialect : PostgreSQL (supports all PostgreSQL features: JSONB operators, array operators, etc.)
|
|
67
|
+
examples :
|
|
68
|
+
- SQL moments WHERE "moment_type='meeting'" ORDER BY starts_timestamp DESC LIMIT 10
|
|
69
|
+
- SQL resources WHERE "metadata->>'status' = 'published'" LIMIT 20
|
|
70
|
+
- SQL moments WHERE "tags && ARRAY['urgent', 'bug']" ORDER BY created_at DESC
|
|
71
|
+
|
|
72
|
+
PostgreSQL Dialect: SQL queries use PostgreSQL syntax with full support for:
|
|
73
|
+
- JSONB operators (->>, ->, @>, etc.)
|
|
74
|
+
- Array operators (&&, @>, <@, etc.)
|
|
75
|
+
- Advanced filtering and aggregations
|
|
76
|
+
|
|
77
|
+
TraverseQuery ::= TRAVERSE [<edge_types:list>] WITH <initial_query:Query> [DEPTH <d:int>] [ORDER BY <order:string>] [LIMIT <n:int>]
|
|
78
|
+
edge_types : Relationship types to follow (e.g., ["manages", "reports-to"], default: all)
|
|
79
|
+
initial_query : Starting query (typically LOOKUP)
|
|
80
|
+
depth : Number of hops (0=PLAN mode, 1=single hop, N=multi-hop, default: 1)
|
|
81
|
+
order : Order results (default: "edge.created_at DESC")
|
|
82
|
+
limit : Max nodes (default: 9)
|
|
83
|
+
performance : O(k) where k = visited nodes
|
|
84
|
+
available : Stage 3+
|
|
85
|
+
examples :
|
|
86
|
+
- TRAVERSE manages WITH LOOKUP "Sally" DEPTH 1
|
|
87
|
+
- TRAVERSE WITH LOOKUP "Sally" DEPTH 0 (PLAN mode: edge analysis only)
|
|
88
|
+
- TRAVERSE manages,reports-to WITH LOOKUP "Sarah" DEPTH 2 LIMIT 5
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### System Fields (CoreModel)
|
|
92
|
+
|
|
93
|
+
All REM entities inherit from CoreModel and have these system fields:
|
|
94
|
+
|
|
95
|
+
* **id** (UUID or string): Unique identifier
|
|
96
|
+
* **created_at** (timestamp): Entity creation time (RECOMMENDED for filtering)
|
|
97
|
+
* **updated_at** (timestamp): Last modification time (RECOMMENDED for filtering)
|
|
98
|
+
* **deleted_at** (timestamp): Soft deletion time (null if active)
|
|
99
|
+
* **tenant_id** (string): Optional; reserved for future multi-tenant SaaS use (kept for backward compatibility)
|
|
100
|
+
* **user_id** (string): Owner user identifier (primary isolation scope, auto-filtered)
|
|
101
|
+
* **graph_edges** (JSONB array): Knowledge graph edges - USE IN SELECT, NOT WHERE
|
|
102
|
+
* **metadata** (JSONB object): Flexible metadata storage
|
|
103
|
+
* **tags** (array of strings): Entity tags
|
|
104
|
+
|
|
105
|
+
**CRITICAL: graph_edges Usage Rules:**
|
|
106
|
+
|
|
107
|
+
* ✓ DO: Select `graph_edges` in result sets to see relationships
|
|
108
|
+
* ✗ DON'T: Filter by `graph_edges` in WHERE clauses (edge names vary by entity)
|
|
109
|
+
* ✓ DO: Use TRAVERSE queries to follow graph edges
|
|
110
|
+
|
|
111
|
+
Example CORRECT:
|
|
112
|
+
```sql
|
|
113
|
+
SELECT id, name, created_at, graph_edges FROM resources WHERE created_at >= '2024-01-01'
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
Example WRONG:
|
|
117
|
+
```sql
|
|
118
|
+
-- Edge names are unknown and vary by entity!
|
|
119
|
+
SELECT * FROM resources WHERE graph_edges @> '[{"dst": "sarah"}]'
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### Main Tables (Resources, Moments, Files)
|
|
123
|
+
|
|
124
|
+
**Resources table:**
|
|
125
|
+
|
|
126
|
+
* **name** (string): Human-readable resource name
|
|
127
|
+
* **uri** (string): Content URI/identifier
|
|
128
|
+
* **content** (text): Resource content
|
|
129
|
+
* **timestamp** (timestamp): Content creation time (use for temporal filtering)
|
|
130
|
+
* **category** (string): Resource category (document, conversation, artifact, etc.)
|
|
131
|
+
* **related_entities** (JSONB): Extracted entities
|
|
132
|
+
|
|
133
|
+
**Moments table:**
|
|
134
|
+
|
|
135
|
+
* **name** (string): Human-readable moment name
|
|
136
|
+
* **moment_type** (string): Moment classification (meeting, coding-session, conversation, etc.)
|
|
137
|
+
* **category** (string): Moment category
|
|
138
|
+
* **starts_timestamp** (timestamp): Start time (use for temporal filtering)
|
|
139
|
+
* **ends_timestamp** (timestamp): End time
|
|
140
|
+
* **present_persons** (JSONB): People present in moment
|
|
141
|
+
* **emotion_tags** (array): Sentiment tags (happy, frustrated, focused, etc.)
|
|
142
|
+
* **topic_tags** (array): Topic/concept tags
|
|
143
|
+
* **summary** (text): Natural language description
|
|
144
|
+
|
|
145
|
+
**Files table:**
|
|
146
|
+
|
|
147
|
+
* **name** (string): File name
|
|
148
|
+
* **uri** (string): File URI/path
|
|
149
|
+
* **mime_type** (string): File MIME type
|
|
150
|
+
* **size_bytes** (integer): File size
|
|
151
|
+
* **processing_status** (string): Processing status (pending, completed, failed)
|
|
152
|
+
* **category** (string): File category
|
|
153
|
+
|
|
154
|
+
### Recommended Filtering Fields
|
|
155
|
+
|
|
156
|
+
* **Temporal**: created_at, updated_at, timestamp, starts_timestamp, ends_timestamp
|
|
157
|
+
* **Categorical**: category, moment_type, mime_type, processing_status
|
|
158
|
+
* **Arrays**: tags, emotion_tags, topic_tags (use && or @> operators)
|
|
159
|
+
* **Text**: name, content, summary (use ILIKE for pattern matching)
|
|
160
|
+
|
|
161
|
+
Use these fields in WHERE clauses for both SEARCH (hybrid) and SQL queries.
|
|
162
|
+
|
|
163
|
+
### Python API
|
|
164
|
+
|
|
165
|
+
```python
|
|
166
|
+
# LOOKUP - O(1) entity retrieval by natural language key
|
|
167
|
+
RemQuery(
|
|
168
|
+
query_type=QueryType.LOOKUP,
|
|
169
|
+
parameters=LookupParameters(key="Sarah")
|
|
170
|
+
)
|
|
171
|
+
|
|
172
|
+
# FUZZY - Trigram-based fuzzy text search
|
|
173
|
+
RemQuery(
|
|
174
|
+
query_type=QueryType.FUZZY,
|
|
175
|
+
parameters=FuzzyParameters(query_text="sara", threshold=0.5, limit=5)
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
# SEARCH - Vector similarity search using embeddings
|
|
179
|
+
RemQuery(
|
|
180
|
+
query_type=QueryType.SEARCH,
|
|
181
|
+
parameters=SearchParameters(query_text="database migration to TiDB", table_name="resources", limit=10)
|
|
182
|
+
)
|
|
183
|
+
|
|
184
|
+
# SQL - Direct SQL execution (tenant-isolated)
|
|
185
|
+
RemQuery(
|
|
186
|
+
query_type=QueryType.SQL,
|
|
187
|
+
parameters=SQLParameters(table_name="moments", where_clause="moment_type='meeting'", order_by="starts_timestamp DESC", limit=10)
|
|
188
|
+
)
|
|
189
|
+
|
|
190
|
+
# TRAVERSE - Recursive graph traversal following edges
|
|
191
|
+
RemQuery(
|
|
192
|
+
query_type=QueryType.TRAVERSE,
|
|
193
|
+
parameters=TraverseParameters(initial_query="Sally", edge_types=["manages"], max_depth=2, order_by="edge.created_at DESC", limit=9)
|
|
194
|
+
)
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
### Query Availability by Evolution Stage
|
|
198
|
+
|
|
199
|
+
| Query Type | Stage 0 | Stage 1 | Stage 2 | Stage 3 | Stage 4 |
|
|
200
|
+
|------------|---------|---------|---------|---------|---------|
|
|
201
|
+
| LOOKUP | ✗ | ✓ | ✓ | ✓ | ✓ |
|
|
202
|
+
| FUZZY | ✗ | ✓ | ✓ | ✓ | ✓ |
|
|
203
|
+
| SEARCH | ✗ | ✗ | ✗ | ✓ | ✓ |
|
|
204
|
+
| SQL | ✗ | ✓ | ✓ | ✓ | ✓ |
|
|
205
|
+
| TRAVERSE | ✗ | ✗ | ✗ | ✓ | ✓ |
|
|
206
|
+
|
|
207
|
+
**Stage 0** (0% answerable): No processed data yet (raw resources only), so all queries fail.
|
|
208
|
+
|
|
209
|
+
**Stage 1** (20% answerable): Resources seeded with entity extraction. LOOKUP and FUZZY work for finding entities. SQL works for basic filtering.
|
|
210
|
+
|
|
211
|
+
**Stage 2** (50% answerable): Moments extracted. SQL temporal queries work. LOOKUP includes moment entities.
|
|
212
|
+
|
|
213
|
+
**Stage 3** (80% answerable): Affinity graph built. SEARCH and TRAVERSE become available. Multi-hop graph queries work.
|
|
214
|
+
|
|
215
|
+
**Stage 4** (100% answerable): Mature graph with rich historical data. All query types fully functional with high-quality results.
|
|
216
|
+
|
|
217
|
+
## Query Types
|
|
218
|
+
|
|
219
|
+
The service supports schema-agnostic and indexed query operations with strict performance contracts:
|
|
220
|
+
|
|
221
|
+
* **LOOKUP**: O(1) entity retrieval by natural language key (via `kv_store`).
|
|
222
|
+
* **FUZZY**: Trigram-based fuzzy text search (indexed).
|
|
223
|
+
* **SEARCH**: Vector similarity search using embeddings (requires `pgvector`).
|
|
224
|
+
* **SQL**: Direct SQL execution (tenant-isolated).
|
|
225
|
+
* **TRAVERSE**: Recursive graph traversal (O(k) where k = visited nodes).
|
|
226
|
+
|
|
227
|
+
## Graph Traversal (`TRAVERSE`)
|
|
228
|
+
|
|
229
|
+
The `TRAVERSE` operation allows agents to explore the knowledge graph by following edges between entities.
|
|
230
|
+
|
|
231
|
+
### Contract
|
|
232
|
+
* **Performance**: O(k) where k is the number of visited nodes.
|
|
233
|
+
* **Polymorphism**: Seamlessly traverses relationships between different entity types (`Resources`, `Moments`, `Users`, etc.).
|
|
234
|
+
* **Filtering**: Supports filtering by relationship type(s).
|
|
235
|
+
* **Cycle Detection**: Built-in cycle detection prevents infinite loops.
|
|
236
|
+
|
|
237
|
+
### Data Model
|
|
238
|
+
Graph traversal relies on the `InlineEdge` Pydantic model stored in the `graph_edges` JSONB column of every entity table.
|
|
239
|
+
|
|
240
|
+
**Expected JSON Structure (`InlineEdge`):**
|
|
241
|
+
```json
|
|
242
|
+
{
|
|
243
|
+
"dst": "target-entity-key", // Human-readable key (NOT UUID)
|
|
244
|
+
"rel_type": "authored_by", // Relationship type
|
|
245
|
+
"weight": 0.8, // Connection strength (0.0-1.0)
|
|
246
|
+
"properties": { ... } // Additional metadata
|
|
247
|
+
}
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
### Usage
|
|
251
|
+
The `TRAVERSE` query accepts the following parameters:
|
|
252
|
+
|
|
253
|
+
* `initial_query` (str): The starting entity key.
|
|
254
|
+
* `max_depth` (int): Maximum number of hops (default: 1).
|
|
255
|
+
* `edge_types` (list[str]): List of relationship types to follow. If empty or `['*']`, follows all edges.
|
|
256
|
+
|
|
257
|
+
**Example:**
|
|
258
|
+
```python
|
|
259
|
+
# Find entities connected to "Project X" via "depends_on" or "related_to" edges, up to 2 hops deep.
|
|
260
|
+
result = await rem_service.execute_query(
|
|
261
|
+
RemQuery(
|
|
262
|
+
query_type=QueryType.TRAVERSE,
|
|
263
|
+
parameters=TraverseParameters(
|
|
264
|
+
initial_query="Project X",
|
|
265
|
+
max_depth=2,
|
|
266
|
+
edge_types=["depends_on", "related_to"]
|
|
267
|
+
),
|
|
268
|
+
user_id="user-123"
|
|
269
|
+
)
|
|
270
|
+
)
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
## Memory Evolution Through Dreaming
|
|
274
|
+
|
|
275
|
+
REM improves query answerability over time through background dreaming workflows:
|
|
276
|
+
|
|
277
|
+
* **Stage 0**: Raw resources only (0% answerable)
|
|
278
|
+
* **Stage 1**: Entity extraction complete (20% answerable, LOOKUP works)
|
|
279
|
+
* **Stage 2**: Moments generated (50% answerable, temporal queries work)
|
|
280
|
+
* **Stage 3**: Affinity matching complete (80% answerable, semantic/graph queries work)
|
|
281
|
+
* **Stage 4**: Multiple dreaming cycles (100% answerable, full query capabilities)
|
|
282
|
+
|
|
283
|
+
Dreaming workers extract temporal narratives (moments) and build semantic graph edges (affinity) from resources, progressively enriching the knowledge graph.
|
|
284
|
+
|
|
285
|
+
## Testing Approach
|
|
286
|
+
|
|
287
|
+
REM testing follows a quality-driven methodology focused on query evolution:
|
|
288
|
+
|
|
289
|
+
**Critical Principle**: Test with user-known information only. Users provide natural language ("Sarah", "Project Alpha"), not internal representations ("sarah-chen", "project-alpha").
|
|
290
|
+
|
|
291
|
+
**Quality Validation**:
|
|
292
|
+
|
|
293
|
+
* Moment quality: Temporal validity, person extraction, speaker identification, tag quality, entity references, temporal coverage, type distribution
|
|
294
|
+
* Affinity quality: Edge existence, edge format, semantic relevance, bidirectional edges, entity connections, graph connectivity, edge distribution
|
|
295
|
+
|
|
296
|
+
**Integration Tests**: Validate progressive query answerability across memory evolution stages. Test suite includes realistic queries simulating multi-turn LLM-database conversations.
|
|
297
|
+
|
|
298
|
+
See `tests/integration/test_rem_query_evolution.py` for stage-based validation and `tests/integration/test_graph_traversal.py` for graph query testing.
|
|
299
|
+
|
|
300
|
+
## Architecture Notes
|
|
301
|
+
|
|
302
|
+
* **Unified View**: The underlying SQL function `rem_traverse` uses a view `all_graph_edges` that unions `graph_edges` from all entity tables (`resources`, `moments`, `users`, etc.). This enables polymorphic traversal without complex joins in the application layer.
|
|
303
|
+
* **KV Store**: Edge destinations (`dst`) are resolved to entity IDs using the `kv_store`. This requires that all traversable entities have an entry in the `kv_store` (handled automatically by database triggers).
|
|
304
|
+
* **Iterated Retrieval**: REM is architected for multi-turn retrieval where LLMs conduct conversational database exploration. Each query informs the next, enabling emergent information discovery without requiring upfront schema knowledge.
|
|
305
|
+
|
|
306
|
+
## Scaling & Architectural Decisions
|
|
307
|
+
|
|
308
|
+
### 1. Hybrid Adjacency List
|
|
309
|
+
REM implements a **Hybrid Adjacency List** pattern to balance strict relational guarantees with graph flexibility:
|
|
310
|
+
* **Primary Storage (Source of Truth):** Standard PostgreSQL tables (`resources`, `moments`, etc.) enforce schema validation, constraints, and type safety.
|
|
311
|
+
* **Graph Overlay:** Relationships are stored as "inline edges" within a JSONB column (`graph_edges`) on each entity.
|
|
312
|
+
* **Performance Layer:** A denormalized `UNLOGGED` table (`kv_store`) acts as a high-speed cache, mapping human-readable keys to internal UUIDs and edges. This sidesteps the traditional "join bomb" of traversing normalized SQL tables without taking on the operational complexity of a separate graph database (e.g., Neo4j).
|
|
313
|
+
|
|
314
|
+
### 2. The Pareto Principle in Graph Algorithms
|
|
315
|
+
We explicitly choose **Simplicity over Full-Scale Graph Analytics**.
|
|
316
|
+
* **Hypothesis:** For LLM Agent workloads, 80% of the value is derived from **local context retrieval** (1-3 hops via `LOOKUP` and `TRAVERSE`).
|
|
317
|
+
* **Diminishing Returns:** Global graph algorithms (PageRank, Community Detection) offer diminishing returns for real-time agentic retrieval tasks. Agents typically need to answer specific questions ("Who worked on file X?"), which is a local neighborhood problem, not a global cluster analysis problem.
|
|
318
|
+
* **Future Scaling:** If deeper analysis is needed, we prefer **Graph + Vector (RAG)** approaches (using semantic similarity to find implicit links) over complex explicit graph algorithms.
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""
|
|
2
|
+
REM query execution and graph operations service.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from .exceptions import (
|
|
6
|
+
ContentFieldNotFoundError,
|
|
7
|
+
EmbeddingFieldNotFoundError,
|
|
8
|
+
FieldNotFoundError,
|
|
9
|
+
InvalidParametersError,
|
|
10
|
+
QueryExecutionError,
|
|
11
|
+
REMException,
|
|
12
|
+
)
|
|
13
|
+
from .service import RemService
|
|
14
|
+
|
|
15
|
+
__all__ = [
|
|
16
|
+
"RemService",
|
|
17
|
+
"REMException",
|
|
18
|
+
"FieldNotFoundError",
|
|
19
|
+
"EmbeddingFieldNotFoundError",
|
|
20
|
+
"ContentFieldNotFoundError",
|
|
21
|
+
"QueryExecutionError",
|
|
22
|
+
"InvalidParametersError",
|
|
23
|
+
]
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""
|
|
2
|
+
REM service exceptions.
|
|
3
|
+
|
|
4
|
+
Custom exceptions for REM query execution errors.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class REMException(Exception):
    """Root of the REM service exception hierarchy.

    Every other exception defined in this module derives from this class,
    so catching it handles all of them at once.
    """
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class FieldNotFoundError(REMException):
    """Signal that a requested field is absent from a model.

    Attributes:
        model_name: Name of the model that was inspected.
        field_name: Name of the field that could not be found.
        available_fields: Field names passed in as available on the model;
            they are listed in the error message.
    """

    def __init__(self, model_name: str, field_name: str, available_fields: list[str]):
        # Compose the full message up front, then hand it to the base class.
        known = ", ".join(available_fields)
        message = (
            f"Field '{field_name}' not found in model '{model_name}'. "
            f"Available fields: {known}"
        )
        super().__init__(message)

        # Keep the raw inputs so handlers can inspect them programmatically.
        self.model_name = model_name
        self.field_name = field_name
        self.available_fields = available_fields
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class EmbeddingFieldNotFoundError(REMException):
    """Signal a search against a field that has no embeddings.

    Attributes:
        model_name: Name of the model being searched.
        field_name: Name of the field that lacks embeddings.
        embeddable_fields: Field names passed in as embeddable; may be empty.
    """

    def __init__(self, model_name: str, field_name: str, embeddable_fields: list[str]):
        self.model_name = model_name
        self.field_name = field_name
        self.embeddable_fields = embeddable_fields

        # Point the caller at the fields that can be searched, or say
        # explicitly that none were supplied.
        hint = (
            f"Embeddable fields: {', '.join(embeddable_fields)}"
            if embeddable_fields
            else "No embeddable fields configured for this model."
        )
        super().__init__(
            f"Field '{field_name}' in model '{model_name}' does not have embeddings. "
            + hint
        )
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class ContentFieldNotFoundError(REMException):
    """Signal that a model lacks the 'content' field used for default embedding search.

    Attributes:
        model_name: Name of the model that has no 'content' field.
        available_fields: Field names passed in as available on the model;
            they are listed in the error message.
    """

    def __init__(self, model_name: str, available_fields: list[str]):
        self.model_name = model_name
        self.available_fields = available_fields

        listing = ", ".join(available_fields)
        # The last sentence has no placeholders, so it is a plain literal.
        super().__init__(
            f"Model '{model_name}' has no 'content' field. "
            f"Available fields: {listing}. "
            "Specify field_name explicitly in SearchParameters."
        )
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class QueryExecutionError(REMException):
    """Signal that executing a REM query failed.

    Attributes:
        query_type: Label of the query kind that failed; it prefixes the message.
        original_error: The underlying exception, if one was supplied. It is
            only stored on the instance; this class does not chain it itself.
    """

    def __init__(self, query_type: str, message: str, original_error: Exception | None = None):
        description = f"{query_type} query failed: {message}"
        super().__init__(description)

        self.query_type = query_type
        self.original_error = original_error
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class InvalidParametersError(REMException):
    """Signal that the parameters supplied for a query are invalid.

    Attributes:
        query_type: Label of the query kind whose parameters were rejected;
            it is embedded in the message.
    """

    def __init__(self, query_type: str, message: str):
        description = f"Invalid {query_type} parameters: {message}"
        super().__init__(description)

        self.query_type = query_type
|