remdb 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (187) hide show
  1. rem/__init__.py +2 -0
  2. rem/agentic/README.md +650 -0
  3. rem/agentic/__init__.py +39 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +8 -0
  6. rem/agentic/context.py +148 -0
  7. rem/agentic/context_builder.py +329 -0
  8. rem/agentic/mcp/__init__.py +0 -0
  9. rem/agentic/mcp/tool_wrapper.py +107 -0
  10. rem/agentic/otel/__init__.py +5 -0
  11. rem/agentic/otel/setup.py +151 -0
  12. rem/agentic/providers/phoenix.py +674 -0
  13. rem/agentic/providers/pydantic_ai.py +572 -0
  14. rem/agentic/query.py +117 -0
  15. rem/agentic/query_helper.py +89 -0
  16. rem/agentic/schema.py +396 -0
  17. rem/agentic/serialization.py +245 -0
  18. rem/agentic/tools/__init__.py +5 -0
  19. rem/agentic/tools/rem_tools.py +231 -0
  20. rem/api/README.md +420 -0
  21. rem/api/main.py +324 -0
  22. rem/api/mcp_router/prompts.py +182 -0
  23. rem/api/mcp_router/resources.py +536 -0
  24. rem/api/mcp_router/server.py +213 -0
  25. rem/api/mcp_router/tools.py +584 -0
  26. rem/api/routers/auth.py +229 -0
  27. rem/api/routers/chat/__init__.py +5 -0
  28. rem/api/routers/chat/completions.py +281 -0
  29. rem/api/routers/chat/json_utils.py +76 -0
  30. rem/api/routers/chat/models.py +124 -0
  31. rem/api/routers/chat/streaming.py +185 -0
  32. rem/auth/README.md +258 -0
  33. rem/auth/__init__.py +26 -0
  34. rem/auth/middleware.py +100 -0
  35. rem/auth/providers/__init__.py +13 -0
  36. rem/auth/providers/base.py +376 -0
  37. rem/auth/providers/google.py +163 -0
  38. rem/auth/providers/microsoft.py +237 -0
  39. rem/cli/README.md +455 -0
  40. rem/cli/__init__.py +8 -0
  41. rem/cli/commands/README.md +126 -0
  42. rem/cli/commands/__init__.py +3 -0
  43. rem/cli/commands/ask.py +566 -0
  44. rem/cli/commands/configure.py +497 -0
  45. rem/cli/commands/db.py +493 -0
  46. rem/cli/commands/dreaming.py +324 -0
  47. rem/cli/commands/experiments.py +1302 -0
  48. rem/cli/commands/mcp.py +66 -0
  49. rem/cli/commands/process.py +245 -0
  50. rem/cli/commands/schema.py +183 -0
  51. rem/cli/commands/serve.py +106 -0
  52. rem/cli/dreaming.py +363 -0
  53. rem/cli/main.py +96 -0
  54. rem/config.py +237 -0
  55. rem/mcp_server.py +41 -0
  56. rem/models/core/__init__.py +49 -0
  57. rem/models/core/core_model.py +64 -0
  58. rem/models/core/engram.py +333 -0
  59. rem/models/core/experiment.py +628 -0
  60. rem/models/core/inline_edge.py +132 -0
  61. rem/models/core/rem_query.py +243 -0
  62. rem/models/entities/__init__.py +43 -0
  63. rem/models/entities/file.py +57 -0
  64. rem/models/entities/image_resource.py +88 -0
  65. rem/models/entities/message.py +35 -0
  66. rem/models/entities/moment.py +123 -0
  67. rem/models/entities/ontology.py +191 -0
  68. rem/models/entities/ontology_config.py +131 -0
  69. rem/models/entities/resource.py +95 -0
  70. rem/models/entities/schema.py +87 -0
  71. rem/models/entities/user.py +85 -0
  72. rem/py.typed +0 -0
  73. rem/schemas/README.md +507 -0
  74. rem/schemas/__init__.py +6 -0
  75. rem/schemas/agents/README.md +92 -0
  76. rem/schemas/agents/core/moment-builder.yaml +178 -0
  77. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  78. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  79. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  80. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  81. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  82. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  83. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  84. rem/schemas/agents/examples/hello-world.yaml +37 -0
  85. rem/schemas/agents/examples/query.yaml +54 -0
  86. rem/schemas/agents/examples/simple.yaml +21 -0
  87. rem/schemas/agents/examples/test.yaml +29 -0
  88. rem/schemas/agents/rem.yaml +128 -0
  89. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  90. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  91. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  92. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  93. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  94. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  95. rem/services/__init__.py +16 -0
  96. rem/services/audio/INTEGRATION.md +308 -0
  97. rem/services/audio/README.md +376 -0
  98. rem/services/audio/__init__.py +15 -0
  99. rem/services/audio/chunker.py +354 -0
  100. rem/services/audio/transcriber.py +259 -0
  101. rem/services/content/README.md +1269 -0
  102. rem/services/content/__init__.py +5 -0
  103. rem/services/content/providers.py +806 -0
  104. rem/services/content/service.py +676 -0
  105. rem/services/dreaming/README.md +230 -0
  106. rem/services/dreaming/__init__.py +53 -0
  107. rem/services/dreaming/affinity_service.py +336 -0
  108. rem/services/dreaming/moment_service.py +264 -0
  109. rem/services/dreaming/ontology_service.py +54 -0
  110. rem/services/dreaming/user_model_service.py +297 -0
  111. rem/services/dreaming/utils.py +39 -0
  112. rem/services/embeddings/__init__.py +11 -0
  113. rem/services/embeddings/api.py +120 -0
  114. rem/services/embeddings/worker.py +421 -0
  115. rem/services/fs/README.md +662 -0
  116. rem/services/fs/__init__.py +62 -0
  117. rem/services/fs/examples.py +206 -0
  118. rem/services/fs/examples_paths.py +204 -0
  119. rem/services/fs/git_provider.py +935 -0
  120. rem/services/fs/local_provider.py +760 -0
  121. rem/services/fs/parsing-hooks-examples.md +172 -0
  122. rem/services/fs/paths.py +276 -0
  123. rem/services/fs/provider.py +460 -0
  124. rem/services/fs/s3_provider.py +1042 -0
  125. rem/services/fs/service.py +186 -0
  126. rem/services/git/README.md +1075 -0
  127. rem/services/git/__init__.py +17 -0
  128. rem/services/git/service.py +469 -0
  129. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  130. rem/services/phoenix/README.md +453 -0
  131. rem/services/phoenix/__init__.py +46 -0
  132. rem/services/phoenix/client.py +686 -0
  133. rem/services/phoenix/config.py +88 -0
  134. rem/services/phoenix/prompt_labels.py +477 -0
  135. rem/services/postgres/README.md +575 -0
  136. rem/services/postgres/__init__.py +23 -0
  137. rem/services/postgres/migration_service.py +427 -0
  138. rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
  139. rem/services/postgres/register_type.py +352 -0
  140. rem/services/postgres/repository.py +337 -0
  141. rem/services/postgres/schema_generator.py +379 -0
  142. rem/services/postgres/service.py +802 -0
  143. rem/services/postgres/sql_builder.py +354 -0
  144. rem/services/rem/README.md +304 -0
  145. rem/services/rem/__init__.py +23 -0
  146. rem/services/rem/exceptions.py +71 -0
  147. rem/services/rem/executor.py +293 -0
  148. rem/services/rem/parser.py +145 -0
  149. rem/services/rem/queries.py +196 -0
  150. rem/services/rem/query.py +371 -0
  151. rem/services/rem/service.py +527 -0
  152. rem/services/session/README.md +374 -0
  153. rem/services/session/__init__.py +6 -0
  154. rem/services/session/compression.py +360 -0
  155. rem/services/session/reload.py +77 -0
  156. rem/settings.py +1235 -0
  157. rem/sql/002_install_models.sql +1068 -0
  158. rem/sql/background_indexes.sql +42 -0
  159. rem/sql/install_models.sql +1038 -0
  160. rem/sql/migrations/001_install.sql +503 -0
  161. rem/sql/migrations/002_install_models.sql +1202 -0
  162. rem/utils/AGENTIC_CHUNKING.md +597 -0
  163. rem/utils/README.md +583 -0
  164. rem/utils/__init__.py +43 -0
  165. rem/utils/agentic_chunking.py +622 -0
  166. rem/utils/batch_ops.py +343 -0
  167. rem/utils/chunking.py +108 -0
  168. rem/utils/clip_embeddings.py +276 -0
  169. rem/utils/dict_utils.py +98 -0
  170. rem/utils/embeddings.py +423 -0
  171. rem/utils/examples/embeddings_example.py +305 -0
  172. rem/utils/examples/sql_types_example.py +202 -0
  173. rem/utils/markdown.py +16 -0
  174. rem/utils/model_helpers.py +236 -0
  175. rem/utils/schema_loader.py +336 -0
  176. rem/utils/sql_types.py +348 -0
  177. rem/utils/user_id.py +81 -0
  178. rem/utils/vision.py +330 -0
  179. rem/workers/README.md +506 -0
  180. rem/workers/__init__.py +5 -0
  181. rem/workers/dreaming.py +502 -0
  182. rem/workers/engram_processor.py +312 -0
  183. rem/workers/sqs_file_processor.py +193 -0
  184. remdb-0.3.0.dist-info/METADATA +1455 -0
  185. remdb-0.3.0.dist-info/RECORD +187 -0
  186. remdb-0.3.0.dist-info/WHEEL +4 -0
  187. remdb-0.3.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,1455 @@
1
+ Metadata-Version: 2.4
2
+ Name: remdb
3
+ Version: 0.3.0
4
+ Summary: Resources Entities Moments - Bio-inspired memory system for agentic AI workloads
5
+ Project-URL: Homepage, https://github.com/Percolation-Labs/reminiscent
6
+ Project-URL: Documentation, https://github.com/Percolation-Labs/reminiscent/blob/main/README.md
7
+ Project-URL: Repository, https://github.com/Percolation-Labs/reminiscent
8
+ Project-URL: Issues, https://github.com/Percolation-Labs/reminiscent/issues
9
+ Author-email: mr-saoirse <amartey@gmail.com>
10
+ License: MIT
11
+ Keywords: agents,ai,mcp,memory,postgresql,vector-search
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
17
+ Requires-Python: >=3.12
18
+ Requires-Dist: aioboto3>=13.0.0
19
+ Requires-Dist: arize-phoenix>=5.0.0
20
+ Requires-Dist: asyncpg>=0.30.0
21
+ Requires-Dist: boto3>=1.35.0
22
+ Requires-Dist: click>=8.1.0
23
+ Requires-Dist: fastapi>=0.115.0
24
+ Requires-Dist: fastmcp>=0.5.0
25
+ Requires-Dist: gitpython>=3.1.45
26
+ Requires-Dist: gmft==0.3.1
27
+ Requires-Dist: hypercorn>=0.17.0
28
+ Requires-Dist: itsdangerous>=2.0.0
29
+ Requires-Dist: json-schema-to-pydantic>=0.2.0
30
+ Requires-Dist: kreuzberg[gmft]>=3.21.0
31
+ Requires-Dist: loguru>=0.7.0
32
+ Requires-Dist: openinference-instrumentation-pydantic-ai>=0.1.0
33
+ Requires-Dist: opentelemetry-api>=1.28.0
34
+ Requires-Dist: opentelemetry-exporter-otlp-proto-grpc>=1.28.0
35
+ Requires-Dist: opentelemetry-exporter-otlp-proto-http>=1.28.0
36
+ Requires-Dist: opentelemetry-exporter-otlp>=1.28.0
37
+ Requires-Dist: opentelemetry-instrumentation-fastapi>=0.49b0
38
+ Requires-Dist: opentelemetry-instrumentation>=0.49b0
39
+ Requires-Dist: opentelemetry-sdk>=1.28.0
40
+ Requires-Dist: psycopg[binary]>=3.2.0
41
+ Requires-Dist: pydantic-ai>=0.0.14
42
+ Requires-Dist: pydantic-settings>=2.6.0
43
+ Requires-Dist: pydantic>=2.10.0
44
+ Requires-Dist: pydub>=0.25.0
45
+ Requires-Dist: python-dotenv>=1.0.0
46
+ Requires-Dist: pyyaml>=6.0.0
47
+ Requires-Dist: requests>=2.32.0
48
+ Requires-Dist: semchunk>=2.2.0
49
+ Requires-Dist: tenacity>=9.0.0
50
+ Requires-Dist: tiktoken>=0.5.0
51
+ Requires-Dist: uvicorn[standard]>=0.32.0
52
+ Provides-Extra: all
53
+ Requires-Dist: ipdb>=0.13.0; extra == 'all'
54
+ Requires-Dist: ipython>=8.29.0; extra == 'all'
55
+ Requires-Dist: json-schema-to-pydantic>=0.2.0; extra == 'all'
56
+ Requires-Dist: mypy>=1.13.0; extra == 'all'
57
+ Requires-Dist: pandas-stubs>=2.0.0; extra == 'all'
58
+ Requires-Dist: pillow>=10.0.0; extra == 'all'
59
+ Requires-Dist: polars>=1.0.0; extra == 'all'
60
+ Requires-Dist: pydub>=0.25.0; extra == 'all'
61
+ Requires-Dist: pytest-asyncio>=0.24.0; extra == 'all'
62
+ Requires-Dist: pytest-cov>=6.0.0; extra == 'all'
63
+ Requires-Dist: pytest-mock>=3.14.0; extra == 'all'
64
+ Requires-Dist: pytest>=8.0.0; extra == 'all'
65
+ Requires-Dist: ruff>=0.8.0; extra == 'all'
66
+ Requires-Dist: types-pyyaml>=6.0.0; extra == 'all'
67
+ Provides-Extra: audio
68
+ Requires-Dist: pydub>=0.25.0; extra == 'audio'
69
+ Provides-Extra: dev
70
+ Requires-Dist: ipdb>=0.13.0; extra == 'dev'
71
+ Requires-Dist: ipython>=8.29.0; extra == 'dev'
72
+ Requires-Dist: mypy>=1.13.0; extra == 'dev'
73
+ Requires-Dist: pandas-stubs>=2.0.0; extra == 'dev'
74
+ Requires-Dist: pytest-asyncio>=0.24.0; extra == 'dev'
75
+ Requires-Dist: pytest-cov>=6.0.0; extra == 'dev'
76
+ Requires-Dist: pytest-mock>=3.14.0; extra == 'dev'
77
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
78
+ Requires-Dist: ruff>=0.8.0; extra == 'dev'
79
+ Requires-Dist: types-pyyaml>=6.0.0; extra == 'dev'
80
+ Provides-Extra: fs
81
+ Requires-Dist: pillow>=10.0.0; extra == 'fs'
82
+ Requires-Dist: polars>=1.0.0; extra == 'fs'
83
+ Requires-Dist: pydub>=0.25.0; extra == 'fs'
84
+ Provides-Extra: schema
85
+ Requires-Dist: json-schema-to-pydantic>=0.2.0; extra == 'schema'
86
+ Description-Content-Type: text/markdown
87
+
88
+ # REM - Resources Entities Moments
89
+
90
+ Cloud-native unified memory infrastructure for agentic AI systems built with Pydantic AI, FastAPI, and FastMCP.
91
+
92
+ ## Architecture Overview
93
+
94
+ <p align="center">
95
+ <img src="https://mermaid.ink/img/Z3JhcGggVEQKICAgIEFQSVtGYXN0QVBJPGJyLz5DaGF0ICsgTUNQXSAtLT4gQUdFTlRTW0pTT04gU2NoZW1hPGJyLz5BZ2VudHNdCiAgICBBR0VOVFMgLS0-IFRPT0xTW01DUCBUb29sczxici8-NSBUb29sc10KCiAgICBUT09MUyAtLT4gUVVFUllbUkVNIFF1ZXJ5PGJyLz5EaWFsZWN0XQogICAgUVVFUlkgLS0-IERCWyhQb3N0Z3JlU1FMPGJyLz4rcGd2ZWN0b3IpXQoKICAgIEZJTEVTW0ZpbGUgUHJvY2Vzc29yXSAtLT4gRFJFQU1bRHJlYW1pbmc8YnIvPldvcmtlcnNdCiAgICBEUkVBTSAtLT4gREIKCiAgICBBR0VOVFMgLS0-IE9URUxbT3BlblRlbGVtZXRyeV0KICAgIE9URUwgLS0-IFBIT0VOSVhbQXJpemU8YnIvPlBob2VuaXhdCgogICAgRVZBTFtFdmFsdWF0aW9uPGJyLz5GcmFtZXdvcmtdIC0tPiBQSE9FTklYCgogICAgY2xhc3NEZWYgYXBpIGZpbGw6IzRBOTBFMixzdHJva2U6IzJFNUM4QSxjb2xvcjojZmZmCiAgICBjbGFzc0RlZiBhZ2VudCBmaWxsOiM3QjY4RUUsc3Ryb2tlOiM0ODNEOEIsY29sb3I6I2ZmZgogICAgY2xhc3NEZWYgZGIgZmlsbDojNTBDODc4LHN0cm9rZTojMkU3RDRFLGNvbG9yOiNmZmYKICAgIGNsYXNzRGVmIG9icyBmaWxsOiM5QjU5QjYsc3Ryb2tlOiM2QzM0ODMsY29sb3I6I2ZmZgoKICAgIGNsYXNzIEFQSSxUT09MUyBhcGkKICAgIGNsYXNzIEFHRU5UUyBhZ2VudAogICAgY2xhc3MgREIsUVVFUlkgZGIKICAgIGNsYXNzIE9URUwsUEhPRU5JWCxFVkFMIG9icwo=" alt="REM Architecture" width="700">
96
+ </p>
97
+
98
+ **Key Components:**
99
+
100
+ - **API Layer**: OpenAI-compatible chat completions + MCP server (not separate deployments)
101
+ - **Agentic Framework**: JSON Schema-based agents with no-code configuration
102
+ - **Database Layer**: PostgreSQL 18 with pgvector for multi-index memory (KV + Vector + Graph)
103
+ - **REM Query Dialect**: Custom query language with O(1) lookups, semantic search, graph traversal
104
+ - **Ingestion & Dreaming**: Background workers for content extraction and progressive index enrichment (0% → 100% answerable)
105
+ - **Observability & Evals**: OpenTelemetry tracing + Arize Phoenix + LLM-as-a-Judge evaluation framework
106
+
107
+ ## Features
108
+
109
+ | Feature | Description | Benefits |
110
+ |---------|-------------|----------|
111
+ | **OpenAI-Compatible Chat API** | Drop-in replacement for OpenAI chat completions API with streaming support | Use with existing OpenAI clients, switch models across providers (OpenAI, Anthropic, etc.) |
112
+ | **Built-in MCP Server** | FastMCP server with 4 tools + 3 resources for memory operations | Export memory to Claude Desktop, Cursor, or any MCP-compatible host |
113
+ | **REM Query Engine** | Multi-index query system (LOOKUP, FUZZY, SEARCH, SQL, TRAVERSE) with custom dialect | O(1) lookups, semantic search, graph traversal - all tenant-isolated |
114
+ | **Dreaming Workers** | Background workers for entity extraction, moment generation, and affinity matching | Automatic knowledge graph construction from resources (0% → 100% query answerable) |
115
+ | **PostgreSQL + pgvector** | CloudNativePG with PostgreSQL 18, pgvector extension, streaming replication | Production-ready vector search, no external vector DB needed |
116
+ | **AWS EKS Recipe** | Complete infrastructure-as-code with Pulumi, Karpenter, ArgoCD | Deploy to production EKS in minutes with auto-scaling and GitOps |
117
+ | **JSON Schema Agents** | Dynamic agent creation from YAML schemas via Pydantic AI factory | Define agents declaratively, version control schemas, load dynamically |
118
+ | **Content Providers** | Audio transcription (Whisper), vision (GPT-4V, Claude), PDFs, DOCX, images | Multimodal ingestion out of the box with format detection |
119
+ | **Configurable Embeddings** | Provider-agnostic embedding system (OpenAI, Cohere, Jina) | Switch embedding providers via env vars, no code changes |
120
+ | **Multi-Tenancy** | Tenant isolation at database level with automatic scoping | SaaS-ready with complete data separation per tenant |
121
+ | **Streaming Everything** | SSE for chat, background workers for embeddings, async throughout | Real-time responses, non-blocking operations, scalable |
122
+ | **Zero Vendor Lock-in** | Raw HTTP clients (no OpenAI SDK), swappable providers, open standards | Not tied to any vendor, easy to migrate, full control |
123
+
124
+ ## Quick Start
125
+
126
+ Choose your path:
127
+
128
+ - **Option 1: Package Users with Example Data** (Recommended for first-time users) - PyPI + example datasets
129
+ - **Option 2: Package Users** (Recommended for non-developers) - PyPI package + dockerized database
130
+ - **Option 3: Developers** - Clone repo, local development with uv
131
+
132
+ ---
133
+
134
+ ## Option 1: Package Users with Example Data (Recommended)
135
+
136
+ **Best for**: First-time users who want to explore REM with curated example datasets.
137
+
138
+ ```bash
139
+ # Install remdb
140
+ pip install remdb[all]
141
+
142
+ # Clone example datasets
143
+ git clone https://github.com/Percolation-Labs/remstack-lab.git
144
+ cd remstack-lab
145
+
146
+ # Configure REM (interactive wizard). Note: --install creates the database tables, so start PostgreSQL (next step) first, then run this — and enter port 5050 when prompted, since the wizard defaults to 5051
147
+ rem configure --install
148
+
149
+ # Start PostgreSQL
150
+ docker run -d \
151
+ --name rem-postgres \
152
+ -e POSTGRES_USER=rem \
153
+ -e POSTGRES_PASSWORD=rem \
154
+ -e POSTGRES_DB=rem \
155
+ -p 5050:5432 \
156
+ pgvector/pgvector:pg18
157
+
158
+ # Load quickstart dataset
159
+ rem db load --file datasets/quickstart/sample_data.yaml --user-id demo-user
160
+
161
+ # Ask questions
162
+ rem ask --user-id demo-user "What documents exist in the system?"
163
+ rem ask --user-id demo-user "Show me meetings about API design"
164
+
165
+ # Try other datasets
166
+ rem db load --file datasets/domains/recruitment/scenarios/candidate_pipeline/data.yaml --user-id my-company
167
+ rem ask --user-id my-company "Show me candidates with Python experience"
168
+ ```
169
+
170
+ **What you get:**
171
+ - Quickstart: 3 users, 3 resources, 3 moments, 4 messages
172
+ - Domain datasets: recruitment, legal, enterprise, misc
173
+ - Format examples: engrams, documents, conversations, files
174
+ - Jupyter notebooks and experiments
175
+
176
+ **Learn more**: [remstack-lab repository](https://github.com/Percolation-Labs/remstack-lab)
177
+
178
+ ---
179
+
180
+ ## Option 2: Package Users (No Example Data)
181
+
182
+ **Best for**: Using REM as a service (API + CLI) without modifying code, bringing your own data.
183
+
184
+ ### Step 1: Start Database and API with Docker Compose
185
+
186
+ ```bash
187
+ # Create a project directory
188
+ mkdir my-rem-project && cd my-rem-project
189
+
190
+ # Download docker-compose file from public gist
191
+ curl -O https://gist.githubusercontent.com/percolating-sirsh/d117b673bc0edfdef1a5068ccd3cf3e5/raw/docker-compose.prebuilt.yml
192
+
193
+ # IMPORTANT: Export API keys BEFORE running docker compose
194
+ # Docker Compose reads env vars at startup - exporting them after won't work!
195
+
196
+ # Required: OpenAI for embeddings (text-embedding-3-small)
197
+ export OPENAI_API_KEY="sk-..."
198
+
199
+ # Recommended: At least one chat completion provider
200
+ export ANTHROPIC_API_KEY="sk-ant-..." # Claude Sonnet 4.5 (high quality)
201
+ export CEREBRAS_API_KEY="csk-..." # Cerebras (fast, cheap inference)
202
+
203
+ # Start PostgreSQL + API
204
+ docker compose -f docker-compose.prebuilt.yml up -d
205
+
206
+ # Verify services are running
207
+ curl http://localhost:8000/health
208
+ ```
209
+
210
+ This starts:
211
+ - **PostgreSQL** with pgvector on port **5051** (connection: `postgresql://rem:rem@localhost:5051/rem`)
212
+ - **REM API** on port **8000** with OpenAI-compatible chat completions + MCP server
213
+ - Uses pre-built Docker image from Docker Hub (no local build required)
214
+
215
+ ### Step 2: Install and Configure CLI (REQUIRED)
216
+
217
+ **This step is required** before you can use REM - it installs the database schema and configures your LLM API keys.
218
+
219
+ ```bash
220
+ # Install remdb package from PyPI
221
+ pip install remdb[all]
222
+
223
+ # Configure REM (defaults to port 5051 for package users)
224
+ rem configure --install --claude-desktop
225
+ ```
226
+
227
+ The interactive wizard will:
228
+ 1. **Configure PostgreSQL**: Defaults to `postgresql://rem:rem@localhost:5051/rem` (prebuilt docker-compose)
229
+ - Just press Enter to accept defaults
230
+ - Custom database: Enter your own host/port/credentials
231
+ 2. **Configure LLM providers**: Enter your OpenAI/Anthropic API keys
232
+ 3. **Install database tables**: Creates schema, functions, indexes (**required for CLI/API to work**)
233
+ 4. **Register with Claude Desktop**: Adds REM MCP server to Claude
234
+
235
+ Configuration saved to `~/.rem/config.yaml` (can edit with `rem configure --edit`)
236
+
237
+ **Port Guide:**
238
+ - **5051**: Package users with `docker-compose.prebuilt.yml` (pre-built image)
239
+ - **5050**: Developers with `docker-compose.yml` (local build)
240
+ - **Custom**: Your own PostgreSQL database
241
+
242
+ **Next Steps:**
243
+ - See [CLI Reference](#cli-reference) for all available commands
244
+ - See [REM Query Dialect](#rem-query-dialect) for query examples
245
+ - See [API Endpoints](#api-endpoints) for OpenAI-compatible API usage
246
+
247
+ ### Step 3: Load Sample Data (Optional but Recommended)
248
+
249
+ **Option A: Clone example datasets** (Recommended - works with all README examples)
250
+
251
+ ```bash
252
+ # Clone datasets repository
253
+ git clone https://github.com/Percolation-Labs/remstack-lab.git
254
+
255
+ # Load quickstart dataset
256
+ rem db load --file remstack-lab/datasets/quickstart/sample_data.yaml --user-id demo-user
257
+
258
+ # Test with sample queries
259
+ rem ask --user-id demo-user "What documents exist in the system?"
260
+ rem ask --user-id demo-user "Show me meetings about API design"
261
+ rem ask --user-id demo-user "Who is Sarah Chen?"
262
+
263
+ # Try domain-specific datasets
264
+ rem db load --file remstack-lab/datasets/domains/recruitment/scenarios/candidate_pipeline/data.yaml --user-id my-company
265
+ rem ask --user-id my-company "Show me candidates with Python experience"
266
+ ```
267
+
268
+ **Option B: Bring your own data**
269
+
270
+ ```bash
271
+ # Ingest your own files
272
+ echo "REM is a bio-inspired memory system for agentic AI workloads." > test-doc.txt
273
+ rem process ingest test-doc.txt --user-id test-user --category documentation --tags rem,ai
274
+
275
+ # Query your ingested data
276
+ rem ask --user-id test-user "What do you know about REM from my knowledge base?"
277
+ ```
278
+
279
+ ### Step 4: Test the API
280
+
281
+ ```bash
282
+ # Test the OpenAI-compatible chat completions API
283
+ curl -X POST http://localhost:8000/api/v1/chat/completions \
284
+ -H "Content-Type: application/json" \
285
+ -H "X-User-Id: demo-user" \
286
+ -d '{
287
+ "model": "anthropic:claude-sonnet-4-5-20250929",
288
+ "messages": [{"role": "user", "content": "What documents did Sarah Chen author?"}],
289
+ "stream": false
290
+ }'
291
+ ```
292
+
293
+ **Available Commands:**
294
+ - `rem ask` - Natural language queries to REM
295
+ - `rem process ingest <file>` - Full ingestion pipeline (storage + parsing + embedding + database)
296
+ - `rem process uri <file>` - READ-ONLY parsing (no database storage, useful for testing parsers)
297
+ - `rem db load --file <yaml>` - Load structured datasets directly
298
+
299
+ ## Example Datasets
300
+
301
+ 🎯 **Recommended**: Clone [remstack-lab](https://github.com/Percolation-Labs/remstack-lab) for curated datasets organized by domain and format.
302
+
303
+ **What's included:**
304
+ - **Quickstart**: Minimal dataset (3 users, 3 resources, 3 moments) - perfect for first-time users
305
+ - **Domains**: Recruitment (CV parsing), Legal (contracts), Enterprise (team collaboration)
306
+ - **Formats**: Engrams (voice memos), Documents (markdown/PDF), Conversations (chat logs)
307
+ - **Evaluation**: Golden datasets for Phoenix-based agent testing
308
+
309
+ **Working from remstack-lab:**
310
+ ```bash
311
+ cd remstack-lab
312
+
313
+ # Load any dataset
314
+ rem db load --file datasets/quickstart/sample_data.yaml --user-id demo-user
315
+
316
+ # Explore formats
317
+ rem db load --file datasets/formats/engrams/scenarios/team_meeting/team_standup_meeting.yaml --user-id demo-user
318
+
319
+ # Try domain-specific examples
320
+ rem db load --file datasets/domains/recruitment/scenarios/candidate_pipeline/data.yaml --user-id acme-corp
321
+ ```
322
+
323
+ ## See Also
324
+
325
+ - [REM Query Dialect](#rem-query-dialect) - LOOKUP, SEARCH, TRAVERSE, SQL query types
326
+ - [API Endpoints](#api-endpoints) - OpenAI-compatible chat completions, MCP server
327
+ - [CLI Reference](#cli-reference) - Complete command-line interface documentation
328
+ - [Bring Your Own Agent](#bring-your-own-agent) - Create custom agents with your own prompts and tools
329
+ - [Production Deployment](#production-deployment) - AWS EKS with Kubernetes
330
+ - [Example Datasets](https://github.com/Percolation-Labs/remstack-lab) - Curated datasets by domain and format
331
+
332
+ ---
333
+
334
+ ## Bring Your Own Agent
335
+
336
+ REM allows you to create **custom agents** with your own system prompts, tools, and output schemas. Custom agents are stored in the database and dynamically loaded when referenced, enabling **no-code agent creation** without modifying the codebase.
337
+
338
+ ### How It Works
339
+
340
+ 1. **Define Agent Schema** - Create a YAML file with your agent's prompt, tools, and output structure
341
+ 2. **Ingest Schema** - Use `rem process ingest` to store the schema in the database
342
+ 3. **Use Your Agent** - Reference your agent by name with `rem ask <agent-name> "query"`
343
+
344
+ When you run `rem ask my-agent "query"`, REM:
345
+ 1. Checks if `my-agent` exists in the filesystem (`schemas/agents/`)
346
+ 2. If not found, performs a **LOOKUP** query on the `schemas` table in the database
347
+ 3. Loads the schema dynamically and creates a Pydantic AI agent
348
+ 4. Runs your query with the custom agent
349
+
350
+ ### Expected Behavior
351
+
352
+ **Schema Ingestion Flow** (`rem process ingest my-agent.yaml`):
353
+ - Parse YAML file to extract JSON Schema content
354
+ - Extract `json_schema_extra.kind` field → maps to `category` column
355
+ - Extract `json_schema_extra.provider_configs` → stores provider configurations
356
+ - Extract `json_schema_extra.embedding_fields` → stores semantic search fields
357
+ - Create `Schema` entity in `schemas` table with `user_id` scoping
358
+ - Schema is now queryable via `LOOKUP "my-agent" FROM schemas`
359
+
360
+ **Agent Loading Flow** (`rem ask my-agent "query"`):
361
+ 1. `load_agent_schema("my-agent")` checks filesystem cache → miss
362
+ 2. Falls back to database: `LOOKUP "my-agent" FROM schemas WHERE user_id = '<user-id>'`
363
+ 3. Returns `Schema.spec` (JSON Schema dict) from database
364
+ 4. `create_agent()` factory creates Pydantic AI agent from schema
365
+ 5. Agent runs with tools specified in `json_schema_extra.tools`
366
+ 6. Returns structured output defined in `properties` field
367
+
368
+ ### Quick Example
369
+
370
+ **Step 1: Create Agent Schema** (`my-research-assistant.yaml`)
371
+
372
+ ```yaml
373
+ type: object
374
+ description: |
375
+ You are a research assistant that helps users find and analyze documents.
376
+
377
+ Use the search_rem tool to find relevant documents, then analyze and summarize them.
378
+ Be concise and cite specific documents in your responses.
379
+
380
+ properties:
381
+ summary:
382
+ type: string
383
+ description: A concise summary of findings
384
+ sources:
385
+ type: array
386
+ items:
387
+ type: string
388
+ description: List of document labels referenced
389
+
390
+ required:
391
+ - summary
392
+ - sources
393
+
394
+ json_schema_extra:
395
+ kind: agent
396
+ name: research-assistant
397
+ version: 1.0.0
398
+ tools:
399
+ - search_rem
400
+ - ask_rem_agent
401
+ resources: []
402
+ ```
403
+
404
+ **For more examples**, see:
405
+ - Simple agent (no tools): `src/rem/schemas/agents/examples/simple.yaml`
406
+ - Agent with REM tools: `src/rem/schemas/agents/core/rem-query-agent.yaml`
407
+ - Ontology extractor: `src/rem/schemas/agents/examples/cv-parser.yaml`
408
+
409
+ **Step 2: Ingest Schema into Database**
410
+
411
+ ```bash
412
+ # Ingest the schema (stores in database schemas table)
413
+ rem process ingest my-research-assistant.yaml \
414
+ --user-id my-user \
415
+ --category agents \
416
+ --tags custom,research
417
+
418
+ # Verify schema is in database (should show schema details)
419
+ rem ask "LOOKUP 'my-research-assistant' FROM schemas" --user-id my-user
420
+ ```
421
+
422
+ **Step 3: Use Your Custom Agent**
423
+
424
+ ```bash
425
+ # Run a query with your custom agent
426
+ rem ask research-assistant "Find documents about machine learning architecture" \
427
+ --user-id my-user
428
+
429
+ # With streaming
430
+ rem ask research-assistant "Summarize recent API design documents" \
431
+ --user-id my-user \
432
+ --stream
433
+
434
+ # With session continuity
435
+ rem ask research-assistant "What did we discuss about ML?" \
436
+ --user-id my-user \
437
+ --session-id abc-123
438
+ ```
439
+
440
+ ### Agent Schema Structure
441
+
442
+ Every agent schema must include:
443
+
444
+ **Required Fields:**
445
+ - `type: object` - JSON Schema type (always "object")
446
+ - `description` - System prompt with instructions for the agent
447
+ - `properties` - Output schema defining structured response fields
448
+
449
+ **Optional Metadata** (`json_schema_extra`):
450
+ - `kind` - Agent category ("agent", "evaluator", etc.) → maps to `Schema.category`
451
+ - `name` - Agent identifier (used for LOOKUP)
452
+ - `version` - Semantic version (e.g., "1.0.0")
453
+ - `tools` - List of MCP tools to load (e.g., `["search_rem", "lookup_rem"]`)
454
+ - `resources` - List of MCP resources to expose (e.g., `["user_profile"]`)
455
+ - `provider_configs` - Multi-provider testing configurations (for ontology extractors)
456
+ - `embedding_fields` - Fields to embed for semantic search (for ontology extractors)
457
+
458
+ ### Available MCP Tools
459
+
460
+ REM provides **4 built-in MCP tools** your agents can use:
461
+
462
+ | Tool | Purpose | Parameters |
463
+ |------|---------|------------|
464
+ | `search_rem` | Execute REM queries (LOOKUP, FUZZY, SEARCH, SQL, TRAVERSE) | `query_type`, `entity_key`, `query_text`, `table`, `sql_query`, `initial_query`, `edge_types`, `depth` |
465
+ | `ask_rem_agent` | Natural language to REM query via agent-driven reasoning | `query`, `agent_schema`, `agent_version` |
466
+ | `ingest_into_rem` | Full file ingestion pipeline (read → store → parse → chunk → embed) | `file_uri`, `category`, `tags`, `is_local_server` |
467
+ | `read_resource` | Access MCP resources (schemas, status) for Claude Desktop | `uri` |
468
+
469
+ **Tool Reference**: Tools are defined in `src/rem/api/mcp_router/tools.py`
470
+
471
+ **Note**: `search_rem` is a unified tool that handles all REM query types via the `query_type` parameter:
472
+ - `query_type="lookup"` - O(1) entity lookup by label
473
+ - `query_type="fuzzy"` - Fuzzy text matching with similarity threshold
474
+ - `query_type="search"` - Semantic vector search (table-specific)
475
+ - `query_type="sql"` - Direct SQL queries (WHERE clause)
476
+ - `query_type="traverse"` - Graph traversal with depth control
477
+
478
+ ### Multi-User Isolation
479
+
480
+ Custom agents are **scoped by `user_id`**, ensuring complete data isolation:
481
+
482
+ ```bash
483
+ # User A creates a custom agent
484
+ rem process ingest my-agent.yaml --user-id user-a --category agents
485
+
486
+ # User B cannot see User A's agent
487
+ rem ask my-agent "test" --user-id user-b
488
+ # ❌ Error: Schema not found (LOOKUP returns no results for user-b)
489
+
490
+ # User A can use their agent
491
+ rem ask my-agent "test" --user-id user-a
492
+ # ✅ Works - LOOKUP finds schema for user-a
493
+ ```
494
+
495
+ ### Advanced: Ontology Extractors
496
+
497
+ Custom agents can also serve as **ontology extractors** that pull structured knowledge out of files. See [CLAUDE.md](../CLAUDE.md#ontology-extraction-pattern) for details on:
498
+ - Multi-provider testing (`provider_configs`)
499
+ - Semantic search configuration (`embedding_fields`)
500
+ - File matching rules (`OntologyConfig`)
501
+ - Dreaming workflow integration
502
+
503
+ ### Troubleshooting
504
+
505
+ **Schema not found error:**
506
+ ```bash
507
+ # Check if schema was ingested correctly
508
+ rem ask "SEARCH 'my-agent' FROM schemas" --user-id my-user
509
+
510
+ # List all schemas for your user
511
+ rem ask "SELECT name, category, created_at FROM schemas ORDER BY created_at DESC LIMIT 10" --user-id my-user
512
+ ```
513
+
514
+ **Agent not loading tools:**
515
+ - Verify `json_schema_extra.tools` lists correct tool names
516
+ - Valid tool names: `search_rem`, `ask_rem_agent`, `ingest_into_rem`, `read_resource`
517
+ - Check MCP tool names in `src/rem/api/mcp_router/tools.py`
518
+ - Tools are case-sensitive: use `search_rem`, not `Search_REM`
519
+
520
+ **Agent not returning structured output:**
521
+ - Ensure `properties` field defines all expected output fields
522
+ - Use `required` field to mark mandatory fields
523
+ - Check agent response with `--stream` disabled to see full JSON output
524
+
525
+ ---
526
+
527
+ ## REM Query Dialect
528
+
529
+ REM provides a custom query language designed for **LLM-driven iterated retrieval** with performance guarantees.
530
+
531
+ ### Design Philosophy
532
+
533
+ Unlike traditional single-shot SQL queries, the REM dialect is optimized for **multi-turn exploration** where LLMs participate in query planning:
534
+
535
+ - **Iterated Queries**: Queries return partial results that LLMs use to refine subsequent queries
536
+ - **Composable WITH Syntax**: Chain operations together (e.g., `TRAVERSE FROM ... WITH LOOKUP "..."`)
537
+ - **Mixed Indexes**: Combines exact lookups (O(1)), semantic search (vector), and graph traversal
538
+ - **Query Planner Participation**: Results include metadata for LLMs to decide next steps
539
+
540
+ **Example Multi-Turn Flow**:
541
+ ```
542
+ Turn 1: LOOKUP "sarah-chen" → Returns entity + available edge types
543
+ Turn 2: TRAVERSE FROM "sarah-chen" TYPE "authored_by" DEPTH 1 → Returns connected documents
544
+ Turn 3: SEARCH "architecture decisions" WITH TRAVERSE FROM "sarah-chen" → Combines semantic + graph
545
+ ```
546
+
547
+ This enables LLMs to **progressively build context** rather than requiring perfect queries upfront.
548
+
549
+ See [REM Query Dialect (AST)](#rem-query-dialect-ast) for complete grammar specification.
550
+
551
+ ### Query Types
552
+
553
+ #### `LOOKUP` - O(1) Exact Label Lookup
554
+
555
+ Fast exact match on entity labels (natural language identifiers, not UUIDs).
556
+
557
+ ```sql
558
+ LOOKUP "sarah-chen" FROM resources
559
+ LOOKUP "api-design-v2" FROM resources WHERE category = "projects"
560
+ ```
561
+
562
+ **Performance**: O(1) - indexed on `label` column
563
+ **Returns**: Single entity or null
564
+ **Use case**: Fetch specific known entities by human-readable name
565
+
566
+ #### `FUZZY` - Fuzzy Text Search
567
+
568
+ Fuzzy matching for partial names or misspellings using PostgreSQL trigram similarity.
569
+
570
+ ```sql
571
+ FUZZY "sara" FROM resources LIMIT 10
572
+ FUZZY "api desgin" FROM resources THRESHOLD 0.3 LIMIT 5
573
+ ```
574
+
575
+ **Performance**: O(n) with pg_trgm GIN index (fast for small-medium datasets)
576
+ **Returns**: Ranked list by similarity score
577
+ **Use case**: Handle typos, partial names, or when exact label is unknown
578
+
579
+ #### `SEARCH` - Semantic Vector Search
580
+
581
+ Semantic search using pgvector embeddings with cosine similarity.
582
+
583
+ ```sql
584
+ SEARCH "machine learning architecture" FROM resources LIMIT 10
585
+ SEARCH "contract disputes" FROM resources WHERE tags @> ARRAY['legal'] LIMIT 5
586
+ ```
587
+
588
+ **Performance**: O(log n) with HNSW index
589
+ **Returns**: Ranked list of semantically similar entities
590
+ **Use case**: Find conceptually related content without exact keyword matches
591
+
592
+ #### `TRAVERSE` - Recursive Graph Traversal
593
+
594
+ Follow `graph_edges` relationships across the knowledge graph.
595
+
596
+ ```sql
597
+ TRAVERSE FROM "sarah-chen" TYPE "authored_by" DEPTH 2
598
+ TRAVERSE FROM "api-design-v2" TYPE "references,depends_on" DEPTH 3
599
+ ```
600
+
601
+ **Features**:
602
+ - **Polymorphic**: Seamlessly traverses `resources`, `moments`, `users` via `all_graph_edges` view
603
+ - **Filtering**: Filter by one or multiple edge types (comma-separated)
604
+ - **Depth Control**: Configurable recursion depth (default: 2)
605
+ - **Data Model**: Requires `InlineEdge` JSON structure in `graph_edges` column
606
+
607
+ **Returns**: Graph of connected entities with edge metadata
608
+ **Use case**: Explore relationships, find connected entities, build context
609
+
610
+ #### Direct SQL Queries
611
+
612
+ Raw SQL for complex temporal, aggregation, or custom queries.
613
+
614
+ ```sql
615
+ SELECT * FROM resources WHERE created_at > NOW() - INTERVAL '7 days' ORDER BY created_at DESC LIMIT 20
616
+ SELECT category, COUNT(*) as count FROM resources GROUP BY category
617
+ WITH recent AS (SELECT * FROM resources WHERE created_at > NOW() - INTERVAL '1 day') SELECT * FROM recent
618
+ ```
619
+
620
+ **Performance**: Depends on query and indexes
621
+ **Returns**: Raw query results
622
+ **Use case**: Complex filtering, aggregations, temporal queries
623
+ **Allowed**: SELECT, INSERT, UPDATE, WITH (read + data modifications)
624
+ **Blocked**: DROP, DELETE, TRUNCATE, ALTER (destructive operations)
625
+ **Note**: Can be used standalone or with `WITH` syntax for composition
626
+
627
+ ### Graph Edge Format
628
+
629
+ Edges stored inline using `InlineEdge` pattern with human-readable destination labels.
630
+
631
+ ```json
632
+ {
633
+ "dst": "sarah-chen",
634
+ "rel_type": "authored_by",
635
+ "weight": 1.0,
636
+ "properties": {
637
+ "dst_entity_type": "users:engineers/sarah-chen",
638
+ "created_at": "2025-01-15T10:30:00Z"
639
+ }
640
+ }
641
+ ```
642
+
643
+ **Destination Entity Type Convention** (`properties.dst_entity_type`):
644
+
645
+ Format: `<table_schema>:<category>/<key>`
646
+
647
+ Examples:
648
+ - `"resources:managers/bob"` → Look up bob in resources table with category="managers"
649
+ - `"users:engineers/sarah-chen"` → Look up sarah-chen in users table
650
+ - `"moments:meetings/standup-2024-01"` → Look up in moments table
651
+ - `"resources/api-design-v2"` → Look up in resources table (no category)
652
+ - `"bob"` → Defaults to resources table, no category
653
+
654
+ **Edge Type Format** (`rel_type`):
655
+ - Use snake_case: `"authored_by"`, `"depends_on"`, `"references"`
656
+ - Be specific but consistent
657
+ - Prefer passive-voice names (e.g., `"authored_by"` rather than `"authored"`) so the edge direction reads clearly from either endpoint
658
+
659
+ ### Multi-Turn Iterated Retrieval
660
+
661
+ REM enables agents to conduct multi-turn database conversations:
662
+
663
+ 1. **Initial Query**: Agent runs SEARCH to find candidates
664
+ 2. **Refinement**: Agent analyzes results, runs LOOKUP on specific entities
665
+ 3. **Context Expansion**: Agent runs TRAVERSE to find related entities
666
+ 4. **Temporal Filter**: Agent runs SQL to filter by time range
667
+ 5. **Final Answer**: Agent synthesizes knowledge from all queries
668
+
669
+ **Plan Memos**: Agents track query plans in scratchpad for iterative refinement.
670
+
671
+ ### Query Performance Contracts
672
+
673
+ | Query Type | Complexity | Index | Use When |
674
+ |------------|-----------|-------|----------|
675
+ | `LOOKUP` | O(1) | B-tree on `label` | You know exact entity name |
676
+ | `FUZZY` | O(n) | GIN on `label` (pg_trgm) | Handling typos/partial matches |
677
+ | `SEARCH` | O(log n) | HNSW on `embedding` | Semantic similarity needed |
678
+ | `TRAVERSE` | O(depth × edges) | B-tree on `graph_edges` | Exploring relationships |
679
+ | `SQL` | Variable | Custom indexes | Complex filtering/aggregation |
680
+
681
+ ### Example: Multi-Query Session
682
+
683
+ ```python
684
+ # Query 1: Find relevant documents
685
+ SEARCH "API migration planning" FROM resources LIMIT 5
686
+
687
+ # Query 2: Get specific document
688
+ LOOKUP "tidb-migration-spec" FROM resources
689
+
690
+ # Query 3: Find related people
691
+ TRAVERSE FROM "tidb-migration-spec" TYPE "authored_by,reviewed_by" DEPTH 1
692
+
693
+ # Query 4: Recent activity
694
+ SELECT * FROM moments WHERE
695
+ 'tidb-migration' = ANY(topic_tags) AND
696
+ start_time > NOW() - INTERVAL '30 days'
697
+ ```
698
+
699
+ ### Tenant Isolation
700
+
701
+ All queries automatically scoped by `user_id` for complete data isolation:
702
+
703
+ ```sql
704
+ -- Automatically filtered to user's data
705
+ SEARCH "contracts" FROM resources LIMIT 10
706
+
707
+ -- No cross-user data leakage
708
+ TRAVERSE FROM "project-x" TYPE "references" DEPTH 3
709
+ ```
710
+
711
+ ## API Endpoints
712
+
713
+ ### Chat Completions (OpenAI-compatible)
714
+
715
+ ```bash
716
+ POST /api/v1/chat/completions
717
+ ```
718
+
719
+ **Headers**:
720
+ - `X-Tenant-Id`: Tenant identifier (required for REM)
721
+ - `X-User-Id`: User identifier
722
+ - `X-Session-Id`: Session/conversation identifier
723
+ - `X-Agent-Schema`: Agent schema URI to use
724
+
725
+ **Body**:
726
+ ```json
727
+ {
728
+ "model": "anthropic:claude-sonnet-4-5-20250929",
729
+ "messages": [
730
+ {"role": "user", "content": "Find all documents Sarah authored"}
731
+ ],
732
+ "stream": true,
733
+ "response_format": {"type": "text"}
734
+ }
735
+ ```
736
+
737
+ **Streaming Response** (SSE):
738
+ ```
739
+ data: {"id": "chatcmpl-123", "choices": [{"delta": {"role": "assistant", "content": ""}}]}
740
+
741
+ data: {"id": "chatcmpl-123", "choices": [{"delta": {"content": "[Calling: search_rem]"}}]}
742
+
743
+ data: {"id": "chatcmpl-123", "choices": [{"delta": {"content": "Found 3 documents..."}}]}
744
+
745
+ data: {"id": "chatcmpl-123", "choices": [{"delta": {}, "finish_reason": "stop"}]}
746
+
747
+ data: [DONE]
748
+ ```
749
+
750
+ ### MCP Endpoint
751
+
752
+ ```bash
753
+ # MCP HTTP transport
754
+ POST /api/v1/mcp
755
+ ```
756
+
757
+ Tools and resources for REM query execution, resource management, file operations.
758
+
759
+ ### Health Check
760
+
761
+ ```bash
762
+ GET /health
763
+ # {"status": "healthy", "version": "0.1.0"}
764
+ ```
765
+
766
+ ## CLI Reference
767
+
768
+ REM provides a comprehensive command-line interface for all operations.
769
+
770
+ ### Configuration & Server
771
+
772
+ #### `rem configure` - Interactive Setup Wizard
773
+
774
+ Set up REM with PostgreSQL, LLM providers, and S3 storage. **Defaults to port 5051 (package users).**
775
+
776
+ ```bash
777
+ # Complete setup (recommended for package users)
778
+ rem configure --install --claude-desktop
779
+
780
+ # This runs:
781
+ # 1. Interactive wizard (creates ~/.rem/config.yaml)
782
+ # 2. Installs database tables (rem db migrate)
783
+ # 3. Registers REM MCP server with Claude Desktop
784
+
785
+ # Other options:
786
+ rem configure # Just run wizard
787
+ rem configure --install # Wizard + database install
788
+ rem configure --show # Show current configuration
789
+ rem configure --edit # Edit configuration in $EDITOR
790
+ ```
791
+
792
+ **Default Configuration:**
793
+ - **Package users**: `localhost:5051` (docker-compose.prebuilt.yml with Docker Hub image)
794
+ - **Developers**: Change to `localhost:5050` during wizard (docker-compose.yml with local build)
795
+ - **Custom database**: Enter your own host/port/credentials
796
+
797
+ **Configuration File:** `~/.rem/config.yaml`
798
+
799
+ ```yaml
800
+ postgres:
801
+ # Package users (prebuilt)
802
+ connection_string: postgresql://rem:rem@localhost:5051/rem
803
+ # OR Developers (local build)
804
+ # connection_string: postgresql://rem:rem@localhost:5050/rem
805
+ pool_min_size: 5
806
+ pool_max_size: 20
807
+
808
+ llm:
809
+ default_model: anthropic:claude-sonnet-4-5-20250929
810
+ openai_api_key: sk-...
811
+ anthropic_api_key: sk-ant-...
812
+
813
+ s3:
814
+ bucket_name: rem-storage
815
+ region: us-east-1
816
+ ```
817
+
818
+ **Precedence:** Environment variables > Config file > Defaults
819
+
820
+ **Port Guide:**
821
+ - **5051**: Package users with `docker-compose.prebuilt.yml` (recommended)
822
+ - **5050**: Developers with `docker-compose.yml` (local development)
823
+ - **Custom**: Your own PostgreSQL instance
824
+
825
+ #### `rem mcp` - Run MCP Server
826
+
827
+ Run the FastMCP server for Claude Desktop integration.
828
+
829
+ ```bash
830
+ # Stdio mode (for Claude Desktop)
831
+ rem mcp
832
+
833
+ # HTTP mode (for testing)
834
+ rem mcp --http --port 8001
835
+ ```
836
+
837
+ #### `rem serve` - Start API Server
838
+
839
+ Start the FastAPI server with uvicorn.
840
+
841
+ ```bash
842
+ # Use settings from config
843
+ rem serve
844
+
845
+ # Development mode (auto-reload)
846
+ rem serve --reload
847
+
848
+ # Production mode (4 workers)
849
+ rem serve --workers 4
850
+
851
+ # Bind to all interfaces
852
+ rem serve --host 0.0.0.0 --port 8080
853
+
854
+ # Override log level
855
+ rem serve --log-level debug
856
+ ```
857
+
858
+ ### Database Management
859
+
860
+ #### `rem db migrate` - Run Migrations
861
+
862
+ Apply database migrations (install.sql and install_models.sql).
863
+
864
+ ```bash
865
+ # Apply all migrations
866
+ rem db migrate
867
+
868
+ # Core infrastructure only (extensions, functions)
869
+ rem db migrate --install
870
+
871
+ # Entity tables only (Resource, Message, etc.)
872
+ rem db migrate --models
873
+
874
+ # Background indexes (HNSW for vectors)
875
+ rem db migrate --background-indexes
876
+
877
+ # Custom connection string
878
+ rem db migrate --connection "postgresql://user:pass@host:5432/db"
879
+
880
+ # Custom SQL directory
881
+ rem db migrate --sql-dir /path/to/sql
882
+ ```
883
+
884
+ #### `rem db status` - Migration Status
885
+
886
+ Show applied migrations and execution times.
887
+
888
+ ```bash
889
+ rem db status
890
+ ```
891
+
892
+ #### `rem db rebuild-cache` - Rebuild KV Cache
893
+
894
+ Rebuild KV_STORE cache from entity tables (after database restart or bulk imports).
895
+
896
+ ```bash
897
+ rem db rebuild-cache
898
+ ```
899
+
900
+ ### Schema Management
901
+
902
+ #### `rem db schema generate` - Generate SQL Schema
903
+
904
+ Generate database schema from Pydantic models.
905
+
906
+ ```bash
907
+ # Generate install_models.sql from entity models
908
+ rem db schema generate \
909
+ --models src/rem/models/entities \
910
+ --output rem/src/rem/sql/install_models.sql
911
+
912
+ # Generate migration file
913
+ rem db schema generate \
914
+ --models src/rem/models/entities \
915
+ --output rem/src/rem/sql/migrations/003_add_fields.sql
916
+ ```
917
+
918
+ #### `rem db schema indexes` - Generate Background Indexes
919
+
920
+ Generate SQL for background index creation (HNSW for vectors).
921
+
922
+ ```bash
923
+ # Generate background_indexes.sql
924
+ rem db schema indexes \
925
+ --models src/rem/models/entities \
926
+ --output rem/src/rem/sql/background_indexes.sql
927
+ ```
928
+
929
+ #### `rem db schema validate` - Validate Models
930
+
931
+ Validate Pydantic models for schema generation.
932
+
933
+ ```bash
934
+ rem db schema validate --models src/rem/models/entities
935
+ ```
936
+
937
+ ### File Processing
938
+
939
+ #### `rem process files` - Process Files
940
+
941
+ Process files with optional custom extractor (ontology extraction).
942
+
943
+ ```bash
944
+ # Process all completed files for tenant
945
+ rem process files \
946
+ --tenant-id acme-corp \
947
+ --status completed \
948
+ --limit 10
949
+
950
+ # Process with custom extractor
951
+ rem process files \
952
+ --tenant-id acme-corp \
953
+ --extractor cv-parser-v1 \
954
+ --limit 50
955
+
956
+ # Process files from the last 7 days
957
+ rem process files \
958
+ --tenant-id acme-corp \
959
+ --lookback-hours 168
960
+ ```
961
+
962
+ #### `rem process ingest` - Ingest File into REM
963
+
964
+ Ingest a file into REM with full pipeline (storage + parsing + embedding + database).
965
+
966
+ ```bash
967
+ # Ingest local file
968
+ rem process ingest /path/to/document.pdf \
969
+ --user-id user-123 \
970
+ --category legal \
971
+ --tags contract,2024
972
+
973
+ # Ingest with minimal options
974
+ rem process ingest ./meeting-notes.md --user-id user-123
975
+ ```
976
+
977
+ #### `rem process uri` - Parse File (Read-Only)
978
+
979
+ Parse a file and extract content **without** storing to database (useful for testing parsers).
980
+
981
+ ```bash
982
+ # Parse local file (output to stdout)
983
+ rem process uri /path/to/document.pdf
984
+
985
+ # Parse and save extracted content to file
986
+ rem process uri /path/to/document.pdf --save output.json
987
+
988
+ # Parse S3 file
989
+ rem process uri s3://bucket/key.docx --output text
990
+ ```
991
+
992
+ ### Memory & Knowledge Extraction (Dreaming)
993
+
994
+ #### `rem dreaming full` - Complete Workflow
995
+
996
+ Run full dreaming workflow: extractors → moments → affinity → user model.
997
+
998
+ ```bash
999
+ # Full workflow for user
1000
+ rem dreaming full \
1001
+ --user-id user-123 \
1002
+ --tenant-id acme-corp
1003
+
1004
+ # Skip ontology extractors
1005
+ rem dreaming full \
1006
+ --user-id user-123 \
1007
+ --tenant-id acme-corp \
1008
+ --skip-extractors
1009
+
1010
+ # Process last 24 hours only
1011
+ rem dreaming full \
1012
+ --user-id user-123 \
1013
+ --tenant-id acme-corp \
1014
+ --lookback-hours 24
1015
+
1016
+ # Limit resources processed
1017
+ rem dreaming full \
1018
+ --user-id user-123 \
1019
+ --tenant-id acme-corp \
1020
+ --limit 100
1021
+ ```
1022
+
1023
+ #### `rem dreaming custom` - Custom Extractor
1024
+
1025
+ Run specific ontology extractor on user's data.
1026
+
1027
+ ```bash
1028
+ # Run CV parser on user's files
1029
+ rem dreaming custom \
1030
+ --user-id user-123 \
1031
+ --tenant-id acme-corp \
1032
+ --extractor cv-parser-v1
1033
+
1034
+ # Process last week's files
1035
+ rem dreaming custom \
1036
+ --user-id user-123 \
1037
+ --tenant-id acme-corp \
1038
+ --extractor contract-analyzer-v1 \
1039
+ --lookback-hours 168 \
1040
+ --limit 50
1041
+ ```
1042
+
1043
+ #### `rem dreaming moments` - Extract Moments
1044
+
1045
+ Extract temporal narratives from resources.
1046
+
1047
+ ```bash
1048
+ # Generate moments for user
1049
+ rem dreaming moments \
1050
+ --user-id user-123 \
1051
+ --tenant-id acme-corp \
1052
+ --limit 50
1053
+
1054
+ # Process last 7 days
1055
+ rem dreaming moments \
1056
+ --user-id user-123 \
1057
+ --tenant-id acme-corp \
1058
+ --lookback-hours 168
1059
+ ```
1060
+
1061
+ #### `rem dreaming affinity` - Build Relationships
1062
+
1063
+ Build semantic relationships between resources using embeddings.
1064
+
1065
+ ```bash
1066
+ # Build affinity graph for user
1067
+ rem dreaming affinity \
1068
+ --user-id user-123 \
1069
+ --tenant-id acme-corp \
1070
+ --limit 100
1071
+
1072
+ # Process recent resources only
1073
+ rem dreaming affinity \
1074
+ --user-id user-123 \
1075
+ --tenant-id acme-corp \
1076
+ --lookback-hours 24
1077
+ ```
1078
+
1079
+ #### `rem dreaming user-model` - Update User Model
1080
+
1081
+ Update user model from recent activity (preferences, interests, patterns).
1082
+
1083
+ ```bash
1084
+ # Update user model
1085
+ rem dreaming user-model \
1086
+ --user-id user-123 \
1087
+ --tenant-id acme-corp
1088
+ ```
1089
+
1090
+ ### Evaluation & Experiments
1091
+
1092
+ #### `rem experiments` - Experiment Management
1093
+
1094
+ Manage evaluation experiments with datasets, prompts, and traces.
1095
+
1096
+ ```bash
1097
+ # Create experiment configuration
1098
+ rem experiments create my-evaluation \
1099
+ --agent ask_rem \
1100
+ --evaluator rem-lookup-correctness \
1101
+ --description "Baseline evaluation"
1102
+
1103
+ # Run experiment
1104
+ rem experiments run my-evaluation
1105
+
1106
+ # List experiments
1107
+ rem experiments list
1108
+ rem experiments show my-evaluation
1109
+ ```
1110
+
1111
+ #### `rem experiments dataset` - Dataset Management
1112
+
1113
+ ```bash
1114
+ # Create dataset from CSV
1115
+ rem experiments dataset create rem-lookup-golden \
1116
+ --from-csv golden.csv \
1117
+ --input-keys query \
1118
+ --output-keys expected_label,expected_type
1119
+
1120
+ # Add more examples
1121
+ rem experiments dataset add rem-lookup-golden \
1122
+ --from-csv more-data.csv \
1123
+ --input-keys query \
1124
+ --output-keys expected_label,expected_type
1125
+
1126
+ # List datasets
1127
+ rem experiments dataset list
1128
+ ```
1129
+
1130
+ #### `rem experiments prompt` - Prompt Management
1131
+
1132
+ ```bash
1133
+ # Create agent prompt
1134
+ rem experiments prompt create hello-world \
1135
+ --system-prompt "You are a helpful assistant." \
1136
+ --model-name gpt-4o
1137
+
1138
+ # List prompts
1139
+ rem experiments prompt list
1140
+ ```
1141
+
1142
+ #### `rem experiments trace` - Trace Retrieval
1143
+
1144
+ ```bash
1145
+ # List recent traces
1146
+ rem experiments trace list --project rem-agents --days 7 --limit 50
1147
+ ```
1148
+
1149
+ #### `rem experiments` - A/B Testing & Parameter Sweeps
1150
+
1151
+ Manage experiment configurations (A/B testing, parameter sweeps).
1152
+
1153
+ ```bash
1154
+ # Create experiment config
1155
+ rem experiments create \
1156
+ --name cv-parser-test \
1157
+ --description "Test CV parser with different models"
1158
+
1159
+ # List experiments
1160
+ rem experiments list
1161
+
1162
+ # Show experiment details
1163
+ rem experiments show cv-parser-test
1164
+
1165
+ # Run experiment
1166
+ rem experiments run cv-parser-test
1167
+ ```
1168
+
1169
+ ### Interactive Agent
1170
+
1171
+ #### `rem ask` - Test Agent
1172
+
1173
+ Test Pydantic AI agent with natural language queries.
1174
+
1175
+ ```bash
1176
+ # Ask a question
1177
+ rem ask "What documents did Sarah Chen author?"
1178
+
1179
+ # With context headers
1180
+ rem ask "Find all resources about API design" \
1181
+ --user-id user-123 \
1182
+ --tenant-id acme-corp
1183
+
1184
+ # Use specific agent schema
1185
+ rem ask "Analyze this contract" \
1186
+ --agent-schema contract-analyzer-v1
1187
+ ```
1188
+
1189
+ ### Global Options
1190
+
1191
+ All commands support:
1192
+
1193
+ ```bash
1194
+ # Verbose logging
1195
+ rem --verbose <command>
1196
+ rem -v <command>
1197
+
1198
+ # Version
1199
+ rem --version
1200
+
1201
+ # Help
1202
+ rem --help
1203
+ rem <command> --help
1204
+ rem <command> <subcommand> --help
1205
+ ```
1206
+
1207
+ ### Environment Variables
1208
+
1209
+ Override any setting via environment variables:
1210
+
1211
+ ```bash
1212
+ # Database
1213
+ export POSTGRES__CONNECTION_STRING=postgresql://rem:rem@localhost:5432/rem
1214
+ export POSTGRES__POOL_MIN_SIZE=5
1215
+
1216
+ # LLM
1217
+ export LLM__DEFAULT_MODEL=openai:gpt-4o
1218
+ export LLM__OPENAI_API_KEY=sk-...
1219
+ export LLM__ANTHROPIC_API_KEY=sk-ant-...
1220
+
1221
+ # S3
1222
+ export S3__BUCKET_NAME=rem-storage
1223
+ export S3__REGION=us-east-1
1224
+
1225
+ # Server
1226
+ export API__HOST=0.0.0.0
1227
+ export API__PORT=8000
1228
+ export API__RELOAD=true
1229
+
1230
+ # Run command with overrides
1231
+ rem serve
1232
+ ```
1233
+
1234
+ ## Development (For Contributors)
1235
+
1236
+ **Best for**: Contributing to REM or customizing the codebase.
1237
+
1238
+ ### Step 1: Clone Repository
1239
+
1240
+ ```bash
1241
+ git clone https://github.com/mr-saoirse/remstack.git
1242
+ cd remstack/rem
1243
+ ```
1244
+
1245
+ ### Step 2: Start PostgreSQL Only
1246
+
1247
+ ```bash
1248
+ # Start only PostgreSQL (port 5050 for developers, doesn't conflict with package users on 5051)
1249
+ docker compose up postgres -d
1250
+
1251
+ # Verify connection
1252
+ psql -h localhost -p 5050 -U rem -d rem -c "SELECT version();"
1253
+ ```
1254
+
1255
+ ### Step 3: Set Up Development Environment
1256
+
1257
+ ```bash
1258
+ # IMPORTANT: If you previously installed the package and ran `rem configure`,
1259
+ # delete the REM configuration directory to avoid conflicts:
1260
+ rm -rf ~/.rem/
1261
+
1262
+ # Create virtual environment with uv
1263
+ uv venv
1264
+ source .venv/bin/activate # On Windows: .venv\Scripts\activate
1265
+
1266
+ # Install in editable mode with all dependencies
1267
+ uv pip install -e ".[all]"
1268
+
1269
+ # Set LLM API keys
1270
+ export OPENAI_API_KEY="sk-..."
1271
+ export ANTHROPIC_API_KEY="sk-ant-..."
1272
+ export POSTGRES__CONNECTION_STRING="postgresql://rem:rem@localhost:5050/rem"
1273
+
1274
+ # Verify CLI
1275
+ rem --version
1276
+ ```
1277
+
1278
+ ### Step 4: Initialize Database
1279
+
1280
+ ```bash
1281
+ # Apply migrations
1282
+ rem db migrate
1283
+
1284
+ # Verify tables
1285
+ psql -h localhost -p 5050 -U rem -d rem -c "\dt"
1286
+ ```
1287
+
1288
+ ### Step 5: Run API Server (Optional)
1289
+
1290
+ ```bash
1291
+ # Start API server with hot reload
1292
+ uv run python -m rem.api.main
1293
+
1294
+ # API runs on http://localhost:8000
1295
+ ```
1296
+
1297
+ ### Step 6: Run Tests
1298
+
1299
+ ```bash
1300
+ # Run non-LLM tests (fast, no API costs)
1301
+ uv run pytest tests/integration/ -m "not llm" -v
1302
+
1303
+ # Run all tests (uses API credits)
1304
+ uv run pytest tests/integration/ -v
1305
+
1306
+ # Type check (saves report to .mypy/ folder)
1307
+ ../scripts/run_mypy.sh
1308
+ ```
1309
+
1310
+ Type checking reports are saved to `.mypy/report_YYYYMMDD_HHMMSS.txt` (gitignored).
1311
+ Current status: 222 errors in 55 files (as of 2025-11-23).
1312
+
1313
+ ### Environment Variables
1314
+
1315
+ All settings via environment variables with `__` delimiter:
1316
+
1317
+ ```bash
1318
+ # LLM
1319
+ LLM__DEFAULT_MODEL=anthropic:claude-sonnet-4-5-20250929
1320
+ LLM__DEFAULT_TEMPERATURE=0.5
1321
+
1322
+ # Auth (disabled by default)
1323
+ AUTH__ENABLED=false
1324
+ AUTH__OIDC_ISSUER_URL=https://accounts.google.com
1325
+
1326
+ # OTEL (disabled by default for local dev)
1327
+ OTEL__ENABLED=false
1328
+ OTEL__SERVICE_NAME=rem-api
1329
+
1330
+ # Postgres
1331
+ POSTGRES__CONNECTION_STRING=postgresql://rem:rem@localhost:5050/rem
1332
+
1333
+ # S3
1334
+ S3__BUCKET_NAME=rem-storage
1335
+ S3__REGION=us-east-1
1336
+ ```
1337
+
1338
+ ### Production Deployment (Optional)
1339
+
1340
+ For production deployment to AWS EKS with Kubernetes, see the main repository README:
1341
+ - **Infrastructure**: [../../manifests/infra/pulumi/eks-yaml/README.md](../../manifests/infra/pulumi/eks-yaml/README.md)
1342
+ - **Platform**: [../../manifests/platform/README.md](../../manifests/platform/README.md)
1343
+ - **Application**: [../../manifests/application/README.md](../../manifests/application/README.md)
1344
+
1345
+
1346
+ ## REM Query Dialect (AST)
1347
+
1348
+ REM queries follow a structured dialect with formal grammar specification.
1349
+
1350
+ ### Grammar
1351
+
1352
+ ```
1353
+ Query ::= LookupQuery | FuzzyQuery | SearchQuery | SqlQuery | TraverseQuery
1354
+
1355
+ LookupQuery ::= LOOKUP <key:string|list[string]>
1356
+ key : Single entity name or list of entity names (natural language labels)
1357
+ performance : O(1) per key
1358
+ available : Stage 1+
1359
+ examples :
1360
+ - LOOKUP "Sarah"
1361
+ - LOOKUP ["Sarah", "Mike", "Emily"]
1362
+ - LOOKUP "Project Alpha"
1363
+
1364
+ FuzzyQuery ::= FUZZY <text:string> [THRESHOLD <t:float>] [LIMIT <n:int>]
1365
+ text : Search text (partial/misspelled)
1366
+ threshold : Similarity score 0.0-1.0 (default: 0.5)
1367
+ limit : Max results (default: 5)
1368
+ performance : Indexed (pg_trgm)
1369
+ available : Stage 1+
1370
+ example : FUZZY "sara" THRESHOLD 0.5 LIMIT 10
1371
+
1372
+ SearchQuery ::= SEARCH <text:string> [TABLE <table:string>] [WHERE <clause:string>] [LIMIT <n:int>]
1373
+ text : Semantic query text
1374
+ table : Target table (default: "resources")
1375
+ clause : Optional PostgreSQL WHERE clause for hybrid filtering (combines vector + structured)
1376
+ limit : Max results (default: 10)
1377
+ performance : Indexed (pgvector)
1378
+ available : Stage 3+
1379
+ examples :
1380
+ - SEARCH "database migration" TABLE resources LIMIT 10
1381
+ - SEARCH "team discussion" TABLE moments WHERE "moment_type='meeting'" LIMIT 5
1382
+ - SEARCH "project updates" WHERE "created_at >= '2024-01-01'" LIMIT 20
1383
+ - SEARCH "AI research" WHERE "tags @> ARRAY['machine-learning']" LIMIT 10
1384
+
1385
+ Hybrid Query Support: SEARCH combines semantic vector similarity with structured filtering.
1386
+ Use WHERE clause to filter on system fields or entity-specific fields.
1387
+
1388
+ SqlQuery ::= <raw_sql:string>
1389
+ | SQL <table:string> [WHERE <clause:string>] [ORDER BY <order:string>] [LIMIT <n:int>]
1390
+
1391
+ Mode 1 (Raw SQL - Recommended):
1392
+ Any query not starting with a REM keyword (LOOKUP, FUZZY, SEARCH, TRAVERSE) is treated as raw SQL.
1393
+ Allowed: SELECT, INSERT, UPDATE, WITH (read + data modifications)
1394
+ Blocked: DROP, DELETE, TRUNCATE, ALTER (destructive operations)
1395
+
1396
+ Mode 2 (Structured - Legacy):
1397
+ SQL prefix with table + WHERE clause (automatic tenant isolation)
1398
+
1399
+ performance : O(n) with indexes
1400
+ available : Stage 1+
1401
+ dialect : PostgreSQL (full PostgreSQL syntax support)
1402
+
1403
+ examples :
1404
+ # Raw SQL (no prefix needed)
1405
+ - SELECT * FROM resources WHERE created_at > NOW() - INTERVAL '7 days' LIMIT 20
1406
+ - SELECT category, COUNT(*) as count FROM resources GROUP BY category
1407
+ - WITH recent AS (SELECT * FROM resources WHERE created_at > NOW() - INTERVAL '1 day') SELECT * FROM recent
1408
+
1409
+ # Structured SQL (legacy, automatic tenant isolation)
1410
+ - SQL moments WHERE "moment_type='meeting'" ORDER BY starts_timestamp DESC LIMIT 10
1411
+ - SQL resources WHERE "metadata->>'status' = 'published'" LIMIT 20
1412
+
1413
+ PostgreSQL Dialect: Full support for:
1414
+ - JSONB operators (->>, ->, @>, etc.)
1415
+ - Array operators (&&, @>, <@, etc.)
1416
+ - CTEs (WITH clauses)
1417
+ - Advanced filtering and aggregations
1418
+
1419
+ TraverseQuery ::= TRAVERSE [<edge_types:list>] WITH <initial_query:Query> [DEPTH <d:int>] [ORDER BY <order:string>] [LIMIT <n:int>]
1420
+ edge_types : Relationship types to follow (e.g., ["manages", "reports-to"], default: all)
1421
+ initial_query : Starting query (typically LOOKUP)
1422
+ depth : Number of hops (0=PLAN mode, 1=single hop, N=multi-hop, default: 1)
1423
+ order : Order results (default: "edge.created_at DESC")
1424
+ limit : Max nodes (default: 9)
1425
+ performance : O(k) where k = visited nodes
1426
+ available : Stage 3+
1427
+ examples :
1428
+ - TRAVERSE manages WITH LOOKUP "Sally" DEPTH 1
1429
+ - TRAVERSE WITH LOOKUP "Sally" DEPTH 0 (PLAN mode: edge analysis only)
1430
+ - TRAVERSE manages,reports-to WITH LOOKUP "Sarah" DEPTH 2 LIMIT 5
1431
+ ```
1432
+
1433
+ ### Query Availability by Evolution Stage
1434
+
1435
+ | Query Type | Stage 0 | Stage 1 | Stage 2 | Stage 3 | Stage 4 |
1436
+ |------------|---------|---------|---------|---------|---------|
1437
+ | LOOKUP | ✗ | ✓ | ✓ | ✓ | ✓ |
1438
+ | FUZZY | ✗ | ✓ | ✓ | ✓ | ✓ |
1439
+ | SEARCH | ✗ | ✗ | ✗ | ✓ | ✓ |
1440
+ | SQL | ✗ | ✓ | ✓ | ✓ | ✓ |
1441
+ | TRAVERSE | ✗ | ✗ | ✗ | ✓ | ✓ |
1442
+
1443
+ **Stage 0**: No data, all queries fail.
1444
+
1445
+ **Stage 1** (20% answerable): Resources seeded with entity extraction. LOOKUP and FUZZY work for finding entities. SQL works for basic filtering.
1446
+
1447
+ **Stage 2** (50% answerable): Moments extracted. SQL temporal queries work. LOOKUP includes moment entities.
1448
+
1449
+ **Stage 3** (80% answerable): Affinity graph built. SEARCH and TRAVERSE become available. Multi-hop graph queries work.
1450
+
1451
+ **Stage 4** (100% answerable): Mature graph with rich historical data. All query types fully functional with high-quality results.
1452
+
1453
+ ## License
1454
+
1455
+ MIT