remdb 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187) hide show
  1. rem/__init__.py +2 -0
  2. rem/agentic/README.md +650 -0
  3. rem/agentic/__init__.py +39 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +8 -0
  6. rem/agentic/context.py +148 -0
  7. rem/agentic/context_builder.py +329 -0
  8. rem/agentic/mcp/__init__.py +0 -0
  9. rem/agentic/mcp/tool_wrapper.py +107 -0
  10. rem/agentic/otel/__init__.py +5 -0
  11. rem/agentic/otel/setup.py +151 -0
  12. rem/agentic/providers/phoenix.py +674 -0
  13. rem/agentic/providers/pydantic_ai.py +572 -0
  14. rem/agentic/query.py +117 -0
  15. rem/agentic/query_helper.py +89 -0
  16. rem/agentic/schema.py +396 -0
  17. rem/agentic/serialization.py +245 -0
  18. rem/agentic/tools/__init__.py +5 -0
  19. rem/agentic/tools/rem_tools.py +231 -0
  20. rem/api/README.md +420 -0
  21. rem/api/main.py +324 -0
  22. rem/api/mcp_router/prompts.py +182 -0
  23. rem/api/mcp_router/resources.py +536 -0
  24. rem/api/mcp_router/server.py +213 -0
  25. rem/api/mcp_router/tools.py +584 -0
  26. rem/api/routers/auth.py +229 -0
  27. rem/api/routers/chat/__init__.py +5 -0
  28. rem/api/routers/chat/completions.py +281 -0
  29. rem/api/routers/chat/json_utils.py +76 -0
  30. rem/api/routers/chat/models.py +124 -0
  31. rem/api/routers/chat/streaming.py +185 -0
  32. rem/auth/README.md +258 -0
  33. rem/auth/__init__.py +26 -0
  34. rem/auth/middleware.py +100 -0
  35. rem/auth/providers/__init__.py +13 -0
  36. rem/auth/providers/base.py +376 -0
  37. rem/auth/providers/google.py +163 -0
  38. rem/auth/providers/microsoft.py +237 -0
  39. rem/cli/README.md +455 -0
  40. rem/cli/__init__.py +8 -0
  41. rem/cli/commands/README.md +126 -0
  42. rem/cli/commands/__init__.py +3 -0
  43. rem/cli/commands/ask.py +566 -0
  44. rem/cli/commands/configure.py +497 -0
  45. rem/cli/commands/db.py +493 -0
  46. rem/cli/commands/dreaming.py +324 -0
  47. rem/cli/commands/experiments.py +1302 -0
  48. rem/cli/commands/mcp.py +66 -0
  49. rem/cli/commands/process.py +245 -0
  50. rem/cli/commands/schema.py +183 -0
  51. rem/cli/commands/serve.py +106 -0
  52. rem/cli/dreaming.py +363 -0
  53. rem/cli/main.py +96 -0
  54. rem/config.py +237 -0
  55. rem/mcp_server.py +41 -0
  56. rem/models/core/__init__.py +49 -0
  57. rem/models/core/core_model.py +64 -0
  58. rem/models/core/engram.py +333 -0
  59. rem/models/core/experiment.py +628 -0
  60. rem/models/core/inline_edge.py +132 -0
  61. rem/models/core/rem_query.py +243 -0
  62. rem/models/entities/__init__.py +43 -0
  63. rem/models/entities/file.py +57 -0
  64. rem/models/entities/image_resource.py +88 -0
  65. rem/models/entities/message.py +35 -0
  66. rem/models/entities/moment.py +123 -0
  67. rem/models/entities/ontology.py +191 -0
  68. rem/models/entities/ontology_config.py +131 -0
  69. rem/models/entities/resource.py +95 -0
  70. rem/models/entities/schema.py +87 -0
  71. rem/models/entities/user.py +85 -0
  72. rem/py.typed +0 -0
  73. rem/schemas/README.md +507 -0
  74. rem/schemas/__init__.py +6 -0
  75. rem/schemas/agents/README.md +92 -0
  76. rem/schemas/agents/core/moment-builder.yaml +178 -0
  77. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  78. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  79. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  80. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  81. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  82. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  83. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  84. rem/schemas/agents/examples/hello-world.yaml +37 -0
  85. rem/schemas/agents/examples/query.yaml +54 -0
  86. rem/schemas/agents/examples/simple.yaml +21 -0
  87. rem/schemas/agents/examples/test.yaml +29 -0
  88. rem/schemas/agents/rem.yaml +128 -0
  89. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  90. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  91. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  92. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  93. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  94. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  95. rem/services/__init__.py +16 -0
  96. rem/services/audio/INTEGRATION.md +308 -0
  97. rem/services/audio/README.md +376 -0
  98. rem/services/audio/__init__.py +15 -0
  99. rem/services/audio/chunker.py +354 -0
  100. rem/services/audio/transcriber.py +259 -0
  101. rem/services/content/README.md +1269 -0
  102. rem/services/content/__init__.py +5 -0
  103. rem/services/content/providers.py +801 -0
  104. rem/services/content/service.py +676 -0
  105. rem/services/dreaming/README.md +230 -0
  106. rem/services/dreaming/__init__.py +53 -0
  107. rem/services/dreaming/affinity_service.py +336 -0
  108. rem/services/dreaming/moment_service.py +264 -0
  109. rem/services/dreaming/ontology_service.py +54 -0
  110. rem/services/dreaming/user_model_service.py +297 -0
  111. rem/services/dreaming/utils.py +39 -0
  112. rem/services/embeddings/__init__.py +11 -0
  113. rem/services/embeddings/api.py +120 -0
  114. rem/services/embeddings/worker.py +421 -0
  115. rem/services/fs/README.md +662 -0
  116. rem/services/fs/__init__.py +62 -0
  117. rem/services/fs/examples.py +206 -0
  118. rem/services/fs/examples_paths.py +204 -0
  119. rem/services/fs/git_provider.py +935 -0
  120. rem/services/fs/local_provider.py +760 -0
  121. rem/services/fs/parsing-hooks-examples.md +172 -0
  122. rem/services/fs/paths.py +276 -0
  123. rem/services/fs/provider.py +460 -0
  124. rem/services/fs/s3_provider.py +1042 -0
  125. rem/services/fs/service.py +186 -0
  126. rem/services/git/README.md +1075 -0
  127. rem/services/git/__init__.py +17 -0
  128. rem/services/git/service.py +469 -0
  129. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  130. rem/services/phoenix/README.md +453 -0
  131. rem/services/phoenix/__init__.py +46 -0
  132. rem/services/phoenix/client.py +686 -0
  133. rem/services/phoenix/config.py +88 -0
  134. rem/services/phoenix/prompt_labels.py +477 -0
  135. rem/services/postgres/README.md +575 -0
  136. rem/services/postgres/__init__.py +23 -0
  137. rem/services/postgres/migration_service.py +427 -0
  138. rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
  139. rem/services/postgres/register_type.py +352 -0
  140. rem/services/postgres/repository.py +337 -0
  141. rem/services/postgres/schema_generator.py +379 -0
  142. rem/services/postgres/service.py +802 -0
  143. rem/services/postgres/sql_builder.py +354 -0
  144. rem/services/rem/README.md +304 -0
  145. rem/services/rem/__init__.py +23 -0
  146. rem/services/rem/exceptions.py +71 -0
  147. rem/services/rem/executor.py +293 -0
  148. rem/services/rem/parser.py +145 -0
  149. rem/services/rem/queries.py +196 -0
  150. rem/services/rem/query.py +371 -0
  151. rem/services/rem/service.py +527 -0
  152. rem/services/session/README.md +374 -0
  153. rem/services/session/__init__.py +6 -0
  154. rem/services/session/compression.py +360 -0
  155. rem/services/session/reload.py +77 -0
  156. rem/settings.py +1235 -0
  157. rem/sql/002_install_models.sql +1068 -0
  158. rem/sql/background_indexes.sql +42 -0
  159. rem/sql/install_models.sql +1038 -0
  160. rem/sql/migrations/001_install.sql +503 -0
  161. rem/sql/migrations/002_install_models.sql +1202 -0
  162. rem/utils/AGENTIC_CHUNKING.md +597 -0
  163. rem/utils/README.md +583 -0
  164. rem/utils/__init__.py +43 -0
  165. rem/utils/agentic_chunking.py +622 -0
  166. rem/utils/batch_ops.py +343 -0
  167. rem/utils/chunking.py +108 -0
  168. rem/utils/clip_embeddings.py +276 -0
  169. rem/utils/dict_utils.py +98 -0
  170. rem/utils/embeddings.py +423 -0
  171. rem/utils/examples/embeddings_example.py +305 -0
  172. rem/utils/examples/sql_types_example.py +202 -0
  173. rem/utils/markdown.py +16 -0
  174. rem/utils/model_helpers.py +236 -0
  175. rem/utils/schema_loader.py +336 -0
  176. rem/utils/sql_types.py +348 -0
  177. rem/utils/user_id.py +81 -0
  178. rem/utils/vision.py +330 -0
  179. rem/workers/README.md +506 -0
  180. rem/workers/__init__.py +5 -0
  181. rem/workers/dreaming.py +502 -0
  182. rem/workers/engram_processor.py +312 -0
  183. rem/workers/sqs_file_processor.py +193 -0
  184. remdb-0.3.7.dist-info/METADATA +1473 -0
  185. remdb-0.3.7.dist-info/RECORD +187 -0
  186. remdb-0.3.7.dist-info/WHEEL +4 -0
  187. remdb-0.3.7.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,1473 @@
1
+ Metadata-Version: 2.4
2
+ Name: remdb
3
+ Version: 0.3.7
4
+ Summary: Resources Entities Moments - Bio-inspired memory system for agentic AI workloads
5
+ Project-URL: Homepage, https://github.com/Percolation-Labs/reminiscent
6
+ Project-URL: Documentation, https://github.com/Percolation-Labs/reminiscent/blob/main/README.md
7
+ Project-URL: Repository, https://github.com/Percolation-Labs/reminiscent
8
+ Project-URL: Issues, https://github.com/Percolation-Labs/reminiscent/issues
9
+ Author-email: mr-saoirse <amartey@gmail.com>
10
+ License: MIT
11
+ Keywords: agents,ai,mcp,memory,postgresql,vector-search
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
17
+ Requires-Python: <3.13,>=3.12
18
+ Requires-Dist: aioboto3>=13.0.0
19
+ Requires-Dist: arize-phoenix>=5.0.0
20
+ Requires-Dist: asyncpg>=0.30.0
21
+ Requires-Dist: boto3>=1.35.0
22
+ Requires-Dist: click>=8.1.0
23
+ Requires-Dist: fastapi>=0.115.0
24
+ Requires-Dist: fastmcp>=0.5.0
25
+ Requires-Dist: gitpython>=3.1.45
26
+ Requires-Dist: gmft
27
+ Requires-Dist: hypercorn>=0.17.0
28
+ Requires-Dist: itsdangerous>=2.0.0
29
+ Requires-Dist: json-schema-to-pydantic>=0.2.0
30
+ Requires-Dist: kreuzberg>=3.21.0
31
+ Requires-Dist: loguru>=0.7.0
32
+ Requires-Dist: openinference-instrumentation-pydantic-ai>=0.1.0
33
+ Requires-Dist: opentelemetry-api>=1.28.0
34
+ Requires-Dist: opentelemetry-exporter-otlp-proto-grpc>=1.28.0
35
+ Requires-Dist: opentelemetry-exporter-otlp-proto-http>=1.28.0
36
+ Requires-Dist: opentelemetry-exporter-otlp>=1.28.0
37
+ Requires-Dist: opentelemetry-instrumentation-fastapi>=0.49b0
38
+ Requires-Dist: opentelemetry-instrumentation>=0.49b0
39
+ Requires-Dist: opentelemetry-sdk>=1.28.0
40
+ Requires-Dist: psycopg[binary]>=3.2.0
41
+ Requires-Dist: pydantic-ai>=0.0.14
42
+ Requires-Dist: pydantic-settings>=2.6.0
43
+ Requires-Dist: pydantic>=2.10.0
44
+ Requires-Dist: pydub>=0.25.0
45
+ Requires-Dist: python-dotenv>=1.0.0
46
+ Requires-Dist: pyyaml>=6.0.0
47
+ Requires-Dist: requests>=2.32.0
48
+ Requires-Dist: semchunk>=2.2.0
49
+ Requires-Dist: tenacity>=9.0.0
50
+ Requires-Dist: tiktoken>=0.5.0
51
+ Requires-Dist: torch>=2.0.0
52
+ Requires-Dist: uvicorn[standard]>=0.32.0
53
+ Provides-Extra: all
54
+ Requires-Dist: ipdb>=0.13.0; extra == 'all'
55
+ Requires-Dist: ipython>=8.29.0; extra == 'all'
56
+ Requires-Dist: json-schema-to-pydantic>=0.2.0; extra == 'all'
57
+ Requires-Dist: mypy>=1.13.0; extra == 'all'
58
+ Requires-Dist: pandas-stubs>=2.0.0; extra == 'all'
59
+ Requires-Dist: pillow>=10.0.0; extra == 'all'
60
+ Requires-Dist: polars>=1.0.0; extra == 'all'
61
+ Requires-Dist: pydub>=0.25.0; extra == 'all'
62
+ Requires-Dist: pytest-asyncio>=0.24.0; extra == 'all'
63
+ Requires-Dist: pytest-cov>=6.0.0; extra == 'all'
64
+ Requires-Dist: pytest-mock>=3.14.0; extra == 'all'
65
+ Requires-Dist: pytest>=8.0.0; extra == 'all'
66
+ Requires-Dist: ruff>=0.8.0; extra == 'all'
67
+ Requires-Dist: types-pyyaml>=6.0.0; extra == 'all'
68
+ Provides-Extra: audio
69
+ Requires-Dist: pydub>=0.25.0; extra == 'audio'
70
+ Provides-Extra: dev
71
+ Requires-Dist: ipdb>=0.13.0; extra == 'dev'
72
+ Requires-Dist: ipython>=8.29.0; extra == 'dev'
73
+ Requires-Dist: mypy>=1.13.0; extra == 'dev'
74
+ Requires-Dist: pandas-stubs>=2.0.0; extra == 'dev'
75
+ Requires-Dist: pytest-asyncio>=0.24.0; extra == 'dev'
76
+ Requires-Dist: pytest-cov>=6.0.0; extra == 'dev'
77
+ Requires-Dist: pytest-mock>=3.14.0; extra == 'dev'
78
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
79
+ Requires-Dist: ruff>=0.8.0; extra == 'dev'
80
+ Requires-Dist: types-pyyaml>=6.0.0; extra == 'dev'
81
+ Provides-Extra: fs
82
+ Requires-Dist: pillow>=10.0.0; extra == 'fs'
83
+ Requires-Dist: polars>=1.0.0; extra == 'fs'
84
+ Requires-Dist: pydub>=0.25.0; extra == 'fs'
85
+ Provides-Extra: schema
86
+ Requires-Dist: json-schema-to-pydantic>=0.2.0; extra == 'schema'
87
+ Description-Content-Type: text/markdown
88
+
89
+ # REM - Resources Entities Moments
90
+
91
+ Cloud-native unified memory infrastructure for agentic AI systems built with Pydantic AI, FastAPI, and FastMCP.
92
+
93
+ ## Architecture Overview
94
+
95
+ <p align="center">
96
+ <img src="https://mermaid.ink/img/Z3JhcGggVEQKICAgIEFQSVtGYXN0QVBJPGJyLz5DaGF0ICsgTUNQXSAtLT4gQUdFTlRTW0pTT04gU2NoZW1hPGJyLz5BZ2VudHNdCiAgICBBR0VOVFMgLS0-IFRPT0xTW01DUCBUb29sczxici8-NSBUb29sc10KCiAgICBUT09MUyAtLT4gUVVFUllbUkVNIFF1ZXJ5PGJyLz5EaWFsZWN0XQogICAgUVVFUlkgLS0-IERCWyhQb3N0Z3JlU1FMPGJyLz4rcGd2ZWN0b3IpXQoKICAgIEZJTEVTW0ZpbGUgUHJvY2Vzc29yXSAtLT4gRFJFQU1bRHJlYW1pbmc8YnIvPldvcmtlcnNdCiAgICBEUkVBTSAtLT4gREIKCiAgICBBR0VOVFMgLS0-IE9URUxbT3BlblRlbGVtZXRyeV0KICAgIE9URUwgLS0-IFBIT0VOSVhbQXJpemU8YnIvPlBob2VuaXhdCgogICAgRVZBTFtFdmFsdWF0aW9uPGJyLz5GcmFtZXdvcmtdIC0tPiBQSE9FTklYCgogICAgY2xhc3NEZWYgYXBpIGZpbGw6IzRBOTBFMixzdHJva2U6IzJFNUM4QSxjb2xvcjojZmZmCiAgICBjbGFzc0RlZiBhZ2VudCBmaWxsOiM3QjY4RUUsc3Ryb2tlOiM0ODNEOEIsY29sb3I6I2ZmZgogICAgY2xhc3NEZWYgZGIgZmlsbDojNTBDODc4LHN0cm9rZTojMkU3RDRFLGNvbG9yOiNmZmYKICAgIGNsYXNzRGVmIG9icyBmaWxsOiM5QjU5QjYsc3Ryb2tlOiM2QzM0ODMsY29sb3I6I2ZmZgoKICAgIGNsYXNzIEFQSSxUT09MUyBhcGkKICAgIGNsYXNzIEFHRU5UUyBhZ2VudAogICAgY2xhc3MgREIsUVVFUlkgZGIKICAgIGNsYXNzIE9URUwsUEhPRU5JWCxFVkFMIG9icwo=" alt="REM Architecture" width="700">
97
+ </p>
98
+
99
+ **Key Components:**
100
+
101
+ - **API Layer**: OpenAI-compatible chat completions + MCP server (not separate deployments)
102
+ - **Agentic Framework**: JSON Schema-based agents with no-code configuration
103
+ - **Database Layer**: PostgreSQL 18 with pgvector for multi-index memory (KV + Vector + Graph)
104
+ - **REM Query Dialect**: Custom query language with O(1) lookups, semantic search, graph traversal
105
+ - **Ingestion & Dreaming**: Background workers for content extraction and progressive index enrichment (0% → 100% answerable)
106
+ - **Observability & Evals**: OpenTelemetry tracing + Arize Phoenix + LLM-as-a-Judge evaluation framework
107
+
108
+ ## Features
109
+
110
+ | Feature | Description | Benefits |
111
+ |---------|-------------|----------|
112
+ | **OpenAI-Compatible Chat API** | Drop-in replacement for OpenAI chat completions API with streaming support | Use with existing OpenAI clients, switch models across providers (OpenAI, Anthropic, etc.) |
113
+ | **Built-in MCP Server** | FastMCP server with 4 tools + 3 resources for memory operations | Export memory to Claude Desktop, Cursor, or any MCP-compatible host |
114
+ | **REM Query Engine** | Multi-index query system (LOOKUP, FUZZY, SEARCH, SQL, TRAVERSE) with custom dialect | O(1) lookups, semantic search, graph traversal - all tenant-isolated |
115
+ | **Dreaming Workers** | Background workers for entity extraction, moment generation, and affinity matching | Automatic knowledge graph construction from resources (0% → 100% query answerable) |
116
+ | **PostgreSQL + pgvector** | CloudNativePG with PostgreSQL 18, pgvector extension, streaming replication | Production-ready vector search, no external vector DB needed |
117
+ | **AWS EKS Recipe** | Complete infrastructure-as-code with Pulumi, Karpenter, ArgoCD | Deploy to production EKS in minutes with auto-scaling and GitOps |
118
+ | **JSON Schema Agents** | Dynamic agent creation from YAML schemas via Pydantic AI factory | Define agents declaratively, version control schemas, load dynamically |
119
+ | **Content Providers** | Audio transcription (Whisper), vision (GPT-4V, Claude), PDFs, DOCX, images | Multimodal ingestion out of the box with format detection |
120
+ | **Configurable Embeddings** | Provider-agnostic embedding system (OpenAI, Cohere, Jina) | Switch embedding providers via env vars, no code changes |
121
+ | **Multi-Tenancy** | Tenant isolation at database level with automatic scoping | SaaS-ready with complete data separation per tenant |
122
+ | **Streaming Everything** | SSE for chat, background workers for embeddings, async throughout | Real-time responses, non-blocking operations, scalable |
123
+ | **Zero Vendor Lock-in** | Raw HTTP clients (no OpenAI SDK), swappable providers, open standards | Not tied to any vendor, easy to migrate, full control |
124
+
125
+ ## Quick Start
126
+
127
+ Choose your path:
128
+
129
+ - **Option 1: Package Users with Example Data** (Recommended for first-time users) - PyPI + example datasets
130
+ - **Option 2: Package Users** (Recommended for non-developers) - PyPI package + dockerized database
131
+ - **Option 3: Developers** - Clone repo, local development with uv
132
+
133
+ ---
134
+
135
+ ## Option 1: Package Users with Example Data (Recommended)
136
+
137
+ **Best for**: First-time users who want to explore REM with curated example datasets.
138
+
139
+ ```bash
140
+ # Install system dependencies
141
+ # macOS:
142
+ brew install tesseract
143
+
144
+ # Linux:
145
+ sudo apt-get install tesseract-ocr
146
+
147
+ # Install remdb
148
+ pip install remdb[all]
149
+
150
+ # Clone example datasets
151
+ git clone https://github.com/Percolation-Labs/remstack-lab.git
152
+ cd remstack-lab
153
+
154
+ # Configure REM (interactive wizard)
155
+ rem configure --install
156
+
157
+ # Start PostgreSQL
158
+ docker run -d \
159
+ --name rem-postgres \
160
+ -e POSTGRES_USER=rem \
161
+ -e POSTGRES_PASSWORD=rem \
162
+ -e POSTGRES_DB=rem \
163
+ -p 5050:5432 \
164
+ pgvector/pgvector:pg18
165
+
166
+ # Load quickstart dataset
167
+ rem db load --file datasets/quickstart/sample_data.yaml --user-id demo-user
168
+
169
+ # Optional: Set default LLM provider via environment variable
170
+ # export LLM__DEFAULT_MODEL="openai:gpt-4.1-nano" # Fast and cheap
171
+ # export LLM__DEFAULT_MODEL="anthropic:claude-sonnet-4-5-20250929" # High quality (default)
172
+
173
+ # Ask questions
174
+ rem ask --user-id demo-user "What documents exist in the system?"
175
+ rem ask --user-id demo-user "Show me meetings about API design"
176
+
177
+ # Ingest files (PDF, DOCX, images, etc.) - note: requires remstack-lab
178
+ rem process ingest datasets/formats/files/bitcoin_whitepaper.pdf --user-id demo-user --category research --tags bitcoin,whitepaper
179
+
180
+ # Query ingested content
181
+ rem ask --user-id demo-user "What is the Bitcoin whitepaper about?"
182
+
183
+ # Try other datasets
184
+ rem db load --file datasets/domains/recruitment/scenarios/candidate_pipeline/data.yaml --user-id my-company
185
+ rem ask --user-id my-company "Show me candidates with Python experience"
186
+ ```
187
+
188
+ **What you get:**
189
+ - Quickstart: 3 users, 3 resources, 3 moments, 4 messages
190
+ - Domain datasets: recruitment, legal, enterprise, misc
191
+ - Format examples: engrams, documents, conversations, files
192
+ - Jupyter notebooks and experiments
193
+
194
+ **Learn more**: [remstack-lab repository](https://github.com/Percolation-Labs/remstack-lab)
195
+
196
+ ---
197
+
198
+ ## Option 2: Package Users (No Example Data)
199
+
200
+ **Best for**: Using REM as a service (API + CLI) without modifying code, bringing your own data.
201
+
202
+ ### Step 1: Start Database and API with Docker Compose
203
+
204
+ ```bash
205
+ # Create a project directory
206
+ mkdir my-rem-project && cd my-rem-project
207
+
208
+ # Download docker-compose file from public gist
209
+ curl -O https://gist.githubusercontent.com/percolating-sirsh/d117b673bc0edfdef1a5068ccd3cf3e5/raw/docker-compose.prebuilt.yml
210
+
211
+ # IMPORTANT: Export API keys BEFORE running docker compose
212
+ # Docker Compose reads env vars at startup - exporting them after won't work!
213
+
214
+ # Required: OpenAI for embeddings (text-embedding-3-small)
215
+ export OPENAI_API_KEY="sk-..."
216
+
217
+ # Recommended: At least one chat completion provider
218
+ export ANTHROPIC_API_KEY="sk-ant-..." # Claude Sonnet 4.5 (high quality)
219
+ export CEREBRAS_API_KEY="csk-..." # Cerebras (fast, cheap inference)
220
+
221
+ # Start PostgreSQL + API
222
+ docker compose -f docker-compose.prebuilt.yml up -d
223
+
224
+ # Verify services are running
225
+ curl http://localhost:8000/health
226
+ ```
227
+
228
+ This starts:
229
+ - **PostgreSQL** with pgvector on port **5051** (connection: `postgresql://rem:rem@localhost:5051/rem`)
230
+ - **REM API** on port **8000** with OpenAI-compatible chat completions + MCP server
231
+ - Uses pre-built Docker image from Docker Hub (no local build required)
232
+
233
+ ### Step 2: Install and Configure CLI (REQUIRED)
234
+
235
+ **This step is required** before you can use REM - it installs the database schema and configures your LLM API keys.
236
+
237
+ ```bash
238
+ # Install remdb package from PyPI
239
+ pip install remdb[all]
240
+
241
+ # Configure REM (defaults to port 5051 for package users)
242
+ rem configure --install --claude-desktop
243
+ ```
244
+
245
+ The interactive wizard will:
246
+ 1. **Configure PostgreSQL**: Defaults to `postgresql://rem:rem@localhost:5051/rem` (prebuilt docker-compose)
247
+ - Just press Enter to accept defaults
248
+ - Custom database: Enter your own host/port/credentials
249
+ 2. **Configure LLM providers**: Enter your OpenAI/Anthropic API keys
250
+ 3. **Install database tables**: Creates schema, functions, indexes (**required for CLI/API to work**)
251
+ 4. **Register with Claude Desktop**: Adds REM MCP server to Claude
252
+
253
+ Configuration saved to `~/.rem/config.yaml` (can edit with `rem configure --edit`)
254
+
255
+ **Port Guide:**
256
+ - **5051**: Package users with `docker-compose.prebuilt.yml` (pre-built image)
257
+ - **5050**: Developers with `docker-compose.yml` (local build)
258
+ - **Custom**: Your own PostgreSQL database
259
+
260
+ **Next Steps:**
261
+ - See [CLI Reference](#cli-reference) for all available commands
262
+ - See [REM Query Dialect](#rem-query-dialect) for query examples
263
+ - See [API Endpoints](#api-endpoints) for OpenAI-compatible API usage
264
+
265
+ ### Step 3: Load Sample Data (Optional but Recommended)
266
+
267
+ **Option A: Clone example datasets** (Recommended - works with all README examples)
268
+
269
+ ```bash
270
+ # Clone datasets repository
271
+ git clone https://github.com/Percolation-Labs/remstack-lab.git
272
+
273
+ # Load quickstart dataset
274
+ rem db load --file remstack-lab/datasets/quickstart/sample_data.yaml --user-id demo-user
275
+
276
+ # Test with sample queries
277
+ rem ask --user-id demo-user "What documents exist in the system?"
278
+ rem ask --user-id demo-user "Show me meetings about API design"
279
+ rem ask --user-id demo-user "Who is Sarah Chen?"
280
+
281
+ # Try domain-specific datasets
282
+ rem db load --file remstack-lab/datasets/domains/recruitment/scenarios/candidate_pipeline/data.yaml --user-id my-company
283
+ rem ask --user-id my-company "Show me candidates with Python experience"
284
+ ```
285
+
286
+ **Option B: Bring your own data**
287
+
288
+ ```bash
289
+ # Ingest your own files
290
+ echo "REM is a bio-inspired memory system for agentic AI workloads." > test-doc.txt
291
+ rem process ingest test-doc.txt --user-id test-user --category documentation --tags rem,ai
292
+
293
+ # Query your ingested data
294
+ rem ask --user-id test-user "What do you know about REM from my knowledge base?"
295
+ ```
296
+
297
+ ### Step 4: Test the API
298
+
299
+ ```bash
300
+ # Test the OpenAI-compatible chat completions API
301
+ curl -X POST http://localhost:8000/api/v1/chat/completions \
302
+ -H "Content-Type: application/json" \
303
+ -H "X-User-Id: demo-user" \
304
+ -d '{
305
+ "model": "anthropic:claude-sonnet-4-5-20250929",
306
+ "messages": [{"role": "user", "content": "What documents did Sarah Chen author?"}],
307
+ "stream": false
308
+ }'
309
+ ```
310
+
311
+ **Available Commands:**
312
+ - `rem ask` - Natural language queries to REM
313
+ - `rem process ingest <file>` - Full ingestion pipeline (storage + parsing + embedding + database)
314
+ - `rem process uri <file>` - READ-ONLY parsing (no database storage, useful for testing parsers)
315
+ - `rem db load --file <yaml>` - Load structured datasets directly
316
+
317
+ ## Example Datasets
318
+
319
+ 🎯 **Recommended**: Clone [remstack-lab](https://github.com/Percolation-Labs/remstack-lab) for curated datasets organized by domain and format.
320
+
321
+ **What's included:**
322
+ - **Quickstart**: Minimal dataset (3 users, 3 resources, 3 moments) - perfect for first-time users
323
+ - **Domains**: Recruitment (CV parsing), Legal (contracts), Enterprise (team collaboration)
324
+ - **Formats**: Engrams (voice memos), Documents (markdown/PDF), Conversations (chat logs)
325
+ - **Evaluation**: Golden datasets for Phoenix-based agent testing
326
+
327
+ **Working from remstack-lab:**
328
+ ```bash
329
+ cd remstack-lab
330
+
331
+ # Load any dataset
332
+ rem db load --file datasets/quickstart/sample_data.yaml --user-id demo-user
333
+
334
+ # Explore formats
335
+ rem db load --file datasets/formats/engrams/scenarios/team_meeting/team_standup_meeting.yaml --user-id demo-user
336
+
337
+ # Try domain-specific examples
338
+ rem db load --file datasets/domains/recruitment/scenarios/candidate_pipeline/data.yaml --user-id acme-corp
339
+ ```
340
+
341
+ ## See Also
342
+
343
+ - [REM Query Dialect](#rem-query-dialect) - LOOKUP, SEARCH, TRAVERSE, SQL query types
344
+ - [API Endpoints](#api-endpoints) - OpenAI-compatible chat completions, MCP server
345
+ - [CLI Reference](#cli-reference) - Complete command-line interface documentation
346
+ - [Bring Your Own Agent](#bring-your-own-agent) - Create custom agents with your own prompts and tools
347
+ - [Production Deployment](#production-deployment) - AWS EKS with Kubernetes
348
+ - [Example Datasets](https://github.com/Percolation-Labs/remstack-lab) - Curated datasets by domain and format
349
+
350
+ ---
351
+
352
+ ## Bring Your Own Agent
353
+
354
+ REM allows you to create **custom agents** with your own system prompts, tools, and output schemas. Custom agents are stored in the database and dynamically loaded when referenced, enabling **no-code agent creation** without modifying the codebase.
355
+
356
+ ### How It Works
357
+
358
+ 1. **Define Agent Schema** - Create a YAML file with your agent's prompt, tools, and output structure
359
+ 2. **Ingest Schema** - Use `rem process ingest` to store the schema in the database
360
+ 3. **Use Your Agent** - Reference your agent by name with `rem ask <agent-name> "query"`
361
+
362
+ When you run `rem ask my-agent "query"`, REM:
363
+ 1. Checks if `my-agent` exists in the filesystem (`schemas/agents/`)
364
+ 2. If not found, performs a **LOOKUP** query on the `schemas` table in the database
365
+ 3. Loads the schema dynamically and creates a Pydantic AI agent
366
+ 4. Runs your query with the custom agent
367
+
368
+ ### Expected Behavior
369
+
370
+ **Schema Ingestion Flow** (`rem process ingest my-agent.yaml`):
371
+ - Parse YAML file to extract JSON Schema content
372
+ - Extract `json_schema_extra.kind` field → maps to `category` column
373
+ - Extract `json_schema_extra.provider_configs` → stores provider configurations
374
+ - Extract `json_schema_extra.embedding_fields` → stores semantic search fields
375
+ - Create `Schema` entity in `schemas` table with `user_id` scoping
376
+ - Schema is now queryable via `LOOKUP "my-agent" FROM schemas`
377
+
378
+ **Agent Loading Flow** (`rem ask my-agent "query"`):
379
+ 1. `load_agent_schema("my-agent")` checks filesystem cache → miss
380
+ 2. Falls back to database: `LOOKUP "my-agent" FROM schemas WHERE user_id = '<user-id>'`
381
+ 3. Returns `Schema.spec` (JSON Schema dict) from database
382
+ 4. `create_agent()` factory creates Pydantic AI agent from schema
383
+ 5. Agent runs with tools specified in `json_schema_extra.tools`
384
+ 6. Returns structured output defined in `properties` field
385
+
386
+ ### Quick Example
387
+
388
+ **Step 1: Create Agent Schema** (`my-research-assistant.yaml`)
389
+
390
+ ```yaml
391
+ type: object
392
+ description: |
393
+ You are a research assistant that helps users find and analyze documents.
394
+
395
+ Use the search_rem tool to find relevant documents, then analyze and summarize them.
396
+ Be concise and cite specific documents in your responses.
397
+
398
+ properties:
399
+ summary:
400
+ type: string
401
+ description: A concise summary of findings
402
+ sources:
403
+ type: array
404
+ items:
405
+ type: string
406
+ description: List of document labels referenced
407
+
408
+ required:
409
+ - summary
410
+ - sources
411
+
412
+ json_schema_extra:
413
+ kind: agent
414
+ name: research-assistant
415
+ version: 1.0.0
416
+ tools:
417
+ - search_rem
418
+ - ask_rem_agent
419
+ resources: []
420
+ ```
421
+
422
+ **For more examples**, see:
423
+ - Simple agent (no tools): `src/rem/schemas/agents/examples/simple.yaml`
424
+ - Agent with REM tools: `src/rem/schemas/agents/core/rem-query-agent.yaml`
425
+ - Ontology extractor: `src/rem/schemas/agents/examples/cv-parser.yaml`
426
+
427
+ **Step 2: Ingest Schema into Database**
428
+
429
+ ```bash
430
+ # Ingest the schema (stores in database schemas table)
431
+ rem process ingest my-research-assistant.yaml \
432
+ --user-id my-user \
433
+ --category agents \
434
+ --tags custom,research
435
+
436
+ # Verify schema is in database (should show schema details)
437
+ rem ask "LOOKUP 'my-research-assistant' FROM schemas" --user-id my-user
438
+ ```
439
+
440
+ **Step 3: Use Your Custom Agent**
441
+
442
+ ```bash
443
+ # Run a query with your custom agent
444
+ rem ask research-assistant "Find documents about machine learning architecture" \
445
+ --user-id my-user
446
+
447
+ # With streaming
448
+ rem ask research-assistant "Summarize recent API design documents" \
449
+ --user-id my-user \
450
+ --stream
451
+
452
+ # With session continuity
453
+ rem ask research-assistant "What did we discuss about ML?" \
454
+ --user-id my-user \
455
+ --session-id abc-123
456
+ ```
457
+
458
+ ### Agent Schema Structure
459
+
460
+ Every agent schema must include:
461
+
462
+ **Required Fields:**
463
+ - `type: object` - JSON Schema type (always "object")
464
+ - `description` - System prompt with instructions for the agent
465
+ - `properties` - Output schema defining structured response fields
466
+
467
+ **Optional Metadata** (`json_schema_extra`):
468
+ - `kind` - Agent category ("agent", "evaluator", etc.) → maps to `Schema.category`
469
+ - `name` - Agent identifier (used for LOOKUP)
470
+ - `version` - Semantic version (e.g., "1.0.0")
471
+ - `tools` - List of MCP tools to load (e.g., `["search_rem", "lookup_rem"]`)
472
+ - `resources` - List of MCP resources to expose (e.g., `["user_profile"]`)
473
+ - `provider_configs` - Multi-provider testing configurations (for ontology extractors)
474
+ - `embedding_fields` - Fields to embed for semantic search (for ontology extractors)
475
+
476
+ ### Available MCP Tools
477
+
478
+ REM provides **4 built-in MCP tools** your agents can use:
479
+
480
+ | Tool | Purpose | Parameters |
481
+ |------|---------|------------|
482
+ | `search_rem` | Execute REM queries (LOOKUP, FUZZY, SEARCH, SQL, TRAVERSE) | `query_type`, `entity_key`, `query_text`, `table`, `sql_query`, `initial_query`, `edge_types`, `depth` |
483
+ | `ask_rem_agent` | Natural language to REM query via agent-driven reasoning | `query`, `agent_schema`, `agent_version` |
484
+ | `ingest_into_rem` | Full file ingestion pipeline (read → store → parse → chunk → embed) | `file_uri`, `category`, `tags`, `is_local_server` |
485
+ | `read_resource` | Access MCP resources (schemas, status) for Claude Desktop | `uri` |
486
+
487
+ **Tool Reference**: Tools are defined in `src/rem/api/mcp_router/tools.py`
488
+
489
+ **Note**: `search_rem` is a unified tool that handles all REM query types via the `query_type` parameter:
490
+ - `query_type="lookup"` - O(1) entity lookup by label
491
+ - `query_type="fuzzy"` - Fuzzy text matching with similarity threshold
492
+ - `query_type="search"` - Semantic vector search (table-specific)
493
+ - `query_type="sql"` - Direct SQL queries (WHERE clause)
494
+ - `query_type="traverse"` - Graph traversal with depth control
495
+
496
+ ### Multi-User Isolation
497
+
498
+ Custom agents are **scoped by `user_id`**, ensuring complete data isolation:
499
+
500
+ ```bash
501
+ # User A creates a custom agent
502
+ rem process ingest my-agent.yaml --user-id user-a --category agents
503
+
504
+ # User B cannot see User A's agent
505
+ rem ask my-agent "test" --user-id user-b
506
+ # ❌ Error: Schema not found (LOOKUP returns no results for user-b)
507
+
508
+ # User A can use their agent
509
+ rem ask my-agent "test" --user-id user-a
510
+ # ✅ Works - LOOKUP finds schema for user-a
511
+ ```
512
+
513
+ ### Advanced: Ontology Extractors
514
+
515
+ Custom agents can also be used as **ontology extractors** to extract structured knowledge from files. See [CLAUDE.md](../CLAUDE.md#ontology-extraction-pattern) for details on:
516
+ - Multi-provider testing (`provider_configs`)
517
+ - Semantic search configuration (`embedding_fields`)
518
+ - File matching rules (`OntologyConfig`)
519
+ - Dreaming workflow integration
520
+
521
+ ### Troubleshooting
522
+
523
+ **Schema not found error:**
524
+ ```bash
525
+ # Check if schema was ingested correctly
526
+ rem ask "SEARCH 'my-agent' FROM schemas" --user-id my-user
527
+
528
+ # List all schemas for your user
529
+ rem ask "SELECT name, category, created_at FROM schemas ORDER BY created_at DESC LIMIT 10" --user-id my-user
530
+ ```
531
+
532
+ **Agent not loading tools:**
533
+ - Verify `json_schema_extra.tools` lists correct tool names
534
+ - Valid tool names: `search_rem`, `ask_rem_agent`, `ingest_into_rem`, `read_resource`
535
+ - Check MCP tool names in `src/rem/api/mcp_router/tools.py`
536
+ - Tools are case-sensitive: use `search_rem`, not `Search_REM`
537
+
538
+ **Agent not returning structured output:**
539
+ - Ensure `properties` field defines all expected output fields
540
+ - Use `required` field to mark mandatory fields
541
+ - Check agent response with `--stream` disabled to see full JSON output
542
+
543
+ ---
544
+
545
+ ## REM Query Dialect
546
+
547
+ REM provides a custom query language designed for **LLM-driven iterated retrieval** with performance guarantees.
548
+
549
+ ### Design Philosophy
550
+
551
+ Unlike traditional single-shot SQL queries, the REM dialect is optimized for **multi-turn exploration** where LLMs participate in query planning:
552
+
553
+ - **Iterated Queries**: Queries return partial results that LLMs use to refine subsequent queries
554
+ - **Composable WITH Syntax**: Chain operations together (e.g., `TRAVERSE FROM ... WITH LOOKUP "..."`)
555
+ - **Mixed Indexes**: Combines exact lookups (O(1)), semantic search (vector), and graph traversal
556
+ - **Query Planner Participation**: Results include metadata for LLMs to decide next steps
557
+
558
+ **Example Multi-Turn Flow**:
559
+ ```
560
+ Turn 1: LOOKUP "sarah-chen" → Returns entity + available edge types
561
+ Turn 2: TRAVERSE FROM "sarah-chen" TYPE "authored_by" DEPTH 1 → Returns connected documents
562
+ Turn 3: SEARCH "architecture decisions" WITH TRAVERSE FROM "sarah-chen" → Combines semantic + graph
563
+ ```
564
+
565
+ This enables LLMs to **progressively build context** rather than requiring perfect queries upfront.
566
+
567
+ See [REM Query Dialect (AST)](#rem-query-dialect-ast) for complete grammar specification.
568
+
569
+ ### Query Types
570
+
571
+ #### `LOOKUP` - O(1) Exact Label Lookup
572
+
573
+ Fast exact match on entity labels (natural language identifiers, not UUIDs).
574
+
575
+ ```sql
576
+ LOOKUP "sarah-chen" FROM resources
577
+ LOOKUP "api-design-v2" FROM resources WHERE category = "projects"
578
+ ```
579
+
580
+ **Performance**: O(1) - indexed on `label` column
581
+ **Returns**: Single entity or null
582
+ **Use case**: Fetch specific known entities by human-readable name
583
+
584
+ #### `FUZZY` - Fuzzy Text Search
585
+
586
+ Fuzzy matching for partial names or misspellings using PostgreSQL trigram similarity.
587
+
588
+ ```sql
589
+ FUZZY "sara" FROM resources LIMIT 10
590
+ FUZZY "api desgin" FROM resources THRESHOLD 0.3 LIMIT 5
591
+ ```
592
+
593
+ **Performance**: O(n) with pg_trgm GIN index (fast for small-medium datasets)
594
+ **Returns**: Ranked list by similarity score
595
+ **Use case**: Handle typos, partial names, or when exact label is unknown
596
+
597
+ #### `SEARCH` - Semantic Vector Search
598
+
599
+ Semantic search using pgvector embeddings with cosine similarity.
600
+
601
+ ```sql
602
+ SEARCH "machine learning architecture" FROM resources LIMIT 10
603
+ SEARCH "contract disputes" FROM resources WHERE tags @> ARRAY['legal'] LIMIT 5
604
+ ```
605
+
606
+ **Performance**: O(log n) with HNSW index
607
+ **Returns**: Ranked list of semantically similar entities
608
+ **Use case**: Find conceptually related content without exact keyword matches
609
+
610
+ #### `TRAVERSE` - Recursive Graph Traversal
611
+
612
+ Follow `graph_edges` relationships across the knowledge graph.
613
+
614
+ ```sql
615
+ TRAVERSE FROM "sarah-chen" TYPE "authored_by" DEPTH 2
616
+ TRAVERSE FROM "api-design-v2" TYPE "references,depends_on" DEPTH 3
617
+ ```
618
+
619
+ **Features**:
620
+ - **Polymorphic**: Seamlessly traverses `resources`, `moments`, `users` via `all_graph_edges` view
621
+ - **Filtering**: Filter by one or multiple edge types (comma-separated)
622
+ - **Depth Control**: Configurable recursion depth (default: 2)
623
+ - **Data Model**: Requires `InlineEdge` JSON structure in `graph_edges` column
624
+
625
+ **Returns**: Graph of connected entities with edge metadata
626
+ **Use case**: Explore relationships, find connected entities, build context
627
+
628
+ #### Direct SQL Queries
629
+
630
+ Raw SQL for complex temporal, aggregation, or custom queries.
631
+
632
+ ```sql
633
+ SELECT * FROM resources WHERE created_at > NOW() - INTERVAL '7 days' ORDER BY created_at DESC LIMIT 20
634
+ SELECT category, COUNT(*) as count FROM resources GROUP BY category
635
+ WITH recent AS (SELECT * FROM resources WHERE created_at > NOW() - INTERVAL '1 day') SELECT * FROM recent
636
+ ```
637
+
638
+ **Performance**: Depends on query and indexes
639
+ **Returns**: Raw query results
640
+ **Use case**: Complex filtering, aggregations, temporal queries
641
+ **Allowed**: SELECT, INSERT, UPDATE, WITH (read + data modifications)
642
+ **Blocked**: DROP, DELETE, TRUNCATE, ALTER (destructive operations)
643
+ **Note**: Can be used standalone or with `WITH` syntax for composition
644
+
645
+ ### Graph Edge Format
646
+
647
+ Edges stored inline using `InlineEdge` pattern with human-readable destination labels.
648
+
649
+ ```json
650
+ {
651
+ "dst": "sarah-chen",
652
+ "rel_type": "authored_by",
653
+ "weight": 1.0,
654
+ "properties": {
655
+ "dst_entity_type": "users:engineers/sarah-chen",
656
+ "created_at": "2025-01-15T10:30:00Z"
657
+ }
658
+ }
659
+ ```
660
+
661
+ **Destination Entity Type Convention** (`properties.dst_entity_type`):
662
+
663
+ Format: `<table_schema>:<category>/<key>`
664
+
665
+ Examples:
666
+ - `"resources:managers/bob"` → Look up bob in resources table with category="managers"
667
+ - `"users:engineers/sarah-chen"` → Look up sarah-chen in users table
668
+ - `"moments:meetings/standup-2024-01"` → Look up in moments table
669
+ - `"resources/api-design-v2"` → Look up in resources table (no category)
670
+ - `"bob"` → Defaults to resources table, no category
671
+
672
+ **Edge Type Format** (`rel_type`):
673
+ - Use snake_case: `"authored_by"`, `"depends_on"`, `"references"`
674
+ - Be specific but consistent
675
+ - Prefer passive-voice names (e.g., `authored_by` rather than `wrote`) so the edge reads correctly from either endpoint
676
+
677
+ ### Multi-Turn Iterated Retrieval
678
+
679
+ REM enables agents to conduct multi-turn database conversations:
680
+
681
+ 1. **Initial Query**: Agent runs SEARCH to find candidates
682
+ 2. **Refinement**: Agent analyzes results, runs LOOKUP on specific entities
683
+ 3. **Context Expansion**: Agent runs TRAVERSE to find related entities
684
+ 4. **Temporal Filter**: Agent runs SQL to filter by time range
685
+ 5. **Final Answer**: Agent synthesizes knowledge from all queries
686
+
687
+ **Plan Memos**: Agents track query plans in scratchpad for iterative refinement.
688
+
689
+ ### Query Performance Contracts
690
+
691
+ | Query Type | Complexity | Index | Use When |
692
+ |------------|-----------|-------|----------|
693
+ | `LOOKUP` | O(1) | B-tree on `label` | You know exact entity name |
694
+ | `FUZZY` | O(n) | GIN on `label` (pg_trgm) | Handling typos/partial matches |
695
+ | `SEARCH` | O(log n) | HNSW on `embedding` | Semantic similarity needed |
696
+ | `TRAVERSE` | O(depth × edges) | B-tree on `graph_edges` | Exploring relationships |
697
+ | `SQL` | Variable | Custom indexes | Complex filtering/aggregation |
698
+
699
+ ### Example: Multi-Query Session
700
+
701
+ ```sql
702
+ # Query 1: Find relevant documents
703
+ SEARCH "API migration planning" FROM resources LIMIT 5
704
+
705
+ # Query 2: Get specific document
706
+ LOOKUP "tidb-migration-spec" FROM resources
707
+
708
+ # Query 3: Find related people
709
+ TRAVERSE FROM "tidb-migration-spec" TYPE "authored_by,reviewed_by" DEPTH 1
710
+
711
+ # Query 4: Recent activity
712
+ SELECT * FROM moments WHERE
713
+ 'tidb-migration' = ANY(topic_tags) AND
714
+ start_time > NOW() - INTERVAL '30 days'
715
+ ```
716
+
717
+ ### Tenant Isolation
718
+
719
+ All queries automatically scoped by `user_id` for complete data isolation:
720
+
721
+ ```sql
722
+ -- Automatically filtered to user's data
723
+ SEARCH "contracts" FROM resources LIMIT 10
724
+
725
+ -- No cross-user data leakage
726
+ TRAVERSE FROM "project-x" TYPE "references" DEPTH 3
727
+ ```
728
+
729
+ ## API Endpoints
730
+
731
+ ### Chat Completions (OpenAI-compatible)
732
+
733
+ ```bash
734
+ POST /api/v1/chat/completions
735
+ ```
736
+
737
+ **Headers**:
738
+ - `X-Tenant-Id`: Tenant identifier (required for REM)
739
+ - `X-User-Id`: User identifier
740
+ - `X-Session-Id`: Session/conversation identifier
741
+ - `X-Agent-Schema`: Agent schema URI to use
742
+
743
+ **Body**:
744
+ ```json
745
+ {
746
+ "model": "anthropic:claude-sonnet-4-5-20250929",
747
+ "messages": [
748
+ {"role": "user", "content": "Find all documents Sarah authored"}
749
+ ],
750
+ "stream": true,
751
+ "response_format": {"type": "text"}
752
+ }
753
+ ```
754
+
755
+ **Streaming Response** (SSE):
756
+ ```
757
+ data: {"id": "chatcmpl-123", "choices": [{"delta": {"role": "assistant", "content": ""}}]}
758
+
759
+ data: {"id": "chatcmpl-123", "choices": [{"delta": {"content": "[Calling: search_rem]"}}]}
760
+
761
+ data: {"id": "chatcmpl-123", "choices": [{"delta": {"content": "Found 3 documents..."}}]}
762
+
763
+ data: {"id": "chatcmpl-123", "choices": [{"delta": {}, "finish_reason": "stop"}]}
764
+
765
+ data: [DONE]
766
+ ```
767
+
768
+ ### MCP Endpoint
769
+
770
+ ```bash
771
+ # MCP HTTP transport
772
+ POST /api/v1/mcp
773
+ ```
774
+
775
+ Tools and resources for REM query execution, resource management, file operations.
776
+
777
+ ### Health Check
778
+
779
+ ```bash
780
+ GET /health
781
+ # {"status": "healthy", "version": "0.1.0"}
782
+ ```
783
+
784
+ ## CLI Reference
785
+
786
+ REM provides a comprehensive command-line interface for all operations.
787
+
788
+ ### Configuration & Server
789
+
790
+ #### `rem configure` - Interactive Setup Wizard
791
+
792
+ Set up REM with PostgreSQL, LLM providers, and S3 storage. **Defaults to port 5051 (package users).**
793
+
794
+ ```bash
795
+ # Complete setup (recommended for package users)
796
+ rem configure --install --claude-desktop
797
+
798
+ # This runs:
799
+ # 1. Interactive wizard (creates ~/.rem/config.yaml)
800
+ # 2. Installs database tables (rem db migrate)
801
+ # 3. Registers REM MCP server with Claude Desktop
802
+
803
+ # Other options:
804
+ rem configure # Just run wizard
805
+ rem configure --install # Wizard + database install
806
+ rem configure --show # Show current configuration
807
+ rem configure --edit # Edit configuration in $EDITOR
808
+ ```
809
+
810
+ **Default Configuration:**
811
+ - **Package users**: `localhost:5051` (docker-compose.prebuilt.yml with Docker Hub image)
812
+ - **Developers**: Change to `localhost:5050` during wizard (docker-compose.yml with local build)
813
+ - **Custom database**: Enter your own host/port/credentials
814
+
815
+ **Configuration File:** `~/.rem/config.yaml`
816
+
817
+ ```yaml
818
+ postgres:
819
+ # Package users (prebuilt)
820
+ connection_string: postgresql://rem:rem@localhost:5051/rem
821
+ # OR Developers (local build)
822
+ # connection_string: postgresql://rem:rem@localhost:5050/rem
823
+ pool_min_size: 5
824
+ pool_max_size: 20
825
+
826
+ llm:
827
+ default_model: anthropic:claude-sonnet-4-5-20250929
828
+ openai_api_key: sk-...
829
+ anthropic_api_key: sk-ant-...
830
+
831
+ s3:
832
+ bucket_name: rem-storage
833
+ region: us-east-1
834
+ ```
835
+
836
+ **Precedence:** Environment variables > Config file > Defaults
837
+
838
+ **Port Guide:**
839
+ - **5051**: Package users with `docker-compose.prebuilt.yml` (recommended)
840
+ - **5050**: Developers with `docker-compose.yml` (local development)
841
+ - **Custom**: Your own PostgreSQL instance
842
+
843
+ #### `rem mcp` - Run MCP Server
844
+
845
+ Run the FastMCP server for Claude Desktop integration.
846
+
847
+ ```bash
848
+ # Stdio mode (for Claude Desktop)
849
+ rem mcp
850
+
851
+ # HTTP mode (for testing)
852
+ rem mcp --http --port 8001
853
+ ```
854
+
855
+ #### `rem serve` - Start API Server
856
+
857
+ Start the FastAPI server with uvicorn.
858
+
859
+ ```bash
860
+ # Use settings from config
861
+ rem serve
862
+
863
+ # Development mode (auto-reload)
864
+ rem serve --reload
865
+
866
+ # Production mode (4 workers)
867
+ rem serve --workers 4
868
+
869
+ # Bind to all interfaces
870
+ rem serve --host 0.0.0.0 --port 8080
871
+
872
+ # Override log level
873
+ rem serve --log-level debug
874
+ ```
875
+
876
+ ### Database Management
877
+
878
+ #### `rem db migrate` - Run Migrations
879
+
880
+ Apply database migrations (install.sql and install_models.sql).
881
+
882
+ ```bash
883
+ # Apply all migrations
884
+ rem db migrate
885
+
886
+ # Core infrastructure only (extensions, functions)
887
+ rem db migrate --install
888
+
889
+ # Entity tables only (Resource, Message, etc.)
890
+ rem db migrate --models
891
+
892
+ # Background indexes (HNSW for vectors)
893
+ rem db migrate --background-indexes
894
+
895
+ # Custom connection string
896
+ rem db migrate --connection "postgresql://user:pass@host:5432/db"
897
+
898
+ # Custom SQL directory
899
+ rem db migrate --sql-dir /path/to/sql
900
+ ```
901
+
902
+ #### `rem db status` - Migration Status
903
+
904
+ Show applied migrations and execution times.
905
+
906
+ ```bash
907
+ rem db status
908
+ ```
909
+
910
+ #### `rem db rebuild-cache` - Rebuild KV Cache
911
+
912
+ Rebuild KV_STORE cache from entity tables (after database restart or bulk imports).
913
+
914
+ ```bash
915
+ rem db rebuild-cache
916
+ ```
917
+
918
+ ### Schema Management
919
+
920
+ #### `rem db schema generate` - Generate SQL Schema
921
+
922
+ Generate database schema from Pydantic models.
923
+
924
+ ```bash
925
+ # Generate install_models.sql from entity models
926
+ rem db schema generate \
927
+ --models src/rem/models/entities \
928
+ --output rem/src/rem/sql/install_models.sql
929
+
930
+ # Generate migration file
931
+ rem db schema generate \
932
+ --models src/rem/models/entities \
933
+ --output rem/src/rem/sql/migrations/003_add_fields.sql
934
+ ```
935
+
936
+ #### `rem db schema indexes` - Generate Background Indexes
937
+
938
+ Generate SQL for background index creation (HNSW for vectors).
939
+
940
+ ```bash
941
+ # Generate background_indexes.sql
942
+ rem db schema indexes \
943
+ --models src/rem/models/entities \
944
+ --output rem/src/rem/sql/background_indexes.sql
945
+ ```
946
+
947
+ #### `rem db schema validate` - Validate Models
948
+
949
+ Validate Pydantic models for schema generation.
950
+
951
+ ```bash
952
+ rem db schema validate --models src/rem/models/entities
953
+ ```
954
+
955
+ ### File Processing
956
+
957
+ #### `rem process files` - Process Files
958
+
959
+ Process files with optional custom extractor (ontology extraction).
960
+
961
+ ```bash
962
+ # Process all completed files for tenant
963
+ rem process files \
964
+ --tenant-id acme-corp \
965
+ --status completed \
966
+ --limit 10
967
+
968
+ # Process with custom extractor
969
+ rem process files \
970
+ --tenant-id acme-corp \
971
+ --extractor cv-parser-v1 \
972
+ --limit 50
973
+
974
+ # Process files from the last 7 days
975
+ rem process files \
976
+ --tenant-id acme-corp \
977
+ --lookback-hours 168
978
+ ```
979
+
980
+ #### `rem process ingest` - Ingest File into REM
981
+
982
+ Ingest a file into REM with full pipeline (storage + parsing + embedding + database).
983
+
984
+ ```bash
985
+ # Ingest local file
986
+ rem process ingest /path/to/document.pdf \
987
+ --user-id user-123 \
988
+ --category legal \
989
+ --tags contract,2024
990
+
991
+ # Ingest with minimal options
992
+ rem process ingest ./meeting-notes.md --user-id user-123
993
+ ```
994
+
995
+ #### `rem process uri` - Parse File (Read-Only)
996
+
997
+ Parse a file and extract content **without** storing to database (useful for testing parsers).
998
+
999
+ ```bash
1000
+ # Parse local file (output to stdout)
1001
+ rem process uri /path/to/document.pdf
1002
+
1003
+ # Parse and save extracted content to file
1004
+ rem process uri /path/to/document.pdf --save output.json
1005
+
1006
+ # Parse S3 file
1007
+ rem process uri s3://bucket/key.docx --output text
1008
+ ```
1009
+
1010
+ ### Memory & Knowledge Extraction (Dreaming)
1011
+
1012
+ #### `rem dreaming full` - Complete Workflow
1013
+
1014
+ Run full dreaming workflow: extractors → moments → affinity → user model.
1015
+
1016
+ ```bash
1017
+ # Full workflow for user
1018
+ rem dreaming full \
1019
+ --user-id user-123 \
1020
+ --tenant-id acme-corp
1021
+
1022
+ # Skip ontology extractors
1023
+ rem dreaming full \
1024
+ --user-id user-123 \
1025
+ --tenant-id acme-corp \
1026
+ --skip-extractors
1027
+
1028
+ # Process last 24 hours only
1029
+ rem dreaming full \
1030
+ --user-id user-123 \
1031
+ --tenant-id acme-corp \
1032
+ --lookback-hours 24
1033
+
1034
+ # Limit resources processed
1035
+ rem dreaming full \
1036
+ --user-id user-123 \
1037
+ --tenant-id acme-corp \
1038
+ --limit 100
1039
+ ```
1040
+
1041
+ #### `rem dreaming custom` - Custom Extractor
1042
+
1043
+ Run specific ontology extractor on user's data.
1044
+
1045
+ ```bash
1046
+ # Run CV parser on user's files
1047
+ rem dreaming custom \
1048
+ --user-id user-123 \
1049
+ --tenant-id acme-corp \
1050
+ --extractor cv-parser-v1
1051
+
1052
+ # Process last week's files
1053
+ rem dreaming custom \
1054
+ --user-id user-123 \
1055
+ --tenant-id acme-corp \
1056
+ --extractor contract-analyzer-v1 \
1057
+ --lookback-hours 168 \
1058
+ --limit 50
1059
+ ```
1060
+
1061
+ #### `rem dreaming moments` - Extract Moments
1062
+
1063
+ Extract temporal narratives from resources.
1064
+
1065
+ ```bash
1066
+ # Generate moments for user
1067
+ rem dreaming moments \
1068
+ --user-id user-123 \
1069
+ --tenant-id acme-corp \
1070
+ --limit 50
1071
+
1072
+ # Process last 7 days
1073
+ rem dreaming moments \
1074
+ --user-id user-123 \
1075
+ --tenant-id acme-corp \
1076
+ --lookback-hours 168
1077
+ ```
1078
+
1079
+ #### `rem dreaming affinity` - Build Relationships
1080
+
1081
+ Build semantic relationships between resources using embeddings.
1082
+
1083
+ ```bash
1084
+ # Build affinity graph for user
1085
+ rem dreaming affinity \
1086
+ --user-id user-123 \
1087
+ --tenant-id acme-corp \
1088
+ --limit 100
1089
+
1090
+ # Process recent resources only
1091
+ rem dreaming affinity \
1092
+ --user-id user-123 \
1093
+ --tenant-id acme-corp \
1094
+ --lookback-hours 24
1095
+ ```
1096
+
1097
+ #### `rem dreaming user-model` - Update User Model
1098
+
1099
+ Update user model from recent activity (preferences, interests, patterns).
1100
+
1101
+ ```bash
1102
+ # Update user model
1103
+ rem dreaming user-model \
1104
+ --user-id user-123 \
1105
+ --tenant-id acme-corp
1106
+ ```
1107
+
1108
+ ### Evaluation & Experiments
1109
+
1110
+ #### `rem experiments` - Experiment Management
1111
+
1112
+ Manage evaluation experiments with datasets, prompts, and traces.
1113
+
1114
+ ```bash
1115
+ # Create experiment configuration
1116
+ rem experiments create my-evaluation \
1117
+ --agent ask_rem \
1118
+ --evaluator rem-lookup-correctness \
1119
+ --description "Baseline evaluation"
1120
+
1121
+ # Run experiment
1122
+ rem experiments run my-evaluation
1123
+
1124
+ # List experiments
1125
+ rem experiments list
1126
+ rem experiments show my-evaluation
1127
+ ```
1128
+
1129
+ #### `rem experiments dataset` - Dataset Management
1130
+
1131
+ ```bash
1132
+ # Create dataset from CSV
1133
+ rem experiments dataset create rem-lookup-golden \
1134
+ --from-csv golden.csv \
1135
+ --input-keys query \
1136
+ --output-keys expected_label,expected_type
1137
+
1138
+ # Add more examples
1139
+ rem experiments dataset add rem-lookup-golden \
1140
+ --from-csv more-data.csv \
1141
+ --input-keys query \
1142
+ --output-keys expected_label,expected_type
1143
+
1144
+ # List datasets
1145
+ rem experiments dataset list
1146
+ ```
1147
+
1148
+ #### `rem experiments prompt` - Prompt Management
1149
+
1150
+ ```bash
1151
+ # Create agent prompt
1152
+ rem experiments prompt create hello-world \
1153
+ --system-prompt "You are a helpful assistant." \
1154
+ --model-name gpt-4o
1155
+
1156
+ # List prompts
1157
+ rem experiments prompt list
1158
+ ```
1159
+
1160
+ #### `rem experiments trace` - Trace Retrieval
1161
+
1162
+ ```bash
1163
+ # List recent traces
1164
+ rem experiments trace list --project rem-agents --days 7 --limit 50
1165
+ ```
1166
+
1167
+ #### `rem experiments` - A/B Testing & Parameter Sweeps
1168
+
1169
+ Manage experiment configurations (A/B testing, parameter sweeps).
1170
+
1171
+ ```bash
1172
+ # Create experiment config
1173
+ rem experiments create \
1174
+ --name cv-parser-test \
1175
+ --description "Test CV parser with different models"
1176
+
1177
+ # List experiments
1178
+ rem experiments list
1179
+
1180
+ # Show experiment details
1181
+ rem experiments show cv-parser-test
1182
+
1183
+ # Run experiment
1184
+ rem experiments run cv-parser-test
1185
+ ```
1186
+
1187
+ ### Interactive Agent
1188
+
1189
+ #### `rem ask` - Test Agent
1190
+
1191
+ Test Pydantic AI agent with natural language queries.
1192
+
1193
+ ```bash
1194
+ # Ask a question
1195
+ rem ask "What documents did Sarah Chen author?"
1196
+
1197
+ # With context headers
1198
+ rem ask "Find all resources about API design" \
1199
+ --user-id user-123 \
1200
+ --tenant-id acme-corp
1201
+
1202
+ # Use specific agent schema
1203
+ rem ask "Analyze this contract" \
1204
+ --agent-schema contract-analyzer-v1
1205
+ ```
1206
+
1207
+ ### Global Options
1208
+
1209
+ All commands support:
1210
+
1211
+ ```bash
1212
+ # Verbose logging
1213
+ rem --verbose <command>
1214
+ rem -v <command>
1215
+
1216
+ # Version
1217
+ rem --version
1218
+
1219
+ # Help
1220
+ rem --help
1221
+ rem <command> --help
1222
+ rem <command> <subcommand> --help
1223
+ ```
1224
+
1225
+ ### Environment Variables
1226
+
1227
+ Override any setting via environment variables:
1228
+
1229
+ ```bash
1230
+ # Database
1231
+ export POSTGRES__CONNECTION_STRING=postgresql://rem:rem@localhost:5432/rem
1232
+ export POSTGRES__POOL_MIN_SIZE=5
1233
+
1234
+ # LLM
1235
+ export LLM__DEFAULT_MODEL=openai:gpt-4o
1236
+ export LLM__OPENAI_API_KEY=sk-...
1237
+ export LLM__ANTHROPIC_API_KEY=sk-ant-...
1238
+
1239
+ # S3
1240
+ export S3__BUCKET_NAME=rem-storage
1241
+ export S3__REGION=us-east-1
1242
+
1243
+ # Server
1244
+ export API__HOST=0.0.0.0
1245
+ export API__PORT=8000
1246
+ export API__RELOAD=true
1247
+
1248
+ # Run command with overrides
1249
+ rem serve
1250
+ ```
1251
+
1252
+ ## Development (For Contributors)
1253
+
1254
+ **Best for**: Contributing to REM or customizing the codebase.
1255
+
1256
+ ### Step 1: Clone Repository
1257
+
1258
+ ```bash
1259
+ git clone https://github.com/mr-saoirse/remstack.git
1260
+ cd remstack/rem
1261
+ ```
1262
+
1263
+ ### Step 2: Start PostgreSQL Only
1264
+
1265
+ ```bash
1266
+ # Start only PostgreSQL (port 5050 for developers, doesn't conflict with package users on 5051)
1267
+ docker compose up postgres -d
1268
+
1269
+ # Verify connection
1270
+ psql -h localhost -p 5050 -U rem -d rem -c "SELECT version();"
1271
+ ```
1272
+
1273
+ ### Step 3: Set Up Development Environment
1274
+
1275
+ ```bash
1276
+ # IMPORTANT: If you previously installed the package and ran `rem configure`,
1277
+ # delete the REM configuration directory to avoid conflicts:
1278
+ rm -rf ~/.rem/
1279
+
1280
+ # Create virtual environment with uv
1281
+ uv venv
1282
+ source .venv/bin/activate # On Windows: .venv\Scripts\activate
1283
+
1284
+ # Install in editable mode with all dependencies
1285
+ uv pip install -e ".[all]"
1286
+
1287
+ # Set LLM API keys
1288
+ export OPENAI_API_KEY="sk-..."
1289
+ export ANTHROPIC_API_KEY="sk-ant-..."
1290
+ export POSTGRES__CONNECTION_STRING="postgresql://rem:rem@localhost:5050/rem"
1291
+
1292
+ # Verify CLI
1293
+ rem --version
1294
+ ```
1295
+
1296
+ ### Step 4: Initialize Database
1297
+
1298
+ ```bash
1299
+ # Apply migrations
1300
+ rem db migrate
1301
+
1302
+ # Verify tables
1303
+ psql -h localhost -p 5050 -U rem -d rem -c "\dt"
1304
+ ```
1305
+
1306
+ ### Step 5: Run API Server (Optional)
1307
+
1308
+ ```bash
1309
+ # Start API server with hot reload
1310
+ uv run python -m rem.api.main
1311
+
1312
+ # API runs on http://localhost:8000
1313
+ ```
1314
+
1315
+ ### Step 6: Run Tests
1316
+
1317
+ ```bash
1318
+ # Run non-LLM tests (fast, no API costs)
1319
+ uv run pytest tests/integration/ -m "not llm" -v
1320
+
1321
+ # Run all tests (uses API credits)
1322
+ uv run pytest tests/integration/ -v
1323
+
1324
+ # Type check (saves report to .mypy/ folder)
1325
+ ../scripts/run_mypy.sh
1326
+ ```
1327
+
1328
+ Type checking reports are saved to `.mypy/report_YYYYMMDD_HHMMSS.txt` (gitignored).
1329
+ Current status: 222 errors in 55 files (as of 2025-11-23).
1330
+
1331
+ ### Environment Variables
1332
+
1333
+ All settings via environment variables with `__` delimiter:
1334
+
1335
+ ```bash
1336
+ # LLM
1337
+ LLM__DEFAULT_MODEL=anthropic:claude-sonnet-4-5-20250929
1338
+ LLM__DEFAULT_TEMPERATURE=0.5
1339
+
1340
+ # Auth (disabled by default)
1341
+ AUTH__ENABLED=false
1342
+ AUTH__OIDC_ISSUER_URL=https://accounts.google.com
1343
+
1344
+ # OTEL (disabled by default for local dev)
1345
+ OTEL__ENABLED=false
1346
+ OTEL__SERVICE_NAME=rem-api
1347
+
1348
+ # Postgres
1349
+ POSTGRES__CONNECTION_STRING=postgresql://rem:rem@localhost:5050/rem
1350
+
1351
+ # S3
1352
+ S3__BUCKET_NAME=rem-storage
1353
+ S3__REGION=us-east-1
1354
+ ```
1355
+
1356
+ ### Production Deployment (Optional)
1357
+
1358
+ For production deployment to AWS EKS with Kubernetes, see the main repository README:
1359
+ - **Infrastructure**: [../../manifests/infra/pulumi/eks-yaml/README.md](../../manifests/infra/pulumi/eks-yaml/README.md)
1360
+ - **Platform**: [../../manifests/platform/README.md](../../manifests/platform/README.md)
1361
+ - **Application**: [../../manifests/application/README.md](../../manifests/application/README.md)
1362
+
1363
+
1364
+ ## REM Query Dialect (AST)
1365
+
1366
+ REM queries follow a structured dialect with formal grammar specification.
1367
+
1368
+ ### Grammar
1369
+
1370
+ ```
1371
+ Query ::= LookupQuery | FuzzyQuery | SearchQuery | SqlQuery | TraverseQuery
1372
+
1373
+ LookupQuery ::= LOOKUP <key:string|list[string]>
1374
+ key : Single entity name or list of entity names (natural language labels)
1375
+ performance : O(1) per key
1376
+ available : Stage 1+
1377
+ examples :
1378
+ - LOOKUP "Sarah"
1379
+ - LOOKUP ["Sarah", "Mike", "Emily"]
1380
+ - LOOKUP "Project Alpha"
1381
+
1382
+ FuzzyQuery ::= FUZZY <text:string> [THRESHOLD <t:float>] [LIMIT <n:int>]
1383
+ text : Search text (partial/misspelled)
1384
+ threshold : Similarity score 0.0-1.0 (default: 0.5)
1385
+ limit : Max results (default: 5)
1386
+ performance : Indexed (pg_trgm)
1387
+ available : Stage 1+
1388
+ example : FUZZY "sara" THRESHOLD 0.5 LIMIT 10
1389
+
1390
+ SearchQuery ::= SEARCH <text:string> [TABLE <table:string>] [WHERE <clause:string>] [LIMIT <n:int>]
1391
+ text : Semantic query text
1392
+ table : Target table (default: "resources")
1393
+ clause : Optional PostgreSQL WHERE clause for hybrid filtering (combines vector + structured)
1394
+ limit : Max results (default: 10)
1395
+ performance : Indexed (pgvector)
1396
+ available : Stage 3+
1397
+ examples :
1398
+ - SEARCH "database migration" TABLE resources LIMIT 10
1399
+ - SEARCH "team discussion" TABLE moments WHERE "moment_type='meeting'" LIMIT 5
1400
+ - SEARCH "project updates" WHERE "created_at >= '2024-01-01'" LIMIT 20
1401
+ - SEARCH "AI research" WHERE "tags @> ARRAY['machine-learning']" LIMIT 10
1402
+
1403
+ Hybrid Query Support: SEARCH combines semantic vector similarity with structured filtering.
1404
+ Use WHERE clause to filter on system fields or entity-specific fields.
1405
+
1406
+ SqlQuery ::= <raw_sql:string>
1407
+ | SQL <table:string> [WHERE <clause:string>] [ORDER BY <order:string>] [LIMIT <n:int>]
1408
+
1409
+ Mode 1 (Raw SQL - Recommended):
1410
+ Any query not starting with a REM keyword (LOOKUP, FUZZY, SEARCH, TRAVERSE) is treated as raw SQL.
1411
+ Allowed: SELECT, INSERT, UPDATE, WITH (read + data modifications)
1412
+ Blocked: DROP, DELETE, TRUNCATE, ALTER (destructive operations)
1413
+
1414
+ Mode 2 (Structured - Legacy):
1415
+ SQL prefix with table + WHERE clause (automatic tenant isolation)
1416
+
1417
+ performance : O(n) with indexes
1418
+ available : Stage 1+
1419
+ dialect : PostgreSQL (full PostgreSQL syntax support)
1420
+
1421
+ examples :
1422
+ # Raw SQL (no prefix needed)
1423
+ - SELECT * FROM resources WHERE created_at > NOW() - INTERVAL '7 days' LIMIT 20
1424
+ - SELECT category, COUNT(*) as count FROM resources GROUP BY category
1425
+ - WITH recent AS (SELECT * FROM resources WHERE created_at > NOW() - INTERVAL '1 day') SELECT * FROM recent
1426
+
1427
+ # Structured SQL (legacy, automatic tenant isolation)
1428
+ - SQL moments WHERE "moment_type='meeting'" ORDER BY starts_timestamp DESC LIMIT 10
1429
+ - SQL resources WHERE "metadata->>'status' = 'published'" LIMIT 20
1430
+
1431
+ PostgreSQL Dialect: Full support for:
1432
+ - JSONB operators (->>, ->, @>, etc.)
1433
+ - Array operators (&&, @>, <@, etc.)
1434
+ - CTEs (WITH clauses)
1435
+ - Advanced filtering and aggregations
1436
+
1437
+ TraverseQuery ::= TRAVERSE [<edge_types:list>] WITH <initial_query:Query> [DEPTH <d:int>] [ORDER BY <order:string>] [LIMIT <n:int>]
1438
+ edge_types : Relationship types to follow (e.g., ["manages", "reports-to"], default: all)
1439
+ initial_query : Starting query (typically LOOKUP)
1440
+ depth : Number of hops (0=PLAN mode, 1=single hop, N=multi-hop, default: 1)
1441
+ order : Order results (default: "edge.created_at DESC")
1442
+ limit : Max nodes (default: 9)
1443
+ performance : O(k) where k = visited nodes
1444
+ available : Stage 3+
1445
+ examples :
1446
+ - TRAVERSE manages WITH LOOKUP "Sally" DEPTH 1
1447
+ - TRAVERSE WITH LOOKUP "Sally" DEPTH 0 (PLAN mode: edge analysis only)
1448
+ - TRAVERSE manages,reports-to WITH LOOKUP "Sarah" DEPTH 2 LIMIT 5
1449
+ ```
1450
+
1451
+ ### Query Availability by Evolution Stage
1452
+
1453
+ | Query Type | Stage 0 | Stage 1 | Stage 2 | Stage 3 | Stage 4 |
1454
+ |------------|---------|---------|---------|---------|---------|
1455
+ | LOOKUP | ✗ | ✓ | ✓ | ✓ | ✓ |
1456
+ | FUZZY | ✗ | ✓ | ✓ | ✓ | ✓ |
1457
+ | SEARCH | ✗ | ✗ | ✗ | ✓ | ✓ |
1458
+ | SQL | ✗ | ✓ | ✓ | ✓ | ✓ |
1459
+ | TRAVERSE | ✗ | ✗ | ✗ | ✓ | ✓ |
1460
+
1461
+ **Stage 0**: No data, all queries fail.
1462
+
1463
+ **Stage 1** (20% answerable): Resources seeded with entity extraction. LOOKUP and FUZZY work for finding entities. SQL works for basic filtering.
1464
+
1465
+ **Stage 2** (50% answerable): Moments extracted. SQL temporal queries work. LOOKUP includes moment entities.
1466
+
1467
+ **Stage 3** (80% answerable): Affinity graph built. SEARCH and TRAVERSE become available. Multi-hop graph queries work.
1468
+
1469
+ **Stage 4** (100% answerable): Mature graph with rich historical data. All query types fully functional with high-quality results.
1470
+
1471
+ ## License
1472
+
1473
+ MIT