remdb 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (187) hide show
  1. rem/__init__.py +2 -0
  2. rem/agentic/README.md +650 -0
  3. rem/agentic/__init__.py +39 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +8 -0
  6. rem/agentic/context.py +148 -0
  7. rem/agentic/context_builder.py +329 -0
  8. rem/agentic/mcp/__init__.py +0 -0
  9. rem/agentic/mcp/tool_wrapper.py +107 -0
  10. rem/agentic/otel/__init__.py +5 -0
  11. rem/agentic/otel/setup.py +151 -0
  12. rem/agentic/providers/phoenix.py +674 -0
  13. rem/agentic/providers/pydantic_ai.py +572 -0
  14. rem/agentic/query.py +117 -0
  15. rem/agentic/query_helper.py +89 -0
  16. rem/agentic/schema.py +396 -0
  17. rem/agentic/serialization.py +245 -0
  18. rem/agentic/tools/__init__.py +5 -0
  19. rem/agentic/tools/rem_tools.py +231 -0
  20. rem/api/README.md +420 -0
  21. rem/api/main.py +324 -0
  22. rem/api/mcp_router/prompts.py +182 -0
  23. rem/api/mcp_router/resources.py +536 -0
  24. rem/api/mcp_router/server.py +213 -0
  25. rem/api/mcp_router/tools.py +584 -0
  26. rem/api/routers/auth.py +229 -0
  27. rem/api/routers/chat/__init__.py +5 -0
  28. rem/api/routers/chat/completions.py +281 -0
  29. rem/api/routers/chat/json_utils.py +76 -0
  30. rem/api/routers/chat/models.py +124 -0
  31. rem/api/routers/chat/streaming.py +185 -0
  32. rem/auth/README.md +258 -0
  33. rem/auth/__init__.py +26 -0
  34. rem/auth/middleware.py +100 -0
  35. rem/auth/providers/__init__.py +13 -0
  36. rem/auth/providers/base.py +376 -0
  37. rem/auth/providers/google.py +163 -0
  38. rem/auth/providers/microsoft.py +237 -0
  39. rem/cli/README.md +455 -0
  40. rem/cli/__init__.py +8 -0
  41. rem/cli/commands/README.md +126 -0
  42. rem/cli/commands/__init__.py +3 -0
  43. rem/cli/commands/ask.py +565 -0
  44. rem/cli/commands/configure.py +423 -0
  45. rem/cli/commands/db.py +493 -0
  46. rem/cli/commands/dreaming.py +324 -0
  47. rem/cli/commands/experiments.py +1124 -0
  48. rem/cli/commands/mcp.py +66 -0
  49. rem/cli/commands/process.py +245 -0
  50. rem/cli/commands/schema.py +183 -0
  51. rem/cli/commands/serve.py +106 -0
  52. rem/cli/dreaming.py +363 -0
  53. rem/cli/main.py +88 -0
  54. rem/config.py +237 -0
  55. rem/mcp_server.py +41 -0
  56. rem/models/core/__init__.py +49 -0
  57. rem/models/core/core_model.py +64 -0
  58. rem/models/core/engram.py +333 -0
  59. rem/models/core/experiment.py +628 -0
  60. rem/models/core/inline_edge.py +132 -0
  61. rem/models/core/rem_query.py +243 -0
  62. rem/models/entities/__init__.py +43 -0
  63. rem/models/entities/file.py +57 -0
  64. rem/models/entities/image_resource.py +88 -0
  65. rem/models/entities/message.py +35 -0
  66. rem/models/entities/moment.py +123 -0
  67. rem/models/entities/ontology.py +191 -0
  68. rem/models/entities/ontology_config.py +131 -0
  69. rem/models/entities/resource.py +95 -0
  70. rem/models/entities/schema.py +87 -0
  71. rem/models/entities/user.py +85 -0
  72. rem/py.typed +0 -0
  73. rem/schemas/README.md +507 -0
  74. rem/schemas/__init__.py +6 -0
  75. rem/schemas/agents/README.md +92 -0
  76. rem/schemas/agents/core/moment-builder.yaml +178 -0
  77. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  78. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  79. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  80. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  81. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  82. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  83. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  84. rem/schemas/agents/examples/hello-world.yaml +37 -0
  85. rem/schemas/agents/examples/query.yaml +54 -0
  86. rem/schemas/agents/examples/simple.yaml +21 -0
  87. rem/schemas/agents/examples/test.yaml +29 -0
  88. rem/schemas/agents/rem.yaml +128 -0
  89. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  90. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  91. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  92. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  93. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  94. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  95. rem/services/__init__.py +16 -0
  96. rem/services/audio/INTEGRATION.md +308 -0
  97. rem/services/audio/README.md +376 -0
  98. rem/services/audio/__init__.py +15 -0
  99. rem/services/audio/chunker.py +354 -0
  100. rem/services/audio/transcriber.py +259 -0
  101. rem/services/content/README.md +1269 -0
  102. rem/services/content/__init__.py +5 -0
  103. rem/services/content/providers.py +806 -0
  104. rem/services/content/service.py +657 -0
  105. rem/services/dreaming/README.md +230 -0
  106. rem/services/dreaming/__init__.py +53 -0
  107. rem/services/dreaming/affinity_service.py +336 -0
  108. rem/services/dreaming/moment_service.py +264 -0
  109. rem/services/dreaming/ontology_service.py +54 -0
  110. rem/services/dreaming/user_model_service.py +297 -0
  111. rem/services/dreaming/utils.py +39 -0
  112. rem/services/embeddings/__init__.py +11 -0
  113. rem/services/embeddings/api.py +120 -0
  114. rem/services/embeddings/worker.py +421 -0
  115. rem/services/fs/README.md +662 -0
  116. rem/services/fs/__init__.py +62 -0
  117. rem/services/fs/examples.py +206 -0
  118. rem/services/fs/examples_paths.py +204 -0
  119. rem/services/fs/git_provider.py +935 -0
  120. rem/services/fs/local_provider.py +760 -0
  121. rem/services/fs/parsing-hooks-examples.md +172 -0
  122. rem/services/fs/paths.py +276 -0
  123. rem/services/fs/provider.py +460 -0
  124. rem/services/fs/s3_provider.py +1042 -0
  125. rem/services/fs/service.py +186 -0
  126. rem/services/git/README.md +1075 -0
  127. rem/services/git/__init__.py +17 -0
  128. rem/services/git/service.py +469 -0
  129. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  130. rem/services/phoenix/README.md +453 -0
  131. rem/services/phoenix/__init__.py +46 -0
  132. rem/services/phoenix/client.py +686 -0
  133. rem/services/phoenix/config.py +88 -0
  134. rem/services/phoenix/prompt_labels.py +477 -0
  135. rem/services/postgres/README.md +575 -0
  136. rem/services/postgres/__init__.py +23 -0
  137. rem/services/postgres/migration_service.py +427 -0
  138. rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
  139. rem/services/postgres/register_type.py +352 -0
  140. rem/services/postgres/repository.py +337 -0
  141. rem/services/postgres/schema_generator.py +379 -0
  142. rem/services/postgres/service.py +802 -0
  143. rem/services/postgres/sql_builder.py +354 -0
  144. rem/services/rem/README.md +304 -0
  145. rem/services/rem/__init__.py +23 -0
  146. rem/services/rem/exceptions.py +71 -0
  147. rem/services/rem/executor.py +293 -0
  148. rem/services/rem/parser.py +145 -0
  149. rem/services/rem/queries.py +196 -0
  150. rem/services/rem/query.py +371 -0
  151. rem/services/rem/service.py +527 -0
  152. rem/services/session/README.md +374 -0
  153. rem/services/session/__init__.py +6 -0
  154. rem/services/session/compression.py +360 -0
  155. rem/services/session/reload.py +77 -0
  156. rem/settings.py +1235 -0
  157. rem/sql/002_install_models.sql +1068 -0
  158. rem/sql/background_indexes.sql +42 -0
  159. rem/sql/install_models.sql +1038 -0
  160. rem/sql/migrations/001_install.sql +503 -0
  161. rem/sql/migrations/002_install_models.sql +1202 -0
  162. rem/utils/AGENTIC_CHUNKING.md +597 -0
  163. rem/utils/README.md +583 -0
  164. rem/utils/__init__.py +43 -0
  165. rem/utils/agentic_chunking.py +622 -0
  166. rem/utils/batch_ops.py +343 -0
  167. rem/utils/chunking.py +108 -0
  168. rem/utils/clip_embeddings.py +276 -0
  169. rem/utils/dict_utils.py +98 -0
  170. rem/utils/embeddings.py +423 -0
  171. rem/utils/examples/embeddings_example.py +305 -0
  172. rem/utils/examples/sql_types_example.py +202 -0
  173. rem/utils/markdown.py +16 -0
  174. rem/utils/model_helpers.py +236 -0
  175. rem/utils/schema_loader.py +229 -0
  176. rem/utils/sql_types.py +348 -0
  177. rem/utils/user_id.py +81 -0
  178. rem/utils/vision.py +330 -0
  179. rem/workers/README.md +506 -0
  180. rem/workers/__init__.py +5 -0
  181. rem/workers/dreaming.py +502 -0
  182. rem/workers/engram_processor.py +312 -0
  183. rem/workers/sqs_file_processor.py +193 -0
  184. remdb-0.2.6.dist-info/METADATA +1191 -0
  185. remdb-0.2.6.dist-info/RECORD +187 -0
  186. remdb-0.2.6.dist-info/WHEEL +4 -0
  187. remdb-0.2.6.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,1191 @@
1
+ Metadata-Version: 2.4
2
+ Name: remdb
3
+ Version: 0.2.6
4
+ Summary: Resources Entities Moments - Bio-inspired memory system for agentic AI workloads
5
+ Project-URL: Homepage, https://github.com/mr-saoirse/remstack
6
+ Project-URL: Documentation, https://github.com/mr-saoirse/remstack/blob/main/README.md
7
+ Project-URL: Repository, https://github.com/mr-saoirse/remstack
8
+ Project-URL: Issues, https://github.com/mr-saoirse/remstack/issues
9
+ Author-email: mr-saoirse <amartey@gmail.com>
10
+ License: MIT
11
+ Keywords: agents,ai,mcp,memory,postgresql,vector-search
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
17
+ Requires-Python: >=3.12
18
+ Requires-Dist: aioboto3>=13.0.0
19
+ Requires-Dist: arize-phoenix>=5.0.0
20
+ Requires-Dist: asyncpg>=0.30.0
21
+ Requires-Dist: boto3>=1.35.0
22
+ Requires-Dist: click>=8.1.0
23
+ Requires-Dist: fastapi>=0.115.0
24
+ Requires-Dist: fastmcp>=0.5.0
25
+ Requires-Dist: gitpython>=3.1.45
26
+ Requires-Dist: gmft
27
+ Requires-Dist: hypercorn>=0.17.0
28
+ Requires-Dist: itsdangerous>=2.0.0
29
+ Requires-Dist: json-schema-to-pydantic>=0.2.0
30
+ Requires-Dist: kreuzberg[gmft]>=3.21.0
31
+ Requires-Dist: loguru>=0.7.0
32
+ Requires-Dist: openinference-instrumentation-pydantic-ai>=0.1.0
33
+ Requires-Dist: opentelemetry-api>=1.28.0
34
+ Requires-Dist: opentelemetry-exporter-otlp-proto-grpc>=1.28.0
35
+ Requires-Dist: opentelemetry-exporter-otlp-proto-http>=1.28.0
36
+ Requires-Dist: opentelemetry-exporter-otlp>=1.28.0
37
+ Requires-Dist: opentelemetry-instrumentation-fastapi>=0.49b0
38
+ Requires-Dist: opentelemetry-instrumentation>=0.49b0
39
+ Requires-Dist: opentelemetry-sdk>=1.28.0
40
+ Requires-Dist: psycopg[binary]>=3.2.0
41
+ Requires-Dist: pydantic-ai>=0.0.14
42
+ Requires-Dist: pydantic-settings>=2.6.0
43
+ Requires-Dist: pydantic>=2.10.0
44
+ Requires-Dist: pydub>=0.25.0
45
+ Requires-Dist: python-dotenv>=1.0.0
46
+ Requires-Dist: pyyaml>=6.0.0
47
+ Requires-Dist: requests>=2.32.0
48
+ Requires-Dist: semchunk>=2.2.0
49
+ Requires-Dist: tenacity>=9.0.0
50
+ Requires-Dist: tiktoken>=0.5.0
51
+ Requires-Dist: uvicorn[standard]>=0.32.0
52
+ Provides-Extra: all
53
+ Requires-Dist: ipdb>=0.13.0; extra == 'all'
54
+ Requires-Dist: ipython>=8.29.0; extra == 'all'
55
+ Requires-Dist: json-schema-to-pydantic>=0.2.0; extra == 'all'
56
+ Requires-Dist: mypy>=1.13.0; extra == 'all'
57
+ Requires-Dist: pandas-stubs>=2.0.0; extra == 'all'
58
+ Requires-Dist: pillow>=10.0.0; extra == 'all'
59
+ Requires-Dist: polars>=1.0.0; extra == 'all'
60
+ Requires-Dist: pydub>=0.25.0; extra == 'all'
61
+ Requires-Dist: pytest-asyncio>=0.24.0; extra == 'all'
62
+ Requires-Dist: pytest-cov>=6.0.0; extra == 'all'
63
+ Requires-Dist: pytest-mock>=3.14.0; extra == 'all'
64
+ Requires-Dist: pytest>=8.0.0; extra == 'all'
65
+ Requires-Dist: ruff>=0.8.0; extra == 'all'
66
+ Requires-Dist: types-pyyaml>=6.0.0; extra == 'all'
67
+ Provides-Extra: audio
68
+ Requires-Dist: pydub>=0.25.0; extra == 'audio'
69
+ Provides-Extra: dev
70
+ Requires-Dist: ipdb>=0.13.0; extra == 'dev'
71
+ Requires-Dist: ipython>=8.29.0; extra == 'dev'
72
+ Requires-Dist: mypy>=1.13.0; extra == 'dev'
73
+ Requires-Dist: pandas-stubs>=2.0.0; extra == 'dev'
74
+ Requires-Dist: pytest-asyncio>=0.24.0; extra == 'dev'
75
+ Requires-Dist: pytest-cov>=6.0.0; extra == 'dev'
76
+ Requires-Dist: pytest-mock>=3.14.0; extra == 'dev'
77
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
78
+ Requires-Dist: ruff>=0.8.0; extra == 'dev'
79
+ Requires-Dist: types-pyyaml>=6.0.0; extra == 'dev'
80
+ Provides-Extra: fs
81
+ Requires-Dist: pillow>=10.0.0; extra == 'fs'
82
+ Requires-Dist: polars>=1.0.0; extra == 'fs'
83
+ Requires-Dist: pydub>=0.25.0; extra == 'fs'
84
+ Provides-Extra: schema
85
+ Requires-Dist: json-schema-to-pydantic>=0.2.0; extra == 'schema'
86
+ Description-Content-Type: text/markdown
87
+
88
+ # REM - Resources Entities Moments
89
+
90
+ Cloud-native unified memory infrastructure for agentic AI systems built with Pydantic AI, FastAPI, and FastMCP.
91
+
92
+ ## Architecture Overview
93
+
94
+ ```mermaid
95
+ graph TD
96
+ API[FastAPI<br/>Chat + MCP] --> AGENTS[JSON Schema<br/>Agents]
97
+ AGENTS --> TOOLS[MCP Tools<br/>5 Tools]
98
+
99
+ TOOLS --> QUERY[REM Query<br/>Dialect]
100
+ QUERY --> DB[(PostgreSQL<br/>+pgvector)]
101
+
102
+ FILES[File Processor] --> DREAM[Dreaming<br/>Workers]
103
+ DREAM --> DB
104
+
105
+ AGENTS --> OTEL[OpenTelemetry]
106
+ OTEL --> PHOENIX[Arize<br/>Phoenix]
107
+
108
+ EVAL[Evaluation<br/>Framework] --> PHOENIX
109
+
110
+ classDef api fill:#4A90E2,stroke:#2E5C8A,color:#fff
111
+ classDef agent fill:#7B68EE,stroke:#483D8B,color:#fff
112
+ classDef db fill:#50C878,stroke:#2E7D4E,color:#fff
113
+ classDef obs fill:#9B59B6,stroke:#6C3483,color:#fff
114
+
115
+ class API,TOOLS api
116
+ class AGENTS agent
117
+ class DB,QUERY db
118
+ class OTEL,PHOENIX,EVAL obs
119
+ ```
120
+
121
+ **Key Components:**
122
+
123
+ - **API Layer**: OpenAI-compatible chat completions and the MCP server, both served from a single deployment (not separate services)
124
+ - **Agentic Framework**: JSON Schema-based agents with no-code configuration
125
+ - **Database Layer**: PostgreSQL 18 with pgvector for multi-index memory (KV + Vector + Graph)
126
+ - **REM Query Dialect**: Custom query language with O(1) lookups, semantic search, graph traversal
127
+ - **Ingestion & Dreaming**: Background workers for content extraction and progressive index enrichment (0% → 100% answerable)
128
+ - **Observability & Evals**: OpenTelemetry tracing + Arize Phoenix + LLM-as-a-Judge evaluation framework
129
+
130
+ ## Features
131
+
132
+ | Feature | Description | Benefits |
133
+ |---------|-------------|----------|
134
+ | **OpenAI-Compatible Chat API** | Drop-in replacement for OpenAI chat completions API with streaming support | Use with existing OpenAI clients, switch models across providers (OpenAI, Anthropic, etc.) |
135
+ | **Built-in MCP Server** | FastMCP server with 5 tools + 3 resources for memory operations | Export memory to Claude Desktop, Cursor, or any MCP-compatible host |
136
+ | **REM Query Engine** | Multi-index query system (LOOKUP, FUZZY, SEARCH, SQL, TRAVERSE) with custom dialect | O(1) lookups, semantic search, graph traversal - all tenant-isolated |
137
+ | **Dreaming Workers** | Background workers for entity extraction, moment generation, and affinity matching | Automatic knowledge graph construction from resources (0% → 100% query answerable) |
138
+ | **PostgreSQL + pgvector** | CloudNativePG with PostgreSQL 18, pgvector extension, streaming replication | Production-ready vector search, no external vector DB needed |
139
+ | **AWS EKS Recipe** | Complete infrastructure-as-code with Pulumi, Karpenter, ArgoCD | Deploy to production EKS in minutes with auto-scaling and GitOps |
140
+ | **JSON Schema Agents** | Dynamic agent creation from YAML schemas via Pydantic AI factory | Define agents declaratively, version control schemas, load dynamically |
141
+ | **Content Providers** | Audio transcription (Whisper), vision (GPT-4V, Claude), PDFs, DOCX, images | Multimodal ingestion out of the box with format detection |
142
+ | **Configurable Embeddings** | Provider-agnostic embedding system (OpenAI, Cohere, Jina) | Switch embedding providers via env vars, no code changes |
143
+ | **Multi-Tenancy** | Tenant isolation at database level with automatic scoping | SaaS-ready with complete data separation per tenant |
144
+ | **Streaming Everything** | SSE for chat, background workers for embeddings, async throughout | Real-time responses, non-blocking operations, scalable |
145
+ | **Zero Vendor Lock-in** | Raw HTTP clients (no OpenAI SDK), swappable providers, open standards | Not tied to any vendor, easy to migrate, full control |
146
+
147
+ ## Quick Start
148
+
149
+ Choose your path:
150
+
151
+ - **Option 1: Package Users** (Recommended for non-developers) - PyPI package + dockerized database
152
+ - **Option 2: Developers** - Clone repo, local development with uv
153
+
154
+ ---
155
+
156
+ ## Option 1: Package Users (Recommended)
157
+
158
+ **Best for**: Using REM as a service (API + CLI) without modifying code.
159
+
160
+ ### Step 1: Start Database and API with Docker Compose
161
+
162
+ ```bash
163
+ # Create a project directory
164
+ mkdir my-rem-project && cd my-rem-project
165
+
166
+ # Download docker-compose file from public gist
167
+ curl -O https://gist.githubusercontent.com/percolating-sirsh/d117b673bc0edfdef1a5068ccd3cf3e5/raw/docker-compose.prebuilt.yml
168
+
169
+ # IMPORTANT: Export API keys BEFORE running docker compose
170
+ # Docker Compose reads env vars at startup - exporting them after won't work!
171
+
172
+ # Required: OpenAI for embeddings (text-embedding-3-small)
173
+ export OPENAI_API_KEY="sk-..."
174
+
175
+ # Recommended: At least one chat completion provider
176
+ export ANTHROPIC_API_KEY="sk-ant-..." # Claude Sonnet 4.5 (high quality)
177
+ export CEREBRAS_API_KEY="csk-..." # Cerebras (fast, cheap inference)
178
+
179
+ # Start PostgreSQL + API
180
+ docker compose -f docker-compose.prebuilt.yml up -d
181
+
182
+ # Verify services are running
183
+ curl http://localhost:8000/health
184
+ ```
185
+
186
+ This starts:
187
+ - **PostgreSQL** with pgvector on port **5051** (connection: `postgresql://rem:rem@localhost:5051/rem`)
188
+ - **REM API** on port **8000** with OpenAI-compatible chat completions + MCP server
189
+ - Uses pre-built Docker image from Docker Hub (no local build required)
190
+
191
+ ### Step 2: Install and Configure CLI (REQUIRED)
192
+
193
+ **This step is required** before you can use REM - it installs the database schema and configures your LLM API keys.
194
+
195
+ ```bash
196
+ # Install remdb package from PyPI
197
+ pip install "remdb[all]"
198
+
199
+ # Configure REM (defaults to port 5051 for package users)
200
+ rem configure --install --claude-desktop
201
+ ```
202
+
203
+ The interactive wizard will:
204
+ 1. **Configure PostgreSQL**: Defaults to `postgresql://rem:rem@localhost:5051/rem` (prebuilt docker-compose)
205
+ - Just press Enter to accept defaults
206
+ - Custom database: Enter your own host/port/credentials
207
+ 2. **Configure LLM providers**: Enter your OpenAI/Anthropic API keys
208
+ 3. **Install database tables**: Creates schema, functions, indexes (**required for CLI/API to work**)
209
+ 4. **Register with Claude Desktop**: Adds REM MCP server to Claude
210
+
211
+ Configuration is saved to `~/.rem/config.yaml` (edit it with `rem configure --edit`).
212
+
213
+ **Port Guide:**
214
+ - **5051**: Package users with `docker-compose.prebuilt.yml` (pre-built image)
215
+ - **5050**: Developers with `docker-compose.yml` (local build)
216
+ - **Custom**: Your own PostgreSQL database
217
+
218
+ **Next Steps:**
219
+ - See [CLI Reference](#cli-reference) for all available commands
220
+ - See [REM Query Dialect](#rem-query-dialect) for query examples
221
+ - See [API Endpoints](#api-endpoints) for OpenAI-compatible API usage
222
+
223
+ ### Step 3: Test the Stack
224
+
225
+ ```bash
226
+ # Ingest a test file to populate your knowledge base
227
+ echo "REM is a bio-inspired memory system for agentic AI workloads." > test-doc.txt
228
+ rem process ingest test-doc.txt --user-id test-user --category documentation --tags rem,ai
229
+
230
+ # Query your ingested data
231
+ rem ask "What do you know about REM from my knowledge base?" --user-id test-user
232
+
233
+ # Test with a general query (uses agent's built-in knowledge + your data)
234
+ rem ask "What is REM?" --user-id test-user
235
+
236
+ # Test the API
237
+ curl -X POST http://localhost:8000/api/v1/chat/completions \
238
+ -H "Content-Type: application/json" \
239
+ -H "X-User-Id: test-user" \
240
+ -d '{
241
+ "model": "anthropic:claude-sonnet-4-5-20250929",
242
+ "messages": [{"role": "user", "content": "What is REM?"}],
243
+ "stream": false
244
+ }'
245
+ ```
246
+
247
+ **File Ingestion Commands:**
248
+ - `rem process ingest <file>` - Full ingestion pipeline (storage + parsing + embedding + database)
249
+ - `rem process uri <file>` - READ-ONLY parsing (no database storage, useful for testing parsers)
250
+
251
+
252
+ ## See Also
253
+
254
+ - [REM Query Dialect](#rem-query-dialect) - LOOKUP, FUZZY, SEARCH, TRAVERSE, SQL query types
255
+ - [API Endpoints](#api-endpoints) - OpenAI-compatible chat completions, MCP server
256
+ - [CLI Reference](#cli-reference) - Complete command-line interface documentation
257
+ - [Production Deployment](#production-deployment) - AWS EKS with Kubernetes
258
+
259
+ **Sample Data**: Test data with users, resources, and moments is at `tests/data/seed/test-user-data.yaml`
260
+
261
+ ---
262
+
263
+ ## REM Query Dialect
264
+
265
+ REM provides a custom query language designed for **LLM-driven iterated retrieval** with performance guarantees.
266
+
267
+ ### Design Philosophy
268
+
269
+ Unlike traditional single-shot SQL queries, the REM dialect is optimized for **multi-turn exploration** where LLMs participate in query planning:
270
+
271
+ - **Iterated Queries**: Queries return partial results that LLMs use to refine subsequent queries
272
+ - **Composable WITH Syntax**: Chain operations together (e.g., `TRAVERSE FROM ... WITH LOOKUP "..."`)
273
+ - **Mixed Indexes**: Combines exact lookups (O(1)), semantic search (vector), and graph traversal
274
+ - **Query Planner Participation**: Results include metadata for LLMs to decide next steps
275
+
276
+ **Example Multi-Turn Flow**:
277
+ ```
278
+ Turn 1: LOOKUP "sarah-chen" → Returns entity + available edge types
279
+ Turn 2: TRAVERSE FROM "sarah-chen" TYPE "authored_by" DEPTH 1 → Returns connected documents
280
+ Turn 3: SEARCH "architecture decisions" WITH TRAVERSE FROM "sarah-chen" → Combines semantic + graph
281
+ ```
282
+
283
+ This enables LLMs to **progressively build context** rather than requiring perfect queries upfront.
284
+
285
+ See [REM Query Dialect (AST)](#rem-query-dialect-ast) for complete grammar specification.
286
+
287
+ ### Query Types
288
+
289
+ #### `LOOKUP` - O(1) Exact Label Lookup
290
+
291
+ Fast exact match on entity labels (natural language identifiers, not UUIDs).
292
+
293
+ ```sql
294
+ LOOKUP "sarah-chen" FROM resources
295
+ LOOKUP "api-design-v2" FROM resources WHERE category = "projects"
296
+ ```
297
+
298
+ **Performance**: O(1) - indexed on `label` column
299
+ **Returns**: Single entity or null
300
+ **Use case**: Fetch specific known entities by human-readable name
301
+
302
+ #### `FUZZY` - Fuzzy Text Search
303
+
304
+ Fuzzy matching for partial names or misspellings using PostgreSQL trigram similarity.
305
+
306
+ ```sql
307
+ FUZZY "sara" FROM resources LIMIT 10
308
+ FUZZY "api desgin" FROM resources THRESHOLD 0.3 LIMIT 5
309
+ ```
310
+
311
+ **Performance**: O(n) with pg_trgm GIN index (fast for small-medium datasets)
312
+ **Returns**: Ranked list by similarity score
313
+ **Use case**: Handle typos, partial names, or when exact label is unknown
314
+
315
+ #### `SEARCH` - Semantic Vector Search
316
+
317
+ Semantic search using pgvector embeddings with cosine similarity.
318
+
319
+ ```sql
320
+ SEARCH "machine learning architecture" FROM resources LIMIT 10
321
+ SEARCH "contract disputes" FROM resources WHERE tags @> ARRAY['legal'] LIMIT 5
322
+ ```
323
+
324
+ **Performance**: O(log n) with HNSW index
325
+ **Returns**: Ranked list of semantically similar entities
326
+ **Use case**: Find conceptually related content without exact keyword matches
327
+
328
+ #### `TRAVERSE` - Recursive Graph Traversal
329
+
330
+ Follow `graph_edges` relationships across the knowledge graph.
331
+
332
+ ```sql
333
+ TRAVERSE FROM "sarah-chen" TYPE "authored_by" DEPTH 2
334
+ TRAVERSE FROM "api-design-v2" TYPE "references,depends_on" DEPTH 3
335
+ ```
336
+
337
+ **Features**:
338
+ - **Polymorphic**: Seamlessly traverses `resources`, `moments`, `users` via `all_graph_edges` view
339
+ - **Filtering**: Filter by one or multiple edge types (comma-separated)
340
+ - **Depth Control**: Configurable recursion depth (default: 2)
341
+ - **Data Model**: Requires `InlineEdge` JSON structure in `graph_edges` column
342
+
343
+ **Returns**: Graph of connected entities with edge metadata
344
+ **Use case**: Explore relationships, find connected entities, build context
345
+
346
+ #### Direct SQL Queries
347
+
348
+ Raw SQL for complex temporal, aggregation, or custom queries.
349
+
350
+ ```sql
351
+ SELECT * FROM resources WHERE created_at > NOW() - INTERVAL '7 days' ORDER BY created_at DESC LIMIT 20
352
+ SELECT category, COUNT(*) as count FROM resources GROUP BY category
353
+ WITH recent AS (SELECT * FROM resources WHERE created_at > NOW() - INTERVAL '1 day') SELECT * FROM recent
354
+ ```
355
+
356
+ **Performance**: Depends on query and indexes
357
+ **Returns**: Raw query results
358
+ **Use case**: Complex filtering, aggregations, temporal queries
359
+ **Allowed**: SELECT, INSERT, UPDATE, WITH (read + data modifications)
360
+ **Blocked**: DROP, DELETE, TRUNCATE, ALTER (destructive operations)
361
+ **Note**: Can be used standalone or with `WITH` syntax for composition
362
+
363
+ ### Graph Edge Format
364
+
365
+ Edges are stored inline using the `InlineEdge` pattern with human-readable destination labels.
366
+
367
+ ```json
368
+ {
369
+ "dst": "sarah-chen",
370
+ "rel_type": "authored_by",
371
+ "weight": 1.0,
372
+ "properties": {
373
+ "dst_entity_type": "users:engineers/sarah-chen",
374
+ "created_at": "2025-01-15T10:30:00Z"
375
+ }
376
+ }
377
+ ```
378
+
379
+ **Destination Entity Type Convention** (`properties.dst_entity_type`):
380
+
381
+ Format: `<table_schema>:<category>/<key>`
382
+
383
+ Examples:
384
+ - `"resources:managers/bob"` → Look up bob in resources table with category="managers"
385
+ - `"users:engineers/sarah-chen"` → Look up sarah-chen in users table
386
+ - `"moments:meetings/standup-2024-01"` → Look up in moments table
387
+ - `"resources/api-design-v2"` → Look up in resources table (no category)
388
+ - `"bob"` → Defaults to resources table, no category
389
+
390
+ **Edge Type Format** (`rel_type`):
391
+ - Use snake_case: `"authored_by"`, `"depends_on"`, `"references"`
392
+ - Be specific but consistent
393
+ - Use passive voice for bidirectional clarity
394
+
395
+ ### Multi-Turn Iterated Retrieval
396
+
397
+ REM enables agents to conduct multi-turn database conversations:
398
+
399
+ 1. **Initial Query**: Agent runs SEARCH to find candidates
400
+ 2. **Refinement**: Agent analyzes results, runs LOOKUP on specific entities
401
+ 3. **Context Expansion**: Agent runs TRAVERSE to find related entities
402
+ 4. **Temporal Filter**: Agent runs SQL to filter by time range
403
+ 5. **Final Answer**: Agent synthesizes knowledge from all queries
404
+
405
+ **Plan Memos**: Agents track query plans in a scratchpad for iterative refinement.
406
+
407
+ ### Query Performance Contracts
408
+
409
+ | Query Type | Complexity | Index | Use When |
410
+ |------------|-----------|-------|----------|
411
+ | `LOOKUP` | O(1) | B-tree on `label` | You know exact entity name |
412
+ | `FUZZY` | O(n) | GIN on `label` (pg_trgm) | Handling typos/partial matches |
413
+ | `SEARCH` | O(log n) | HNSW on `embedding` | Semantic similarity needed |
414
+ | `TRAVERSE` | O(depth × edges) | B-tree on `graph_edges` | Exploring relationships |
415
+ | `SQL` | Variable | Custom indexes | Complex filtering/aggregation |
416
+
417
+ ### Example: Multi-Query Session
418
+
419
+ ```sql
420
+ -- Query 1: Find relevant documents
421
+ SEARCH "API migration planning" FROM resources LIMIT 5
422
+
423
+ -- Query 2: Get specific document
424
+ LOOKUP "tidb-migration-spec" FROM resources
425
+
426
+ -- Query 3: Find related people
427
+ TRAVERSE FROM "tidb-migration-spec" TYPE "authored_by,reviewed_by" DEPTH 1
428
+
429
+ -- Query 4: Recent activity
430
+ SELECT * FROM moments WHERE
431
+ 'tidb-migration' = ANY(topic_tags) AND
432
+ start_time > NOW() - INTERVAL '30 days'
433
+ ```
434
+
435
+ ### Tenant Isolation
436
+
437
+ All queries are automatically scoped by `user_id` for complete data isolation:
438
+
439
+ ```sql
440
+ -- Automatically filtered to user's data
441
+ SEARCH "contracts" FROM resources LIMIT 10
442
+
443
+ -- No cross-user data leakage
444
+ TRAVERSE FROM "project-x" TYPE "references" DEPTH 3
445
+ ```
446
+
447
+ ## API Endpoints
448
+
449
+ ### Chat Completions (OpenAI-compatible)
450
+
451
+ ```bash
452
+ POST /api/v1/chat/completions
453
+ ```
454
+
455
+ **Headers**:
456
+ - `X-Tenant-Id`: Tenant identifier (required for REM)
457
+ - `X-User-Id`: User identifier
458
+ - `X-Session-Id`: Session/conversation identifier
459
+ - `X-Agent-Schema`: Agent schema URI to use
460
+
461
+ **Body**:
462
+ ```json
463
+ {
464
+ "model": "anthropic:claude-sonnet-4-5-20250929",
465
+ "messages": [
466
+ {"role": "user", "content": "Find all documents Sarah authored"}
467
+ ],
468
+ "stream": true,
469
+ "response_format": {"type": "text"}
470
+ }
471
+ ```
472
+
473
+ **Streaming Response** (SSE):
474
+ ```
475
+ data: {"id": "chatcmpl-123", "choices": [{"delta": {"role": "assistant", "content": ""}}]}
476
+
477
+ data: {"id": "chatcmpl-123", "choices": [{"delta": {"content": "[Calling: search_rem]"}}]}
478
+
479
+ data: {"id": "chatcmpl-123", "choices": [{"delta": {"content": "Found 3 documents..."}}]}
480
+
481
+ data: {"id": "chatcmpl-123", "choices": [{"delta": {}, "finish_reason": "stop"}]}
482
+
483
+ data: [DONE]
484
+ ```
485
+
486
+ ### MCP Endpoint
487
+
488
+ ```bash
489
+ # MCP HTTP transport
490
+ POST /api/v1/mcp
491
+ ```
492
+
493
+ Exposes tools and resources for REM query execution, resource management, and file operations.
494
+
495
+ ### Health Check
496
+
497
+ ```bash
498
+ GET /health
499
+ # {"status": "healthy", "version": "0.1.0"}
500
+ ```
501
+
502
+ ## CLI Reference
503
+
504
+ REM provides a comprehensive command-line interface for all operations.
505
+
506
+ ### Configuration & Server
507
+
508
+ #### `rem configure` - Interactive Setup Wizard
509
+
510
+ Set up REM with PostgreSQL, LLM providers, and S3 storage. **Defaults to port 5051 (package users).**
511
+
512
+ ```bash
513
+ # Complete setup (recommended for package users)
514
+ rem configure --install --claude-desktop
515
+
516
+ # This runs:
517
+ # 1. Interactive wizard (creates ~/.rem/config.yaml)
518
+ # 2. Installs database tables (rem db migrate)
519
+ # 3. Registers REM MCP server with Claude Desktop
520
+
521
+ # Other options:
522
+ rem configure # Just run wizard
523
+ rem configure --install # Wizard + database install
524
+ rem configure --show # Show current configuration
525
+ rem configure --edit # Edit configuration in $EDITOR
526
+ ```
527
+
528
+ **Default Configuration:**
529
+ - **Package users**: `localhost:5051` (docker-compose.prebuilt.yml with Docker Hub image)
530
+ - **Developers**: Change to `localhost:5050` during wizard (docker-compose.yml with local build)
531
+ - **Custom database**: Enter your own host/port/credentials
532
+
533
+ **Configuration File:** `~/.rem/config.yaml`
534
+
535
+ ```yaml
536
+ postgres:
537
+ # Package users (prebuilt)
538
+ connection_string: postgresql://rem:rem@localhost:5051/rem
539
+ # OR Developers (local build)
540
+ # connection_string: postgresql://rem:rem@localhost:5050/rem
541
+ pool_min_size: 5
542
+ pool_max_size: 20
543
+
544
+ llm:
545
+ default_model: anthropic:claude-sonnet-4-5-20250929
546
+ openai_api_key: sk-...
547
+ anthropic_api_key: sk-ant-...
548
+
549
+ s3:
550
+ bucket_name: rem-storage
551
+ region: us-east-1
552
+ ```
553
+
554
+ **Precedence:** Environment variables > Config file > Defaults
555
+
556
+ **Port Guide:**
557
+ - **5051**: Package users with `docker-compose.prebuilt.yml` (recommended)
558
+ - **5050**: Developers with `docker-compose.yml` (local development)
559
+ - **Custom**: Your own PostgreSQL instance
560
+
561
+ #### `rem mcp` - Run MCP Server
562
+
563
+ Run the FastMCP server for Claude Desktop integration.
564
+
565
+ ```bash
566
+ # Stdio mode (for Claude Desktop)
567
+ rem mcp
568
+
569
+ # HTTP mode (for testing)
570
+ rem mcp --http --port 8001
571
+ ```
572
+
573
+ #### `rem serve` - Start API Server
574
+
575
+ Start the FastAPI server with uvicorn.
576
+
577
+ ```bash
578
+ # Use settings from config
579
+ rem serve
580
+
581
+ # Development mode (auto-reload)
582
+ rem serve --reload
583
+
584
+ # Production mode (4 workers)
585
+ rem serve --workers 4
586
+
587
+ # Bind to all interfaces
588
+ rem serve --host 0.0.0.0 --port 8080
589
+
590
+ # Override log level
591
+ rem serve --log-level debug
592
+ ```
593
+
594
+ ### Database Management
595
+
596
+ #### `rem db migrate` - Run Migrations
597
+
598
+ Apply database migrations (install.sql and install_models.sql).
599
+
600
+ ```bash
601
+ # Apply all migrations
602
+ rem db migrate
603
+
604
+ # Core infrastructure only (extensions, functions)
605
+ rem db migrate --install
606
+
607
+ # Entity tables only (Resource, Message, etc.)
608
+ rem db migrate --models
609
+
610
+ # Background indexes (HNSW for vectors)
611
+ rem db migrate --background-indexes
612
+
613
+ # Custom connection string
614
+ rem db migrate --connection "postgresql://user:pass@host:5432/db"
615
+
616
+ # Custom SQL directory
617
+ rem db migrate --sql-dir /path/to/sql
618
+ ```
619
+
620
+ #### `rem db status` - Migration Status
621
+
622
+ Show applied migrations and execution times.
623
+
624
+ ```bash
625
+ rem db status
626
+ ```
627
+
628
+ #### `rem db rebuild-cache` - Rebuild KV Cache
629
+
630
+ Rebuild KV_STORE cache from entity tables (after database restart or bulk imports).
631
+
632
+ ```bash
633
+ rem db rebuild-cache
634
+ ```
635
+
636
+ ### Schema Management
637
+
638
+ #### `rem db schema generate` - Generate SQL Schema
639
+
640
+ Generate database schema from Pydantic models.
641
+
642
+ ```bash
643
+ # Generate install_models.sql from entity models
644
+ rem db schema generate \
645
+ --models src/rem/models/entities \
646
+ --output rem/src/rem/sql/install_models.sql
647
+
648
+ # Generate migration file
649
+ rem db schema generate \
650
+ --models src/rem/models/entities \
651
+ --output rem/src/rem/sql/migrations/003_add_fields.sql
652
+ ```
653
+
654
+ #### `rem db schema indexes` - Generate Background Indexes
655
+
656
+ Generate SQL for background index creation (HNSW for vectors).
657
+
658
+ ```bash
659
+ # Generate background_indexes.sql
660
+ rem db schema indexes \
661
+ --models src/rem/models/entities \
662
+ --output rem/src/rem/sql/background_indexes.sql
663
+ ```
664
+
665
+ #### `rem db schema validate` - Validate Models
666
+
667
+ Validate Pydantic models for schema generation.
668
+
669
+ ```bash
670
+ rem db schema validate --models src/rem/models/entities
671
+ ```
672
+
673
+ ### File Processing
674
+
675
+ #### `rem process files` - Process Files
676
+
677
+ Process files with an optional custom extractor (ontology extraction).
678
+
679
+ ```bash
680
+ # Process up to 10 completed files for a tenant
681
+ rem process files \
682
+ --tenant-id acme-corp \
683
+ --status completed \
684
+ --limit 10
685
+
686
+ # Process with custom extractor
687
+ rem process files \
688
+ --tenant-id acme-corp \
689
+ --extractor cv-parser-v1 \
690
+ --limit 50
691
+
692
+ # Process files from the last 7 days
693
+ rem process files \
694
+ --tenant-id acme-corp \
695
+ --lookback-hours 168
696
+ ```
697
+
698
+ #### `rem process ingest` - Ingest File into REM
699
+
700
+ Ingest a file into REM with the full pipeline (storage + parsing + embedding + database).
701
+
702
+ ```bash
703
+ # Ingest local file
704
+ rem process ingest /path/to/document.pdf \
705
+ --user-id user-123 \
706
+ --category legal \
707
+ --tags contract,2024
708
+
709
+ # Ingest with minimal options
710
+ rem process ingest ./meeting-notes.md --user-id user-123
711
+ ```
712
+
713
+ #### `rem process uri` - Parse File (Read-Only)
714
+
715
+ Parse a file and extract its content **without** storing it in the database (useful for testing parsers).
716
+
717
+ ```bash
718
+ # Parse local file (output to stdout)
719
+ rem process uri /path/to/document.pdf
720
+
721
+ # Parse and save extracted content to file
722
+ rem process uri /path/to/document.pdf --save output.json
723
+
724
+ # Parse S3 file
725
+ rem process uri s3://bucket/key.docx --output text
726
+ ```
727
+
728
+ ### Memory & Knowledge Extraction (Dreaming)
729
+
730
+ #### `rem dreaming full` - Complete Workflow
731
+
732
+ Run full dreaming workflow: extractors → moments → affinity → user model.
733
+
734
+ ```bash
735
+ # Full workflow for user
736
+ rem dreaming full \
737
+ --user-id user-123 \
738
+ --tenant-id acme-corp
739
+
740
+ # Skip ontology extractors
741
+ rem dreaming full \
742
+ --user-id user-123 \
743
+ --tenant-id acme-corp \
744
+ --skip-extractors
745
+
746
+ # Process last 24 hours only
747
+ rem dreaming full \
748
+ --user-id user-123 \
749
+ --tenant-id acme-corp \
750
+ --lookback-hours 24
751
+
752
+ # Limit resources processed
753
+ rem dreaming full \
754
+ --user-id user-123 \
755
+ --tenant-id acme-corp \
756
+ --limit 100
757
+ ```
758
+
759
+ #### `rem dreaming custom` - Custom Extractor
760
+
761
+ Run a specific ontology extractor on a user's data.
762
+
763
+ ```bash
764
+ # Run CV parser on user's files
765
+ rem dreaming custom \
766
+ --user-id user-123 \
767
+ --tenant-id acme-corp \
768
+ --extractor cv-parser-v1
769
+
770
+ # Process last week's files
771
+ rem dreaming custom \
772
+ --user-id user-123 \
773
+ --tenant-id acme-corp \
774
+ --extractor contract-analyzer-v1 \
775
+ --lookback-hours 168 \
776
+ --limit 50
777
+ ```
778
+
779
+ #### `rem dreaming moments` - Extract Moments
780
+
781
+ Extract temporal narratives from resources.
782
+
783
+ ```bash
784
+ # Generate moments for user
785
+ rem dreaming moments \
786
+ --user-id user-123 \
787
+ --tenant-id acme-corp \
788
+ --limit 50
789
+
790
+ # Process last 7 days
791
+ rem dreaming moments \
792
+ --user-id user-123 \
793
+ --tenant-id acme-corp \
794
+ --lookback-hours 168
795
+ ```
796
+
797
+ #### `rem dreaming affinity` - Build Relationships
798
+
799
+ Build semantic relationships between resources using embeddings.
800
+
801
+ ```bash
802
+ # Build affinity graph for user
803
+ rem dreaming affinity \
804
+ --user-id user-123 \
805
+ --tenant-id acme-corp \
806
+ --limit 100
807
+
808
+ # Process recent resources only
809
+ rem dreaming affinity \
810
+ --user-id user-123 \
811
+ --tenant-id acme-corp \
812
+ --lookback-hours 24
813
+ ```
814
+
815
+ #### `rem dreaming user-model` - Update User Model
816
+
817
+ Update user model from recent activity (preferences, interests, patterns).
818
+
819
+ ```bash
820
+ # Update user model
821
+ rem dreaming user-model \
822
+ --user-id user-123 \
823
+ --tenant-id acme-corp
824
+ ```
825
+
826
+ ### Evaluation & Experiments
827
+
828
+ #### `rem experiments` - Experiment Management
829
+
830
+ Manage evaluation experiments with datasets, prompts, and traces.
831
+
832
+ ```bash
833
+ # Create experiment configuration
834
+ rem experiments create my-evaluation \
835
+ --agent ask_rem \
836
+ --evaluator rem-lookup-correctness \
837
+ --description "Baseline evaluation"
838
+
839
+ # Run experiment
840
+ rem experiments run my-evaluation
841
+
842
+ # List experiments
843
+ rem experiments list
844
+ rem experiments show my-evaluation
845
+ ```
846
+
847
+ #### `rem experiments dataset` - Dataset Management
848
+
849
+ ```bash
850
+ # Create dataset from CSV
851
+ rem experiments dataset create rem-lookup-golden \
852
+ --from-csv golden.csv \
853
+ --input-keys query \
854
+ --output-keys expected_label,expected_type
855
+
856
+ # Add more examples
857
+ rem experiments dataset add rem-lookup-golden \
858
+ --from-csv more-data.csv \
859
+ --input-keys query \
860
+ --output-keys expected_label,expected_type
861
+
862
+ # List datasets
863
+ rem experiments dataset list
864
+ ```
865
+
866
+ #### `rem experiments prompt` - Prompt Management
867
+
868
+ ```bash
869
+ # Create agent prompt
870
+ rem experiments prompt create hello-world \
871
+ --system-prompt "You are a helpful assistant." \
872
+ --model-name gpt-4o
873
+
874
+ # List prompts
875
+ rem experiments prompt list
876
+ ```
877
+
878
+ #### `rem experiments trace` - Trace Retrieval
879
+
880
+ ```bash
881
+ # List recent traces
882
+ rem experiments trace list --project rem-agents --days 7 --limit 50
883
+ ```
884
+
885
+ #### `rem experiments` - Experiment Config
886
+
887
+ Manage experiment configurations (A/B testing, parameter sweeps).
888
+
889
+ ```bash
890
+ # Create experiment config
891
+ rem experiments create \
892
+ --name cv-parser-test \
893
+ --description "Test CV parser with different models"
894
+
895
+ # List experiments
896
+ rem experiments list
897
+
898
+ # Show experiment details
899
+ rem experiments show cv-parser-test
900
+
901
+ # Run experiment
902
+ rem experiments run cv-parser-test
903
+ ```
904
+
905
+ ### Interactive Agent
906
+
907
+ #### `rem ask` - Test Agent
908
+
909
+ Test Pydantic AI agent with natural language queries.
910
+
911
+ ```bash
912
+ # Ask a question
913
+ rem ask "What documents did Sarah Chen author?"
914
+
915
+ # With context headers
916
+ rem ask "Find all resources about API design" \
917
+ --user-id user-123 \
918
+ --tenant-id acme-corp
919
+
920
+ # Use specific agent schema
921
+ rem ask "Analyze this contract" \
922
+ --agent-schema contract-analyzer-v1
923
+ ```
924
+
925
+ ### Global Options
926
+
927
+ All commands support:
928
+
929
+ ```bash
930
+ # Verbose logging
931
+ rem --verbose <command>
932
+ rem -v <command>
933
+
934
+ # Version
935
+ rem --version
936
+
937
+ # Help
938
+ rem --help
939
+ rem <command> --help
940
+ rem <command> <subcommand> --help
941
+ ```
942
+
943
+ ### Environment Variables
944
+
945
+ Override any setting via environment variables:
946
+
947
+ ```bash
948
+ # Database
949
+ export POSTGRES__CONNECTION_STRING=postgresql://rem:rem@localhost:5432/rem
950
+ export POSTGRES__POOL_MIN_SIZE=5
951
+
952
+ # LLM
953
+ export LLM__DEFAULT_MODEL=openai:gpt-4o
954
+ export LLM__OPENAI_API_KEY=sk-...
955
+ export LLM__ANTHROPIC_API_KEY=sk-ant-...
956
+
957
+ # S3
958
+ export S3__BUCKET_NAME=rem-storage
959
+ export S3__REGION=us-east-1
960
+
961
+ # Server
962
+ export API__HOST=0.0.0.0
963
+ export API__PORT=8000
964
+ export API__RELOAD=true
965
+
966
+ # Run command with overrides
967
+ rem serve
968
+ ```
969
+
970
+ ## Development (For Contributors)
971
+
972
+ **Best for**: Contributing to REM or customizing the codebase.
973
+
974
+ ### Step 1: Clone Repository
975
+
976
+ ```bash
977
+ git clone https://github.com/mr-saoirse/remstack.git
978
+ cd remstack/rem
979
+ ```
980
+
981
+ ### Step 2: Start PostgreSQL Only
982
+
983
+ ```bash
984
+ # Start only PostgreSQL (port 5050 for developers, doesn't conflict with package users on 5051)
985
+ docker compose up postgres -d
986
+
987
+ # Verify connection
988
+ psql -h localhost -p 5050 -U rem -d rem -c "SELECT version();"
989
+ ```
990
+
991
+ ### Step 3: Set Up Development Environment
992
+
993
+ ```bash
994
+ # IMPORTANT: If you previously installed the package and ran `rem configure`,
995
+ # delete the REM configuration directory to avoid conflicts:
996
+ rm -rf ~/.rem/
997
+
998
+ # Create virtual environment with uv
999
+ uv venv
1000
+ source .venv/bin/activate # On Windows: .venv\Scripts\activate
1001
+
1002
+ # Install in editable mode with all dependencies
1003
+ uv pip install -e ".[all]"
1004
+
1005
+ # Set LLM API keys
1006
+ export OPENAI_API_KEY="sk-..."
1007
+ export ANTHROPIC_API_KEY="sk-ant-..."
1008
+ export POSTGRES__CONNECTION_STRING="postgresql://rem:rem@localhost:5050/rem"
1009
+
1010
+ # Verify CLI
1011
+ rem --version
1012
+ ```
1013
+
1014
+ ### Step 4: Initialize Database
1015
+
1016
+ ```bash
1017
+ # Apply migrations
1018
+ rem db migrate
1019
+
1020
+ # Verify tables
1021
+ psql -h localhost -p 5050 -U rem -d rem -c "\dt"
1022
+ ```
1023
+
1024
+ ### Step 5: Run API Server (Optional)
1025
+
1026
+ ```bash
1027
+ # Start API server with hot reload
1028
+ uv run python -m rem.api.main
1029
+
1030
+ # API runs on http://localhost:8000
1031
+ ```
1032
+
1033
+ ### Step 6: Run Tests
1034
+
1035
+ ```bash
1036
+ # Run non-LLM tests (fast, no API costs)
1037
+ uv run pytest tests/integration/ -m "not llm" -v
1038
+
1039
+ # Run all tests (uses API credits)
1040
+ uv run pytest tests/integration/ -v
1041
+
1042
+ # Type check (saves report to .mypy/ folder)
1043
+ ../scripts/run_mypy.sh
1044
+ ```
1045
+
1046
+ Type checking reports are saved to `.mypy/report_YYYYMMDD_HHMMSS.txt` (gitignored).
1047
+ Current status: 222 errors in 55 files (as of 2025-11-23).
1048
+
1049
+ ### Environment Variables
1050
+
1051
+ All settings can be provided via environment variables, using `__` as the nesting delimiter:
1052
+
1053
+ ```bash
1054
+ # LLM
1055
+ LLM__DEFAULT_MODEL=anthropic:claude-sonnet-4-5-20250929
1056
+ LLM__DEFAULT_TEMPERATURE=0.5
1057
+
1058
+ # Auth (disabled by default)
1059
+ AUTH__ENABLED=false
1060
+ AUTH__OIDC_ISSUER_URL=https://accounts.google.com
1061
+
1062
+ # OTEL (disabled by default for local dev)
1063
+ OTEL__ENABLED=false
1064
+ OTEL__SERVICE_NAME=rem-api
1065
+
1066
+ # Postgres
1067
+ POSTGRES__CONNECTION_STRING=postgresql://rem:rem@localhost:5050/rem
1068
+
1069
+ # S3
1070
+ S3__BUCKET_NAME=rem-storage
1071
+ S3__REGION=us-east-1
1072
+ ```
1073
+
1074
+ ### Production Deployment (Optional)
1075
+
1076
+ For production deployment to AWS EKS with Kubernetes, see the following READMEs in the main repository:
1077
+ - **Infrastructure**: [../../manifests/infra/pulumi/eks-yaml/README.md](../../manifests/infra/pulumi/eks-yaml/README.md)
1078
+ - **Platform**: [../../manifests/platform/README.md](../../manifests/platform/README.md)
1079
+ - **Application**: [../../manifests/application/README.md](../../manifests/application/README.md)
1080
+
1081
+
1082
+ ## REM Query Dialect (AST)
1083
+
1084
+ REM queries follow a structured dialect with formal grammar specification.
1085
+
1086
+ ### Grammar
1087
+
1088
+ ```
1089
+ Query ::= LookupQuery | FuzzyQuery | SearchQuery | SqlQuery | TraverseQuery
1090
+
1091
+ LookupQuery ::= LOOKUP <key:string|list[string]>
1092
+ key : Single entity name or list of entity names (natural language labels)
1093
+ performance : O(1) per key
1094
+ available : Stage 1+
1095
+ examples :
1096
+ - LOOKUP "Sarah"
1097
+ - LOOKUP ["Sarah", "Mike", "Emily"]
1098
+ - LOOKUP "Project Alpha"
1099
+
1100
+ FuzzyQuery ::= FUZZY <text:string> [THRESHOLD <t:float>] [LIMIT <n:int>]
1101
+ text : Search text (partial/misspelled)
1102
+ threshold : Similarity score 0.0-1.0 (default: 0.5)
1103
+ limit : Max results (default: 5)
1104
+ performance : Indexed (pg_trgm)
1105
+ available : Stage 1+
1106
+ example : FUZZY "sara" THRESHOLD 0.5 LIMIT 10
1107
+
1108
+ SearchQuery ::= SEARCH <text:string> [TABLE <table:string>] [WHERE <clause:string>] [LIMIT <n:int>]
1109
+ text : Semantic query text
1110
+ table : Target table (default: "resources")
1111
+ clause : Optional PostgreSQL WHERE clause for hybrid filtering (combines vector + structured)
1112
+ limit : Max results (default: 10)
1113
+ performance : Indexed (pgvector)
1114
+ available : Stage 3+
1115
+ examples :
1116
+ - SEARCH "database migration" TABLE resources LIMIT 10
1117
+ - SEARCH "team discussion" TABLE moments WHERE "moment_type='meeting'" LIMIT 5
1118
+ - SEARCH "project updates" WHERE "created_at >= '2024-01-01'" LIMIT 20
1119
+ - SEARCH "AI research" WHERE "tags @> ARRAY['machine-learning']" LIMIT 10
1120
+
1121
+ Hybrid Query Support: SEARCH combines semantic vector similarity with structured filtering.
1122
+ Use WHERE clause to filter on system fields or entity-specific fields.
1123
+
1124
+ SqlQuery ::= <raw_sql:string>
1125
+ | SQL <table:string> [WHERE <clause:string>] [ORDER BY <order:string>] [LIMIT <n:int>]
1126
+
1127
+ Mode 1 (Raw SQL - Recommended):
1128
+ Any query not starting with a REM keyword (LOOKUP, FUZZY, SEARCH, SQL, TRAVERSE) is treated as raw SQL.
1129
+ Allowed: SELECT, INSERT, UPDATE, WITH (read + data modifications)
1130
+ Blocked: DROP, DELETE, TRUNCATE, ALTER (destructive operations)
1131
+
1132
+ Mode 2 (Structured - Legacy):
1133
+ SQL prefix with table + WHERE clause (automatic tenant isolation)
1134
+
1135
+ performance : O(n) with indexes
1136
+ available : Stage 1+
1137
+ dialect : PostgreSQL (full PostgreSQL syntax support)
1138
+
1139
+ examples :
1140
+ # Raw SQL (no prefix needed)
1141
+ - SELECT * FROM resources WHERE created_at > NOW() - INTERVAL '7 days' LIMIT 20
1142
+ - SELECT category, COUNT(*) as count FROM resources GROUP BY category
1143
+ - WITH recent AS (SELECT * FROM resources WHERE created_at > NOW() - INTERVAL '1 day') SELECT * FROM recent
1144
+
1145
+ # Structured SQL (legacy, automatic tenant isolation)
1146
+ - SQL moments WHERE "moment_type='meeting'" ORDER BY starts_timestamp DESC LIMIT 10
1147
+ - SQL resources WHERE "metadata->>'status' = 'published'" LIMIT 20
1148
+
1149
+ PostgreSQL Dialect: Full support for:
1150
+ - JSONB operators (->>, ->, @>, etc.)
1151
+ - Array operators (&&, @>, <@, etc.)
1152
+ - CTEs (WITH clauses)
1153
+ - Advanced filtering and aggregations
1154
+
1155
+ TraverseQuery ::= TRAVERSE [<edge_types:list>] WITH <initial_query:Query> [DEPTH <d:int>] [ORDER BY <order:string>] [LIMIT <n:int>]
1156
+ edge_types : Relationship types to follow (e.g., ["manages", "reports-to"], default: all)
1157
+ initial_query : Starting query (typically LOOKUP)
1158
+ depth : Number of hops (0=PLAN mode, 1=single hop, N=multi-hop, default: 1)
1159
+ order : Order results (default: "edge.created_at DESC")
1160
+ limit : Max nodes (default: 9)
1161
+ performance : O(k) where k = visited nodes
1162
+ available : Stage 3+
1163
+ examples :
1164
+ - TRAVERSE manages WITH LOOKUP "Sally" DEPTH 1
1165
+ - TRAVERSE WITH LOOKUP "Sally" DEPTH 0 (PLAN mode: edge analysis only)
1166
+ - TRAVERSE manages,reports-to WITH LOOKUP "Sarah" DEPTH 2 LIMIT 5
1167
+ ```
1168
+
1169
+ ### Query Availability by Evolution Stage
1170
+
1171
+ | Query Type | Stage 0 | Stage 1 | Stage 2 | Stage 3 | Stage 4 |
1172
+ |------------|---------|---------|---------|---------|---------|
1173
+ | LOOKUP | ✗ | ✓ | ✓ | ✓ | ✓ |
1174
+ | FUZZY | ✗ | ✓ | ✓ | ✓ | ✓ |
1175
+ | SEARCH | ✗ | ✗ | ✗ | ✓ | ✓ |
1176
+ | SQL | ✗ | ✓ | ✓ | ✓ | ✓ |
1177
+ | TRAVERSE | ✗ | ✗ | ✗ | ✓ | ✓ |
1178
+
1179
+ **Stage 0**: No data; all queries fail.
1180
+
1181
+ **Stage 1** (20% answerable): Resources seeded with entity extraction. LOOKUP and FUZZY work for finding entities. SQL works for basic filtering.
1182
+
1183
+ **Stage 2** (50% answerable): Moments extracted. SQL temporal queries work. LOOKUP includes moment entities.
1184
+
1185
+ **Stage 3** (80% answerable): Affinity graph built. SEARCH and TRAVERSE become available. Multi-hop graph queries work.
1186
+
1187
+ **Stage 4** (100% answerable): Mature graph with rich historical data. All query types fully functional with high-quality results.
1188
+
1189
+ ## License
1190
+
1191
+ MIT