remdb 0.3.242__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (235) hide show
  1. rem/__init__.py +129 -0
  2. rem/agentic/README.md +760 -0
  3. rem/agentic/__init__.py +54 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +38 -0
  6. rem/agentic/agents/agent_manager.py +311 -0
  7. rem/agentic/agents/sse_simulator.py +502 -0
  8. rem/agentic/context.py +425 -0
  9. rem/agentic/context_builder.py +360 -0
  10. rem/agentic/llm_provider_models.py +301 -0
  11. rem/agentic/mcp/__init__.py +0 -0
  12. rem/agentic/mcp/tool_wrapper.py +273 -0
  13. rem/agentic/otel/__init__.py +5 -0
  14. rem/agentic/otel/setup.py +240 -0
  15. rem/agentic/providers/phoenix.py +926 -0
  16. rem/agentic/providers/pydantic_ai.py +854 -0
  17. rem/agentic/query.py +117 -0
  18. rem/agentic/query_helper.py +89 -0
  19. rem/agentic/schema.py +737 -0
  20. rem/agentic/serialization.py +245 -0
  21. rem/agentic/tools/__init__.py +5 -0
  22. rem/agentic/tools/rem_tools.py +242 -0
  23. rem/api/README.md +657 -0
  24. rem/api/deps.py +253 -0
  25. rem/api/main.py +460 -0
  26. rem/api/mcp_router/prompts.py +182 -0
  27. rem/api/mcp_router/resources.py +820 -0
  28. rem/api/mcp_router/server.py +243 -0
  29. rem/api/mcp_router/tools.py +1605 -0
  30. rem/api/middleware/tracking.py +172 -0
  31. rem/api/routers/admin.py +520 -0
  32. rem/api/routers/auth.py +898 -0
  33. rem/api/routers/chat/__init__.py +5 -0
  34. rem/api/routers/chat/child_streaming.py +394 -0
  35. rem/api/routers/chat/completions.py +702 -0
  36. rem/api/routers/chat/json_utils.py +76 -0
  37. rem/api/routers/chat/models.py +202 -0
  38. rem/api/routers/chat/otel_utils.py +33 -0
  39. rem/api/routers/chat/sse_events.py +546 -0
  40. rem/api/routers/chat/streaming.py +950 -0
  41. rem/api/routers/chat/streaming_utils.py +327 -0
  42. rem/api/routers/common.py +18 -0
  43. rem/api/routers/dev.py +87 -0
  44. rem/api/routers/feedback.py +276 -0
  45. rem/api/routers/messages.py +620 -0
  46. rem/api/routers/models.py +86 -0
  47. rem/api/routers/query.py +362 -0
  48. rem/api/routers/shared_sessions.py +422 -0
  49. rem/auth/README.md +258 -0
  50. rem/auth/__init__.py +36 -0
  51. rem/auth/jwt.py +367 -0
  52. rem/auth/middleware.py +318 -0
  53. rem/auth/providers/__init__.py +16 -0
  54. rem/auth/providers/base.py +376 -0
  55. rem/auth/providers/email.py +215 -0
  56. rem/auth/providers/google.py +163 -0
  57. rem/auth/providers/microsoft.py +237 -0
  58. rem/cli/README.md +517 -0
  59. rem/cli/__init__.py +8 -0
  60. rem/cli/commands/README.md +299 -0
  61. rem/cli/commands/__init__.py +3 -0
  62. rem/cli/commands/ask.py +549 -0
  63. rem/cli/commands/cluster.py +1808 -0
  64. rem/cli/commands/configure.py +495 -0
  65. rem/cli/commands/db.py +828 -0
  66. rem/cli/commands/dreaming.py +324 -0
  67. rem/cli/commands/experiments.py +1698 -0
  68. rem/cli/commands/mcp.py +66 -0
  69. rem/cli/commands/process.py +388 -0
  70. rem/cli/commands/query.py +109 -0
  71. rem/cli/commands/scaffold.py +47 -0
  72. rem/cli/commands/schema.py +230 -0
  73. rem/cli/commands/serve.py +106 -0
  74. rem/cli/commands/session.py +453 -0
  75. rem/cli/dreaming.py +363 -0
  76. rem/cli/main.py +123 -0
  77. rem/config.py +244 -0
  78. rem/mcp_server.py +41 -0
  79. rem/models/core/__init__.py +49 -0
  80. rem/models/core/core_model.py +70 -0
  81. rem/models/core/engram.py +333 -0
  82. rem/models/core/experiment.py +672 -0
  83. rem/models/core/inline_edge.py +132 -0
  84. rem/models/core/rem_query.py +246 -0
  85. rem/models/entities/__init__.py +68 -0
  86. rem/models/entities/domain_resource.py +38 -0
  87. rem/models/entities/feedback.py +123 -0
  88. rem/models/entities/file.py +57 -0
  89. rem/models/entities/image_resource.py +88 -0
  90. rem/models/entities/message.py +64 -0
  91. rem/models/entities/moment.py +123 -0
  92. rem/models/entities/ontology.py +181 -0
  93. rem/models/entities/ontology_config.py +131 -0
  94. rem/models/entities/resource.py +95 -0
  95. rem/models/entities/schema.py +87 -0
  96. rem/models/entities/session.py +84 -0
  97. rem/models/entities/shared_session.py +180 -0
  98. rem/models/entities/subscriber.py +175 -0
  99. rem/models/entities/user.py +93 -0
  100. rem/py.typed +0 -0
  101. rem/registry.py +373 -0
  102. rem/schemas/README.md +507 -0
  103. rem/schemas/__init__.py +6 -0
  104. rem/schemas/agents/README.md +92 -0
  105. rem/schemas/agents/core/agent-builder.yaml +235 -0
  106. rem/schemas/agents/core/moment-builder.yaml +178 -0
  107. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  108. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  109. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  110. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  111. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  112. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  113. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  114. rem/schemas/agents/examples/hello-world.yaml +37 -0
  115. rem/schemas/agents/examples/query.yaml +54 -0
  116. rem/schemas/agents/examples/simple.yaml +21 -0
  117. rem/schemas/agents/examples/test.yaml +29 -0
  118. rem/schemas/agents/rem.yaml +132 -0
  119. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  120. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  121. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  122. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  123. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  124. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  125. rem/services/__init__.py +18 -0
  126. rem/services/audio/INTEGRATION.md +308 -0
  127. rem/services/audio/README.md +376 -0
  128. rem/services/audio/__init__.py +15 -0
  129. rem/services/audio/chunker.py +354 -0
  130. rem/services/audio/transcriber.py +259 -0
  131. rem/services/content/README.md +1269 -0
  132. rem/services/content/__init__.py +5 -0
  133. rem/services/content/providers.py +760 -0
  134. rem/services/content/service.py +762 -0
  135. rem/services/dreaming/README.md +230 -0
  136. rem/services/dreaming/__init__.py +53 -0
  137. rem/services/dreaming/affinity_service.py +322 -0
  138. rem/services/dreaming/moment_service.py +251 -0
  139. rem/services/dreaming/ontology_service.py +54 -0
  140. rem/services/dreaming/user_model_service.py +297 -0
  141. rem/services/dreaming/utils.py +39 -0
  142. rem/services/email/__init__.py +10 -0
  143. rem/services/email/service.py +522 -0
  144. rem/services/email/templates.py +360 -0
  145. rem/services/embeddings/__init__.py +11 -0
  146. rem/services/embeddings/api.py +127 -0
  147. rem/services/embeddings/worker.py +435 -0
  148. rem/services/fs/README.md +662 -0
  149. rem/services/fs/__init__.py +62 -0
  150. rem/services/fs/examples.py +206 -0
  151. rem/services/fs/examples_paths.py +204 -0
  152. rem/services/fs/git_provider.py +935 -0
  153. rem/services/fs/local_provider.py +760 -0
  154. rem/services/fs/parsing-hooks-examples.md +172 -0
  155. rem/services/fs/paths.py +276 -0
  156. rem/services/fs/provider.py +460 -0
  157. rem/services/fs/s3_provider.py +1042 -0
  158. rem/services/fs/service.py +186 -0
  159. rem/services/git/README.md +1075 -0
  160. rem/services/git/__init__.py +17 -0
  161. rem/services/git/service.py +469 -0
  162. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  163. rem/services/phoenix/README.md +453 -0
  164. rem/services/phoenix/__init__.py +46 -0
  165. rem/services/phoenix/client.py +960 -0
  166. rem/services/phoenix/config.py +88 -0
  167. rem/services/phoenix/prompt_labels.py +477 -0
  168. rem/services/postgres/README.md +757 -0
  169. rem/services/postgres/__init__.py +49 -0
  170. rem/services/postgres/diff_service.py +599 -0
  171. rem/services/postgres/migration_service.py +427 -0
  172. rem/services/postgres/programmable_diff_service.py +635 -0
  173. rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
  174. rem/services/postgres/register_type.py +353 -0
  175. rem/services/postgres/repository.py +481 -0
  176. rem/services/postgres/schema_generator.py +661 -0
  177. rem/services/postgres/service.py +802 -0
  178. rem/services/postgres/sql_builder.py +355 -0
  179. rem/services/rate_limit.py +113 -0
  180. rem/services/rem/README.md +318 -0
  181. rem/services/rem/__init__.py +23 -0
  182. rem/services/rem/exceptions.py +71 -0
  183. rem/services/rem/executor.py +293 -0
  184. rem/services/rem/parser.py +180 -0
  185. rem/services/rem/queries.py +196 -0
  186. rem/services/rem/query.py +371 -0
  187. rem/services/rem/service.py +608 -0
  188. rem/services/session/README.md +374 -0
  189. rem/services/session/__init__.py +13 -0
  190. rem/services/session/compression.py +488 -0
  191. rem/services/session/pydantic_messages.py +310 -0
  192. rem/services/session/reload.py +85 -0
  193. rem/services/user_service.py +130 -0
  194. rem/settings.py +1877 -0
  195. rem/sql/background_indexes.sql +52 -0
  196. rem/sql/migrations/001_install.sql +983 -0
  197. rem/sql/migrations/002_install_models.sql +3157 -0
  198. rem/sql/migrations/003_optional_extensions.sql +326 -0
  199. rem/sql/migrations/004_cache_system.sql +282 -0
  200. rem/sql/migrations/005_schema_update.sql +145 -0
  201. rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
  202. rem/utils/AGENTIC_CHUNKING.md +597 -0
  203. rem/utils/README.md +628 -0
  204. rem/utils/__init__.py +61 -0
  205. rem/utils/agentic_chunking.py +622 -0
  206. rem/utils/batch_ops.py +343 -0
  207. rem/utils/chunking.py +108 -0
  208. rem/utils/clip_embeddings.py +276 -0
  209. rem/utils/constants.py +97 -0
  210. rem/utils/date_utils.py +228 -0
  211. rem/utils/dict_utils.py +98 -0
  212. rem/utils/embeddings.py +436 -0
  213. rem/utils/examples/embeddings_example.py +305 -0
  214. rem/utils/examples/sql_types_example.py +202 -0
  215. rem/utils/files.py +323 -0
  216. rem/utils/markdown.py +16 -0
  217. rem/utils/mime_types.py +158 -0
  218. rem/utils/model_helpers.py +492 -0
  219. rem/utils/schema_loader.py +649 -0
  220. rem/utils/sql_paths.py +146 -0
  221. rem/utils/sql_types.py +350 -0
  222. rem/utils/user_id.py +81 -0
  223. rem/utils/vision.py +325 -0
  224. rem/workers/README.md +506 -0
  225. rem/workers/__init__.py +7 -0
  226. rem/workers/db_listener.py +579 -0
  227. rem/workers/db_maintainer.py +74 -0
  228. rem/workers/dreaming.py +502 -0
  229. rem/workers/engram_processor.py +312 -0
  230. rem/workers/sqs_file_processor.py +193 -0
  231. rem/workers/unlogged_maintainer.py +463 -0
  232. remdb-0.3.242.dist-info/METADATA +1632 -0
  233. remdb-0.3.242.dist-info/RECORD +235 -0
  234. remdb-0.3.242.dist-info/WHEEL +4 -0
  235. remdb-0.3.242.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,1632 @@
1
+ Metadata-Version: 2.4
2
+ Name: remdb
3
+ Version: 0.3.242
4
+ Summary: Resources Entities Moments - Bio-inspired memory system for agentic AI workloads
5
+ Project-URL: Homepage, https://github.com/Percolation-Labs/reminiscent
6
+ Project-URL: Documentation, https://github.com/Percolation-Labs/reminiscent/blob/main/README.md
7
+ Project-URL: Repository, https://github.com/Percolation-Labs/reminiscent
8
+ Project-URL: Issues, https://github.com/Percolation-Labs/reminiscent/issues
9
+ Author-email: mr-saoirse <amartey@gmail.com>
10
+ License: MIT
11
+ Keywords: agents,ai,mcp,memory,postgresql,vector-search
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
19
+ Requires-Python: <3.14,>=3.11
20
+ Requires-Dist: aioboto3>=13.0.0
21
+ Requires-Dist: arize-phoenix>=5.0.0
22
+ Requires-Dist: asyncpg>=0.30.0
23
+ Requires-Dist: boto3>=1.35.0
24
+ Requires-Dist: click>=8.1.0
25
+ Requires-Dist: fastapi>=0.115.0
26
+ Requires-Dist: fastmcp>=0.5.0
27
+ Requires-Dist: gitpython>=3.1.45
28
+ Requires-Dist: hypercorn>=0.17.0
29
+ Requires-Dist: itsdangerous>=2.0.0
30
+ Requires-Dist: json-schema-to-pydantic>=0.2.0
31
+ Requires-Dist: kreuzberg<4.0.0,>=3.21.0
32
+ Requires-Dist: loguru>=0.7.0
33
+ Requires-Dist: openinference-instrumentation-pydantic-ai>=0.1.0
34
+ Requires-Dist: opentelemetry-api>=1.28.0
35
+ Requires-Dist: opentelemetry-exporter-otlp-proto-grpc>=1.28.0
36
+ Requires-Dist: opentelemetry-exporter-otlp-proto-http>=1.28.0
37
+ Requires-Dist: opentelemetry-exporter-otlp>=1.28.0
38
+ Requires-Dist: opentelemetry-instrumentation-fastapi>=0.49b0
39
+ Requires-Dist: opentelemetry-instrumentation>=0.49b0
40
+ Requires-Dist: opentelemetry-sdk>=1.28.0
41
+ Requires-Dist: psycopg[binary]>=3.2.0
42
+ Requires-Dist: pydantic-ai>=1.0.0
43
+ Requires-Dist: pydantic-settings>=2.6.0
44
+ Requires-Dist: pydantic>=2.10.0
45
+ Requires-Dist: pydub>=0.25.0
46
+ Requires-Dist: python-dotenv>=1.0.0
47
+ Requires-Dist: pyyaml>=6.0.0
48
+ Requires-Dist: requests>=2.32.0
49
+ Requires-Dist: semchunk>=2.2.0
50
+ Requires-Dist: tenacity>=9.0.0
51
+ Requires-Dist: tiktoken>=0.5.0
52
+ Requires-Dist: uvicorn[standard]>=0.32.0
53
+ Provides-Extra: all
54
+ Requires-Dist: ipdb>=0.13.0; extra == 'all'
55
+ Requires-Dist: ipython>=8.29.0; extra == 'all'
56
+ Requires-Dist: json-schema-to-pydantic>=0.2.0; extra == 'all'
57
+ Requires-Dist: mypy>=1.13.0; extra == 'all'
58
+ Requires-Dist: pandas-stubs>=2.0.0; extra == 'all'
59
+ Requires-Dist: pillow>=10.0.0; extra == 'all'
60
+ Requires-Dist: polars>=1.0.0; extra == 'all'
61
+ Requires-Dist: pydub>=0.25.0; extra == 'all'
62
+ Requires-Dist: pytest-asyncio>=0.24.0; extra == 'all'
63
+ Requires-Dist: pytest-cov>=6.0.0; extra == 'all'
64
+ Requires-Dist: pytest-mock>=3.14.0; extra == 'all'
65
+ Requires-Dist: pytest>=8.0.0; extra == 'all'
66
+ Requires-Dist: ruff>=0.8.0; extra == 'all'
67
+ Requires-Dist: types-pyyaml>=6.0.0; extra == 'all'
68
+ Provides-Extra: audio
69
+ Requires-Dist: pydub>=0.25.0; extra == 'audio'
70
+ Provides-Extra: dev
71
+ Requires-Dist: ipdb>=0.13.0; extra == 'dev'
72
+ Requires-Dist: ipython>=8.29.0; extra == 'dev'
73
+ Requires-Dist: mypy>=1.13.0; extra == 'dev'
74
+ Requires-Dist: pandas-stubs>=2.0.0; extra == 'dev'
75
+ Requires-Dist: pytest-asyncio>=0.24.0; extra == 'dev'
76
+ Requires-Dist: pytest-cov>=6.0.0; extra == 'dev'
77
+ Requires-Dist: pytest-mock>=3.14.0; extra == 'dev'
78
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
79
+ Requires-Dist: ruff>=0.8.0; extra == 'dev'
80
+ Requires-Dist: types-pyyaml>=6.0.0; extra == 'dev'
81
+ Provides-Extra: fs
82
+ Requires-Dist: pillow>=10.0.0; extra == 'fs'
83
+ Requires-Dist: polars>=1.0.0; extra == 'fs'
84
+ Requires-Dist: pydub>=0.25.0; extra == 'fs'
85
+ Provides-Extra: schema
86
+ Requires-Dist: json-schema-to-pydantic>=0.2.0; extra == 'schema'
87
+ Description-Content-Type: text/markdown
88
+
89
+ # REM - Resources Entities Moments
90
+
91
+ Cloud-native unified memory infrastructure for agentic AI systems built with Pydantic AI, FastAPI, and FastMCP.
92
+
93
+ ## Architecture Overview
94
+
95
+ <p align="center">
96
+ <img src="https://mermaid.ink/img/Z3JhcGggVEQKICAgIEFQSVtGYXN0QVBJPGJyLz5DaGF0ICsgTUNQXSAtLT4gQUdFTlRTW0pTT04gU2NoZW1hPGJyLz5BZ2VudHNdCiAgICBBR0VOVFMgLS0-IFRPT0xTW01DUCBUb29sczxici8-NSBUb29sc10KCiAgICBUT09MUyAtLT4gUVVFUllbUkVNIFF1ZXJ5PGJyLz5EaWFsZWN0XQogICAgUVVFUlkgLS0-IERCWyhQb3N0Z3JlU1FMPGJyLz4rcGd2ZWN0b3IpXQoKICAgIEZJTEVTW0ZpbGUgUHJvY2Vzc29yXSAtLT4gRFJFQU1bRHJlYW1pbmc8YnIvPldvcmtlcnNdCiAgICBEUkVBTSAtLT4gREIKCiAgICBBR0VOVFMgLS0-IE9URUxbT3BlblRlbGVtZXRyeV0KICAgIE9URUwgLS0-IFBIT0VOSVhbQXJpemU8YnIvPlBob2VuaXhdCgogICAgRVZBTFtFdmFsdWF0aW9uPGJyLz5GcmFtZXdvcmtdIC0tPiBQSE9FTklYCgogICAgY2xhc3NEZWYgYXBpIGZpbGw6IzRBOTBFMixzdHJva2U6IzJFNUM4QSxjb2xvcjojZmZmCiAgICBjbGFzc0RlZiBhZ2VudCBmaWxsOiM3QjY4RUUsc3Ryb2tlOiM0ODNEOEIsY29sb3I6I2ZmZgogICAgY2xhc3NEZWYgZGIgZmlsbDojNTBDODc4LHN0cm9rZTojMkU3RDRFLGNvbG9yOiNmZmYKICAgIGNsYXNzRGVmIG9icyBmaWxsOiM5QjU5QjYsc3Ryb2tlOiM2QzM0ODMsY29sb3I6I2ZmZgoKICAgIGNsYXNzIEFQSSxUT09MUyBhcGkKICAgIGNsYXNzIEFHRU5UUyBhZ2VudAogICAgY2xhc3MgREIsUVVFUlkgZGIKICAgIGNsYXNzIE9URUwsUEhPRU5JWCxFVkFMIG9icwo=" alt="REM Architecture" width="700">
97
+ </p>
98
+
99
+ **Key Components:**
100
+
101
+ - **API Layer**: OpenAI-compatible chat completions + MCP server (not separate deployments)
102
+ - **Agentic Framework**: JSON Schema-based agents with no-code configuration
103
+ - **Database Layer**: PostgreSQL 18 with pgvector for multi-index memory (KV + Vector + Graph)
104
+ - **REM Query Dialect**: Custom query language with O(1) lookups, semantic search, graph traversal
105
+ - **Ingestion & Dreaming**: Background workers for content extraction and progressive index enrichment (0% → 100% answerable)
106
+ - **Observability & Evals**: OpenTelemetry tracing supporting LLM-as-a-Judge evaluation frameworks
107
+
108
+ ## Features
109
+
110
+ | Feature | Description | Benefits |
111
+ |---------|-------------|----------|
112
+ | **OpenAI-Compatible Chat API** | Drop-in replacement for OpenAI chat completions API with streaming support | Use with existing OpenAI clients, switch models across providers (OpenAI, Anthropic, etc.) |
113
+ | **Built-in MCP Server** | FastMCP server with 4 tools + 5 resources for memory operations | Export memory to Claude Desktop, Cursor, or any MCP-compatible host |
114
+ | **REM Query Engine** | Multi-index query system (LOOKUP, FUZZY, SEARCH, SQL, TRAVERSE) with custom dialect | O(1) lookups, semantic search, graph traversal - all tenant-isolated |
115
+ | **Dreaming Workers** | Background workers for entity extraction, moment generation, and affinity matching | Automatic knowledge graph construction from resources (0% → 100% query answerable) |
116
+ | **PostgreSQL + pgvector** | CloudNativePG with PostgreSQL 18, pgvector extension, streaming replication | Production-ready vector search, no external vector DB needed |
117
+ | **AWS EKS Recipe** | Complete infrastructure-as-code with Pulumi, Karpenter, ArgoCD | Deploy to production EKS in minutes with auto-scaling and GitOps |
118
+ | **JSON Schema Agents** | Dynamic agent creation from YAML schemas via Pydantic AI factory | Define agents declaratively, version control schemas, load dynamically |
119
+ | **Content Providers** | Audio transcription (Whisper), vision (OpenAI, Anthropic, Gemini), PDFs, DOCX, PPTX, XLSX, images | Multimodal ingestion out of the box with format detection |
120
+ | **Configurable Embeddings** | OpenAI embedding system (text-embedding-3-small) | Production-ready embeddings, additional providers planned |
121
+ | **Multi-Tenancy** | Tenant isolation at database level with automatic scoping | SaaS-ready with complete data separation per tenant |
122
+ | **Zero Vendor Lock-in** | Raw HTTP clients (no OpenAI SDK), swappable providers, open standards | Not tied to any vendor, easy to migrate, full control |
123
+
124
+ ## Quick Start
125
+
126
+ Choose your path:
127
+
128
+ - **Option 1: Package Users with Example Data** (Recommended) - PyPI + example datasets
129
+ - **Option 2: Developers** - Clone repo, local development with uv
130
+
131
+ ---
132
+
133
+ ## Option 1: Package Users with Example Data (Recommended)
134
+
135
+ **Best for**: First-time users who want to explore REM with curated example datasets.
136
+
137
+ ```bash
138
+ # Install system dependencies (tesseract for OCR)
139
+ brew install tesseract # macOS (Linux/Windows: see tesseract-ocr.github.io)
140
+
141
+ # Install remdb
142
+ pip install "remdb[all]"
143
+
144
+ # Clone example datasets
145
+ git clone https://github.com/Percolation-Labs/remstack-lab.git
146
+ cd remstack-lab
147
+
148
+ # Start services (PostgreSQL, Phoenix observability)
149
+ curl -O https://gist.githubusercontent.com/percolating-sirsh/d117b673bc0edfdef1a5068ccd3cf3e5/raw/docker-compose.prebuilt.yml
150
+ docker compose -f docker-compose.prebuilt.yml up -d
151
+
152
+ # Configure REM (creates ~/.rem/config.yaml and installs database schema)
153
+ # Add --claude-desktop to register with Claude Desktop app
154
+ rem configure --install --claude-desktop
155
+
156
+ # Load quickstart dataset
157
+ rem db load datasets/quickstart/sample_data.yaml
158
+
159
+ # Ask questions
160
+ rem ask "What documents exist in the system?"
161
+ rem ask "Show me meetings about API design"
162
+
163
+ # Ingest files (PDF, DOCX, images, etc.)
164
+ rem process ingest datasets/formats/files/bitcoin_whitepaper.pdf --category research --tags bitcoin,whitepaper
165
+
166
+ # Query ingested content
167
+ rem ask "What is the Bitcoin whitepaper about?"
168
+ ```
169
+
170
+ **What you get:**
171
+ - Quickstart: 3 users, 3 resources, 3 moments, 4 messages
172
+ - Domain datasets: recruitment, legal, enterprise, misc
173
+ - Format examples: engrams, documents, conversations, files
174
+
175
+ **Learn more**: [remstack-lab repository](https://github.com/Percolation-Labs/remstack-lab)
176
+
177
+ ### Using the API
178
+
179
+ Once configured, you can also use the OpenAI-compatible chat completions API:
180
+
181
+ ```bash
182
+ # Start all services (PostgreSQL, Phoenix, API)
183
+ docker compose -f docker-compose.prebuilt.yml up -d
184
+
185
+ # Test the API
186
+ curl -X POST http://localhost:8000/api/v1/chat/completions \
187
+ -H "Content-Type: application/json" \
188
+ -H "X-Session-Id: a1b2c3d4-e5f6-7890-abcd-ef1234567890" \
189
+ -d '{
190
+ "model": "anthropic:claude-sonnet-4-5-20250929",
191
+ "messages": [{"role": "user", "content": "What documents did Sarah Chen author?"}],
192
+ "stream": false
193
+ }'
194
+ ```
195
+
196
+ **Port Guide:**
197
+ - **5051**: Package users with `docker-compose.prebuilt.yml` (pre-built image)
198
+ - **5050**: Developers with `docker-compose.yml` (local build)
199
+
200
+ **Next Steps:**
201
+ - See [CLI Reference](#cli-reference) for all available commands
202
+ - See [REM Query Dialect](#rem-query-dialect) for query examples
203
+ - See [API Endpoints](#api-endpoints) for OpenAI-compatible API usage
204
+
205
+ ---
206
+
207
+ ## Example Datasets
208
+
209
+ Clone [remstack-lab](https://github.com/Percolation-Labs/remstack-lab) for curated datasets organized by domain and format.
210
+
211
+ **What's included:**
212
+ - **Quickstart**: Minimal dataset (3 users, 3 resources, 3 moments) - perfect for first-time users
213
+ - **Domains**: Recruitment (CV parsing), Legal (contracts), Enterprise (team collaboration)
214
+ - **Formats**: Engrams (voice memos), Documents (markdown/PDF), Conversations (chat logs)
215
+ - **Evaluation**: Golden datasets for Phoenix-based agent testing
216
+
217
+ **Working from remstack-lab:**
218
+ ```bash
219
+ cd remstack-lab
220
+
221
+ # Load any dataset
222
+ rem db load --file datasets/quickstart/sample_data.yaml
223
+
224
+ # Explore formats
225
+ rem db load --file datasets/formats/engrams/scenarios/team_meeting/team_standup_meeting.yaml
226
+ ```
227
+
228
+ ## See Also
229
+
230
+ - [REM Query Dialect](#rem-query-dialect) - LOOKUP, FUZZY, SEARCH, SQL, TRAVERSE query types
231
+ - [API Endpoints](#api-endpoints) - OpenAI-compatible chat completions, MCP server
232
+ - [CLI Reference](#cli-reference) - Complete command-line interface documentation
233
+ - [Bring Your Own Agent](#bring-your-own-agent) - Create custom agents with your own prompts and tools
234
+ - [Production Deployment](#production-deployment) - AWS EKS with Kubernetes
235
+ - [Example Datasets](https://github.com/Percolation-Labs/remstack-lab) - Curated datasets by domain and format
236
+
237
+ ---
238
+
239
+ ## Bring Your Own Agent
240
+
241
+ REM allows you to create **custom agents** with your own system prompts, tools, and output schemas. Custom agents are stored in the database and dynamically loaded when referenced, enabling **no-code agent creation** without modifying the codebase.
242
+
243
+ ### How It Works
244
+
245
+ 1. **Define Agent Schema** - Create a YAML file with your agent's prompt, tools, and output structure
246
+ 2. **Ingest Schema** - Use `rem process ingest` to store the schema in the database
247
+ 3. **Use Your Agent** - Reference your agent by name with `rem ask <agent-name> "query"`
248
+
249
+ When you run `rem ask my-agent "query"`, REM:
250
+ 1. Checks if `my-agent` exists in the filesystem (`schemas/agents/`)
251
+ 2. If not found, performs a **LOOKUP** query on the `schemas` table in the database
252
+ 3. Loads the schema dynamically and creates a Pydantic AI agent
253
+ 4. Runs your query with the custom agent
254
+
255
+ ### Expected Behavior
256
+
257
+ **Schema Ingestion Flow** (`rem process ingest my-agent.yaml`):
258
+ - Parse YAML file to extract JSON Schema content
259
+ - Extract `json_schema_extra.kind` field → maps to `category` column
260
+ - Extract `json_schema_extra.provider_configs` → stores provider configurations
261
+ - Extract `json_schema_extra.embedding_fields` → stores semantic search fields
262
+ - Create `Schema` entity in `schemas` table with `user_id` scoping
263
+ - Schema is now queryable via `LOOKUP "my-agent" FROM schemas`
264
+
265
+ **Agent Loading Flow** (`rem ask my-agent "query"`):
266
+ 1. `load_agent_schema("my-agent")` checks filesystem cache → miss
267
+ 2. Falls back to database: `LOOKUP "my-agent" FROM schemas WHERE user_id = '<user-id>'`
268
+ 3. Returns `Schema.spec` (JSON Schema dict) from database
269
+ 4. `create_agent()` factory creates Pydantic AI agent from schema
270
+ 5. Agent runs with tools specified in `json_schema_extra.tools`
271
+ 6. Returns structured output defined in `properties` field
272
+
273
+ ### Quick Example
274
+
275
+ **Step 1: Create Agent Schema** (`my-research-assistant.yaml`)
276
+
277
+ ```yaml
278
+ type: object
279
+ description: |
280
+ You are a research assistant that helps users find and analyze documents.
281
+
282
+ Use the search_rem tool to find relevant documents, then analyze and summarize them.
283
+ Be concise and cite specific documents in your responses.
284
+
285
+ properties:
286
+ summary:
287
+ type: string
288
+ description: A concise summary of findings
289
+ sources:
290
+ type: array
291
+ items:
292
+ type: string
293
+ description: List of document labels referenced
294
+
295
+ required:
296
+ - summary
297
+ - sources
298
+
299
+ json_schema_extra:
300
+ kind: agent
301
+ name: research-assistant
302
+ version: 1.0.0
303
+ tools:
304
+ - search_rem
305
+ - ask_rem_agent
306
+ resources: []
307
+ ```
308
+
309
+ **For more examples**, see:
310
+ - Simple agent (no tools): `src/rem/schemas/agents/examples/simple.yaml`
311
+ - Agent with REM tools: `src/rem/schemas/agents/core/rem-query-agent.yaml`
312
+ - Ontology extractor: `src/rem/schemas/agents/examples/cv-parser.yaml`
313
+
314
+ **Step 2: Ingest Schema into Database**
315
+
316
+ ```bash
317
+ # Ingest the schema (stores in database schemas table)
318
+ rem process ingest my-research-assistant.yaml \
319
+ --category agents \
320
+ --tags custom,research
321
+
322
+ # Verify schema is in database (should show schema details)
323
+ rem ask "LOOKUP 'research-assistant' FROM schemas"
324
+ ```
325
+
326
+ **Step 3: Use Your Custom Agent**
327
+
328
+ ```bash
329
+ # Run a query with your custom agent
330
+ rem ask research-assistant "Find documents about machine learning architecture"
331
+
332
+ # With streaming
333
+ rem ask research-assistant "Summarize recent API design documents" --stream
334
+
335
+ # With session continuity
336
+ rem ask research-assistant "What did we discuss about ML?" --session-id c3d4e5f6-a7b8-9012-cdef-345678901234
337
+ ```
338
+
339
+ ### Agent Schema Structure
340
+
341
+ Every agent schema must include:
342
+
343
+ **Required Fields:**
344
+ - `type: object` - JSON Schema type (always "object")
345
+ - `description` - System prompt with instructions for the agent
346
+ - `properties` - Output schema defining structured response fields
347
+
348
+ **Optional Metadata** (`json_schema_extra`):
349
+ - `kind` - Agent category ("agent", "evaluator", etc.) → maps to `Schema.category`
350
+ - `name` - Agent identifier (used for LOOKUP)
351
+ - `version` - Semantic version (e.g., "1.0.0")
352
+ - `tools` - List of MCP tools to load (e.g., `["search_rem", "ask_rem_agent"]`)
353
+ - `resources` - List of MCP resources to expose (e.g., `["user_profile"]`)
354
+ - `provider_configs` - Multi-provider testing configurations (for ontology extractors)
355
+ - `embedding_fields` - Fields to embed for semantic search (for ontology extractors)
356
+
357
+ ### Available MCP Tools
358
+
359
+ REM provides **4 built-in MCP tools** your agents can use:
360
+
361
+ | Tool | Purpose | Parameters |
362
+ |------|---------|------------|
363
+ | `search_rem` | Execute REM queries (LOOKUP, FUZZY, SEARCH, SQL, TRAVERSE) | `query_type`, `entity_key`, `query_text`, `table`, `sql_query`, `initial_query`, `edge_types`, `depth` |
364
+ | `ask_rem_agent` | Natural language to REM query via agent-driven reasoning | `query`, `agent_schema`, `agent_version` |
365
+ | `ingest_into_rem` | Full file ingestion pipeline (read → store → parse → chunk → embed) | `file_uri`, `category`, `tags`, `is_local_server` |
366
+ | `read_resource` | Access MCP resources (schemas, status) for Claude Desktop | `uri` |
367
+
368
+ **Tool Reference**: Tools are defined in `src/rem/api/mcp_router/tools.py`
369
+
370
+ **Note**: `search_rem` is a unified tool that handles all REM query types via the `query_type` parameter:
371
+ - `query_type="lookup"` - O(1) entity lookup by label
372
+ - `query_type="fuzzy"` - Fuzzy text matching with similarity threshold
373
+ - `query_type="search"` - Semantic vector search (table-specific)
374
+ - `query_type="sql"` - Direct SQL queries (WHERE clause)
375
+ - `query_type="traverse"` - Graph traversal with depth control
376
+
377
+ ### Multi-User Isolation
378
+
379
+ For multi-tenant deployments, custom agents are **scoped by `user_id`**, ensuring complete data isolation. Use the `--user-id` flag when you need tenant separation:
380
+
381
+ ```bash
382
+ # Create agent for specific tenant
383
+ rem process ingest my-agent.yaml --user-id tenant-a --category agents
384
+
385
+ # Query with tenant context
386
+ rem ask my-agent "test" --user-id tenant-a
387
+ ```
388
+
389
+ ### Troubleshooting
390
+
391
+ **Schema not found error:**
392
+ ```bash
393
+ # Check if schema was ingested correctly
394
+ rem ask "SEARCH 'my-agent' FROM schemas"
395
+
396
+ # List all schemas
397
+ rem ask "SELECT name, category, created_at FROM schemas ORDER BY created_at DESC LIMIT 10"
398
+ ```
399
+
400
+ **Agent not loading tools:**
401
+ - Verify `json_schema_extra.tools` lists correct tool names
402
+ - Valid tool names: `search_rem`, `ask_rem_agent`, `ingest_into_rem`, `read_resource`
403
+ - Check MCP tool names in `src/rem/api/mcp_router/tools.py`
404
+ - Tools are case-sensitive: use `search_rem`, not `Search_REM`
405
+
406
+ **Agent not returning structured output:**
407
+ - Ensure `properties` field defines all expected output fields
408
+ - Use `required` field to mark mandatory fields
409
+ - Check agent response with `--stream` disabled to see full JSON output
410
+
411
+ ---
412
+
413
+ ## REM Query Dialect
414
+
415
+ REM provides a custom query language designed for **LLM-driven iterated retrieval** with performance guarantees.
416
+
417
+ ### Design Philosophy
418
+
419
+ Unlike traditional single-shot SQL queries, the REM dialect is optimized for **multi-turn exploration** where LLMs participate in query planning:
420
+
421
+ - **Iterated Queries**: Queries return partial results that LLMs use to refine subsequent queries
422
+ - **Composable WITH Syntax**: Chain operations together (e.g., `TRAVERSE edge_type WITH LOOKUP "..."`)
423
+ - **Mixed Indexes**: Combines exact lookups (O(1)), semantic search (vector), and graph traversal
424
+ - **Query Planner Participation**: Results include metadata for LLMs to decide next steps
425
+
426
+ **Example Multi-Turn Flow**:
427
+ ```
428
+ Turn 1: LOOKUP "sarah-chen" → Returns entity + available edge types
429
+ Turn 2: TRAVERSE authored_by WITH LOOKUP "sarah-chen" DEPTH 1 → Returns connected documents
430
+ Turn 3: SEARCH "architecture decisions" → Semantic search, then explore graph from results
431
+ ```
432
+
433
+ This enables LLMs to **progressively build context** rather than requiring perfect queries upfront.
434
+
435
+ See [REM Query Dialect (AST)](#rem-query-dialect-ast) for complete grammar specification.
436
+
437
+ ### Query Types
438
+
439
+ #### `LOOKUP` - O(1) Exact Label Lookup
440
+
441
+ Fast exact match on entity labels (natural language identifiers, not UUIDs).
442
+
443
+ ```sql
444
+ LOOKUP "sarah-chen" FROM resources
445
+ LOOKUP "api-design-v2" FROM resources WHERE category = "projects"
446
+ ```
447
+
448
+ **Performance**: O(1) - indexed on `label` column
449
+ **Returns**: Single entity or null
450
+ **Use case**: Fetch specific known entities by human-readable name
451
+
452
+ #### `FUZZY` - Fuzzy Text Search
453
+
454
+ Fuzzy matching for partial names or misspellings using PostgreSQL trigram similarity.
455
+
456
+ ```sql
457
+ FUZZY "sara" FROM resources LIMIT 10
458
+ FUZZY "api desgin" FROM resources THRESHOLD 0.3 LIMIT 5
459
+ ```
460
+
461
+ **Performance**: O(n) with pg_trgm GIN index (fast for small-medium datasets)
462
+ **Returns**: Ranked list by similarity score
463
+ **Use case**: Handle typos, partial names, or when exact label is unknown
464
+
465
+ #### `SEARCH` - Semantic Vector Search
466
+
467
+ Semantic search using pgvector embeddings with cosine similarity.
468
+
469
+ ```sql
470
+ SEARCH "machine learning architecture" FROM resources LIMIT 10
471
+ SEARCH "contract disputes" FROM resources WHERE tags @> ARRAY['legal'] LIMIT 5
472
+ ```
473
+
474
+ **Performance**: O(log n) with HNSW index
475
+ **Returns**: Ranked list of semantically similar entities
476
+ **Use case**: Find conceptually related content without exact keyword matches
477
+
478
+ #### `TRAVERSE` - Recursive Graph Traversal
479
+
480
+ Follow `graph_edges` relationships across the knowledge graph.
481
+
482
+ ```sql
483
+ TRAVERSE authored_by WITH LOOKUP "sarah-chen" DEPTH 2
484
+ TRAVERSE references,depends_on WITH LOOKUP "api-design-v2" DEPTH 3
485
+ ```
486
+
487
+ **Features**:
488
+ - **Polymorphic**: Seamlessly traverses `resources`, `moments`, `users` via `all_graph_edges` view
489
+ - **Filtering**: Filter by one or multiple edge types (comma-separated)
490
+ - **Depth Control**: Configurable recursion depth (default: 2)
491
+ - **Data Model**: Requires `InlineEdge` JSON structure in `graph_edges` column
492
+
493
+ **Returns**: Graph of connected entities with edge metadata
494
+ **Use case**: Explore relationships, find connected entities, build context
495
+
496
+ #### Direct SQL Queries
497
+
498
+ Raw SQL for complex temporal, aggregation, or custom queries.
499
+
500
+ ```sql
501
+ SELECT * FROM resources WHERE created_at > NOW() - INTERVAL '7 days' ORDER BY created_at DESC LIMIT 20
502
+ SELECT category, COUNT(*) as count FROM resources GROUP BY category
503
+ WITH recent AS (SELECT * FROM resources WHERE created_at > NOW() - INTERVAL '1 day') SELECT * FROM recent
504
+ ```
505
+
506
+ **Performance**: Depends on query and indexes
507
+ **Returns**: Raw query results
508
+ **Use case**: Complex filtering, aggregations, temporal queries
509
+ **Allowed**: SELECT, INSERT, UPDATE, WITH (read + data modifications)
510
+ **Blocked**: DROP, DELETE, TRUNCATE, ALTER (destructive operations)
511
+ **Note**: Can be used standalone or with `WITH` syntax for composition
512
+
513
+ ### Graph Edge Format
514
+
515
+ Edges are stored inline using the `InlineEdge` pattern, with human-readable destination labels.
516
+
517
+ ```json
518
+ {
519
+ "dst": "sarah-chen",
520
+ "rel_type": "authored_by",
521
+ "weight": 1.0,
522
+ "properties": {
523
+ "dst_entity_type": "users:engineers/sarah-chen",
524
+ "created_at": "2025-01-15T10:30:00Z"
525
+ }
526
+ }
527
+ ```
528
+
529
+ **Destination Entity Type Convention** (`properties.dst_entity_type`):
530
+
531
+ Format: `<table_schema>:<category>/<key>`
532
+
533
+ Examples:
534
+ - `"resources:managers/bob"` → Look up bob in resources table with category="managers"
535
+ - `"users:engineers/sarah-chen"` → Look up sarah-chen in users table
536
+ - `"moments:meetings/standup-2024-01"` → Look up in moments table
537
+ - `"resources/api-design-v2"` → Look up in resources table (no category)
538
+ - `"bob"` → Defaults to resources table, no category
539
+
540
+ **Edge Type Format** (`rel_type`):
541
+ - Use snake_case: `"authored_by"`, `"depends_on"`, `"references"`
542
+ - Be specific but consistent
543
+ - Use passive voice for bidirectional clarity
544
+
545
+ ### Multi-Turn Iterated Retrieval
546
+
547
+ REM enables agents to conduct multi-turn database conversations:
548
+
549
+ 1. **Initial Query**: Agent runs SEARCH to find candidates
550
+ 2. **Refinement**: Agent analyzes results, runs LOOKUP on specific entities
551
+ 3. **Context Expansion**: Agent runs TRAVERSE to find related entities
552
+ 4. **Temporal Filter**: Agent runs SQL to filter by time range
553
+ 5. **Final Answer**: Agent synthesizes knowledge from all queries
554
+
555
+ **Plan Memos**: Agents track query plans in a scratchpad for iterative refinement.
556
+
557
+ ### Query Performance Contracts
558
+
559
+ | Query Type | Complexity | Index | Use When |
560
+ |------------|-----------|-------|----------|
561
+ | `LOOKUP` | O(1) | B-tree on `label` | You know exact entity name |
562
+ | `FUZZY` | O(n) | GIN on `label` (pg_trgm) | Handling typos/partial matches |
563
+ | `SEARCH` | O(log n) | HNSW on `embedding` | Semantic similarity needed |
564
+ | `TRAVERSE` | O(depth × edges) | B-tree on `graph_edges` | Exploring relationships |
565
+ | `SQL` | Variable | Custom indexes | Complex filtering/aggregation |
566
+
567
+ ### Example: Multi-Query Session
568
+
569
+ ```python
570
+ # Query 1: Find relevant documents
571
+ SEARCH "API migration planning" FROM resources LIMIT 5
572
+
573
+ # Query 2: Get specific document
574
+ LOOKUP "tidb-migration-spec" FROM resources
575
+
576
+ # Query 3: Find related people
577
+ TRAVERSE authored_by,reviewed_by WITH LOOKUP "tidb-migration-spec" DEPTH 1
578
+
579
+ # Query 4: Recent activity
580
+ SELECT * FROM moments WHERE
581
+ 'tidb-migration' = ANY(topic_tags) AND
582
+ start_time > NOW() - INTERVAL '30 days'
583
+ ```
584
+
585
+ ### Tenant Isolation
586
+
587
+ All queries are automatically scoped by `user_id` for complete data isolation:
588
+
589
+ ```sql
590
+ -- Automatically filtered to user's data
591
+ SEARCH "contracts" FROM resources LIMIT 10
592
+
593
+ -- No cross-user data leakage
594
+ TRAVERSE references WITH LOOKUP "project-x" DEPTH 3
595
+ ```
596
+
597
+ ## API Endpoints
598
+
599
+ ### Chat Completions (OpenAI-compatible)
600
+
601
+ ```bash
602
+ POST /api/v1/chat/completions
603
+ ```
604
+
605
+ **Headers**:
606
+ - `X-User-Id`: User identifier (needed for per-user data isolation; falls back to the default user if not provided)
607
+ - `X-Tenant-Id`: Deprecated - use `X-User-Id` instead (kept for backwards compatibility)
608
+ - `X-Session-Id`: Session/conversation identifier
609
+ - `X-Agent-Schema`: Agent schema URI to use
610
+
611
+ **Body**:
612
+ ```json
613
+ {
614
+ "model": "anthropic:claude-sonnet-4-5-20250929",
615
+ "messages": [
616
+ {"role": "user", "content": "Find all documents Sarah authored"}
617
+ ],
618
+ "stream": true,
619
+ "response_format": {"type": "text"}
620
+ }
621
+ ```
622
+
623
+ **Streaming Response** (SSE):
624
+ ```
625
+ data: {"id": "chatcmpl-123", "choices": [{"delta": {"role": "assistant", "content": ""}}]}
626
+
627
+ data: {"id": "chatcmpl-123", "choices": [{"delta": {"content": "[Calling: search_rem]"}}]}
628
+
629
+ data: {"id": "chatcmpl-123", "choices": [{"delta": {"content": "Found 3 documents..."}}]}
630
+
631
+ data: {"id": "chatcmpl-123", "choices": [{"delta": {}, "finish_reason": "stop"}]}
632
+
633
+ data: [DONE]
634
+ ```
635
+
636
+ ### MCP Endpoint
637
+
638
+ ```bash
639
+ # MCP HTTP transport
640
+ POST /api/v1/mcp
641
+ ```
642
+
643
+ Exposes tools and resources for REM query execution, resource management, and file operations.
644
+
645
+ ### Health Check
646
+
647
+ ```bash
648
+ GET /health
649
+ # {"status": "healthy", "version": "0.1.0"}
650
+ ```
651
+
652
+ ## CLI Reference
653
+
654
+ REM provides a comprehensive command-line interface for all operations.
655
+
656
+ ### Configuration & Server
657
+
658
+ #### `rem configure` - Interactive Setup Wizard
659
+
660
+ Set up REM with PostgreSQL, LLM providers, and S3 storage. **Defaults to port 5051 (package users).**
661
+
662
+ ```bash
663
+ # Complete setup (recommended for package users)
664
+ rem configure --install --claude-desktop
665
+
666
+ # This runs:
667
+ # 1. Interactive wizard (creates ~/.rem/config.yaml)
668
+ # 2. Installs database tables (rem db migrate)
669
+ # 3. Registers REM MCP server with Claude Desktop
670
+
671
+ # Other options:
672
+ rem configure # Just run wizard
673
+ rem configure --install # Wizard + database install
674
+ rem configure --show # Show current configuration
675
+ rem configure --edit # Edit configuration in $EDITOR
676
+ ```
677
+
678
+ **Default Configuration:**
679
+ - **Package users**: `localhost:5051` (docker-compose.prebuilt.yml with Docker Hub image)
680
+ - **Developers**: Change to `localhost:5050` during wizard (docker-compose.yml with local build)
681
+ - **Custom database**: Enter your own host/port/credentials
682
+
683
+ **Configuration File:** `~/.rem/config.yaml`
684
+
685
+ ```yaml
686
+ postgres:
687
+ # Package users (prebuilt)
688
+ connection_string: postgresql://rem:rem@localhost:5051/rem
689
+ # OR Developers (local build)
690
+ # connection_string: postgresql://rem:rem@localhost:5050/rem
691
+ pool_min_size: 5
692
+ pool_max_size: 20
693
+
694
+ llm:
695
+ default_model: anthropic:claude-sonnet-4-5-20250929
696
+ openai_api_key: sk-...
697
+ anthropic_api_key: sk-ant-...
698
+
699
+ s3:
700
+ bucket_name: rem-storage
701
+ region: us-east-1
702
+ ```
703
+
704
+ **Precedence:** Environment variables > Config file > Defaults
705
+
706
+ **Port Guide:**
707
+ - **5051**: Package users with `docker-compose.prebuilt.yml` (recommended)
708
+ - **5050**: Developers with `docker-compose.yml` (local development)
709
+ - **Custom**: Your own PostgreSQL instance
710
+
711
+ #### `rem mcp` - Run MCP Server
712
+
713
+ Run the FastMCP server for Claude Desktop integration.
714
+
715
+ ```bash
716
+ # Stdio mode (for Claude Desktop)
717
+ rem mcp
718
+
719
+ # HTTP mode (for testing)
720
+ rem mcp --http --port 8001
721
+ ```
722
+
723
+ #### `rem serve` - Start API Server
724
+
725
+ Start the FastAPI server with uvicorn.
726
+
727
+ ```bash
728
+ # Use settings from config
729
+ rem serve
730
+
731
+ # Development mode (auto-reload)
732
+ rem serve --reload
733
+
734
+ # Production mode (4 workers)
735
+ rem serve --workers 4
736
+
737
+ # Bind to all interfaces
738
+ rem serve --host 0.0.0.0 --port 8080
739
+
740
+ # Override log level
741
+ rem serve --log-level debug
742
+ ```
743
+
744
+ ### Database Management
745
+
746
+ REM uses a **code-as-source-of-truth** approach for database schema management. Pydantic models define the schema, and the database is kept in sync via diff-based migrations.
747
+
748
+ #### Schema Management Philosophy
749
+
750
+ **Two migration files only:**
751
+ - `001_install.sql` - Core infrastructure (extensions, functions, KV store)
752
+ - `002_install_models.sql` - Entity tables (auto-generated from Pydantic models)
753
+
754
+ **No incremental migrations** (003, 004, etc.) - the models file is always regenerated to match code.
755
+
756
+ #### `rem db schema generate` - Regenerate Schema SQL
757
+
758
+ Generate `002_install_models.sql` from registered Pydantic models.
759
+
760
+ ```bash
761
+ # Regenerate from model registry
762
+ rem db schema generate
763
+
764
+ # Output: src/rem/sql/migrations/002_install_models.sql
765
+ ```
766
+
767
+ This generates:
768
+ - CREATE TABLE statements for each registered entity
769
+ - Embeddings tables (`embeddings_<table>`)
770
+ - KV_STORE triggers for cache maintenance
771
+ - Foreground indexes (GIN for JSONB, B-tree for lookups)
772
+
773
+ #### `rem db diff` - Detect Schema Drift
774
+
775
+ Compare Pydantic models against the live database using Alembic autogenerate.
776
+
777
+ ```bash
778
+ # Show additive changes only (default, safe for production)
779
+ rem db diff
780
+
781
+ # Show all changes including drops
782
+ rem db diff --strategy full
783
+
784
+ # Show additive + safe type widenings
785
+ rem db diff --strategy safe
786
+
787
+ # CI mode: exit 1 if drift detected
788
+ rem db diff --check
789
+
790
+ # Generate migration SQL for changes
791
+ rem db diff --generate
792
+ ```
793
+
794
+ **Migration Strategies:**
795
+ | Strategy | Description |
796
+ |----------|-------------|
797
+ | `additive` | Only ADD columns/tables/indexes (safe, no data loss) - **default** |
798
+ | `full` | All changes including DROPs (use with caution) |
799
+ | `safe` | Additive + safe column type widenings (e.g., VARCHAR(50) → VARCHAR(256)) |
800
+
801
+ **Output shows:**
802
+ - `+ ADD COLUMN` - Column in model but not in DB
803
+ - `- DROP COLUMN` - Column in DB but not in model (only with `--strategy full`)
804
+ - `~ ALTER COLUMN` - Column type or constraints differ
805
+ - `+ CREATE TABLE` / `- DROP TABLE` - Table additions/removals
806
+
807
+ #### `rem db apply` - Apply SQL Directly
808
+
809
+ Apply a SQL file directly to the database (bypasses migration tracking).
810
+
811
+ ```bash
812
+ # Apply with audit logging (default)
813
+ rem db apply src/rem/sql/migrations/002_install_models.sql
814
+
815
+ # Preview without executing
816
+ rem db apply --dry-run src/rem/sql/migrations/002_install_models.sql
817
+
818
+ # Apply without audit logging
819
+ rem db apply --no-log src/rem/sql/migrations/002_install_models.sql
820
+ ```
821
+
822
+ #### `rem db migrate` - Initial Setup
823
+
824
+ Apply standard migrations (001 + 002). Use for initial setup only.
825
+
826
+ ```bash
827
+ # Apply infrastructure + entity tables
828
+ rem db migrate
829
+
830
+ # Include background indexes (HNSW for vectors)
831
+ rem db migrate --background-indexes
832
+ ```
833
+
834
+ #### Database Workflows
835
+
836
+ **Initial Setup (Local):**
837
+ ```bash
838
+ rem db schema generate # Generate from models
839
+ rem db migrate # Apply 001 + 002
840
+ rem db diff # Verify no drift
841
+ ```
842
+
843
+ **Adding/Modifying Models:**
844
+ ```bash
845
+ # 1. Edit models in src/rem/models/entities/
846
+ # 2. Register new models in src/rem/registry.py
847
+ rem db schema generate # Regenerate schema
848
+ rem db diff # See what changed
849
+ rem db apply src/rem/sql/migrations/002_install_models.sql
850
+ ```
851
+
852
+ **CI/CD Pipeline:**
853
+ ```bash
854
+ rem db diff --check # Fail build if drift detected
855
+ ```
856
+
857
+ **Remote Database (Production/Staging):**
858
+ ```bash
859
+ # Port-forward to cluster database
860
+ kubectl port-forward -n <namespace> svc/rem-postgres-rw 5433:5432 &
861
+
862
+ # Override connection for diff check
863
+ POSTGRES__CONNECTION_STRING="postgresql://rem:rem@localhost:5433/rem" rem db diff
864
+
865
+ # Apply changes if needed
866
+ POSTGRES__CONNECTION_STRING="postgresql://rem:rem@localhost:5433/rem" \
867
+ rem db apply src/rem/sql/migrations/002_install_models.sql
868
+ ```
869
+
870
+ #### `rem db rebuild-cache` - Rebuild KV Cache
871
+
872
+ Rebuild KV_STORE cache from entity tables (after database restart or bulk imports).
873
+
874
+ ```bash
875
+ rem db rebuild-cache
876
+ ```
877
+
878
+ #### `rem db schema validate` - Validate Models
879
+
880
+ Validate registered Pydantic models for schema generation.
881
+
882
+ ```bash
883
+ rem db schema validate
884
+ ```
885
+
886
+ ### File Processing
887
+
888
+ #### `rem process files` - Process Files
889
+
890
+ Process files with optional custom extractor (ontology extraction).
891
+
892
+ ```bash
893
+ # Process all completed files
894
+ rem process files --status completed --limit 10
895
+
896
+ # Process with custom extractor
897
+ rem process files --extractor cv-parser-v1 --limit 50
898
+
899
+ # Process files for specific user
900
+ rem process files --user-id user-123 --status completed
901
+ ```
902
+
903
+ #### `rem process ingest` - Ingest File into REM
904
+
905
+ Ingest a file into REM with full pipeline (storage + parsing + embedding + database).
906
+
907
+ ```bash
908
+ # Ingest local file with metadata
909
+ rem process ingest /path/to/document.pdf \
910
+ --category legal \
911
+ --tags contract,2024
912
+
913
+ # Ingest with minimal options
914
+ rem process ingest ./meeting-notes.md
915
+ ```
916
+
917
+ #### `rem process uri` - Parse File (Read-Only)
918
+
919
+ Parse a file and extract content **without** storing to database (useful for testing parsers).
920
+
921
+ ```bash
922
+ # Parse local file (output to stdout)
923
+ rem process uri /path/to/document.pdf
924
+
925
+ # Parse and save extracted content to file
926
+ rem process uri /path/to/document.pdf --save output.json
927
+
928
+ # Parse S3 file
929
+ rem process uri s3://bucket/key.docx --output text
930
+ ```
931
+
932
+ ### Memory & Knowledge Extraction (Dreaming)
933
+
934
+ #### `rem dreaming full` - Complete Workflow
935
+
936
+ Run full dreaming workflow: extractors → moments → affinity → user model.
937
+
938
+ ```bash
939
+ # Full workflow (uses default user from settings)
940
+ rem dreaming full
941
+
942
+ # Skip ontology extractors
943
+ rem dreaming full --skip-extractors
944
+
945
+ # Process last 24 hours only
946
+ rem dreaming full --lookback-hours 24
947
+
948
+ # Limit resources processed for specific user
949
+ rem dreaming full --user-id user-123 --limit 100
950
+ ```
951
+
952
+ #### `rem dreaming custom` - Custom Extractor
953
+
954
+ Run a specific ontology extractor on a user's data.
955
+
956
+ ```bash
957
+ # Run CV parser on files
958
+ rem dreaming custom --extractor cv-parser-v1
959
+
960
+ # Process last week's files with limit
961
+ rem dreaming custom \
962
+ --extractor contract-analyzer-v1 \
963
+ --lookback-hours 168 \
964
+ --limit 50
965
+ ```
966
+
967
+ #### `rem dreaming moments` - Extract Moments
968
+
969
+ Extract temporal narratives from resources.
970
+
971
+ ```bash
972
+ # Generate moments
973
+ rem dreaming moments --limit 50
974
+
975
+ # Process last 7 days
976
+ rem dreaming moments --lookback-hours 168
977
+ ```
978
+
979
+ #### `rem dreaming affinity` - Build Relationships
980
+
981
+ Build semantic relationships between resources using embeddings.
982
+
983
+ ```bash
984
+ # Build affinity graph
985
+ rem dreaming affinity --limit 100
986
+
987
+ # Process recent resources only
988
+ rem dreaming affinity --lookback-hours 24
989
+ ```
990
+
991
+ #### `rem dreaming user-model` - Update User Model
992
+
993
+ Update user model from recent activity (preferences, interests, patterns).
994
+
995
+ ```bash
996
+ # Update user model
997
+ rem dreaming user-model
998
+ ```
999
+
1000
+ ### Evaluation & Experiments
1001
+
1002
+ #### `rem experiments` - Experiment Management
1003
+
1004
+ Manage evaluation experiments with datasets, prompts, and traces.
1005
+
1006
+ ```bash
1007
+ # Create experiment configuration
1008
+ rem experiments create my-evaluation \
1009
+ --agent ask_rem \
1010
+ --evaluator rem-lookup-correctness \
1011
+ --description "Baseline evaluation"
1012
+
1013
+ # Run experiment
1014
+ rem experiments run my-evaluation
1015
+
1016
+ # List experiments
1017
+ rem experiments list
1018
+ rem experiments show my-evaluation
1019
+ ```
1020
+
1021
+ #### `rem experiments dataset` - Dataset Management
1022
+
1023
+ ```bash
1024
+ # Create dataset from CSV
1025
+ rem experiments dataset create rem-lookup-golden \
1026
+ --from-csv golden.csv \
1027
+ --input-keys query \
1028
+ --output-keys expected_label,expected_type
1029
+
1030
+ # Add more examples
1031
+ rem experiments dataset add rem-lookup-golden \
1032
+ --from-csv more-data.csv \
1033
+ --input-keys query \
1034
+ --output-keys expected_label,expected_type
1035
+
1036
+ # List datasets
1037
+ rem experiments dataset list
1038
+ ```
1039
+
1040
+ #### `rem experiments prompt` - Prompt Management
1041
+
1042
+ ```bash
1043
+ # Create agent prompt
1044
+ rem experiments prompt create hello-world \
1045
+ --system-prompt "You are a helpful assistant." \
1046
+ --model-name gpt-4o
1047
+
1048
+ # List prompts
1049
+ rem experiments prompt list
1050
+ ```
1051
+
1052
+ #### `rem experiments trace` - Trace Retrieval
1053
+
1054
+ ```bash
1055
+ # List recent traces
1056
+ rem experiments trace list --project rem-agents --days 7 --limit 50
1057
+ ```
1058
+
1059
+ #### `rem experiments` - Experiment Config
1060
+
1061
+ Manage experiment configurations (A/B testing, parameter sweeps).
1062
+
1063
+ ```bash
1064
+ # Create experiment config
1065
+ rem experiments create \
1066
+ --name cv-parser-test \
1067
+ --description "Test CV parser with different models"
1068
+
1069
+ # List experiments
1070
+ rem experiments list
1071
+
1072
+ # Show experiment details
1073
+ rem experiments show cv-parser-test
1074
+
1075
+ # Run experiment
1076
+ rem experiments run cv-parser-test
1077
+ ```
1078
+
1079
+ ### Interactive Agent
1080
+
1081
+ #### `rem ask` - Test Agent
1082
+
1083
+ Test Pydantic AI agent with natural language queries.
1084
+
1085
+ ```bash
1086
+ # Ask a question
1087
+ rem ask "What documents did Sarah Chen author?"
1088
+
1089
+ # Use specific agent schema
1090
+ rem ask contract-analyzer "Analyze this contract"
1091
+
1092
+ # Stream response
1093
+ rem ask "Find all resources about API design" --stream
1094
+ ```
1095
+
1096
+ ### Global Options
1097
+
1098
+ All commands support:
1099
+
1100
+ ```bash
1101
+ # Verbose logging
1102
+ rem --verbose <command>
1103
+ rem -v <command>
1104
+
1105
+ # Version
1106
+ rem --version
1107
+
1108
+ # Help
1109
+ rem --help
1110
+ rem <command> --help
1111
+ rem <command> <subcommand> --help
1112
+ ```
1113
+
1114
+ ### Environment Variables
1115
+
1116
+ Override any setting via environment variables:
1117
+
1118
+ ```bash
1119
+ # Database
1120
+ export POSTGRES__CONNECTION_STRING=postgresql://rem:rem@localhost:5432/rem
1121
+ export POSTGRES__POOL_MIN_SIZE=5
1122
+
1123
+ # LLM
1124
+ export LLM__DEFAULT_MODEL=openai:gpt-4o
1125
+ export LLM__OPENAI_API_KEY=sk-...
1126
+ export LLM__ANTHROPIC_API_KEY=sk-ant-...
1127
+
1128
+ # S3
1129
+ export S3__BUCKET_NAME=rem-storage
1130
+ export S3__REGION=us-east-1
1131
+
1132
+ # Server
1133
+ export API__HOST=0.0.0.0
1134
+ export API__PORT=8000
1135
+ export API__RELOAD=true
1136
+
1137
+ # Run command with overrides
1138
+ rem serve
1139
+ ```
1140
+
1141
+ ## Option 2: Development (For Contributors)
1142
+
1143
+ **Best for**: Contributing to REM or customizing the codebase.
1144
+
1145
+ ### Step 1: Clone Repository
1146
+
1147
+ ```bash
1148
+ git clone https://github.com/mr-saoirse/remstack.git
1149
+ cd remstack/rem
1150
+ ```
1151
+
1152
+ ### Step 2: Start PostgreSQL Only
1153
+
1154
+ ```bash
1155
+ # Start only PostgreSQL (port 5050 for developers, doesn't conflict with package users on 5051)
1156
+ docker compose up postgres -d
1157
+
1158
+ # Verify connection
1159
+ psql -h localhost -p 5050 -U rem -d rem -c "SELECT version();"
1160
+ ```
1161
+
1162
+ ### Step 3: Set Up Development Environment
1163
+
1164
+ ```bash
1165
+ # IMPORTANT: If you previously installed the package and ran `rem configure`,
1166
+ # delete the REM configuration directory to avoid conflicts:
1167
+ rm -rf ~/.rem/
1168
+
1169
+ # Create virtual environment with uv
1170
+ uv venv
1171
+ source .venv/bin/activate # On Windows: .venv\Scripts\activate
1172
+
1173
+ # Install in editable mode with all dependencies
1174
+ uv pip install -e ".[all]"
1175
+
1176
+ # Set LLM API keys
1177
+ export OPENAI_API_KEY="sk-..."
1178
+ export ANTHROPIC_API_KEY="sk-ant-..."
1179
+ export POSTGRES__CONNECTION_STRING="postgresql://rem:rem@localhost:5050/rem"
1180
+
1181
+ # Verify CLI
1182
+ rem --version
1183
+ ```
1184
+
1185
+ ### Step 4: Initialize Database
1186
+
1187
+ ```bash
1188
+ # Apply migrations
1189
+ rem db migrate
1190
+
1191
+ # Verify tables
1192
+ psql -h localhost -p 5050 -U rem -d rem -c "\dt"
1193
+ ```
1194
+
1195
+ ### Step 5: Run API Server (Optional)
1196
+
1197
+ ```bash
1198
+ # Start API server with hot reload
1199
+ uv run python -m rem.api.main
1200
+
1201
+ # API runs on http://localhost:8000
1202
+ ```
1203
+
1204
+ ### Step 6: Run Tests
1205
+
1206
+ ```bash
1207
+ # Run non-LLM tests (fast, no API costs)
1208
+ uv run pytest tests/integration/ -m "not llm" -v
1209
+
1210
+ # Run all tests (uses API credits)
1211
+ uv run pytest tests/integration/ -v
1212
+
1213
+ # Type check (saves report to .mypy/ folder)
1214
+ ../scripts/run_mypy.sh
1215
+ ```
1216
+
1217
+ Type checking reports are saved to `.mypy/report_YYYYMMDD_HHMMSS.txt` (gitignored).
1218
+ Current status: 222 errors in 55 files (as of 2025-11-23).
1219
+
1220
+ ### Environment Variables
1221
+
1222
+ All settings can be provided via environment variables, using `__` as the nesting delimiter:
1223
+
1224
+ ```bash
1225
+ # LLM
1226
+ LLM__DEFAULT_MODEL=anthropic:claude-sonnet-4-5-20250929
1227
+ LLM__DEFAULT_TEMPERATURE=0.5
1228
+
1229
+ # Auth (disabled by default)
1230
+ AUTH__ENABLED=false
1231
+ AUTH__OIDC_ISSUER_URL=https://accounts.google.com
1232
+
1233
+ # OTEL (disabled by default for local dev)
1234
+ OTEL__ENABLED=false
1235
+ OTEL__SERVICE_NAME=rem-api
1236
+
1237
+ # Postgres
1238
+ POSTGRES__CONNECTION_STRING=postgresql://rem:rem@localhost:5050/rem
1239
+
1240
+ # S3
1241
+ S3__BUCKET_NAME=rem-storage
1242
+ S3__REGION=us-east-1
1243
+ ```
1244
+
1245
+ ### Building Docker Images
1246
+
1247
+ We give each Docker image three tags for traceability:
1248
+ 1. `latest` - Always points to most recent build
1249
+ 2. `<git-sha>` - Short commit hash for exact version tracing
1250
+ 3. `<version>` - Semantic version from `pyproject.toml`
1251
+
1252
+ ```bash
1253
+ # Build and push multi-platform image to Docker Hub
1254
+ VERSION=$(grep '^version' pyproject.toml | cut -d'"' -f2) && \
1255
+ docker buildx build --platform linux/amd64,linux/arm64 \
1256
+ -t percolationlabs/rem:latest \
1257
+ -t percolationlabs/rem:$(git rev-parse --short HEAD) \
1258
+ -t percolationlabs/rem:$VERSION \
1259
+ --push \
1260
+ -f Dockerfile .
1261
+
1262
+ # Load locally for testing (single platform, no push)
1263
+ docker buildx build --platform linux/arm64 \
1264
+ -t percolationlabs/rem:latest \
1265
+ --load \
1266
+ -f Dockerfile .
1267
+ ```
1268
+
1269
+ ### Production Deployment (Optional)
1270
+
1271
+ For production deployment to AWS EKS with Kubernetes, see the following READMEs in the main repository:
1272
+ - **Infrastructure**: [../../manifests/infra/pulumi/eks-yaml/README.md](../../manifests/infra/pulumi/eks-yaml/README.md)
1273
+ - **Platform**: [../../manifests/platform/README.md](../../manifests/platform/README.md)
1274
+ - **Application**: [../../manifests/application/README.md](../../manifests/application/README.md)
1275
+
1276
+
1277
+ ## REM Query Dialect (AST)
1278
+
1279
+ REM queries follow a structured dialect with formal grammar specification.
1280
+
1281
+ ### Grammar
1282
+
1283
+ ```
1284
+ Query ::= LookupQuery | FuzzyQuery | SearchQuery | SqlQuery | TraverseQuery
1285
+
1286
+ LookupQuery ::= LOOKUP <key:string|list[string]>
1287
+ key : Single entity name or list of entity names (natural language labels)
1288
+ performance : O(1) per key
1289
+ available : Stage 1+
1290
+ examples :
1291
+ - LOOKUP "Sarah"
1292
+ - LOOKUP ["Sarah", "Mike", "Emily"]
1293
+ - LOOKUP "Project Alpha"
1294
+
1295
+ FuzzyQuery ::= FUZZY <text:string> [THRESHOLD <t:float>] [LIMIT <n:int>]
1296
+ text : Search text (partial/misspelled)
1297
+ threshold : Similarity score 0.0-1.0 (default: 0.5)
1298
+ limit : Max results (default: 5)
1299
+ performance : Indexed (pg_trgm)
1300
+ available : Stage 1+
1301
+ example : FUZZY "sara" THRESHOLD 0.5 LIMIT 10
1302
+
1303
+ SearchQuery ::= SEARCH <text:string> [TABLE <table:string>] [WHERE <clause:string>] [LIMIT <n:int>]
1304
+ text : Semantic query text
1305
+ table : Target table (default: "resources")
1306
+ clause : Optional PostgreSQL WHERE clause for hybrid filtering (combines vector + structured)
1307
+ limit : Max results (default: 10)
1308
+ performance : Indexed (pgvector)
1309
+ available : Stage 3+
1310
+ examples :
1311
+ - SEARCH "database migration" TABLE resources LIMIT 10
1312
+ - SEARCH "team discussion" TABLE moments WHERE "moment_type='meeting'" LIMIT 5
1313
+ - SEARCH "project updates" WHERE "created_at >= '2024-01-01'" LIMIT 20
1314
+ - SEARCH "AI research" WHERE "tags @> ARRAY['machine-learning']" LIMIT 10
1315
+
1316
+ Hybrid Query Support: SEARCH combines semantic vector similarity with structured filtering.
1317
+ Use WHERE clause to filter on system fields or entity-specific fields.
1318
+
1319
+ SqlQuery ::= <raw_sql:string>
1320
+ | SQL <table:string> [WHERE <clause:string>] [ORDER BY <order:string>] [LIMIT <n:int>]
1321
+
1322
+ Mode 1 (Raw SQL - Recommended):
1323
+ Any query not starting with a REM keyword (LOOKUP, FUZZY, SEARCH, TRAVERSE) is treated as raw SQL.
1324
+ Allowed: SELECT, INSERT, UPDATE, WITH (read + data modifications)
1325
+ Blocked: DROP, DELETE, TRUNCATE, ALTER (destructive operations)
1326
+
1327
+ Mode 2 (Structured - Legacy):
1328
+ SQL prefix with table + WHERE clause (automatic tenant isolation)
1329
+
1330
+ performance : O(n) with indexes
1331
+ available : Stage 1+
1332
+ dialect : PostgreSQL (full PostgreSQL syntax support)
1333
+
1334
+ examples :
1335
+ # Raw SQL (no prefix needed)
1336
+ - SELECT * FROM resources WHERE created_at > NOW() - INTERVAL '7 days' LIMIT 20
1337
+ - SELECT category, COUNT(*) as count FROM resources GROUP BY category
1338
+ - WITH recent AS (SELECT * FROM resources WHERE created_at > NOW() - INTERVAL '1 day') SELECT * FROM recent
1339
+
1340
+ # Structured SQL (legacy, automatic tenant isolation)
1341
+ - SQL moments WHERE "moment_type='meeting'" ORDER BY starts_timestamp DESC LIMIT 10
1342
+ - SQL resources WHERE "metadata->>'status' = 'published'" LIMIT 20
1343
+
1344
+ PostgreSQL Dialect: Full support for:
1345
+ - JSONB operators (->>, ->, @>, etc.)
1346
+ - Array operators (&&, @>, <@, etc.)
1347
+ - CTEs (WITH clauses)
1348
+ - Advanced filtering and aggregations
1349
+
1350
+ TraverseQuery ::= TRAVERSE [<edge_types:list>] WITH <initial_query:Query> [DEPTH <d:int>] [ORDER BY <order:string>] [LIMIT <n:int>]
1351
+ edge_types : Relationship types to follow (e.g., ["manages", "reports-to"], default: all)
1352
+ initial_query : Starting query (typically LOOKUP)
1353
+ depth : Number of hops (0=PLAN mode, 1=single hop, N=multi-hop, default: 1)
1354
+ order : Order results (default: "edge.created_at DESC")
1355
+ limit : Max nodes (default: 9)
1356
+ performance : O(k) where k = visited nodes
1357
+ available : Stage 3+
1358
+ examples :
1359
+ - TRAVERSE manages WITH LOOKUP "Sally" DEPTH 1
1360
+ - TRAVERSE WITH LOOKUP "Sally" DEPTH 0 (PLAN mode: edge analysis only)
1361
+ - TRAVERSE manages,reports-to WITH LOOKUP "Sarah" DEPTH 2 LIMIT 5
1362
+ ```
1363
+
1364
+ ### Query Availability by Evolution Stage
1365
+
1366
+ | Query Type | Stage 0 | Stage 1 | Stage 2 | Stage 3 | Stage 4 |
1367
+ |------------|---------|---------|---------|---------|---------|
1368
+ | LOOKUP | ✗ | ✓ | ✓ | ✓ | ✓ |
1369
+ | FUZZY | ✗ | ✓ | ✓ | ✓ | ✓ |
1370
+ | SEARCH | ✗ | ✗ | ✗ | ✓ | ✓ |
1371
+ | SQL | ✗ | ✓ | ✓ | ✓ | ✓ |
1372
+ | TRAVERSE | ✗ | ✗ | ✗ | ✓ | ✓ |
1373
+
1374
+ **Stage 0**: No data yet, so all queries fail.
1375
+
1376
+ **Stage 1** (20% answerable): Resources seeded with entity extraction. LOOKUP and FUZZY work for finding entities. SQL works for basic filtering.
1377
+
1378
+ **Stage 2** (50% answerable): Moments extracted. SQL temporal queries work. LOOKUP includes moment entities.
1379
+
1380
+ **Stage 3** (80% answerable): Affinity graph built. SEARCH and TRAVERSE become available. Multi-hop graph queries work.
1381
+
1382
+ **Stage 4** (100% answerable): Mature graph with rich historical data. All query types fully functional with high-quality results.
1383
+
1384
+ ## Troubleshooting
1385
+
1386
+ ### Apple Silicon Mac: "Failed to build kreuzberg" Error
1387
+
1388
+ **Problem**: Installation fails with `ERROR: Failed building wheel for kreuzberg` on Apple Silicon Macs.
1389
+
1390
+ **Root Cause**: REM uses `kreuzberg>=4.0.0rc1` for document parsing with native ONNX/Rust table extraction. Kreuzberg 4.0.0rc1 provides pre-built wheels for ARM64 macOS (`macosx_14_0_arm64.whl`) but NOT for x86_64 (Intel) macOS. If you're using an x86_64 Python binary (running under Rosetta 2), pip cannot find a compatible wheel and attempts to build from source, which fails.
1391
+
1392
+ **Solution**: Use ARM64 (native) Python instead of x86_64 Python.
1393
+
1394
+ **Step 1: Verify your Python architecture**
1395
+
1396
+ ```bash
1397
+ python3 -c "import platform; print(f'Machine: {platform.machine()}')"
1398
+ ```
1399
+
1400
+ - **Correct**: `Machine: arm64` (native ARM Python)
1401
+ - **Wrong**: `Machine: x86_64` (Intel Python under Rosetta)
1402
+
1403
+ **Step 2: Install ARM Python via Homebrew** (if not already installed)
1404
+
1405
+ ```bash
1406
+ # Install ARM Python
1407
+ brew install python@3.12
1408
+
1409
+ # Verify it's ARM
1410
+ /opt/homebrew/bin/python3.12 -c "import platform; print(platform.machine())"
1411
+ # Should output: arm64
1412
+ ```
1413
+
1414
+ **Step 3: Create venv with ARM Python**
1415
+
1416
+ ```bash
1417
+ # Use full path to ARM Python
1418
+ /opt/homebrew/bin/python3.12 -m venv .venv
1419
+
1420
+ # Activate and install
1421
+ source .venv/bin/activate
1422
+ pip install "remdb[all]"
1423
+ ```
1424
+
1425
+ **Why This Happens**: Some users have both Intel Homebrew (`/usr/local`) and ARM Homebrew (`/opt/homebrew`) installed. If your system `python3` points to the Intel version at `/usr/local/bin/python3`, you'll hit this issue. The fix is to explicitly use the ARM Python from `/opt/homebrew/bin/python3.12`.
1426
+
1427
+ **Verification**: After a successful installation, pip's output should include lines like:
1428
+ ```
1429
+ Using cached kreuzberg-4.0.0rc1-cp310-abi3-macosx_14_0_arm64.whl (19.8 MB)
1430
+ Successfully installed ... kreuzberg-4.0.0rc1 ... remdb-<version>
1431
+ ```
1432
+
1433
+ ## Using REM as a Library
1434
+
1435
+ REM wraps FastAPI, so you can extend it exactly as you would any FastAPI app.
1436
+
1437
+ ### Recommended Project Structure
1438
+
1439
+ REM auto-detects `./agents/` and `./models/` folders - no configuration needed:
1440
+
1441
+ ```
1442
+ my-rem-app/
1443
+ ├── agents/ # Auto-detected for agent schemas
1444
+ │ ├── my-agent.yaml # Custom agent (rem ask my-agent "query")
1445
+ │ └── another-agent.yaml
1446
+ ├── models/ # Auto-detected if __init__.py exists
1447
+ │ └── __init__.py # Register models with @rem.register_model
1448
+ ├── routers/ # Custom FastAPI routers
1449
+ │ └── custom.py
1450
+ ├── main.py # Entry point
1451
+ └── pyproject.toml
1452
+ ```
1453
+
1454
+ ### Quick Start
1455
+
1456
+ ```python
1457
+ # main.py
1458
+ from rem import create_app
1459
+ from fastapi import APIRouter
1460
+
1461
+ # Create REM app (auto-detects ./agents/ and ./models/)
1462
+ app = create_app()
1463
+
1464
+ # Add custom router
1465
+ router = APIRouter(prefix="/custom", tags=["custom"])
1466
+
1467
+ @router.get("/hello")
1468
+ async def hello():
1469
+ return {"message": "Hello from custom router!"}
1470
+
1471
+ app.include_router(router)
1472
+
1473
+ # Add custom MCP tool
1474
+ @app.mcp_server.tool()
1475
+ async def my_tool(query: str) -> dict:
1476
+ """Custom MCP tool available to agents."""
1477
+ return {"result": query}
1478
+ ```
1479
+
1480
+ ### Custom Models (Auto-Detected)
1481
+
1482
+ ```python
1483
+ # models/__init__.py
1484
+ import rem
1485
+ from rem.models.core import CoreModel
1486
+ from pydantic import Field
1487
+
1488
+ @rem.register_model
1489
+ class MyEntity(CoreModel):
1490
+ """Custom entity - auto-registered for schema generation."""
1491
+ name: str = Field(description="Entity name")
1492
+ status: str = Field(default="active")
1493
+ ```
1494
+
1495
+ Run `rem db schema generate` to include your models in the database schema.
1496
+
1497
+ ### Custom Agents (Auto-Detected)
1498
+
1499
+ ```yaml
1500
+ # agents/my-agent.yaml
1501
+ type: object
1502
+ description: |
1503
+ You are a helpful assistant that...
1504
+
1505
+ properties:
1506
+ answer:
1507
+ type: string
1508
+ description: Your response
1509
+
1510
+ required:
1511
+ - answer
1512
+
1513
+ json_schema_extra:
1514
+ kind: agent
1515
+ name: my-agent
1516
+ version: "1.0.0"
1517
+ tools:
1518
+ - search_rem
1519
+ ```
1520
+
1521
+ Test with: `rem ask my-agent "Hello!"`
1522
+
1523
+ ### Example Custom Router
1524
+
1525
+ ```python
1526
+ # routers/analytics.py
1527
+ from fastapi import APIRouter, Depends
1528
+ from rem.services.postgres import get_postgres_service
1529
+
1530
+ router = APIRouter(prefix="/analytics", tags=["analytics"])
1531
+
1532
+ @router.get("/stats")
1533
+ async def get_stats():
1534
+ """Get database statistics."""
1535
+ db = get_postgres_service()
1536
+ if not db:
1537
+ return {"error": "Database not available"}
1538
+
1539
+ await db.connect()
1540
+ try:
1541
+ result = await db.execute(
1542
+ "SELECT COUNT(*) as count FROM resources"
1543
+ )
1544
+ return {"resource_count": result[0]["count"]}
1545
+ finally:
1546
+ await db.disconnect()
1547
+
1548
+ @router.get("/recent")
1549
+ async def get_recent(limit: int = 10):
1550
+ """Get recent resources."""
1551
+ db = get_postgres_service()
1552
+ if not db:
1553
+ return {"error": "Database not available"}
1554
+
1555
+ await db.connect()
1556
+ try:
1557
+ result = await db.execute(
1558
+ f"SELECT label, category, created_at FROM resources ORDER BY created_at DESC LIMIT {limit}"
1559
+ )
1560
+ return {"resources": result}
1561
+ finally:
1562
+ await db.disconnect()
1563
+ ```
1564
+
1565
+ Include the router in `main.py`:
1566
+
1567
+ ```python
1568
+ from routers.analytics import router as analytics_router
1569
+ app.include_router(analytics_router)
1570
+ ```
1571
+
1572
+ ### Running the App
1573
+
1574
+ ```bash
1575
+ # Development (auto-reload)
1576
+ uv run uvicorn main:app --reload --port 8000
1577
+
1578
+ # Or use rem serve
1579
+ uv run rem serve --reload
1580
+
1581
+ # Test agent
1582
+ uv run rem ask my-agent "What can you help me with?"
1583
+
1584
+ # Test custom endpoint
1585
+ curl http://localhost:8000/analytics/stats
1586
+ ```
1587
+
1588
+ ### Extension Points
1589
+
1590
+ | Extension | How |
1591
+ |-----------|-----|
1592
+ | **Routes** | `app.include_router(router)` or `@app.get()` |
1593
+ | **MCP Tools** | `@app.mcp_server.tool()` decorator or `app.mcp_server.add_tool(fn)` |
1594
+ | **MCP Resources** | `@app.mcp_server.resource("uri://...")` or `app.mcp_server.add_resource(fn)` |
1595
+ | **MCP Prompts** | `@app.mcp_server.prompt()` or `app.mcp_server.add_prompt(fn)` |
1596
+ | **Models** | `@rem.register_model` decorator or `rem.register_models(Model)`, then `rem db schema generate` |
1597
+ | **Agent Schemas** | `rem.register_schema_path("./schemas")` or `SCHEMA__PATHS` env var |
1598
+ | **SQL Migrations** | Place in `sql/migrations/` (auto-detected) |
1599
+
1600
+ ### Custom Migrations
1601
+
1602
+ REM automatically discovers migrations from two sources:
1603
+
1604
+ 1. **Package migrations** (001-099): Built-in migrations from the `remdb` package
1605
+ 2. **User migrations** (100+): Your custom migrations in `./sql/migrations/`
1606
+
1607
+ **Convention**: Place custom SQL files in `sql/migrations/` relative to your project root:
1608
+
1609
+ ```
1610
+ my-rem-app/
1611
+ ├── sql/
1612
+ │ └── migrations/
1613
+ │ ├── 100_custom_table.sql # Runs after package migrations
1614
+ │ ├── 101_add_indexes.sql
1615
+ │ └── 102_custom_functions.sql
1616
+ └── ...
1617
+ ```
1618
+
1619
+ **Numbering**: Use 100+ for user migrations to ensure they run after package migrations (001-099). All migrations are sorted by filename, so proper numbering ensures correct execution order.
1620
+
1621
+ **Running migrations**:
1622
+ ```bash
1623
+ # Apply all migrations (package + user)
1624
+ rem db migrate
1625
+
1626
+ # Apply with background indexes (for production)
1627
+ rem db migrate --background-indexes
1628
+ ```
1629
+
1630
+ ## License
1631
+
1632
+ MIT