remdb 0.3.0__py3-none-any.whl → 0.3.114__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (98)
  1. rem/__init__.py +129 -2
  2. rem/agentic/README.md +76 -0
  3. rem/agentic/__init__.py +15 -0
  4. rem/agentic/agents/__init__.py +16 -2
  5. rem/agentic/agents/sse_simulator.py +500 -0
  6. rem/agentic/context.py +28 -22
  7. rem/agentic/llm_provider_models.py +301 -0
  8. rem/agentic/otel/setup.py +92 -4
  9. rem/agentic/providers/phoenix.py +32 -43
  10. rem/agentic/providers/pydantic_ai.py +142 -22
  11. rem/agentic/schema.py +358 -21
  12. rem/agentic/tools/rem_tools.py +3 -3
  13. rem/api/README.md +238 -1
  14. rem/api/deps.py +255 -0
  15. rem/api/main.py +151 -37
  16. rem/api/mcp_router/resources.py +1 -1
  17. rem/api/mcp_router/server.py +17 -2
  18. rem/api/mcp_router/tools.py +143 -7
  19. rem/api/middleware/tracking.py +172 -0
  20. rem/api/routers/admin.py +277 -0
  21. rem/api/routers/auth.py +124 -0
  22. rem/api/routers/chat/completions.py +152 -16
  23. rem/api/routers/chat/models.py +7 -3
  24. rem/api/routers/chat/sse_events.py +526 -0
  25. rem/api/routers/chat/streaming.py +608 -45
  26. rem/api/routers/dev.py +81 -0
  27. rem/api/routers/feedback.py +148 -0
  28. rem/api/routers/messages.py +473 -0
  29. rem/api/routers/models.py +78 -0
  30. rem/api/routers/query.py +357 -0
  31. rem/api/routers/shared_sessions.py +406 -0
  32. rem/auth/middleware.py +126 -27
  33. rem/cli/commands/README.md +201 -70
  34. rem/cli/commands/ask.py +13 -10
  35. rem/cli/commands/cluster.py +1359 -0
  36. rem/cli/commands/configure.py +4 -3
  37. rem/cli/commands/db.py +350 -137
  38. rem/cli/commands/experiments.py +76 -72
  39. rem/cli/commands/process.py +22 -15
  40. rem/cli/commands/scaffold.py +47 -0
  41. rem/cli/commands/schema.py +95 -49
  42. rem/cli/main.py +29 -6
  43. rem/config.py +2 -2
  44. rem/models/core/core_model.py +7 -1
  45. rem/models/core/rem_query.py +5 -2
  46. rem/models/entities/__init__.py +21 -0
  47. rem/models/entities/domain_resource.py +38 -0
  48. rem/models/entities/feedback.py +123 -0
  49. rem/models/entities/message.py +30 -1
  50. rem/models/entities/session.py +83 -0
  51. rem/models/entities/shared_session.py +180 -0
  52. rem/models/entities/user.py +10 -3
  53. rem/registry.py +373 -0
  54. rem/schemas/agents/rem.yaml +7 -3
  55. rem/services/content/providers.py +94 -140
  56. rem/services/content/service.py +92 -20
  57. rem/services/dreaming/affinity_service.py +2 -16
  58. rem/services/dreaming/moment_service.py +2 -15
  59. rem/services/embeddings/api.py +24 -17
  60. rem/services/embeddings/worker.py +16 -16
  61. rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
  62. rem/services/phoenix/client.py +252 -19
  63. rem/services/postgres/README.md +159 -15
  64. rem/services/postgres/__init__.py +2 -1
  65. rem/services/postgres/diff_service.py +426 -0
  66. rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
  67. rem/services/postgres/repository.py +132 -0
  68. rem/services/postgres/schema_generator.py +86 -5
  69. rem/services/postgres/service.py +6 -6
  70. rem/services/rate_limit.py +113 -0
  71. rem/services/rem/README.md +14 -0
  72. rem/services/rem/parser.py +44 -9
  73. rem/services/rem/service.py +36 -2
  74. rem/services/session/compression.py +17 -1
  75. rem/services/session/reload.py +1 -1
  76. rem/services/user_service.py +98 -0
  77. rem/settings.py +169 -17
  78. rem/sql/background_indexes.sql +21 -16
  79. rem/sql/migrations/001_install.sql +231 -54
  80. rem/sql/migrations/002_install_models.sql +457 -393
  81. rem/sql/migrations/003_optional_extensions.sql +326 -0
  82. rem/utils/constants.py +97 -0
  83. rem/utils/date_utils.py +228 -0
  84. rem/utils/embeddings.py +17 -4
  85. rem/utils/files.py +167 -0
  86. rem/utils/mime_types.py +158 -0
  87. rem/utils/model_helpers.py +156 -1
  88. rem/utils/schema_loader.py +191 -35
  89. rem/utils/sql_types.py +3 -1
  90. rem/utils/vision.py +9 -14
  91. rem/workers/README.md +14 -14
  92. rem/workers/db_maintainer.py +74 -0
  93. {remdb-0.3.0.dist-info → remdb-0.3.114.dist-info}/METADATA +303 -164
  94. {remdb-0.3.0.dist-info → remdb-0.3.114.dist-info}/RECORD +96 -70
  95. {remdb-0.3.0.dist-info → remdb-0.3.114.dist-info}/WHEEL +1 -1
  96. rem/sql/002_install_models.sql +0 -1068
  97. rem/sql/install_models.sql +0 -1038
  98. {remdb-0.3.0.dist-info → remdb-0.3.114.dist-info}/entry_points.txt +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: remdb
- Version: 0.3.0
+ Version: 0.3.114
  Summary: Resources Entities Moments - Bio-inspired memory system for agentic AI workloads
  Project-URL: Homepage, https://github.com/Percolation-Labs/reminiscent
  Project-URL: Documentation, https://github.com/Percolation-Labs/reminiscent/blob/main/README.md
@@ -14,7 +14,7 @@ Classifier: Intended Audience :: Developers
  Classifier: License :: OSI Approved :: MIT License
  Classifier: Programming Language :: Python :: 3.12
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
- Requires-Python: >=3.12
+ Requires-Python: <3.13,>=3.12
  Requires-Dist: aioboto3>=13.0.0
  Requires-Dist: arize-phoenix>=5.0.0
  Requires-Dist: asyncpg>=0.30.0
@@ -23,11 +23,10 @@ Requires-Dist: click>=8.1.0
  Requires-Dist: fastapi>=0.115.0
  Requires-Dist: fastmcp>=0.5.0
  Requires-Dist: gitpython>=3.1.45
- Requires-Dist: gmft==0.3.1
  Requires-Dist: hypercorn>=0.17.0
  Requires-Dist: itsdangerous>=2.0.0
  Requires-Dist: json-schema-to-pydantic>=0.2.0
- Requires-Dist: kreuzberg[gmft]>=3.21.0
+ Requires-Dist: kreuzberg<4.0.0,>=3.21.0
  Requires-Dist: loguru>=0.7.0
  Requires-Dist: openinference-instrumentation-pydantic-ai>=0.1.0
  Requires-Dist: opentelemetry-api>=1.28.0
@@ -102,23 +101,22 @@ Cloud-native unified memory infrastructure for agentic AI systems built with Pyd
  - **Database Layer**: PostgreSQL 18 with pgvector for multi-index memory (KV + Vector + Graph)
  - **REM Query Dialect**: Custom query language with O(1) lookups, semantic search, graph traversal
  - **Ingestion & Dreaming**: Background workers for content extraction and progressive index enrichment (0% → 100% answerable)
- - **Observability & Evals**: OpenTelemetry tracing + Arize Phoenix + LLM-as-a-Judge evaluation framework
+ - **Observability & Evals**: OpenTelemetry tracing supporting LLM-as-a-Judge evaluation frameworks

  ## Features

  | Feature | Description | Benefits |
  |---------|-------------|----------|
  | **OpenAI-Compatible Chat API** | Drop-in replacement for OpenAI chat completions API with streaming support | Use with existing OpenAI clients, switch models across providers (OpenAI, Anthropic, etc.) |
- | **Built-in MCP Server** | FastMCP server with 4 tools + 3 resources for memory operations | Export memory to Claude Desktop, Cursor, or any MCP-compatible host |
+ | **Built-in MCP Server** | FastMCP server with 4 tools + 5 resources for memory operations | Export memory to Claude Desktop, Cursor, or any MCP-compatible host |
  | **REM Query Engine** | Multi-index query system (LOOKUP, FUZZY, SEARCH, SQL, TRAVERSE) with custom dialect | O(1) lookups, semantic search, graph traversal - all tenant-isolated |
  | **Dreaming Workers** | Background workers for entity extraction, moment generation, and affinity matching | Automatic knowledge graph construction from resources (0% → 100% query answerable) |
  | **PostgreSQL + pgvector** | CloudNativePG with PostgreSQL 18, pgvector extension, streaming replication | Production-ready vector search, no external vector DB needed |
  | **AWS EKS Recipe** | Complete infrastructure-as-code with Pulumi, Karpenter, ArgoCD | Deploy to production EKS in minutes with auto-scaling and GitOps |
  | **JSON Schema Agents** | Dynamic agent creation from YAML schemas via Pydantic AI factory | Define agents declaratively, version control schemas, load dynamically |
- | **Content Providers** | Audio transcription (Whisper), vision (GPT-4V, Claude), PDFs, DOCX, images | Multimodal ingestion out of the box with format detection |
- | **Configurable Embeddings** | Provider-agnostic embedding system (OpenAI, Cohere, Jina) | Switch embedding providers via env vars, no code changes |
+ | **Content Providers** | Audio transcription (Whisper), vision (OpenAI, Anthropic, Gemini), PDFs, DOCX, PPTX, XLSX, images | Multimodal ingestion out of the box with format detection |
+ | **Configurable Embeddings** | OpenAI embedding system (text-embedding-3-small) | Production-ready embeddings, additional providers planned |
  | **Multi-Tenancy** | Tenant isolation at database level with automatic scoping | SaaS-ready with complete data separation per tenant |
- | **Streaming Everything** | SSE for chat, background workers for embeddings, async throughout | Real-time responses, non-blocking operations, scalable |
  | **Zero Vendor Lock-in** | Raw HTTP clients (no OpenAI SDK), swappable providers, open standards | Not tied to any vendor, easy to migrate, full control |
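The drop-in claim in the first row maps directly onto the stock `openai` client. A minimal sketch, assuming the local endpoint used later in this README (`http://localhost:8000/api/v1`) and a provider-prefixed model string as in the quickstart; the `api_key` value is a placeholder since auth setup is not shown here:

```python
# Sketch: point the standard OpenAI client at REM's chat completions API.
# Assumes REM is serving on localhost:8000 (see Quick Start below); the
# model string and api_key handling are illustrative.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/api/v1", api_key="placeholder")

# One-shot completion
response = client.chat.completions.create(
    model="openai:gpt-4.1-nano",
    messages=[{"role": "user", "content": "What documents exist in the system?"}],
)
print(response.choices[0].message.content)

# The same call with SSE streaming
stream = client.chat.completions.create(
    model="openai:gpt-4.1-nano",
    messages=[{"role": "user", "content": "Show me meetings about API design"}],
    stream=True,
)
for chunk in stream:
    print(chunk.choices[0].delta.content or "", end="")
```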

  ## Quick Start
@@ -136,42 +134,50 @@ Choose your path:
  **Best for**: First-time users who want to explore REM with curated example datasets.

  ```bash
+ # Install system dependencies (tesseract for OCR)
+ brew install tesseract # macOS (Linux/Windows: see tesseract-ocr.github.io)
+
  # Install remdb
- pip install remdb[all]
+ pip install "remdb[all]"

  # Clone example datasets
  git clone https://github.com/Percolation-Labs/remstack-lab.git
  cd remstack-lab

- # Configure REM (interactive wizard)
- rem configure --install
+ # Optional: Set default LLM provider via environment variable
+ # export LLM__DEFAULT_MODEL="openai:gpt-4.1-nano" # Fast and cheap
+ # export LLM__DEFAULT_MODEL="anthropic:claude-sonnet-4-5-20250929" # High quality (default)

- # Start PostgreSQL
- docker run -d \
- --name rem-postgres \
- -e POSTGRES_USER=rem \
- -e POSTGRES_PASSWORD=rem \
- -e POSTGRES_DB=rem \
- -p 5050:5432 \
- pgvector/pgvector:pg18
+ # Start PostgreSQL with docker-compose
+ curl -O https://gist.githubusercontent.com/percolating-sirsh/d117b673bc0edfdef1a5068ccd3cf3e5/raw/docker-compose.prebuilt.yml
+ docker compose -f docker-compose.prebuilt.yml up -d postgres

- # Load quickstart dataset
- rem db load --file datasets/quickstart/sample_data.yaml --user-id demo-user
+ # Configure REM (creates ~/.rem/config.yaml and installs database schema)
+ # Add --claude-desktop to register with Claude Desktop app
+ rem configure --install --claude-desktop
+
+ # Load quickstart dataset (uses default user)
+ rem db load datasets/quickstart/sample_data.yaml

  # Ask questions
- rem ask --user-id demo-user "What documents exist in the system?"
- rem ask --user-id demo-user "Show me meetings about API design"
+ rem ask "What documents exist in the system?"
+ rem ask "Show me meetings about API design"
+
+ # Ingest files (PDF, DOCX, images, etc.) - note: requires remstack-lab
+ rem process ingest datasets/formats/files/bitcoin_whitepaper.pdf --category research --tags bitcoin,whitepaper
+
+ # Query ingested content
+ rem ask "What is the Bitcoin whitepaper about?"

- # Try other datasets
- rem db load --file datasets/domains/recruitment/scenarios/candidate_pipeline/data.yaml --user-id my-company
- rem ask --user-id my-company "Show me candidates with Python experience"
+ # Try other datasets (use --user-id for multi-tenant scenarios)
+ rem db load datasets/domains/recruitment/scenarios/candidate_pipeline/data.yaml --user-id acme-corp
+ rem ask --user-id acme-corp "Show me candidates with Python experience"
  ```

  **What you get:**
  - Quickstart: 3 users, 3 resources, 3 moments, 4 messages
  - Domain datasets: recruitment, legal, enterprise, misc
  - Format examples: engrams, documents, conversations, files
- - Jupyter notebooks and experiments

  **Learn more**: [remstack-lab repository](https://github.com/Percolation-Labs/remstack-lab)

@@ -252,28 +258,28 @@ Configuration saved to `~/.rem/config.yaml` (can edit with `rem configure --edit
  # Clone datasets repository
  git clone https://github.com/Percolation-Labs/remstack-lab.git

- # Load quickstart dataset
- rem db load --file remstack-lab/datasets/quickstart/sample_data.yaml --user-id demo-user
+ # Load quickstart dataset (uses default user)
+ rem db load --file remstack-lab/datasets/quickstart/sample_data.yaml

  # Test with sample queries
- rem ask --user-id demo-user "What documents exist in the system?"
- rem ask --user-id demo-user "Show me meetings about API design"
- rem ask --user-id demo-user "Who is Sarah Chen?"
+ rem ask "What documents exist in the system?"
+ rem ask "Show me meetings about API design"
+ rem ask "Who is Sarah Chen?"

- # Try domain-specific datasets
- rem db load --file remstack-lab/datasets/domains/recruitment/scenarios/candidate_pipeline/data.yaml --user-id my-company
- rem ask --user-id my-company "Show me candidates with Python experience"
+ # Try domain-specific datasets (use --user-id for multi-tenant scenarios)
+ rem db load --file remstack-lab/datasets/domains/recruitment/scenarios/candidate_pipeline/data.yaml --user-id acme-corp
+ rem ask --user-id acme-corp "Show me candidates with Python experience"
  ```

  **Option B: Bring your own data**

  ```bash
- # Ingest your own files
+ # Ingest your own files (uses default user)
  echo "REM is a bio-inspired memory system for agentic AI workloads." > test-doc.txt
- rem process ingest test-doc.txt --user-id test-user --category documentation --tags rem,ai
+ rem process ingest test-doc.txt --category documentation --tags rem,ai

  # Query your ingested data
- rem ask --user-id test-user "What do you know about REM from my knowledge base?"
+ rem ask "What do you know about REM from my knowledge base?"
  ```

  ### Step 4: Test the API
@@ -310,13 +316,13 @@ curl -X POST http://localhost:8000/api/v1/chat/completions \
  ```bash
  cd remstack-lab

- # Load any dataset
- rem db load --file datasets/quickstart/sample_data.yaml --user-id demo-user
+ # Load any dataset (uses default user)
+ rem db load --file datasets/quickstart/sample_data.yaml

  # Explore formats
- rem db load --file datasets/formats/engrams/scenarios/team_meeting/team_standup_meeting.yaml --user-id demo-user
+ rem db load --file datasets/formats/engrams/scenarios/team_meeting/team_standup_meeting.yaml

- # Try domain-specific examples
+ # Try domain-specific examples (use --user-id for multi-tenant scenarios)
  rem db load --file datasets/domains/recruitment/scenarios/candidate_pipeline/data.yaml --user-id acme-corp
  ```

@@ -411,30 +417,24 @@ json_schema_extra:
  ```bash
  # Ingest the schema (stores in database schemas table)
  rem process ingest my-research-assistant.yaml \
- --user-id my-user \
  --category agents \
  --tags custom,research

  # Verify schema is in database (should show schema details)
- rem ask "LOOKUP 'my-research-assistant' FROM schemas" --user-id my-user
+ rem ask "LOOKUP 'my-research-assistant' FROM schemas"
  ```

  **Step 3: Use Your Custom Agent**

  ```bash
  # Run a query with your custom agent
- rem ask research-assistant "Find documents about machine learning architecture" \
- --user-id my-user
+ rem ask research-assistant "Find documents about machine learning architecture"

  # With streaming
- rem ask research-assistant "Summarize recent API design documents" \
- --user-id my-user \
- --stream
+ rem ask research-assistant "Summarize recent API design documents" --stream

  # With session continuity
- rem ask research-assistant "What did we discuss about ML?" \
- --user-id my-user \
- --session-id abc-123
+ rem ask research-assistant "What did we discuss about ML?" --session-id abc-123
  ```

  ### Agent Schema Structure
@@ -505,10 +505,10 @@ Custom agents can also be used as **ontology extractors** to extract structured
  **Schema not found error:**
  ```bash
  # Check if schema was ingested correctly
- rem ask "SEARCH 'my-agent' FROM schemas" --user-id my-user
+ rem ask "SEARCH 'my-agent' FROM schemas"

- # List all schemas for your user
- rem ask "SELECT name, category, created_at FROM schemas ORDER BY created_at DESC LIMIT 10" --user-id my-user
+ # List all schemas
+ rem ask "SELECT name, category, created_at FROM schemas ORDER BY created_at DESC LIMIT 10"
  ```

  **Agent not loading tools:**
@@ -533,15 +533,15 @@ REM provides a custom query language designed for **LLM-driven iterated retrieva
  Unlike traditional single-shot SQL queries, the REM dialect is optimized for **multi-turn exploration** where LLMs participate in query planning:

  - **Iterated Queries**: Queries return partial results that LLMs use to refine subsequent queries
- - **Composable WITH Syntax**: Chain operations together (e.g., `TRAVERSE FROM ... WITH LOOKUP "..."`)
+ - **Composable WITH Syntax**: Chain operations together (e.g., `TRAVERSE edge_type WITH LOOKUP "..."`)
  - **Mixed Indexes**: Combines exact lookups (O(1)), semantic search (vector), and graph traversal
  - **Query Planner Participation**: Results include metadata for LLMs to decide next steps

  **Example Multi-Turn Flow**:
  ```
  Turn 1: LOOKUP "sarah-chen" → Returns entity + available edge types
- Turn 2: TRAVERSE FROM "sarah-chen" TYPE "authored_by" DEPTH 1 → Returns connected documents
- Turn 3: SEARCH "architecture decisions" WITH TRAVERSE FROM "sarah-chen" Combines semantic + graph
+ Turn 2: TRAVERSE authored_by WITH LOOKUP "sarah-chen" DEPTH 1 → Returns connected documents
+ Turn 3: SEARCH "architecture decisions" Semantic search, then explore graph from results
  ```

  This enables LLMs to **progressively build context** rather than requiring perfect queries upfront.
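A minimal sketch of that loop driven from Python, assuming only the `rem ask` CLI shown throughout this README. The dialect strings are the three turns above; in a real agent, each turn's output would inform the LLM's choice of the next query:

```python
# Sketch: an iterated-retrieval loop over the REM dialect via the CLI.
# Assumes `rem` is installed and configured; the queries are taken
# verbatim from the example flow above.
import subprocess

def rem_ask(query: str) -> str:
    """Run one REM dialect query through `rem ask` and return its stdout."""
    result = subprocess.run(
        ["rem", "ask", query], capture_output=True, text=True, check=True
    )
    return result.stdout

entity = rem_ask('LOOKUP "sarah-chen"')                                  # Turn 1
docs = rem_ask('TRAVERSE authored_by WITH LOOKUP "sarah-chen" DEPTH 1')  # Turn 2
related = rem_ask('SEARCH "architecture decisions"')                     # Turn 3
```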
@@ -594,8 +594,8 @@ SEARCH "contract disputes" FROM resources WHERE tags @> ARRAY['legal'] LIMIT 5
  Follow `graph_edges` relationships across the knowledge graph.

  ```sql
- TRAVERSE FROM "sarah-chen" TYPE "authored_by" DEPTH 2
- TRAVERSE FROM "api-design-v2" TYPE "references,depends_on" DEPTH 3
+ TRAVERSE authored_by WITH LOOKUP "sarah-chen" DEPTH 2
+ TRAVERSE references,depends_on WITH LOOKUP "api-design-v2" DEPTH 3
  ```

  **Features**:
@@ -688,7 +688,7 @@ SEARCH "API migration planning" FROM resources LIMIT 5
  LOOKUP "tidb-migration-spec" FROM resources

  # Query 3: Find related people
- TRAVERSE FROM "tidb-migration-spec" TYPE "authored_by,reviewed_by" DEPTH 1
+ TRAVERSE authored_by,reviewed_by WITH LOOKUP "tidb-migration-spec" DEPTH 1

  # Query 4: Recent activity
  SELECT * FROM moments WHERE
@@ -705,7 +705,7 @@ All queries automatically scoped by `user_id` for complete data isolation:
  SEARCH "contracts" FROM resources LIMIT 10

  -- No cross-user data leakage
- TRAVERSE FROM "project-x" TYPE "references" DEPTH 3
+ TRAVERSE references WITH LOOKUP "project-x" DEPTH 3
  ```

  ## API Endpoints
@@ -857,81 +857,131 @@ rem serve --log-level debug

  ### Database Management

- #### `rem db migrate` - Run Migrations
+ REM uses a **code-as-source-of-truth** approach for database schema management. Pydantic models define the schema, and the database is kept in sync via diff-based migrations.

- Apply database migrations (install.sql and install_models.sql).
+ #### Schema Management Philosophy
+
+ **Two migration files only:**
+ - `001_install.sql` - Core infrastructure (extensions, functions, KV store)
+ - `002_install_models.sql` - Entity tables (auto-generated from Pydantic models)
+
+ **No incremental migrations** (003, 004, etc.) - the models file is always regenerated to match code.
+
+ #### `rem db schema generate` - Regenerate Schema SQL
+
+ Generate `002_install_models.sql` from registered Pydantic models.

  ```bash
- # Apply all migrations
- rem db migrate
+ # Regenerate from model registry
+ rem db schema generate

- # Core infrastructure only (extensions, functions)
- rem db migrate --install
+ # Output: src/rem/sql/migrations/002_install_models.sql
+ ```

- # Entity tables only (Resource, Message, etc.)
- rem db migrate --models
+ This generates:
+ - CREATE TABLE statements for each registered entity
+ - Embeddings tables (`embeddings_<table>`)
+ - KV_STORE triggers for cache maintenance
+ - Foreground indexes (GIN for JSONB, B-tree for lookups)

- # Background indexes (HNSW for vectors)
- rem db migrate --background-indexes
+ #### `rem db diff` - Detect Schema Drift
+
+ Compare Pydantic models against the live database using Alembic autogenerate.
+
+ ```bash
+ # Show differences
+ rem db diff

- # Custom connection string
- rem db migrate --connection "postgresql://user:pass@host:5432/db"
+ # CI mode: exit 1 if drift detected
+ rem db diff --check

- # Custom SQL directory
- rem db migrate --sql-dir /path/to/sql
+ # Generate migration SQL for changes
+ rem db diff --generate
  ```

- #### `rem db status` - Migration Status
+ **Output shows:**
+ - `+ ADD COLUMN` - Column in model but not in DB
+ - `- DROP COLUMN` - Column in DB but not in model
+ - `~ ALTER COLUMN` - Column type or constraints differ
+ - `+ CREATE TABLE` / `- DROP TABLE` - Table additions/removals
+
+ #### `rem db apply` - Apply SQL Directly

- Show applied migrations and execution times.
+ Apply a SQL file directly to the database (bypasses migration tracking).

  ```bash
- rem db status
+ # Apply with audit logging (default)
+ rem db apply src/rem/sql/migrations/002_install_models.sql
+
+ # Preview without executing
+ rem db apply --dry-run src/rem/sql/migrations/002_install_models.sql
+
+ # Apply without audit logging
+ rem db apply --no-log src/rem/sql/migrations/002_install_models.sql
  ```

- #### `rem db rebuild-cache` - Rebuild KV Cache
+ #### `rem db migrate` - Initial Setup

- Rebuild KV_STORE cache from entity tables (after database restart or bulk imports).
+ Apply standard migrations (001 + 002). Use for initial setup only.

  ```bash
- rem db rebuild-cache
+ # Apply infrastructure + entity tables
+ rem db migrate
+
+ # Include background indexes (HNSW for vectors)
+ rem db migrate --background-indexes
  ```

- ### Schema Management
+ #### Database Workflows

- #### `rem db schema generate` - Generate SQL Schema
+ **Initial Setup (Local):**
+ ```bash
+ rem db schema generate # Generate from models
+ rem db migrate # Apply 001 + 002
+ rem db diff # Verify no drift
+ ```

- Generate database schema from Pydantic models.
+ **Adding/Modifying Models:**
+ ```bash
+ # 1. Edit models in src/rem/models/entities/
+ # 2. Register new models in src/rem/registry.py
+ rem db schema generate # Regenerate schema
+ rem db diff # See what changed
+ rem db apply src/rem/sql/migrations/002_install_models.sql
+ ```

+ **CI/CD Pipeline:**
  ```bash
- # Generate install_models.sql from entity models
- rem db schema generate \
- --models src/rem/models/entities \
- --output rem/src/rem/sql/install_models.sql
+ rem db diff --check # Fail build if drift detected
+ ```

- # Generate migration file
- rem db schema generate \
- --models src/rem/models/entities \
- --output rem/src/rem/sql/migrations/003_add_fields.sql
+ **Remote Database (Production/Staging):**
+ ```bash
+ # Port-forward to cluster database
+ kubectl port-forward -n <namespace> svc/rem-postgres-rw 5433:5432 &
+
+ # Override connection for diff check
+ POSTGRES__CONNECTION_STRING="postgresql://rem:rem@localhost:5433/rem" rem db diff
+
+ # Apply changes if needed
+ POSTGRES__CONNECTION_STRING="postgresql://rem:rem@localhost:5433/rem" \
+ rem db apply src/rem/sql/migrations/002_install_models.sql
  ```
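Step 2 of the "Adding/Modifying Models" workflow above ("register new models in `src/rem/registry.py`") looks roughly like this in code. A sketch assuming the `CoreModel` base and `rem.register_models()` shown in the "Using REM as a Library" section below; the `Note` entity and its fields are hypothetical:

```python
# Sketch: define an entity and register it so `rem db schema generate`
# emits a CREATE TABLE (plus an embeddings table) for it in
# 002_install_models.sql. The Note model is illustrative only.
import rem
from rem.models.core import CoreModel

class Note(CoreModel):
    """Hypothetical entity; fields become columns in the generated schema."""
    title: str
    body: str
    tags: list[str] = []

rem.register_models(Note)
```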

- #### `rem db schema indexes` - Generate Background Indexes
+ #### `rem db rebuild-cache` - Rebuild KV Cache

- Generate SQL for background index creation (HNSW for vectors).
+ Rebuild KV_STORE cache from entity tables (after database restart or bulk imports).

  ```bash
- # Generate background_indexes.sql
- rem db schema indexes \
- --models src/rem/models/entities \
- --output rem/src/rem/sql/background_indexes.sql
+ rem db rebuild-cache
  ```

  #### `rem db schema validate` - Validate Models

- Validate Pydantic models for schema generation.
+ Validate registered Pydantic models for schema generation.

  ```bash
- rem db schema validate --models src/rem/models/entities
+ rem db schema validate
  ```

  ### File Processing
@@ -941,22 +991,14 @@ rem db schema validate --models src/rem/models/entities
  Process files with optional custom extractor (ontology extraction).

  ```bash
- # Process all completed files for tenant
- rem process files \
- --tenant-id acme-corp \
- --status completed \
- --limit 10
+ # Process all completed files
+ rem process files --status completed --limit 10

  # Process with custom extractor
- rem process files \
- --tenant-id acme-corp \
- --extractor cv-parser-v1 \
- --limit 50
+ rem process files --extractor cv-parser-v1 --limit 50

- # Process files from the last 7 days
- rem process files \
- --tenant-id acme-corp \
- --lookback-hours 168
+ # Process files for specific user
+ rem process files --user-id user-123 --status completed
  ```

  #### `rem process ingest` - Ingest File into REM
@@ -964,14 +1006,13 @@ rem process files \
  Ingest a file into REM with full pipeline (storage + parsing + embedding + database).

  ```bash
- # Ingest local file
+ # Ingest local file with metadata
  rem process ingest /path/to/document.pdf \
- --user-id user-123 \
  --category legal \
  --tags contract,2024

  # Ingest with minimal options
- rem process ingest ./meeting-notes.md --user-id user-123
+ rem process ingest ./meeting-notes.md
  ```

  #### `rem process uri` - Parse File (Read-Only)
@@ -996,28 +1037,17 @@ rem process uri s3://bucket/key.docx --output text
  Run full dreaming workflow: extractors → moments → affinity → user model.

  ```bash
- # Full workflow for user
- rem dreaming full \
- --user-id user-123 \
- --tenant-id acme-corp
+ # Full workflow (uses default user from settings)
+ rem dreaming full

  # Skip ontology extractors
- rem dreaming full \
- --user-id user-123 \
- --tenant-id acme-corp \
- --skip-extractors
+ rem dreaming full --skip-extractors

  # Process last 24 hours only
- rem dreaming full \
- --user-id user-123 \
- --tenant-id acme-corp \
- --lookback-hours 24
+ rem dreaming full --lookback-hours 24

- # Limit resources processed
- rem dreaming full \
- --user-id user-123 \
- --tenant-id acme-corp \
- --limit 100
+ # Limit resources processed for specific user
+ rem dreaming full --user-id user-123 --limit 100
  ```

  #### `rem dreaming custom` - Custom Extractor
@@ -1025,16 +1055,11 @@ rem dreaming full \
  Run specific ontology extractor on user's data.

  ```bash
- # Run CV parser on user's files
- rem dreaming custom \
- --user-id user-123 \
- --tenant-id acme-corp \
- --extractor cv-parser-v1
+ # Run CV parser on files
+ rem dreaming custom --extractor cv-parser-v1

- # Process last week's files
+ # Process last week's files with limit
  rem dreaming custom \
- --user-id user-123 \
- --tenant-id acme-corp \
  --extractor contract-analyzer-v1 \
  --lookback-hours 168 \
  --limit 50
@@ -1045,17 +1070,11 @@ rem dreaming custom \
  Extract temporal narratives from resources.

  ```bash
- # Generate moments for user
- rem dreaming moments \
- --user-id user-123 \
- --tenant-id acme-corp \
- --limit 50
+ # Generate moments
+ rem dreaming moments --limit 50

  # Process last 7 days
- rem dreaming moments \
- --user-id user-123 \
- --tenant-id acme-corp \
- --lookback-hours 168
+ rem dreaming moments --lookback-hours 168
  ```

  #### `rem dreaming affinity` - Build Relationships
@@ -1063,17 +1082,11 @@ rem dreaming moments \
  Build semantic relationships between resources using embeddings.

  ```bash
- # Build affinity graph for user
- rem dreaming affinity \
- --user-id user-123 \
- --tenant-id acme-corp \
- --limit 100
+ # Build affinity graph
+ rem dreaming affinity --limit 100

  # Process recent resources only
- rem dreaming affinity \
- --user-id user-123 \
- --tenant-id acme-corp \
- --lookback-hours 24
+ rem dreaming affinity --lookback-hours 24
  ```

  #### `rem dreaming user-model` - Update User Model
@@ -1082,9 +1095,7 @@ Update user model from recent activity (preferences, interests, patterns).

  ```bash
  # Update user model
- rem dreaming user-model \
- --user-id user-123 \
- --tenant-id acme-corp
+ rem dreaming user-model
  ```

  ### Evaluation & Experiments
@@ -1335,6 +1346,30 @@ S3__BUCKET_NAME=rem-storage
  S3__REGION=us-east-1
  ```

+ ### Building Docker Images
+
+ We tag Docker images with three labels for traceability:
+ 1. `latest` - Always points to most recent build
+ 2. `<git-sha>` - Short commit hash for exact version tracing
+ 3. `<version>` - Semantic version from `pyproject.toml`
+
+ ```bash
+ # Build and push multi-platform image to Docker Hub
+ VERSION=$(grep '^version' pyproject.toml | cut -d'"' -f2) && \
+ docker buildx build --platform linux/amd64,linux/arm64 \
+ -t percolationlabs/rem:latest \
+ -t percolationlabs/rem:$(git rev-parse --short HEAD) \
+ -t percolationlabs/rem:$VERSION \
+ --push \
+ -f Dockerfile .
+
+ # Load locally for testing (single platform, no push)
+ docker buildx build --platform linux/arm64 \
+ -t percolationlabs/rem:latest \
+ --load \
+ -f Dockerfile .
+ ```
+
  ### Production Deployment (Optional)

  For production deployment to AWS EKS with Kubernetes, see the main repository README:
@@ -1450,6 +1485,110 @@ TraverseQuery ::= TRAVERSE [<edge_types:list>] WITH <initial_query:Query> [DEPTH

  **Stage 4** (100% answerable): Mature graph with rich historical data. All query types fully functional with high-quality results.

+ ## Troubleshooting
+
+ ### Apple Silicon Mac: "Failed to build kreuzberg" Error
+
+ **Problem**: Installation fails with `ERROR: Failed building wheel for kreuzberg` on Apple Silicon Macs.
+
+ **Root Cause**: REM uses `kreuzberg>=4.0.0rc1` for document parsing with native ONNX/Rust table extraction. Kreuzberg 4.0.0rc1 provides pre-built wheels for ARM64 macOS (`macosx_14_0_arm64.whl`) but NOT for x86_64 (Intel) macOS. If you're using an x86_64 Python binary (running under Rosetta 2), pip cannot find a compatible wheel and attempts to build from source, which fails.
+
+ **Solution**: Use ARM64 (native) Python instead of x86_64 Python.
+
+ **Step 1: Verify your Python architecture**
+
+ ```bash
+ python3 -c "import platform; print(f'Machine: {platform.machine()}')"
+ ```
+
+ - **Correct**: `Machine: arm64` (native ARM Python)
+ - **Wrong**: `Machine: x86_64` (Intel Python under Rosetta)
+
+ **Step 2: Install ARM Python via Homebrew** (if not already installed)
+
+ ```bash
+ # Install ARM Python
+ brew install python@3.12
+
+ # Verify it's ARM
+ /opt/homebrew/bin/python3.12 -c "import platform; print(platform.machine())"
+ # Should output: arm64
+ ```
+
+ **Step 3: Create venv with ARM Python**
+
+ ```bash
+ # Use full path to ARM Python
+ /opt/homebrew/bin/python3.12 -m venv .venv
+
+ # Activate and install
+ source .venv/bin/activate
+ pip install "remdb[all]"
+ ```
+
+ **Why This Happens**: Some users have both Intel Homebrew (`/usr/local`) and ARM Homebrew (`/opt/homebrew`) installed. If your system `python3` points to the Intel version at `/usr/local/bin/python3`, you'll hit this issue. The fix is to explicitly use the ARM Python from `/opt/homebrew/bin/python3.12`.
+
+ **Verification**: After successful installation, you should see:
+ ```
+ Using cached kreuzberg-4.0.0rc1-cp310-abi3-macosx_14_0_arm64.whl (19.8 MB)
+ Successfully installed ... kreuzberg-4.0.0rc1 ... remdb-0.3.10
+ ```
+
+ ## Using REM as a Library
+
+ REM wraps FastAPI - extend it exactly as you would any FastAPI app.
+
+ ```python
+ import rem
+ from rem import create_app
+ from rem.models.core import CoreModel
+
+ # 1. Register models (for schema generation)
+ rem.register_models(MyModel, AnotherModel)
+
+ # 2. Register schema paths (for custom agents/evaluators)
+ rem.register_schema_path("./schemas")
+
+ # 3. Create app
+ app = create_app()
+
+ # 4. Extend like normal FastAPI
+ app.include_router(my_router)
+
+ @app.mcp_server.tool()
+ async def my_tool(query: str) -> dict:
+ """Custom MCP tool."""
+ return {"result": query}
+ ```
+
+ ### Project Structure
+
+ ```
+ my-rem-app/
+ ├── my_app/
+ │ ├── main.py # Entry point (create_app + extensions)
+ │ ├── models.py # Custom models (inherit CoreModel)
+ │ └── routers/ # Custom FastAPI routers
+ ├── schemas/
+ │ ├── agents/ # Custom agent YAML schemas
+ │ └── evaluators/ # Custom evaluator schemas
+ ├── sql/migrations/ # Custom SQL migrations
+ └── pyproject.toml
+ ```
+
+ Generate this structure with: `rem scaffold my-app`
+
+ ### Extension Points
+
+ | Extension | How |
+ |-----------|-----|
+ | **Routes** | `app.include_router(router)` or `@app.get()` |
+ | **MCP Tools** | `@app.mcp_server.tool()` decorator or `app.mcp_server.add_tool(fn)` |
+ | **MCP Resources** | `@app.mcp_server.resource("uri://...")` or `app.mcp_server.add_resource(fn)` |
+ | **MCP Prompts** | `@app.mcp_server.prompt()` or `app.mcp_server.add_prompt(fn)` |
+ | **Models** | `rem.register_models(Model)` then `rem db schema generate` |
+ | **Agent Schemas** | `rem.register_schema_path("./schemas")` or `SCHEMA__PATHS` env var |
+
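The extension points compose on a single app. A short sketch combining a route, an MCP tool, and an MCP resource from the table above; the route path, tool logic, and resource URI are illustrative, not part of REM:

```python
# Sketch: one app exercising three extension points from the table.
# The /healthz path, word_count tool, and memory://status URI are
# hypothetical examples.
from rem import create_app

app = create_app()

@app.get("/healthz")  # plain FastAPI route
async def healthz() -> dict:
    return {"status": "ok"}

@app.mcp_server.tool()  # MCP tool
async def word_count(text: str) -> dict:
    """Hypothetical tool: count words in a string."""
    return {"words": len(text.split())}

@app.mcp_server.resource("memory://status")  # MCP resource
async def status() -> str:
    """Hypothetical resource returning a fixed status string."""
    return "ok"
```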
  ## License

  MIT