remdb 0.3.0__py3-none-any.whl → 0.3.114__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +129 -2
- rem/agentic/README.md +76 -0
- rem/agentic/__init__.py +15 -0
- rem/agentic/agents/__init__.py +16 -2
- rem/agentic/agents/sse_simulator.py +500 -0
- rem/agentic/context.py +28 -22
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/otel/setup.py +92 -4
- rem/agentic/providers/phoenix.py +32 -43
- rem/agentic/providers/pydantic_ai.py +142 -22
- rem/agentic/schema.py +358 -21
- rem/agentic/tools/rem_tools.py +3 -3
- rem/api/README.md +238 -1
- rem/api/deps.py +255 -0
- rem/api/main.py +151 -37
- rem/api/mcp_router/resources.py +1 -1
- rem/api/mcp_router/server.py +17 -2
- rem/api/mcp_router/tools.py +143 -7
- rem/api/middleware/tracking.py +172 -0
- rem/api/routers/admin.py +277 -0
- rem/api/routers/auth.py +124 -0
- rem/api/routers/chat/completions.py +152 -16
- rem/api/routers/chat/models.py +7 -3
- rem/api/routers/chat/sse_events.py +526 -0
- rem/api/routers/chat/streaming.py +608 -45
- rem/api/routers/dev.py +81 -0
- rem/api/routers/feedback.py +148 -0
- rem/api/routers/messages.py +473 -0
- rem/api/routers/models.py +78 -0
- rem/api/routers/query.py +357 -0
- rem/api/routers/shared_sessions.py +406 -0
- rem/auth/middleware.py +126 -27
- rem/cli/commands/README.md +201 -70
- rem/cli/commands/ask.py +13 -10
- rem/cli/commands/cluster.py +1359 -0
- rem/cli/commands/configure.py +4 -3
- rem/cli/commands/db.py +350 -137
- rem/cli/commands/experiments.py +76 -72
- rem/cli/commands/process.py +22 -15
- rem/cli/commands/scaffold.py +47 -0
- rem/cli/commands/schema.py +95 -49
- rem/cli/main.py +29 -6
- rem/config.py +2 -2
- rem/models/core/core_model.py +7 -1
- rem/models/core/rem_query.py +5 -2
- rem/models/entities/__init__.py +21 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/message.py +30 -1
- rem/models/entities/session.py +83 -0
- rem/models/entities/shared_session.py +180 -0
- rem/models/entities/user.py +10 -3
- rem/registry.py +373 -0
- rem/schemas/agents/rem.yaml +7 -3
- rem/services/content/providers.py +94 -140
- rem/services/content/service.py +92 -20
- rem/services/dreaming/affinity_service.py +2 -16
- rem/services/dreaming/moment_service.py +2 -15
- rem/services/embeddings/api.py +24 -17
- rem/services/embeddings/worker.py +16 -16
- rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
- rem/services/phoenix/client.py +252 -19
- rem/services/postgres/README.md +159 -15
- rem/services/postgres/__init__.py +2 -1
- rem/services/postgres/diff_service.py +426 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
- rem/services/postgres/repository.py +132 -0
- rem/services/postgres/schema_generator.py +86 -5
- rem/services/postgres/service.py +6 -6
- rem/services/rate_limit.py +113 -0
- rem/services/rem/README.md +14 -0
- rem/services/rem/parser.py +44 -9
- rem/services/rem/service.py +36 -2
- rem/services/session/compression.py +17 -1
- rem/services/session/reload.py +1 -1
- rem/services/user_service.py +98 -0
- rem/settings.py +169 -17
- rem/sql/background_indexes.sql +21 -16
- rem/sql/migrations/001_install.sql +231 -54
- rem/sql/migrations/002_install_models.sql +457 -393
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/utils/constants.py +97 -0
- rem/utils/date_utils.py +228 -0
- rem/utils/embeddings.py +17 -4
- rem/utils/files.py +167 -0
- rem/utils/mime_types.py +158 -0
- rem/utils/model_helpers.py +156 -1
- rem/utils/schema_loader.py +191 -35
- rem/utils/sql_types.py +3 -1
- rem/utils/vision.py +9 -14
- rem/workers/README.md +14 -14
- rem/workers/db_maintainer.py +74 -0
- {remdb-0.3.0.dist-info → remdb-0.3.114.dist-info}/METADATA +303 -164
- {remdb-0.3.0.dist-info → remdb-0.3.114.dist-info}/RECORD +96 -70
- {remdb-0.3.0.dist-info → remdb-0.3.114.dist-info}/WHEEL +1 -1
- rem/sql/002_install_models.sql +0 -1068
- rem/sql/install_models.sql +0 -1038
- {remdb-0.3.0.dist-info → remdb-0.3.114.dist-info}/entry_points.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: remdb
|
|
3
|
-
Version: 0.3.
|
|
3
|
+
Version: 0.3.114
|
|
4
4
|
Summary: Resources Entities Moments - Bio-inspired memory system for agentic AI workloads
|
|
5
5
|
Project-URL: Homepage, https://github.com/Percolation-Labs/reminiscent
|
|
6
6
|
Project-URL: Documentation, https://github.com/Percolation-Labs/reminiscent/blob/main/README.md
|
|
@@ -14,7 +14,7 @@ Classifier: Intended Audience :: Developers
|
|
|
14
14
|
Classifier: License :: OSI Approved :: MIT License
|
|
15
15
|
Classifier: Programming Language :: Python :: 3.12
|
|
16
16
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
17
|
-
Requires-Python:
|
|
17
|
+
Requires-Python: <3.13,>=3.12
|
|
18
18
|
Requires-Dist: aioboto3>=13.0.0
|
|
19
19
|
Requires-Dist: arize-phoenix>=5.0.0
|
|
20
20
|
Requires-Dist: asyncpg>=0.30.0
|
|
@@ -23,11 +23,10 @@ Requires-Dist: click>=8.1.0
|
|
|
23
23
|
Requires-Dist: fastapi>=0.115.0
|
|
24
24
|
Requires-Dist: fastmcp>=0.5.0
|
|
25
25
|
Requires-Dist: gitpython>=3.1.45
|
|
26
|
-
Requires-Dist: gmft==0.3.1
|
|
27
26
|
Requires-Dist: hypercorn>=0.17.0
|
|
28
27
|
Requires-Dist: itsdangerous>=2.0.0
|
|
29
28
|
Requires-Dist: json-schema-to-pydantic>=0.2.0
|
|
30
|
-
Requires-Dist: kreuzberg
|
|
29
|
+
Requires-Dist: kreuzberg<4.0.0,>=3.21.0
|
|
31
30
|
Requires-Dist: loguru>=0.7.0
|
|
32
31
|
Requires-Dist: openinference-instrumentation-pydantic-ai>=0.1.0
|
|
33
32
|
Requires-Dist: opentelemetry-api>=1.28.0
|
|
@@ -102,23 +101,22 @@ Cloud-native unified memory infrastructure for agentic AI systems built with Pyd
|
|
|
102
101
|
- **Database Layer**: PostgreSQL 18 with pgvector for multi-index memory (KV + Vector + Graph)
|
|
103
102
|
- **REM Query Dialect**: Custom query language with O(1) lookups, semantic search, graph traversal
|
|
104
103
|
- **Ingestion & Dreaming**: Background workers for content extraction and progressive index enrichment (0% → 100% answerable)
|
|
105
|
-
- **Observability & Evals**: OpenTelemetry tracing
|
|
104
|
+
- **Observability & Evals**: OpenTelemetry tracing supporting LLM-as-a-Judge evaluation frameworks
|
|
106
105
|
|
|
107
106
|
## Features
|
|
108
107
|
|
|
109
108
|
| Feature | Description | Benefits |
|
|
110
109
|
|---------|-------------|----------|
|
|
111
110
|
| **OpenAI-Compatible Chat API** | Drop-in replacement for OpenAI chat completions API with streaming support | Use with existing OpenAI clients, switch models across providers (OpenAI, Anthropic, etc.) |
|
|
112
|
-
| **Built-in MCP Server** | FastMCP server with 4 tools +
|
|
111
|
+
| **Built-in MCP Server** | FastMCP server with 4 tools + 5 resources for memory operations | Export memory to Claude Desktop, Cursor, or any MCP-compatible host |
|
|
113
112
|
| **REM Query Engine** | Multi-index query system (LOOKUP, FUZZY, SEARCH, SQL, TRAVERSE) with custom dialect | O(1) lookups, semantic search, graph traversal - all tenant-isolated |
|
|
114
113
|
| **Dreaming Workers** | Background workers for entity extraction, moment generation, and affinity matching | Automatic knowledge graph construction from resources (0% → 100% query answerable) |
|
|
115
114
|
| **PostgreSQL + pgvector** | CloudNativePG with PostgreSQL 18, pgvector extension, streaming replication | Production-ready vector search, no external vector DB needed |
|
|
116
115
|
| **AWS EKS Recipe** | Complete infrastructure-as-code with Pulumi, Karpenter, ArgoCD | Deploy to production EKS in minutes with auto-scaling and GitOps |
|
|
117
116
|
| **JSON Schema Agents** | Dynamic agent creation from YAML schemas via Pydantic AI factory | Define agents declaratively, version control schemas, load dynamically |
|
|
118
|
-
| **Content Providers** | Audio transcription (Whisper), vision (
|
|
119
|
-
| **Configurable Embeddings** |
|
|
117
|
+
| **Content Providers** | Audio transcription (Whisper), vision (OpenAI, Anthropic, Gemini), PDFs, DOCX, PPTX, XLSX, images | Multimodal ingestion out of the box with format detection |
|
|
118
|
+
| **Configurable Embeddings** | OpenAI embedding system (text-embedding-3-small) | Production-ready embeddings, additional providers planned |
|
|
120
119
|
| **Multi-Tenancy** | Tenant isolation at database level with automatic scoping | SaaS-ready with complete data separation per tenant |
|
|
121
|
-
| **Streaming Everything** | SSE for chat, background workers for embeddings, async throughout | Real-time responses, non-blocking operations, scalable |
|
|
122
120
|
| **Zero Vendor Lock-in** | Raw HTTP clients (no OpenAI SDK), swappable providers, open standards | Not tied to any vendor, easy to migrate, full control |
|
|
123
121
|
|
|
124
122
|
## Quick Start
|
|
@@ -136,42 +134,50 @@ Choose your path:
|
|
|
136
134
|
**Best for**: First-time users who want to explore REM with curated example datasets.
|
|
137
135
|
|
|
138
136
|
```bash
|
|
137
|
+
# Install system dependencies (tesseract for OCR)
|
|
138
|
+
brew install tesseract # macOS (Linux/Windows: see tesseract-ocr.github.io)
|
|
139
|
+
|
|
139
140
|
# Install remdb
|
|
140
|
-
pip install remdb[all]
|
|
141
|
+
pip install "remdb[all]"
|
|
141
142
|
|
|
142
143
|
# Clone example datasets
|
|
143
144
|
git clone https://github.com/Percolation-Labs/remstack-lab.git
|
|
144
145
|
cd remstack-lab
|
|
145
146
|
|
|
146
|
-
#
|
|
147
|
-
|
|
147
|
+
# Optional: Set default LLM provider via environment variable
|
|
148
|
+
# export LLM__DEFAULT_MODEL="openai:gpt-4.1-nano" # Fast and cheap
|
|
149
|
+
# export LLM__DEFAULT_MODEL="anthropic:claude-sonnet-4-5-20250929" # High quality (default)
|
|
148
150
|
|
|
149
|
-
# Start PostgreSQL
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
-e POSTGRES_USER=rem \
|
|
153
|
-
-e POSTGRES_PASSWORD=rem \
|
|
154
|
-
-e POSTGRES_DB=rem \
|
|
155
|
-
-p 5050:5432 \
|
|
156
|
-
pgvector/pgvector:pg18
|
|
151
|
+
# Start PostgreSQL with docker-compose
|
|
152
|
+
curl -O https://gist.githubusercontent.com/percolating-sirsh/d117b673bc0edfdef1a5068ccd3cf3e5/raw/docker-compose.prebuilt.yml
|
|
153
|
+
docker compose -f docker-compose.prebuilt.yml up -d postgres
|
|
157
154
|
|
|
158
|
-
#
|
|
159
|
-
|
|
155
|
+
# Configure REM (creates ~/.rem/config.yaml and installs database schema)
|
|
156
|
+
# The optional --claude-desktop flag also registers REM with the Claude Desktop app
|
|
157
|
+
rem configure --install --claude-desktop
|
|
158
|
+
|
|
159
|
+
# Load quickstart dataset (uses default user)
|
|
160
|
+
rem db load datasets/quickstart/sample_data.yaml
|
|
160
161
|
|
|
161
162
|
# Ask questions
|
|
162
|
-
rem ask
|
|
163
|
-
rem ask
|
|
163
|
+
rem ask "What documents exist in the system?"
|
|
164
|
+
rem ask "Show me meetings about API design"
|
|
165
|
+
|
|
166
|
+
# Ingest files (PDF, DOCX, images, etc.) - note: requires remstack-lab
|
|
167
|
+
rem process ingest datasets/formats/files/bitcoin_whitepaper.pdf --category research --tags bitcoin,whitepaper
|
|
168
|
+
|
|
169
|
+
# Query ingested content
|
|
170
|
+
rem ask "What is the Bitcoin whitepaper about?"
|
|
164
171
|
|
|
165
|
-
# Try other datasets
|
|
166
|
-
rem db load
|
|
167
|
-
rem ask --user-id
|
|
172
|
+
# Try other datasets (use --user-id for multi-tenant scenarios)
|
|
173
|
+
rem db load datasets/domains/recruitment/scenarios/candidate_pipeline/data.yaml --user-id acme-corp
|
|
174
|
+
rem ask --user-id acme-corp "Show me candidates with Python experience"
|
|
168
175
|
```
|
|
169
176
|
|
|
170
177
|
**What you get:**
|
|
171
178
|
- Quickstart: 3 users, 3 resources, 3 moments, 4 messages
|
|
172
179
|
- Domain datasets: recruitment, legal, enterprise, misc
|
|
173
180
|
- Format examples: engrams, documents, conversations, files
|
|
174
|
-
- Jupyter notebooks and experiments
|
|
175
181
|
|
|
176
182
|
**Learn more**: [remstack-lab repository](https://github.com/Percolation-Labs/remstack-lab)
|
|
177
183
|
|
|
@@ -252,28 +258,28 @@ Configuration saved to `~/.rem/config.yaml` (can edit with `rem configure --edit
|
|
|
252
258
|
# Clone datasets repository
|
|
253
259
|
git clone https://github.com/Percolation-Labs/remstack-lab.git
|
|
254
260
|
|
|
255
|
-
# Load quickstart dataset
|
|
256
|
-
rem db load --file remstack-lab/datasets/quickstart/sample_data.yaml
|
|
261
|
+
# Load quickstart dataset (uses default user)
|
|
262
|
+
rem db load --file remstack-lab/datasets/quickstart/sample_data.yaml
|
|
257
263
|
|
|
258
264
|
# Test with sample queries
|
|
259
|
-
rem ask
|
|
260
|
-
rem ask
|
|
261
|
-
rem ask
|
|
265
|
+
rem ask "What documents exist in the system?"
|
|
266
|
+
rem ask "Show me meetings about API design"
|
|
267
|
+
rem ask "Who is Sarah Chen?"
|
|
262
268
|
|
|
263
|
-
# Try domain-specific datasets
|
|
264
|
-
rem db load --file remstack-lab/datasets/domains/recruitment/scenarios/candidate_pipeline/data.yaml --user-id
|
|
265
|
-
rem ask --user-id
|
|
269
|
+
# Try domain-specific datasets (use --user-id for multi-tenant scenarios)
|
|
270
|
+
rem db load --file remstack-lab/datasets/domains/recruitment/scenarios/candidate_pipeline/data.yaml --user-id acme-corp
|
|
271
|
+
rem ask --user-id acme-corp "Show me candidates with Python experience"
|
|
266
272
|
```
|
|
267
273
|
|
|
268
274
|
**Option B: Bring your own data**
|
|
269
275
|
|
|
270
276
|
```bash
|
|
271
|
-
# Ingest your own files
|
|
277
|
+
# Ingest your own files (uses default user)
|
|
272
278
|
echo "REM is a bio-inspired memory system for agentic AI workloads." > test-doc.txt
|
|
273
|
-
rem process ingest test-doc.txt --
|
|
279
|
+
rem process ingest test-doc.txt --category documentation --tags rem,ai
|
|
274
280
|
|
|
275
281
|
# Query your ingested data
|
|
276
|
-
rem ask
|
|
282
|
+
rem ask "What do you know about REM from my knowledge base?"
|
|
277
283
|
```
|
|
278
284
|
|
|
279
285
|
### Step 4: Test the API
|
|
@@ -310,13 +316,13 @@ curl -X POST http://localhost:8000/api/v1/chat/completions \
|
|
|
310
316
|
```bash
|
|
311
317
|
cd remstack-lab
|
|
312
318
|
|
|
313
|
-
# Load any dataset
|
|
314
|
-
rem db load --file datasets/quickstart/sample_data.yaml
|
|
319
|
+
# Load any dataset (uses default user)
|
|
320
|
+
rem db load --file datasets/quickstart/sample_data.yaml
|
|
315
321
|
|
|
316
322
|
# Explore formats
|
|
317
|
-
rem db load --file datasets/formats/engrams/scenarios/team_meeting/team_standup_meeting.yaml
|
|
323
|
+
rem db load --file datasets/formats/engrams/scenarios/team_meeting/team_standup_meeting.yaml
|
|
318
324
|
|
|
319
|
-
# Try domain-specific examples
|
|
325
|
+
# Try domain-specific examples (use --user-id for multi-tenant scenarios)
|
|
320
326
|
rem db load --file datasets/domains/recruitment/scenarios/candidate_pipeline/data.yaml --user-id acme-corp
|
|
321
327
|
```
|
|
322
328
|
|
|
@@ -411,30 +417,24 @@ json_schema_extra:
|
|
|
411
417
|
```bash
|
|
412
418
|
# Ingest the schema (stores in database schemas table)
|
|
413
419
|
rem process ingest my-research-assistant.yaml \
|
|
414
|
-
--user-id my-user \
|
|
415
420
|
--category agents \
|
|
416
421
|
--tags custom,research
|
|
417
422
|
|
|
418
423
|
# Verify schema is in database (should show schema details)
|
|
419
|
-
rem ask "LOOKUP 'my-research-assistant' FROM schemas"
|
|
424
|
+
rem ask "LOOKUP 'my-research-assistant' FROM schemas"
|
|
420
425
|
```
|
|
421
426
|
|
|
422
427
|
**Step 3: Use Your Custom Agent**
|
|
423
428
|
|
|
424
429
|
```bash
|
|
425
430
|
# Run a query with your custom agent
|
|
426
|
-
rem ask research-assistant "Find documents about machine learning architecture"
|
|
427
|
-
--user-id my-user
|
|
431
|
+
rem ask research-assistant "Find documents about machine learning architecture"
|
|
428
432
|
|
|
429
433
|
# With streaming
|
|
430
|
-
rem ask research-assistant "Summarize recent API design documents"
|
|
431
|
-
--user-id my-user \
|
|
432
|
-
--stream
|
|
434
|
+
rem ask research-assistant "Summarize recent API design documents" --stream
|
|
433
435
|
|
|
434
436
|
# With session continuity
|
|
435
|
-
rem ask research-assistant "What did we discuss about ML?"
|
|
436
|
-
--user-id my-user \
|
|
437
|
-
--session-id abc-123
|
|
437
|
+
rem ask research-assistant "What did we discuss about ML?" --session-id abc-123
|
|
438
438
|
```
|
|
439
439
|
|
|
440
440
|
### Agent Schema Structure
|
|
@@ -505,10 +505,10 @@ Custom agents can also be used as **ontology extractors** to extract structured
|
|
|
505
505
|
**Schema not found error:**
|
|
506
506
|
```bash
|
|
507
507
|
# Check if schema was ingested correctly
|
|
508
|
-
rem ask "SEARCH 'my-agent' FROM schemas"
|
|
508
|
+
rem ask "SEARCH 'my-agent' FROM schemas"
|
|
509
509
|
|
|
510
|
-
# List all schemas
|
|
511
|
-
rem ask "SELECT name, category, created_at FROM schemas ORDER BY created_at DESC LIMIT 10"
|
|
510
|
+
# List all schemas
|
|
511
|
+
rem ask "SELECT name, category, created_at FROM schemas ORDER BY created_at DESC LIMIT 10"
|
|
512
512
|
```
|
|
513
513
|
|
|
514
514
|
**Agent not loading tools:**
|
|
@@ -533,15 +533,15 @@ REM provides a custom query language designed for **LLM-driven iterated retrieva
|
|
|
533
533
|
Unlike traditional single-shot SQL queries, the REM dialect is optimized for **multi-turn exploration** where LLMs participate in query planning:
|
|
534
534
|
|
|
535
535
|
- **Iterated Queries**: Queries return partial results that LLMs use to refine subsequent queries
|
|
536
|
-
- **Composable WITH Syntax**: Chain operations together (e.g., `TRAVERSE
|
|
536
|
+
- **Composable WITH Syntax**: Chain operations together (e.g., `TRAVERSE edge_type WITH LOOKUP "..."`)
|
|
537
537
|
- **Mixed Indexes**: Combines exact lookups (O(1)), semantic search (vector), and graph traversal
|
|
538
538
|
- **Query Planner Participation**: Results include metadata for LLMs to decide next steps
|
|
539
539
|
|
|
540
540
|
**Example Multi-Turn Flow**:
|
|
541
541
|
```
|
|
542
542
|
Turn 1: LOOKUP "sarah-chen" → Returns entity + available edge types
|
|
543
|
-
Turn 2: TRAVERSE
|
|
544
|
-
Turn 3: SEARCH "architecture decisions"
|
|
543
|
+
Turn 2: TRAVERSE authored_by WITH LOOKUP "sarah-chen" DEPTH 1 → Returns connected documents
|
|
544
|
+
Turn 3: SEARCH "architecture decisions" → Semantic search, then explore graph from results
|
|
545
545
|
```
|
|
546
546
|
|
|
547
547
|
This enables LLMs to **progressively build context** rather than requiring perfect queries upfront.
|
|
@@ -594,8 +594,8 @@ SEARCH "contract disputes" FROM resources WHERE tags @> ARRAY['legal'] LIMIT 5
|
|
|
594
594
|
Follow `graph_edges` relationships across the knowledge graph.
|
|
595
595
|
|
|
596
596
|
```sql
|
|
597
|
-
TRAVERSE
|
|
598
|
-
TRAVERSE
|
|
597
|
+
TRAVERSE authored_by WITH LOOKUP "sarah-chen" DEPTH 2
|
|
598
|
+
TRAVERSE references,depends_on WITH LOOKUP "api-design-v2" DEPTH 3
|
|
599
599
|
```
|
|
600
600
|
|
|
601
601
|
**Features**:
|
|
@@ -688,7 +688,7 @@ SEARCH "API migration planning" FROM resources LIMIT 5
|
|
|
688
688
|
LOOKUP "tidb-migration-spec" FROM resources
|
|
689
689
|
|
|
690
690
|
# Query 3: Find related people
|
|
691
|
-
TRAVERSE
|
|
691
|
+
TRAVERSE authored_by,reviewed_by WITH LOOKUP "tidb-migration-spec" DEPTH 1
|
|
692
692
|
|
|
693
693
|
# Query 4: Recent activity
|
|
694
694
|
SELECT * FROM moments WHERE
|
|
@@ -705,7 +705,7 @@ All queries automatically scoped by `user_id` for complete data isolation:
|
|
|
705
705
|
SEARCH "contracts" FROM resources LIMIT 10
|
|
706
706
|
|
|
707
707
|
-- No cross-user data leakage
|
|
708
|
-
TRAVERSE
|
|
708
|
+
TRAVERSE references WITH LOOKUP "project-x" DEPTH 3
|
|
709
709
|
```
|
|
710
710
|
|
|
711
711
|
## API Endpoints
|
|
@@ -857,81 +857,131 @@ rem serve --log-level debug
|
|
|
857
857
|
|
|
858
858
|
### Database Management
|
|
859
859
|
|
|
860
|
-
|
|
860
|
+
REM uses a **code-as-source-of-truth** approach for database schema management. Pydantic models define the schema, and the database is kept in sync via diff-based migrations.
|
|
861
861
|
|
|
862
|
-
|
|
862
|
+
#### Schema Management Philosophy
|
|
863
|
+
|
|
864
|
+
**Two migration files only:**
|
|
865
|
+
- `001_install.sql` - Core infrastructure (extensions, functions, KV store)
|
|
866
|
+
- `002_install_models.sql` - Entity tables (auto-generated from Pydantic models)
|
|
867
|
+
|
|
868
|
+
**No incremental migrations** (003, 004, etc.) - the models file is always regenerated to match code.
|
|
869
|
+
|
|
870
|
+
#### `rem db schema generate` - Regenerate Schema SQL
|
|
871
|
+
|
|
872
|
+
Generate `002_install_models.sql` from registered Pydantic models.
|
|
863
873
|
|
|
864
874
|
```bash
|
|
865
|
-
#
|
|
866
|
-
rem db
|
|
875
|
+
# Regenerate from model registry
|
|
876
|
+
rem db schema generate
|
|
867
877
|
|
|
868
|
-
#
|
|
869
|
-
|
|
878
|
+
# Output: src/rem/sql/migrations/002_install_models.sql
|
|
879
|
+
```
|
|
870
880
|
|
|
871
|
-
|
|
872
|
-
|
|
881
|
+
This generates:
|
|
882
|
+
- CREATE TABLE statements for each registered entity
|
|
883
|
+
- Embeddings tables (`embeddings_<table>`)
|
|
884
|
+
- KV_STORE triggers for cache maintenance
|
|
885
|
+
- Foreground indexes (GIN for JSONB, B-tree for lookups)
|
|
873
886
|
|
|
874
|
-
|
|
875
|
-
|
|
887
|
+
#### `rem db diff` - Detect Schema Drift
|
|
888
|
+
|
|
889
|
+
Compare Pydantic models against the live database using Alembic autogenerate.
|
|
890
|
+
|
|
891
|
+
```bash
|
|
892
|
+
# Show differences
|
|
893
|
+
rem db diff
|
|
876
894
|
|
|
877
|
-
#
|
|
878
|
-
rem db
|
|
895
|
+
# CI mode: exit 1 if drift detected
|
|
896
|
+
rem db diff --check
|
|
879
897
|
|
|
880
|
-
#
|
|
881
|
-
rem db
|
|
898
|
+
# Generate migration SQL for changes
|
|
899
|
+
rem db diff --generate
|
|
882
900
|
```
|
|
883
901
|
|
|
884
|
-
|
|
902
|
+
**Output shows:**
|
|
903
|
+
- `+ ADD COLUMN` - Column in model but not in DB
|
|
904
|
+
- `- DROP COLUMN` - Column in DB but not in model
|
|
905
|
+
- `~ ALTER COLUMN` - Column type or constraints differ
|
|
906
|
+
- `+ CREATE TABLE` / `- DROP TABLE` - Table additions/removals
|
|
907
|
+
|
|
908
|
+
#### `rem db apply` - Apply SQL Directly
|
|
885
909
|
|
|
886
|
-
|
|
910
|
+
Apply a SQL file directly to the database (bypasses migration tracking).
|
|
887
911
|
|
|
888
912
|
```bash
|
|
889
|
-
|
|
913
|
+
# Apply with audit logging (default)
|
|
914
|
+
rem db apply src/rem/sql/migrations/002_install_models.sql
|
|
915
|
+
|
|
916
|
+
# Preview without executing
|
|
917
|
+
rem db apply --dry-run src/rem/sql/migrations/002_install_models.sql
|
|
918
|
+
|
|
919
|
+
# Apply without audit logging
|
|
920
|
+
rem db apply --no-log src/rem/sql/migrations/002_install_models.sql
|
|
890
921
|
```
|
|
891
922
|
|
|
892
|
-
#### `rem db
|
|
923
|
+
#### `rem db migrate` - Initial Setup
|
|
893
924
|
|
|
894
|
-
|
|
925
|
+
Apply standard migrations (001 + 002). Use for initial setup only.
|
|
895
926
|
|
|
896
927
|
```bash
|
|
897
|
-
|
|
928
|
+
# Apply infrastructure + entity tables
|
|
929
|
+
rem db migrate
|
|
930
|
+
|
|
931
|
+
# Include background indexes (HNSW for vectors)
|
|
932
|
+
rem db migrate --background-indexes
|
|
898
933
|
```
|
|
899
934
|
|
|
900
|
-
|
|
935
|
+
#### Database Workflows
|
|
901
936
|
|
|
902
|
-
|
|
937
|
+
**Initial Setup (Local):**
|
|
938
|
+
```bash
|
|
939
|
+
rem db schema generate # Generate from models
|
|
940
|
+
rem db migrate # Apply 001 + 002
|
|
941
|
+
rem db diff # Verify no drift
|
|
942
|
+
```
|
|
903
943
|
|
|
904
|
-
|
|
944
|
+
**Adding/Modifying Models:**
|
|
945
|
+
```bash
|
|
946
|
+
# 1. Edit models in src/rem/models/entities/
|
|
947
|
+
# 2. Register new models in src/rem/registry.py
|
|
948
|
+
rem db schema generate # Regenerate schema
|
|
949
|
+
rem db diff # See what changed
|
|
950
|
+
rem db apply src/rem/sql/migrations/002_install_models.sql
|
|
951
|
+
```
|
|
905
952
|
|
|
953
|
+
**CI/CD Pipeline:**
|
|
906
954
|
```bash
|
|
907
|
-
#
|
|
908
|
-
|
|
909
|
-
--models src/rem/models/entities \
|
|
910
|
-
--output rem/src/rem/sql/install_models.sql
|
|
955
|
+
rem db diff --check # Fail build if drift detected
|
|
956
|
+
```
|
|
911
957
|
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
958
|
+
**Remote Database (Production/Staging):**
|
|
959
|
+
```bash
|
|
960
|
+
# Port-forward to cluster database
|
|
961
|
+
kubectl port-forward -n <namespace> svc/rem-postgres-rw 5433:5432 &
|
|
962
|
+
|
|
963
|
+
# Override connection for diff check
|
|
964
|
+
POSTGRES__CONNECTION_STRING="postgresql://rem:rem@localhost:5433/rem" rem db diff
|
|
965
|
+
|
|
966
|
+
# Apply changes if needed
|
|
967
|
+
POSTGRES__CONNECTION_STRING="postgresql://rem:rem@localhost:5433/rem" \
|
|
968
|
+
rem db apply src/rem/sql/migrations/002_install_models.sql
|
|
916
969
|
```
|
|
917
970
|
|
|
918
|
-
#### `rem db
|
|
971
|
+
#### `rem db rebuild-cache` - Rebuild KV Cache
|
|
919
972
|
|
|
920
|
-
|
|
973
|
+
Rebuild KV_STORE cache from entity tables (after database restart or bulk imports).
|
|
921
974
|
|
|
922
975
|
```bash
|
|
923
|
-
|
|
924
|
-
rem db schema indexes \
|
|
925
|
-
--models src/rem/models/entities \
|
|
926
|
-
--output rem/src/rem/sql/background_indexes.sql
|
|
976
|
+
rem db rebuild-cache
|
|
927
977
|
```
|
|
928
978
|
|
|
929
979
|
#### `rem db schema validate` - Validate Models
|
|
930
980
|
|
|
931
|
-
Validate Pydantic models for schema generation.
|
|
981
|
+
Validate registered Pydantic models for schema generation.
|
|
932
982
|
|
|
933
983
|
```bash
|
|
934
|
-
rem db schema validate
|
|
984
|
+
rem db schema validate
|
|
935
985
|
```
|
|
936
986
|
|
|
937
987
|
### File Processing
|
|
@@ -941,22 +991,14 @@ rem db schema validate --models src/rem/models/entities
|
|
|
941
991
|
Process files with optional custom extractor (ontology extraction).
|
|
942
992
|
|
|
943
993
|
```bash
|
|
944
|
-
# Process all completed files
|
|
945
|
-
rem process files
|
|
946
|
-
--tenant-id acme-corp \
|
|
947
|
-
--status completed \
|
|
948
|
-
--limit 10
|
|
994
|
+
# Process all completed files
|
|
995
|
+
rem process files --status completed --limit 10
|
|
949
996
|
|
|
950
997
|
# Process with custom extractor
|
|
951
|
-
rem process files
|
|
952
|
-
--tenant-id acme-corp \
|
|
953
|
-
--extractor cv-parser-v1 \
|
|
954
|
-
--limit 50
|
|
998
|
+
rem process files --extractor cv-parser-v1 --limit 50
|
|
955
999
|
|
|
956
|
-
# Process files
|
|
957
|
-
rem process files
|
|
958
|
-
--tenant-id acme-corp \
|
|
959
|
-
--lookback-hours 168
|
|
1000
|
+
# Process files for a specific user
|
|
1001
|
+
rem process files --user-id user-123 --status completed
|
|
960
1002
|
```
|
|
961
1003
|
|
|
962
1004
|
#### `rem process ingest` - Ingest File into REM
|
|
@@ -964,14 +1006,13 @@ rem process files \
|
|
|
964
1006
|
Ingest a file into REM with full pipeline (storage + parsing + embedding + database).
|
|
965
1007
|
|
|
966
1008
|
```bash
|
|
967
|
-
# Ingest local file
|
|
1009
|
+
# Ingest local file with metadata
|
|
968
1010
|
rem process ingest /path/to/document.pdf \
|
|
969
|
-
--user-id user-123 \
|
|
970
1011
|
--category legal \
|
|
971
1012
|
--tags contract,2024
|
|
972
1013
|
|
|
973
1014
|
# Ingest with minimal options
|
|
974
|
-
rem process ingest ./meeting-notes.md
|
|
1015
|
+
rem process ingest ./meeting-notes.md
|
|
975
1016
|
```
|
|
976
1017
|
|
|
977
1018
|
#### `rem process uri` - Parse File (Read-Only)
|
|
@@ -996,28 +1037,17 @@ rem process uri s3://bucket/key.docx --output text
|
|
|
996
1037
|
Run full dreaming workflow: extractors → moments → affinity → user model.
|
|
997
1038
|
|
|
998
1039
|
```bash
|
|
999
|
-
# Full workflow
|
|
1000
|
-
rem dreaming full
|
|
1001
|
-
--user-id user-123 \
|
|
1002
|
-
--tenant-id acme-corp
|
|
1040
|
+
# Full workflow (uses default user from settings)
|
|
1041
|
+
rem dreaming full
|
|
1003
1042
|
|
|
1004
1043
|
# Skip ontology extractors
|
|
1005
|
-
rem dreaming full
|
|
1006
|
-
--user-id user-123 \
|
|
1007
|
-
--tenant-id acme-corp \
|
|
1008
|
-
--skip-extractors
|
|
1044
|
+
rem dreaming full --skip-extractors
|
|
1009
1045
|
|
|
1010
1046
|
# Process last 24 hours only
|
|
1011
|
-
rem dreaming full
|
|
1012
|
-
--user-id user-123 \
|
|
1013
|
-
--tenant-id acme-corp \
|
|
1014
|
-
--lookback-hours 24
|
|
1047
|
+
rem dreaming full --lookback-hours 24
|
|
1015
1048
|
|
|
1016
|
-
# Limit resources processed
|
|
1017
|
-
rem dreaming full
|
|
1018
|
-
--user-id user-123 \
|
|
1019
|
-
--tenant-id acme-corp \
|
|
1020
|
-
--limit 100
|
|
1049
|
+
# Limit resources processed for a specific user
|
|
1050
|
+
rem dreaming full --user-id user-123 --limit 100
|
|
1021
1051
|
```
|
|
1022
1052
|
|
|
1023
1053
|
#### `rem dreaming custom` - Custom Extractor
|
|
@@ -1025,16 +1055,11 @@ rem dreaming full \
|
|
|
1025
1055
|
Run specific ontology extractor on user's data.
|
|
1026
1056
|
|
|
1027
1057
|
```bash
|
|
1028
|
-
# Run CV parser on
|
|
1029
|
-
rem dreaming custom
|
|
1030
|
-
--user-id user-123 \
|
|
1031
|
-
--tenant-id acme-corp \
|
|
1032
|
-
--extractor cv-parser-v1
|
|
1058
|
+
# Run CV parser on files
|
|
1059
|
+
rem dreaming custom --extractor cv-parser-v1
|
|
1033
1060
|
|
|
1034
|
-
# Process last week's files
|
|
1061
|
+
# Process last week's files with limit
|
|
1035
1062
|
rem dreaming custom \
|
|
1036
|
-
--user-id user-123 \
|
|
1037
|
-
--tenant-id acme-corp \
|
|
1038
1063
|
--extractor contract-analyzer-v1 \
|
|
1039
1064
|
--lookback-hours 168 \
|
|
1040
1065
|
--limit 50
|
|
@@ -1045,17 +1070,11 @@ rem dreaming custom \
|
|
|
1045
1070
|
Extract temporal narratives from resources.
|
|
1046
1071
|
|
|
1047
1072
|
```bash
|
|
1048
|
-
# Generate moments
|
|
1049
|
-
rem dreaming moments
|
|
1050
|
-
--user-id user-123 \
|
|
1051
|
-
--tenant-id acme-corp \
|
|
1052
|
-
--limit 50
|
|
1073
|
+
# Generate moments
|
|
1074
|
+
rem dreaming moments --limit 50
|
|
1053
1075
|
|
|
1054
1076
|
# Process last 7 days
|
|
1055
|
-
rem dreaming moments
|
|
1056
|
-
--user-id user-123 \
|
|
1057
|
-
--tenant-id acme-corp \
|
|
1058
|
-
--lookback-hours 168
|
|
1077
|
+
rem dreaming moments --lookback-hours 168
|
|
1059
1078
|
```
|
|
1060
1079
|
|
|
1061
1080
|
#### `rem dreaming affinity` - Build Relationships
|
|
@@ -1063,17 +1082,11 @@ rem dreaming moments \
|
|
|
1063
1082
|
Build semantic relationships between resources using embeddings.
|
|
1064
1083
|
|
|
1065
1084
|
```bash
|
|
1066
|
-
# Build affinity graph
|
|
1067
|
-
rem dreaming affinity
|
|
1068
|
-
--user-id user-123 \
|
|
1069
|
-
--tenant-id acme-corp \
|
|
1070
|
-
--limit 100
|
|
1085
|
+
# Build affinity graph
|
|
1086
|
+
rem dreaming affinity --limit 100
|
|
1071
1087
|
|
|
1072
1088
|
# Process recent resources only
|
|
1073
|
-
rem dreaming affinity
|
|
1074
|
-
--user-id user-123 \
|
|
1075
|
-
--tenant-id acme-corp \
|
|
1076
|
-
--lookback-hours 24
|
|
1089
|
+
rem dreaming affinity --lookback-hours 24
|
|
1077
1090
|
```
|
|
1078
1091
|
|
|
1079
1092
|
#### `rem dreaming user-model` - Update User Model
|
|
@@ -1082,9 +1095,7 @@ Update user model from recent activity (preferences, interests, patterns).
|
|
|
1082
1095
|
|
|
1083
1096
|
```bash
|
|
1084
1097
|
# Update user model
|
|
1085
|
-
rem dreaming user-model
|
|
1086
|
-
--user-id user-123 \
|
|
1087
|
-
--tenant-id acme-corp
|
|
1098
|
+
rem dreaming user-model
|
|
1088
1099
|
```
|
|
1089
1100
|
|
|
1090
1101
|
### Evaluation & Experiments
|
|
@@ -1335,6 +1346,30 @@ S3__BUCKET_NAME=rem-storage
|
|
|
1335
1346
|
S3__REGION=us-east-1
|
|
1336
1347
|
```
|
|
1337
1348
|
|
|
1349
|
+
### Building Docker Images
|
|
1350
|
+
|
|
1351
|
+
Each Docker image is published under three tags for traceability:
|
|
1352
|
+
1. `latest` - Always points to the most recent build
|
|
1353
|
+
2. `<git-sha>` - Short commit hash for exact version tracing
|
|
1354
|
+
3. `<version>` - Semantic version from `pyproject.toml`
|
|
1355
|
+
|
|
1356
|
+
```bash
|
|
1357
|
+
# Build and push multi-platform image to Docker Hub
|
|
1358
|
+
VERSION=$(grep '^version' pyproject.toml | cut -d'"' -f2) && \
|
|
1359
|
+
docker buildx build --platform linux/amd64,linux/arm64 \
|
|
1360
|
+
-t percolationlabs/rem:latest \
|
|
1361
|
+
-t percolationlabs/rem:$(git rev-parse --short HEAD) \
|
|
1362
|
+
-t percolationlabs/rem:$VERSION \
|
|
1363
|
+
--push \
|
|
1364
|
+
-f Dockerfile .
|
|
1365
|
+
|
|
1366
|
+
# Load locally for testing (single platform, no push)
|
|
1367
|
+
docker buildx build --platform linux/arm64 \
|
|
1368
|
+
-t percolationlabs/rem:latest \
|
|
1369
|
+
--load \
|
|
1370
|
+
-f Dockerfile .
|
|
1371
|
+
```
|
|
1372
|
+
|
|
1338
1373
|
### Production Deployment (Optional)
|
|
1339
1374
|
|
|
1340
1375
|
For production deployment to AWS EKS with Kubernetes, see the main repository README:
|
|
@@ -1450,6 +1485,110 @@ TraverseQuery ::= TRAVERSE [<edge_types:list>] WITH <initial_query:Query> [DEPTH
|
|
|
1450
1485
|
|
|
1451
1486
|
**Stage 4** (100% answerable): Mature graph with rich historical data. All query types fully functional with high-quality results.
|
|
1452
1487
|
|
|
1488
|
+
## Troubleshooting
|
|
1489
|
+
|
|
1490
|
+
### Apple Silicon Mac: "Failed to build kreuzberg" Error
|
|
1491
|
+
|
|
1492
|
+
**Problem**: Installation fails with `ERROR: Failed building wheel for kreuzberg` on Apple Silicon Macs.
|
|
1493
|
+
|
|
1494
|
+
**Root Cause**: REM uses `kreuzberg>=4.0.0rc1` for document parsing with native ONNX/Rust table extraction. Kreuzberg 4.0.0rc1 provides pre-built wheels for ARM64 macOS (`macosx_14_0_arm64.whl`) but NOT for x86_64 (Intel) macOS. If you're using an x86_64 Python binary (running under Rosetta 2), pip cannot find a compatible wheel and attempts to build from source, which fails.
|
|
1495
|
+
|
|
1496
|
+
**Solution**: Use ARM64 (native) Python instead of x86_64 Python.
|
|
1497
|
+
|
|
1498
|
+
**Step 1: Verify your Python architecture**
|
|
1499
|
+
|
|
1500
|
+
```bash
|
|
1501
|
+
python3 -c "import platform; print(f'Machine: {platform.machine()}')"
|
|
1502
|
+
```
|
|
1503
|
+
|
|
1504
|
+
- **Correct**: `Machine: arm64` (native ARM Python)
|
|
1505
|
+
- **Wrong**: `Machine: x86_64` (Intel Python under Rosetta)
|
|
1506
|
+
|
|
1507
|
+
**Step 2: Install ARM Python via Homebrew** (if not already installed)
|
|
1508
|
+
|
|
1509
|
+
```bash
|
|
1510
|
+
# Install ARM Python
|
|
1511
|
+
brew install python@3.12
|
|
1512
|
+
|
|
1513
|
+
# Verify it's ARM
|
|
1514
|
+
/opt/homebrew/bin/python3.12 -c "import platform; print(platform.machine())"
|
|
1515
|
+
# Should output: arm64
|
|
1516
|
+
```
|
|
1517
|
+
|
|
1518
|
+
**Step 3: Create venv with ARM Python**
|
|
1519
|
+
|
|
1520
|
+
```bash
|
|
1521
|
+
# Use full path to ARM Python
|
|
1522
|
+
/opt/homebrew/bin/python3.12 -m venv .venv
|
|
1523
|
+
|
|
1524
|
+
# Activate and install
|
|
1525
|
+
source .venv/bin/activate
|
|
1526
|
+
pip install "remdb[all]"
|
|
1527
|
+
```
|
|
1528
|
+
|
|
1529
|
+
**Why This Happens**: Some users have both Intel Homebrew (`/usr/local`) and ARM Homebrew (`/opt/homebrew`) installed. If your system `python3` points to the Intel version at `/usr/local/bin/python3`, you'll hit this issue. The fix is to explicitly use the ARM Python from `/opt/homebrew/bin/python3.12`.
|
|
1530
|
+
|
|
1531
|
+
**Verification**: After a successful installation, pip's output should include lines like the following (exact version numbers will vary):
|
|
1532
|
+
```
|
|
1533
|
+
Using cached kreuzberg-4.0.0rc1-cp310-abi3-macosx_14_0_arm64.whl (19.8 MB)
|
|
1534
|
+
Successfully installed ... kreuzberg-4.0.0rc1 ... remdb-0.3.10
|
|
1535
|
+
```
|
|
1536
|
+
|
|
1537
|
+
## Using REM as a Library
|
|
1538
|
+
|
|
1539
|
+
REM wraps FastAPI - extend it exactly as you would any FastAPI app.
|
|
1540
|
+
|
|
1541
|
+
```python
|
|
1542
|
+
import rem
|
|
1543
|
+
from rem import create_app
|
|
1544
|
+
from rem.models.core import CoreModel
|
|
1545
|
+
|
|
1546
|
+
# 1. Register models (for schema generation)
|
|
1547
|
+
rem.register_models(MyModel, AnotherModel)
|
|
1548
|
+
|
|
1549
|
+
# 2. Register schema paths (for custom agents/evaluators)
|
|
1550
|
+
rem.register_schema_path("./schemas")
|
|
1551
|
+
|
|
1552
|
+
# 3. Create app
|
|
1553
|
+
app = create_app()
|
|
1554
|
+
|
|
1555
|
+
# 4. Extend like normal FastAPI
|
|
1556
|
+
app.include_router(my_router)
|
|
1557
|
+
|
|
1558
|
+
@app.mcp_server.tool()
|
|
1559
|
+
async def my_tool(query: str) -> dict:
|
|
1560
|
+
"""Custom MCP tool."""
|
|
1561
|
+
return {"result": query}
|
|
1562
|
+
```
|
|
1563
|
+
|
|
1564
|
+
### Project Structure
|
|
1565
|
+
|
|
1566
|
+
```
|
|
1567
|
+
my-rem-app/
|
|
1568
|
+
├── my_app/
|
|
1569
|
+
│ ├── main.py # Entry point (create_app + extensions)
|
|
1570
|
+
│ ├── models.py # Custom models (inherit CoreModel)
|
|
1571
|
+
│ └── routers/ # Custom FastAPI routers
|
|
1572
|
+
├── schemas/
|
|
1573
|
+
│ ├── agents/ # Custom agent YAML schemas
|
|
1574
|
+
│ └── evaluators/ # Custom evaluator schemas
|
|
1575
|
+
├── sql/migrations/ # Custom SQL migrations
|
|
1576
|
+
└── pyproject.toml
|
|
1577
|
+
```
|
|
1578
|
+
|
|
1579
|
+
Generate this structure with: `rem scaffold my-app`
|
|
1580
|
+
|
|
1581
|
+
### Extension Points
|
|
1582
|
+
|
|
1583
|
+
| Extension | How |
|
|
1584
|
+
|-----------|-----|
|
|
1585
|
+
| **Routes** | `app.include_router(router)` or `@app.get()` |
|
|
1586
|
+
| **MCP Tools** | `@app.mcp_server.tool()` decorator or `app.mcp_server.add_tool(fn)` |
|
|
1587
|
+
| **MCP Resources** | `@app.mcp_server.resource("uri://...")` or `app.mcp_server.add_resource(fn)` |
|
|
1588
|
+
| **MCP Prompts** | `@app.mcp_server.prompt()` or `app.mcp_server.add_prompt(fn)` |
|
|
1589
|
+
| **Models** | `rem.register_models(Model)` then `rem db schema generate` |
|
|
1590
|
+
| **Agent Schemas** | `rem.register_schema_path("./schemas")` or `SCHEMA__PATHS` env var |
|
|
1591
|
+
|
|
1453
1592
|
## License
|
|
1454
1593
|
|
|
1455
1594
|
MIT
|