remdb 0.3.14__py3-none-any.whl → 0.3.157__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. rem/agentic/README.md +76 -0
  2. rem/agentic/__init__.py +15 -0
  3. rem/agentic/agents/__init__.py +32 -2
  4. rem/agentic/agents/agent_manager.py +310 -0
  5. rem/agentic/agents/sse_simulator.py +502 -0
  6. rem/agentic/context.py +51 -27
  7. rem/agentic/context_builder.py +5 -3
  8. rem/agentic/llm_provider_models.py +301 -0
  9. rem/agentic/mcp/tool_wrapper.py +155 -18
  10. rem/agentic/otel/setup.py +93 -4
  11. rem/agentic/providers/phoenix.py +371 -108
  12. rem/agentic/providers/pydantic_ai.py +280 -57
  13. rem/agentic/schema.py +361 -21
  14. rem/agentic/tools/rem_tools.py +3 -3
  15. rem/api/README.md +215 -1
  16. rem/api/deps.py +255 -0
  17. rem/api/main.py +132 -40
  18. rem/api/mcp_router/resources.py +1 -1
  19. rem/api/mcp_router/server.py +28 -5
  20. rem/api/mcp_router/tools.py +555 -7
  21. rem/api/routers/admin.py +494 -0
  22. rem/api/routers/auth.py +278 -4
  23. rem/api/routers/chat/completions.py +402 -20
  24. rem/api/routers/chat/models.py +88 -10
  25. rem/api/routers/chat/otel_utils.py +33 -0
  26. rem/api/routers/chat/sse_events.py +542 -0
  27. rem/api/routers/chat/streaming.py +697 -45
  28. rem/api/routers/dev.py +81 -0
  29. rem/api/routers/feedback.py +268 -0
  30. rem/api/routers/messages.py +473 -0
  31. rem/api/routers/models.py +78 -0
  32. rem/api/routers/query.py +360 -0
  33. rem/api/routers/shared_sessions.py +406 -0
  34. rem/auth/__init__.py +13 -3
  35. rem/auth/middleware.py +186 -22
  36. rem/auth/providers/__init__.py +4 -1
  37. rem/auth/providers/email.py +215 -0
  38. rem/cli/commands/README.md +237 -64
  39. rem/cli/commands/cluster.py +1808 -0
  40. rem/cli/commands/configure.py +4 -7
  41. rem/cli/commands/db.py +386 -143
  42. rem/cli/commands/experiments.py +468 -76
  43. rem/cli/commands/process.py +14 -8
  44. rem/cli/commands/schema.py +97 -50
  45. rem/cli/commands/session.py +336 -0
  46. rem/cli/dreaming.py +2 -2
  47. rem/cli/main.py +29 -6
  48. rem/config.py +10 -3
  49. rem/models/core/core_model.py +7 -1
  50. rem/models/core/experiment.py +58 -14
  51. rem/models/core/rem_query.py +5 -2
  52. rem/models/entities/__init__.py +25 -0
  53. rem/models/entities/domain_resource.py +38 -0
  54. rem/models/entities/feedback.py +123 -0
  55. rem/models/entities/message.py +30 -1
  56. rem/models/entities/ontology.py +1 -1
  57. rem/models/entities/ontology_config.py +1 -1
  58. rem/models/entities/session.py +83 -0
  59. rem/models/entities/shared_session.py +180 -0
  60. rem/models/entities/subscriber.py +175 -0
  61. rem/models/entities/user.py +1 -0
  62. rem/registry.py +10 -4
  63. rem/schemas/agents/core/agent-builder.yaml +134 -0
  64. rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
  65. rem/schemas/agents/examples/contract-extractor.yaml +1 -1
  66. rem/schemas/agents/examples/cv-parser.yaml +1 -1
  67. rem/schemas/agents/rem.yaml +7 -3
  68. rem/services/__init__.py +3 -1
  69. rem/services/content/service.py +92 -19
  70. rem/services/email/__init__.py +10 -0
  71. rem/services/email/service.py +459 -0
  72. rem/services/email/templates.py +360 -0
  73. rem/services/embeddings/api.py +4 -4
  74. rem/services/embeddings/worker.py +16 -16
  75. rem/services/phoenix/client.py +154 -14
  76. rem/services/postgres/README.md +197 -15
  77. rem/services/postgres/__init__.py +2 -1
  78. rem/services/postgres/diff_service.py +547 -0
  79. rem/services/postgres/pydantic_to_sqlalchemy.py +470 -140
  80. rem/services/postgres/repository.py +132 -0
  81. rem/services/postgres/schema_generator.py +205 -4
  82. rem/services/postgres/service.py +6 -6
  83. rem/services/rem/parser.py +44 -9
  84. rem/services/rem/service.py +36 -2
  85. rem/services/session/compression.py +137 -51
  86. rem/services/session/reload.py +15 -8
  87. rem/settings.py +515 -27
  88. rem/sql/background_indexes.sql +21 -16
  89. rem/sql/migrations/001_install.sql +387 -54
  90. rem/sql/migrations/002_install_models.sql +2304 -377
  91. rem/sql/migrations/003_optional_extensions.sql +326 -0
  92. rem/sql/migrations/004_cache_system.sql +548 -0
  93. rem/sql/migrations/005_schema_update.sql +145 -0
  94. rem/utils/README.md +45 -0
  95. rem/utils/__init__.py +18 -0
  96. rem/utils/date_utils.py +2 -2
  97. rem/utils/files.py +157 -1
  98. rem/utils/model_helpers.py +156 -1
  99. rem/utils/schema_loader.py +220 -22
  100. rem/utils/sql_paths.py +146 -0
  101. rem/utils/sql_types.py +3 -1
  102. rem/utils/vision.py +1 -1
  103. rem/workers/__init__.py +3 -1
  104. rem/workers/db_listener.py +579 -0
  105. rem/workers/unlogged_maintainer.py +463 -0
  106. {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/METADATA +340 -229
  107. {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/RECORD +109 -80
  108. {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/WHEEL +1 -1
  109. rem/sql/002_install_models.sql +0 -1068
  110. rem/sql/install_models.sql +0 -1051
  111. rem/sql/migrations/003_seed_default_user.sql +0 -48
  112. {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/entry_points.txt +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: remdb
- Version: 0.3.14
+ Version: 0.3.157
  Summary: Resources Entities Moments - Bio-inspired memory system for agentic AI workloads
  Project-URL: Homepage, https://github.com/Percolation-Labs/reminiscent
  Project-URL: Documentation, https://github.com/Percolation-Labs/reminiscent/blob/main/README.md
@@ -12,9 +12,11 @@ Keywords: agents,ai,mcp,memory,postgresql,vector-search
  Classifier: Development Status :: 3 - Alpha
  Classifier: Intended Audience :: Developers
  Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
- Requires-Python: <3.13,>=3.12
+ Requires-Python: <3.14,>=3.11
  Requires-Dist: aioboto3>=13.0.0
  Requires-Dist: arize-phoenix>=5.0.0
  Requires-Dist: asyncpg>=0.30.0
@@ -101,32 +103,30 @@ Cloud-native unified memory infrastructure for agentic AI systems built with Pyd
  - **Database Layer**: PostgreSQL 18 with pgvector for multi-index memory (KV + Vector + Graph)
  - **REM Query Dialect**: Custom query language with O(1) lookups, semantic search, graph traversal
  - **Ingestion & Dreaming**: Background workers for content extraction and progressive index enrichment (0% → 100% answerable)
- - **Observability & Evals**: OpenTelemetry tracing + Arize Phoenix + LLM-as-a-Judge evaluation framework
+ - **Observability & Evals**: OpenTelemetry tracing supporting LLM-as-a-Judge evaluation frameworks

  ## Features

  | Feature | Description | Benefits |
  |---------|-------------|----------|
  | **OpenAI-Compatible Chat API** | Drop-in replacement for OpenAI chat completions API with streaming support | Use with existing OpenAI clients, switch models across providers (OpenAI, Anthropic, etc.) |
- | **Built-in MCP Server** | FastMCP server with 4 tools + 3 resources for memory operations | Export memory to Claude Desktop, Cursor, or any MCP-compatible host |
+ | **Built-in MCP Server** | FastMCP server with 4 tools + 5 resources for memory operations | Export memory to Claude Desktop, Cursor, or any MCP-compatible host |
  | **REM Query Engine** | Multi-index query system (LOOKUP, FUZZY, SEARCH, SQL, TRAVERSE) with custom dialect | O(1) lookups, semantic search, graph traversal - all tenant-isolated |
  | **Dreaming Workers** | Background workers for entity extraction, moment generation, and affinity matching | Automatic knowledge graph construction from resources (0% → 100% query answerable) |
  | **PostgreSQL + pgvector** | CloudNativePG with PostgreSQL 18, pgvector extension, streaming replication | Production-ready vector search, no external vector DB needed |
  | **AWS EKS Recipe** | Complete infrastructure-as-code with Pulumi, Karpenter, ArgoCD | Deploy to production EKS in minutes with auto-scaling and GitOps |
  | **JSON Schema Agents** | Dynamic agent creation from YAML schemas via Pydantic AI factory | Define agents declaratively, version control schemas, load dynamically |
- | **Content Providers** | Audio transcription (Whisper), vision (GPT-4V, Claude), PDFs, DOCX, images | Multimodal ingestion out of the box with format detection |
- | **Configurable Embeddings** | Provider-agnostic embedding system (OpenAI, Cohere, Jina) | Switch embedding providers via env vars, no code changes |
+ | **Content Providers** | Audio transcription (Whisper), vision (OpenAI, Anthropic, Gemini), PDFs, DOCX, PPTX, XLSX, images | Multimodal ingestion out of the box with format detection |
+ | **Configurable Embeddings** | OpenAI embedding system (text-embedding-3-small) | Production-ready embeddings, additional providers planned |
  | **Multi-Tenancy** | Tenant isolation at database level with automatic scoping | SaaS-ready with complete data separation per tenant |
- | **Streaming Everything** | SSE for chat, background workers for embeddings, async throughout | Real-time responses, non-blocking operations, scalable |
  | **Zero Vendor Lock-in** | Raw HTTP clients (no OpenAI SDK), swappable providers, open standards | Not tied to any vendor, easy to migrate, full control |

  ## Quick Start

  Choose your path:

- - **Option 1: Package Users with Example Data** (Recommended for first-time users) - PyPI + example datasets
- - **Option 2: Package Users** (Recommended for non-developers) - PyPI package + dockerized database
- - **Option 3: Developers** - Clone repo, local development with uv
+ - **Option 1: Package Users with Example Data** (Recommended) - PyPI + example datasets
+ - **Option 2: Developers** - Clone repo, local development with uv

  ---

@@ -145,34 +145,26 @@ pip install "remdb[all]"
  git clone https://github.com/Percolation-Labs/remstack-lab.git
  cd remstack-lab

- # Start PostgreSQL with docker-compose
+ # Start services (PostgreSQL, Phoenix observability)
  curl -O https://gist.githubusercontent.com/percolating-sirsh/d117b673bc0edfdef1a5068ccd3cf3e5/raw/docker-compose.prebuilt.yml
- docker compose -f docker-compose.prebuilt.yml up -d postgres
+ docker compose -f docker-compose.prebuilt.yml up -d

  # Configure REM (creates ~/.rem/config.yaml and installs database schema)
  # Add --claude-desktop to register with Claude Desktop app
  rem configure --install --claude-desktop

- # Load quickstart dataset (uses default user)
+ # Load quickstart dataset
  rem db load datasets/quickstart/sample_data.yaml

- # Optional: Set default LLM provider via environment variable
- # export LLM__DEFAULT_MODEL="openai:gpt-4.1-nano"  # Fast and cheap
- # export LLM__DEFAULT_MODEL="anthropic:claude-sonnet-4-5-20250929"  # High quality (default)
-
  # Ask questions
  rem ask "What documents exist in the system?"
  rem ask "Show me meetings about API design"

- # Ingest files (PDF, DOCX, images, etc.) - note: requires remstack-lab
+ # Ingest files (PDF, DOCX, images, etc.)
  rem process ingest datasets/formats/files/bitcoin_whitepaper.pdf --category research --tags bitcoin,whitepaper

  # Query ingested content
  rem ask "What is the Bitcoin whitepaper about?"
-
- # Try other datasets (use --user-id for multi-tenant scenarios)
- rem db load datasets/domains/recruitment/scenarios/candidate_pipeline/data.yaml --user-id acme-corp
- rem ask --user-id acme-corp "Show me candidates with Python experience"
  ```

  **What you get:**
@@ -182,130 +174,39 @@ rem ask --user-id acme-corp "Show me candidates with Python experience"

  **Learn more**: [remstack-lab repository](https://github.com/Percolation-Labs/remstack-lab)

- ---
-
- ## Option 2: Package Users (No Example Data)
-
- **Best for**: Using REM as a service (API + CLI) without modifying code, bringing your own data.
+ ### Using the API

- ### Step 1: Start Database and API with Docker Compose
+ Once configured, you can also use the OpenAI-compatible chat completions API:

  ```bash
- # Create a project directory
- mkdir my-rem-project && cd my-rem-project
-
- # Download docker-compose file from public gist
- curl -O https://gist.githubusercontent.com/percolating-sirsh/d117b673bc0edfdef1a5068ccd3cf3e5/raw/docker-compose.prebuilt.yml
-
- # IMPORTANT: Export API keys BEFORE running docker compose
- # Docker Compose reads env vars at startup - exporting them after won't work!
-
- # Required: OpenAI for embeddings (text-embedding-3-small)
- export OPENAI_API_KEY="sk-..."
-
- # Recommended: At least one chat completion provider
- export ANTHROPIC_API_KEY="sk-ant-..."  # Claude Sonnet 4.5 (high quality)
- export CEREBRAS_API_KEY="csk-..."      # Cerebras (fast, cheap inference)
-
- # Start PostgreSQL + API
+ # Start all services (PostgreSQL, Phoenix, API)
  docker compose -f docker-compose.prebuilt.yml up -d

- # Verify services are running
- curl http://localhost:8000/health
- ```
-
- This starts:
- - **PostgreSQL** with pgvector on port **5051** (connection: `postgresql://rem:rem@localhost:5051/rem`)
- - **REM API** on port **8000** with OpenAI-compatible chat completions + MCP server
- - Uses pre-built Docker image from Docker Hub (no local build required)
-
- ### Step 2: Install and Configure CLI (REQUIRED)
-
- **This step is required** before you can use REM - it installs the database schema and configures your LLM API keys.
-
- ```bash
- # Install remdb package from PyPI
- pip install remdb[all]
-
- # Configure REM (defaults to port 5051 for package users)
- rem configure --install --claude-desktop
+ # Test the API
+ curl -X POST http://localhost:8000/api/v1/chat/completions \
+   -H "Content-Type: application/json" \
+   -H "X-Session-Id: a1b2c3d4-e5f6-7890-abcd-ef1234567890" \
+   -d '{
+     "model": "anthropic:claude-sonnet-4-5-20250929",
+     "messages": [{"role": "user", "content": "What documents did Sarah Chen author?"}],
+     "stream": false
+   }'
  ```

- The interactive wizard will:
- 1. **Configure PostgreSQL**: Defaults to `postgresql://rem:rem@localhost:5051/rem` (prebuilt docker-compose)
-    - Just press Enter to accept defaults
-    - Custom database: Enter your own host/port/credentials
- 2. **Configure LLM providers**: Enter your OpenAI/Anthropic API keys
- 3. **Install database tables**: Creates schema, functions, indexes (**required for CLI/API to work**)
- 4. **Register with Claude Desktop**: Adds REM MCP server to Claude
-
- Configuration saved to `~/.rem/config.yaml` (can edit with `rem configure --edit`)
-
  **Port Guide:**
  - **5051**: Package users with `docker-compose.prebuilt.yml` (pre-built image)
  - **5050**: Developers with `docker-compose.yml` (local build)
- - **Custom**: Your own PostgreSQL database

  **Next Steps:**
  - See [CLI Reference](#cli-reference) for all available commands
  - See [REM Query Dialect](#rem-query-dialect) for query examples
  - See [API Endpoints](#api-endpoints) for OpenAI-compatible API usage

- ### Step 3: Load Sample Data (Optional but Recommended)
-
- **Option A: Clone example datasets** (Recommended - works with all README examples)
-
- ```bash
- # Clone datasets repository
- git clone https://github.com/Percolation-Labs/remstack-lab.git
-
- # Load quickstart dataset (uses default user)
- rem db load --file remstack-lab/datasets/quickstart/sample_data.yaml
-
- # Test with sample queries
- rem ask "What documents exist in the system?"
- rem ask "Show me meetings about API design"
- rem ask "Who is Sarah Chen?"
-
- # Try domain-specific datasets (use --user-id for multi-tenant scenarios)
- rem db load --file remstack-lab/datasets/domains/recruitment/scenarios/candidate_pipeline/data.yaml --user-id acme-corp
- rem ask --user-id acme-corp "Show me candidates with Python experience"
- ```
-
- **Option B: Bring your own data**
-
- ```bash
- # Ingest your own files (uses default user)
- echo "REM is a bio-inspired memory system for agentic AI workloads." > test-doc.txt
- rem process ingest test-doc.txt --category documentation --tags rem,ai
-
- # Query your ingested data
- rem ask "What do you know about REM from my knowledge base?"
- ```
-
- ### Step 4: Test the API
-
- ```bash
- # Test the OpenAI-compatible chat completions API
- curl -X POST http://localhost:8000/api/v1/chat/completions \
-   -H "Content-Type: application/json" \
-   -H "X-User-Id: demo-user" \
-   -d '{
-     "model": "anthropic:claude-sonnet-4-5-20250929",
-     "messages": [{"role": "user", "content": "What documents did Sarah Chen author?"}],
-     "stream": false
-   }'
- ```
-
- **Available Commands:**
- - `rem ask` - Natural language queries to REM
- - `rem process ingest <file>` - Full ingestion pipeline (storage + parsing + embedding + database)
- - `rem process uri <file>` - READ-ONLY parsing (no database storage, useful for testing parsers)
- - `rem db load --file <yaml>` - Load structured datasets directly
+ ---

  ## Example Datasets

- 🎯 **Recommended**: Clone [remstack-lab](https://github.com/Percolation-Labs/remstack-lab) for curated datasets organized by domain and format.
+ Clone [remstack-lab](https://github.com/Percolation-Labs/remstack-lab) for curated datasets organized by domain and format.

  **What's included:**
  - **Quickstart**: Minimal dataset (3 users, 3 resources, 3 moments) - perfect for first-time users
@@ -317,14 +218,11 @@ curl -X POST http://localhost:8000/api/v1/chat/completions \
  ```bash
  cd remstack-lab

- # Load any dataset (uses default user)
+ # Load any dataset
  rem db load --file datasets/quickstart/sample_data.yaml

  # Explore formats
  rem db load --file datasets/formats/engrams/scenarios/team_meeting/team_standup_meeting.yaml
-
- # Try domain-specific examples (use --user-id for multi-tenant scenarios)
- rem db load --file datasets/domains/recruitment/scenarios/candidate_pipeline/data.yaml --user-id acme-corp
  ```

  ## See Also
@@ -435,7 +333,7 @@ rem ask research-assistant "Find documents about machine learning architecture"
  rem ask research-assistant "Summarize recent API design documents" --stream

  # With session continuity
- rem ask research-assistant "What did we discuss about ML?" --session-id abc-123
+ rem ask research-assistant "What did we discuss about ML?" --session-id c3d4e5f6-a7b8-9012-cdef-345678901234
  ```

  ### Agent Schema Structure
@@ -478,29 +376,16 @@ REM provides **4 built-in MCP tools** your agents can use:

  ### Multi-User Isolation

- Custom agents are **scoped by `user_id`**, ensuring complete data isolation:
+ For multi-tenant deployments, custom agents are **scoped by `user_id`**, ensuring complete data isolation. Use the `--user-id` flag when you need tenant separation:

  ```bash
- # User A creates a custom agent
- rem process ingest my-agent.yaml --user-id user-a --category agents
-
- # User B cannot see User A's agent
- rem ask my-agent "test" --user-id user-b
- # ❌ Error: Schema not found (LOOKUP returns no results for user-b)
+ # Create agent for specific tenant
+ rem process ingest my-agent.yaml --user-id tenant-a --category agents

- # User A can use their agent
- rem ask my-agent "test" --user-id user-a
- # ✅ Works - LOOKUP finds schema for user-a
+ # Query with tenant context
+ rem ask my-agent "test" --user-id tenant-a
  ```

- ### Advanced: Ontology Extractors
-
- Custom agents can also be used as **ontology extractors** to extract structured knowledge from files. See [CLAUDE.md](../CLAUDE.md#ontology-extraction-pattern) for details on:
- - Multi-provider testing (`provider_configs`)
- - Semantic search configuration (`embedding_fields`)
- - File matching rules (`OntologyConfig`)
- - Dreaming workflow integration
-
  ### Troubleshooting

  **Schema not found error:**
@@ -534,15 +419,15 @@ REM provides a custom query language designed for **LLM-driven iterated retrieva
  Unlike traditional single-shot SQL queries, the REM dialect is optimized for **multi-turn exploration** where LLMs participate in query planning:

  - **Iterated Queries**: Queries return partial results that LLMs use to refine subsequent queries
- - **Composable WITH Syntax**: Chain operations together (e.g., `TRAVERSE FROM ... WITH LOOKUP "..."`)
+ - **Composable WITH Syntax**: Chain operations together (e.g., `TRAVERSE edge_type WITH LOOKUP "..."`)
  - **Mixed Indexes**: Combines exact lookups (O(1)), semantic search (vector), and graph traversal
  - **Query Planner Participation**: Results include metadata for LLMs to decide next steps

  **Example Multi-Turn Flow**:
  ```
  Turn 1: LOOKUP "sarah-chen" → Returns entity + available edge types
- Turn 2: TRAVERSE FROM "sarah-chen" TYPE "authored_by" DEPTH 1 → Returns connected documents
- Turn 3: SEARCH "architecture decisions" WITH TRAVERSE FROM "sarah-chen" Combines semantic + graph
+ Turn 2: TRAVERSE authored_by WITH LOOKUP "sarah-chen" DEPTH 1 → Returns connected documents
+ Turn 3: SEARCH "architecture decisions" → Semantic search, then explore graph from results
  ```

  This enables LLMs to **progressively build context** rather than requiring perfect queries upfront.
@@ -595,8 +480,8 @@ SEARCH "contract disputes" FROM resources WHERE tags @> ARRAY['legal'] LIMIT 5
  Follow `graph_edges` relationships across the knowledge graph.

  ```sql
- TRAVERSE FROM "sarah-chen" TYPE "authored_by" DEPTH 2
- TRAVERSE FROM "api-design-v2" TYPE "references,depends_on" DEPTH 3
+ TRAVERSE authored_by WITH LOOKUP "sarah-chen" DEPTH 2
+ TRAVERSE references,depends_on WITH LOOKUP "api-design-v2" DEPTH 3
  ```

  **Features**:
@@ -689,7 +574,7 @@ SEARCH "API migration planning" FROM resources LIMIT 5
  LOOKUP "tidb-migration-spec" FROM resources

  # Query 3: Find related people
- TRAVERSE FROM "tidb-migration-spec" TYPE "authored_by,reviewed_by" DEPTH 1
+ TRAVERSE authored_by,reviewed_by WITH LOOKUP "tidb-migration-spec" DEPTH 1

  # Query 4: Recent activity
  SELECT * FROM moments WHERE
@@ -706,7 +591,7 @@ All queries automatically scoped by `user_id` for complete data isolation:
  SEARCH "contracts" FROM resources LIMIT 10

  -- No cross-user data leakage
- TRAVERSE FROM "project-x" TYPE "references" DEPTH 3
+ TRAVERSE references WITH LOOKUP "project-x" DEPTH 3
  ```

  ## API Endpoints
@@ -718,8 +603,8 @@ POST /api/v1/chat/completions
  ```

  **Headers**:
- - `X-Tenant-Id`: Tenant identifier (required for REM)
- - `X-User-Id`: User identifier
+ - `X-User-Id`: User identifier (scopes data isolation; falls back to the default user if omitted)
+ - `X-Tenant-Id`: Deprecated - use `X-User-Id` instead (kept for backwards compatibility)
  - `X-Session-Id`: Session/conversation identifier
  - `X-Agent-Schema`: Agent schema URI to use

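Because the endpoint is a drop-in replacement for the OpenAI chat completions API, these headers can be passed through any OpenAI client. A minimal sketch using the official `openai` Python package, assuming the API is served on localhost:8000 as in the quick start (the user id and session id are illustrative values):

```python
from openai import OpenAI

# Point the client at the REM API; the key is unused unless auth is enabled.
client = OpenAI(
    base_url="http://localhost:8000/api/v1",
    api_key="not-needed",
    default_headers={
        "X-User-Id": "demo-user",  # hypothetical user, for illustration only
        "X-Session-Id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
    },
)

response = client.chat.completions.create(
    model="anthropic:claude-sonnet-4-5-20250929",
    messages=[{"role": "user", "content": "What documents did Sarah Chen author?"}],
)
print(response.choices[0].message.content)
```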
@@ -858,81 +743,144 @@ rem serve --log-level debug

  ### Database Management

- #### `rem db migrate` - Run Migrations
+ REM uses a **code-as-source-of-truth** approach for database schema management. Pydantic models define the schema, and the database is kept in sync via diff-based migrations.

- Apply database migrations (install.sql and install_models.sql).
+ #### Schema Management Philosophy
+
+ **Two migration files only:**
+ - `001_install.sql` - Core infrastructure (extensions, functions, KV store)
+ - `002_install_models.sql` - Entity tables (auto-generated from Pydantic models)
+
+ **No incremental migrations** (003, 004, etc.) - the models file is always regenerated to match code.
+
+ #### `rem db schema generate` - Regenerate Schema SQL
+
+ Generate `002_install_models.sql` from registered Pydantic models.

  ```bash
- # Apply all migrations
- rem db migrate
+ # Regenerate from model registry
+ rem db schema generate

- # Core infrastructure only (extensions, functions)
- rem db migrate --install
+ # Output: src/rem/sql/migrations/002_install_models.sql
+ ```

- # Entity tables only (Resource, Message, etc.)
- rem db migrate --models
+ This generates:
+ - CREATE TABLE statements for each registered entity
+ - Embeddings tables (`embeddings_<table>`)
+ - KV_STORE triggers for cache maintenance
+ - Foreground indexes (GIN for JSONB, B-tree for lookups)
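For orientation, a rough sketch of the shape of that generated DDL for a single registered entity. Everything here is hypothetical - table name, columns, and index names are illustrative, not exact generator output; only the `embeddings_<table>` convention and the index kinds come from the list above:

```sql
-- Hypothetical excerpt of a generated 002_install_models.sql
CREATE TABLE IF NOT EXISTS my_entity (
    id UUID PRIMARY KEY,
    name TEXT NOT NULL,
    user_id TEXT,                          -- tenant scoping
    metadata JSONB,
    created_at TIMESTAMPTZ DEFAULT now()
);

-- Companion vector table, following the embeddings_<table> convention
CREATE TABLE IF NOT EXISTS embeddings_my_entity (
    id UUID PRIMARY KEY,
    entity_id UUID REFERENCES my_entity(id),
    embedding vector(1536)                 -- pgvector, text-embedding-3-small dimension
);

-- Foreground indexes: GIN for JSONB, B-tree for lookups
CREATE INDEX IF NOT EXISTS idx_my_entity_metadata ON my_entity USING GIN (metadata);
CREATE INDEX IF NOT EXISTS idx_my_entity_name ON my_entity (name);
```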

- # Background indexes (HNSW for vectors)
- rem db migrate --background-indexes
+ #### `rem db diff` - Detect Schema Drift
+
+ Compare Pydantic models against the live database using Alembic autogenerate.
+
+ ```bash
+ # Show additive changes only (default, safe for production)
+ rem db diff

- # Custom connection string
- rem db migrate --connection "postgresql://user:pass@host:5432/db"
+ # Show all changes including drops
+ rem db diff --strategy full

- # Custom SQL directory
- rem db migrate --sql-dir /path/to/sql
+ # Show additive + safe type widenings
+ rem db diff --strategy safe
+
+ # CI mode: exit 1 if drift detected
+ rem db diff --check
+
+ # Generate migration SQL for changes
+ rem db diff --generate
  ```

- #### `rem db status` - Migration Status
+ **Migration Strategies:**
+ | Strategy | Description |
+ |----------|-------------|
+ | `additive` | Only ADD columns/tables/indexes (safe, no data loss) - **default** |
+ | `full` | All changes including DROPs (use with caution) |
+ | `safe` | Additive + safe column type widenings (e.g., VARCHAR(50) → VARCHAR(256)) |
+
+ **Output shows:**
+ - `+ ADD COLUMN` - Column in model but not in DB
+ - `- DROP COLUMN` - Column in DB but not in model (only with `--strategy full`)
+ - `~ ALTER COLUMN` - Column type or constraints differ
+ - `+ CREATE TABLE` / `- DROP TABLE` - Table additions/removals
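As a concrete illustration of the additive strategy: if a new optional field were added to a registered model (a hypothetical `priority` field here), `rem db diff --generate` would emit an ADD-only statement along these lines rather than any destructive change:

```sql
-- Hypothetical additive migration emitted by rem db diff --generate
ALTER TABLE my_entity ADD COLUMN IF NOT EXISTS priority INTEGER;
```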

- Show applied migrations and execution times.
+ #### `rem db apply` - Apply SQL Directly
+
+ Apply a SQL file directly to the database (bypasses migration tracking).

  ```bash
- rem db status
+ # Apply with audit logging (default)
+ rem db apply src/rem/sql/migrations/002_install_models.sql
+
+ # Preview without executing
+ rem db apply --dry-run src/rem/sql/migrations/002_install_models.sql
+
+ # Apply without audit logging
+ rem db apply --no-log src/rem/sql/migrations/002_install_models.sql
  ```

- #### `rem db rebuild-cache` - Rebuild KV Cache
+ #### `rem db migrate` - Initial Setup

- Rebuild KV_STORE cache from entity tables (after database restart or bulk imports).
+ Apply standard migrations (001 + 002). Use for initial setup only.

  ```bash
- rem db rebuild-cache
+ # Apply infrastructure + entity tables
+ rem db migrate
+
+ # Include background indexes (HNSW for vectors)
+ rem db migrate --background-indexes
  ```

- ### Schema Management
+ #### Database Workflows

- #### `rem db schema generate` - Generate SQL Schema
+ **Initial Setup (Local):**
+ ```bash
+ rem db schema generate   # Generate from models
+ rem db migrate           # Apply 001 + 002
+ rem db diff              # Verify no drift
+ ```

- Generate database schema from Pydantic models.
+ **Adding/Modifying Models:**
+ ```bash
+ # 1. Edit models in src/rem/models/entities/
+ # 2. Register new models in src/rem/registry.py
+ rem db schema generate   # Regenerate schema
+ rem db diff              # See what changed
+ rem db apply src/rem/sql/migrations/002_install_models.sql
+ ```

+ **CI/CD Pipeline:**
  ```bash
- # Generate install_models.sql from entity models
- rem db schema generate \
-   --models src/rem/models/entities \
-   --output rem/src/rem/sql/install_models.sql
+ rem db diff --check   # Fail build if drift detected
+ ```
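Wired into CI, the drift check is a single gate step. A sketch as a GitHub Actions job - the workflow file name, secret name, and Python version are placeholder assumptions:

```yaml
# .github/workflows/schema-drift.yml (hypothetical)
name: schema-drift
on: [pull_request]
jobs:
  check-drift:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - run: pip install "remdb[all]"
      # Fails the build (exit 1) when models and database disagree
      - run: rem db diff --check
        env:
          POSTGRES__CONNECTION_STRING: ${{ secrets.STAGING_DATABASE_URL }}
```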
+
+ **Remote Database (Production/Staging):**
+ ```bash
+ # Port-forward to cluster database
+ kubectl port-forward -n <namespace> svc/rem-postgres-rw 5433:5432 &

- # Generate migration file
- rem db schema generate \
-   --models src/rem/models/entities \
-   --output rem/src/rem/sql/migrations/003_add_fields.sql
+ # Override connection for diff check
+ POSTGRES__CONNECTION_STRING="postgresql://rem:rem@localhost:5433/rem" rem db diff
+
+ # Apply changes if needed
+ POSTGRES__CONNECTION_STRING="postgresql://rem:rem@localhost:5433/rem" \
+   rem db apply src/rem/sql/migrations/002_install_models.sql
  ```

- #### `rem db schema indexes` - Generate Background Indexes
+ #### `rem db rebuild-cache` - Rebuild KV Cache

- Generate SQL for background index creation (HNSW for vectors).
+ Rebuild KV_STORE cache from entity tables (after database restart or bulk imports).

  ```bash
- # Generate background_indexes.sql
- rem db schema indexes \
-   --models src/rem/models/entities \
-   --output rem/src/rem/sql/background_indexes.sql
+ rem db rebuild-cache
  ```

  #### `rem db schema validate` - Validate Models

- Validate Pydantic models for schema generation.
+ Validate registered Pydantic models for schema generation.

  ```bash
- rem db schema validate --models src/rem/models/entities
+ rem db schema validate
  ```

  ### File Processing
@@ -1138,14 +1086,11 @@ Test Pydantic AI agent with natural language queries.
  # Ask a question
  rem ask "What documents did Sarah Chen author?"

- # With context headers
- rem ask "Find all resources about API design" \
-   --user-id user-123 \
-   --tenant-id acme-corp
-
  # Use specific agent schema
- rem ask "Analyze this contract" \
-   --agent-schema contract-analyzer-v1
+ rem ask contract-analyzer "Analyze this contract"
+
+ # Stream response
+ rem ask "Find all resources about API design" --stream
  ```

  ### Global Options
@@ -1193,7 +1138,7 @@ export API__RELOAD=true
  rem serve
  ```

- ## Development (For Contributors)
+ ## Option 2: Development (For Contributors)

  **Best for**: Contributing to REM or customizing the codebase.

@@ -1297,6 +1242,30 @@ S3__BUCKET_NAME=rem-storage
  S3__REGION=us-east-1
  ```

+ ### Building Docker Images
+
+ Each Docker image is pushed with three tags for traceability:
+ 1. `latest` - Always points to most recent build
+ 2. `<git-sha>` - Short commit hash for exact version tracing
+ 3. `<version>` - Semantic version from `pyproject.toml`
+
+ ```bash
+ # Build and push multi-platform image to Docker Hub
+ VERSION=$(grep '^version' pyproject.toml | cut -d'"' -f2) && \
+ docker buildx build --platform linux/amd64,linux/arm64 \
+   -t percolationlabs/rem:latest \
+   -t percolationlabs/rem:$(git rev-parse --short HEAD) \
+   -t percolationlabs/rem:$VERSION \
+   --push \
+   -f Dockerfile .
+
+ # Load locally for testing (single platform, no push)
+ docker buildx build --platform linux/arm64 \
+   -t percolationlabs/rem:latest \
+   --load \
+   -f Dockerfile .
+ ```
+
  ### Production Deployment (Optional)

  For production deployment to AWS EKS with Kubernetes, see the main repository README:
@@ -1465,45 +1434,156 @@ Successfully installed ... kreuzberg-4.0.0rc1 ... remdb-0.3.10

  REM wraps FastAPI - extend it exactly as you would any FastAPI app.

+ ### Recommended Project Structure
+
+ REM auto-detects `./agents/` and `./models/` folders - no configuration needed:
+
+ ```
+ my-rem-app/
+ ├── agents/               # Auto-detected for agent schemas
+ │   ├── my-agent.yaml     # Custom agent (rem ask my-agent "query")
+ │   └── another-agent.yaml
+ ├── models/               # Auto-detected if __init__.py exists
+ │   └── __init__.py       # Register models with @rem.register_model
+ ├── routers/              # Custom FastAPI routers
+ │   └── custom.py
+ ├── main.py               # Entry point
+ └── pyproject.toml
+ ```
+
+ ### Quick Start
+
  ```python
- import rem
+ # main.py
  from rem import create_app
- from rem.models.core import CoreModel
+ from fastapi import APIRouter

- # 1. Register models (for schema generation)
- rem.register_models(MyModel, AnotherModel)
+ # Create REM app (auto-detects ./agents/ and ./models/)
+ app = create_app()

- # 2. Register schema paths (for custom agents/evaluators)
- rem.register_schema_path("./schemas")
+ # Add custom router
+ router = APIRouter(prefix="/custom", tags=["custom"])

- # 3. Create app
- app = create_app()
+ @router.get("/hello")
+ async def hello():
+     return {"message": "Hello from custom router!"}

- # 4. Extend like normal FastAPI
- app.include_router(my_router)
+ app.include_router(router)

+ # Add custom MCP tool
  @app.mcp_server.tool()
  async def my_tool(query: str) -> dict:
-     """Custom MCP tool."""
+     """Custom MCP tool available to agents."""
      return {"result": query}
  ```

- ### Project Structure
+ ### Custom Models (Auto-Detected)
+
+ ```python
+ # models/__init__.py
+ import rem
+ from rem.models.core import CoreModel
+ from pydantic import Field

+ @rem.register_model
+ class MyEntity(CoreModel):
+     """Custom entity - auto-registered for schema generation."""
+     name: str = Field(description="Entity name")
+     status: str = Field(default="active")
  ```
- my-rem-app/
- ├── my_app/
- │   ├── main.py       # Entry point (create_app + extensions)
- │   ├── models.py     # Custom models (inherit CoreModel)
- │   └── routers/      # Custom FastAPI routers
- ├── schemas/
- │   ├── agents/       # Custom agent YAML schemas
- │   └── evaluators/   # Custom evaluator schemas
- ├── sql/migrations/   # Custom SQL migrations
- └── pyproject.toml
+
+ Run `rem db schema generate` to include your models in the database schema.
+
+ ### Custom Agents (Auto-Detected)
+
+ ```yaml
+ # agents/my-agent.yaml
+ type: object
+ description: |
+   You are a helpful assistant that...
+
+ properties:
+   answer:
+     type: string
+     description: Your response
+
+ required:
+   - answer
+
+ json_schema_extra:
+   kind: agent
+   name: my-agent
+   version: "1.0.0"
+   tools:
+     - search_rem
+ ```
+
+ Test with: `rem ask my-agent "Hello!"`
+
+ ### Example Custom Router
+
+ ```python
+ # routers/analytics.py
+ from fastapi import APIRouter, Depends
+ from rem.services.postgres import get_postgres_service
+
+ router = APIRouter(prefix="/analytics", tags=["analytics"])
+
+ @router.get("/stats")
+ async def get_stats():
+     """Get database statistics."""
+     db = get_postgres_service()
+     if not db:
+         return {"error": "Database not available"}
+
+     await db.connect()
+     try:
+         result = await db.execute(
+             "SELECT COUNT(*) as count FROM resources"
+         )
+         return {"resource_count": result[0]["count"]}
+     finally:
+         await db.disconnect()
+
+ @router.get("/recent")
+ async def get_recent(limit: int = 10):
+     """Get recent resources."""
+     db = get_postgres_service()
+     if not db:
+         return {"error": "Database not available"}
+
+     await db.connect()
+     try:
+         result = await db.execute(
+             f"SELECT label, category, created_at FROM resources ORDER BY created_at DESC LIMIT {limit}"
+         )
+         return {"resources": result}
+     finally:
+         await db.disconnect()
+ ```
+
+ Include in main.py:
+
+ ```python
+ from routers.analytics import router as analytics_router
+ app.include_router(analytics_router)
  ```

- Generate this structure with: `rem scaffold my-app` *(coming soon)*
+ ### Running the App
+
+ ```bash
+ # Development (auto-reload)
+ uv run uvicorn main:app --reload --port 8000
+
+ # Or use rem serve
+ uv run rem serve --reload
+
+ # Test agent
+ uv run rem ask my-agent "What can you help me with?"
+
+ # Test custom endpoint
+ curl http://localhost:8000/analytics/stats
+ ```

  ### Extension Points

@@ -1515,6 +1595,37 @@ Generate this structure with: `rem scaffold my-app` *(coming soon)*
  | **MCP Prompts** | `@app.mcp_server.prompt()` or `app.mcp_server.add_prompt(fn)` |
  | **Models** | `rem.register_models(Model)` then `rem db schema generate` |
  | **Agent Schemas** | `rem.register_schema_path("./schemas")` or `SCHEMA__PATHS` env var |
+ | **SQL Migrations** | Place in `sql/migrations/` (auto-detected) |
+
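A minimal sketch combining the model and schema-path hooks from the table - the explicit alternative to the auto-detected folders. Module, path, and model names are illustrative assumptions:

```python
# app_setup.py (hypothetical module)
import rem
from rem import create_app

from my_models import Invoice, Customer  # illustrative CoreModel subclasses

# Register models so `rem db schema generate` picks them up
rem.register_models(Invoice, Customer)

# Register a directory of custom agent/evaluator YAML schemas
rem.register_schema_path("./schemas")

app = create_app()
```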
+ ### Custom Migrations
+
+ REM automatically discovers migrations from two sources:
+
+ 1. **Package migrations** (001-099): Built-in migrations from the `remdb` package
+ 2. **User migrations** (100+): Your custom migrations in `./sql/migrations/`
+
+ **Convention**: Place custom SQL files in `sql/migrations/` relative to your project root (see the sketch after this section for example contents):
+
+ ```
+ my-rem-app/
+ ├── sql/
+ │   └── migrations/
+ │       ├── 100_custom_table.sql      # Runs after package migrations
+ │       ├── 101_add_indexes.sql
+ │       └── 102_custom_functions.sql
+ └── ...
+ ```
+
+ **Numbering**: Use 100+ for user migrations to ensure they run after package migrations (001-099). All migrations are sorted by filename, so proper numbering ensures correct execution order.
+
+ **Running migrations**:
+ ```bash
+ # Apply all migrations (package + user)
+ rem db migrate
+
+ # Apply with background indexes (for production)
+ rem db migrate --background-indexes
+ ```
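For reference, a sketch of what a user migration such as the `100_custom_table.sql` shown above might contain - the table itself is hypothetical; any DDL your application needs is fine:

```sql
-- sql/migrations/100_custom_table.sql (hypothetical contents)
-- Runs after the package migrations, so core tables already exist.
CREATE TABLE IF NOT EXISTS app_audit_log (
    id BIGSERIAL PRIMARY KEY,
    user_id TEXT NOT NULL,
    action TEXT NOT NULL,
    created_at TIMESTAMPTZ NOT NULL DEFAULT now()
);

CREATE INDEX IF NOT EXISTS idx_app_audit_log_user
    ON app_audit_log (user_id, created_at DESC);
```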

  ## License