remdb 0.3.242__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +129 -0
- rem/agentic/README.md +760 -0
- rem/agentic/__init__.py +54 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +38 -0
- rem/agentic/agents/agent_manager.py +311 -0
- rem/agentic/agents/sse_simulator.py +502 -0
- rem/agentic/context.py +425 -0
- rem/agentic/context_builder.py +360 -0
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +273 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +240 -0
- rem/agentic/providers/phoenix.py +926 -0
- rem/agentic/providers/pydantic_ai.py +854 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +737 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +242 -0
- rem/api/README.md +657 -0
- rem/api/deps.py +253 -0
- rem/api/main.py +460 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +820 -0
- rem/api/mcp_router/server.py +243 -0
- rem/api/mcp_router/tools.py +1605 -0
- rem/api/middleware/tracking.py +172 -0
- rem/api/routers/admin.py +520 -0
- rem/api/routers/auth.py +898 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/child_streaming.py +394 -0
- rem/api/routers/chat/completions.py +702 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +202 -0
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +546 -0
- rem/api/routers/chat/streaming.py +950 -0
- rem/api/routers/chat/streaming_utils.py +327 -0
- rem/api/routers/common.py +18 -0
- rem/api/routers/dev.py +87 -0
- rem/api/routers/feedback.py +276 -0
- rem/api/routers/messages.py +620 -0
- rem/api/routers/models.py +86 -0
- rem/api/routers/query.py +362 -0
- rem/api/routers/shared_sessions.py +422 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +36 -0
- rem/auth/jwt.py +367 -0
- rem/auth/middleware.py +318 -0
- rem/auth/providers/__init__.py +16 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/email.py +215 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +517 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +299 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +549 -0
- rem/cli/commands/cluster.py +1808 -0
- rem/cli/commands/configure.py +495 -0
- rem/cli/commands/db.py +828 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1698 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +388 -0
- rem/cli/commands/query.py +109 -0
- rem/cli/commands/scaffold.py +47 -0
- rem/cli/commands/schema.py +230 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/commands/session.py +453 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +123 -0
- rem/config.py +244 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +70 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +672 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +246 -0
- rem/models/entities/__init__.py +68 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +64 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +181 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/session.py +84 -0
- rem/models/entities/shared_session.py +180 -0
- rem/models/entities/subscriber.py +175 -0
- rem/models/entities/user.py +93 -0
- rem/py.typed +0 -0
- rem/registry.py +373 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/agent-builder.yaml +235 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +132 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +18 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +760 -0
- rem/services/content/service.py +762 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +322 -0
- rem/services/dreaming/moment_service.py +251 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/email/__init__.py +10 -0
- rem/services/email/service.py +522 -0
- rem/services/email/templates.py +360 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +127 -0
- rem/services/embeddings/worker.py +435 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +960 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +757 -0
- rem/services/postgres/__init__.py +49 -0
- rem/services/postgres/diff_service.py +599 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/programmable_diff_service.py +635 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
- rem/services/postgres/register_type.py +353 -0
- rem/services/postgres/repository.py +481 -0
- rem/services/postgres/schema_generator.py +661 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +355 -0
- rem/services/rate_limit.py +113 -0
- rem/services/rem/README.md +318 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +180 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +608 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +13 -0
- rem/services/session/compression.py +488 -0
- rem/services/session/pydantic_messages.py +310 -0
- rem/services/session/reload.py +85 -0
- rem/services/user_service.py +130 -0
- rem/settings.py +1877 -0
- rem/sql/background_indexes.sql +52 -0
- rem/sql/migrations/001_install.sql +983 -0
- rem/sql/migrations/002_install_models.sql +3157 -0
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +282 -0
- rem/sql/migrations/005_schema_update.sql +145 -0
- rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +628 -0
- rem/utils/__init__.py +61 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/constants.py +97 -0
- rem/utils/date_utils.py +228 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +436 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/files.py +323 -0
- rem/utils/markdown.py +16 -0
- rem/utils/mime_types.py +158 -0
- rem/utils/model_helpers.py +492 -0
- rem/utils/schema_loader.py +649 -0
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +350 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +325 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +7 -0
- rem/workers/db_listener.py +579 -0
- rem/workers/db_maintainer.py +74 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- rem/workers/unlogged_maintainer.py +463 -0
- remdb-0.3.242.dist-info/METADATA +1632 -0
- remdb-0.3.242.dist-info/RECORD +235 -0
- remdb-0.3.242.dist-info/WHEEL +4 -0
- remdb-0.3.242.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,757 @@
|
|
|
1
|
+
# PostgresService - CloudNativePG Database Operations
|
|
2
|
+
|
|
3
|
+
Comprehensive service for PostgreSQL 18 with pgvector, including:
|
|
4
|
+
- Entity CRUD with automatic embeddings
|
|
5
|
+
- KV_STORE cache for O(1) lookups
|
|
6
|
+
- Fuzzy text search with pg_trgm
|
|
7
|
+
- Background index creation
|
|
8
|
+
- Batch operations with transaction management
|
|
9
|
+
|
|
10
|
+
## Architecture
|
|
11
|
+
|
|
12
|
+
```
|
|
13
|
+
┌─────────────────────────────────────────────────────────────┐
|
|
14
|
+
│ PostgresService │
|
|
15
|
+
├─────────────────────────────────────────────────────────────┤
|
|
16
|
+
│ │
|
|
17
|
+
│ ┌──────────────────────────────────────────────────────┐ │
|
|
18
|
+
│ │ Batch Upsert Pipeline │ │
|
|
19
|
+
│ │ 1. Validate entities │ │
|
|
20
|
+
│ │ 2. Generate embeddings (batch OpenAI API) │ │
|
|
21
|
+
│ │ 3. Upsert to primary tables │ │
|
|
22
|
+
│ │ 4. Upsert to embeddings_<table> │ │
|
|
23
|
+
│ │ 5. Upsert to KV_STORE (via trigger) │ │
|
|
24
|
+
│ └──────────────────────────────────────────────────────┘ │
|
|
25
|
+
│ │
|
|
26
|
+
│ ┌──────────────────────────────────────────────────────┐ │
|
|
27
|
+
│ │ Background Index Thread │ │
|
|
28
|
+
│ │ - Monitors pending indexes queue │ │
|
|
29
|
+
│ │ - Creates indexes CONCURRENTLY │ │
|
|
30
|
+
│ │ - Handles index creation failures │ │
|
|
31
|
+
│ └──────────────────────────────────────────────────────┘ │
|
|
32
|
+
│ │
|
|
33
|
+
└─────────────────────────────────────────────────────────────┘
|
|
34
|
+
|
|
35
|
+
┌─────────────────────────────────────────────────────────────┐
|
|
36
|
+
│ Database Schema │
|
|
37
|
+
├─────────────────────────────────────────────────────────────┤
|
|
38
|
+
│ │
|
|
39
|
+
│ Primary Tables: resources, moments, users, etc. │
|
|
40
|
+
│ Embeddings Tables: embeddings_resources, etc. │
|
|
41
|
+
│ KV_STORE Cache: UNLOGGED table for O(1) lookups │
|
|
42
|
+
│ │
|
|
43
|
+
└─────────────────────────────────────────────────────────────┘
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Core Design Patterns
|
|
47
|
+
|
|
48
|
+
### 1. Entity Storage Pattern
|
|
49
|
+
|
|
50
|
+
**Primary Tables** store entities with system fields:
|
|
51
|
+
```sql
|
|
52
|
+
CREATE TABLE resources (
|
|
53
|
+
id UUID PRIMARY KEY,
|
|
54
|
+
tenant_id VARCHAR(100), -- Optional: for future multi-tenant SaaS use
|
|
55
|
+
user_id VARCHAR(100) NOT NULL,
|
|
56
|
+
name TEXT NOT NULL,
|
|
57
|
+
description TEXT,
|
|
58
|
+
content TEXT,
|
|
59
|
+
graph_edges JSONB DEFAULT '[]',
|
|
60
|
+
metadata JSONB DEFAULT '{}',
|
|
61
|
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
62
|
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
63
|
+
deleted_at TIMESTAMP
|
|
64
|
+
);
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### 2. Embeddings Pattern
|
|
68
|
+
|
|
69
|
+
**Multiple embeddings per record** with provider flexibility:
|
|
70
|
+
|
|
71
|
+
```sql
|
|
72
|
+
CREATE TABLE embeddings_resources (
|
|
73
|
+
id UUID PRIMARY KEY,
|
|
74
|
+
entity_id UUID REFERENCES resources(id) ON DELETE CASCADE,
|
|
75
|
+
field_name VARCHAR(100) NOT NULL, -- 'description', 'content', etc.
|
|
76
|
+
provider VARCHAR(50) NOT NULL, -- 'openai', 'cohere', etc.
|
|
77
|
+
model VARCHAR(100) NOT NULL, -- 'text-embedding-3-small'
|
|
78
|
+
embedding vector(1536) NOT NULL,
|
|
79
|
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
80
|
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
81
|
+
UNIQUE (entity_id, field_name, provider)
|
|
82
|
+
);
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
**Key Features**:
|
|
86
|
+
- One row per (entity, field, provider)
|
|
87
|
+
- Unique constraint prevents duplicates
|
|
88
|
+
- Supports multiple embedding providers
|
|
89
|
+
- CASCADE delete when entity deleted
|
|
90
|
+
|
|
91
|
+
**Embedding Configuration**:
|
|
92
|
+
```python
|
|
93
|
+
from pydantic import BaseModel, Field
|
|
94
|
+
|
|
95
|
+
class Resource(BaseModel):
|
|
96
|
+
name: str
|
|
97
|
+
description: str = Field(
|
|
98
|
+
...,
|
|
99
|
+
json_schema_extra={"embed": True} # Explicit embedding
|
|
100
|
+
)
|
|
101
|
+
content: str # Auto-embeds (default for content fields)
|
|
102
|
+
notes: str = Field(
|
|
103
|
+
...,
|
|
104
|
+
json_schema_extra={"embed": False} # Disable embedding
|
|
105
|
+
)
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
**Default Embedding Fields** (if not explicitly disabled):
|
|
109
|
+
- `content`
|
|
110
|
+
- `description`
|
|
111
|
+
- `summary`
|
|
112
|
+
- `text`
|
|
113
|
+
- `body`
|
|
114
|
+
- `message`
|
|
115
|
+
- `notes`
|
|
116
|
+
|
|
117
|
+
### 3. KV_STORE Cache Pattern
|
|
118
|
+
|
|
119
|
+
**UNLOGGED table** for fast entity lookups:
|
|
120
|
+
|
|
121
|
+
```sql
|
|
122
|
+
CREATE UNLOGGED TABLE kv_store (
|
|
123
|
+
entity_key VARCHAR(255) NOT NULL, -- Natural language key
|
|
124
|
+
entity_type VARCHAR(100) NOT NULL, -- Table name
|
|
125
|
+
entity_id UUID NOT NULL, -- Foreign key to entity
|
|
126
|
+
tenant_id VARCHAR(100), -- Optional: for future multi-tenant SaaS use
|
|
127
|
+
user_id VARCHAR(100) NOT NULL, -- Primary isolation scope
|
|
128
|
+
content_summary TEXT, -- For fuzzy search
|
|
129
|
+
metadata JSONB DEFAULT '{}',
|
|
130
|
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
131
|
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
132
|
+
PRIMARY KEY (user_id, entity_key)
|
|
133
|
+
);
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
**Key Features**:
|
|
137
|
+
- UNLOGGED = faster writes, no WAL overhead
|
|
138
|
+
- Rebuilt automatically from primary tables on restart
|
|
139
|
+
- O(1) lookups by entity_key
|
|
140
|
+
- User-scoped filtering when `user_id IS NOT NULL`
|
|
141
|
+
- Fuzzy search via pg_trgm indexes
|
|
142
|
+
|
|
143
|
+
**Trigger-based Updates**:
|
|
144
|
+
```sql
|
|
145
|
+
CREATE TRIGGER trg_resources_kv_store
|
|
146
|
+
AFTER INSERT OR UPDATE OR DELETE ON resources
|
|
147
|
+
FOR EACH ROW EXECUTE FUNCTION fn_resources_kv_store_upsert();
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
Automatically maintains KV_STORE on entity changes.
|
|
151
|
+
|
|
152
|
+
### 4. Batch Upsert Pattern
|
|
153
|
+
|
|
154
|
+
**Efficient bulk operations** with automatic embedding generation:
|
|
155
|
+
|
|
156
|
+
```python
|
|
157
|
+
from rem.services import PostgresService
|
|
158
|
+
|
|
159
|
+
service = PostgresService(connection_string)
|
|
160
|
+
|
|
161
|
+
# Batch upsert entities
|
|
162
|
+
entities = [
|
|
163
|
+
Resource(name="doc-1", description="First document", content="..."),
|
|
164
|
+
Resource(name="doc-2", description="Second document", content="..."),
|
|
165
|
+
]
|
|
166
|
+
|
|
167
|
+
result = await service.batch_upsert(
|
|
168
|
+
table_name="resources",
|
|
169
|
+
entities=entities,
|
|
170
|
+
entity_key_field="name",
|
|
171
|
+
generate_embeddings=True, # Auto-generate embeddings
|
|
172
|
+
embedding_provider="openai",
|
|
173
|
+
embedding_model="text-embedding-3-small"
|
|
174
|
+
)
|
|
175
|
+
|
|
176
|
+
# Result:
|
|
177
|
+
# {
|
|
178
|
+
# "inserted": 2,
|
|
179
|
+
# "updated": 0,
|
|
180
|
+
# "embeddings_generated": 4, # 2 entities × 2 fields (description, content)
|
|
181
|
+
# "kv_entries": 2
|
|
182
|
+
# }
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
**Pipeline Steps**:
|
|
186
|
+
1. **Validate** entities against Pydantic model
|
|
187
|
+
2. **Generate embeddings** in batch (OpenAI API supports up to 2048 texts)
|
|
188
|
+
3. **Upsert entities** to primary table (ON CONFLICT DO UPDATE)
|
|
189
|
+
4. **Upsert embeddings** to `embeddings_<table>`
|
|
190
|
+
5. **Update KV_STORE** (automatic via trigger)
|
|
191
|
+
6. **Queue background indexes** if needed
|
|
192
|
+
|
|
193
|
+
### 5. Embedding Generation Pattern
|
|
194
|
+
|
|
195
|
+
**Batch OpenAI API calls** for performance:
|
|
196
|
+
|
|
197
|
+
```python
|
|
198
|
+
# Collect all texts to embed
|
|
199
|
+
texts_to_embed = []
|
|
200
|
+
for entity in entities:
|
|
201
|
+
for field_name in embeddable_fields:
|
|
202
|
+
text = getattr(entity, field_name)
|
|
203
|
+
if text:
|
|
204
|
+
texts_to_embed.append({
|
|
205
|
+
"text": text,
|
|
206
|
+
"entity_id": entity.id,
|
|
207
|
+
"field_name": field_name
|
|
208
|
+
})
|
|
209
|
+
|
|
210
|
+
# Batch generate embeddings (up to 2048 texts per call)
|
|
211
|
+
embeddings = await generate_embeddings_batch(
|
|
212
|
+
texts=[item["text"] for item in texts_to_embed],
|
|
213
|
+
provider="openai",
|
|
214
|
+
model="text-embedding-3-small"
|
|
215
|
+
)
|
|
216
|
+
|
|
217
|
+
# Map embeddings back to entities and fields
|
|
218
|
+
for item, embedding in zip(texts_to_embed, embeddings):
|
|
219
|
+
await upsert_embedding(
|
|
220
|
+
entity_id=item["entity_id"],
|
|
221
|
+
field_name=item["field_name"],
|
|
222
|
+
provider="openai",
|
|
223
|
+
model="text-embedding-3-small",
|
|
224
|
+
embedding=embedding
|
|
225
|
+
)
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
**Supported Providers**:
|
|
229
|
+
- `openai` - text-embedding-3-small, text-embedding-3-large
|
|
230
|
+
- `cohere` - embed-english-v3.0, embed-multilingual-v3.0
|
|
231
|
+
- Custom providers via plugin system
|
|
232
|
+
|
|
233
|
+
### 6. Background Index Creation Pattern
|
|
234
|
+
|
|
235
|
+
**Non-blocking index creation** after data load:
|
|
236
|
+
|
|
237
|
+
```python
|
|
238
|
+
# Background index creation task (an asyncio coroutine, not an OS thread)
|
|
239
|
+
class BackgroundIndexer:
|
|
240
|
+
def __init__(self, postgres_service):
|
|
241
|
+
self.service = postgres_service
|
|
242
|
+
self.queue = asyncio.Queue()
|
|
243
|
+
self.running = False
|
|
244
|
+
|
|
245
|
+
async def queue_index(self, table_name: str, index_type: str):
|
|
246
|
+
"""Queue an index for background creation."""
|
|
247
|
+
await self.queue.put({
|
|
248
|
+
"table_name": table_name,
|
|
249
|
+
"index_type": index_type,
|
|
250
|
+
"attempts": 0
|
|
251
|
+
})
|
|
252
|
+
|
|
253
|
+
async def run(self):
|
|
254
|
+
"""Background thread that creates indexes CONCURRENTLY."""
|
|
255
|
+
self.running = True
|
|
256
|
+
while self.running:
|
|
257
|
+
try:
|
|
258
|
+
item = await asyncio.wait_for(self.queue.get(), timeout=5.0)
|
|
259
|
+
|
|
260
|
+
# Create index CONCURRENTLY (non-blocking)
|
|
261
|
+
await self.service.create_index_concurrently(
|
|
262
|
+
table_name=item["table_name"],
|
|
263
|
+
index_type=item["index_type"]
|
|
264
|
+
)
|
|
265
|
+
|
|
266
|
+
logger.info(f"Created index for {item['table_name']}")
|
|
267
|
+
|
|
268
|
+
except asyncio.TimeoutError:
|
|
269
|
+
continue
|
|
270
|
+
except Exception as e:
|
|
271
|
+
# Retry with backoff
|
|
272
|
+
if item["attempts"] < 3:
|
|
273
|
+
item["attempts"] += 1
|
|
274
|
+
await asyncio.sleep(2 ** item["attempts"])
|
|
275
|
+
await self.queue.put(item)
|
|
276
|
+
else:
|
|
277
|
+
logger.error(f"Failed to create index after 3 attempts: {e}")
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
**Index Types**:
|
|
281
|
+
- **HNSW** for vector similarity (embeddings)
|
|
282
|
+
- **GIN** for JSONB (graph_edges, metadata)
|
|
283
|
+
- **GIN with pg_trgm** for fuzzy text search
|
|
284
|
+
- **B-tree** for foreign keys and common filters
|
|
285
|
+
|
|
286
|
+
### 7. REM Query Integration
|
|
287
|
+
|
|
288
|
+
**LOOKUP Queries** use KV_STORE for O(1) access:
|
|
289
|
+
|
|
290
|
+
```python
|
|
291
|
+
# REM LOOKUP query
|
|
292
|
+
result = await service.lookup_entity(
|
|
293
|
+
entity_key="sarah-chen",
|
|
294
|
+
user_id="user123"
|
|
295
|
+
)
|
|
296
|
+
|
|
297
|
+
# SQL:
|
|
298
|
+
# SELECT entity_id, entity_type, metadata
|
|
299
|
+
# FROM kv_store
|
|
300
|
+
# WHERE user_id = $1 AND entity_key = $2;
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
**FUZZY Queries** use pg_trgm indexes:
|
|
304
|
+
|
|
305
|
+
```python
|
|
306
|
+
# REM FUZZY query
|
|
307
|
+
results = await service.fuzzy_search(
|
|
308
|
+
query="sara",
|
|
309
|
+
user_id="user123",
|
|
310
|
+
threshold=0.3,
|
|
311
|
+
limit=10
|
|
312
|
+
)
|
|
313
|
+
|
|
314
|
+
# SQL:
|
|
315
|
+
# SELECT entity_key, entity_type, similarity(entity_key, $1) AS score
|
|
316
|
+
# FROM kv_store
|
|
317
|
+
# WHERE user_id = $2 AND entity_key % $1
|
|
318
|
+
# ORDER BY score DESC
|
|
319
|
+
# LIMIT $3;
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
**SEARCH Queries** use vector similarity:
|
|
323
|
+
|
|
324
|
+
```python
|
|
325
|
+
# REM SEARCH query
|
|
326
|
+
results = await service.vector_search(
|
|
327
|
+
table_name="resources",
|
|
328
|
+
query_text="machine learning documentation",
|
|
329
|
+
field_name="content",
|
|
330
|
+
user_id="user123",
|
|
331
|
+
limit=10,
|
|
332
|
+
min_similarity=0.7
|
|
333
|
+
)
|
|
334
|
+
|
|
335
|
+
# SQL:
|
|
336
|
+
# SELECT r.*, 1 - (e.embedding <=> $1) AS similarity
|
|
337
|
+
# FROM resources r
|
|
338
|
+
# JOIN embeddings_resources e ON e.entity_id = r.id
|
|
339
|
+
# WHERE r.user_id = $2
|
|
340
|
+
# AND e.field_name = 'content'
|
|
341
|
+
# AND e.provider = 'openai'
|
|
342
|
+
# AND 1 - (e.embedding <=> $1) >= $3
|
|
343
|
+
# ORDER BY e.embedding <=> $1
|
|
344
|
+
# LIMIT $4;
|
|
345
|
+
```
|
|
346
|
+
|
|
347
|
+
## Usage Examples
|
|
348
|
+
|
|
349
|
+
### Initialize Service
|
|
350
|
+
|
|
351
|
+
There are two ways to initialize the PostgresService:
|
|
352
|
+
|
|
353
|
+
**Option 1: Factory function (recommended for apps using remdb as a library)**
|
|
354
|
+
|
|
355
|
+
```python
|
|
356
|
+
from rem.services.postgres import get_postgres_service
|
|
357
|
+
|
|
358
|
+
# Uses POSTGRES__CONNECTION_STRING from environment
|
|
359
|
+
pg = get_postgres_service()
|
|
360
|
+
if pg is None:
|
|
361
|
+
raise RuntimeError("Database not configured - set POSTGRES__CONNECTION_STRING")
|
|
362
|
+
|
|
363
|
+
await pg.connect()
|
|
364
|
+
# ... use pg ...
|
|
365
|
+
await pg.disconnect()
|
|
366
|
+
```
|
|
367
|
+
|
|
368
|
+
**Option 2: Direct instantiation**
|
|
369
|
+
|
|
370
|
+
```python
|
|
371
|
+
from rem.services.postgres import PostgresService
|
|
372
|
+
|
|
373
|
+
service = PostgresService(
|
|
374
|
+
connection_string="postgresql://user:pass@localhost/remdb",
|
|
375
|
+
pool_size=20
|
|
376
|
+
)
|
|
377
|
+
|
|
378
|
+
await service.connect()
|
|
379
|
+
```
|
|
380
|
+
|
|
381
|
+
> **Note**: `get_postgres_service()` returns the service directly. It does NOT support
|
|
382
|
+
> `async with` context manager syntax. Always call `connect()` and `disconnect()` explicitly.
|
|
383
|
+
|
|
384
|
+
### Using Repository Pattern
|
|
385
|
+
|
|
386
|
+
**Generic Repository** for simple CRUD operations:
|
|
387
|
+
|
|
388
|
+
```python
|
|
389
|
+
from rem.services.postgres import Repository
|
|
390
|
+
from rem.models.entities import Message, Resource
|
|
391
|
+
|
|
392
|
+
# Create repository for any model
|
|
393
|
+
message_repo = Repository(Message)
|
|
394
|
+
resource_repo = Repository(Resource)
|
|
395
|
+
|
|
396
|
+
# Create single record
|
|
397
|
+
message = Message(
|
|
398
|
+
content="Hello, world!",
|
|
399
|
+
message_type="user",
|
|
400
|
+
session_id="session-123",
|
|
401
|
+
user_id="user123"
|
|
402
|
+
)
|
|
403
|
+
created = await message_repo.upsert(message)
|
|
404
|
+
|
|
405
|
+
# Upsert also accepts lists (no need for separate batch method)
|
|
406
|
+
messages = [message1, message2, message3]
|
|
407
|
+
created_messages = await message_repo.upsert(messages)
|
|
408
|
+
|
|
409
|
+
# Find records
|
|
410
|
+
messages = await message_repo.find({
|
|
411
|
+
"session_id": "session-123",
|
|
412
|
+
"user_id": "user123"
|
|
413
|
+
}, order_by="created_at ASC", limit=100)
|
|
414
|
+
|
|
415
|
+
# Get by ID
|
|
416
|
+
message = await message_repo.get_by_id("msg-id", "user123")
|
|
417
|
+
|
|
418
|
+
# Get by session (convenience method)
|
|
419
|
+
session_messages = await message_repo.get_by_session(
|
|
420
|
+
session_id="session-123",
|
|
421
|
+
user_id="user123"
|
|
422
|
+
)
|
|
423
|
+
|
|
424
|
+
# Count
|
|
425
|
+
count = await message_repo.count({"session_id": "session-123"})
|
|
426
|
+
|
|
427
|
+
# Delete (soft delete)
|
|
428
|
+
deleted = await message_repo.delete("msg-id", "user123")
|
|
429
|
+
```
|
|
430
|
+
|
|
431
|
+
**When to use Repository vs PostgresService:**
|
|
432
|
+
- **Repository**: Simple CRUD, session management, high-level operations
|
|
433
|
+
- **PostgresService**: Batch operations with embeddings, custom queries, performance-critical code
|
|
434
|
+
|
|
435
|
+
### Register Entity Types
|
|
436
|
+
|
|
437
|
+
```python
|
|
438
|
+
from rem.services.postgres.register_type import register_type
|
|
439
|
+
from rem.models.entities import Resource
|
|
440
|
+
|
|
441
|
+
# Register Resource model
|
|
442
|
+
schema = await register_type(
|
|
443
|
+
model=Resource,
|
|
444
|
+
table_name="resources",
|
|
445
|
+
entity_key_field="name",
|
|
446
|
+
tenant_scoped=True,
|
|
447
|
+
create_embeddings=True,
|
|
448
|
+
create_kv_trigger=True
|
|
449
|
+
)
|
|
450
|
+
|
|
451
|
+
# Execute generated SQL
|
|
452
|
+
await service.execute(schema["sql"]["table"])
|
|
453
|
+
await service.execute(schema["sql"]["embeddings"])
|
|
454
|
+
await service.execute(schema["sql"]["kv_trigger"])
|
|
455
|
+
```
|
|
456
|
+
|
|
457
|
+
### Batch Upsert with Embeddings
|
|
458
|
+
|
|
459
|
+
```python
|
|
460
|
+
# Create entities
|
|
461
|
+
resources = [
|
|
462
|
+
Resource(
|
|
463
|
+
name="api-design-doc",
|
|
464
|
+
description="API design guidelines",
|
|
465
|
+
content="RESTful API best practices..."
|
|
466
|
+
),
|
|
467
|
+
Resource(
|
|
468
|
+
name="deployment-guide",
|
|
469
|
+
description="Kubernetes deployment guide",
|
|
470
|
+
content="Deploy to EKS with Karpenter..."
|
|
471
|
+
)
|
|
472
|
+
]
|
|
473
|
+
|
|
474
|
+
# Batch upsert
|
|
475
|
+
result = await service.batch_upsert(
|
|
476
|
+
table_name="resources",
|
|
477
|
+
entities=resources,
|
|
478
|
+
user_id="user123",
|
|
479
|
+
generate_embeddings=True
|
|
480
|
+
)
|
|
481
|
+
|
|
482
|
+
print(f"Inserted: {result['inserted']}")
|
|
483
|
+
print(f"Embeddings: {result['embeddings_generated']}")
|
|
484
|
+
```
|
|
485
|
+
|
|
486
|
+
### Query Operations
|
|
487
|
+
|
|
488
|
+
```python
|
|
489
|
+
# LOOKUP by natural key
|
|
490
|
+
entity = await service.lookup_entity(
|
|
491
|
+
entity_key="api-design-doc",
|
|
492
|
+
user_id="user123"
|
|
493
|
+
)
|
|
494
|
+
|
|
495
|
+
# FUZZY search
|
|
496
|
+
results = await service.fuzzy_search(
|
|
497
|
+
query="api design",
|
|
498
|
+
user_id="user123",
|
|
499
|
+
threshold=0.3,
|
|
500
|
+
limit=5
|
|
501
|
+
)
|
|
502
|
+
|
|
503
|
+
# SEARCH by semantic similarity
|
|
504
|
+
results = await service.vector_search(
|
|
505
|
+
table_name="resources",
|
|
506
|
+
query_text="how to deploy kubernetes",
|
|
507
|
+
field_name="content",
|
|
508
|
+
user_id="user123",
|
|
509
|
+
limit=10
|
|
510
|
+
)
|
|
511
|
+
```
|
|
512
|
+
|
|
513
|
+
## Performance Considerations
|
|
514
|
+
|
|
515
|
+
### Batch Size Optimization
|
|
516
|
+
|
|
517
|
+
- **Embeddings**: OpenAI supports up to 2048 texts per call
|
|
518
|
+
- **Inserts**: Batch 100-500 rows per transaction
|
|
519
|
+
- **Connection pool**: Size based on workload (default: 20)
|
|
520
|
+
|
|
521
|
+
### Index Strategy
|
|
522
|
+
|
|
523
|
+
- **Foreground indexes**: Critical for queries (tenant_id, user_id)
|
|
524
|
+
- **Background indexes**: HNSW for vectors, created CONCURRENTLY
|
|
525
|
+
- **GIN indexes**: For JSONB fields (graph_edges, metadata)
|
|
526
|
+
|
|
527
|
+
### KV_STORE Maintenance
|
|
528
|
+
|
|
529
|
+
- UNLOGGED table = faster writes, but its contents are lost (the table is truncated) after a crash or unclean shutdown
|
|
530
|
+
- Rebuild from primary tables on startup
|
|
531
|
+
- Vacuum regularly to reclaim space
|
|
532
|
+
|
|
533
|
+
### Memory Usage
|
|
534
|
+
|
|
535
|
+
- Vector indexes can be memory-intensive
|
|
536
|
+
- HNSW parameters: `m=16, ef_construction=64` (tunable)
|
|
537
|
+
- Monitor shared_buffers and work_mem
|
|
538
|
+
|
|
539
|
+
## Schema Management
|
|
540
|
+
|
|
541
|
+
REM uses a **code-as-source-of-truth** approach. Pydantic models define the schema, and the database is kept in sync via diff-based migrations.
|
|
542
|
+
|
|
543
|
+
### File Structure
|
|
544
|
+
|
|
545
|
+
```
|
|
546
|
+
src/rem/sql/
|
|
547
|
+
├── migrations/
|
|
548
|
+
│ ├── 001_install.sql # Core infrastructure (manual)
|
|
549
|
+
│ └── 002_install_models.sql # Entity tables (auto-generated)
|
|
550
|
+
└── background_indexes.sql # HNSW vector indexes (optional)
|
|
551
|
+
```
|
|
552
|
+
|
|
553
|
+
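**Key principle**: The schema is defined by two core migration files (`001_install.sql` and `002_install_models.sql`); model changes should not add incremental `003_`, `004_` files. Note: this release also ships `003_optional_extensions.sql`, `004_cache_system.sql`, and `005_schema_update.sql` under `sql/migrations/`, which are not shown in the file structure above.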
|
|
554
|
+
|
|
555
|
+
### CLI Commands
|
|
556
|
+
|
|
557
|
+
```bash
|
|
558
|
+
# Apply migrations (installs extensions, core tables, entity tables)
|
|
559
|
+
rem db migrate
|
|
560
|
+
|
|
561
|
+
# Check migration status
|
|
562
|
+
rem db status
|
|
563
|
+
|
|
564
|
+
# Generate schema SQL from models (for remdb development)
|
|
565
|
+
rem db schema generate --models src/rem/models/entities
|
|
566
|
+
|
|
567
|
+
# Validate models for schema generation
|
|
568
|
+
rem db schema validate --models src/rem/models/entities
|
|
569
|
+
```
|
|
570
|
+
|
|
571
|
+
### Model Registry
|
|
572
|
+
|
|
573
|
+
Models are discovered via the registry:
|
|
574
|
+
|
|
575
|
+
```python
|
|
576
|
+
import rem
|
|
577
|
+
from rem.models.core import CoreModel
|
|
578
|
+
|
|
579
|
+
@rem.register_model
|
|
580
|
+
class MyEntity(CoreModel):
|
|
581
|
+
name: str
|
|
582
|
+
description: str # Auto-embeds
|
|
583
|
+
```
|
|
584
|
+
|
|
585
|
+
## Using REM as a Library (Downstream Apps)
|
|
586
|
+
|
|
587
|
+
When building an application that **depends on remdb as a package** (e.g., `pip install remdb`),
|
|
588
|
+
there are important differences from developing remdb itself.
|
|
589
|
+
|
|
590
|
+
### What Works Out of the Box
|
|
591
|
+
|
|
592
|
+
1. **All core entity tables** - Resources, Messages, Users, Sessions, etc.
|
|
593
|
+
2. **PostgresService** - Full database access via `get_postgres_service()`
|
|
594
|
+
3. **Repository pattern** - CRUD operations for core entities
|
|
595
|
+
4. **Migrations** - `rem db migrate` applies the bundled SQL files
|
|
596
|
+
|
|
597
|
+
```python
|
|
598
|
+
# In your downstream app (e.g., myapp/main.py)
|
|
599
|
+
from rem.services.postgres import get_postgres_service
|
|
600
|
+
from rem.models.entities import Message, Resource
|
|
601
|
+
|
|
602
|
+
pg = get_postgres_service()
|
|
603
|
+
await pg.connect()
|
|
604
|
+
|
|
605
|
+
# Use core entities - tables already exist
|
|
606
|
+
messages = await pg.query(Message, {"session_id": "abc"})
|
|
607
|
+
```
|
|
608
|
+
|
|
609
|
+
### Custom Models in Downstream Apps
|
|
610
|
+
|
|
611
|
+
The `@rem.register_model` decorator registers models in the **runtime registry**, which is useful for:
|
|
612
|
+
- Schema introspection at runtime
|
|
613
|
+
- Future tooling that reads the registry
|
|
614
|
+
|
|
615
|
+
However, **`rem db migrate` only applies SQL files bundled in the remdb package**.
|
|
616
|
+
Tables for custom models defined in downstream apps are NOT created automatically.
|
|
617
|
+
|
|
618
|
+
**Options for custom model tables:**
|
|
619
|
+
|
|
620
|
+
**Option A: Use core entities with metadata**
|
|
621
|
+
|
|
622
|
+
Store custom data in the `metadata` JSONB field of existing entities:
|
|
623
|
+
|
|
624
|
+
```python
|
|
625
|
+
resource = Resource(
|
|
626
|
+
name="my-custom-thing",
|
|
627
|
+
content="...",
|
|
628
|
+
metadata={"custom_field": "value", "another": 123}
|
|
629
|
+
)
|
|
630
|
+
```
|
|
631
|
+
|
|
632
|
+
**Option B: Create tables manually**
|
|
633
|
+
|
|
634
|
+
Write and apply your own SQL:
|
|
635
|
+
|
|
636
|
+
```sql
|
|
637
|
+
-- myapp/sql/custom_tables.sql
|
|
638
|
+
CREATE TABLE IF NOT EXISTS conversation_summaries (
|
|
639
|
+
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
640
|
+
session_ref TEXT NOT NULL,
|
|
641
|
+
summary TEXT NOT NULL,
|
|
642
|
+
-- ... include CoreModel fields for compatibility
|
|
643
|
+
user_id VARCHAR(256),
|
|
644
|
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
645
|
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
|
646
|
+
);
|
|
647
|
+
```
|
|
648
|
+
|
|
649
|
+
```bash
|
|
650
|
+
psql $DATABASE_URL -f myapp/sql/custom_tables.sql
|
|
651
|
+
```
|
|
652
|
+
|
|
653
|
+
**Option C: Contribute upstream**
|
|
654
|
+
|
|
655
|
+
If your model is generally useful, contribute it to remdb so it's included in
|
|
656
|
+
the next release and `rem db migrate` creates its table automatically.
|
|
657
|
+
|
|
658
|
+
### Example: Downstream App Structure
|
|
659
|
+
|
|
660
|
+
```
|
|
661
|
+
myapp/
|
|
662
|
+
├── main.py # Import models, start API
|
|
663
|
+
├── models/
|
|
664
|
+
│ └── __init__.py # @rem.register_model decorators
|
|
665
|
+
├── sql/
|
|
666
|
+
│ └── custom.sql # Manual migrations for custom tables
|
|
667
|
+
├── .env # POSTGRES__CONNECTION_STRING, LLM keys
|
|
668
|
+
└── pyproject.toml # dependencies = ["remdb>=0.3.110"]
|
|
669
|
+
```
|
|
670
|
+
|
|
671
|
+
```python
|
|
672
|
+
# myapp/models/__init__.py
|
|
673
|
+
import rem
|
|
674
|
+
from rem.models.core import CoreModel
|
|
675
|
+
|
|
676
|
+
@rem.register_model
|
|
677
|
+
class ConversationSummary(CoreModel):
|
|
678
|
+
"""Registered for introspection, but table created via sql/custom.sql"""
|
|
679
|
+
session_ref: str
|
|
680
|
+
summary: str
|
|
681
|
+
```
|
|
682
|
+
|
|
683
|
+
```python
|
|
684
|
+
# myapp/main.py
|
|
685
|
+
import models # Registers custom models
|
|
686
|
+
|
|
687
|
+
from rem.api.main import app # Use REM's FastAPI app
|
|
688
|
+
# Or build your own app using rem.services
|
|
689
|
+
```
|
|
690
|
+
|
|
691
|
+
## Adding Models & Migrations
|
|
692
|
+
|
|
693
|
+
Quick workflow for adding new database models:
|
|
694
|
+
|
|
695
|
+
1. **Create a model** in `models/__init__.py` (or a submodule):
|
|
696
|
+
```python
|
|
697
|
+
import rem
|
|
698
|
+
from rem.models.core import CoreModel
|
|
699
|
+
|
|
700
|
+
@rem.register_model
|
|
701
|
+
class MyEntity(CoreModel):
|
|
702
|
+
name: str
|
|
703
|
+
description: str # Auto-embedded (common field name)
|
|
704
|
+
```
|
|
705
|
+
|
|
706
|
+
2. **Check for schema drift** - REM auto-detects the `./models` directory:
|
|
707
|
+
```bash
|
|
708
|
+
rem db diff # Show pending changes (additive only)
|
|
709
|
+
rem db diff --strategy full # Include destructive changes
|
|
710
|
+
```
|
|
711
|
+
|
|
712
|
+
3. **Generate migration** (optional - for version-controlled SQL):
|
|
713
|
+
```bash
|
|
714
|
+
rem db diff --generate # Creates numbered .sql file
|
|
715
|
+
```
|
|
716
|
+
|
|
717
|
+
4. **Apply changes**:
|
|
718
|
+
```bash
|
|
719
|
+
rem db migrate # Apply all pending migrations
|
|
720
|
+
```
|
|
721
|
+
|
|
722
|
+
**Key points:**
|
|
723
|
+
- Models in `./models/` are auto-discovered (must have `__init__.py`)
|
|
724
|
+
- Or set `MODELS__IMPORT_MODULES=myapp.models` for custom paths
|
|
725
|
+
- `CoreModel` provides: `id`, `tenant_id`, `user_id`, `created_at`, `updated_at`, `deleted_at`, `graph_edges`, `metadata`, `tags`
|
|
726
|
+
- Fields named `content`, `description`, `summary`, `text`, `body`, `message`, `notes` get embeddings by default
|
|
727
|
+
- Use `Field(json_schema_extra={"embed": True})` to embed other fields
|
|
728
|
+
|
|
729
|
+
## Configuration
|
|
730
|
+
|
|
731
|
+
Environment variables:
|
|
732
|
+
|
|
733
|
+
```bash
|
|
734
|
+
# Database
|
|
735
|
+
POSTGRES__HOST=localhost
|
|
736
|
+
POSTGRES__PORT=5432
|
|
737
|
+
POSTGRES__DATABASE=remdb
|
|
738
|
+
POSTGRES__USER=rem_user
|
|
739
|
+
POSTGRES__PASSWORD=secret
|
|
740
|
+
POSTGRES__POOL_SIZE=20
|
|
741
|
+
|
|
742
|
+
# Embeddings
|
|
743
|
+
EMBEDDING__PROVIDER=openai
|
|
744
|
+
EMBEDDING__MODEL=text-embedding-3-small
|
|
745
|
+
EMBEDDING__DIMENSIONS=1536
|
|
746
|
+
EMBEDDING__BATCH_SIZE=2048
|
|
747
|
+
|
|
748
|
+
# Background indexing
|
|
749
|
+
BACKGROUND_INDEX__ENABLED=true
|
|
750
|
+
BACKGROUND_INDEX__CONCURRENCY=2
|
|
751
|
+
```
|
|
752
|
+
|
|
753
|
+
## See Also
|
|
754
|
+
|
|
755
|
+
- [register_type.py](./register_type.py) - Dynamic schema generation
|
|
756
|
+
- [schema_generator.py](./schema_generator.py) - Bulk schema generation
|
|
757
|
+
- [REM Query System](../../models/core/rem_query.py) - Query types and contracts
|