remdb 0.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +2 -0
- rem/agentic/README.md +650 -0
- rem/agentic/__init__.py +39 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +8 -0
- rem/agentic/context.py +148 -0
- rem/agentic/context_builder.py +329 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +107 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +151 -0
- rem/agentic/providers/phoenix.py +674 -0
- rem/agentic/providers/pydantic_ai.py +572 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +396 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +231 -0
- rem/api/README.md +420 -0
- rem/api/main.py +324 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +536 -0
- rem/api/mcp_router/server.py +213 -0
- rem/api/mcp_router/tools.py +584 -0
- rem/api/routers/auth.py +229 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/completions.py +281 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +124 -0
- rem/api/routers/chat/streaming.py +185 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +26 -0
- rem/auth/middleware.py +100 -0
- rem/auth/providers/__init__.py +13 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +455 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +126 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +565 -0
- rem/cli/commands/configure.py +423 -0
- rem/cli/commands/db.py +493 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1124 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +245 -0
- rem/cli/commands/schema.py +183 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +88 -0
- rem/config.py +237 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +64 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +628 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +243 -0
- rem/models/entities/__init__.py +43 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +35 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +191 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/user.py +85 -0
- rem/py.typed +0 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +128 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +16 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +806 -0
- rem/services/content/service.py +657 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +336 -0
- rem/services/dreaming/moment_service.py +264 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +120 -0
- rem/services/embeddings/worker.py +421 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +686 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +575 -0
- rem/services/postgres/__init__.py +23 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
- rem/services/postgres/register_type.py +352 -0
- rem/services/postgres/repository.py +337 -0
- rem/services/postgres/schema_generator.py +379 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +354 -0
- rem/services/rem/README.md +304 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +145 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +527 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +6 -0
- rem/services/session/compression.py +360 -0
- rem/services/session/reload.py +77 -0
- rem/settings.py +1235 -0
- rem/sql/002_install_models.sql +1068 -0
- rem/sql/background_indexes.sql +42 -0
- rem/sql/install_models.sql +1038 -0
- rem/sql/migrations/001_install.sql +503 -0
- rem/sql/migrations/002_install_models.sql +1202 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +583 -0
- rem/utils/__init__.py +43 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +423 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/markdown.py +16 -0
- rem/utils/model_helpers.py +236 -0
- rem/utils/schema_loader.py +229 -0
- rem/utils/sql_types.py +348 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +330 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +5 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- remdb-0.2.6.dist-info/METADATA +1191 -0
- remdb-0.2.6.dist-info/RECORD +187 -0
- remdb-0.2.6.dist-info/WHEEL +4 -0
- remdb-0.2.6.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,575 @@
|
|
|
1
|
+
# PostgresService - CloudNativePG Database Operations
|
|
2
|
+
|
|
3
|
+
Comprehensive service for PostgreSQL 18 with pgvector, including:
|
|
4
|
+
- Entity CRUD with automatic embeddings
|
|
5
|
+
- KV_STORE cache for O(1) lookups
|
|
6
|
+
- Fuzzy text search with pg_trgm
|
|
7
|
+
- Background index creation
|
|
8
|
+
- Batch operations with transaction management
|
|
9
|
+
|
|
10
|
+
## Architecture
|
|
11
|
+
|
|
12
|
+
```
|
|
13
|
+
┌─────────────────────────────────────────────────────────────┐
|
|
14
|
+
│ PostgresService │
|
|
15
|
+
├─────────────────────────────────────────────────────────────┤
|
|
16
|
+
│ │
|
|
17
|
+
│ ┌──────────────────────────────────────────────────────┐ │
|
|
18
|
+
│ │ Batch Upsert Pipeline │ │
|
|
19
|
+
│ │ 1. Validate entities │ │
|
|
20
|
+
│ │ 2. Generate embeddings (batch OpenAI API) │ │
|
|
21
|
+
│ │ 3. Upsert to primary tables │ │
|
|
22
|
+
│ │ 4. Upsert to embeddings_<table> │ │
|
|
23
|
+
│ │ 5. Upsert to KV_STORE (via trigger) │ │
|
|
24
|
+
│ └──────────────────────────────────────────────────────┘ │
|
|
25
|
+
│ │
|
|
26
|
+
│ ┌──────────────────────────────────────────────────────┐ │
|
|
27
|
+
│ │ Background Index Thread │ │
|
|
28
|
+
│ │ - Monitors pending indexes queue │ │
|
|
29
|
+
│ │ - Creates indexes CONCURRENTLY │ │
|
|
30
|
+
│ │ - Handles index creation failures │ │
|
|
31
|
+
│ └──────────────────────────────────────────────────────┘ │
|
|
32
|
+
│ │
|
|
33
|
+
└─────────────────────────────────────────────────────────────┘
|
|
34
|
+
|
|
35
|
+
┌─────────────────────────────────────────────────────────────┐
|
|
36
|
+
│ Database Schema │
|
|
37
|
+
├─────────────────────────────────────────────────────────────┤
|
|
38
|
+
│ │
|
|
39
|
+
│ Primary Tables: resources, moments, users, etc. │
|
|
40
|
+
│ Embeddings Tables: embeddings_resources, etc. │
|
|
41
|
+
│ KV_STORE Cache: UNLOGGED table for O(1) lookups │
|
|
42
|
+
│ │
|
|
43
|
+
└─────────────────────────────────────────────────────────────┘
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Core Design Patterns
|
|
47
|
+
|
|
48
|
+
### 1. Entity Storage Pattern
|
|
49
|
+
|
|
50
|
+
**Primary Tables** store entities with system fields:
|
|
51
|
+
```sql
|
|
52
|
+
CREATE TABLE resources (
|
|
53
|
+
id UUID PRIMARY KEY,
|
|
54
|
+
tenant_id VARCHAR(100), -- Optional: for future multi-tenant SaaS use
|
|
55
|
+
user_id VARCHAR(100) NOT NULL,
|
|
56
|
+
name TEXT NOT NULL,
|
|
57
|
+
description TEXT,
|
|
58
|
+
content TEXT,
|
|
59
|
+
graph_edges JSONB DEFAULT '[]',
|
|
60
|
+
metadata JSONB DEFAULT '{}',
|
|
61
|
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
62
|
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
63
|
+
deleted_at TIMESTAMP
|
|
64
|
+
);
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### 2. Embeddings Pattern
|
|
68
|
+
|
|
69
|
+
**Multiple embeddings per record** with provider flexibility:
|
|
70
|
+
|
|
71
|
+
```sql
|
|
72
|
+
CREATE TABLE embeddings_resources (
|
|
73
|
+
id UUID PRIMARY KEY,
|
|
74
|
+
entity_id UUID REFERENCES resources(id) ON DELETE CASCADE,
|
|
75
|
+
field_name VARCHAR(100) NOT NULL, -- 'description', 'content', etc.
|
|
76
|
+
provider VARCHAR(50) NOT NULL, -- 'openai', 'cohere', etc.
|
|
77
|
+
model VARCHAR(100) NOT NULL, -- 'text-embedding-3-small'
|
|
78
|
+
embedding vector(1536) NOT NULL,
|
|
79
|
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
80
|
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
81
|
+
UNIQUE (entity_id, field_name, provider)
|
|
82
|
+
);
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
**Key Features**:
|
|
86
|
+
- One row per (entity, field, provider)
|
|
87
|
+
- Unique constraint prevents duplicates
|
|
88
|
+
- Supports multiple embedding providers
|
|
89
|
+
- CASCADE delete when entity deleted
|
|
90
|
+
|
|
91
|
+
**Embedding Configuration**:
|
|
92
|
+
```python
|
|
93
|
+
from pydantic import BaseModel, Field
|
|
94
|
+
|
|
95
|
+
class Resource(BaseModel):
|
|
96
|
+
name: str
|
|
97
|
+
description: str = Field(
|
|
98
|
+
...,
|
|
99
|
+
json_schema_extra={"embed": True} # Explicit embedding
|
|
100
|
+
)
|
|
101
|
+
content: str # Auto-embeds (default for content fields)
|
|
102
|
+
notes: str = Field(
|
|
103
|
+
...,
|
|
104
|
+
json_schema_extra={"embed": False} # Disable embedding
|
|
105
|
+
)
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
**Default Embedding Fields** (if not explicitly disabled):
|
|
109
|
+
- `content`
|
|
110
|
+
- `description`
|
|
111
|
+
- `summary`
|
|
112
|
+
- `text`
|
|
113
|
+
- `body`
|
|
114
|
+
- `message`
|
|
115
|
+
- `notes`
|
|
116
|
+
|
|
117
|
+
### 3. KV_STORE Cache Pattern
|
|
118
|
+
|
|
119
|
+
**UNLOGGED table** for fast entity lookups:
|
|
120
|
+
|
|
121
|
+
```sql
|
|
122
|
+
CREATE UNLOGGED TABLE kv_store (
|
|
123
|
+
entity_key VARCHAR(255) NOT NULL, -- Natural language key
|
|
124
|
+
entity_type VARCHAR(100) NOT NULL, -- Table name
|
|
125
|
+
entity_id UUID NOT NULL, -- Foreign key to entity
|
|
126
|
+
tenant_id VARCHAR(100), -- Optional: for future multi-tenant SaaS use
|
|
127
|
+
user_id VARCHAR(100) NOT NULL, -- Primary isolation scope
|
|
128
|
+
content_summary TEXT, -- For fuzzy search
|
|
129
|
+
metadata JSONB DEFAULT '{}',
|
|
130
|
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
131
|
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
132
|
+
PRIMARY KEY (user_id, entity_key)
|
|
133
|
+
);
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
**Key Features**:
|
|
137
|
+
- UNLOGGED = faster writes, no WAL overhead
|
|
138
|
+
- Rebuilt automatically from primary tables on restart
|
|
139
|
+
- O(1) lookups by entity_key
|
|
140
|
+
- User-scoped filtering via the required `user_id` column (part of the primary key)
|
|
141
|
+
- Fuzzy search via pg_trgm indexes
|
|
142
|
+
|
|
143
|
+
**Trigger-based Updates**:
|
|
144
|
+
```sql
|
|
145
|
+
CREATE TRIGGER trg_resources_kv_store
|
|
146
|
+
AFTER INSERT OR UPDATE OR DELETE ON resources
|
|
147
|
+
FOR EACH ROW EXECUTE FUNCTION fn_resources_kv_store_upsert();
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
Automatically maintains KV_STORE on entity changes.
|
|
151
|
+
|
|
152
|
+
### 4. Batch Upsert Pattern
|
|
153
|
+
|
|
154
|
+
**Efficient bulk operations** with automatic embedding generation:
|
|
155
|
+
|
|
156
|
+
```python
|
|
157
|
+
from rem.services import PostgresService
|
|
158
|
+
|
|
159
|
+
service = PostgresService(connection_string)
|
|
160
|
+
|
|
161
|
+
# Batch upsert entities
|
|
162
|
+
entities = [
|
|
163
|
+
Resource(name="doc-1", description="First document", content="..."),
|
|
164
|
+
Resource(name="doc-2", description="Second document", content="..."),
|
|
165
|
+
]
|
|
166
|
+
|
|
167
|
+
result = await service.batch_upsert(
|
|
168
|
+
table_name="resources",
|
|
169
|
+
entities=entities,
|
|
170
|
+
entity_key_field="name",
|
|
171
|
+
generate_embeddings=True, # Auto-generate embeddings
|
|
172
|
+
embedding_provider="openai",
|
|
173
|
+
embedding_model="text-embedding-3-small"
|
|
174
|
+
)
|
|
175
|
+
|
|
176
|
+
# Result:
|
|
177
|
+
# {
|
|
178
|
+
# "inserted": 2,
|
|
179
|
+
# "updated": 0,
|
|
180
|
+
# "embeddings_generated": 4, # 2 entities × 2 fields (description, content)
|
|
181
|
+
# "kv_entries": 2
|
|
182
|
+
# }
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
**Pipeline Steps**:
|
|
186
|
+
1. **Validate** entities against Pydantic model
|
|
187
|
+
2. **Generate embeddings** in batch (OpenAI API supports up to 2048 texts)
|
|
188
|
+
3. **Upsert entities** to primary table (ON CONFLICT DO UPDATE)
|
|
189
|
+
4. **Upsert embeddings** to `embeddings_<table>`
|
|
190
|
+
5. **Update KV_STORE** (automatic via trigger)
|
|
191
|
+
6. **Queue background indexes** if needed
|
|
192
|
+
|
|
193
|
+
### 5. Embedding Generation Pattern
|
|
194
|
+
|
|
195
|
+
**Batch OpenAI API calls** for performance:
|
|
196
|
+
|
|
197
|
+
```python
|
|
198
|
+
# Collect all texts to embed
|
|
199
|
+
texts_to_embed = []
|
|
200
|
+
for entity in entities:
|
|
201
|
+
for field_name in embeddable_fields:
|
|
202
|
+
text = getattr(entity, field_name)
|
|
203
|
+
if text:
|
|
204
|
+
texts_to_embed.append({
|
|
205
|
+
"text": text,
|
|
206
|
+
"entity_id": entity.id,
|
|
207
|
+
"field_name": field_name
|
|
208
|
+
})
|
|
209
|
+
|
|
210
|
+
# Batch generate embeddings (up to 2048 texts per call)
|
|
211
|
+
embeddings = await generate_embeddings_batch(
|
|
212
|
+
texts=[item["text"] for item in texts_to_embed],
|
|
213
|
+
provider="openai",
|
|
214
|
+
model="text-embedding-3-small"
|
|
215
|
+
)
|
|
216
|
+
|
|
217
|
+
# Map embeddings back to entities and fields
|
|
218
|
+
for item, embedding in zip(texts_to_embed, embeddings):
|
|
219
|
+
await upsert_embedding(
|
|
220
|
+
entity_id=item["entity_id"],
|
|
221
|
+
field_name=item["field_name"],
|
|
222
|
+
provider="openai",
|
|
223
|
+
model="text-embedding-3-small",
|
|
224
|
+
embedding=embedding
|
|
225
|
+
)
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
**Supported Providers**:
|
|
229
|
+
- `openai` - text-embedding-3-small, text-embedding-3-large
|
|
230
|
+
- `cohere` - embed-english-v3.0, embed-multilingual-v3.0
|
|
231
|
+
- Custom providers via plugin system
|
|
232
|
+
|
|
233
|
+
### 6. Background Index Creation Pattern
|
|
234
|
+
|
|
235
|
+
**Non-blocking index creation** after data load:
|
|
236
|
+
|
|
237
|
+
```python
|
|
238
|
+
# Index creation thread
|
|
239
|
+
class BackgroundIndexer:
|
|
240
|
+
def __init__(self, postgres_service):
|
|
241
|
+
self.service = postgres_service
|
|
242
|
+
self.queue = asyncio.Queue()
|
|
243
|
+
self.running = False
|
|
244
|
+
|
|
245
|
+
async def queue_index(self, table_name: str, index_type: str):
|
|
246
|
+
"""Queue an index for background creation."""
|
|
247
|
+
await self.queue.put({
|
|
248
|
+
"table_name": table_name,
|
|
249
|
+
"index_type": index_type,
|
|
250
|
+
"attempts": 0
|
|
251
|
+
})
|
|
252
|
+
|
|
253
|
+
async def run(self):
|
|
254
|
+
"""Background asyncio task (not an OS thread) that creates indexes CONCURRENTLY."""
|
|
255
|
+
self.running = True
|
|
256
|
+
while self.running:
|
|
257
|
+
try:
|
|
258
|
+
item = await asyncio.wait_for(self.queue.get(), timeout=5.0)
|
|
259
|
+
|
|
260
|
+
# Create index CONCURRENTLY (non-blocking)
|
|
261
|
+
await self.service.create_index_concurrently(
|
|
262
|
+
table_name=item["table_name"],
|
|
263
|
+
index_type=item["index_type"]
|
|
264
|
+
)
|
|
265
|
+
|
|
266
|
+
logger.info(f"Created index for {item['table_name']}")
|
|
267
|
+
|
|
268
|
+
except asyncio.TimeoutError:
|
|
269
|
+
continue
|
|
270
|
+
except Exception as e:
|
|
271
|
+
# Retry with backoff
|
|
272
|
+
if item["attempts"] < 3:
|
|
273
|
+
item["attempts"] += 1
|
|
274
|
+
await asyncio.sleep(2 ** item["attempts"])
|
|
275
|
+
await self.queue.put(item)
|
|
276
|
+
else:
|
|
277
|
+
logger.error(f"Failed to create index after 3 attempts: {e}")
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
**Index Types**:
|
|
281
|
+
- **HNSW** for vector similarity (embeddings)
|
|
282
|
+
- **GIN** for JSONB (graph_edges, metadata)
|
|
283
|
+
- **GIN with pg_trgm** for fuzzy text search
|
|
284
|
+
- **B-tree** for foreign keys and common filters
|
|
285
|
+
|
|
286
|
+
### 7. REM Query Integration
|
|
287
|
+
|
|
288
|
+
**LOOKUP Queries** use KV_STORE for O(1) access:
|
|
289
|
+
|
|
290
|
+
```python
|
|
291
|
+
# REM LOOKUP query
|
|
292
|
+
result = await service.lookup_entity(
|
|
293
|
+
entity_key="sarah-chen",
|
|
294
|
+
user_id="user123"
|
|
295
|
+
)
|
|
296
|
+
|
|
297
|
+
# SQL:
|
|
298
|
+
# SELECT entity_id, entity_type, metadata
|
|
299
|
+
# FROM kv_store
|
|
300
|
+
# WHERE user_id = $1 AND entity_key = $2;
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
**FUZZY Queries** use pg_trgm indexes:
|
|
304
|
+
|
|
305
|
+
```python
|
|
306
|
+
# REM FUZZY query
|
|
307
|
+
results = await service.fuzzy_search(
|
|
308
|
+
query="sara",
|
|
309
|
+
user_id="user123",
|
|
310
|
+
threshold=0.3,
|
|
311
|
+
limit=10
|
|
312
|
+
)
|
|
313
|
+
|
|
314
|
+
# SQL:
|
|
315
|
+
# SELECT entity_key, entity_type, similarity(entity_key, $1) AS score
|
|
316
|
+
# FROM kv_store
|
|
317
|
+
# WHERE user_id = $2 AND entity_key % $1
|
|
318
|
+
# ORDER BY score DESC
|
|
319
|
+
# LIMIT $3;
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
**SEARCH Queries** use vector similarity:
|
|
323
|
+
|
|
324
|
+
```python
|
|
325
|
+
# REM SEARCH query
|
|
326
|
+
results = await service.vector_search(
|
|
327
|
+
table_name="resources",
|
|
328
|
+
query_text="machine learning documentation",
|
|
329
|
+
field_name="content",
|
|
330
|
+
user_id="user123",
|
|
331
|
+
limit=10,
|
|
332
|
+
min_similarity=0.7
|
|
333
|
+
)
|
|
334
|
+
|
|
335
|
+
# SQL:
|
|
336
|
+
# SELECT r.*, 1 - (e.embedding <=> $1) AS similarity
|
|
337
|
+
# FROM resources r
|
|
338
|
+
# JOIN embeddings_resources e ON e.entity_id = r.id
|
|
339
|
+
# WHERE r.user_id = $2
|
|
340
|
+
# AND e.field_name = 'content'
|
|
341
|
+
# AND e.provider = 'openai'
|
|
342
|
+
# AND 1 - (e.embedding <=> $1) >= $3
|
|
343
|
+
# ORDER BY e.embedding <=> $1
|
|
344
|
+
# LIMIT $4;
|
|
345
|
+
```
|
|
346
|
+
|
|
347
|
+
## Usage Examples
|
|
348
|
+
|
|
349
|
+
### Initialize Service
|
|
350
|
+
|
|
351
|
+
```python
|
|
352
|
+
from rem.services.postgres import PostgresService, Repository
|
|
353
|
+
|
|
354
|
+
service = PostgresService(
|
|
355
|
+
connection_string="postgresql://user:pass@localhost/remdb",
|
|
356
|
+
pool_size=20
|
|
357
|
+
)
|
|
358
|
+
|
|
359
|
+
await service.connect()
|
|
360
|
+
```
|
|
361
|
+
|
|
362
|
+
### Using Repository Pattern
|
|
363
|
+
|
|
364
|
+
**Generic Repository** for simple CRUD operations:
|
|
365
|
+
|
|
366
|
+
```python
|
|
367
|
+
from rem.services.postgres import Repository
|
|
368
|
+
from rem.models.entities import Message, Resource
|
|
369
|
+
|
|
370
|
+
# Create repository for any model
|
|
371
|
+
message_repo = Repository(Message)
|
|
372
|
+
resource_repo = Repository(Resource)
|
|
373
|
+
|
|
374
|
+
# Create single record
|
|
375
|
+
message = Message(
|
|
376
|
+
content="Hello, world!",
|
|
377
|
+
message_type="user",
|
|
378
|
+
session_id="session-123",
|
|
379
|
+
user_id="user123"
|
|
380
|
+
)
|
|
381
|
+
created = await message_repo.upsert(message)
|
|
382
|
+
|
|
383
|
+
# Upsert also accepts lists (no need for separate batch method)
|
|
384
|
+
messages = [message1, message2, message3]
|
|
385
|
+
created_messages = await message_repo.upsert(messages)
|
|
386
|
+
|
|
387
|
+
# Find records
|
|
388
|
+
messages = await message_repo.find({
|
|
389
|
+
"session_id": "session-123",
|
|
390
|
+
"user_id": "user123"
|
|
391
|
+
}, order_by="created_at ASC", limit=100)
|
|
392
|
+
|
|
393
|
+
# Get by ID
|
|
394
|
+
message = await message_repo.get_by_id("msg-id", "user123")
|
|
395
|
+
|
|
396
|
+
# Get by session (convenience method)
|
|
397
|
+
session_messages = await message_repo.get_by_session(
|
|
398
|
+
session_id="session-123",
|
|
399
|
+
user_id="user123"
|
|
400
|
+
)
|
|
401
|
+
|
|
402
|
+
# Count
|
|
403
|
+
count = await message_repo.count({"session_id": "session-123"})
|
|
404
|
+
|
|
405
|
+
# Delete (soft delete)
|
|
406
|
+
deleted = await message_repo.delete("msg-id", "user123")
|
|
407
|
+
```
|
|
408
|
+
|
|
409
|
+
**When to use Repository vs PostgresService:**
|
|
410
|
+
- **Repository**: Simple CRUD, session management, high-level operations
|
|
411
|
+
- **PostgresService**: Batch operations with embeddings, custom queries, performance-critical code
|
|
412
|
+
|
|
413
|
+
### Register Entity Types
|
|
414
|
+
|
|
415
|
+
```python
|
|
416
|
+
from rem.services.postgres.register_type import register_type
|
|
417
|
+
from rem.models.entities import Resource
|
|
418
|
+
|
|
419
|
+
# Register Resource model
|
|
420
|
+
schema = await register_type(
|
|
421
|
+
model=Resource,
|
|
422
|
+
table_name="resources",
|
|
423
|
+
entity_key_field="name",
|
|
424
|
+
tenant_scoped=True,
|
|
425
|
+
create_embeddings=True,
|
|
426
|
+
create_kv_trigger=True
|
|
427
|
+
)
|
|
428
|
+
|
|
429
|
+
# Execute generated SQL
|
|
430
|
+
await service.execute(schema["sql"]["table"])
|
|
431
|
+
await service.execute(schema["sql"]["embeddings"])
|
|
432
|
+
await service.execute(schema["sql"]["kv_trigger"])
|
|
433
|
+
```
|
|
434
|
+
|
|
435
|
+
### Batch Upsert with Embeddings
|
|
436
|
+
|
|
437
|
+
```python
|
|
438
|
+
# Create entities
|
|
439
|
+
resources = [
|
|
440
|
+
Resource(
|
|
441
|
+
name="api-design-doc",
|
|
442
|
+
description="API design guidelines",
|
|
443
|
+
content="RESTful API best practices..."
|
|
444
|
+
),
|
|
445
|
+
Resource(
|
|
446
|
+
name="deployment-guide",
|
|
447
|
+
description="Kubernetes deployment guide",
|
|
448
|
+
content="Deploy to EKS with Karpenter..."
|
|
449
|
+
)
|
|
450
|
+
]
|
|
451
|
+
|
|
452
|
+
# Batch upsert
|
|
453
|
+
result = await service.batch_upsert(
|
|
454
|
+
table_name="resources",
|
|
455
|
+
entities=resources,
|
|
456
|
+
user_id="user123",
|
|
457
|
+
generate_embeddings=True
|
|
458
|
+
)
|
|
459
|
+
|
|
460
|
+
print(f"Inserted: {result['inserted']}")
|
|
461
|
+
print(f"Embeddings: {result['embeddings_generated']}")
|
|
462
|
+
```
|
|
463
|
+
|
|
464
|
+
### Query Operations
|
|
465
|
+
|
|
466
|
+
```python
|
|
467
|
+
# LOOKUP by natural key
|
|
468
|
+
entity = await service.lookup_entity(
|
|
469
|
+
entity_key="api-design-doc",
|
|
470
|
+
user_id="user123"
|
|
471
|
+
)
|
|
472
|
+
|
|
473
|
+
# FUZZY search
|
|
474
|
+
results = await service.fuzzy_search(
|
|
475
|
+
query="api design",
|
|
476
|
+
user_id="user123",
|
|
477
|
+
threshold=0.3,
|
|
478
|
+
limit=5
|
|
479
|
+
)
|
|
480
|
+
|
|
481
|
+
# SEARCH by semantic similarity
|
|
482
|
+
results = await service.vector_search(
|
|
483
|
+
table_name="resources",
|
|
484
|
+
query_text="how to deploy kubernetes",
|
|
485
|
+
field_name="content",
|
|
486
|
+
user_id="user123",
|
|
487
|
+
limit=10
|
|
488
|
+
)
|
|
489
|
+
```
|
|
490
|
+
|
|
491
|
+
## Performance Considerations
|
|
492
|
+
|
|
493
|
+
### Batch Size Optimization
|
|
494
|
+
|
|
495
|
+
- **Embeddings**: OpenAI supports up to 2048 texts per call
|
|
496
|
+
- **Inserts**: Batch 100-500 rows per transaction
|
|
497
|
+
- **Connection pool**: Size based on workload (default: 20)
|
|
498
|
+
|
|
499
|
+
### Index Strategy
|
|
500
|
+
|
|
501
|
+
- **Foreground indexes**: Critical for queries (tenant_id, user_id)
|
|
502
|
+
- **Background indexes**: HNSW for vectors, created CONCURRENTLY
|
|
503
|
+
- **GIN indexes**: For JSONB fields (graph_edges, metadata)
|
|
504
|
+
|
|
505
|
+
### KV_STORE Maintenance
|
|
506
|
+
|
|
507
|
+
- UNLOGGED table = faster writes, but its contents are lost after a crash or unclean shutdown
|
|
508
|
+
- Rebuild from primary tables on startup
|
|
509
|
+
- Vacuum regularly to reclaim space
|
|
510
|
+
|
|
511
|
+
### Memory Usage
|
|
512
|
+
|
|
513
|
+
- Vector indexes can be memory-intensive
|
|
514
|
+
- HNSW parameters: `m=16, ef_construction=64` (tunable)
|
|
515
|
+
- Monitor shared_buffers and work_mem
|
|
516
|
+
|
|
517
|
+
## Migrations
|
|
518
|
+
|
|
519
|
+
Run migrations in order:
|
|
520
|
+
|
|
521
|
+
```bash
|
|
522
|
+
psql -d remdb -f sql/migrations/001_setup_extensions.sql
|
|
523
|
+
psql -d remdb -f sql/migrations/002_kv_store_cache.sql
|
|
524
|
+
psql -d remdb -f sql/generated_schema.sql
|
|
525
|
+
```
|
|
526
|
+
|
|
527
|
+
Background indexes (after data load):
|
|
528
|
+
|
|
529
|
+
```bash
|
|
530
|
+
psql -d remdb -f sql/background_indexes.sql
|
|
531
|
+
```
|
|
532
|
+
|
|
533
|
+
## CLI Usage
|
|
534
|
+
|
|
535
|
+
Generate schema from models:
|
|
536
|
+
|
|
537
|
+
```bash
|
|
538
|
+
rem schema generate --models src/rem/models/entities --output sql/schema.sql
|
|
539
|
+
```
|
|
540
|
+
|
|
541
|
+
Validate models:
|
|
542
|
+
|
|
543
|
+
```bash
|
|
544
|
+
rem schema validate --models src/rem/models/entities
|
|
545
|
+
```
|
|
546
|
+
|
|
547
|
+
## Configuration
|
|
548
|
+
|
|
549
|
+
Environment variables:
|
|
550
|
+
|
|
551
|
+
```bash
|
|
552
|
+
# Database
|
|
553
|
+
POSTGRES__HOST=localhost
|
|
554
|
+
POSTGRES__PORT=5432
|
|
555
|
+
POSTGRES__DATABASE=remdb
|
|
556
|
+
POSTGRES__USER=rem_user
|
|
557
|
+
POSTGRES__PASSWORD=secret
|
|
558
|
+
POSTGRES__POOL_SIZE=20
|
|
559
|
+
|
|
560
|
+
# Embeddings
|
|
561
|
+
EMBEDDING__PROVIDER=openai
|
|
562
|
+
EMBEDDING__MODEL=text-embedding-3-small
|
|
563
|
+
EMBEDDING__DIMENSIONS=1536
|
|
564
|
+
EMBEDDING__BATCH_SIZE=2048
|
|
565
|
+
|
|
566
|
+
# Background indexing
|
|
567
|
+
BACKGROUND_INDEX__ENABLED=true
|
|
568
|
+
BACKGROUND_INDEX__CONCURRENCY=2
|
|
569
|
+
```
|
|
570
|
+
|
|
571
|
+
## See Also
|
|
572
|
+
|
|
573
|
+
- [register_type.py](./register_type.py) - Dynamic schema generation
|
|
574
|
+
- [schema_generator.py](./schema_generator.py) - Bulk schema generation
|
|
575
|
+
- [REM Query System](../../models/core/rem_query.py) - Query types and contracts
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""
|
|
2
|
+
PostgreSQL service for CloudNativePG database operations.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from .repository import Repository
|
|
6
|
+
from .service import PostgresService
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def get_postgres_service() -> PostgresService | None:
    """
    Build a PostgresService when Postgres is enabled in settings.

    The settings object is imported at call time rather than at module
    import time.

    Returns:
        A newly constructed PostgresService, or None when
        ``settings.postgres.enabled`` is falsy.
    """
    from ...settings import settings

    return PostgresService() if settings.postgres.enabled else None
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
__all__ = ["PostgresService", "get_postgres_service", "Repository"]
|