remdb 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (187) hide show
  1. rem/__init__.py +2 -0
  2. rem/agentic/README.md +650 -0
  3. rem/agentic/__init__.py +39 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +8 -0
  6. rem/agentic/context.py +148 -0
  7. rem/agentic/context_builder.py +329 -0
  8. rem/agentic/mcp/__init__.py +0 -0
  9. rem/agentic/mcp/tool_wrapper.py +107 -0
  10. rem/agentic/otel/__init__.py +5 -0
  11. rem/agentic/otel/setup.py +151 -0
  12. rem/agentic/providers/phoenix.py +674 -0
  13. rem/agentic/providers/pydantic_ai.py +572 -0
  14. rem/agentic/query.py +117 -0
  15. rem/agentic/query_helper.py +89 -0
  16. rem/agentic/schema.py +396 -0
  17. rem/agentic/serialization.py +245 -0
  18. rem/agentic/tools/__init__.py +5 -0
  19. rem/agentic/tools/rem_tools.py +231 -0
  20. rem/api/README.md +420 -0
  21. rem/api/main.py +324 -0
  22. rem/api/mcp_router/prompts.py +182 -0
  23. rem/api/mcp_router/resources.py +536 -0
  24. rem/api/mcp_router/server.py +213 -0
  25. rem/api/mcp_router/tools.py +584 -0
  26. rem/api/routers/auth.py +229 -0
  27. rem/api/routers/chat/__init__.py +5 -0
  28. rem/api/routers/chat/completions.py +281 -0
  29. rem/api/routers/chat/json_utils.py +76 -0
  30. rem/api/routers/chat/models.py +124 -0
  31. rem/api/routers/chat/streaming.py +185 -0
  32. rem/auth/README.md +258 -0
  33. rem/auth/__init__.py +26 -0
  34. rem/auth/middleware.py +100 -0
  35. rem/auth/providers/__init__.py +13 -0
  36. rem/auth/providers/base.py +376 -0
  37. rem/auth/providers/google.py +163 -0
  38. rem/auth/providers/microsoft.py +237 -0
  39. rem/cli/README.md +455 -0
  40. rem/cli/__init__.py +8 -0
  41. rem/cli/commands/README.md +126 -0
  42. rem/cli/commands/__init__.py +3 -0
  43. rem/cli/commands/ask.py +565 -0
  44. rem/cli/commands/configure.py +423 -0
  45. rem/cli/commands/db.py +493 -0
  46. rem/cli/commands/dreaming.py +324 -0
  47. rem/cli/commands/experiments.py +1124 -0
  48. rem/cli/commands/mcp.py +66 -0
  49. rem/cli/commands/process.py +245 -0
  50. rem/cli/commands/schema.py +183 -0
  51. rem/cli/commands/serve.py +106 -0
  52. rem/cli/dreaming.py +363 -0
  53. rem/cli/main.py +88 -0
  54. rem/config.py +237 -0
  55. rem/mcp_server.py +41 -0
  56. rem/models/core/__init__.py +49 -0
  57. rem/models/core/core_model.py +64 -0
  58. rem/models/core/engram.py +333 -0
  59. rem/models/core/experiment.py +628 -0
  60. rem/models/core/inline_edge.py +132 -0
  61. rem/models/core/rem_query.py +243 -0
  62. rem/models/entities/__init__.py +43 -0
  63. rem/models/entities/file.py +57 -0
  64. rem/models/entities/image_resource.py +88 -0
  65. rem/models/entities/message.py +35 -0
  66. rem/models/entities/moment.py +123 -0
  67. rem/models/entities/ontology.py +191 -0
  68. rem/models/entities/ontology_config.py +131 -0
  69. rem/models/entities/resource.py +95 -0
  70. rem/models/entities/schema.py +87 -0
  71. rem/models/entities/user.py +85 -0
  72. rem/py.typed +0 -0
  73. rem/schemas/README.md +507 -0
  74. rem/schemas/__init__.py +6 -0
  75. rem/schemas/agents/README.md +92 -0
  76. rem/schemas/agents/core/moment-builder.yaml +178 -0
  77. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  78. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  79. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  80. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  81. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  82. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  83. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  84. rem/schemas/agents/examples/hello-world.yaml +37 -0
  85. rem/schemas/agents/examples/query.yaml +54 -0
  86. rem/schemas/agents/examples/simple.yaml +21 -0
  87. rem/schemas/agents/examples/test.yaml +29 -0
  88. rem/schemas/agents/rem.yaml +128 -0
  89. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  90. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  91. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  92. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  93. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  94. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  95. rem/services/__init__.py +16 -0
  96. rem/services/audio/INTEGRATION.md +308 -0
  97. rem/services/audio/README.md +376 -0
  98. rem/services/audio/__init__.py +15 -0
  99. rem/services/audio/chunker.py +354 -0
  100. rem/services/audio/transcriber.py +259 -0
  101. rem/services/content/README.md +1269 -0
  102. rem/services/content/__init__.py +5 -0
  103. rem/services/content/providers.py +806 -0
  104. rem/services/content/service.py +657 -0
  105. rem/services/dreaming/README.md +230 -0
  106. rem/services/dreaming/__init__.py +53 -0
  107. rem/services/dreaming/affinity_service.py +336 -0
  108. rem/services/dreaming/moment_service.py +264 -0
  109. rem/services/dreaming/ontology_service.py +54 -0
  110. rem/services/dreaming/user_model_service.py +297 -0
  111. rem/services/dreaming/utils.py +39 -0
  112. rem/services/embeddings/__init__.py +11 -0
  113. rem/services/embeddings/api.py +120 -0
  114. rem/services/embeddings/worker.py +421 -0
  115. rem/services/fs/README.md +662 -0
  116. rem/services/fs/__init__.py +62 -0
  117. rem/services/fs/examples.py +206 -0
  118. rem/services/fs/examples_paths.py +204 -0
  119. rem/services/fs/git_provider.py +935 -0
  120. rem/services/fs/local_provider.py +760 -0
  121. rem/services/fs/parsing-hooks-examples.md +172 -0
  122. rem/services/fs/paths.py +276 -0
  123. rem/services/fs/provider.py +460 -0
  124. rem/services/fs/s3_provider.py +1042 -0
  125. rem/services/fs/service.py +186 -0
  126. rem/services/git/README.md +1075 -0
  127. rem/services/git/__init__.py +17 -0
  128. rem/services/git/service.py +469 -0
  129. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  130. rem/services/phoenix/README.md +453 -0
  131. rem/services/phoenix/__init__.py +46 -0
  132. rem/services/phoenix/client.py +686 -0
  133. rem/services/phoenix/config.py +88 -0
  134. rem/services/phoenix/prompt_labels.py +477 -0
  135. rem/services/postgres/README.md +575 -0
  136. rem/services/postgres/__init__.py +23 -0
  137. rem/services/postgres/migration_service.py +427 -0
  138. rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
  139. rem/services/postgres/register_type.py +352 -0
  140. rem/services/postgres/repository.py +337 -0
  141. rem/services/postgres/schema_generator.py +379 -0
  142. rem/services/postgres/service.py +802 -0
  143. rem/services/postgres/sql_builder.py +354 -0
  144. rem/services/rem/README.md +304 -0
  145. rem/services/rem/__init__.py +23 -0
  146. rem/services/rem/exceptions.py +71 -0
  147. rem/services/rem/executor.py +293 -0
  148. rem/services/rem/parser.py +145 -0
  149. rem/services/rem/queries.py +196 -0
  150. rem/services/rem/query.py +371 -0
  151. rem/services/rem/service.py +527 -0
  152. rem/services/session/README.md +374 -0
  153. rem/services/session/__init__.py +6 -0
  154. rem/services/session/compression.py +360 -0
  155. rem/services/session/reload.py +77 -0
  156. rem/settings.py +1235 -0
  157. rem/sql/002_install_models.sql +1068 -0
  158. rem/sql/background_indexes.sql +42 -0
  159. rem/sql/install_models.sql +1038 -0
  160. rem/sql/migrations/001_install.sql +503 -0
  161. rem/sql/migrations/002_install_models.sql +1202 -0
  162. rem/utils/AGENTIC_CHUNKING.md +597 -0
  163. rem/utils/README.md +583 -0
  164. rem/utils/__init__.py +43 -0
  165. rem/utils/agentic_chunking.py +622 -0
  166. rem/utils/batch_ops.py +343 -0
  167. rem/utils/chunking.py +108 -0
  168. rem/utils/clip_embeddings.py +276 -0
  169. rem/utils/dict_utils.py +98 -0
  170. rem/utils/embeddings.py +423 -0
  171. rem/utils/examples/embeddings_example.py +305 -0
  172. rem/utils/examples/sql_types_example.py +202 -0
  173. rem/utils/markdown.py +16 -0
  174. rem/utils/model_helpers.py +236 -0
  175. rem/utils/schema_loader.py +229 -0
  176. rem/utils/sql_types.py +348 -0
  177. rem/utils/user_id.py +81 -0
  178. rem/utils/vision.py +330 -0
  179. rem/workers/README.md +506 -0
  180. rem/workers/__init__.py +5 -0
  181. rem/workers/dreaming.py +502 -0
  182. rem/workers/engram_processor.py +312 -0
  183. rem/workers/sqs_file_processor.py +193 -0
  184. remdb-0.2.6.dist-info/METADATA +1191 -0
  185. remdb-0.2.6.dist-info/RECORD +187 -0
  186. remdb-0.2.6.dist-info/WHEEL +4 -0
  187. remdb-0.2.6.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,575 @@
1
+ # PostgresService - CloudNativePG Database Operations
2
+
3
+ Comprehensive service for PostgreSQL 18 with pgvector, including:
4
+ - Entity CRUD with automatic embeddings
5
+ - KV_STORE cache for O(1) lookups
6
+ - Fuzzy text search with pg_trgm
7
+ - Background index creation
8
+ - Batch operations with transaction management
9
+
10
+ ## Architecture
11
+
12
+ ```
13
+ ┌─────────────────────────────────────────────────────────────┐
14
+ │ PostgresService │
15
+ ├─────────────────────────────────────────────────────────────┤
16
+ │ │
17
+ │ ┌──────────────────────────────────────────────────────┐ │
18
+ │ │ Batch Upsert Pipeline │ │
19
+ │ │ 1. Validate entities │ │
20
+ │ │ 2. Generate embeddings (batch OpenAI API) │ │
21
+ │ │ 3. Upsert to primary tables │ │
22
+ │ │ 4. Upsert to embeddings_<table> │ │
23
+ │ │ 5. Upsert to KV_STORE (via trigger) │ │
24
+ │ └──────────────────────────────────────────────────────┘ │
25
+ │ │
26
+ │ ┌──────────────────────────────────────────────────────┐ │
27
+ │ │ Background Index Thread │ │
28
+ │ │ - Monitors pending indexes queue │ │
29
+ │ │ - Creates indexes CONCURRENTLY │ │
30
+ │ │ - Handles index creation failures │ │
31
+ │ └──────────────────────────────────────────────────────┘ │
32
+ │ │
33
+ └─────────────────────────────────────────────────────────────┘
34
+
35
+ ┌─────────────────────────────────────────────────────────────┐
36
+ │ Database Schema │
37
+ ├─────────────────────────────────────────────────────────────┤
38
+ │ │
39
+ │ Primary Tables: resources, moments, users, etc. │
40
+ │ Embeddings Tables: embeddings_resources, etc. │
41
+ │ KV_STORE Cache: UNLOGGED table for O(1) lookups │
42
+ │ │
43
+ └─────────────────────────────────────────────────────────────┘
44
+ ```
45
+
46
+ ## Core Design Patterns
47
+
48
+ ### 1. Entity Storage Pattern
49
+
50
+ **Primary Tables** store entities with system fields:
51
+ ```sql
52
+ CREATE TABLE resources (
53
+ id UUID PRIMARY KEY,
54
+ tenant_id VARCHAR(100), -- Optional: for future multi-tenant SaaS use
55
+ user_id VARCHAR(100) NOT NULL,
56
+ name TEXT NOT NULL,
57
+ description TEXT,
58
+ content TEXT,
59
+ graph_edges JSONB DEFAULT '[]',
60
+ metadata JSONB DEFAULT '{}',
61
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
62
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
63
+ deleted_at TIMESTAMP
64
+ );
65
+ ```
66
+
67
+ ### 2. Embeddings Pattern
68
+
69
+ **Multiple embeddings per record** with provider flexibility:
70
+
71
+ ```sql
72
+ CREATE TABLE embeddings_resources (
73
+ id UUID PRIMARY KEY,
74
+ entity_id UUID REFERENCES resources(id) ON DELETE CASCADE,
75
+ field_name VARCHAR(100) NOT NULL, -- 'description', 'content', etc.
76
+ provider VARCHAR(50) NOT NULL, -- 'openai', 'cohere', etc.
77
+ model VARCHAR(100) NOT NULL, -- 'text-embedding-3-small'
78
+ embedding vector(1536) NOT NULL,
79
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
80
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
81
+ UNIQUE (entity_id, field_name, provider)
82
+ );
83
+ ```
84
+
85
+ **Key Features**:
86
+ - One row per (entity, field, provider)
87
+ - Unique constraint prevents duplicates
88
+ - Supports multiple embedding providers
89
+ - CASCADE delete when entity deleted
90
+
91
+ **Embedding Configuration**:
92
+ ```python
93
+ from pydantic import BaseModel, Field
94
+
95
+ class Resource(BaseModel):
96
+ name: str
97
+ description: str = Field(
98
+ ...,
99
+ json_schema_extra={"embed": True} # Explicit embedding
100
+ )
101
+ content: str # Auto-embeds (default for content fields)
102
+ notes: str = Field(
103
+ ...,
104
+ json_schema_extra={"embed": False} # Disable embedding
105
+ )
106
+ ```
107
+
108
+ **Default Embedding Fields** (if not explicitly disabled):
109
+ - `content`
110
+ - `description`
111
+ - `summary`
112
+ - `text`
113
+ - `body`
114
+ - `message`
115
+ - `notes`
116
+
117
+ ### 3. KV_STORE Cache Pattern
118
+
119
+ **UNLOGGED table** for fast entity lookups:
120
+
121
+ ```sql
122
+ CREATE UNLOGGED TABLE kv_store (
123
+ entity_key VARCHAR(255) NOT NULL, -- Natural language key
124
+ entity_type VARCHAR(100) NOT NULL, -- Table name
125
+ entity_id UUID NOT NULL, -- Foreign key to entity
126
+ tenant_id VARCHAR(100), -- Optional: for future multi-tenant SaaS use
127
+ user_id VARCHAR(100) NOT NULL, -- Primary isolation scope
128
+ content_summary TEXT, -- For fuzzy search
129
+ metadata JSONB DEFAULT '{}',
130
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
131
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
132
+ PRIMARY KEY (user_id, entity_key)
133
+ );
134
+ ```
135
+
136
+ **Key Features**:
137
+ - UNLOGGED = faster writes, no WAL overhead
138
+ - Rebuilt automatically from primary tables on restart
139
+ - O(1) lookups by entity_key
140
+ - User-scoped filtering when `user_id IS NOT NULL`
141
+ - Fuzzy search via pg_trgm indexes
142
+
143
+ **Trigger-based Updates**:
144
+ ```sql
145
+ CREATE TRIGGER trg_resources_kv_store
146
+ AFTER INSERT OR UPDATE OR DELETE ON resources
147
+ FOR EACH ROW EXECUTE FUNCTION fn_resources_kv_store_upsert();
148
+ ```
149
+
150
+ Automatically maintains KV_STORE on entity changes.
151
+
152
+ ### 4. Batch Upsert Pattern
153
+
154
+ **Efficient bulk operations** with automatic embedding generation:
155
+
156
+ ```python
157
+ from rem.services import PostgresService
158
+
159
+ service = PostgresService(connection_string)
160
+
161
+ # Batch upsert entities
162
+ entities = [
163
+ Resource(name="doc-1", description="First document", content="..."),
164
+ Resource(name="doc-2", description="Second document", content="..."),
165
+ ]
166
+
167
+ result = await service.batch_upsert(
168
+ table_name="resources",
169
+ entities=entities,
170
+ entity_key_field="name",
171
+ generate_embeddings=True, # Auto-generate embeddings
172
+ embedding_provider="openai",
173
+ embedding_model="text-embedding-3-small"
174
+ )
175
+
176
+ # Result:
177
+ # {
178
+ # "inserted": 2,
179
+ # "updated": 0,
180
+ # "embeddings_generated": 4, # 2 entities × 2 fields (description, content)
181
+ # "kv_entries": 2
182
+ # }
183
+ ```
184
+
185
+ **Pipeline Steps**:
186
+ 1. **Validate** entities against Pydantic model
187
+ 2. **Generate embeddings** in batch (OpenAI API supports up to 2048 texts)
188
+ 3. **Upsert entities** to primary table (ON CONFLICT DO UPDATE)
189
+ 4. **Upsert embeddings** to `embeddings_<table>`
190
+ 5. **Update KV_STORE** (automatic via trigger)
191
+ 6. **Queue background indexes** if needed
192
+
193
+ ### 5. Embedding Generation Pattern
194
+
195
+ **Batch OpenAI API calls** for performance:
196
+
197
+ ```python
198
+ # Collect all texts to embed
199
+ texts_to_embed = []
200
+ for entity in entities:
201
+ for field_name in embeddable_fields:
202
+ text = getattr(entity, field_name)
203
+ if text:
204
+ texts_to_embed.append({
205
+ "text": text,
206
+ "entity_id": entity.id,
207
+ "field_name": field_name
208
+ })
209
+
210
+ # Batch generate embeddings (up to 2048 texts per call)
211
+ embeddings = await generate_embeddings_batch(
212
+ texts=[item["text"] for item in texts_to_embed],
213
+ provider="openai",
214
+ model="text-embedding-3-small"
215
+ )
216
+
217
+ # Map embeddings back to entities and fields
218
+ for item, embedding in zip(texts_to_embed, embeddings):
219
+ await upsert_embedding(
220
+ entity_id=item["entity_id"],
221
+ field_name=item["field_name"],
222
+ provider="openai",
223
+ model="text-embedding-3-small",
224
+ embedding=embedding
225
+ )
226
+ ```
227
+
228
+ **Supported Providers**:
229
+ - `openai` - text-embedding-3-small, text-embedding-3-large
230
+ - `cohere` - embed-english-v3.0, embed-multilingual-v3.0
231
+ - Custom providers via plugin system
232
+
233
+ ### 6. Background Index Creation Pattern
234
+
235
+ **Non-blocking index creation** after data load:
236
+
237
+ ```python
238
+ # Index creation worker (runs as an asyncio task, not an OS thread)
239
+ class BackgroundIndexer:
240
+ def __init__(self, postgres_service):
241
+ self.service = postgres_service
242
+ self.queue = asyncio.Queue()
243
+ self.running = False
244
+
245
+ async def queue_index(self, table_name: str, index_type: str):
246
+ """Queue an index for background creation."""
247
+ await self.queue.put({
248
+ "table_name": table_name,
249
+ "index_type": index_type,
250
+ "attempts": 0
251
+ })
252
+
253
+ async def run(self):
254
+ """Background thread that creates indexes CONCURRENTLY."""
255
+ self.running = True
256
+ while self.running:
257
+ try:
258
+ item = await asyncio.wait_for(self.queue.get(), timeout=5.0)
259
+
260
+ # Create index CONCURRENTLY (non-blocking)
261
+ await self.service.create_index_concurrently(
262
+ table_name=item["table_name"],
263
+ index_type=item["index_type"]
264
+ )
265
+
266
+ logger.info(f"Created index for {item['table_name']}")
267
+
268
+ except asyncio.TimeoutError:
269
+ continue
270
+ except Exception as e:
271
+ # Retry with backoff
272
+ if item["attempts"] < 3:
273
+ item["attempts"] += 1
274
+ await asyncio.sleep(2 ** item["attempts"])
275
+ await self.queue.put(item)
276
+ else:
277
+ logger.error(f"Failed to create index after 3 attempts: {e}")
278
+ ```
279
+
280
+ **Index Types**:
281
+ - **HNSW** for vector similarity (embeddings)
282
+ - **GIN** for JSONB (graph_edges, metadata)
283
+ - **GIN with pg_trgm** for fuzzy text search
284
+ - **B-tree** for foreign keys and common filters
285
+
286
+ ### 7. REM Query Integration
287
+
288
+ **LOOKUP Queries** use KV_STORE for O(1) access:
289
+
290
+ ```python
291
+ # REM LOOKUP query
292
+ result = await service.lookup_entity(
293
+ entity_key="sarah-chen",
294
+ user_id="user123"
295
+ )
296
+
297
+ # SQL:
298
+ # SELECT entity_id, entity_type, metadata
299
+ # FROM kv_store
300
+ # WHERE user_id = $1 AND entity_key = $2;
301
+ ```
302
+
303
+ **FUZZY Queries** use pg_trgm indexes:
304
+
305
+ ```python
306
+ # REM FUZZY query
307
+ results = await service.fuzzy_search(
308
+ query="sara",
309
+ user_id="user123",
310
+ threshold=0.3,
311
+ limit=10
312
+ )
313
+
314
+ # SQL:
315
+ # SELECT entity_key, entity_type, similarity(entity_key, $1) AS score
316
+ # FROM kv_store
317
+ # WHERE user_id = $2 AND entity_key % $1
318
+ # ORDER BY score DESC
319
+ # LIMIT $3;
320
+ ```
321
+
322
+ **SEARCH Queries** use vector similarity:
323
+
324
+ ```python
325
+ # REM SEARCH query
326
+ results = await service.vector_search(
327
+ table_name="resources",
328
+ query_text="machine learning documentation",
329
+ field_name="content",
330
+ user_id="user123",
331
+ limit=10,
332
+ min_similarity=0.7
333
+ )
334
+
335
+ # SQL:
336
+ # SELECT r.*, 1 - (e.embedding <=> $1) AS similarity
337
+ # FROM resources r
338
+ # JOIN embeddings_resources e ON e.entity_id = r.id
339
+ # WHERE r.user_id = $2
340
+ # AND e.field_name = 'content'
341
+ # AND e.provider = 'openai'
342
+ # AND 1 - (e.embedding <=> $1) >= $3
343
+ # ORDER BY e.embedding <=> $1
344
+ # LIMIT $4;
345
+ ```
346
+
347
+ ## Usage Examples
348
+
349
+ ### Initialize Service
350
+
351
+ ```python
352
+ from rem.services.postgres import PostgresService, Repository
353
+
354
+ service = PostgresService(
355
+ connection_string="postgresql://user:pass@localhost/remdb",
356
+ pool_size=20
357
+ )
358
+
359
+ await service.connect()
360
+ ```
361
+
362
+ ### Using Repository Pattern
363
+
364
+ **Generic Repository** for simple CRUD operations:
365
+
366
+ ```python
367
+ from rem.services.postgres import Repository
368
+ from rem.models.entities import Message, Resource
369
+
370
+ # Create repository for any model
371
+ message_repo = Repository(Message)
372
+ resource_repo = Repository(Resource)
373
+
374
+ # Create single record
375
+ message = Message(
376
+ content="Hello, world!",
377
+ message_type="user",
378
+ session_id="session-123",
379
+ user_id="user123"
380
+ )
381
+ created = await message_repo.upsert(message)
382
+
383
+ # Upsert also accepts lists (no need for separate batch method)
384
+ messages = [message1, message2, message3]
385
+ created_messages = await message_repo.upsert(messages)
386
+
387
+ # Find records
388
+ messages = await message_repo.find({
389
+ "session_id": "session-123",
390
+ "user_id": "user123"
391
+ }, order_by="created_at ASC", limit=100)
392
+
393
+ # Get by ID
394
+ message = await message_repo.get_by_id("msg-id", "user123")
395
+
396
+ # Get by session (convenience method)
397
+ session_messages = await message_repo.get_by_session(
398
+ session_id="session-123",
399
+ user_id="user123"
400
+ )
401
+
402
+ # Count
403
+ count = await message_repo.count({"session_id": "session-123"})
404
+
405
+ # Delete (soft delete)
406
+ deleted = await message_repo.delete("msg-id", "user123")
407
+ ```
408
+
409
+ **When to use Repository vs PostgresService:**
410
+ - **Repository**: Simple CRUD, session management, high-level operations
411
+ - **PostgresService**: Batch operations with embeddings, custom queries, performance-critical code
412
+
413
+ ### Register Entity Types
414
+
415
+ ```python
416
+ from rem.services.postgres.register_type import register_type
417
+ from rem.models.entities import Resource
418
+
419
+ # Register Resource model
420
+ schema = await register_type(
421
+ model=Resource,
422
+ table_name="resources",
423
+ entity_key_field="name",
424
+ tenant_scoped=True,
425
+ create_embeddings=True,
426
+ create_kv_trigger=True
427
+ )
428
+
429
+ # Execute generated SQL
430
+ await service.execute(schema["sql"]["table"])
431
+ await service.execute(schema["sql"]["embeddings"])
432
+ await service.execute(schema["sql"]["kv_trigger"])
433
+ ```
434
+
435
+ ### Batch Upsert with Embeddings
436
+
437
+ ```python
438
+ # Create entities
439
+ resources = [
440
+ Resource(
441
+ name="api-design-doc",
442
+ description="API design guidelines",
443
+ content="RESTful API best practices..."
444
+ ),
445
+ Resource(
446
+ name="deployment-guide",
447
+ description="Kubernetes deployment guide",
448
+ content="Deploy to EKS with Karpenter..."
449
+ )
450
+ ]
451
+
452
+ # Batch upsert
453
+ result = await service.batch_upsert(
454
+ table_name="resources",
455
+ entities=resources,
456
+ user_id="user123",
457
+ generate_embeddings=True
458
+ )
459
+
460
+ print(f"Inserted: {result['inserted']}")
461
+ print(f"Embeddings: {result['embeddings_generated']}")
462
+ ```
463
+
464
+ ### Query Operations
465
+
466
+ ```python
467
+ # LOOKUP by natural key
468
+ entity = await service.lookup_entity(
469
+ entity_key="api-design-doc",
470
+ user_id="user123"
471
+ )
472
+
473
+ # FUZZY search
474
+ results = await service.fuzzy_search(
475
+ query="api design",
476
+ user_id="user123",
477
+ threshold=0.3,
478
+ limit=5
479
+ )
480
+
481
+ # SEARCH by semantic similarity
482
+ results = await service.vector_search(
483
+ table_name="resources",
484
+ query_text="how to deploy kubernetes",
485
+ field_name="content",
486
+ user_id="user123",
487
+ limit=10
488
+ )
489
+ ```
490
+
491
+ ## Performance Considerations
492
+
493
+ ### Batch Size Optimization
494
+
495
+ - **Embeddings**: OpenAI supports up to 2048 texts per call
496
+ - **Inserts**: Batch 100-500 rows per transaction
497
+ - **Connection pool**: Size based on workload (default: 20)
498
+
499
+ ### Index Strategy
500
+
501
+ - **Foreground indexes**: Critical for queries (tenant_id, user_id)
502
+ - **Background indexes**: HNSW for vectors, created CONCURRENTLY
503
+ - **GIN indexes**: For JSONB fields (graph_edges, metadata)
504
+
505
+ ### KV_STORE Maintenance
506
+
507
+ - UNLOGGED table = faster writes, but its contents are truncated after a crash or unclean shutdown (and are not replicated to standbys)
508
+ - Rebuild from primary tables on startup
509
+ - Vacuum regularly to reclaim space
510
+
511
+ ### Memory Usage
512
+
513
+ - Vector indexes can be memory-intensive
514
+ - HNSW parameters: `m=16, ef_construction=64` (tunable)
515
+ - Monitor shared_buffers and work_mem
516
+
517
+ ## Migrations
518
+
519
+ Run migrations in order:
520
+
521
+ ```bash
522
+ psql -d remdb -f sql/migrations/001_install.sql
523
+ psql -d remdb -f sql/migrations/002_install_models.sql
524
+ psql -d remdb -f sql/generated_schema.sql
525
+ ```
526
+
527
+ Background indexes (after data load):
528
+
529
+ ```bash
530
+ psql -d remdb -f sql/background_indexes.sql
531
+ ```
532
+
533
+ ## CLI Usage
534
+
535
+ Generate schema from models:
536
+
537
+ ```bash
538
+ rem schema generate --models src/rem/models/entities --output sql/schema.sql
539
+ ```
540
+
541
+ Validate models:
542
+
543
+ ```bash
544
+ rem schema validate --models src/rem/models/entities
545
+ ```
546
+
547
+ ## Configuration
548
+
549
+ Environment variables:
550
+
551
+ ```bash
552
+ # Database
553
+ POSTGRES__HOST=localhost
554
+ POSTGRES__PORT=5432
555
+ POSTGRES__DATABASE=remdb
556
+ POSTGRES__USER=rem_user
557
+ POSTGRES__PASSWORD=secret
558
+ POSTGRES__POOL_SIZE=20
559
+
560
+ # Embeddings
561
+ EMBEDDING__PROVIDER=openai
562
+ EMBEDDING__MODEL=text-embedding-3-small
563
+ EMBEDDING__DIMENSIONS=1536
564
+ EMBEDDING__BATCH_SIZE=2048
565
+
566
+ # Background indexing
567
+ BACKGROUND_INDEX__ENABLED=true
568
+ BACKGROUND_INDEX__CONCURRENCY=2
569
+ ```
570
+
571
+ ## See Also
572
+
573
+ - [register_type.py](./register_type.py) - Dynamic schema generation
574
+ - [schema_generator.py](./schema_generator.py) - Bulk schema generation
575
+ - [REM Query System](../../models/core/rem_query.py) - Query types and contracts
@@ -0,0 +1,23 @@
1
+ """
2
+ PostgreSQL service for CloudNativePG database operations.
3
+ """
4
+
5
+ from .repository import Repository
6
+ from .service import PostgresService
7
+
8
+
9
+ def get_postgres_service() -> PostgresService | None:
10
+ """
11
+ Build a new PostgresService instance on each call.
12
+
13
+ Returns None when settings.postgres.enabled is falsy (Postgres disabled).
14
+ """
15
+ from ...settings import settings
16
+
17
+ if not settings.postgres.enabled:
18
+ return None
19
+
20
+ return PostgresService()
21
+
22
+
23
+ __all__ = ["PostgresService", "get_postgres_service", "Repository"]