remdb 0.3.242__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (235) hide show
  1. rem/__init__.py +129 -0
  2. rem/agentic/README.md +760 -0
  3. rem/agentic/__init__.py +54 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +38 -0
  6. rem/agentic/agents/agent_manager.py +311 -0
  7. rem/agentic/agents/sse_simulator.py +502 -0
  8. rem/agentic/context.py +425 -0
  9. rem/agentic/context_builder.py +360 -0
  10. rem/agentic/llm_provider_models.py +301 -0
  11. rem/agentic/mcp/__init__.py +0 -0
  12. rem/agentic/mcp/tool_wrapper.py +273 -0
  13. rem/agentic/otel/__init__.py +5 -0
  14. rem/agentic/otel/setup.py +240 -0
  15. rem/agentic/providers/phoenix.py +926 -0
  16. rem/agentic/providers/pydantic_ai.py +854 -0
  17. rem/agentic/query.py +117 -0
  18. rem/agentic/query_helper.py +89 -0
  19. rem/agentic/schema.py +737 -0
  20. rem/agentic/serialization.py +245 -0
  21. rem/agentic/tools/__init__.py +5 -0
  22. rem/agentic/tools/rem_tools.py +242 -0
  23. rem/api/README.md +657 -0
  24. rem/api/deps.py +253 -0
  25. rem/api/main.py +460 -0
  26. rem/api/mcp_router/prompts.py +182 -0
  27. rem/api/mcp_router/resources.py +820 -0
  28. rem/api/mcp_router/server.py +243 -0
  29. rem/api/mcp_router/tools.py +1605 -0
  30. rem/api/middleware/tracking.py +172 -0
  31. rem/api/routers/admin.py +520 -0
  32. rem/api/routers/auth.py +898 -0
  33. rem/api/routers/chat/__init__.py +5 -0
  34. rem/api/routers/chat/child_streaming.py +394 -0
  35. rem/api/routers/chat/completions.py +702 -0
  36. rem/api/routers/chat/json_utils.py +76 -0
  37. rem/api/routers/chat/models.py +202 -0
  38. rem/api/routers/chat/otel_utils.py +33 -0
  39. rem/api/routers/chat/sse_events.py +546 -0
  40. rem/api/routers/chat/streaming.py +950 -0
  41. rem/api/routers/chat/streaming_utils.py +327 -0
  42. rem/api/routers/common.py +18 -0
  43. rem/api/routers/dev.py +87 -0
  44. rem/api/routers/feedback.py +276 -0
  45. rem/api/routers/messages.py +620 -0
  46. rem/api/routers/models.py +86 -0
  47. rem/api/routers/query.py +362 -0
  48. rem/api/routers/shared_sessions.py +422 -0
  49. rem/auth/README.md +258 -0
  50. rem/auth/__init__.py +36 -0
  51. rem/auth/jwt.py +367 -0
  52. rem/auth/middleware.py +318 -0
  53. rem/auth/providers/__init__.py +16 -0
  54. rem/auth/providers/base.py +376 -0
  55. rem/auth/providers/email.py +215 -0
  56. rem/auth/providers/google.py +163 -0
  57. rem/auth/providers/microsoft.py +237 -0
  58. rem/cli/README.md +517 -0
  59. rem/cli/__init__.py +8 -0
  60. rem/cli/commands/README.md +299 -0
  61. rem/cli/commands/__init__.py +3 -0
  62. rem/cli/commands/ask.py +549 -0
  63. rem/cli/commands/cluster.py +1808 -0
  64. rem/cli/commands/configure.py +495 -0
  65. rem/cli/commands/db.py +828 -0
  66. rem/cli/commands/dreaming.py +324 -0
  67. rem/cli/commands/experiments.py +1698 -0
  68. rem/cli/commands/mcp.py +66 -0
  69. rem/cli/commands/process.py +388 -0
  70. rem/cli/commands/query.py +109 -0
  71. rem/cli/commands/scaffold.py +47 -0
  72. rem/cli/commands/schema.py +230 -0
  73. rem/cli/commands/serve.py +106 -0
  74. rem/cli/commands/session.py +453 -0
  75. rem/cli/dreaming.py +363 -0
  76. rem/cli/main.py +123 -0
  77. rem/config.py +244 -0
  78. rem/mcp_server.py +41 -0
  79. rem/models/core/__init__.py +49 -0
  80. rem/models/core/core_model.py +70 -0
  81. rem/models/core/engram.py +333 -0
  82. rem/models/core/experiment.py +672 -0
  83. rem/models/core/inline_edge.py +132 -0
  84. rem/models/core/rem_query.py +246 -0
  85. rem/models/entities/__init__.py +68 -0
  86. rem/models/entities/domain_resource.py +38 -0
  87. rem/models/entities/feedback.py +123 -0
  88. rem/models/entities/file.py +57 -0
  89. rem/models/entities/image_resource.py +88 -0
  90. rem/models/entities/message.py +64 -0
  91. rem/models/entities/moment.py +123 -0
  92. rem/models/entities/ontology.py +181 -0
  93. rem/models/entities/ontology_config.py +131 -0
  94. rem/models/entities/resource.py +95 -0
  95. rem/models/entities/schema.py +87 -0
  96. rem/models/entities/session.py +84 -0
  97. rem/models/entities/shared_session.py +180 -0
  98. rem/models/entities/subscriber.py +175 -0
  99. rem/models/entities/user.py +93 -0
  100. rem/py.typed +0 -0
  101. rem/registry.py +373 -0
  102. rem/schemas/README.md +507 -0
  103. rem/schemas/__init__.py +6 -0
  104. rem/schemas/agents/README.md +92 -0
  105. rem/schemas/agents/core/agent-builder.yaml +235 -0
  106. rem/schemas/agents/core/moment-builder.yaml +178 -0
  107. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  108. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  109. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  110. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  111. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  112. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  113. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  114. rem/schemas/agents/examples/hello-world.yaml +37 -0
  115. rem/schemas/agents/examples/query.yaml +54 -0
  116. rem/schemas/agents/examples/simple.yaml +21 -0
  117. rem/schemas/agents/examples/test.yaml +29 -0
  118. rem/schemas/agents/rem.yaml +132 -0
  119. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  120. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  121. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  122. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  123. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  124. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  125. rem/services/__init__.py +18 -0
  126. rem/services/audio/INTEGRATION.md +308 -0
  127. rem/services/audio/README.md +376 -0
  128. rem/services/audio/__init__.py +15 -0
  129. rem/services/audio/chunker.py +354 -0
  130. rem/services/audio/transcriber.py +259 -0
  131. rem/services/content/README.md +1269 -0
  132. rem/services/content/__init__.py +5 -0
  133. rem/services/content/providers.py +760 -0
  134. rem/services/content/service.py +762 -0
  135. rem/services/dreaming/README.md +230 -0
  136. rem/services/dreaming/__init__.py +53 -0
  137. rem/services/dreaming/affinity_service.py +322 -0
  138. rem/services/dreaming/moment_service.py +251 -0
  139. rem/services/dreaming/ontology_service.py +54 -0
  140. rem/services/dreaming/user_model_service.py +297 -0
  141. rem/services/dreaming/utils.py +39 -0
  142. rem/services/email/__init__.py +10 -0
  143. rem/services/email/service.py +522 -0
  144. rem/services/email/templates.py +360 -0
  145. rem/services/embeddings/__init__.py +11 -0
  146. rem/services/embeddings/api.py +127 -0
  147. rem/services/embeddings/worker.py +435 -0
  148. rem/services/fs/README.md +662 -0
  149. rem/services/fs/__init__.py +62 -0
  150. rem/services/fs/examples.py +206 -0
  151. rem/services/fs/examples_paths.py +204 -0
  152. rem/services/fs/git_provider.py +935 -0
  153. rem/services/fs/local_provider.py +760 -0
  154. rem/services/fs/parsing-hooks-examples.md +172 -0
  155. rem/services/fs/paths.py +276 -0
  156. rem/services/fs/provider.py +460 -0
  157. rem/services/fs/s3_provider.py +1042 -0
  158. rem/services/fs/service.py +186 -0
  159. rem/services/git/README.md +1075 -0
  160. rem/services/git/__init__.py +17 -0
  161. rem/services/git/service.py +469 -0
  162. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  163. rem/services/phoenix/README.md +453 -0
  164. rem/services/phoenix/__init__.py +46 -0
  165. rem/services/phoenix/client.py +960 -0
  166. rem/services/phoenix/config.py +88 -0
  167. rem/services/phoenix/prompt_labels.py +477 -0
  168. rem/services/postgres/README.md +757 -0
  169. rem/services/postgres/__init__.py +49 -0
  170. rem/services/postgres/diff_service.py +599 -0
  171. rem/services/postgres/migration_service.py +427 -0
  172. rem/services/postgres/programmable_diff_service.py +635 -0
  173. rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
  174. rem/services/postgres/register_type.py +353 -0
  175. rem/services/postgres/repository.py +481 -0
  176. rem/services/postgres/schema_generator.py +661 -0
  177. rem/services/postgres/service.py +802 -0
  178. rem/services/postgres/sql_builder.py +355 -0
  179. rem/services/rate_limit.py +113 -0
  180. rem/services/rem/README.md +318 -0
  181. rem/services/rem/__init__.py +23 -0
  182. rem/services/rem/exceptions.py +71 -0
  183. rem/services/rem/executor.py +293 -0
  184. rem/services/rem/parser.py +180 -0
  185. rem/services/rem/queries.py +196 -0
  186. rem/services/rem/query.py +371 -0
  187. rem/services/rem/service.py +608 -0
  188. rem/services/session/README.md +374 -0
  189. rem/services/session/__init__.py +13 -0
  190. rem/services/session/compression.py +488 -0
  191. rem/services/session/pydantic_messages.py +310 -0
  192. rem/services/session/reload.py +85 -0
  193. rem/services/user_service.py +130 -0
  194. rem/settings.py +1877 -0
  195. rem/sql/background_indexes.sql +52 -0
  196. rem/sql/migrations/001_install.sql +983 -0
  197. rem/sql/migrations/002_install_models.sql +3157 -0
  198. rem/sql/migrations/003_optional_extensions.sql +326 -0
  199. rem/sql/migrations/004_cache_system.sql +282 -0
  200. rem/sql/migrations/005_schema_update.sql +145 -0
  201. rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
  202. rem/utils/AGENTIC_CHUNKING.md +597 -0
  203. rem/utils/README.md +628 -0
  204. rem/utils/__init__.py +61 -0
  205. rem/utils/agentic_chunking.py +622 -0
  206. rem/utils/batch_ops.py +343 -0
  207. rem/utils/chunking.py +108 -0
  208. rem/utils/clip_embeddings.py +276 -0
  209. rem/utils/constants.py +97 -0
  210. rem/utils/date_utils.py +228 -0
  211. rem/utils/dict_utils.py +98 -0
  212. rem/utils/embeddings.py +436 -0
  213. rem/utils/examples/embeddings_example.py +305 -0
  214. rem/utils/examples/sql_types_example.py +202 -0
  215. rem/utils/files.py +323 -0
  216. rem/utils/markdown.py +16 -0
  217. rem/utils/mime_types.py +158 -0
  218. rem/utils/model_helpers.py +492 -0
  219. rem/utils/schema_loader.py +649 -0
  220. rem/utils/sql_paths.py +146 -0
  221. rem/utils/sql_types.py +350 -0
  222. rem/utils/user_id.py +81 -0
  223. rem/utils/vision.py +325 -0
  224. rem/workers/README.md +506 -0
  225. rem/workers/__init__.py +7 -0
  226. rem/workers/db_listener.py +579 -0
  227. rem/workers/db_maintainer.py +74 -0
  228. rem/workers/dreaming.py +502 -0
  229. rem/workers/engram_processor.py +312 -0
  230. rem/workers/sqs_file_processor.py +193 -0
  231. rem/workers/unlogged_maintainer.py +463 -0
  232. remdb-0.3.242.dist-info/METADATA +1632 -0
  233. remdb-0.3.242.dist-info/RECORD +235 -0
  234. remdb-0.3.242.dist-info/WHEEL +4 -0
  235. remdb-0.3.242.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,757 @@
1
+ # PostgresService - CloudNativePG Database Operations
2
+
3
+ Comprehensive service for PostgreSQL 18 with pgvector, including:
4
+ - Entity CRUD with automatic embeddings
5
+ - KV_STORE cache for O(1) lookups
6
+ - Fuzzy text search with pg_trgm
7
+ - Background index creation
8
+ - Batch operations with transaction management
9
+
10
+ ## Architecture
11
+
12
+ ```
13
+ ┌─────────────────────────────────────────────────────────────┐
14
+ │ PostgresService │
15
+ ├─────────────────────────────────────────────────────────────┤
16
+ │ │
17
+ │ ┌──────────────────────────────────────────────────────┐ │
18
+ │ │ Batch Upsert Pipeline │ │
19
+ │ │ 1. Validate entities │ │
20
+ │ │ 2. Generate embeddings (batch OpenAI API) │ │
21
+ │ │ 3. Upsert to primary tables │ │
22
+ │ │ 4. Upsert to embeddings_<table> │ │
23
+ │ │ 5. Upsert to KV_STORE (via trigger) │ │
24
+ │ └──────────────────────────────────────────────────────┘ │
25
+ │ │
26
+ │ ┌──────────────────────────────────────────────────────┐ │
27
+ │ │ Background Index Thread │ │
28
+ │ │ - Monitors pending indexes queue │ │
29
+ │ │ - Creates indexes CONCURRENTLY │ │
30
+ │ │ - Handles index creation failures │ │
31
+ │ └──────────────────────────────────────────────────────┘ │
32
+ │ │
33
+ └─────────────────────────────────────────────────────────────┘
34
+
35
+ ┌─────────────────────────────────────────────────────────────┐
36
+ │ Database Schema │
37
+ ├─────────────────────────────────────────────────────────────┤
38
+ │ │
39
+ │ Primary Tables: resources, moments, users, etc. │
40
+ │ Embeddings Tables: embeddings_resources, etc. │
41
+ │ KV_STORE Cache: UNLOGGED table for O(1) lookups │
42
+ │ │
43
+ └─────────────────────────────────────────────────────────────┘
44
+ ```
45
+
46
+ ## Core Design Patterns
47
+
48
+ ### 1. Entity Storage Pattern
49
+
50
+ **Primary Tables** store entities with system fields:
51
+ ```sql
52
+ CREATE TABLE resources (
53
+ id UUID PRIMARY KEY,
54
+ tenant_id VARCHAR(100), -- Optional: for future multi-tenant SaaS use
55
+ user_id VARCHAR(100) NOT NULL,
56
+ name TEXT NOT NULL,
57
+ description TEXT,
58
+ content TEXT,
59
+ graph_edges JSONB DEFAULT '[]',
60
+ metadata JSONB DEFAULT '{}',
61
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
62
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
63
+ deleted_at TIMESTAMP
64
+ );
65
+ ```
66
+
67
+ ### 2. Embeddings Pattern
68
+
69
+ **Multiple embeddings per record** with provider flexibility:
70
+
71
+ ```sql
72
+ CREATE TABLE embeddings_resources (
73
+ id UUID PRIMARY KEY,
74
+ entity_id UUID REFERENCES resources(id) ON DELETE CASCADE,
75
+ field_name VARCHAR(100) NOT NULL, -- 'description', 'content', etc.
76
+ provider VARCHAR(50) NOT NULL, -- 'openai', 'cohere', etc.
77
+ model VARCHAR(100) NOT NULL, -- 'text-embedding-3-small'
78
+ embedding vector(1536) NOT NULL,
79
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
80
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
81
+ UNIQUE (entity_id, field_name, provider)
82
+ );
83
+ ```
84
+
85
+ **Key Features**:
86
+ - One row per (entity, field, provider)
87
+ - Unique constraint prevents duplicates
88
+ - Supports multiple embedding providers
89
+ - CASCADE delete when entity deleted
90
+
91
+ **Embedding Configuration**:
92
+ ```python
93
+ from pydantic import BaseModel, Field
94
+
95
+ class Resource(BaseModel):
96
+ name: str
97
+ description: str = Field(
98
+ ...,
99
+ json_schema_extra={"embed": True} # Explicit embedding
100
+ )
101
+ content: str # Auto-embeds (default for content fields)
102
+ notes: str = Field(
103
+ ...,
104
+ json_schema_extra={"embed": False} # Disable embedding
105
+ )
106
+ ```
107
+
108
+ **Default Embedding Fields** (fields with these names are embedded automatically unless disabled with `json_schema_extra={"embed": False}`):
109
+ - `content`
110
+ - `description`
111
+ - `summary`
112
+ - `text`
113
+ - `body`
114
+ - `message`
115
+ - `notes`
116
+
117
+ ### 3. KV_STORE Cache Pattern
118
+
119
+ **UNLOGGED table** for fast entity lookups:
120
+
121
+ ```sql
122
+ CREATE UNLOGGED TABLE kv_store (
123
+ entity_key VARCHAR(255) NOT NULL, -- Natural language key
124
+ entity_type VARCHAR(100) NOT NULL, -- Table name
125
+ entity_id UUID NOT NULL, -- Foreign key to entity
126
+ tenant_id VARCHAR(100), -- Optional: for future multi-tenant SaaS use
127
+ user_id VARCHAR(100) NOT NULL, -- Primary isolation scope
128
+ content_summary TEXT, -- For fuzzy search
129
+ metadata JSONB DEFAULT '{}',
130
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
131
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
132
+ PRIMARY KEY (user_id, entity_key)
133
+ );
134
+ ```
135
+
136
+ **Key Features**:
137
+ - UNLOGGED = faster writes, no WAL overhead
138
+ - Rebuilt automatically from primary tables on restart
139
+ - O(1) lookups by entity_key
140
+ - User-scoped filtering when `user_id IS NOT NULL`
141
+ - Fuzzy search via pg_trgm indexes
142
+
143
+ **Trigger-based Updates**:
144
+ ```sql
145
+ CREATE TRIGGER trg_resources_kv_store
146
+ AFTER INSERT OR UPDATE OR DELETE ON resources
147
+ FOR EACH ROW EXECUTE FUNCTION fn_resources_kv_store_upsert();
148
+ ```
149
+
150
+ The trigger automatically keeps KV_STORE in sync whenever an entity is inserted, updated, or deleted.
151
+
152
+ ### 4. Batch Upsert Pattern
153
+
154
+ **Efficient bulk operations** with automatic embedding generation:
155
+
156
+ ```python
157
+ from rem.services import PostgresService
158
+
159
+ service = PostgresService(connection_string)
160
+
161
+ # Batch upsert entities
162
+ entities = [
163
+ Resource(name="doc-1", description="First document", content="..."),
164
+ Resource(name="doc-2", description="Second document", content="..."),
165
+ ]
166
+
167
+ result = await service.batch_upsert(
168
+ table_name="resources",
169
+ entities=entities,
170
+ entity_key_field="name",
171
+ generate_embeddings=True, # Auto-generate embeddings
172
+ embedding_provider="openai",
173
+ embedding_model="text-embedding-3-small"
174
+ )
175
+
176
+ # Result:
177
+ # {
178
+ # "inserted": 2,
179
+ # "updated": 0,
180
+ # "embeddings_generated": 4, # 2 entities × 2 fields (description, content)
181
+ # "kv_entries": 2
182
+ # }
183
+ ```
184
+
185
+ **Pipeline Steps**:
186
+ 1. **Validate** entities against Pydantic model
187
+ 2. **Generate embeddings** in batch (OpenAI API supports up to 2048 texts per call)
188
+ 3. **Upsert entities** to primary table (ON CONFLICT DO UPDATE)
189
+ 4. **Upsert embeddings** to `embeddings_<table>`
190
+ 5. **Update KV_STORE** (automatic via trigger)
191
+ 6. **Queue background indexes** if needed
192
+
193
+ ### 5. Embedding Generation Pattern
194
+
195
+ **Batch OpenAI API calls** for performance:
196
+
197
+ ```python
198
+ # Collect all texts to embed
199
+ texts_to_embed = []
200
+ for entity in entities:
201
+ for field_name in embeddable_fields:
202
+ text = getattr(entity, field_name)
203
+ if text:
204
+ texts_to_embed.append({
205
+ "text": text,
206
+ "entity_id": entity.id,
207
+ "field_name": field_name
208
+ })
209
+
210
+ # Batch generate embeddings (up to 2048 texts per call)
211
+ embeddings = await generate_embeddings_batch(
212
+ texts=[item["text"] for item in texts_to_embed],
213
+ provider="openai",
214
+ model="text-embedding-3-small"
215
+ )
216
+
217
+ # Map embeddings back to entities and fields
218
+ for item, embedding in zip(texts_to_embed, embeddings):
219
+ await upsert_embedding(
220
+ entity_id=item["entity_id"],
221
+ field_name=item["field_name"],
222
+ provider="openai",
223
+ model="text-embedding-3-small",
224
+ embedding=embedding
225
+ )
226
+ ```
227
+
228
+ **Supported Providers**:
229
+ - `openai` - text-embedding-3-small, text-embedding-3-large
230
+ - `cohere` - embed-english-v3.0, embed-multilingual-v3.0
231
+ - Custom providers via plugin system
232
+
233
+ ### 6. Background Index Creation Pattern
234
+
235
+ **Non-blocking index creation** after data load:
236
+
237
+ ```python
238
+ # Index creation thread
239
+ class BackgroundIndexer:
240
+ def __init__(self, postgres_service):
241
+ self.service = postgres_service
242
+ self.queue = asyncio.Queue()
243
+ self.running = False
244
+
245
+ async def queue_index(self, table_name: str, index_type: str):
246
+ """Queue an index for background creation."""
247
+ await self.queue.put({
248
+ "table_name": table_name,
249
+ "index_type": index_type,
250
+ "attempts": 0
251
+ })
252
+
253
+ async def run(self):
254
+ """Background thread that creates indexes CONCURRENTLY."""
255
+ self.running = True
256
+ while self.running:
257
+ try:
258
+ item = await asyncio.wait_for(self.queue.get(), timeout=5.0)
259
+
260
+ # Create index CONCURRENTLY (non-blocking)
261
+ await self.service.create_index_concurrently(
262
+ table_name=item["table_name"],
263
+ index_type=item["index_type"]
264
+ )
265
+
266
+ logger.info(f"Created index for {item['table_name']}")
267
+
268
+ except asyncio.TimeoutError:
269
+ continue
270
+ except Exception as e:
271
+ # Retry with backoff
272
+ if item["attempts"] < 3:
273
+ item["attempts"] += 1
274
+ await asyncio.sleep(2 ** item["attempts"])
275
+ await self.queue.put(item)
276
+ else:
277
+ logger.error(f"Failed to create index after 3 attempts: {e}")
278
+ ```
279
+
280
+ **Index Types**:
281
+ - **HNSW** for vector similarity (embeddings)
282
+ - **GIN** for JSONB (graph_edges, metadata)
283
+ - **GIN with pg_trgm** for fuzzy text search
284
+ - **B-tree** for foreign keys and common filters
285
+
286
+ ### 7. REM Query Integration
287
+
288
+ **LOOKUP Queries** use KV_STORE for O(1) access:
289
+
290
+ ```python
291
+ # REM LOOKUP query
292
+ result = await service.lookup_entity(
293
+ entity_key="sarah-chen",
294
+ user_id="user123"
295
+ )
296
+
297
+ # SQL:
298
+ # SELECT entity_id, entity_type, metadata
299
+ # FROM kv_store
300
+ # WHERE user_id = $1 AND entity_key = $2;
301
+ ```
302
+
303
+ **FUZZY Queries** use pg_trgm indexes:
304
+
305
+ ```python
306
+ # REM FUZZY query
307
+ results = await service.fuzzy_search(
308
+ query="sara",
309
+ user_id="user123",
310
+ threshold=0.3,
311
+ limit=10
312
+ )
313
+
314
+ # SQL:
315
+ # SELECT entity_key, entity_type, similarity(entity_key, $1) AS score
316
+ # FROM kv_store
317
+ # WHERE user_id = $2 AND entity_key % $1
318
+ # ORDER BY score DESC
319
+ # LIMIT $3;
320
+ ```
321
+
322
+ **SEARCH Queries** use vector similarity:
323
+
324
+ ```python
325
+ # REM SEARCH query
326
+ results = await service.vector_search(
327
+ table_name="resources",
328
+ query_text="machine learning documentation",
329
+ field_name="content",
330
+ user_id="user123",
331
+ limit=10,
332
+ min_similarity=0.7
333
+ )
334
+
335
+ # SQL:
336
+ # SELECT r.*, 1 - (e.embedding <=> $1) AS similarity
337
+ # FROM resources r
338
+ # JOIN embeddings_resources e ON e.entity_id = r.id
339
+ # WHERE r.user_id = $2
340
+ # AND e.field_name = 'content'
341
+ # AND e.provider = 'openai'
342
+ # AND 1 - (e.embedding <=> $1) >= $3
343
+ # ORDER BY e.embedding <=> $1
344
+ # LIMIT $4;
345
+ ```
346
+
347
+ ## Usage Examples
348
+
349
+ ### Initialize Service
350
+
351
+ There are two ways to initialize the PostgresService:
352
+
353
+ **Option 1: Factory function (recommended for apps using remdb as a library)**
354
+
355
+ ```python
356
+ from rem.services.postgres import get_postgres_service
357
+
358
+ # Uses POSTGRES__CONNECTION_STRING from environment
359
+ pg = get_postgres_service()
360
+ if pg is None:
361
+ raise RuntimeError("Database not configured - set POSTGRES__CONNECTION_STRING")
362
+
363
+ await pg.connect()
364
+ # ... use pg ...
365
+ await pg.disconnect()
366
+ ```
367
+
368
+ **Option 2: Direct instantiation**
369
+
370
+ ```python
371
+ from rem.services.postgres import PostgresService
372
+
373
+ service = PostgresService(
374
+ connection_string="postgresql://user:pass@localhost/remdb",
375
+ pool_size=20
376
+ )
377
+
378
+ await service.connect()
379
+ ```
380
+
381
+ > **Note**: `get_postgres_service()` returns the service directly. It does NOT support
382
+ > `async with` context manager syntax. Always call `connect()` and `disconnect()` explicitly.
383
+
384
+ ### Using Repository Pattern
385
+
386
+ **Generic Repository** for simple CRUD operations:
387
+
388
+ ```python
389
+ from rem.services.postgres import Repository
390
+ from rem.models.entities import Message, Resource
391
+
392
+ # Create repository for any model
393
+ message_repo = Repository(Message)
394
+ resource_repo = Repository(Resource)
395
+
396
+ # Create single record
397
+ message = Message(
398
+ content="Hello, world!",
399
+ message_type="user",
400
+ session_id="session-123",
401
+ user_id="user123"
402
+ )
403
+ created = await message_repo.upsert(message)
404
+
405
+ # Upsert also accepts lists (no need for separate batch method)
406
+ messages = [message1, message2, message3]
407
+ created_messages = await message_repo.upsert(messages)
408
+
409
+ # Find records
410
+ messages = await message_repo.find({
411
+ "session_id": "session-123",
412
+ "user_id": "user123"
413
+ }, order_by="created_at ASC", limit=100)
414
+
415
+ # Get by ID
416
+ message = await message_repo.get_by_id("msg-id", "user123")
417
+
418
+ # Get by session (convenience method)
419
+ session_messages = await message_repo.get_by_session(
420
+ session_id="session-123",
421
+ user_id="user123"
422
+ )
423
+
424
+ # Count
425
+ count = await message_repo.count({"session_id": "session-123"})
426
+
427
+ # Delete (soft delete)
428
+ deleted = await message_repo.delete("msg-id", "user123")
429
+ ```
430
+
431
+ **When to use Repository vs PostgresService:**
432
+ - **Repository**: Simple CRUD, session management, high-level operations
433
+ - **PostgresService**: Batch operations with embeddings, custom queries, performance-critical code
434
+
435
+ ### Register Entity Types
436
+
437
+ ```python
438
+ from rem.services.postgres.register_type import register_type
439
+ from rem.models.entities import Resource
440
+
441
+ # Register Resource model
442
+ schema = await register_type(
443
+ model=Resource,
444
+ table_name="resources",
445
+ entity_key_field="name",
446
+ tenant_scoped=True,
447
+ create_embeddings=True,
448
+ create_kv_trigger=True
449
+ )
450
+
451
+ # Execute generated SQL
452
+ await service.execute(schema["sql"]["table"])
453
+ await service.execute(schema["sql"]["embeddings"])
454
+ await service.execute(schema["sql"]["kv_trigger"])
455
+ ```
456
+
457
+ ### Batch Upsert with Embeddings
458
+
459
+ ```python
460
+ # Create entities
461
+ resources = [
462
+ Resource(
463
+ name="api-design-doc",
464
+ description="API design guidelines",
465
+ content="RESTful API best practices..."
466
+ ),
467
+ Resource(
468
+ name="deployment-guide",
469
+ description="Kubernetes deployment guide",
470
+ content="Deploy to EKS with Karpenter..."
471
+ )
472
+ ]
473
+
474
+ # Batch upsert
475
+ result = await service.batch_upsert(
476
+ table_name="resources",
477
+ entities=resources,
478
+ user_id="user123",
479
+ generate_embeddings=True
480
+ )
481
+
482
+ print(f"Inserted: {result['inserted']}")
483
+ print(f"Embeddings: {result['embeddings_generated']}")
484
+ ```
485
+
486
+ ### Query Operations
487
+
488
+ ```python
489
+ # LOOKUP by natural key
490
+ entity = await service.lookup_entity(
491
+ entity_key="api-design-doc",
492
+ user_id="user123"
493
+ )
494
+
495
+ # FUZZY search
496
+ results = await service.fuzzy_search(
497
+ query="api design",
498
+ user_id="user123",
499
+ threshold=0.3,
500
+ limit=5
501
+ )
502
+
503
+ # SEARCH by semantic similarity
504
+ results = await service.vector_search(
505
+ table_name="resources",
506
+ query_text="how to deploy kubernetes",
507
+ field_name="content",
508
+ user_id="user123",
509
+ limit=10
510
+ )
511
+ ```
512
+
513
+ ## Performance Considerations
514
+
515
+ ### Batch Size Optimization
516
+
517
+ - **Embeddings**: OpenAI supports up to 2048 texts per call
518
+ - **Inserts**: Batch 100-500 rows per transaction
519
+ - **Connection pool**: Size based on workload (default: 20)
520
+
521
+ ### Index Strategy
522
+
523
+ - **Foreground indexes**: Critical for queries (tenant_id, user_id)
524
+ - **Background indexes**: HNSW for vectors, created CONCURRENTLY
525
+ - **GIN indexes**: For JSONB fields (graph_edges, metadata)
526
+
527
+ ### KV_STORE Maintenance
528
+
529
+ - UNLOGGED table = faster writes, but its contents are lost after a crash
530
+ - Rebuild from primary tables on startup
531
+ - Vacuum regularly to reclaim space
532
+
533
+ ### Memory Usage
534
+
535
+ - Vector indexes can be memory-intensive
536
+ - HNSW parameters: `m=16, ef_construction=64` (tunable)
537
+ - Monitor shared_buffers and work_mem
538
+
539
+ ## Schema Management
540
+
541
+ REM uses a **code-as-source-of-truth** approach. Pydantic models define the schema, and the database is kept in sync via diff-based migrations.
542
+
543
+ ### File Structure
544
+
545
+ ```
546
+ src/rem/sql/
547
+ ├── migrations/
548
+ │ ├── 001_install.sql # Core infrastructure (manual)
549
+ │ └── 002_install_models.sql # Entity tables (auto-generated)
550
+ └── background_indexes.sql # HNSW vector indexes (optional)
551
+ ```
552
+
553
+ **Key principle**: Only two migration files. No incremental `003_`, `004_` files.
554
+
555
+ ### CLI Commands
556
+
557
+ ```bash
558
+ # Apply migrations (installs extensions, core tables, entity tables)
559
+ rem db migrate
560
+
561
+ # Check migration status
562
+ rem db status
563
+
564
+ # Generate schema SQL from models (for remdb development)
565
+ rem db schema generate --models src/rem/models/entities
566
+
567
+ # Validate models for schema generation
568
+ rem db schema validate --models src/rem/models/entities
569
+ ```
570
+
571
+ ### Model Registry
572
+
573
+ Models are discovered via the registry:
574
+
575
+ ```python
576
+ import rem
577
+ from rem.models.core import CoreModel
578
+
579
+ @rem.register_model
580
+ class MyEntity(CoreModel):
581
+ name: str
582
+ description: str # Auto-embeds
583
+ ```
584
+
585
+ ## Using REM as a Library (Downstream Apps)
586
+
587
+ When building an application that **depends on remdb as a package** (e.g., `pip install remdb`),
588
+ there are important differences from developing remdb itself.
589
+
590
+ ### What Works Out of the Box
591
+
592
+ 1. **All core entity tables** - Resources, Messages, Users, Sessions, etc.
593
+ 2. **PostgresService** - Full database access via `get_postgres_service()`
594
+ 3. **Repository pattern** - CRUD operations for core entities
595
+ 4. **Migrations** - `rem db migrate` applies the bundled SQL files
596
+
597
+ ```python
598
+ # In your downstream app (e.g., myapp/main.py)
599
+ from rem.services.postgres import get_postgres_service
600
+ from rem.models.entities import Message, Resource
601
+
602
+ pg = get_postgres_service()
603
+ await pg.connect()
604
+
605
+ # Use core entities - tables already exist
606
+ messages = await pg.query(Message, {"session_id": "abc"})
607
+ ```
608
+
609
+ ### Custom Models in Downstream Apps
610
+
611
+ The `@rem.register_model` decorator registers models in the **runtime registry**, which is useful for:
612
+ - Schema introspection at runtime
613
+ - Future tooling that reads the registry
614
+
615
+ However, **`rem db migrate` only applies SQL files bundled in the remdb package**.
616
+ Custom models from downstream apps do NOT automatically get tables created.
617
+
618
+ **Options for custom model tables:**
619
+
620
+ **Option A: Use core entities with metadata**
621
+
622
+ Store custom data in the `metadata` JSONB field of existing entities:
623
+
624
+ ```python
625
+ resource = Resource(
626
+ name="my-custom-thing",
627
+ content="...",
628
+ metadata={"custom_field": "value", "another": 123}
629
+ )
630
+ ```
631
+
632
+ **Option B: Create tables manually**
633
+
634
+ Write and apply your own SQL:
635
+
636
+ ```sql
637
+ -- myapp/sql/custom_tables.sql
638
+ CREATE TABLE IF NOT EXISTS conversation_summaries (
639
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
640
+ session_ref TEXT NOT NULL,
641
+ summary TEXT NOT NULL,
642
+ -- ... include CoreModel fields for compatibility
643
+ user_id VARCHAR(256),
644
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
645
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
646
+ );
647
+ ```
648
+
649
+ ```bash
650
+ psql $DATABASE_URL -f myapp/sql/custom_tables.sql
651
+ ```
652
+
653
+ **Option C: Contribute upstream**
654
+
655
+ If your model is generally useful, contribute it to remdb so it's included in
656
+ the next release and `rem db migrate` creates it automatically.
657
+
658
+ ### Example: Downstream App Structure
659
+
660
+ ```
661
+ myapp/
662
+ ├── main.py # Import models, start API
663
+ ├── models/
664
+ │ └── __init__.py # @rem.register_model decorators
665
+ ├── sql/
666
+ │ └── custom.sql # Manual migrations for custom tables
667
+ ├── .env # POSTGRES__CONNECTION_STRING, LLM keys
668
+ └── pyproject.toml # dependencies = ["remdb>=0.3.110"]
669
+ ```
670
+
671
+ ```python
672
+ # myapp/models/__init__.py
673
+ import rem
674
+ from rem.models.core import CoreModel
675
+
676
+ @rem.register_model
677
+ class ConversationSummary(CoreModel):
678
+ """Registered for introspection, but table created via sql/custom.sql"""
679
+ session_ref: str
680
+ summary: str
681
+ ```
682
+
683
+ ```python
684
+ # myapp/main.py
685
+ import models # Registers custom models
686
+
687
+ from rem.api.main import app # Use REM's FastAPI app
688
+ # Or build your own app using rem.services
689
+ ```
690
+
691
+ ## Adding Models & Migrations
692
+
693
+ Quick workflow for adding new database models:
694
+
695
+ 1. **Create a model** in `models/__init__.py` (or a submodule):
696
+ ```python
697
+ import rem
698
+ from rem.models.core import CoreModel
699
+
700
+ @rem.register_model
701
+ class MyEntity(CoreModel):
702
+ name: str
703
+ description: str # Auto-embedded (common field name)
704
+ ```
705
+
706
+ 2. **Check for schema drift** - REM auto-detects the `./models` directory:
707
+ ```bash
708
+ rem db diff # Show pending changes (additive only)
709
+ rem db diff --strategy full # Include destructive changes
710
+ ```
711
+
712
+ 3. **Generate migration** (optional - for version-controlled SQL):
713
+ ```bash
714
+ rem db diff --generate # Creates numbered .sql file
715
+ ```
716
+
717
+ 4. **Apply changes**:
718
+ ```bash
719
+ rem db migrate # Apply all pending migrations
720
+ ```
721
+
722
+ **Key points:**
723
+ - Models in `./models/` are auto-discovered (must have `__init__.py`)
724
+ - Or set `MODELS__IMPORT_MODULES=myapp.models` for custom paths
725
+ - `CoreModel` provides: `id`, `tenant_id`, `user_id`, `created_at`, `updated_at`, `deleted_at`, `graph_edges`, `metadata`, `tags`
726
+ - Fields named `content`, `description`, `summary`, `text`, `body`, `message`, `notes` get embeddings by default
727
+ - Use `Field(json_schema_extra={"embed": True})` to embed other fields
728
+
729
+ ## Configuration
730
+
731
+ Environment variables:
732
+
733
+ ```bash
734
+ # Database
735
+ POSTGRES__HOST=localhost
736
+ POSTGRES__PORT=5432
737
+ POSTGRES__DATABASE=remdb
738
+ POSTGRES__USER=rem_user
739
+ POSTGRES__PASSWORD=secret
740
+ POSTGRES__POOL_SIZE=20
741
+
742
+ # Embeddings
743
+ EMBEDDING__PROVIDER=openai
744
+ EMBEDDING__MODEL=text-embedding-3-small
745
+ EMBEDDING__DIMENSIONS=1536
746
+ EMBEDDING__BATCH_SIZE=2048
747
+
748
+ # Background indexing
749
+ BACKGROUND_INDEX__ENABLED=true
750
+ BACKGROUND_INDEX__CONCURRENCY=2
751
+ ```
752
+
753
+ ## See Also
754
+
755
+ - [register_type.py](./register_type.py) - Dynamic schema generation
756
+ - [schema_generator.py](./schema_generator.py) - Bulk schema generation
757
+ - [REM Query System](../../models/core/rem_query.py) - Query types and contracts