remdb 0.2.6__py3-none-any.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the package versions as they appear in their public registries.

Potentially problematic release.


This version of remdb might be problematic.

Files changed (187)
  1. rem/__init__.py +2 -0
  2. rem/agentic/README.md +650 -0
  3. rem/agentic/__init__.py +39 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +8 -0
  6. rem/agentic/context.py +148 -0
  7. rem/agentic/context_builder.py +329 -0
  8. rem/agentic/mcp/__init__.py +0 -0
  9. rem/agentic/mcp/tool_wrapper.py +107 -0
  10. rem/agentic/otel/__init__.py +5 -0
  11. rem/agentic/otel/setup.py +151 -0
  12. rem/agentic/providers/phoenix.py +674 -0
  13. rem/agentic/providers/pydantic_ai.py +572 -0
  14. rem/agentic/query.py +117 -0
  15. rem/agentic/query_helper.py +89 -0
  16. rem/agentic/schema.py +396 -0
  17. rem/agentic/serialization.py +245 -0
  18. rem/agentic/tools/__init__.py +5 -0
  19. rem/agentic/tools/rem_tools.py +231 -0
  20. rem/api/README.md +420 -0
  21. rem/api/main.py +324 -0
  22. rem/api/mcp_router/prompts.py +182 -0
  23. rem/api/mcp_router/resources.py +536 -0
  24. rem/api/mcp_router/server.py +213 -0
  25. rem/api/mcp_router/tools.py +584 -0
  26. rem/api/routers/auth.py +229 -0
  27. rem/api/routers/chat/__init__.py +5 -0
  28. rem/api/routers/chat/completions.py +281 -0
  29. rem/api/routers/chat/json_utils.py +76 -0
  30. rem/api/routers/chat/models.py +124 -0
  31. rem/api/routers/chat/streaming.py +185 -0
  32. rem/auth/README.md +258 -0
  33. rem/auth/__init__.py +26 -0
  34. rem/auth/middleware.py +100 -0
  35. rem/auth/providers/__init__.py +13 -0
  36. rem/auth/providers/base.py +376 -0
  37. rem/auth/providers/google.py +163 -0
  38. rem/auth/providers/microsoft.py +237 -0
  39. rem/cli/README.md +455 -0
  40. rem/cli/__init__.py +8 -0
  41. rem/cli/commands/README.md +126 -0
  42. rem/cli/commands/__init__.py +3 -0
  43. rem/cli/commands/ask.py +565 -0
  44. rem/cli/commands/configure.py +423 -0
  45. rem/cli/commands/db.py +493 -0
  46. rem/cli/commands/dreaming.py +324 -0
  47. rem/cli/commands/experiments.py +1124 -0
  48. rem/cli/commands/mcp.py +66 -0
  49. rem/cli/commands/process.py +245 -0
  50. rem/cli/commands/schema.py +183 -0
  51. rem/cli/commands/serve.py +106 -0
  52. rem/cli/dreaming.py +363 -0
  53. rem/cli/main.py +88 -0
  54. rem/config.py +237 -0
  55. rem/mcp_server.py +41 -0
  56. rem/models/core/__init__.py +49 -0
  57. rem/models/core/core_model.py +64 -0
  58. rem/models/core/engram.py +333 -0
  59. rem/models/core/experiment.py +628 -0
  60. rem/models/core/inline_edge.py +132 -0
  61. rem/models/core/rem_query.py +243 -0
  62. rem/models/entities/__init__.py +43 -0
  63. rem/models/entities/file.py +57 -0
  64. rem/models/entities/image_resource.py +88 -0
  65. rem/models/entities/message.py +35 -0
  66. rem/models/entities/moment.py +123 -0
  67. rem/models/entities/ontology.py +191 -0
  68. rem/models/entities/ontology_config.py +131 -0
  69. rem/models/entities/resource.py +95 -0
  70. rem/models/entities/schema.py +87 -0
  71. rem/models/entities/user.py +85 -0
  72. rem/py.typed +0 -0
  73. rem/schemas/README.md +507 -0
  74. rem/schemas/__init__.py +6 -0
  75. rem/schemas/agents/README.md +92 -0
  76. rem/schemas/agents/core/moment-builder.yaml +178 -0
  77. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  78. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  79. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  80. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  81. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  82. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  83. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  84. rem/schemas/agents/examples/hello-world.yaml +37 -0
  85. rem/schemas/agents/examples/query.yaml +54 -0
  86. rem/schemas/agents/examples/simple.yaml +21 -0
  87. rem/schemas/agents/examples/test.yaml +29 -0
  88. rem/schemas/agents/rem.yaml +128 -0
  89. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  90. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  91. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  92. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  93. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  94. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  95. rem/services/__init__.py +16 -0
  96. rem/services/audio/INTEGRATION.md +308 -0
  97. rem/services/audio/README.md +376 -0
  98. rem/services/audio/__init__.py +15 -0
  99. rem/services/audio/chunker.py +354 -0
  100. rem/services/audio/transcriber.py +259 -0
  101. rem/services/content/README.md +1269 -0
  102. rem/services/content/__init__.py +5 -0
  103. rem/services/content/providers.py +806 -0
  104. rem/services/content/service.py +657 -0
  105. rem/services/dreaming/README.md +230 -0
  106. rem/services/dreaming/__init__.py +53 -0
  107. rem/services/dreaming/affinity_service.py +336 -0
  108. rem/services/dreaming/moment_service.py +264 -0
  109. rem/services/dreaming/ontology_service.py +54 -0
  110. rem/services/dreaming/user_model_service.py +297 -0
  111. rem/services/dreaming/utils.py +39 -0
  112. rem/services/embeddings/__init__.py +11 -0
  113. rem/services/embeddings/api.py +120 -0
  114. rem/services/embeddings/worker.py +421 -0
  115. rem/services/fs/README.md +662 -0
  116. rem/services/fs/__init__.py +62 -0
  117. rem/services/fs/examples.py +206 -0
  118. rem/services/fs/examples_paths.py +204 -0
  119. rem/services/fs/git_provider.py +935 -0
  120. rem/services/fs/local_provider.py +760 -0
  121. rem/services/fs/parsing-hooks-examples.md +172 -0
  122. rem/services/fs/paths.py +276 -0
  123. rem/services/fs/provider.py +460 -0
  124. rem/services/fs/s3_provider.py +1042 -0
  125. rem/services/fs/service.py +186 -0
  126. rem/services/git/README.md +1075 -0
  127. rem/services/git/__init__.py +17 -0
  128. rem/services/git/service.py +469 -0
  129. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  130. rem/services/phoenix/README.md +453 -0
  131. rem/services/phoenix/__init__.py +46 -0
  132. rem/services/phoenix/client.py +686 -0
  133. rem/services/phoenix/config.py +88 -0
  134. rem/services/phoenix/prompt_labels.py +477 -0
  135. rem/services/postgres/README.md +575 -0
  136. rem/services/postgres/__init__.py +23 -0
  137. rem/services/postgres/migration_service.py +427 -0
  138. rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
  139. rem/services/postgres/register_type.py +352 -0
  140. rem/services/postgres/repository.py +337 -0
  141. rem/services/postgres/schema_generator.py +379 -0
  142. rem/services/postgres/service.py +802 -0
  143. rem/services/postgres/sql_builder.py +354 -0
  144. rem/services/rem/README.md +304 -0
  145. rem/services/rem/__init__.py +23 -0
  146. rem/services/rem/exceptions.py +71 -0
  147. rem/services/rem/executor.py +293 -0
  148. rem/services/rem/parser.py +145 -0
  149. rem/services/rem/queries.py +196 -0
  150. rem/services/rem/query.py +371 -0
  151. rem/services/rem/service.py +527 -0
  152. rem/services/session/README.md +374 -0
  153. rem/services/session/__init__.py +6 -0
  154. rem/services/session/compression.py +360 -0
  155. rem/services/session/reload.py +77 -0
  156. rem/settings.py +1235 -0
  157. rem/sql/002_install_models.sql +1068 -0
  158. rem/sql/background_indexes.sql +42 -0
  159. rem/sql/install_models.sql +1038 -0
  160. rem/sql/migrations/001_install.sql +503 -0
  161. rem/sql/migrations/002_install_models.sql +1202 -0
  162. rem/utils/AGENTIC_CHUNKING.md +597 -0
  163. rem/utils/README.md +583 -0
  164. rem/utils/__init__.py +43 -0
  165. rem/utils/agentic_chunking.py +622 -0
  166. rem/utils/batch_ops.py +343 -0
  167. rem/utils/chunking.py +108 -0
  168. rem/utils/clip_embeddings.py +276 -0
  169. rem/utils/dict_utils.py +98 -0
  170. rem/utils/embeddings.py +423 -0
  171. rem/utils/examples/embeddings_example.py +305 -0
  172. rem/utils/examples/sql_types_example.py +202 -0
  173. rem/utils/markdown.py +16 -0
  174. rem/utils/model_helpers.py +236 -0
  175. rem/utils/schema_loader.py +229 -0
  176. rem/utils/sql_types.py +348 -0
  177. rem/utils/user_id.py +81 -0
  178. rem/utils/vision.py +330 -0
  179. rem/workers/README.md +506 -0
  180. rem/workers/__init__.py +5 -0
  181. rem/workers/dreaming.py +502 -0
  182. rem/workers/engram_processor.py +312 -0
  183. rem/workers/sqs_file_processor.py +193 -0
  184. remdb-0.2.6.dist-info/METADATA +1191 -0
  185. remdb-0.2.6.dist-info/RECORD +187 -0
  186. remdb-0.2.6.dist-info/WHEEL +4 -0
  187. remdb-0.2.6.dist-info/entry_points.txt +2 -0
rem/services/postgres/service.py
@@ -0,0 +1,802 @@
+ """
+ PostgresService - CloudNativePG database operations.
+
+ Provides connection management and query execution for PostgreSQL 18
+ with pgvector extension running on CloudNativePG.
+
+ Key Features:
+ - Connection pooling
+ - Tenant isolation
+ - Vector similarity search
+ - JSONB operations for graph edges
+ - Transaction management
+
+ CloudNativePG Integration:
+ - Uses PostgreSQL 18 with pgvector extension
+ - Extension loaded via ImageVolume pattern (immutable)
+ - extension_control_path configured for pgvector
+ - Streaming replication for HA
+ - Backup to S3 via Barman
+
+ Performance Considerations:
+ - GIN indexes on JSONB fields (related_entities, graph_edges)
+ - Vector indexes (IVF/HNSW) for similarity search
+ - Tenant-scoped queries for isolation
+ - Connection pooling for concurrency
+ """
+
+ from typing import Any, Optional, Type
+
+ import asyncpg
+ from loguru import logger
+ from pydantic import BaseModel
+
+ from ...utils.batch_ops import (
+     batch_iterator,
+     build_upsert_statement,
+     prepare_record_for_upsert,
+     validate_record_for_kv_store,
+ )
+ from ...utils.sql_types import get_sql_type
+ from .repository import Repository  # Moved from inside get_repository
+
+
+ class PostgresService:
+     """
+     PostgreSQL database service for REM.
+
+     Manages connections, queries, and transactions for CloudNativePG
+     with PostgreSQL 18 and pgvector extension.
+     """
+
+     def __init__(
+         self,
+         embedding_worker: Optional[Any] = ...,  # Sentinel for "not provided"
+     ):
+         """
+         Initialize PostgreSQL service.
+
+         Args:
+             embedding_worker: Optional EmbeddingWorker for background embedding generation.
+                 If not provided (default), auto-creates one.
+                 Pass None to explicitly disable.
+         """
+         from ...settings import settings
+         if not settings.postgres.enabled:
+             raise RuntimeError("PostgreSQL is not enabled in the settings.")
+
+         self.connection_string = settings.postgres.connection_string
+         self.pool_size = settings.postgres.pool_size
+         self.pool: Optional[asyncpg.Pool] = None
+
+         # Use global embedding worker singleton
+         if embedding_worker is ...:
+             from ..embeddings.worker import get_global_embedding_worker
+             # Get or create global worker - it lives independently of this service
+             self.embedding_worker = get_global_embedding_worker(postgres_service=self)
+         else:
+             self.embedding_worker = embedding_worker  # type: ignore[assignment]
+
+     async def execute_ddl(self, query: str) -> None:
+         """
+         Execute SQL DDL query (e.g., CREATE, ALTER, DROP) without returning results.
+
+         Args:
+             query: SQL query string
+         """
+         self._ensure_pool()
+         assert self.pool is not None  # Type guard for mypy
+
+         async with self.pool.acquire() as conn:
+             await conn.execute(query)
+
+     async def execute_script(self, sql_script: str) -> None:
+         """
+         Execute a multi-statement SQL script.
+
+         This method properly handles SQL files with multiple statements separated
+         by semicolons, including complex scripts with DO blocks, CREATE statements,
+         and comments.
+
+         Args:
+             sql_script: Complete SQL script content
+         """
+         self._ensure_pool()
+         assert self.pool is not None  # Type guard for mypy
+
+         # Split script into individual statements.
+         # This is a simplified approach - for production consider using sqlparse.
+         statements = []
+         current_statement = []
+         in_do_block = False
+
+         for line in sql_script.split('\n'):
+             stripped = line.strip()
+
+             # Skip empty lines and comments
+             if not stripped or stripped.startswith('--'):
+                 continue
+
+             # Track DO blocks, which can contain semicolons
+             if stripped.upper().startswith('DO $$') or stripped.upper().startswith('DO $'):
+                 in_do_block = True
+
+             current_statement.append(line)
+
+             # Check for statement end
+             if stripped.endswith('$$;') or stripped.endswith('$;'):
+                 in_do_block = False
+                 statements.append('\n'.join(current_statement))
+                 current_statement = []
+             elif stripped.endswith(';') and not in_do_block:
+                 statements.append('\n'.join(current_statement))
+                 current_statement = []
+
+         # Add any remaining statement
+         if current_statement:
+             stmt = '\n'.join(current_statement).strip()
+             if stmt:
+                 statements.append(stmt)
+
+         # Execute each statement
+         async with self.pool.acquire() as conn:
+             for statement in statements:
+                 stmt = statement.strip()
+                 if stmt:
+                     await conn.execute(stmt)
+
+     def _ensure_pool(self) -> None:
+         """
+         Ensure database connection pool is established.
+
+         Raises:
+             RuntimeError: If pool is not connected
+
+         Usage:
+             Internal helper used by all query methods to validate connection state.
+         """
+         if not self.pool:
+             raise RuntimeError("PostgreSQL pool not connected. Call connect() first.")
+
+     def get_repository(self, model_class: Type[BaseModel], table_name: str) -> Repository[BaseModel]:
+         """
+         Get a repository instance for a given model and table.
+
+         Args:
+             model_class: The Pydantic model class for the repository.
+             table_name: The name of the database table.
+
+         Returns:
+             An instance of the Repository class.
+         """
+         return Repository(model_class=model_class, table_name=table_name, db=self)
+
+     async def _init_connection(self, conn: asyncpg.Connection) -> None:
+         """
+         Initialize connection with custom type codecs.
+
+         Sets up automatic JSONB conversion to/from Python objects.
+         """
+         import json
+
+         # Set up JSONB codec for automatic conversion
+         await conn.set_type_codec(
+             'jsonb',
+             encoder=json.dumps,
+             decoder=json.loads,
+             schema='pg_catalog',
+             format='text',
+         )
+
+     async def connect(self) -> None:
+         """Establish database connection pool."""
+         logger.info(f"Connecting to PostgreSQL with pool size {self.pool_size}")
+         self.pool = await asyncpg.create_pool(
+             self.connection_string,
+             min_size=1,
+             max_size=self.pool_size,
+             init=self._init_connection,  # Configure JSONB codec on each connection
+         )
+         logger.info("PostgreSQL connection pool established")
+
+         # Start embedding worker if available
+         if self.embedding_worker and hasattr(self.embedding_worker, "start"):
+             await self.embedding_worker.start()
+             logger.info("Embedding worker started")
+
+     async def disconnect(self) -> None:
+         """Close database connection pool."""
+         # DO NOT stop the global embedding worker here!
+         # It's shared across multiple service instances and processes background tasks.
+         # The worker will be stopped explicitly when the application shuts down.
+
+         if self.pool:
+             logger.info("Closing PostgreSQL connection pool")
+             await self.pool.close()
+             self.pool = None
+             logger.info("PostgreSQL connection pool closed")
+
+     async def execute(
+         self,
+         query: str,
+         params: Optional[tuple] = None,
+     ) -> list[dict[str, Any]]:
+         """
+         Execute SQL query and return results.
+
+         Args:
+             query: SQL query string
+             params: Query parameters
+
+         Returns:
+             List of result rows as dicts
+         """
+         self._ensure_pool()
+         assert self.pool is not None  # Type guard for mypy
+
+         async with self.pool.acquire() as conn:
+             if params:
+                 rows = await conn.fetch(query, *params)
+             else:
+                 rows = await conn.fetch(query)
+
+             return [dict(row) for row in rows]
+
+     async def fetch(self, query: str, *params) -> list[asyncpg.Record]:
+         """
+         Fetch multiple rows from database.
+
+         Args:
+             query: SQL query string
+             *params: Query parameters
+
+         Returns:
+             List of asyncpg.Record objects
+         """
+         self._ensure_pool()
+         assert self.pool is not None  # Type guard for mypy
+
+         async with self.pool.acquire() as conn:
+             return await conn.fetch(query, *params)
+
+     async def fetchrow(self, query: str, *params) -> Optional[asyncpg.Record]:
+         """
+         Fetch single row from database.
+
+         Args:
+             query: SQL query string
+             *params: Query parameters
+
+         Returns:
+             asyncpg.Record or None if no rows found
+         """
+         self._ensure_pool()
+         assert self.pool is not None  # Type guard for mypy
+
+         async with self.pool.acquire() as conn:
+             return await conn.fetchrow(query, *params)
+
+     async def fetchval(self, query: str, *params) -> Any:
+         """
+         Fetch single value from database.
+
+         Args:
+             query: SQL query string
+             *params: Query parameters
+
+         Returns:
+             Single value or None if no rows found
+         """
+         self._ensure_pool()
+         assert self.pool is not None  # Type guard for mypy
+
+         async with self.pool.acquire() as conn:
+             return await conn.fetchval(query, *params)
+
+     def transaction(self):
+         """
+         Create a database transaction context manager.
+
+         Returns:
+             Transaction object with bound connection for executing queries within a transaction
+
+         Usage:
+             async with postgres_service.transaction() as txn:
+                 await txn.execute("INSERT ...")
+                 await txn.execute("UPDATE ...")
+
+         Note:
+             The transaction object has the same query methods as PostgresService
+             (execute, fetch, fetchrow, fetchval) but executes them on a single
+             connection within a transaction.
+         """
+         self._ensure_pool()
+         assert self.pool is not None  # Type guard for mypy
+
+         from contextlib import asynccontextmanager
+
+         @asynccontextmanager
+         async def _transaction_context():
+             if not self.pool:
+                 raise RuntimeError("Database pool not initialized")
+             async with self.pool.acquire() as conn:
+                 async with conn.transaction():
+                     # Yield a transaction wrapper that provides query methods
+                     yield _TransactionContext(conn)
+
+         return _transaction_context()
+
+     async def execute_many(
+         self,
+         query: str,
+         params_list: list[tuple],
+     ) -> None:
+         """
+         Execute SQL query with multiple parameter sets.
+
+         Args:
+             query: SQL query string
+             params_list: List of parameter tuples
+         """
+         self._ensure_pool()
+         assert self.pool is not None  # Type guard for mypy
+
+         async with self.pool.acquire() as conn:
+             await conn.executemany(query, params_list)
+
+     async def upsert(
+         self,
+         record: BaseModel,
+         model: Type[BaseModel],
+         table_name: str,
+         entity_key_field: str = "name",
+         embeddable_fields: list[str] | None = None,
+         generate_embeddings: bool = False,
+     ) -> BaseModel:
+         """
+         Upsert a single record.
+
+         Convenience wrapper around batch_upsert for single records.
+
+         Args:
+             record: Pydantic model instance
+             model: Pydantic model class
+             table_name: Database table name
+             entity_key_field: Field name to use as KV store key (default: "name")
+             embeddable_fields: List of fields to generate embeddings for
+             generate_embeddings: Whether to generate embeddings (default: False)
+
+         Returns:
+             The upserted record
+
+         Example:
+             >>> from rem.models.entities import Message
+             >>> message = Message(content="Hello", session_id="abc", tenant_id="acme")
+             >>> result = await pg.upsert(
+             ...     record=message,
+             ...     model=Message,
+             ...     table_name="messages"
+             ... )
+         """
+         await self.batch_upsert(
+             records=[record],
+             model=model,
+             table_name=table_name,
+             entity_key_field=entity_key_field,
+             embeddable_fields=embeddable_fields,
+             generate_embeddings=generate_embeddings,
+         )
+         return record
+
+     async def upsert_entity(
+         self,
+         entity: BaseModel,
+         entity_key: str,
+         tenant_id: str,
+         embeddable_fields: list[str] | None = None,
+         generate_embeddings: bool = False,
+     ) -> BaseModel:
+         """
+         Upsert an entity using explicit entity_key.
+
+         This is a convenience method that auto-detects table name from model.
+
+         Args:
+             entity: Pydantic model instance
+             entity_key: Value to use for KV store key (not field name)
+             tenant_id: Tenant identifier
+             embeddable_fields: List of fields to generate embeddings for
+             generate_embeddings: Whether to generate embeddings (default: False)
+
+         Returns:
+             The upserted entity
+
+         Example:
+             >>> from rem.models.entities import Ontology
+             >>> ontology = Ontology(name="cv-parser", tenant_id="acme", ...)
+             >>> result = await pg.upsert_entity(
+             ...     entity=ontology,
+             ...     entity_key=ontology.name,
+             ...     tenant_id=ontology.tenant_id
+             ... )
+         """
+         # Auto-detect table name from model class
+         model_class = type(entity)
+         table_name = f"{model_class.__name__.lower()}s"
+
+         await self.batch_upsert(
+             records=[entity],
+             model=model_class,
+             table_name=table_name,
+             entity_key_field="name",  # Default field name for entity key
+             embeddable_fields=embeddable_fields,
+             generate_embeddings=generate_embeddings,
+         )
+         return entity
+
+     async def batch_upsert(
+         self,
+         records: list[BaseModel | dict],
+         model: Type[BaseModel],
+         table_name: str,
+         entity_key_field: str = "name",
+         embeddable_fields: list[str] | None = None,
+         batch_size: int = 100,
+         generate_embeddings: bool = False,
+     ) -> dict[str, Any]:
+         """
+         Batch upsert records with KV store population and optional embedding generation.
+
+         KV Store Integration:
+         - Triggers automatically populate kv_store on INSERT/UPDATE
+         - Unique on (tenant_id, entity_key) where entity_key comes from entity_key_field
+         - User can store same key in multiple tables (different source_table_id)
+         - Supports user_id scoping (user_id can be NULL for shared entities)
+
+         Embedding Generation:
+         - Queues embedding tasks for background processing via EmbeddingWorker
+         - Upserts to embeddings_<table> with unique (entity_id, field_name, provider)
+         - Returns immediately without waiting for embeddings (async processing)
+
+         Args:
+             records: List of Pydantic model instances or dicts (will be validated against model)
+             model: Pydantic model class
+             table_name: Database table name
+             entity_key_field: Field name to use as KV store key (default: "name")
+             embeddable_fields: List of fields to generate embeddings for (auto-detected if None)
+             batch_size: Number of records per batch
+             generate_embeddings: Whether to generate embeddings (default: False)
+
+         Returns:
+             Dict with:
+             - upserted_count: Number of records upserted
+             - kv_store_populated: Number of KV store entries (via triggers)
+             - embeddings_generated: Number of embeddings generated
+             - batches_processed: Number of batches processed
+
+         Example:
+             >>> from rem.models.entities import Resource
+             >>> resources = [Resource(name="doc1", content="...", tenant_id="acme")]
+             >>> # Or with dicts
+             >>> resources = [{"name": "doc1", "content": "...", "tenant_id": "acme"}]
+             >>> result = await pg.batch_upsert(
+             ...     records=resources,
+             ...     model=Resource,
+             ...     table_name="resources",
+             ...     entity_key_field="name",
+             ...     generate_embeddings=True
+             ... )
+
+         Design Notes:
+         - Delegates SQL generation to utils.sql_types
+         - Uses utils.batch_ops for batching and preparation
+         - KV store population happens via database triggers (no explicit code)
+         - Embedding generation is batched for efficiency
+         """
+         if not records:
+             logger.warning("No records to upsert")
+             return {
+                 "upserted_count": 0,
+                 "kv_store_populated": 0,
+                 "embeddings_generated": 0,
+                 "batches_processed": 0,
+                 "ids": [],
+             }
+
+         logger.info(
+             f"Batch upserting {len(records)} records to {table_name} "
+             f"(entity_key: {entity_key_field}, embeddings: {generate_embeddings})"
+         )
+
+         # Convert dict records to Pydantic models
+         pydantic_records = []
+         for record in records:
+             if isinstance(record, dict):
+                 pydantic_records.append(model.model_validate(record))
+             else:
+                 pydantic_records.append(record)
+
+         # Validate records for KV store requirements
+         for record in pydantic_records:
+             valid, error = validate_record_for_kv_store(record, entity_key_field)
+             if not valid:
+                 logger.warning(f"Record validation failed: {error} - {record}")
+
+         # Prepare records (using pydantic_records after conversion)
+         field_names = list(model.model_fields.keys())
+         prepared_records = [
+             prepare_record_for_upsert(r, model, entity_key_field) for r in pydantic_records
+         ]
+
+         # Build upsert statement (use actual field names from prepared records)
+         if prepared_records:
+             actual_fields = list(prepared_records[0].keys())
+             upsert_sql = build_upsert_statement(
+                 table_name, actual_fields, conflict_column="id"
+             )
+         else:
+             logger.warning("No prepared records to upsert")
+             return {
+                 "upserted_count": 0,
+                 "kv_store_populated": 0,
+                 "embeddings_generated": 0,
+                 "batches_processed": 0,
+                 "ids": [],
+             }
+
+         # Process in batches
+         total_upserted = 0
+         total_embeddings = 0
+         batch_count = 0
+         upserted_ids = []  # Track IDs of upserted records
+
+         self._ensure_pool()
+         assert self.pool is not None  # Type guard for mypy
+
+         for batch in batch_iterator(prepared_records, batch_size):
+             batch_count += 1
+             logger.debug(f"Processing batch {batch_count} with {len(batch)} records")
+
+             # Execute batch upsert
+             async with self.pool.acquire() as conn:
+                 for record in batch:
+                     # Extract values in the same order as actual_fields
+                     values = tuple(record.get(field) for field in actual_fields)
+
+                     try:
+                         await conn.execute(upsert_sql, *values)
+                         total_upserted += 1
+                         # Track the ID
+                         if "id" in record:
+                             upserted_ids.append(record["id"])
+                     except Exception as e:
+                         logger.error(f"Failed to upsert record: {e}")
+                         logger.debug(f"Record: {record}")
+                         logger.debug(f"SQL: {upsert_sql}")
+                         logger.debug(f"Values: {values}")
+                         raise
+
+             # KV store population happens automatically via triggers
+             # No explicit code needed - triggers handle it
+
+             # Queue embedding tasks for background processing
+             if generate_embeddings and embeddable_fields and self.embedding_worker:
+                 for record_dict in batch:
+                     entity_id = record_dict.get("id")
+                     if not entity_id:
+                         continue
+
+                     for field_name in embeddable_fields:
+                         content = record_dict.get(field_name)
+                         if not content or not isinstance(content, str):
+                             continue
+
+                         # Queue embedding task (non-blocking)
+                         from ..embeddings import EmbeddingTask
+                         from ...settings import settings
+
+                         task = EmbeddingTask(
+                             task_id=f"{entity_id}:{field_name}",
+                             entity_id=str(entity_id),
+                             table_name=table_name,
+                             field_name=field_name,
+                             content=content,
+                             provider=settings.llm.embedding_provider,
+                             model=settings.llm.embedding_model,
+                         )
+
+                         await self.embedding_worker.queue_task(task)
+                         total_embeddings += 1
+
+                 logger.debug(
+                     f"Queued {total_embeddings} embedding tasks for background processing"
+                 )
+
+         logger.info(
+             f"Batch upsert complete: {total_upserted} records, "
+             f"{total_embeddings} embeddings, {batch_count} batches"
+         )
+
+         return {
+             "upserted_count": total_upserted,
+             "kv_store_populated": total_upserted,  # Triggers populate 1:1
+             "embeddings_generated": total_embeddings,
+             "batches_processed": batch_count,
+             "ids": upserted_ids,  # List of IDs for upserted records
+         }
+
+     async def vector_search(
+         self,
+         table_name: str,
+         embedding: list[float],
+         limit: int = 10,
+         min_similarity: float = 0.7,
+         tenant_id: Optional[str] = None,
+     ) -> list[dict[str, Any]]:
+         """
+         Perform vector similarity search using pgvector.
+
+         Args:
+             table_name: Table to search (resources, moments, etc.)
+             embedding: Query embedding vector
+             limit: Maximum results
+             min_similarity: Minimum cosine similarity threshold
+             tenant_id: Optional tenant filter
+
+         Returns:
+             List of similar entities with similarity scores
+
+         Note:
+             Use rem_search() SQL function for vector search instead.
+         """
+         raise NotImplementedError(
+             "Use REMQueryService.execute('SEARCH ...') for vector similarity search"
+         )
+
+     async def jsonb_query(
+         self,
+         table_name: str,
+         jsonb_field: str,
+         query_path: str,
+         tenant_id: Optional[str] = None,
+     ) -> list[dict[str, Any]]:
+         """
+         Query JSONB field with path expression.
+
+         Args:
+             table_name: Table to query
+             jsonb_field: JSONB column name
+             query_path: JSONB path query
+             tenant_id: Optional tenant filter
+
+         Returns:
+             Matching rows
+         """
+         raise NotImplementedError("JSONB path queries not yet implemented")
+
+     async def create_resource(self, resource: dict[str, Any]) -> str:
+         """
+         Create new resource in database.
+
+         Args:
+             resource: Resource data dict
+
+         Returns:
+             Created resource ID
+
+         Note:
+             Use batch_upsert() method for creating resources.
+         """
+         raise NotImplementedError("Use batch_upsert() for creating resources")
+
+     async def create_moment(self, moment: dict[str, Any]) -> str:
+         """
+         Create new moment in database.
+
+         Args:
+             moment: Moment data dict
+
+         Returns:
+             Created moment ID
+
+         Note:
+             Use batch_upsert() method for creating moments.
+         """
+         raise NotImplementedError("Use batch_upsert() for creating moments")
+
+     async def update_graph_edges(
+         self,
+         entity_id: str,
+         edges: list[dict[str, Any]],
+         merge: bool = True,
+     ) -> None:
+         """
+         Update graph edges for an entity.
+
+         Args:
+             entity_id: Entity UUID
+             edges: List of InlineEdge dicts
+             merge: If True, merge with existing edges; if False, replace
+         """
+         raise NotImplementedError("Graph edge updates not yet implemented")
+
+
+ class _TransactionContext:
+     """
+     Transaction context with bound connection.
+
+     Provides the same query interface as PostgresService but executes
+     all queries on a single connection within a transaction.
+
+     This is safer than method swapping and provides explicit transaction scope.
+     """
+
+     def __init__(self, conn: asyncpg.Connection):
+         """
+         Initialize transaction context.
+
+         Args:
+             conn: Database connection bound to this transaction
+         """
+         self.conn = conn
+
+     async def execute(
+         self,
+         query: str,
+         params: Optional[tuple] = None,
+     ) -> list[dict[str, Any]]:
+         """
+         Execute SQL query within transaction.
+
+         Args:
+             query: SQL query string
+             params: Query parameters
+
+         Returns:
+             List of result rows as dicts
+         """
+         if params:
+             rows = await self.conn.fetch(query, *params)
+         else:
+             rows = await self.conn.fetch(query)
+         return [dict(row) for row in rows]
+
+     async def fetch(self, query: str, *params) -> list[asyncpg.Record]:
+         """
+         Fetch multiple rows within transaction.
+
+         Args:
+             query: SQL query string
+             *params: Query parameters
+
+         Returns:
+             List of asyncpg.Record objects
+         """
+         return await self.conn.fetch(query, *params)
+
+     async def fetchrow(self, query: str, *params) -> Optional[asyncpg.Record]:
+         """
+         Fetch single row within transaction.
+
+         Args:
+             query: SQL query string
+             *params: Query parameters
+
+         Returns:
+             asyncpg.Record or None if no rows found
+         """
+         return await self.conn.fetchrow(query, *params)
+
+     async def fetchval(self, query: str, *params) -> Any:
+         """
+         Fetch single value within transaction.
+
+         Args:
+             query: SQL query string
+             *params: Query parameters
+
+         Returns:
+             Single value or None if no rows found
+         """
+         return await self.conn.fetchval(query, *params)
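
For orientation, here is a minimal usage sketch assembled from the docstrings in the diff above (the Resource example, batch_upsert, and transaction). It is illustrative only: the import paths for PostgresService and Resource are assumptions inferred from the file listing and docstring examples, not confirmed by this diff, and connection settings are expected to come from rem.settings.

import asyncio

from rem.models.entities import Resource  # assumed export, per docstring examples
from rem.services.postgres.service import PostgresService  # assumed import path


async def main() -> None:
    pg = PostgresService()  # default sentinel auto-creates the global embedding worker
    await pg.connect()      # creates the pool, installs JSONB codecs, starts the worker
    try:
        # Batch upsert with background embedding generation; this returns before
        # embeddings are computed, since the worker processes tasks asynchronously.
        result = await pg.batch_upsert(
            records=[Resource(name="doc1", content="...", tenant_id="acme")],
            model=Resource,
            table_name="resources",
            entity_key_field="name",
            embeddable_fields=["content"],
            generate_embeddings=True,
        )
        print(result["upserted_count"], result["ids"])

        # Run several statements atomically on one pooled connection.
        async with pg.transaction() as txn:
            total = await txn.fetchval("SELECT count(*) FROM resources")
            rows = await txn.fetch(
                "SELECT name FROM resources WHERE tenant_id = $1", "acme"
            )
    finally:
        await pg.disconnect()  # closes the pool; the shared worker keeps running


asyncio.run(main())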