remdb-0.3.7-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187)
  1. rem/__init__.py +2 -0
  2. rem/agentic/README.md +650 -0
  3. rem/agentic/__init__.py +39 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +8 -0
  6. rem/agentic/context.py +148 -0
  7. rem/agentic/context_builder.py +329 -0
  8. rem/agentic/mcp/__init__.py +0 -0
  9. rem/agentic/mcp/tool_wrapper.py +107 -0
  10. rem/agentic/otel/__init__.py +5 -0
  11. rem/agentic/otel/setup.py +151 -0
  12. rem/agentic/providers/phoenix.py +674 -0
  13. rem/agentic/providers/pydantic_ai.py +572 -0
  14. rem/agentic/query.py +117 -0
  15. rem/agentic/query_helper.py +89 -0
  16. rem/agentic/schema.py +396 -0
  17. rem/agentic/serialization.py +245 -0
  18. rem/agentic/tools/__init__.py +5 -0
  19. rem/agentic/tools/rem_tools.py +231 -0
  20. rem/api/README.md +420 -0
  21. rem/api/main.py +324 -0
  22. rem/api/mcp_router/prompts.py +182 -0
  23. rem/api/mcp_router/resources.py +536 -0
  24. rem/api/mcp_router/server.py +213 -0
  25. rem/api/mcp_router/tools.py +584 -0
  26. rem/api/routers/auth.py +229 -0
  27. rem/api/routers/chat/__init__.py +5 -0
  28. rem/api/routers/chat/completions.py +281 -0
  29. rem/api/routers/chat/json_utils.py +76 -0
  30. rem/api/routers/chat/models.py +124 -0
  31. rem/api/routers/chat/streaming.py +185 -0
  32. rem/auth/README.md +258 -0
  33. rem/auth/__init__.py +26 -0
  34. rem/auth/middleware.py +100 -0
  35. rem/auth/providers/__init__.py +13 -0
  36. rem/auth/providers/base.py +376 -0
  37. rem/auth/providers/google.py +163 -0
  38. rem/auth/providers/microsoft.py +237 -0
  39. rem/cli/README.md +455 -0
  40. rem/cli/__init__.py +8 -0
  41. rem/cli/commands/README.md +126 -0
  42. rem/cli/commands/__init__.py +3 -0
  43. rem/cli/commands/ask.py +566 -0
  44. rem/cli/commands/configure.py +497 -0
  45. rem/cli/commands/db.py +493 -0
  46. rem/cli/commands/dreaming.py +324 -0
  47. rem/cli/commands/experiments.py +1302 -0
  48. rem/cli/commands/mcp.py +66 -0
  49. rem/cli/commands/process.py +245 -0
  50. rem/cli/commands/schema.py +183 -0
  51. rem/cli/commands/serve.py +106 -0
  52. rem/cli/dreaming.py +363 -0
  53. rem/cli/main.py +96 -0
  54. rem/config.py +237 -0
  55. rem/mcp_server.py +41 -0
  56. rem/models/core/__init__.py +49 -0
  57. rem/models/core/core_model.py +64 -0
  58. rem/models/core/engram.py +333 -0
  59. rem/models/core/experiment.py +628 -0
  60. rem/models/core/inline_edge.py +132 -0
  61. rem/models/core/rem_query.py +243 -0
  62. rem/models/entities/__init__.py +43 -0
  63. rem/models/entities/file.py +57 -0
  64. rem/models/entities/image_resource.py +88 -0
  65. rem/models/entities/message.py +35 -0
  66. rem/models/entities/moment.py +123 -0
  67. rem/models/entities/ontology.py +191 -0
  68. rem/models/entities/ontology_config.py +131 -0
  69. rem/models/entities/resource.py +95 -0
  70. rem/models/entities/schema.py +87 -0
  71. rem/models/entities/user.py +85 -0
  72. rem/py.typed +0 -0
  73. rem/schemas/README.md +507 -0
  74. rem/schemas/__init__.py +6 -0
  75. rem/schemas/agents/README.md +92 -0
  76. rem/schemas/agents/core/moment-builder.yaml +178 -0
  77. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  78. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  79. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  80. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  81. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  82. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  83. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  84. rem/schemas/agents/examples/hello-world.yaml +37 -0
  85. rem/schemas/agents/examples/query.yaml +54 -0
  86. rem/schemas/agents/examples/simple.yaml +21 -0
  87. rem/schemas/agents/examples/test.yaml +29 -0
  88. rem/schemas/agents/rem.yaml +128 -0
  89. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  90. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  91. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  92. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  93. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  94. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  95. rem/services/__init__.py +16 -0
  96. rem/services/audio/INTEGRATION.md +308 -0
  97. rem/services/audio/README.md +376 -0
  98. rem/services/audio/__init__.py +15 -0
  99. rem/services/audio/chunker.py +354 -0
  100. rem/services/audio/transcriber.py +259 -0
  101. rem/services/content/README.md +1269 -0
  102. rem/services/content/__init__.py +5 -0
  103. rem/services/content/providers.py +801 -0
  104. rem/services/content/service.py +676 -0
  105. rem/services/dreaming/README.md +230 -0
  106. rem/services/dreaming/__init__.py +53 -0
  107. rem/services/dreaming/affinity_service.py +336 -0
  108. rem/services/dreaming/moment_service.py +264 -0
  109. rem/services/dreaming/ontology_service.py +54 -0
  110. rem/services/dreaming/user_model_service.py +297 -0
  111. rem/services/dreaming/utils.py +39 -0
  112. rem/services/embeddings/__init__.py +11 -0
  113. rem/services/embeddings/api.py +120 -0
  114. rem/services/embeddings/worker.py +421 -0
  115. rem/services/fs/README.md +662 -0
  116. rem/services/fs/__init__.py +62 -0
  117. rem/services/fs/examples.py +206 -0
  118. rem/services/fs/examples_paths.py +204 -0
  119. rem/services/fs/git_provider.py +935 -0
  120. rem/services/fs/local_provider.py +760 -0
  121. rem/services/fs/parsing-hooks-examples.md +172 -0
  122. rem/services/fs/paths.py +276 -0
  123. rem/services/fs/provider.py +460 -0
  124. rem/services/fs/s3_provider.py +1042 -0
  125. rem/services/fs/service.py +186 -0
  126. rem/services/git/README.md +1075 -0
  127. rem/services/git/__init__.py +17 -0
  128. rem/services/git/service.py +469 -0
  129. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  130. rem/services/phoenix/README.md +453 -0
  131. rem/services/phoenix/__init__.py +46 -0
  132. rem/services/phoenix/client.py +686 -0
  133. rem/services/phoenix/config.py +88 -0
  134. rem/services/phoenix/prompt_labels.py +477 -0
  135. rem/services/postgres/README.md +575 -0
  136. rem/services/postgres/__init__.py +23 -0
  137. rem/services/postgres/migration_service.py +427 -0
  138. rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
  139. rem/services/postgres/register_type.py +352 -0
  140. rem/services/postgres/repository.py +337 -0
  141. rem/services/postgres/schema_generator.py +379 -0
  142. rem/services/postgres/service.py +802 -0
  143. rem/services/postgres/sql_builder.py +354 -0
  144. rem/services/rem/README.md +304 -0
  145. rem/services/rem/__init__.py +23 -0
  146. rem/services/rem/exceptions.py +71 -0
  147. rem/services/rem/executor.py +293 -0
  148. rem/services/rem/parser.py +145 -0
  149. rem/services/rem/queries.py +196 -0
  150. rem/services/rem/query.py +371 -0
  151. rem/services/rem/service.py +527 -0
  152. rem/services/session/README.md +374 -0
  153. rem/services/session/__init__.py +6 -0
  154. rem/services/session/compression.py +360 -0
  155. rem/services/session/reload.py +77 -0
  156. rem/settings.py +1235 -0
  157. rem/sql/002_install_models.sql +1068 -0
  158. rem/sql/background_indexes.sql +42 -0
  159. rem/sql/install_models.sql +1038 -0
  160. rem/sql/migrations/001_install.sql +503 -0
  161. rem/sql/migrations/002_install_models.sql +1202 -0
  162. rem/utils/AGENTIC_CHUNKING.md +597 -0
  163. rem/utils/README.md +583 -0
  164. rem/utils/__init__.py +43 -0
  165. rem/utils/agentic_chunking.py +622 -0
  166. rem/utils/batch_ops.py +343 -0
  167. rem/utils/chunking.py +108 -0
  168. rem/utils/clip_embeddings.py +276 -0
  169. rem/utils/dict_utils.py +98 -0
  170. rem/utils/embeddings.py +423 -0
  171. rem/utils/examples/embeddings_example.py +305 -0
  172. rem/utils/examples/sql_types_example.py +202 -0
  173. rem/utils/markdown.py +16 -0
  174. rem/utils/model_helpers.py +236 -0
  175. rem/utils/schema_loader.py +336 -0
  176. rem/utils/sql_types.py +348 -0
  177. rem/utils/user_id.py +81 -0
  178. rem/utils/vision.py +330 -0
  179. rem/workers/README.md +506 -0
  180. rem/workers/__init__.py +5 -0
  181. rem/workers/dreaming.py +502 -0
  182. rem/workers/engram_processor.py +312 -0
  183. rem/workers/sqs_file_processor.py +193 -0
  184. remdb-0.3.7.dist-info/METADATA +1473 -0
  185. remdb-0.3.7.dist-info/RECORD +187 -0
  186. remdb-0.3.7.dist-info/WHEEL +4 -0
  187. remdb-0.3.7.dist-info/entry_points.txt +2 -0
rem/services/postgres/register_type.py
@@ -0,0 +1,352 @@
"""
Dynamic table and embeddings schema generator from Pydantic models.

Generates:
1. Primary table for entity storage
2. embeddings_<table> for vector embeddings (one row per field per provider)
3. KV_STORE cache registration for the entity
4. Background index creation for performance

Design Patterns:
- Fields marked with json_schema_extra={"embed": True} get embeddings
- Common content fields (content, description, summary, etc.) embed by default
- Multiple embedding providers supported (OpenAI, Cohere, etc.)
- UNLOGGED KV_STORE for O(1) lookups
- Background index creation to avoid blocking writes
"""

from typing import Any, Type

from loguru import logger
from pydantic import BaseModel

from ...utils.sql_types import get_column_definition

# Embedding configuration
DEFAULT_EMBEDDING_PROVIDER = "openai"
DEFAULT_EMBEDDING_MODEL = "text-embedding-3-small"
DEFAULT_EMBEDDING_DIMENSIONS = 1536

# Fields that embed by default (if not explicitly disabled)
DEFAULT_EMBED_FIELD_NAMES = {
    "content",
    "description",
    "summary",
    "text",
    "body",
    "message",
    "notes",
}

def should_embed_field(field_name: str, field_info: Any) -> bool:
    """
    Determine if a field should have embeddings generated.

    Rules:
    1. If json_schema_extra.embed = True, always embed
    2. If json_schema_extra.embed = False, never embed
    3. If field name in DEFAULT_EMBED_FIELD_NAMES, embed by default
    4. Otherwise, don't embed

    Args:
        field_name: Field name from Pydantic model
        field_info: Field metadata from model.model_fields

    Returns:
        True if field should have embeddings
    """
    # Check json_schema_extra for explicit embed configuration
    json_extra = getattr(field_info, "json_schema_extra", None)
    if json_extra:
        if isinstance(json_extra, dict):
            embed = json_extra.get("embed")
            if embed is not None:
                return bool(embed)

    # Default: embed if field name matches common content fields
    return field_name.lower() in DEFAULT_EMBED_FIELD_NAMES

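To make the resolution order concrete, here is a minimal sketch of how `should_embed_field` weighs explicit flags against name-based defaults. The `Note` model and its fields are hypothetical, purely for illustration:

```python
from pydantic import BaseModel, Field

class Note(BaseModel):
    title: str                                                     # no flag, name not in defaults
    content: str                                                   # embeds by default (name match)
    internal_ref: str = Field(json_schema_extra={"embed": False})  # explicit opt-out
    keywords: str = Field(json_schema_extra={"embed": True})       # explicit opt-in

for name, info in Note.model_fields.items():
    print(name, should_embed_field(name, info))
# title False, content True, internal_ref False, keywords True
```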
def generate_table_schema(
    model: Type[BaseModel], table_name: str, tenant_scoped: bool = True
) -> str:
    """
    Generate CREATE TABLE SQL for a Pydantic model.

    Args:
        model: Pydantic model class
        table_name: Table name (e.g., "resources", "moments")
        tenant_scoped: If True, add tenant_id column and indexes

    Returns:
        SQL CREATE TABLE statement
    """
    columns = []
    indexes = []

    # System fields that we add separately (skip if in model)
    SYSTEM_FIELDS = {
        "id", "created_at", "updated_at", "deleted_at",
        "tenant_id", "user_id", "graph_edges", "metadata", "tags", "column"
    }

    # Always add id as primary key
    columns.append("id UUID PRIMARY KEY DEFAULT uuid_generate_v4()")

    # Add tenant_id if tenant scoped
    if tenant_scoped:
        columns.append("tenant_id VARCHAR(100) NOT NULL")
        indexes.append(f"CREATE INDEX idx_{table_name}_tenant ON {table_name} (tenant_id);")

    # Add user_id (owner field)
    columns.append("user_id VARCHAR(256)")
    indexes.append(f"CREATE INDEX idx_{table_name}_user ON {table_name} (user_id);")

    # Process Pydantic fields (skip system fields)
    for field_name, field_info in model.model_fields.items():
        if field_name in SYSTEM_FIELDS:
            continue  # Skip system fields - we add them separately

        # Use sql_types utility for consistent type mapping
        column_def = get_column_definition(
            field_info,
            field_name,
            nullable=not field_info.is_required(),
            primary_key=False
        )
        columns.append(column_def)

    # Add system fields (timestamps)
    columns.append("created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP")
    columns.append("updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP")
    columns.append("deleted_at TIMESTAMP")

    # Add graph_edges JSONB field
    columns.append("graph_edges JSONB DEFAULT '[]'::jsonb")
    indexes.append(
        f"CREATE INDEX idx_{table_name}_graph_edges ON {table_name} USING GIN (graph_edges);"
    )

    # Add metadata JSONB field
    columns.append("metadata JSONB DEFAULT '{}'::jsonb")
    indexes.append(
        f"CREATE INDEX idx_{table_name}_metadata ON {table_name} USING GIN (metadata);"
    )

    # Add tags field (TEXT[] for list[str])
    columns.append("tags TEXT[] DEFAULT ARRAY[]::TEXT[]")
    indexes.append(
        f"CREATE INDEX idx_{table_name}_tags ON {table_name} USING GIN (tags);"
    )

    # Generate CREATE TABLE statement (join computed outside the f-string so
    # the backslash escape is valid on Python < 3.12)
    columns_sql = ",\n    ".join(columns)
    create_table = f"""
CREATE TABLE IF NOT EXISTS {table_name} (
    {columns_sql}
);
""".strip()

    # Generate indexes
    index_sql = "\n".join(indexes)

    return f"{create_table}\n\n{index_sql}"

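As a usage sketch, the generator returns the table DDL plus its index statements in one string. The `Moment` model below is a stand-in; the exact column lines for model fields depend on `get_column_definition`:

```python
from pydantic import BaseModel

class Moment(BaseModel):
    name: str
    description: str | None = None

print(generate_table_schema(Moment, "moments", tenant_scoped=True))
# CREATE TABLE IF NOT EXISTS moments (
#     id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
#     tenant_id VARCHAR(100) NOT NULL,
#     user_id VARCHAR(256),
#     ...model columns from get_column_definition...,
#     created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
#     ...
# );
#
# CREATE INDEX idx_moments_tenant ON moments (tenant_id);
# CREATE INDEX idx_moments_user ON moments (user_id);
# ...
```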
def generate_embeddings_schema(
    model: Type[BaseModel], table_name: str, embedding_provider: str = DEFAULT_EMBEDDING_PROVIDER
) -> tuple[str, list[str]]:
    """
    Generate embeddings table schema for a model.

    Creates embeddings_<table_name> with:
    - One row per entity per field per provider
    - Unique constraint on (entity_id, field_name, provider)
    - Vector column with pgvector
    - HNSW index for fast similarity search

    Args:
        model: Pydantic model class
        table_name: Base table name
        embedding_provider: Default provider (e.g., "openai", "cohere")

    Returns:
        Tuple of (CREATE TABLE sql, list of embeddable field names)
    """
    embeddings_table = f"embeddings_{table_name}"
    embeddable_fields = []

    # Find fields that should have embeddings
    for field_name, field_info in model.model_fields.items():
        if should_embed_field(field_name, field_info):
            embeddable_fields.append(field_name)

    if not embeddable_fields:
        logger.warning(f"No embeddable fields found for {table_name}")
        return "", []

    # Generate embeddings table
    create_sql = f"""
CREATE TABLE IF NOT EXISTS {embeddings_table} (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    entity_id UUID NOT NULL REFERENCES {table_name}(id) ON DELETE CASCADE,
    field_name VARCHAR(100) NOT NULL,
    provider VARCHAR(50) NOT NULL DEFAULT '{embedding_provider}',
    model VARCHAR(100) NOT NULL DEFAULT '{DEFAULT_EMBEDDING_MODEL}',
    embedding vector({DEFAULT_EMBEDDING_DIMENSIONS}) NOT NULL,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,

    -- Unique: one embedding per entity per field per provider
    UNIQUE (entity_id, field_name, provider)
);

-- Index for entity lookup (get all embeddings for entity)
CREATE INDEX idx_{embeddings_table}_entity ON {embeddings_table} (entity_id);

-- Index for field + provider lookup
CREATE INDEX idx_{embeddings_table}_field_provider ON {embeddings_table} (field_name, provider);

-- HNSW index for vector similarity search (created in background)
-- Note: This will be created by background thread after data load
-- CREATE INDEX idx_{embeddings_table}_vector_hnsw ON {embeddings_table}
--     USING hnsw (embedding vector_cosine_ops);
""".strip()

    logger.info(
        f"Generated embeddings schema for {table_name} with fields: {embeddable_fields}"
    )

    return create_sql, embeddable_fields

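A sketch of how the two return values might be used together, inside an async context. The `Note` model, the asyncpg connection `conn`, and `query_vector` are assumptions; the similarity query uses pgvector's cosine-distance operator `<=>`, which pairs with the commented-out `vector_cosine_ops` HNSW index above, and passing a vector parameter requires pgvector's asyncpg codec to be registered:

```python
create_sql, fields = generate_embeddings_schema(Note, "notes")
await conn.execute(create_sql)

# Later: top-5 nearest neighbors for a query vector over the "content" field
rows = await conn.fetch(
    """
    SELECT entity_id, embedding <=> $1 AS distance
    FROM embeddings_notes
    WHERE field_name = 'content' AND provider = 'openai'
    ORDER BY embedding <=> $1
    LIMIT 5
    """,
    query_vector,  # 1536-dim vector, per DEFAULT_EMBEDDING_DIMENSIONS
)
```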
# NOTE: _map_pydantic_to_postgres_type is now replaced by utils.sql_types.get_sql_type
# Removed to use the centralized utility instead

def generate_kv_store_upsert(
    table_name: str,
    entity_key_field: str = "name",
) -> str:
    """
    Generate trigger to maintain KV_STORE cache on entity changes.

    Creates a trigger that:
    1. Extracts entity_key from the entity (e.g., name, key, label)
    2. Updates KV_STORE on INSERT/UPDATE for O(1) lookups
    3. Removes from KV_STORE on DELETE

    Args:
        table_name: Base table name
        entity_key_field: Field to use as entity_key in KV_STORE

    Returns:
        SQL for trigger creation
    """
    trigger_name = f"trg_{table_name}_kv_store"
    function_name = f"fn_{table_name}_kv_store_upsert"

    return f"""
-- Trigger function to maintain KV_STORE for {table_name}
CREATE OR REPLACE FUNCTION {function_name}()
RETURNS TRIGGER AS $$
BEGIN
    IF (TG_OP = 'DELETE') THEN
        -- Remove from KV_STORE on delete
        DELETE FROM kv_store
        WHERE entity_id = OLD.id;
        RETURN OLD;
    ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
        -- Upsert to KV_STORE (O(1) lookup by entity_key)
        INSERT INTO kv_store (
            entity_key,
            entity_type,
            entity_id,
            tenant_id,
            user_id,
            metadata,
            graph_edges,
            updated_at
        ) VALUES (
            NEW.{entity_key_field}::VARCHAR,
            '{table_name}',
            NEW.id,
            NEW.tenant_id,
            NEW.user_id,
            NEW.metadata,
            COALESCE(NEW.graph_edges, '[]'::jsonb),
            CURRENT_TIMESTAMP
        )
        ON CONFLICT (tenant_id, entity_key)
        DO UPDATE SET
            entity_id = EXCLUDED.entity_id,
            user_id = EXCLUDED.user_id,
            metadata = EXCLUDED.metadata,
            graph_edges = EXCLUDED.graph_edges,
            updated_at = CURRENT_TIMESTAMP;

        RETURN NEW;
    END IF;
END;
$$ LANGUAGE plpgsql;

-- Create trigger
DROP TRIGGER IF EXISTS {trigger_name} ON {table_name};
CREATE TRIGGER {trigger_name}
AFTER INSERT OR UPDATE OR DELETE ON {table_name}
FOR EACH ROW EXECUTE FUNCTION {function_name}();
""".strip()

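The payoff of this trigger is the O(1) read path: once `kv_store` is kept in sync, resolving an entity by natural key is a single indexed hit on the UNLOGGED table. A sketch, assuming an asyncpg connection `conn` and illustrative tenant/key values (the column names come from the INSERT above; the ON CONFLICT clause implies a unique index on `(tenant_id, entity_key)`):

```python
row = await conn.fetchrow(
    """
    SELECT entity_type, entity_id, metadata
    FROM kv_store
    WHERE tenant_id = $1 AND entity_key = $2
    """,
    "acme-corp",
    "quarterly-report",
)
```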
async def register_type(
    model: Type[BaseModel],
    table_name: str,
    entity_key_field: str = "name",
    tenant_scoped: bool = True,
    create_embeddings: bool = True,
    create_kv_trigger: bool = True,
) -> dict[str, Any]:
    """
    Register a Pydantic model as a database schema.

    Creates:
    1. Primary table for entity storage
    2. Embeddings table (if create_embeddings=True)
    3. KV_STORE trigger (if create_kv_trigger=True)

    Args:
        model: Pydantic model class
        table_name: Table name
        entity_key_field: Field to use as natural key in KV_STORE
        tenant_scoped: Add tenant_id column and indexes
        create_embeddings: Create embeddings table
        create_kv_trigger: Create KV_STORE trigger

    Returns:
        Dict with SQL statements and metadata
    """
    result: dict[str, Any] = {
        "table_name": table_name,
        "model": model.__name__,
        "sql": {},
        "embeddable_fields": [],
    }

    # Generate primary table schema
    table_sql = generate_table_schema(model, table_name, tenant_scoped)
    result["sql"]["table"] = table_sql

    # Generate embeddings schema
    if create_embeddings:
        embeddings_sql, embeddable_fields = generate_embeddings_schema(model, table_name)
        result["sql"]["embeddings"] = embeddings_sql
        result["embeddable_fields"] = embeddable_fields

    # Generate KV_STORE trigger
    if create_kv_trigger:
        kv_trigger_sql = generate_kv_store_upsert(table_name, entity_key_field)
        result["sql"]["kv_trigger"] = kv_trigger_sql

    logger.info(f"Registered type {model.__name__} as table {table_name}")

    return result
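Putting the pieces together, an end-to-end sketch inside an async context. Note that `register_type` only generates SQL; executing it is the caller's job. The `Note` model and the asyncpg connection `conn` are assumptions:

```python
schemas = await register_type(Note, "notes", entity_key_field="title")

await conn.execute(schemas["sql"]["table"])
if schemas["sql"].get("embeddings"):
    await conn.execute(schemas["sql"]["embeddings"])
await conn.execute(schemas["sql"]["kv_trigger"])

print(schemas["embeddable_fields"])  # e.g. ['content', 'keywords']
```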
rem/services/postgres/repository.py
@@ -0,0 +1,337 @@
"""Generic repository for entity persistence.

Single repository class that works with any Pydantic model type.
No need for model-specific repository classes.

Usage:
    from rem.models.entities import Message
    from rem.services.postgres.repository import Repository

    repo = Repository(Message, table_name="messages", db=db)
    message = await repo.upsert(message_instance)
    messages = await repo.find({"session_id": "abc", "tenant_id": "xyz"})
"""

import json
from typing import Any, Generic, Type, TypeVar, TYPE_CHECKING

from loguru import logger
from pydantic import BaseModel

from .sql_builder import (
    build_count,
    build_delete,
    build_insert,
    build_select,
    build_upsert,
)
from ...settings import settings

if TYPE_CHECKING:
    from .service import PostgresService


def get_postgres_service() -> "PostgresService | None":
    """
    Get PostgresService instance with connection string from settings.

    Returns None if Postgres is disabled.
    """
    if not settings.postgres.enabled:
        return None

    from .service import PostgresService
    return PostgresService()


T = TypeVar("T", bound=BaseModel)

# Known JSONB fields from CoreModel that need deserialization
JSONB_FIELDS = {"graph_edges", "metadata"}

class Repository(Generic[T]):
    """Generic repository for any Pydantic model type."""

    def __init__(
        self,
        model_class: Type[T],
        table_name: str | None = None,
        db: "PostgresService | None" = None,
    ):
        """
        Initialize repository.

        Args:
            model_class: Pydantic model class (e.g., Message, Resource)
            table_name: Optional table name (defaults to lowercase model name + 's')
            db: Optional PostgresService instance (creates from settings if None)
        """
        self.db = db or get_postgres_service()
        self.model_class = model_class
        self.table_name = table_name or f"{model_class.__name__.lower()}s"

    async def upsert(
        self,
        records: T | list[T],
        embeddable_fields: list[str] | None = None,
        generate_embeddings: bool = False,
    ) -> T | list[T]:
        """
        Upsert single record or list of records (create or update on ID conflict).

        Accepts both single items and lists - no need to distinguish batch vs non-batch.
        Single items are coerced to lists internally for processing.

        Args:
            records: Single model instance or list of model instances
            embeddable_fields: Optional list of fields to generate embeddings for
            generate_embeddings: Whether to queue embedding generation tasks

        Returns:
            Single record or list of records with generated IDs (matches input type)
        """
        # Coerce single item to list for uniform processing
        is_single = not isinstance(records, list)
        records_list: list[T]
        if is_single:
            records_list = [records]  # type: ignore[list-item]
        else:
            records_list = records  # Type narrowed by isinstance check

        if not settings.postgres.enabled or not self.db:
            logger.debug(f"Postgres disabled, skipping {self.model_class.__name__} upsert")
            return records

        # Ensure connection
        if not self.db.pool:
            await self.db.connect()

        # Type guard: ensure pool is not None after connect
        if not self.db.pool:
            raise RuntimeError("Failed to establish database connection")

        for record in records_list:
            sql, params = build_upsert(record, self.table_name, conflict_field="id", return_id=True)
            async with self.db.pool.acquire() as conn:
                row = await conn.fetchrow(sql, *params)
                if row and "id" in row:
                    record.id = row["id"]  # type: ignore[attr-defined]

        # Queue embedding generation if requested and worker is available
        if generate_embeddings and embeddable_fields and self.db.embedding_worker:
            from rem.services.embeddings import EmbeddingTask

            for record in records_list:
                for field_name in embeddable_fields:
                    content = getattr(record, field_name, None)
                    if content and isinstance(content, str):
                        task = EmbeddingTask(
                            task_id=f"{record.id}-{field_name}",  # type: ignore[attr-defined]
                            entity_id=str(record.id),  # type: ignore[attr-defined]
                            table_name=self.table_name,
                            field_name=field_name,
                            content=content,
                            provider="openai",  # Default provider
                            model="text-embedding-3-small",  # Default model
                        )
                        await self.db.embedding_worker.queue_task(task)

            logger.debug(f"Queued {len(records_list) * len(embeddable_fields)} embedding tasks")

        # Return single item or list to match input type
        return records_list[0] if is_single else records_list

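A usage sketch of the upsert contract, inside an async context. The `Message` field values are assumptions about that entity, and queuing embeddings also requires the service's embedding worker to be configured:

```python
from rem.models.entities import Message

repo = Repository(Message)  # table_name defaults to "messages"

# Single instance in, single instance out, with id populated via RETURNING
msg = Message(content="hello", session_id="abc-123", tenant_id="acme-corp")
saved = await repo.upsert(msg, embeddable_fields=["content"], generate_embeddings=True)

# Lists round-trip as lists
batch = await repo.upsert([msg, Message(content="again", session_id="abc-123", tenant_id="acme-corp")])
```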
    async def get_by_id(self, record_id: str, tenant_id: str) -> T | None:
        """
        Get a single record by ID.

        Args:
            record_id: Record identifier
            tenant_id: Tenant identifier for multi-tenancy isolation

        Returns:
            Model instance or None if not found
        """
        if not settings.postgres.enabled or not self.db:
            logger.debug(f"Postgres disabled, returning None for {self.model_class.__name__} get")
            return None

        # Ensure connection
        if not self.db.pool:
            await self.db.connect()

        # Type guard: ensure pool is not None after connect
        if not self.db.pool:
            raise RuntimeError("Failed to establish database connection")

        query = f"""
            SELECT * FROM {self.table_name}
            WHERE id = $1 AND tenant_id = $2 AND deleted_at IS NULL
        """

        async with self.db.pool.acquire() as conn:
            row = await conn.fetchrow(query, record_id, tenant_id)

        if not row:
            return None

        # PostgreSQL JSONB columns come back as strings; parse them before validation
        row_dict = dict(row)
        for field in JSONB_FIELDS:
            if isinstance(row_dict.get(field), str):
                row_dict[field] = json.loads(row_dict[field])
        return self.model_class.model_validate(row_dict)

    async def find(
        self,
        filters: dict[str, Any],
        order_by: str = "created_at ASC",
        limit: int | None = None,
        offset: int = 0,
    ) -> list[T]:
        """
        Find records matching filters.

        Args:
            filters: Dict of field -> value filters (AND-ed together)
            order_by: ORDER BY clause (default: "created_at ASC")
            limit: Optional limit on number of records
            offset: Offset for pagination

        Returns:
            List of model instances

        Example:
            messages = await repo.find({
                "session_id": "abc-123",
                "tenant_id": "acme-corp",
                "user_id": "alice"
            })
        """
        if not settings.postgres.enabled or not self.db:
            logger.debug(f"Postgres disabled, returning empty {self.model_class.__name__} list")
            return []

        # Ensure connection
        if not self.db.pool:
            await self.db.connect()

        # Type guard: ensure pool is not None after connect
        if not self.db.pool:
            raise RuntimeError("Failed to establish database connection")

        sql, params = build_select(
            self.model_class,
            self.table_name,
            filters,
            order_by=order_by,
            limit=limit,
            offset=offset,
        )

        async with self.db.pool.acquire() as conn:
            rows = await conn.fetch(sql, *params)

        # Parse JSONB fields (returned as strings) before validation
        results: list[T] = []
        for row in rows:
            row_dict = dict(row)
            for field in JSONB_FIELDS:
                if isinstance(row_dict.get(field), str):
                    row_dict[field] = json.loads(row_dict[field])
            results.append(self.model_class.model_validate(row_dict))
        return results

    async def find_one(self, filters: dict[str, Any]) -> T | None:
        """
        Find single record matching filters.

        Args:
            filters: Dict of field -> value filters

        Returns:
            Model instance or None if not found
        """
        results = await self.find(filters, limit=1)
        return results[0] if results else None

    async def get_by_session(
        self, session_id: str, tenant_id: str, user_id: str | None = None
    ) -> list[T]:
        """
        Get all records for a session (convenience method for the Message model).

        Args:
            session_id: Session identifier
            tenant_id: Tenant identifier
            user_id: Optional user identifier

        Returns:
            List of model instances ordered by created_at
        """
        filters = {"session_id": session_id, "tenant_id": tenant_id}
        if user_id:
            filters["user_id"] = user_id

        return await self.find(filters, order_by="created_at ASC")

    async def update(self, record: T) -> T:
        """
        Update a record (upsert).

        Args:
            record: Model instance to update

        Returns:
            Updated record
        """
        result = await self.upsert(record)
        # upsert with a single record returns a single record
        return result  # type: ignore[return-value]

    async def delete(self, record_id: str, tenant_id: str) -> bool:
        """
        Soft delete a record (sets deleted_at).

        Args:
            record_id: Record identifier
            tenant_id: Tenant identifier for multi-tenancy isolation

        Returns:
            True if deleted, False if not found
        """
        if not settings.postgres.enabled or not self.db:
            logger.debug(f"Postgres disabled, skipping {self.model_class.__name__} deletion")
            return False

        # Ensure connection
        if not self.db.pool:
            await self.db.connect()

        # Type guard: ensure pool is not None after connect
        if not self.db.pool:
            raise RuntimeError("Failed to establish database connection")

        sql, params = build_delete(self.table_name, record_id, tenant_id)

        async with self.db.pool.acquire() as conn:
            row = await conn.fetchrow(sql, *params)

        return row is not None

    async def count(self, filters: dict[str, Any]) -> int:
        """
        Count records matching filters.

        Args:
            filters: Dict of field -> value filters

        Returns:
            Count of matching records
        """
        if not settings.postgres.enabled or not self.db:
            return 0

        # Ensure connection
        if not self.db.pool:
            await self.db.connect()

        # Type guard: ensure pool is not None after connect
        if not self.db.pool:
            raise RuntimeError("Failed to establish database connection")

        sql, params = build_count(self.table_name, filters)

        async with self.db.pool.acquire() as conn:
            row = await conn.fetchrow(sql, *params)

        return row[0] if row else 0
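Closing the loop with `register_type` from register_type.py above, a sketch of the intended read/maintenance flow inside an async context. The tenant and session values are illustrative, and the `id` attribute is assumed to come from the entity's core model:

```python
repo = Repository(Message, table_name="messages")

total = await repo.count({"tenant_id": "acme-corp"})
recent = await repo.find(
    {"tenant_id": "acme-corp", "session_id": "abc-123"},
    order_by="created_at DESC",
    limit=20,
)
if recent:
    # Soft delete: sets deleted_at, so get_by_id/find stop returning the row
    await repo.delete(str(recent[0].id), tenant_id="acme-corp")
```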