remdb 0.3.242__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (235) hide show
  1. rem/__init__.py +129 -0
  2. rem/agentic/README.md +760 -0
  3. rem/agentic/__init__.py +54 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +38 -0
  6. rem/agentic/agents/agent_manager.py +311 -0
  7. rem/agentic/agents/sse_simulator.py +502 -0
  8. rem/agentic/context.py +425 -0
  9. rem/agentic/context_builder.py +360 -0
  10. rem/agentic/llm_provider_models.py +301 -0
  11. rem/agentic/mcp/__init__.py +0 -0
  12. rem/agentic/mcp/tool_wrapper.py +273 -0
  13. rem/agentic/otel/__init__.py +5 -0
  14. rem/agentic/otel/setup.py +240 -0
  15. rem/agentic/providers/phoenix.py +926 -0
  16. rem/agentic/providers/pydantic_ai.py +854 -0
  17. rem/agentic/query.py +117 -0
  18. rem/agentic/query_helper.py +89 -0
  19. rem/agentic/schema.py +737 -0
  20. rem/agentic/serialization.py +245 -0
  21. rem/agentic/tools/__init__.py +5 -0
  22. rem/agentic/tools/rem_tools.py +242 -0
  23. rem/api/README.md +657 -0
  24. rem/api/deps.py +253 -0
  25. rem/api/main.py +460 -0
  26. rem/api/mcp_router/prompts.py +182 -0
  27. rem/api/mcp_router/resources.py +820 -0
  28. rem/api/mcp_router/server.py +243 -0
  29. rem/api/mcp_router/tools.py +1605 -0
  30. rem/api/middleware/tracking.py +172 -0
  31. rem/api/routers/admin.py +520 -0
  32. rem/api/routers/auth.py +898 -0
  33. rem/api/routers/chat/__init__.py +5 -0
  34. rem/api/routers/chat/child_streaming.py +394 -0
  35. rem/api/routers/chat/completions.py +702 -0
  36. rem/api/routers/chat/json_utils.py +76 -0
  37. rem/api/routers/chat/models.py +202 -0
  38. rem/api/routers/chat/otel_utils.py +33 -0
  39. rem/api/routers/chat/sse_events.py +546 -0
  40. rem/api/routers/chat/streaming.py +950 -0
  41. rem/api/routers/chat/streaming_utils.py +327 -0
  42. rem/api/routers/common.py +18 -0
  43. rem/api/routers/dev.py +87 -0
  44. rem/api/routers/feedback.py +276 -0
  45. rem/api/routers/messages.py +620 -0
  46. rem/api/routers/models.py +86 -0
  47. rem/api/routers/query.py +362 -0
  48. rem/api/routers/shared_sessions.py +422 -0
  49. rem/auth/README.md +258 -0
  50. rem/auth/__init__.py +36 -0
  51. rem/auth/jwt.py +367 -0
  52. rem/auth/middleware.py +318 -0
  53. rem/auth/providers/__init__.py +16 -0
  54. rem/auth/providers/base.py +376 -0
  55. rem/auth/providers/email.py +215 -0
  56. rem/auth/providers/google.py +163 -0
  57. rem/auth/providers/microsoft.py +237 -0
  58. rem/cli/README.md +517 -0
  59. rem/cli/__init__.py +8 -0
  60. rem/cli/commands/README.md +299 -0
  61. rem/cli/commands/__init__.py +3 -0
  62. rem/cli/commands/ask.py +549 -0
  63. rem/cli/commands/cluster.py +1808 -0
  64. rem/cli/commands/configure.py +495 -0
  65. rem/cli/commands/db.py +828 -0
  66. rem/cli/commands/dreaming.py +324 -0
  67. rem/cli/commands/experiments.py +1698 -0
  68. rem/cli/commands/mcp.py +66 -0
  69. rem/cli/commands/process.py +388 -0
  70. rem/cli/commands/query.py +109 -0
  71. rem/cli/commands/scaffold.py +47 -0
  72. rem/cli/commands/schema.py +230 -0
  73. rem/cli/commands/serve.py +106 -0
  74. rem/cli/commands/session.py +453 -0
  75. rem/cli/dreaming.py +363 -0
  76. rem/cli/main.py +123 -0
  77. rem/config.py +244 -0
  78. rem/mcp_server.py +41 -0
  79. rem/models/core/__init__.py +49 -0
  80. rem/models/core/core_model.py +70 -0
  81. rem/models/core/engram.py +333 -0
  82. rem/models/core/experiment.py +672 -0
  83. rem/models/core/inline_edge.py +132 -0
  84. rem/models/core/rem_query.py +246 -0
  85. rem/models/entities/__init__.py +68 -0
  86. rem/models/entities/domain_resource.py +38 -0
  87. rem/models/entities/feedback.py +123 -0
  88. rem/models/entities/file.py +57 -0
  89. rem/models/entities/image_resource.py +88 -0
  90. rem/models/entities/message.py +64 -0
  91. rem/models/entities/moment.py +123 -0
  92. rem/models/entities/ontology.py +181 -0
  93. rem/models/entities/ontology_config.py +131 -0
  94. rem/models/entities/resource.py +95 -0
  95. rem/models/entities/schema.py +87 -0
  96. rem/models/entities/session.py +84 -0
  97. rem/models/entities/shared_session.py +180 -0
  98. rem/models/entities/subscriber.py +175 -0
  99. rem/models/entities/user.py +93 -0
  100. rem/py.typed +0 -0
  101. rem/registry.py +373 -0
  102. rem/schemas/README.md +507 -0
  103. rem/schemas/__init__.py +6 -0
  104. rem/schemas/agents/README.md +92 -0
  105. rem/schemas/agents/core/agent-builder.yaml +235 -0
  106. rem/schemas/agents/core/moment-builder.yaml +178 -0
  107. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  108. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  109. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  110. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  111. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  112. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  113. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  114. rem/schemas/agents/examples/hello-world.yaml +37 -0
  115. rem/schemas/agents/examples/query.yaml +54 -0
  116. rem/schemas/agents/examples/simple.yaml +21 -0
  117. rem/schemas/agents/examples/test.yaml +29 -0
  118. rem/schemas/agents/rem.yaml +132 -0
  119. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  120. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  121. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  122. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  123. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  124. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  125. rem/services/__init__.py +18 -0
  126. rem/services/audio/INTEGRATION.md +308 -0
  127. rem/services/audio/README.md +376 -0
  128. rem/services/audio/__init__.py +15 -0
  129. rem/services/audio/chunker.py +354 -0
  130. rem/services/audio/transcriber.py +259 -0
  131. rem/services/content/README.md +1269 -0
  132. rem/services/content/__init__.py +5 -0
  133. rem/services/content/providers.py +760 -0
  134. rem/services/content/service.py +762 -0
  135. rem/services/dreaming/README.md +230 -0
  136. rem/services/dreaming/__init__.py +53 -0
  137. rem/services/dreaming/affinity_service.py +322 -0
  138. rem/services/dreaming/moment_service.py +251 -0
  139. rem/services/dreaming/ontology_service.py +54 -0
  140. rem/services/dreaming/user_model_service.py +297 -0
  141. rem/services/dreaming/utils.py +39 -0
  142. rem/services/email/__init__.py +10 -0
  143. rem/services/email/service.py +522 -0
  144. rem/services/email/templates.py +360 -0
  145. rem/services/embeddings/__init__.py +11 -0
  146. rem/services/embeddings/api.py +127 -0
  147. rem/services/embeddings/worker.py +435 -0
  148. rem/services/fs/README.md +662 -0
  149. rem/services/fs/__init__.py +62 -0
  150. rem/services/fs/examples.py +206 -0
  151. rem/services/fs/examples_paths.py +204 -0
  152. rem/services/fs/git_provider.py +935 -0
  153. rem/services/fs/local_provider.py +760 -0
  154. rem/services/fs/parsing-hooks-examples.md +172 -0
  155. rem/services/fs/paths.py +276 -0
  156. rem/services/fs/provider.py +460 -0
  157. rem/services/fs/s3_provider.py +1042 -0
  158. rem/services/fs/service.py +186 -0
  159. rem/services/git/README.md +1075 -0
  160. rem/services/git/__init__.py +17 -0
  161. rem/services/git/service.py +469 -0
  162. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  163. rem/services/phoenix/README.md +453 -0
  164. rem/services/phoenix/__init__.py +46 -0
  165. rem/services/phoenix/client.py +960 -0
  166. rem/services/phoenix/config.py +88 -0
  167. rem/services/phoenix/prompt_labels.py +477 -0
  168. rem/services/postgres/README.md +757 -0
  169. rem/services/postgres/__init__.py +49 -0
  170. rem/services/postgres/diff_service.py +599 -0
  171. rem/services/postgres/migration_service.py +427 -0
  172. rem/services/postgres/programmable_diff_service.py +635 -0
  173. rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
  174. rem/services/postgres/register_type.py +353 -0
  175. rem/services/postgres/repository.py +481 -0
  176. rem/services/postgres/schema_generator.py +661 -0
  177. rem/services/postgres/service.py +802 -0
  178. rem/services/postgres/sql_builder.py +355 -0
  179. rem/services/rate_limit.py +113 -0
  180. rem/services/rem/README.md +318 -0
  181. rem/services/rem/__init__.py +23 -0
  182. rem/services/rem/exceptions.py +71 -0
  183. rem/services/rem/executor.py +293 -0
  184. rem/services/rem/parser.py +180 -0
  185. rem/services/rem/queries.py +196 -0
  186. rem/services/rem/query.py +371 -0
  187. rem/services/rem/service.py +608 -0
  188. rem/services/session/README.md +374 -0
  189. rem/services/session/__init__.py +13 -0
  190. rem/services/session/compression.py +488 -0
  191. rem/services/session/pydantic_messages.py +310 -0
  192. rem/services/session/reload.py +85 -0
  193. rem/services/user_service.py +130 -0
  194. rem/settings.py +1877 -0
  195. rem/sql/background_indexes.sql +52 -0
  196. rem/sql/migrations/001_install.sql +983 -0
  197. rem/sql/migrations/002_install_models.sql +3157 -0
  198. rem/sql/migrations/003_optional_extensions.sql +326 -0
  199. rem/sql/migrations/004_cache_system.sql +282 -0
  200. rem/sql/migrations/005_schema_update.sql +145 -0
  201. rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
  202. rem/utils/AGENTIC_CHUNKING.md +597 -0
  203. rem/utils/README.md +628 -0
  204. rem/utils/__init__.py +61 -0
  205. rem/utils/agentic_chunking.py +622 -0
  206. rem/utils/batch_ops.py +343 -0
  207. rem/utils/chunking.py +108 -0
  208. rem/utils/clip_embeddings.py +276 -0
  209. rem/utils/constants.py +97 -0
  210. rem/utils/date_utils.py +228 -0
  211. rem/utils/dict_utils.py +98 -0
  212. rem/utils/embeddings.py +436 -0
  213. rem/utils/examples/embeddings_example.py +305 -0
  214. rem/utils/examples/sql_types_example.py +202 -0
  215. rem/utils/files.py +323 -0
  216. rem/utils/markdown.py +16 -0
  217. rem/utils/mime_types.py +158 -0
  218. rem/utils/model_helpers.py +492 -0
  219. rem/utils/schema_loader.py +649 -0
  220. rem/utils/sql_paths.py +146 -0
  221. rem/utils/sql_types.py +350 -0
  222. rem/utils/user_id.py +81 -0
  223. rem/utils/vision.py +325 -0
  224. rem/workers/README.md +506 -0
  225. rem/workers/__init__.py +7 -0
  226. rem/workers/db_listener.py +579 -0
  227. rem/workers/db_maintainer.py +74 -0
  228. rem/workers/dreaming.py +502 -0
  229. rem/workers/engram_processor.py +312 -0
  230. rem/workers/sqs_file_processor.py +193 -0
  231. rem/workers/unlogged_maintainer.py +463 -0
  232. remdb-0.3.242.dist-info/METADATA +1632 -0
  233. remdb-0.3.242.dist-info/RECORD +235 -0
  234. remdb-0.3.242.dist-info/WHEEL +4 -0
  235. remdb-0.3.242.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,562 @@
1
+ """
2
+ Convert Pydantic models to SQLAlchemy metadata for Alembic autogenerate.
3
+
4
+ This module bridges REM's Pydantic-first approach with Alembic's SQLAlchemy requirement
5
+ by dynamically building SQLAlchemy Table objects from Pydantic model definitions.
6
+
7
+ IMPORTANT: Type mappings here MUST stay in sync with utils/sql_types.py
8
+ to ensure the diff command produces accurate results.
9
+ """
10
+
11
+ import types
12
+ from datetime import date, datetime, time
13
+ from pathlib import Path
14
+ from typing import Any, Union, get_args, get_origin
15
+ from uuid import UUID as UUIDType
16
+
17
+ from loguru import logger
18
+ from pydantic import BaseModel
19
+ from pydantic.fields import FieldInfo
20
+ from sqlalchemy import (
21
+ Boolean,
22
+ Column,
23
+ Date,
24
+ DateTime,
25
+ Float,
26
+ ForeignKey,
27
+ Index,
28
+ Integer,
29
+ LargeBinary,
30
+ MetaData,
31
+ String,
32
+ Table,
33
+ Text,
34
+ Time,
35
+ UniqueConstraint,
36
+ text,
37
+ )
38
+ from sqlalchemy.dialects.postgresql import ARRAY, JSONB, UUID
39
+
40
+ # Import pgvector type for embeddings
41
+ try:
42
+ from pgvector.sqlalchemy import Vector
43
+ HAS_PGVECTOR = True
44
+ except ImportError:
45
+ HAS_PGVECTOR = False
46
+ Vector = None
47
+
48
+ from .schema_generator import SchemaGenerator
49
+
50
+
51
# Field names whose string values are long-form prose; these map to TEXT
# instead of a bounded VARCHAR (keep in sync with sql_types.py).
LONG_TEXT_FIELD_NAMES = {
    "body", "comment", "content", "description", "instructions",
    "message", "note", "prompt", "summary", "text",
}

# Columns that the table builder in this module emits explicitly for every
# table; they are skipped when iterating over a model's own fields.
SYSTEM_FIELDS = {
    "id",
    "tenant_id",
    "user_id",
    "created_at",
    "updated_at",
    "deleted_at",
    "graph_edges",
    "metadata",
    "tags",
}

# Field names that receive embeddings unless a field opts out explicitly
# (keep in sync with register_type.py).
# NOTE(review): LONG_TEXT_FIELD_NAMES contains "note" while this set contains
# "notes" - confirm against register_type.py / sql_types.py which is intended.
DEFAULT_EMBED_FIELD_NAMES = {
    "body", "content", "description", "message",
    "notes", "summary", "text",
}

# Dimension passed to the vector column type of embeddings tables
# (keep in sync with register_type.py).
DEFAULT_EMBEDDING_DIMENSIONS = 1536
84
+
85
+
86
def pydantic_type_to_sqlalchemy(
    field_info: FieldInfo,
    field_name: str,
) -> Any:
    """
    Resolve the SQLAlchemy column type for one Pydantic field.

    Resolution order:
      1. A truthy ``sql_type`` entry in a dict ``json_schema_extra``.
      2. An ``embedding_provider`` key in that dict -> Text.
      3. The field annotation; unions are reduced to a single member
         (UUID first, then dict, otherwise the first non-None member).

    The mapping is meant to agree with utils/sql_types.py so that schema
    generation and diff detection see the same types.

    Args:
        field_info: Pydantic FieldInfo describing the field
        field_name: Field name, forwarded to the name-based string heuristics

    Returns:
        SQLAlchemy column type (a type class or a configured instance)
    """
    extra = field_info.json_schema_extra
    if extra and isinstance(extra, dict):
        declared_sql_type = extra.get("sql_type")
        if declared_sql_type:
            return _sql_string_to_sqlalchemy(declared_sql_type)

        # A field that names an embedding provider is stored as TEXT
        if "embedding_provider" in extra:
            return Text

    annotation = field_info.annotation
    if annotation is None:
        return Text

    # Covers typing.Union / Optional[T] as well as the PEP 604 ``X | None`` form
    is_union = get_origin(annotation) is Union or isinstance(annotation, types.UnionType)
    if not is_union:
        return _map_simple_type(annotation, field_name)

    members = [member for member in get_args(annotation) if member is not type(None)]
    if not members:
        return Text

    # UUID wins over every other member of the union
    if UUIDType in members:
        return UUID(as_uuid=True)

    # A bare dict member comes next and maps to JSONB
    if dict in members:
        return JSONB

    # Otherwise the first remaining member decides
    return _map_simple_type(members[0], field_name)
142
+
143
+
144
def _map_simple_type(python_type: type, field_name: str) -> Any:
    """
    Map a single (non-union) Python type annotation to a SQLAlchemy column type.

    Mapping summary:
      - ``list[str]`` -> ``ARRAY(Text)``; any other list (typed or not) -> JSONB
      - ``dict`` / ``dict[...]`` -> JSONB
      - ``str`` -> Text or String(256), decided by ``_get_string_type``
      - int, float, bool, UUID, datetime, date, time, bytes -> matching column type
      - Pydantic ``BaseModel`` subclasses -> JSONB
      - anything else -> Text

    Args:
        python_type: Python type annotation
        field_name: Field name for heuristics

    Returns:
        SQLAlchemy column type
    """
    origin = get_origin(python_type)

    # Lists: only a list of plain strings becomes a native TEXT[] column;
    # lists of dicts, nested generics, other primitives and untyped lists
    # are all stored as JSONB.
    if origin is list:
        item_types = get_args(python_type)
        if item_types and item_types[0] is str:
            return ARRAY(Text)
        return JSONB

    if origin is dict or python_type is dict:
        return JSONB

    if python_type is str:
        return _get_string_type(field_name)

    if python_type is UUIDType:
        return UUID(as_uuid=True)

    # Identity comparison on purpose: bool is a subclass of int, and
    # datetime is a subclass of date, so isinstance/issubclass would blur them.
    for scalar_type, column_type in (
        (int, Integer),
        (float, Float),
        (bool, Boolean),
        (datetime, DateTime),
        (date, Date),
        (time, Time),
        (bytes, LargeBinary),
    ):
        if python_type is scalar_type:
            return column_type

    # Nested Pydantic models are stored as JSONB. Only bare classes are passed
    # to issubclass(): on Python 3.9/3.10 a parametrized generic such as
    # set[str] passes isinstance(x, type) but makes issubclass() raise
    # TypeError, so anything with a generic origin is excluded first.
    if (
        origin is None
        and isinstance(python_type, type)
        and issubclass(python_type, BaseModel)
    ):
        return JSONB

    # Default to Text for unknown types
    return Text
215
+
216
+
217
def _get_string_type(field_name: str) -> Any:
    """
    Pick the column type for a ``str`` field from its name.

    The name is compared case-insensitively. It counts as long-form when it
    is listed in LONG_TEXT_FIELD_NAMES or ends with one of the long-form
    suffixes below.

    Args:
        field_name: Name of the field

    Returns:
        Text for long-form names, otherwise String(256)
    """
    lowered = field_name.lower()
    long_form_suffixes = ("_content", "_description", "_summary", "_text", "_message")

    is_long_form = lowered in LONG_TEXT_FIELD_NAMES or lowered.endswith(long_form_suffixes)
    return Text if is_long_form else String(256)
237
+
238
+
239
def _sql_string_to_sqlalchemy(sql_type: str) -> Any:
    """
    Translate a PostgreSQL type string into a SQLAlchemy column type.

    Matching is case-insensitive and exact, except for ``VARCHAR`` which is
    matched by prefix so that a length such as ``VARCHAR(256)`` can be read.
    Strings that match nothing fall back to Text.

    Args:
        sql_type: PostgreSQL type string (e.g., "VARCHAR(256)", "JSONB")

    Returns:
        SQLAlchemy column type
    """
    normalized = sql_type.upper()

    # Types that need a freshly configured instance are handled individually
    if normalized == "UUID":
        return UUID(as_uuid=True)

    if normalized == "TEXT[]":
        return ARRAY(Text)

    if normalized.startswith("VARCHAR"):
        import re

        # VARCHAR(n) keeps its declared length; a bare VARCHAR gets 256
        length_match = re.match(r"VARCHAR\((\d+)\)", normalized)
        if length_match:
            return String(int(length_match.group(1)))
        return String(256)

    # Remaining names map one-to-one onto type classes
    exact_types = {
        "TEXT": Text,
        "JSONB": JSONB,
        "JSON": JSONB,
        "INTEGER": Integer,
        "INT": Integer,
        "BOOLEAN": Boolean,
        "BOOL": Boolean,
        "TIMESTAMP": DateTime,
        "DATE": Date,
        "TIME": Time,
        "DOUBLE PRECISION": Float,
        "FLOAT": Float,
        "BYTEA": LargeBinary,
    }
    return exact_types.get(normalized, Text)
282
+
283
+
284
def _should_embed_field(field_name: str, field_info: FieldInfo) -> bool:
    """
    Decide whether embeddings are generated for a field.

    Intended to mirror register_type.should_embed_field().

    Precedence:
    1. A non-None ``embed`` entry in a dict ``json_schema_extra`` decides
       (its truthiness is returned)
    2. Otherwise the field is embedded only when its lower-cased name is in
       DEFAULT_EMBED_FIELD_NAMES
    """
    extra = getattr(field_info, "json_schema_extra", None)

    # Only dict-shaped extras can carry an explicit flag
    explicit_flag = extra.get("embed") if extra and isinstance(extra, dict) else None

    if explicit_flag is None:
        return field_name.lower() in DEFAULT_EMBED_FIELD_NAMES
    return bool(explicit_flag)
305
+
306
+
307
def _get_embeddable_fields(model: type[BaseModel]) -> list[str]:
    """Return the model's non-system field names that should be embedded, in declaration order."""
    return [
        name
        for name, info in model.model_fields.items()
        if name not in SYSTEM_FIELDS and _should_embed_field(name, info)
    ]
316
+
317
+
318
def build_sqlalchemy_metadata_from_pydantic(models_dir: Path | None = None) -> MetaData:
    """
    Assemble a SQLAlchemy MetaData object from the registered Pydantic models.

    Every model returned by the registry (requested with ``include_core=True``)
    yields one primary table. A model with at least one embeddable field also
    yields an ``embeddings_<table>`` companion table.

    Args:
        models_dir: Ignored; retained for backwards compatibility. Models come
            from the registry, not from scanning a directory.

    Returns:
        SQLAlchemy MetaData object
    """
    from ...registry import get_model_registry

    target = MetaData()
    namer = SchemaGenerator()
    model_registry = get_model_registry()

    # Core models plus anything registered by the user
    entries = model_registry.get_models(include_core=True)
    logger.info(f"Registry contains {len(entries)} models")

    for _registered_name, entry in entries.items():
        model_cls = entry.model

        # An explicit table name on the registry entry wins over inference
        resolved_name = entry.table_name or namer.infer_table_name(model_cls)

        _build_table(model_cls, resolved_name, target)

        # Companion embeddings table only when something is embeddable
        if _get_embeddable_fields(model_cls):
            _build_embeddings_table(resolved_name, target)

    logger.info(f"Built metadata with {len(target.tables)} tables")
    return target
360
+
361
+
362
def _build_table(model: type[BaseModel], table_name: str, metadata: MetaData) -> Table:
    """
    Build the SQLAlchemy Table for one Pydantic model and register it on ``metadata``.

    Intended to mirror the schema generated by register_type.generate_table_schema().

    Column layout, in order:
      1. ``id`` (UUID primary key), ``tenant_id``, ``user_id``
      2. one column per model field that is not in SYSTEM_FIELDS
      3. ``created_at``, ``updated_at``, ``deleted_at``
      4. ``graph_edges`` (JSONB), ``metadata`` (JSONB), ``tags`` (TEXT[])

    Args:
        model: Pydantic model class
        table_name: Table name
        metadata: SQLAlchemy MetaData to add table to

    Returns:
        SQLAlchemy Table object
    """
    columns = [
        # Primary key: id UUID
        Column(
            "id",
            UUID(as_uuid=True),
            primary_key=True,
            server_default=text("uuid_generate_v4()"),
        ),
        # Tenant and user scoping (tenant_id nullable - NULL means public/shared)
        Column("tenant_id", String(100), nullable=True),
        Column("user_id", String(256), nullable=True),
    ]

    # Model-defined fields; system fields are emitted explicitly around this loop
    for field_name, field_info in model.model_fields.items():
        if field_name in SYSTEM_FIELDS:
            continue

        sa_type = pydantic_type_to_sqlalchemy(field_info, field_name)

        # Only fields declared with a default_factory receive a server default,
        # and only when they map to JSONB or an ARRAY column.
        # NOTE(review): JSONB columns get '{}' even when the factory is `list`;
        # confirm this matches register_type before changing it.
        server_default = None
        if field_info.default_factory is not None:
            # The type mapper may return either the JSONB class or an instance
            if sa_type is JSONB or isinstance(sa_type, JSONB):
                server_default = text("'{}'::jsonb")
            elif isinstance(sa_type, ARRAY):
                server_default = text("ARRAY[]::TEXT[]")

        columns.append(
            Column(
                field_name,
                sa_type,
                # Fields that are not required become nullable columns
                nullable=not field_info.is_required(),
                server_default=server_default,
            )
        )

    columns.extend(
        [
            # System timestamp fields
            Column("created_at", DateTime, server_default=text("CURRENT_TIMESTAMP")),
            Column("updated_at", DateTime, server_default=text("CURRENT_TIMESTAMP")),
            Column("deleted_at", DateTime, nullable=True),
            # graph_edges JSONB field
            Column("graph_edges", JSONB, nullable=True, server_default=text("'[]'::jsonb")),
            # metadata JSONB field
            Column("metadata", JSONB, nullable=True, server_default=text("'{}'::jsonb")),
            # tags TEXT[] field
            Column("tags", ARRAY(Text), nullable=True, server_default=text("ARRAY[]::TEXT[]")),
        ]
    )

    table = Table(table_name, metadata, *columns)

    # Indexes (matching register_type output). The Index objects are not stored
    # here: constructing one from table-bound columns associates it with the table.
    Index(f"idx_{table_name}_tenant", table.c.tenant_id)
    Index(f"idx_{table_name}_user", table.c.user_id)
    Index(f"idx_{table_name}_graph_edges", table.c.graph_edges, postgresql_using="gin")
    Index(f"idx_{table_name}_metadata", table.c.metadata, postgresql_using="gin")
    Index(f"idx_{table_name}_tags", table.c.tags, postgresql_using="gin")

    return table
446
+
447
+
448
def _build_embeddings_table(base_table_name: str, metadata: MetaData) -> Table:
    """
    Build the companion embeddings Table for an entity table.

    Intended to mirror the schema generated by
    register_type.generate_embeddings_schema().

    The ``embedding`` column uses pgvector's Vector type when pgvector could
    be imported. Otherwise a warning is logged and a float ARRAY is used.

    Args:
        base_table_name: Name of the primary entity table (e.g., "resources")
        metadata: SQLAlchemy MetaData to add table to

    Returns:
        SQLAlchemy Table object for embeddings_<base_table_name>
    """
    embeddings_table_name = f"embeddings_{base_table_name}"

    if not HAS_PGVECTOR or Vector is None:
        # No pgvector available: degrade to a plain float array column
        logger.warning(
            f"pgvector not installed, embeddings table {embeddings_table_name} will use ARRAY type instead of vector"
        )
        embedding_type = ARRAY(Float)
    else:
        embedding_type = Vector(DEFAULT_EMBEDDING_DIMENSIONS)

    id_column = Column(
        "id",
        UUID(as_uuid=True),
        primary_key=True,
        server_default=text("uuid_generate_v4()"),
    )
    # Rows are removed together with the entity they belong to
    entity_column = Column(
        "entity_id",
        UUID(as_uuid=True),
        ForeignKey(f"{base_table_name}.id", ondelete="CASCADE"),
        nullable=False,
    )

    # The table-name part is cut to 30 characters so the constraint name
    # stays within PostgreSQL's 63-character identifier limit
    unique_name = f"uq_{base_table_name[:30]}_emb_entity_field_prov"

    table = Table(
        embeddings_table_name,
        metadata,
        id_column,
        entity_column,
        Column("field_name", String(100), nullable=False),
        Column("provider", String(50), nullable=False, server_default=text("'openai'")),
        Column("model", String(100), nullable=False, server_default=text("'text-embedding-3-small'")),
        Column("embedding", embedding_type, nullable=False),
        Column("created_at", DateTime, server_default=text("CURRENT_TIMESTAMP")),
        Column("updated_at", DateTime, server_default=text("CURRENT_TIMESTAMP")),
        UniqueConstraint("entity_id", "field_name", "provider", name=unique_name),
    )

    # Indexes (matching register_type output)
    Index(f"idx_{embeddings_table_name}_entity", table.c.entity_id)
    Index(f"idx_{embeddings_table_name}_field_provider", table.c.field_name, table.c.provider)

    return table
511
+
512
+
513
def _import_model_modules() -> list[str]:
    """
    Import every module named in ``settings.models.module_list``.

    Importing is done for its side effect: per the original documentation,
    models decorated with @rem.register_model register themselves on import,
    so this has to run before schema generation. A module that raises
    ImportError is logged as a warning and skipped.

    Returns:
        List of successfully imported module names
    """
    import importlib
    from ...settings import settings

    loaded: list[str] = []
    for module_name in settings.models.module_list:
        try:
            importlib.import_module(module_name)
        except ImportError as e:
            logger.warning(f"Failed to import model module '{module_name}': {e}")
            continue

        loaded.append(module_name)
        logger.debug(f"Imported model module: {module_name}")

    return loaded
535
+
536
+
537
def get_target_metadata() -> MetaData:
    """
    Return the SQLAlchemy metadata used for Alembic autogenerate.

    Described in the original documentation as the main entry point for
    alembic/env.py and ``rem db diff``.

    Steps:
      1. Import the configured model modules so that downstream models are
         registered before the registry is read.
      2. Build the metadata from the model registry (core models plus
         user-registered ones).

    Returns:
        SQLAlchemy MetaData object representing all registered Pydantic models
    """
    # Step 1: trigger registration of downstream models
    if loaded_modules := _import_model_modules():
        logger.info(f"Imported model modules: {loaded_modules}")

    # Step 2: the builder reads the registry itself, so no path argument is passed
    return build_sqlalchemy_metadata_from_pydantic()