remdb: remdb-0.3.14-py3-none-any.whl → remdb-0.3.157-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. rem/agentic/README.md +76 -0
  2. rem/agentic/__init__.py +15 -0
  3. rem/agentic/agents/__init__.py +32 -2
  4. rem/agentic/agents/agent_manager.py +310 -0
  5. rem/agentic/agents/sse_simulator.py +502 -0
  6. rem/agentic/context.py +51 -27
  7. rem/agentic/context_builder.py +5 -3
  8. rem/agentic/llm_provider_models.py +301 -0
  9. rem/agentic/mcp/tool_wrapper.py +155 -18
  10. rem/agentic/otel/setup.py +93 -4
  11. rem/agentic/providers/phoenix.py +371 -108
  12. rem/agentic/providers/pydantic_ai.py +280 -57
  13. rem/agentic/schema.py +361 -21
  14. rem/agentic/tools/rem_tools.py +3 -3
  15. rem/api/README.md +215 -1
  16. rem/api/deps.py +255 -0
  17. rem/api/main.py +132 -40
  18. rem/api/mcp_router/resources.py +1 -1
  19. rem/api/mcp_router/server.py +28 -5
  20. rem/api/mcp_router/tools.py +555 -7
  21. rem/api/routers/admin.py +494 -0
  22. rem/api/routers/auth.py +278 -4
  23. rem/api/routers/chat/completions.py +402 -20
  24. rem/api/routers/chat/models.py +88 -10
  25. rem/api/routers/chat/otel_utils.py +33 -0
  26. rem/api/routers/chat/sse_events.py +542 -0
  27. rem/api/routers/chat/streaming.py +697 -45
  28. rem/api/routers/dev.py +81 -0
  29. rem/api/routers/feedback.py +268 -0
  30. rem/api/routers/messages.py +473 -0
  31. rem/api/routers/models.py +78 -0
  32. rem/api/routers/query.py +360 -0
  33. rem/api/routers/shared_sessions.py +406 -0
  34. rem/auth/__init__.py +13 -3
  35. rem/auth/middleware.py +186 -22
  36. rem/auth/providers/__init__.py +4 -1
  37. rem/auth/providers/email.py +215 -0
  38. rem/cli/commands/README.md +237 -64
  39. rem/cli/commands/cluster.py +1808 -0
  40. rem/cli/commands/configure.py +4 -7
  41. rem/cli/commands/db.py +386 -143
  42. rem/cli/commands/experiments.py +468 -76
  43. rem/cli/commands/process.py +14 -8
  44. rem/cli/commands/schema.py +97 -50
  45. rem/cli/commands/session.py +336 -0
  46. rem/cli/dreaming.py +2 -2
  47. rem/cli/main.py +29 -6
  48. rem/config.py +10 -3
  49. rem/models/core/core_model.py +7 -1
  50. rem/models/core/experiment.py +58 -14
  51. rem/models/core/rem_query.py +5 -2
  52. rem/models/entities/__init__.py +25 -0
  53. rem/models/entities/domain_resource.py +38 -0
  54. rem/models/entities/feedback.py +123 -0
  55. rem/models/entities/message.py +30 -1
  56. rem/models/entities/ontology.py +1 -1
  57. rem/models/entities/ontology_config.py +1 -1
  58. rem/models/entities/session.py +83 -0
  59. rem/models/entities/shared_session.py +180 -0
  60. rem/models/entities/subscriber.py +175 -0
  61. rem/models/entities/user.py +1 -0
  62. rem/registry.py +10 -4
  63. rem/schemas/agents/core/agent-builder.yaml +134 -0
  64. rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
  65. rem/schemas/agents/examples/contract-extractor.yaml +1 -1
  66. rem/schemas/agents/examples/cv-parser.yaml +1 -1
  67. rem/schemas/agents/rem.yaml +7 -3
  68. rem/services/__init__.py +3 -1
  69. rem/services/content/service.py +92 -19
  70. rem/services/email/__init__.py +10 -0
  71. rem/services/email/service.py +459 -0
  72. rem/services/email/templates.py +360 -0
  73. rem/services/embeddings/api.py +4 -4
  74. rem/services/embeddings/worker.py +16 -16
  75. rem/services/phoenix/client.py +154 -14
  76. rem/services/postgres/README.md +197 -15
  77. rem/services/postgres/__init__.py +2 -1
  78. rem/services/postgres/diff_service.py +547 -0
  79. rem/services/postgres/pydantic_to_sqlalchemy.py +470 -140
  80. rem/services/postgres/repository.py +132 -0
  81. rem/services/postgres/schema_generator.py +205 -4
  82. rem/services/postgres/service.py +6 -6
  83. rem/services/rem/parser.py +44 -9
  84. rem/services/rem/service.py +36 -2
  85. rem/services/session/compression.py +137 -51
  86. rem/services/session/reload.py +15 -8
  87. rem/settings.py +515 -27
  88. rem/sql/background_indexes.sql +21 -16
  89. rem/sql/migrations/001_install.sql +387 -54
  90. rem/sql/migrations/002_install_models.sql +2304 -377
  91. rem/sql/migrations/003_optional_extensions.sql +326 -0
  92. rem/sql/migrations/004_cache_system.sql +548 -0
  93. rem/sql/migrations/005_schema_update.sql +145 -0
  94. rem/utils/README.md +45 -0
  95. rem/utils/__init__.py +18 -0
  96. rem/utils/date_utils.py +2 -2
  97. rem/utils/files.py +157 -1
  98. rem/utils/model_helpers.py +156 -1
  99. rem/utils/schema_loader.py +220 -22
  100. rem/utils/sql_paths.py +146 -0
  101. rem/utils/sql_types.py +3 -1
  102. rem/utils/vision.py +1 -1
  103. rem/workers/__init__.py +3 -1
  104. rem/workers/db_listener.py +579 -0
  105. rem/workers/unlogged_maintainer.py +463 -0
  106. {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/METADATA +340 -229
  107. {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/RECORD +109 -80
  108. {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/WHEEL +1 -1
  109. rem/sql/002_install_models.sql +0 -1068
  110. rem/sql/install_models.sql +0 -1051
  111. rem/sql/migrations/003_seed_default_user.sql +0 -48
  112. {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/entry_points.txt +0 -0
rem/services/postgres/pydantic_to_sqlalchemy.py
@@ -3,230 +3,560 @@ Convert Pydantic models to SQLAlchemy metadata for Alembic autogenerate.
3
3
 
4
4
  This module bridges REM's Pydantic-first approach with Alembic's SQLAlchemy requirement
5
5
  by dynamically building SQLAlchemy Table objects from Pydantic model definitions.
6
+
7
+ IMPORTANT: Type mappings here MUST stay in sync with utils/sql_types.py
8
+ to ensure the diff command produces accurate results.
6
9
  """
7
10
 
11
+ import types
12
+ from datetime import date, datetime, time
8
13
  from pathlib import Path
9
- from typing import Any
14
+ from typing import Any, Union, get_args, get_origin
15
+ from uuid import UUID as UUIDType
10
16
 
11
17
  from loguru import logger
12
18
  from pydantic import BaseModel
19
+ from pydantic.fields import FieldInfo
13
20
  from sqlalchemy import (
14
- JSON,
15
21
  Boolean,
16
22
  Column,
23
+ Date,
17
24
  DateTime,
18
25
  Float,
26
+ ForeignKey,
27
+ Index,
19
28
  Integer,
29
+ LargeBinary,
20
30
  MetaData,
21
31
  String,
22
32
  Table,
23
33
  Text,
34
+ Time,
35
+ UniqueConstraint,
36
+ text,
24
37
  )
25
38
  from sqlalchemy.dialects.postgresql import ARRAY, JSONB, UUID
26
39
 
40
+ # Import pgvector type for embeddings
41
+ try:
42
+ from pgvector.sqlalchemy import Vector
43
+ HAS_PGVECTOR = True
44
+ except ImportError:
45
+ HAS_PGVECTOR = False
46
+ Vector = None
47
+
27
48
  from .schema_generator import SchemaGenerator
28
49
 
29
50
 
51
+ # Field names that should use TEXT instead of VARCHAR (sync with sql_types.py)
52
+ LONG_TEXT_FIELD_NAMES = {
53
+ "content",
54
+ "description",
55
+ "summary",
56
+ "instructions",
57
+ "prompt",
58
+ "message",
59
+ "body",
60
+ "text",
61
+ "note",
62
+ "comment",
63
+ }
64
+
65
+ # System fields handled separately by schema generation
66
+ SYSTEM_FIELDS = {
67
+ "id", "created_at", "updated_at", "deleted_at",
68
+ "tenant_id", "user_id", "graph_edges", "metadata", "tags",
69
+ }
70
+
71
+ # Fields that get embeddings by default (sync with register_type.py)
72
+ DEFAULT_EMBED_FIELD_NAMES = {
73
+ "content",
74
+ "description",
75
+ "summary",
76
+ "text",
77
+ "body",
78
+ "message",
79
+ "notes",
80
+ }
81
+
82
+ # Embedding configuration (sync with register_type.py)
83
+ DEFAULT_EMBEDDING_DIMENSIONS = 1536
84
+
85
+
30
86
  def pydantic_type_to_sqlalchemy(
31
- field_type: Any, field_info: Any
87
+ field_info: FieldInfo,
88
+ field_name: str,
32
89
  ) -> Any:
33
90
  """
34
- Map Pydantic field type to SQLAlchemy column type.
91
+ Map Pydantic field to SQLAlchemy column type.
92
+
93
+ This function mirrors the logic in utils/sql_types.py to ensure
94
+ consistent type mapping between schema generation and diff detection.
35
95
 
36
96
  Args:
37
- field_type: Pydantic field type annotation
38
97
  field_info: Pydantic FieldInfo object
98
+ field_name: Name of the field (used for heuristics)
39
99
 
40
100
  Returns:
41
101
  SQLAlchemy column type
42
102
  """
43
- # Get the origin type (handles Optional, List, etc.)
44
- import typing
103
+ # Check for explicit sql_type in json_schema_extra
104
+ if field_info.json_schema_extra:
105
+ if isinstance(field_info.json_schema_extra, dict):
106
+ sql_type = field_info.json_schema_extra.get("sql_type")
107
+ if sql_type:
108
+ return _sql_string_to_sqlalchemy(sql_type)
109
+
110
+ # Fields with embedding_provider should be TEXT
111
+ if "embedding_provider" in field_info.json_schema_extra:
112
+ return Text
113
+
114
+ annotation = field_info.annotation
115
+
116
+ # Handle None annotation
117
+ if annotation is None:
118
+ return Text
119
+
120
+ # Handle Union types (including Optional[T] and Python 3.10+ X | None)
121
+ origin = get_origin(annotation)
122
+ if origin is Union or isinstance(annotation, types.UnionType):
123
+ args = get_args(annotation)
124
+ # Filter out NoneType
125
+ non_none_args = [arg for arg in args if arg is not type(None)]
126
+
127
+ if not non_none_args:
128
+ return Text
129
+
130
+ # Prefer UUID over other types in unions
131
+ if UUIDType in non_none_args:
132
+ return UUID(as_uuid=True)
133
+
134
+ # Prefer dict/JSONB over other types in unions
135
+ if dict in non_none_args:
136
+ return JSONB
137
+
138
+ # Use the first non-None type
139
+ return _map_simple_type(non_none_args[0], field_name)
140
+
141
+ return _map_simple_type(annotation, field_name)
142
+
143
+
144
+ def _map_simple_type(python_type: type, field_name: str) -> Any:
145
+ """
146
+ Map a simple Python type to SQLAlchemy column type.
45
147
 
46
- origin = typing.get_origin(field_type)
47
- args = typing.get_args(field_type)
148
+ Args:
149
+ python_type: Python type annotation
150
+ field_name: Field name for heuristics
48
151
 
49
- # Handle Optional types
50
- if origin is typing.Union:
51
- # Optional[X] is Union[X, None]
52
- non_none_types = [t for t in args if t is not type(None)]
53
- if non_none_types:
54
- field_type = non_none_types[0]
55
- origin = typing.get_origin(field_type)
56
- args = typing.get_args(field_type)
152
+ Returns:
153
+ SQLAlchemy column type
154
+ """
155
+ origin = get_origin(python_type)
156
+ args = get_args(python_type)
57
157
 
58
- # Handle list types -> PostgreSQL ARRAY
158
+ # Handle list types
59
159
  if origin is list:
60
160
  if args:
61
161
  inner_type = args[0]
162
+
163
+ # List of strings -> PostgreSQL TEXT[]
62
164
  if inner_type is str:
63
165
  return ARRAY(Text)
64
- elif inner_type is int:
65
- return ARRAY(Integer)
66
- elif inner_type is float:
67
- return ARRAY(Float)
68
- return ARRAY(Text) # Default to text array
166
+
167
+ # List of dicts or complex types -> JSONB
168
+ if inner_type is dict or get_origin(inner_type) is not None:
169
+ return JSONB
170
+
171
+ # List of primitives -> JSONB
172
+ return JSONB
173
+
174
+ # Untyped list -> JSONB
175
+ return JSONB
69
176
 
70
177
  # Handle dict types -> JSONB
71
- if origin is dict or field_type is dict:
178
+ if origin is dict or python_type is dict:
72
179
  return JSONB
73
180
 
74
- # Handle basic types
75
- if field_type is str:
76
- # Check if there's a max_length constraint
77
- max_length = getattr(field_info, "max_length", None)
78
- if max_length:
79
- return String(max_length)
80
- return Text
181
+ # Handle primitive types
182
+ if python_type is str:
183
+ return _get_string_type(field_name)
81
184
 
82
- if field_type is int:
185
+ if python_type is int:
83
186
  return Integer
84
187
 
85
- if field_type is float:
188
+ if python_type is float:
86
189
  return Float
87
190
 
88
- if field_type is bool:
191
+ if python_type is bool:
89
192
  return Boolean
90
193
 
91
- # Handle datetime
92
- from datetime import datetime
194
+ if python_type is UUIDType:
195
+ return UUID(as_uuid=True)
93
196
 
94
- if field_type is datetime:
197
+ if python_type is datetime:
95
198
  return DateTime
96
199
 
97
- # Handle UUID
98
- from uuid import UUID as UUIDType
200
+ if python_type is date:
201
+ return Date
99
202
 
100
- if field_type is UUIDType:
101
- return UUID(as_uuid=True)
203
+ if python_type is time:
204
+ return Time
102
205
 
103
- # Handle enums
104
- import enum
206
+ if python_type is bytes:
207
+ return LargeBinary
105
208
 
106
- if isinstance(field_type, type) and issubclass(field_type, enum.Enum):
107
- return String(50)
209
+ # Check if it's a Pydantic model -> JSONB
210
+ if isinstance(python_type, type) and issubclass(python_type, BaseModel):
211
+ return JSONB
108
212
 
109
213
  # Default to Text for unknown types
110
- logger.warning(f"Unknown field type {field_type}, defaulting to Text")
111
214
  return Text
112
215
 
113
216
 
114
- def build_sqlalchemy_metadata_from_pydantic(models_dir: Path) -> MetaData:
217
+ def _get_string_type(field_name: str) -> Any:
218
+ """
219
+ Determine string type based on field name.
220
+
221
+ Args:
222
+ field_name: Name of the field
223
+
224
+ Returns:
225
+ Text for long-form content, String(256) for others
226
+ """
227
+ field_lower = field_name.lower()
228
+
229
+ if field_lower in LONG_TEXT_FIELD_NAMES:
230
+ return Text
231
+
232
+ # Check for common suffixes
233
+ if field_lower.endswith(("_content", "_description", "_summary", "_text", "_message")):
234
+ return Text
235
+
236
+ return String(256)
237
+
238
+
239
+ def _sql_string_to_sqlalchemy(sql_type: str) -> Any:
240
+ """
241
+ Convert SQL type string to SQLAlchemy type.
242
+
243
+ Args:
244
+ sql_type: PostgreSQL type string (e.g., "VARCHAR(256)", "JSONB")
245
+
246
+ Returns:
247
+ SQLAlchemy column type
248
+ """
249
+ sql_upper = sql_type.upper()
250
+
251
+ if sql_upper == "TEXT":
252
+ return Text
253
+ if sql_upper == "JSONB" or sql_upper == "JSON":
254
+ return JSONB
255
+ if sql_upper == "UUID":
256
+ return UUID(as_uuid=True)
257
+ if sql_upper == "INTEGER" or sql_upper == "INT":
258
+ return Integer
259
+ if sql_upper == "BOOLEAN" or sql_upper == "BOOL":
260
+ return Boolean
261
+ if sql_upper == "TIMESTAMP":
262
+ return DateTime
263
+ if sql_upper == "DATE":
264
+ return Date
265
+ if sql_upper == "TIME":
266
+ return Time
267
+ if sql_upper == "DOUBLE PRECISION" or sql_upper == "FLOAT":
268
+ return Float
269
+ if sql_upper == "BYTEA":
270
+ return LargeBinary
271
+ if sql_upper.startswith("VARCHAR"):
272
+ # Extract length from VARCHAR(n)
273
+ import re
274
+ match = re.match(r"VARCHAR\((\d+)\)", sql_upper)
275
+ if match:
276
+ return String(int(match.group(1)))
277
+ return String(256)
278
+ if sql_upper == "TEXT[]":
279
+ return ARRAY(Text)
280
+
281
+ return Text
282
+
283
+
284
+ def _should_embed_field(field_name: str, field_info: FieldInfo) -> bool:
285
+ """
286
+ Determine if a field should have embeddings generated.
287
+
288
+ Mirrors logic in register_type.should_embed_field().
289
+
290
+ Rules:
291
+ 1. If json_schema_extra.embed = True, always embed
292
+ 2. If json_schema_extra.embed = False, never embed
293
+ 3. If field name in DEFAULT_EMBED_FIELD_NAMES, embed by default
294
+ 4. Otherwise, don't embed
295
+ """
296
+ # Check json_schema_extra for explicit embed configuration
297
+ json_extra = getattr(field_info, "json_schema_extra", None)
298
+ if json_extra and isinstance(json_extra, dict):
299
+ embed = json_extra.get("embed")
300
+ if embed is not None:
301
+ return bool(embed)
302
+
303
+ # Default: embed if field name matches common content fields
304
+ return field_name.lower() in DEFAULT_EMBED_FIELD_NAMES
305
+
306
+
307
+ def _get_embeddable_fields(model: type[BaseModel]) -> list[str]:
308
+ """Get list of field names that should have embeddings."""
309
+ embeddable = []
310
+ for field_name, field_info in model.model_fields.items():
311
+ if field_name in SYSTEM_FIELDS:
312
+ continue
313
+ if _should_embed_field(field_name, field_info):
314
+ embeddable.append(field_name)
315
+ return embeddable
316
+
317
+
318
+ def build_sqlalchemy_metadata_from_pydantic(models_dir: Path | None = None) -> MetaData:
115
319
  """
116
320
  Build SQLAlchemy MetaData from Pydantic models.
117
321
 
118
- This function:
119
- 1. Discovers Pydantic models in the given directory
120
- 2. Infers table names and column definitions
121
- 3. Creates SQLAlchemy Table objects
122
- 4. Returns a MetaData object for Alembic
322
+ This function uses the model registry as the source of truth:
323
+ 1. Core models (Resource, Message, User, etc.) - always included
324
+ 2. User-registered models via rem.register_model() - included if registered
325
+ 3. Embeddings tables for models with embeddable fields
326
+
327
+ The registry ensures only actual entity models are included (not DTOs).
123
328
 
124
329
  Args:
125
- models_dir: Directory containing Pydantic models
330
+ models_dir: Optional, not used (kept for backwards compatibility).
331
+ Models are discovered via the registry, not directory scanning.
126
332
 
127
333
  Returns:
128
334
  SQLAlchemy MetaData object
129
335
  """
336
+ from ...registry import get_model_registry
337
+
130
338
  metadata = MetaData()
131
339
  generator = SchemaGenerator()
340
+ registry = get_model_registry()
341
+
342
+ # Get all registered models (core + user-registered)
343
+ registered_models = registry.get_models(include_core=True)
344
+ logger.info(f"Registry contains {len(registered_models)} models")
345
+
346
+ for model_name, ext in registered_models.items():
347
+ # Use table_name from extension if provided, otherwise infer
348
+ table_name = ext.table_name or generator.infer_table_name(ext.model)
349
+
350
+ # Build primary table
351
+ _build_table(ext.model, table_name, metadata)
132
352
 
133
- # Discover models
134
- models = generator.discover_models(models_dir)
135
- logger.info(f"Discovered {len(models)} models for metadata generation")
136
-
137
- for model_name, model_class in models.items():
138
- # Infer table name
139
- table_name = generator.infer_table_name(model_class)
140
- logger.debug(f"Building table {table_name} from model {model_name}")
141
-
142
- # Build columns
143
- columns = []
144
-
145
- for field_name, field_info in model_class.model_fields.items():
146
- # Get field type
147
- field_type = field_info.annotation
148
-
149
- # Map to SQLAlchemy type
150
- sa_type = pydantic_type_to_sqlalchemy(field_type, field_info)
151
-
152
- # Determine nullable
153
- nullable = not field_info.is_required()
154
-
155
- # Get default value
156
- from pydantic_core import PydanticUndefined
157
-
158
- default = None
159
- if field_info.default is not PydanticUndefined and field_info.default is not None:
160
- default = field_info.default
161
- elif field_info.default_factory is not None:
162
- # For default_factory, we'll use the server default if possible
163
- factory = field_info.default_factory
164
- # Handle common default factories
165
- if factory.__name__ == "list":
166
- default = "ARRAY[]::TEXT[]" # PostgreSQL empty array
167
- elif factory.__name__ == "dict":
168
- default = "'{}'::jsonb" # PostgreSQL empty JSON
169
- else:
170
- default = None
171
-
172
- # Handle special fields
173
- server_default = None
174
- primary_key = False
175
-
176
- if field_name == "id":
177
- primary_key = True
178
- if sa_type == UUID(as_uuid=True):
179
- server_default = "uuid_generate_v4()"
180
- elif field_name in ("created_at", "updated_at"):
181
- server_default = "CURRENT_TIMESTAMP"
182
- elif isinstance(default, str) and default.startswith("ARRAY["):
183
- server_default = default
184
- default = None
185
- elif isinstance(default, str) and "::jsonb" in default:
186
- server_default = default
187
- default = None
188
-
189
- # Create column - only pass server_default if it's a string SQL expression
190
- column_kwargs = {
191
- "type_": sa_type,
192
- "primary_key": primary_key,
193
- "nullable": nullable,
194
- }
195
-
196
- if server_default is not None:
197
- from sqlalchemy import text
198
- column_kwargs["server_default"] = text(server_default)
199
-
200
- column = Column(field_name, **column_kwargs)
201
-
202
- columns.append(column)
203
-
204
- # Create table
205
- if columns:
206
- Table(table_name, metadata, *columns)
207
- logger.debug(f"Created table {table_name} with {len(columns)} columns")
353
+ # Build embeddings table if model has embeddable fields
354
+ embeddable_fields = _get_embeddable_fields(ext.model)
355
+ if embeddable_fields:
356
+ _build_embeddings_table(table_name, metadata)
208
357
 
209
358
  logger.info(f"Built metadata with {len(metadata.tables)} tables")
210
359
  return metadata
211
360
 
212
361
 
213
- def get_target_metadata() -> MetaData:
362
+ def _build_table(model: type[BaseModel], table_name: str, metadata: MetaData) -> Table:
214
363
  """
215
- Get SQLAlchemy metadata for Alembic autogenerate.
364
+ Build SQLAlchemy Table from Pydantic model.
216
365
 
217
- This is the main entry point used by alembic/env.py.
366
+ Mirrors the schema generated by register_type.generate_table_schema().
367
+
368
+ Args:
369
+ model: Pydantic model class
370
+ table_name: Table name
371
+ metadata: SQLAlchemy MetaData to add table to
218
372
 
219
373
  Returns:
220
- SQLAlchemy MetaData object representing current Pydantic models
374
+ SQLAlchemy Table object
221
375
  """
222
- # Find models directory
223
- import rem
376
+ columns = []
377
+ indexes = []
378
+
379
+ # Primary key: id UUID
380
+ columns.append(
381
+ Column(
382
+ "id",
383
+ UUID(as_uuid=True),
384
+ primary_key=True,
385
+ server_default=text("uuid_generate_v4()"),
386
+ )
387
+ )
388
+
389
+ # Tenant and user scoping
390
+ columns.append(Column("tenant_id", String(100), nullable=False))
391
+ columns.append(Column("user_id", String(256), nullable=True))
392
+
393
+ # Process Pydantic fields (skip system fields)
394
+ for field_name, field_info in model.model_fields.items():
395
+ if field_name in SYSTEM_FIELDS:
396
+ continue
397
+
398
+ sa_type = pydantic_type_to_sqlalchemy(field_info, field_name)
399
+ nullable = not field_info.is_required()
400
+
401
+ # Handle default values for JSONB and arrays
402
+ server_default = None
403
+ if field_info.default_factory is not None:
404
+ if isinstance(sa_type, type) and sa_type is JSONB:
405
+ server_default = text("'{}'::jsonb")
406
+ elif isinstance(sa_type, JSONB):
407
+ server_default = text("'{}'::jsonb")
408
+ elif isinstance(sa_type, ARRAY):
409
+ server_default = text("ARRAY[]::TEXT[]")
410
+
411
+ columns.append(
412
+ Column(field_name, sa_type, nullable=nullable, server_default=server_default)
413
+ )
414
+
415
+ # System timestamp fields
416
+ columns.append(Column("created_at", DateTime, server_default=text("CURRENT_TIMESTAMP")))
417
+ columns.append(Column("updated_at", DateTime, server_default=text("CURRENT_TIMESTAMP")))
418
+ columns.append(Column("deleted_at", DateTime, nullable=True))
419
+
420
+ # graph_edges JSONB field
421
+ columns.append(
422
+ Column("graph_edges", JSONB, nullable=True, server_default=text("'[]'::jsonb"))
423
+ )
424
+
425
+ # metadata JSONB field
426
+ columns.append(
427
+ Column("metadata", JSONB, nullable=True, server_default=text("'{}'::jsonb"))
428
+ )
429
+
430
+ # tags TEXT[] field
431
+ columns.append(
432
+ Column("tags", ARRAY(Text), nullable=True, server_default=text("ARRAY[]::TEXT[]"))
433
+ )
434
+
435
+ # Create table
436
+ table = Table(table_name, metadata, *columns)
437
+
438
+ # Add indexes (matching register_type output)
439
+ Index(f"idx_{table_name}_tenant", table.c.tenant_id)
440
+ Index(f"idx_{table_name}_user", table.c.user_id)
441
+ Index(f"idx_{table_name}_graph_edges", table.c.graph_edges, postgresql_using="gin")
442
+ Index(f"idx_{table_name}_metadata", table.c.metadata, postgresql_using="gin")
443
+ Index(f"idx_{table_name}_tags", table.c.tags, postgresql_using="gin")
444
+
445
+ return table
446
+
447
+
448
+ def _build_embeddings_table(base_table_name: str, metadata: MetaData) -> Table:
449
+ """
450
+ Build SQLAlchemy Table for embeddings.
224
451
 
225
- package_root = Path(rem.__file__).parent.parent.parent
226
- models_dir = package_root / "src" / "rem" / "models" / "entities"
452
+ Mirrors the schema generated by register_type.generate_embeddings_schema().
227
453
 
228
- if not models_dir.exists():
229
- logger.error(f"Models directory not found: {models_dir}")
230
- return MetaData()
454
+ Args:
455
+ base_table_name: Name of the primary entity table (e.g., "resources")
456
+ metadata: SQLAlchemy MetaData to add table to
231
457
 
232
- return build_sqlalchemy_metadata_from_pydantic(models_dir)
458
+ Returns:
459
+ SQLAlchemy Table object for embeddings_<base_table_name>
460
+ """
461
+ embeddings_table_name = f"embeddings_{base_table_name}"
462
+
463
+ # Use pgvector Vector type if available, otherwise use a placeholder
464
+ if HAS_PGVECTOR and Vector is not None:
465
+ vector_type = Vector(DEFAULT_EMBEDDING_DIMENSIONS)
466
+ else:
467
+ # Fallback: use raw SQL type via TypeDecorator or just skip
468
+ # For now, we'll log a warning and use a simple column
469
+ logger.warning(
470
+ f"pgvector not installed, embeddings table {embeddings_table_name} "
471
+ "will use ARRAY type instead of vector"
472
+ )
473
+ vector_type = ARRAY(Float)
474
+
475
+ columns = [
476
+ Column(
477
+ "id",
478
+ UUID(as_uuid=True),
479
+ primary_key=True,
480
+ server_default=text("uuid_generate_v4()"),
481
+ ),
482
+ Column(
483
+ "entity_id",
484
+ UUID(as_uuid=True),
485
+ ForeignKey(f"{base_table_name}.id", ondelete="CASCADE"),
486
+ nullable=False,
487
+ ),
488
+ Column("field_name", String(100), nullable=False),
489
+ Column("provider", String(50), nullable=False, server_default=text("'openai'")),
490
+ Column("model", String(100), nullable=False, server_default=text("'text-embedding-3-small'")),
491
+ Column("embedding", vector_type, nullable=False),
492
+ Column("created_at", DateTime, server_default=text("CURRENT_TIMESTAMP")),
493
+ Column("updated_at", DateTime, server_default=text("CURRENT_TIMESTAMP")),
494
+ ]
495
+
496
+ # Create table with unique constraint
497
+ # Truncate constraint name to fit PostgreSQL's 63-char identifier limit
498
+ constraint_name = f"uq_{base_table_name[:30]}_emb_entity_field_prov"
499
+ table = Table(
500
+ embeddings_table_name,
501
+ metadata,
502
+ *columns,
503
+ UniqueConstraint("entity_id", "field_name", "provider", name=constraint_name),
504
+ )
505
+
506
+ # Add indexes (matching register_type output)
507
+ Index(f"idx_{embeddings_table_name}_entity", table.c.entity_id)
508
+ Index(f"idx_{embeddings_table_name}_field_provider", table.c.field_name, table.c.provider)
509
+
510
+ return table
511
+
512
+
513
+ def _import_model_modules() -> list[str]:
514
+ """
515
+ Import modules specified in MODELS__IMPORT_MODULES setting.
516
+
517
+ This ensures downstream models decorated with @rem.register_model
518
+ are registered before schema generation.
519
+
520
+ Returns:
521
+ List of successfully imported module names
522
+ """
523
+ import importlib
524
+ from ...settings import settings
525
+
526
+ imported = []
527
+ for module_name in settings.models.module_list:
528
+ try:
529
+ importlib.import_module(module_name)
530
+ imported.append(module_name)
531
+ logger.debug(f"Imported model module: {module_name}")
532
+ except ImportError as e:
533
+ logger.warning(f"Failed to import model module '{module_name}': {e}")
534
+ return imported
535
+
536
+
537
+ def get_target_metadata() -> MetaData:
538
+ """
539
+ Get SQLAlchemy metadata for Alembic autogenerate.
540
+
541
+ This is the main entry point used by alembic/env.py and rem db diff.
542
+
543
+ Uses the model registry as the source of truth, which includes:
544
+ - Core REM models (Resource, Message, User, etc.)
545
+ - User-registered models via @rem.register_model decorator
546
+
547
+ Before building metadata, imports model modules from settings to ensure
548
+ downstream models are registered. This supports:
549
+ - Auto-detection of ./models directory (convention)
550
+ - MODELS__IMPORT_MODULES env var (explicit configuration)
551
+
552
+ Returns:
553
+ SQLAlchemy MetaData object representing all registered Pydantic models
554
+ """
555
+ # Import model modules first (auto-detects ./models or uses MODELS__IMPORT_MODULES)
556
+ imported = _import_model_modules()
557
+ if imported:
558
+ logger.info(f"Imported model modules: {imported}")
559
+
560
+ # build_sqlalchemy_metadata_from_pydantic uses the registry internally,
561
+ # so no directory path is needed (the parameter is kept for backwards compat)
562
+ return build_sqlalchemy_metadata_from_pydantic()