remdb 0.2.6__py3-none-any.whl → 0.3.118__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +129 -2
- rem/agentic/README.md +76 -0
- rem/agentic/__init__.py +15 -0
- rem/agentic/agents/__init__.py +16 -2
- rem/agentic/agents/sse_simulator.py +500 -0
- rem/agentic/context.py +28 -22
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/tool_wrapper.py +29 -3
- rem/agentic/otel/setup.py +92 -4
- rem/agentic/providers/phoenix.py +32 -43
- rem/agentic/providers/pydantic_ai.py +168 -24
- rem/agentic/schema.py +358 -21
- rem/agentic/tools/rem_tools.py +3 -3
- rem/api/README.md +238 -1
- rem/api/deps.py +255 -0
- rem/api/main.py +154 -37
- rem/api/mcp_router/resources.py +1 -1
- rem/api/mcp_router/server.py +26 -5
- rem/api/mcp_router/tools.py +454 -7
- rem/api/middleware/tracking.py +172 -0
- rem/api/routers/admin.py +494 -0
- rem/api/routers/auth.py +124 -0
- rem/api/routers/chat/completions.py +152 -16
- rem/api/routers/chat/models.py +7 -3
- rem/api/routers/chat/sse_events.py +526 -0
- rem/api/routers/chat/streaming.py +608 -45
- rem/api/routers/dev.py +81 -0
- rem/api/routers/feedback.py +148 -0
- rem/api/routers/messages.py +473 -0
- rem/api/routers/models.py +78 -0
- rem/api/routers/query.py +360 -0
- rem/api/routers/shared_sessions.py +406 -0
- rem/auth/middleware.py +126 -27
- rem/cli/commands/README.md +237 -64
- rem/cli/commands/ask.py +15 -11
- rem/cli/commands/cluster.py +1300 -0
- rem/cli/commands/configure.py +170 -97
- rem/cli/commands/db.py +396 -139
- rem/cli/commands/experiments.py +278 -96
- rem/cli/commands/process.py +22 -15
- rem/cli/commands/scaffold.py +47 -0
- rem/cli/commands/schema.py +97 -50
- rem/cli/main.py +37 -6
- rem/config.py +2 -2
- rem/models/core/core_model.py +7 -1
- rem/models/core/rem_query.py +5 -2
- rem/models/entities/__init__.py +21 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/message.py +30 -1
- rem/models/entities/session.py +83 -0
- rem/models/entities/shared_session.py +180 -0
- rem/models/entities/user.py +10 -3
- rem/registry.py +373 -0
- rem/schemas/agents/rem.yaml +7 -3
- rem/services/content/providers.py +94 -140
- rem/services/content/service.py +115 -24
- rem/services/dreaming/affinity_service.py +2 -16
- rem/services/dreaming/moment_service.py +2 -15
- rem/services/embeddings/api.py +24 -17
- rem/services/embeddings/worker.py +16 -16
- rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
- rem/services/phoenix/client.py +252 -19
- rem/services/postgres/README.md +159 -15
- rem/services/postgres/__init__.py +2 -1
- rem/services/postgres/diff_service.py +531 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
- rem/services/postgres/repository.py +132 -0
- rem/services/postgres/schema_generator.py +291 -9
- rem/services/postgres/service.py +6 -6
- rem/services/rate_limit.py +113 -0
- rem/services/rem/README.md +14 -0
- rem/services/rem/parser.py +44 -9
- rem/services/rem/service.py +36 -2
- rem/services/session/compression.py +17 -1
- rem/services/session/reload.py +1 -1
- rem/services/user_service.py +98 -0
- rem/settings.py +169 -22
- rem/sql/background_indexes.sql +21 -16
- rem/sql/migrations/001_install.sql +387 -54
- rem/sql/migrations/002_install_models.sql +2320 -393
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +548 -0
- rem/utils/__init__.py +18 -0
- rem/utils/constants.py +97 -0
- rem/utils/date_utils.py +228 -0
- rem/utils/embeddings.py +17 -4
- rem/utils/files.py +167 -0
- rem/utils/mime_types.py +158 -0
- rem/utils/model_helpers.py +156 -1
- rem/utils/schema_loader.py +284 -21
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +3 -1
- rem/utils/vision.py +9 -14
- rem/workers/README.md +14 -14
- rem/workers/__init__.py +2 -1
- rem/workers/db_maintainer.py +74 -0
- rem/workers/unlogged_maintainer.py +463 -0
- {remdb-0.2.6.dist-info → remdb-0.3.118.dist-info}/METADATA +598 -171
- {remdb-0.2.6.dist-info → remdb-0.3.118.dist-info}/RECORD +102 -73
- {remdb-0.2.6.dist-info → remdb-0.3.118.dist-info}/WHEEL +1 -1
- rem/sql/002_install_models.sql +0 -1068
- rem/sql/install_models.sql +0 -1038
- {remdb-0.2.6.dist-info → remdb-0.3.118.dist-info}/entry_points.txt +0 -0
|
@@ -3,213 +3,512 @@ Convert Pydantic models to SQLAlchemy metadata for Alembic autogenerate.
|
|
|
3
3
|
|
|
4
4
|
This module bridges REM's Pydantic-first approach with Alembic's SQLAlchemy requirement
|
|
5
5
|
by dynamically building SQLAlchemy Table objects from Pydantic model definitions.
|
|
6
|
+
|
|
7
|
+
IMPORTANT: Type mappings here MUST stay in sync with utils/sql_types.py
|
|
8
|
+
to ensure the diff command produces accurate results.
|
|
6
9
|
"""
|
|
7
10
|
|
|
11
|
+
import types
|
|
12
|
+
from datetime import date, datetime, time
|
|
8
13
|
from pathlib import Path
|
|
9
|
-
from typing import Any
|
|
14
|
+
from typing import Any, Union, get_args, get_origin
|
|
15
|
+
from uuid import UUID as UUIDType
|
|
10
16
|
|
|
11
17
|
from loguru import logger
|
|
12
18
|
from pydantic import BaseModel
|
|
19
|
+
from pydantic.fields import FieldInfo
|
|
13
20
|
from sqlalchemy import (
|
|
14
|
-
JSON,
|
|
15
21
|
Boolean,
|
|
16
22
|
Column,
|
|
23
|
+
Date,
|
|
17
24
|
DateTime,
|
|
18
25
|
Float,
|
|
26
|
+
ForeignKey,
|
|
27
|
+
Index,
|
|
19
28
|
Integer,
|
|
29
|
+
LargeBinary,
|
|
20
30
|
MetaData,
|
|
21
31
|
String,
|
|
22
32
|
Table,
|
|
23
33
|
Text,
|
|
34
|
+
Time,
|
|
35
|
+
UniqueConstraint,
|
|
36
|
+
text,
|
|
24
37
|
)
|
|
25
38
|
from sqlalchemy.dialects.postgresql import ARRAY, JSONB, UUID
|
|
26
39
|
|
|
40
|
+
# Import pgvector type for embeddings
|
|
41
|
+
try:
|
|
42
|
+
from pgvector.sqlalchemy import Vector
|
|
43
|
+
HAS_PGVECTOR = True
|
|
44
|
+
except ImportError:
|
|
45
|
+
HAS_PGVECTOR = False
|
|
46
|
+
Vector = None
|
|
47
|
+
|
|
27
48
|
from .schema_generator import SchemaGenerator
|
|
28
49
|
|
|
29
50
|
|
|
51
|
+
# Field names that should use TEXT instead of VARCHAR (sync with sql_types.py)
|
|
52
|
+
LONG_TEXT_FIELD_NAMES = {
|
|
53
|
+
"content",
|
|
54
|
+
"description",
|
|
55
|
+
"summary",
|
|
56
|
+
"instructions",
|
|
57
|
+
"prompt",
|
|
58
|
+
"message",
|
|
59
|
+
"body",
|
|
60
|
+
"text",
|
|
61
|
+
"note",
|
|
62
|
+
"comment",
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
# System fields handled separately by schema generation
|
|
66
|
+
SYSTEM_FIELDS = {
|
|
67
|
+
"id", "created_at", "updated_at", "deleted_at",
|
|
68
|
+
"tenant_id", "user_id", "graph_edges", "metadata", "tags",
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
# Fields that get embeddings by default (sync with register_type.py)
|
|
72
|
+
DEFAULT_EMBED_FIELD_NAMES = {
|
|
73
|
+
"content",
|
|
74
|
+
"description",
|
|
75
|
+
"summary",
|
|
76
|
+
"text",
|
|
77
|
+
"body",
|
|
78
|
+
"message",
|
|
79
|
+
"notes",
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
# Embedding configuration (sync with register_type.py)
|
|
83
|
+
DEFAULT_EMBEDDING_DIMENSIONS = 1536
|
|
84
|
+
|
|
85
|
+
|
|
30
86
|
def pydantic_type_to_sqlalchemy(
|
|
31
|
-
|
|
87
|
+
field_info: FieldInfo,
|
|
88
|
+
field_name: str,
|
|
32
89
|
) -> Any:
|
|
33
90
|
"""
|
|
34
|
-
Map Pydantic field
|
|
91
|
+
Map Pydantic field to SQLAlchemy column type.
|
|
92
|
+
|
|
93
|
+
This function mirrors the logic in utils/sql_types.py to ensure
|
|
94
|
+
consistent type mapping between schema generation and diff detection.
|
|
35
95
|
|
|
36
96
|
Args:
|
|
37
|
-
field_type: Pydantic field type annotation
|
|
38
97
|
field_info: Pydantic FieldInfo object
|
|
98
|
+
field_name: Name of the field (used for heuristics)
|
|
39
99
|
|
|
40
100
|
Returns:
|
|
41
101
|
SQLAlchemy column type
|
|
42
102
|
"""
|
|
43
|
-
#
|
|
44
|
-
|
|
103
|
+
# Check for explicit sql_type in json_schema_extra
|
|
104
|
+
if field_info.json_schema_extra:
|
|
105
|
+
if isinstance(field_info.json_schema_extra, dict):
|
|
106
|
+
sql_type = field_info.json_schema_extra.get("sql_type")
|
|
107
|
+
if sql_type:
|
|
108
|
+
return _sql_string_to_sqlalchemy(sql_type)
|
|
109
|
+
|
|
110
|
+
# Fields with embedding_provider should be TEXT
|
|
111
|
+
if "embedding_provider" in field_info.json_schema_extra:
|
|
112
|
+
return Text
|
|
113
|
+
|
|
114
|
+
annotation = field_info.annotation
|
|
115
|
+
|
|
116
|
+
# Handle None annotation
|
|
117
|
+
if annotation is None:
|
|
118
|
+
return Text
|
|
119
|
+
|
|
120
|
+
# Handle Union types (including Optional[T] and Python 3.10+ X | None)
|
|
121
|
+
origin = get_origin(annotation)
|
|
122
|
+
if origin is Union or isinstance(annotation, types.UnionType):
|
|
123
|
+
args = get_args(annotation)
|
|
124
|
+
# Filter out NoneType
|
|
125
|
+
non_none_args = [arg for arg in args if arg is not type(None)]
|
|
126
|
+
|
|
127
|
+
if not non_none_args:
|
|
128
|
+
return Text
|
|
129
|
+
|
|
130
|
+
# Prefer UUID over other types in unions
|
|
131
|
+
if UUIDType in non_none_args:
|
|
132
|
+
return UUID(as_uuid=True)
|
|
133
|
+
|
|
134
|
+
# Prefer dict/JSONB over other types in unions
|
|
135
|
+
if dict in non_none_args:
|
|
136
|
+
return JSONB
|
|
137
|
+
|
|
138
|
+
# Use the first non-None type
|
|
139
|
+
return _map_simple_type(non_none_args[0], field_name)
|
|
140
|
+
|
|
141
|
+
return _map_simple_type(annotation, field_name)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _map_simple_type(python_type: type, field_name: str) -> Any:
|
|
145
|
+
"""
|
|
146
|
+
Map a simple Python type to SQLAlchemy column type.
|
|
45
147
|
|
|
46
|
-
|
|
47
|
-
|
|
148
|
+
Args:
|
|
149
|
+
python_type: Python type annotation
|
|
150
|
+
field_name: Field name for heuristics
|
|
48
151
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
field_type = non_none_types[0]
|
|
55
|
-
origin = typing.get_origin(field_type)
|
|
56
|
-
args = typing.get_args(field_type)
|
|
152
|
+
Returns:
|
|
153
|
+
SQLAlchemy column type
|
|
154
|
+
"""
|
|
155
|
+
origin = get_origin(python_type)
|
|
156
|
+
args = get_args(python_type)
|
|
57
157
|
|
|
58
|
-
# Handle list types
|
|
158
|
+
# Handle list types
|
|
59
159
|
if origin is list:
|
|
60
160
|
if args:
|
|
61
161
|
inner_type = args[0]
|
|
162
|
+
|
|
163
|
+
# List of strings -> PostgreSQL TEXT[]
|
|
62
164
|
if inner_type is str:
|
|
63
165
|
return ARRAY(Text)
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
return
|
|
68
|
-
|
|
166
|
+
|
|
167
|
+
# List of dicts or complex types -> JSONB
|
|
168
|
+
if inner_type is dict or get_origin(inner_type) is not None:
|
|
169
|
+
return JSONB
|
|
170
|
+
|
|
171
|
+
# List of primitives -> JSONB
|
|
172
|
+
return JSONB
|
|
173
|
+
|
|
174
|
+
# Untyped list -> JSONB
|
|
175
|
+
return JSONB
|
|
69
176
|
|
|
70
177
|
# Handle dict types -> JSONB
|
|
71
|
-
if origin is dict or
|
|
178
|
+
if origin is dict or python_type is dict:
|
|
72
179
|
return JSONB
|
|
73
180
|
|
|
74
|
-
# Handle
|
|
75
|
-
if
|
|
76
|
-
|
|
77
|
-
max_length = getattr(field_info, "max_length", None)
|
|
78
|
-
if max_length:
|
|
79
|
-
return String(max_length)
|
|
80
|
-
return Text
|
|
181
|
+
# Handle primitive types
|
|
182
|
+
if python_type is str:
|
|
183
|
+
return _get_string_type(field_name)
|
|
81
184
|
|
|
82
|
-
if
|
|
185
|
+
if python_type is int:
|
|
83
186
|
return Integer
|
|
84
187
|
|
|
85
|
-
if
|
|
188
|
+
if python_type is float:
|
|
86
189
|
return Float
|
|
87
190
|
|
|
88
|
-
if
|
|
191
|
+
if python_type is bool:
|
|
89
192
|
return Boolean
|
|
90
193
|
|
|
91
|
-
|
|
92
|
-
|
|
194
|
+
if python_type is UUIDType:
|
|
195
|
+
return UUID(as_uuid=True)
|
|
93
196
|
|
|
94
|
-
if
|
|
197
|
+
if python_type is datetime:
|
|
95
198
|
return DateTime
|
|
96
199
|
|
|
97
|
-
|
|
98
|
-
|
|
200
|
+
if python_type is date:
|
|
201
|
+
return Date
|
|
99
202
|
|
|
100
|
-
if
|
|
101
|
-
return
|
|
203
|
+
if python_type is time:
|
|
204
|
+
return Time
|
|
102
205
|
|
|
103
|
-
|
|
104
|
-
|
|
206
|
+
if python_type is bytes:
|
|
207
|
+
return LargeBinary
|
|
105
208
|
|
|
106
|
-
if
|
|
107
|
-
|
|
209
|
+
# Check if it's a Pydantic model -> JSONB
|
|
210
|
+
if isinstance(python_type, type) and issubclass(python_type, BaseModel):
|
|
211
|
+
return JSONB
|
|
108
212
|
|
|
109
213
|
# Default to Text for unknown types
|
|
110
|
-
logger.warning(f"Unknown field type {field_type}, defaulting to Text")
|
|
111
214
|
return Text
|
|
112
215
|
|
|
113
216
|
|
|
114
|
-
def
|
|
217
|
+
def _get_string_type(field_name: str) -> Any:
|
|
218
|
+
"""
|
|
219
|
+
Determine string type based on field name.
|
|
220
|
+
|
|
221
|
+
Args:
|
|
222
|
+
field_name: Name of the field
|
|
223
|
+
|
|
224
|
+
Returns:
|
|
225
|
+
Text for long-form content, String(256) for others
|
|
226
|
+
"""
|
|
227
|
+
field_lower = field_name.lower()
|
|
228
|
+
|
|
229
|
+
if field_lower in LONG_TEXT_FIELD_NAMES:
|
|
230
|
+
return Text
|
|
231
|
+
|
|
232
|
+
# Check for common suffixes
|
|
233
|
+
if field_lower.endswith(("_content", "_description", "_summary", "_text", "_message")):
|
|
234
|
+
return Text
|
|
235
|
+
|
|
236
|
+
return String(256)
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def _sql_string_to_sqlalchemy(sql_type: str) -> Any:
|
|
240
|
+
"""
|
|
241
|
+
Convert SQL type string to SQLAlchemy type.
|
|
242
|
+
|
|
243
|
+
Args:
|
|
244
|
+
sql_type: PostgreSQL type string (e.g., "VARCHAR(256)", "JSONB")
|
|
245
|
+
|
|
246
|
+
Returns:
|
|
247
|
+
SQLAlchemy column type
|
|
248
|
+
"""
|
|
249
|
+
sql_upper = sql_type.upper()
|
|
250
|
+
|
|
251
|
+
if sql_upper == "TEXT":
|
|
252
|
+
return Text
|
|
253
|
+
if sql_upper == "JSONB" or sql_upper == "JSON":
|
|
254
|
+
return JSONB
|
|
255
|
+
if sql_upper == "UUID":
|
|
256
|
+
return UUID(as_uuid=True)
|
|
257
|
+
if sql_upper == "INTEGER" or sql_upper == "INT":
|
|
258
|
+
return Integer
|
|
259
|
+
if sql_upper == "BOOLEAN" or sql_upper == "BOOL":
|
|
260
|
+
return Boolean
|
|
261
|
+
if sql_upper == "TIMESTAMP":
|
|
262
|
+
return DateTime
|
|
263
|
+
if sql_upper == "DATE":
|
|
264
|
+
return Date
|
|
265
|
+
if sql_upper == "TIME":
|
|
266
|
+
return Time
|
|
267
|
+
if sql_upper == "DOUBLE PRECISION" or sql_upper == "FLOAT":
|
|
268
|
+
return Float
|
|
269
|
+
if sql_upper == "BYTEA":
|
|
270
|
+
return LargeBinary
|
|
271
|
+
if sql_upper.startswith("VARCHAR"):
|
|
272
|
+
# Extract length from VARCHAR(n)
|
|
273
|
+
import re
|
|
274
|
+
match = re.match(r"VARCHAR\((\d+)\)", sql_upper)
|
|
275
|
+
if match:
|
|
276
|
+
return String(int(match.group(1)))
|
|
277
|
+
return String(256)
|
|
278
|
+
if sql_upper == "TEXT[]":
|
|
279
|
+
return ARRAY(Text)
|
|
280
|
+
|
|
281
|
+
return Text
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
def _should_embed_field(field_name: str, field_info: FieldInfo) -> bool:
|
|
285
|
+
"""
|
|
286
|
+
Determine if a field should have embeddings generated.
|
|
287
|
+
|
|
288
|
+
Mirrors logic in register_type.should_embed_field().
|
|
289
|
+
|
|
290
|
+
Rules:
|
|
291
|
+
1. If json_schema_extra.embed = True, always embed
|
|
292
|
+
2. If json_schema_extra.embed = False, never embed
|
|
293
|
+
3. If field name in DEFAULT_EMBED_FIELD_NAMES, embed by default
|
|
294
|
+
4. Otherwise, don't embed
|
|
295
|
+
"""
|
|
296
|
+
# Check json_schema_extra for explicit embed configuration
|
|
297
|
+
json_extra = getattr(field_info, "json_schema_extra", None)
|
|
298
|
+
if json_extra and isinstance(json_extra, dict):
|
|
299
|
+
embed = json_extra.get("embed")
|
|
300
|
+
if embed is not None:
|
|
301
|
+
return bool(embed)
|
|
302
|
+
|
|
303
|
+
# Default: embed if field name matches common content fields
|
|
304
|
+
return field_name.lower() in DEFAULT_EMBED_FIELD_NAMES
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def _get_embeddable_fields(model: type[BaseModel]) -> list[str]:
|
|
308
|
+
"""Get list of field names that should have embeddings."""
|
|
309
|
+
embeddable = []
|
|
310
|
+
for field_name, field_info in model.model_fields.items():
|
|
311
|
+
if field_name in SYSTEM_FIELDS:
|
|
312
|
+
continue
|
|
313
|
+
if _should_embed_field(field_name, field_info):
|
|
314
|
+
embeddable.append(field_name)
|
|
315
|
+
return embeddable
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def build_sqlalchemy_metadata_from_pydantic(models_dir: Path | None = None) -> MetaData:
|
|
115
319
|
"""
|
|
116
320
|
Build SQLAlchemy MetaData from Pydantic models.
|
|
117
321
|
|
|
118
|
-
This function:
|
|
119
|
-
1.
|
|
120
|
-
2.
|
|
121
|
-
3.
|
|
122
|
-
|
|
322
|
+
This function uses the model registry as the source of truth:
|
|
323
|
+
1. Core models (Resource, Message, User, etc.) - always included
|
|
324
|
+
2. User-registered models via rem.register_model() - included if registered
|
|
325
|
+
3. Embeddings tables for models with embeddable fields
|
|
326
|
+
|
|
327
|
+
The registry ensures only actual entity models are included (not DTOs).
|
|
123
328
|
|
|
124
329
|
Args:
|
|
125
|
-
models_dir:
|
|
330
|
+
models_dir: Optional, not used (kept for backwards compatibility).
|
|
331
|
+
Models are discovered via the registry, not directory scanning.
|
|
126
332
|
|
|
127
333
|
Returns:
|
|
128
334
|
SQLAlchemy MetaData object
|
|
129
335
|
"""
|
|
336
|
+
from ...registry import get_model_registry
|
|
337
|
+
|
|
130
338
|
metadata = MetaData()
|
|
131
339
|
generator = SchemaGenerator()
|
|
340
|
+
registry = get_model_registry()
|
|
341
|
+
|
|
342
|
+
# Get all registered models (core + user-registered)
|
|
343
|
+
registered_models = registry.get_models(include_core=True)
|
|
344
|
+
logger.info(f"Registry contains {len(registered_models)} models")
|
|
345
|
+
|
|
346
|
+
for model_name, ext in registered_models.items():
|
|
347
|
+
# Use table_name from extension if provided, otherwise infer
|
|
348
|
+
table_name = ext.table_name or generator.infer_table_name(ext.model)
|
|
132
349
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
logger.debug(f"Building table {table_name} from model {model_name}")
|
|
141
|
-
|
|
142
|
-
# Build columns
|
|
143
|
-
columns = []
|
|
144
|
-
|
|
145
|
-
for field_name, field_info in model_class.model_fields.items():
|
|
146
|
-
# Get field type
|
|
147
|
-
field_type = field_info.annotation
|
|
148
|
-
|
|
149
|
-
# Map to SQLAlchemy type
|
|
150
|
-
sa_type = pydantic_type_to_sqlalchemy(field_type, field_info)
|
|
151
|
-
|
|
152
|
-
# Determine nullable
|
|
153
|
-
nullable = not field_info.is_required()
|
|
154
|
-
|
|
155
|
-
# Get default value
|
|
156
|
-
from pydantic_core import PydanticUndefined
|
|
157
|
-
|
|
158
|
-
default = None
|
|
159
|
-
if field_info.default is not PydanticUndefined and field_info.default is not None:
|
|
160
|
-
default = field_info.default
|
|
161
|
-
elif field_info.default_factory is not None:
|
|
162
|
-
# For default_factory, we'll use the server default if possible
|
|
163
|
-
factory = field_info.default_factory
|
|
164
|
-
# Handle common default factories
|
|
165
|
-
if factory.__name__ == "list":
|
|
166
|
-
default = "ARRAY[]::TEXT[]" # PostgreSQL empty array
|
|
167
|
-
elif factory.__name__ == "dict":
|
|
168
|
-
default = "'{}'::jsonb" # PostgreSQL empty JSON
|
|
169
|
-
else:
|
|
170
|
-
default = None
|
|
171
|
-
|
|
172
|
-
# Handle special fields
|
|
173
|
-
server_default = None
|
|
174
|
-
primary_key = False
|
|
175
|
-
|
|
176
|
-
if field_name == "id":
|
|
177
|
-
primary_key = True
|
|
178
|
-
if sa_type == UUID(as_uuid=True):
|
|
179
|
-
server_default = "uuid_generate_v4()"
|
|
180
|
-
elif field_name in ("created_at", "updated_at"):
|
|
181
|
-
server_default = "CURRENT_TIMESTAMP"
|
|
182
|
-
elif isinstance(default, str) and default.startswith("ARRAY["):
|
|
183
|
-
server_default = default
|
|
184
|
-
default = None
|
|
185
|
-
elif isinstance(default, str) and "::jsonb" in default:
|
|
186
|
-
server_default = default
|
|
187
|
-
default = None
|
|
188
|
-
|
|
189
|
-
# Create column - only pass server_default if it's a string SQL expression
|
|
190
|
-
column_kwargs = {
|
|
191
|
-
"type_": sa_type,
|
|
192
|
-
"primary_key": primary_key,
|
|
193
|
-
"nullable": nullable,
|
|
194
|
-
}
|
|
195
|
-
|
|
196
|
-
if server_default is not None:
|
|
197
|
-
from sqlalchemy import text
|
|
198
|
-
column_kwargs["server_default"] = text(server_default)
|
|
199
|
-
|
|
200
|
-
column = Column(field_name, **column_kwargs)
|
|
201
|
-
|
|
202
|
-
columns.append(column)
|
|
203
|
-
|
|
204
|
-
# Create table
|
|
205
|
-
if columns:
|
|
206
|
-
Table(table_name, metadata, *columns)
|
|
207
|
-
logger.debug(f"Created table {table_name} with {len(columns)} columns")
|
|
350
|
+
# Build primary table
|
|
351
|
+
_build_table(ext.model, table_name, metadata)
|
|
352
|
+
|
|
353
|
+
# Build embeddings table if model has embeddable fields
|
|
354
|
+
embeddable_fields = _get_embeddable_fields(ext.model)
|
|
355
|
+
if embeddable_fields:
|
|
356
|
+
_build_embeddings_table(table_name, metadata)
|
|
208
357
|
|
|
209
358
|
logger.info(f"Built metadata with {len(metadata.tables)} tables")
|
|
210
359
|
return metadata
|
|
211
360
|
|
|
212
361
|
|
|
362
|
+
def _build_table(model: type[BaseModel], table_name: str, metadata: MetaData) -> Table:
|
|
363
|
+
"""
|
|
364
|
+
Build SQLAlchemy Table from Pydantic model.
|
|
365
|
+
|
|
366
|
+
Mirrors the schema generated by register_type.generate_table_schema().
|
|
367
|
+
|
|
368
|
+
Args:
|
|
369
|
+
model: Pydantic model class
|
|
370
|
+
table_name: Table name
|
|
371
|
+
metadata: SQLAlchemy MetaData to add table to
|
|
372
|
+
|
|
373
|
+
Returns:
|
|
374
|
+
SQLAlchemy Table object
|
|
375
|
+
"""
|
|
376
|
+
columns = []
|
|
377
|
+
indexes = []
|
|
378
|
+
|
|
379
|
+
# Primary key: id UUID
|
|
380
|
+
columns.append(
|
|
381
|
+
Column(
|
|
382
|
+
"id",
|
|
383
|
+
UUID(as_uuid=True),
|
|
384
|
+
primary_key=True,
|
|
385
|
+
server_default=text("uuid_generate_v4()"),
|
|
386
|
+
)
|
|
387
|
+
)
|
|
388
|
+
|
|
389
|
+
# Tenant and user scoping
|
|
390
|
+
columns.append(Column("tenant_id", String(100), nullable=False))
|
|
391
|
+
columns.append(Column("user_id", String(256), nullable=True))
|
|
392
|
+
|
|
393
|
+
# Process Pydantic fields (skip system fields)
|
|
394
|
+
for field_name, field_info in model.model_fields.items():
|
|
395
|
+
if field_name in SYSTEM_FIELDS:
|
|
396
|
+
continue
|
|
397
|
+
|
|
398
|
+
sa_type = pydantic_type_to_sqlalchemy(field_info, field_name)
|
|
399
|
+
nullable = not field_info.is_required()
|
|
400
|
+
|
|
401
|
+
# Handle default values for JSONB and arrays
|
|
402
|
+
server_default = None
|
|
403
|
+
if field_info.default_factory is not None:
|
|
404
|
+
if isinstance(sa_type, type) and sa_type is JSONB:
|
|
405
|
+
server_default = text("'{}'::jsonb")
|
|
406
|
+
elif isinstance(sa_type, JSONB):
|
|
407
|
+
server_default = text("'{}'::jsonb")
|
|
408
|
+
elif isinstance(sa_type, ARRAY):
|
|
409
|
+
server_default = text("ARRAY[]::TEXT[]")
|
|
410
|
+
|
|
411
|
+
columns.append(
|
|
412
|
+
Column(field_name, sa_type, nullable=nullable, server_default=server_default)
|
|
413
|
+
)
|
|
414
|
+
|
|
415
|
+
# System timestamp fields
|
|
416
|
+
columns.append(Column("created_at", DateTime, server_default=text("CURRENT_TIMESTAMP")))
|
|
417
|
+
columns.append(Column("updated_at", DateTime, server_default=text("CURRENT_TIMESTAMP")))
|
|
418
|
+
columns.append(Column("deleted_at", DateTime, nullable=True))
|
|
419
|
+
|
|
420
|
+
# graph_edges JSONB field
|
|
421
|
+
columns.append(
|
|
422
|
+
Column("graph_edges", JSONB, nullable=True, server_default=text("'[]'::jsonb"))
|
|
423
|
+
)
|
|
424
|
+
|
|
425
|
+
# metadata JSONB field
|
|
426
|
+
columns.append(
|
|
427
|
+
Column("metadata", JSONB, nullable=True, server_default=text("'{}'::jsonb"))
|
|
428
|
+
)
|
|
429
|
+
|
|
430
|
+
# tags TEXT[] field
|
|
431
|
+
columns.append(
|
|
432
|
+
Column("tags", ARRAY(Text), nullable=True, server_default=text("ARRAY[]::TEXT[]"))
|
|
433
|
+
)
|
|
434
|
+
|
|
435
|
+
# Create table
|
|
436
|
+
table = Table(table_name, metadata, *columns)
|
|
437
|
+
|
|
438
|
+
# Add indexes (matching register_type output)
|
|
439
|
+
Index(f"idx_{table_name}_tenant", table.c.tenant_id)
|
|
440
|
+
Index(f"idx_{table_name}_user", table.c.user_id)
|
|
441
|
+
Index(f"idx_{table_name}_graph_edges", table.c.graph_edges, postgresql_using="gin")
|
|
442
|
+
Index(f"idx_{table_name}_metadata", table.c.metadata, postgresql_using="gin")
|
|
443
|
+
Index(f"idx_{table_name}_tags", table.c.tags, postgresql_using="gin")
|
|
444
|
+
|
|
445
|
+
return table
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
def _build_embeddings_table(base_table_name: str, metadata: MetaData) -> Table:
|
|
449
|
+
"""
|
|
450
|
+
Build SQLAlchemy Table for embeddings.
|
|
451
|
+
|
|
452
|
+
Mirrors the schema generated by register_type.generate_embeddings_schema().
|
|
453
|
+
|
|
454
|
+
Args:
|
|
455
|
+
base_table_name: Name of the primary entity table (e.g., "resources")
|
|
456
|
+
metadata: SQLAlchemy MetaData to add table to
|
|
457
|
+
|
|
458
|
+
Returns:
|
|
459
|
+
SQLAlchemy Table object for embeddings_<base_table_name>
|
|
460
|
+
"""
|
|
461
|
+
embeddings_table_name = f"embeddings_{base_table_name}"
|
|
462
|
+
|
|
463
|
+
# Use pgvector Vector type if available, otherwise use a placeholder
|
|
464
|
+
if HAS_PGVECTOR and Vector is not None:
|
|
465
|
+
vector_type = Vector(DEFAULT_EMBEDDING_DIMENSIONS)
|
|
466
|
+
else:
|
|
467
|
+
# Fallback: use raw SQL type via TypeDecorator or just skip
|
|
468
|
+
# For now, we'll log a warning and use a simple column
|
|
469
|
+
logger.warning(
|
|
470
|
+
f"pgvector not installed, embeddings table {embeddings_table_name} "
|
|
471
|
+
"will use ARRAY type instead of vector"
|
|
472
|
+
)
|
|
473
|
+
vector_type = ARRAY(Float)
|
|
474
|
+
|
|
475
|
+
columns = [
|
|
476
|
+
Column(
|
|
477
|
+
"id",
|
|
478
|
+
UUID(as_uuid=True),
|
|
479
|
+
primary_key=True,
|
|
480
|
+
server_default=text("uuid_generate_v4()"),
|
|
481
|
+
),
|
|
482
|
+
Column(
|
|
483
|
+
"entity_id",
|
|
484
|
+
UUID(as_uuid=True),
|
|
485
|
+
ForeignKey(f"{base_table_name}.id", ondelete="CASCADE"),
|
|
486
|
+
nullable=False,
|
|
487
|
+
),
|
|
488
|
+
Column("field_name", String(100), nullable=False),
|
|
489
|
+
Column("provider", String(50), nullable=False, server_default=text("'openai'")),
|
|
490
|
+
Column("model", String(100), nullable=False, server_default=text("'text-embedding-3-small'")),
|
|
491
|
+
Column("embedding", vector_type, nullable=False),
|
|
492
|
+
Column("created_at", DateTime, server_default=text("CURRENT_TIMESTAMP")),
|
|
493
|
+
Column("updated_at", DateTime, server_default=text("CURRENT_TIMESTAMP")),
|
|
494
|
+
]
|
|
495
|
+
|
|
496
|
+
# Create table with unique constraint
|
|
497
|
+
# Note: constraint name matches PostgreSQL's auto-generated naming convention
|
|
498
|
+
table = Table(
|
|
499
|
+
embeddings_table_name,
|
|
500
|
+
metadata,
|
|
501
|
+
*columns,
|
|
502
|
+
UniqueConstraint("entity_id", "field_name", "provider", name=f"{embeddings_table_name}_entity_id_field_name_provider_key"),
|
|
503
|
+
)
|
|
504
|
+
|
|
505
|
+
# Add indexes (matching register_type output)
|
|
506
|
+
Index(f"idx_{embeddings_table_name}_entity", table.c.entity_id)
|
|
507
|
+
Index(f"idx_{embeddings_table_name}_field_provider", table.c.field_name, table.c.provider)
|
|
508
|
+
|
|
509
|
+
return table
|
|
510
|
+
|
|
511
|
+
|
|
213
512
|
def get_target_metadata() -> MetaData:
|
|
214
513
|
"""
|
|
215
514
|
Get SQLAlchemy metadata for Alembic autogenerate.
|
|
@@ -219,7 +518,6 @@ def get_target_metadata() -> MetaData:
|
|
|
219
518
|
Returns:
|
|
220
519
|
SQLAlchemy MetaData object representing current Pydantic models
|
|
221
520
|
"""
|
|
222
|
-
# Find models directory
|
|
223
521
|
import rem
|
|
224
522
|
|
|
225
523
|
package_root = Path(rem.__file__).parent.parent.parent
|