remdb 0.3.242__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +129 -0
- rem/agentic/README.md +760 -0
- rem/agentic/__init__.py +54 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +38 -0
- rem/agentic/agents/agent_manager.py +311 -0
- rem/agentic/agents/sse_simulator.py +502 -0
- rem/agentic/context.py +425 -0
- rem/agentic/context_builder.py +360 -0
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +273 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +240 -0
- rem/agentic/providers/phoenix.py +926 -0
- rem/agentic/providers/pydantic_ai.py +854 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +737 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +242 -0
- rem/api/README.md +657 -0
- rem/api/deps.py +253 -0
- rem/api/main.py +460 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +820 -0
- rem/api/mcp_router/server.py +243 -0
- rem/api/mcp_router/tools.py +1605 -0
- rem/api/middleware/tracking.py +172 -0
- rem/api/routers/admin.py +520 -0
- rem/api/routers/auth.py +898 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/child_streaming.py +394 -0
- rem/api/routers/chat/completions.py +702 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +202 -0
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +546 -0
- rem/api/routers/chat/streaming.py +950 -0
- rem/api/routers/chat/streaming_utils.py +327 -0
- rem/api/routers/common.py +18 -0
- rem/api/routers/dev.py +87 -0
- rem/api/routers/feedback.py +276 -0
- rem/api/routers/messages.py +620 -0
- rem/api/routers/models.py +86 -0
- rem/api/routers/query.py +362 -0
- rem/api/routers/shared_sessions.py +422 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +36 -0
- rem/auth/jwt.py +367 -0
- rem/auth/middleware.py +318 -0
- rem/auth/providers/__init__.py +16 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/email.py +215 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +517 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +299 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +549 -0
- rem/cli/commands/cluster.py +1808 -0
- rem/cli/commands/configure.py +495 -0
- rem/cli/commands/db.py +828 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1698 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +388 -0
- rem/cli/commands/query.py +109 -0
- rem/cli/commands/scaffold.py +47 -0
- rem/cli/commands/schema.py +230 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/commands/session.py +453 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +123 -0
- rem/config.py +244 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +70 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +672 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +246 -0
- rem/models/entities/__init__.py +68 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +64 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +181 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/session.py +84 -0
- rem/models/entities/shared_session.py +180 -0
- rem/models/entities/subscriber.py +175 -0
- rem/models/entities/user.py +93 -0
- rem/py.typed +0 -0
- rem/registry.py +373 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/agent-builder.yaml +235 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +132 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +18 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +760 -0
- rem/services/content/service.py +762 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +322 -0
- rem/services/dreaming/moment_service.py +251 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/email/__init__.py +10 -0
- rem/services/email/service.py +522 -0
- rem/services/email/templates.py +360 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +127 -0
- rem/services/embeddings/worker.py +435 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +960 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +757 -0
- rem/services/postgres/__init__.py +49 -0
- rem/services/postgres/diff_service.py +599 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/programmable_diff_service.py +635 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
- rem/services/postgres/register_type.py +353 -0
- rem/services/postgres/repository.py +481 -0
- rem/services/postgres/schema_generator.py +661 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +355 -0
- rem/services/rate_limit.py +113 -0
- rem/services/rem/README.md +318 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +180 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +608 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +13 -0
- rem/services/session/compression.py +488 -0
- rem/services/session/pydantic_messages.py +310 -0
- rem/services/session/reload.py +85 -0
- rem/services/user_service.py +130 -0
- rem/settings.py +1877 -0
- rem/sql/background_indexes.sql +52 -0
- rem/sql/migrations/001_install.sql +983 -0
- rem/sql/migrations/002_install_models.sql +3157 -0
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +282 -0
- rem/sql/migrations/005_schema_update.sql +145 -0
- rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +628 -0
- rem/utils/__init__.py +61 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/constants.py +97 -0
- rem/utils/date_utils.py +228 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +436 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/files.py +323 -0
- rem/utils/markdown.py +16 -0
- rem/utils/mime_types.py +158 -0
- rem/utils/model_helpers.py +492 -0
- rem/utils/schema_loader.py +649 -0
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +350 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +325 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +7 -0
- rem/workers/db_listener.py +579 -0
- rem/workers/db_maintainer.py +74 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- rem/workers/unlogged_maintainer.py +463 -0
- remdb-0.3.242.dist-info/METADATA +1632 -0
- remdb-0.3.242.dist-info/RECORD +235 -0
- remdb-0.3.242.dist-info/WHEEL +4 -0
- remdb-0.3.242.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,562 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Convert Pydantic models to SQLAlchemy metadata for Alembic autogenerate.
|
|
3
|
+
|
|
4
|
+
This module bridges REM's Pydantic-first approach with Alembic's SQLAlchemy requirement
|
|
5
|
+
by dynamically building SQLAlchemy Table objects from Pydantic model definitions.
|
|
6
|
+
|
|
7
|
+
IMPORTANT: Type mappings here MUST stay in sync with utils/sql_types.py
|
|
8
|
+
to ensure the diff command produces accurate results.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import types
|
|
12
|
+
from datetime import date, datetime, time
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any, Union, get_args, get_origin
|
|
15
|
+
from uuid import UUID as UUIDType
|
|
16
|
+
|
|
17
|
+
from loguru import logger
|
|
18
|
+
from pydantic import BaseModel
|
|
19
|
+
from pydantic.fields import FieldInfo
|
|
20
|
+
from sqlalchemy import (
|
|
21
|
+
Boolean,
|
|
22
|
+
Column,
|
|
23
|
+
Date,
|
|
24
|
+
DateTime,
|
|
25
|
+
Float,
|
|
26
|
+
ForeignKey,
|
|
27
|
+
Index,
|
|
28
|
+
Integer,
|
|
29
|
+
LargeBinary,
|
|
30
|
+
MetaData,
|
|
31
|
+
String,
|
|
32
|
+
Table,
|
|
33
|
+
Text,
|
|
34
|
+
Time,
|
|
35
|
+
UniqueConstraint,
|
|
36
|
+
text,
|
|
37
|
+
)
|
|
38
|
+
from sqlalchemy.dialects.postgresql import ARRAY, JSONB, UUID
|
|
39
|
+
|
|
40
|
+
# Import pgvector type for embeddings
|
|
41
|
+
try:
|
|
42
|
+
from pgvector.sqlalchemy import Vector
|
|
43
|
+
HAS_PGVECTOR = True
|
|
44
|
+
except ImportError:
|
|
45
|
+
HAS_PGVECTOR = False
|
|
46
|
+
Vector = None
|
|
47
|
+
|
|
48
|
+
from .schema_generator import SchemaGenerator
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# Exact (lower-cased) field names whose string columns are mapped to TEXT
# rather than a bounded VARCHAR. Keep in sync with sql_types.py.
# NOTE(review): this set contains "note" while DEFAULT_EMBED_FIELD_NAMES
# contains "notes" - confirm each matches the module it is meant to mirror.
LONG_TEXT_FIELD_NAMES = {
    "body",
    "comment",
    "content",
    "description",
    "instructions",
    "message",
    "note",
    "prompt",
    "summary",
    "text",
}

# Columns that the table builder emits itself; model fields with these names
# are skipped when iterating over a model's declared fields.
SYSTEM_FIELDS = {
    "id",
    "created_at",
    "updated_at",
    "deleted_at",
    "tenant_id",
    "user_id",
    "graph_edges",
    "metadata",
    "tags",
}

# Field names that are embedded when a field carries no explicit ``embed``
# flag. Keep in sync with register_type.py.
DEFAULT_EMBED_FIELD_NAMES = {
    "body",
    "content",
    "description",
    "message",
    "notes",
    "summary",
    "text",
}

# Dimension passed to the pgvector ``Vector`` type for embedding columns.
# Keep in sync with register_type.py.
DEFAULT_EMBEDDING_DIMENSIONS = 1536
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def pydantic_type_to_sqlalchemy(
    field_info: FieldInfo,
    field_name: str,
) -> Any:
    """
    Resolve the SQLAlchemy column type for one Pydantic field.

    Resolution order:
        1. ``json_schema_extra["sql_type"]`` (dict form only), converted via
           ``_sql_string_to_sqlalchemy``.
        2. ``json_schema_extra`` containing ``"embedding_provider"`` -> ``Text``.
        3. No annotation -> ``Text``.
        4. Union annotations (``Optional[T]``, ``Union[...]``, ``X | None``):
           ``None`` is dropped, then UUID wins, then bare ``dict`` (JSONB),
           otherwise the first remaining member is mapped.
        5. Any other annotation is handed to ``_map_simple_type``.

    Args:
        field_info: Pydantic FieldInfo describing the field.
        field_name: Field name, forwarded for the string-type heuristics.

    Returns:
        SQLAlchemy column type (class or instance).
    """
    extra = field_info.json_schema_extra
    # Only the dict form of json_schema_extra is inspected.
    if extra and isinstance(extra, dict):
        explicit_sql_type = extra.get("sql_type")
        if explicit_sql_type:
            return _sql_string_to_sqlalchemy(explicit_sql_type)

        # A field configured with an embedding provider is stored as TEXT.
        if "embedding_provider" in extra:
            return Text

    annotation = field_info.annotation
    if annotation is None:
        return Text

    # typing.Union/Optional report Union as origin; PEP 604 unions are
    # instances of types.UnionType.
    is_union = get_origin(annotation) is Union or isinstance(annotation, types.UnionType)
    if not is_union:
        return _map_simple_type(annotation, field_name)

    members = [member for member in get_args(annotation) if member is not type(None)]
    if not members:
        return Text

    # UUID takes precedence over every other union member.
    if UUIDType in members:
        return UUID(as_uuid=True)

    # Bare ``dict`` comes next; parametrised dicts are not matched here.
    if dict in members:
        return JSONB

    return _map_simple_type(members[0], field_name)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _map_simple_type(python_type: type, field_name: str) -> Any:
    """
    Translate a non-union Python annotation into a SQLAlchemy column type.

    Mapping summary:
        - ``list[str]`` -> ``ARRAY(Text)``; every other list form -> ``JSONB``
        - ``dict`` / ``dict[...]`` -> ``JSONB``
        - ``str`` -> decided by ``_get_string_type`` from the field name
        - ``int``/``float``/``bool``/``datetime``/``date``/``time``/``bytes``
          -> the matching SQLAlchemy scalar type
        - ``uuid.UUID`` -> ``UUID(as_uuid=True)``
        - Pydantic ``BaseModel`` subclasses -> ``JSONB``
        - anything else -> ``Text``

    Args:
        python_type: Python type annotation (already stripped of ``None``).
        field_name: Field name, used only for the ``str`` heuristics.

    Returns:
        SQLAlchemy column type (class or instance).
    """
    origin = get_origin(python_type)

    if origin is list:
        # Only a list of plain strings becomes a native TEXT[] column; lists of
        # dicts, nested generics, other primitives and unparametrised lists
        # are all stored as JSONB.
        # NOTE(review): a bare builtin ``list`` annotation has no origin and so
        # is not handled here; it ends up as Text below - confirm that matches
        # utils/sql_types.py.
        item_types = get_args(python_type)
        if item_types and item_types[0] is str:
            return ARRAY(Text)
        return JSONB

    if origin is dict or python_type is dict:
        return JSONB

    if python_type is str:
        return _get_string_type(field_name)

    if python_type is UUIDType:
        return UUID(as_uuid=True)

    # Compared by identity, so subclasses of these types are not matched here.
    scalar_columns = (
        (int, Integer),
        (float, Float),
        (bool, Boolean),
        (datetime, DateTime),
        (date, Date),
        (time, Time),
        (bytes, LargeBinary),
    )
    for candidate, column_type in scalar_columns:
        if python_type is candidate:
            return column_type

    # Nested Pydantic models are serialised into a JSONB column.
    if isinstance(python_type, type) and issubclass(python_type, BaseModel):
        return JSONB

    # Unknown annotations fall back to Text.
    return Text
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def _get_string_type(field_name: str) -> Any:
    """
    Pick the column type for a ``str`` field from its name.

    The name is compared case-insensitively. It is treated as long-form text
    when it is listed in ``LONG_TEXT_FIELD_NAMES`` or ends with one of the
    long-form suffixes below.

    Args:
        field_name: Name of the field.

    Returns:
        ``Text`` for long-form content, ``String(256)`` otherwise.
    """
    long_form_suffixes = ("_content", "_description", "_summary", "_text", "_message")
    normalized = field_name.lower()

    is_long_form = (
        normalized in LONG_TEXT_FIELD_NAMES
        or normalized.endswith(long_form_suffixes)
    )
    return Text if is_long_form else String(256)
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def _sql_string_to_sqlalchemy(sql_type: str) -> Any:
    """
    Turn a PostgreSQL type string into the corresponding SQLAlchemy type.

    Matching is case-insensitive and exact, except for ``VARCHAR`` which is
    matched by prefix so that a length can be parsed out of ``VARCHAR(n)``.
    Unrecognised strings fall back to ``Text``.

    Args:
        sql_type: PostgreSQL type string (e.g., "VARCHAR(256)", "JSONB").

    Returns:
        SQLAlchemy column type (class or instance).
    """
    normalized = sql_type.upper()

    # Types that need a fresh instance on every call.
    if normalized == "UUID":
        return UUID(as_uuid=True)
    if normalized == "TEXT[]":
        return ARRAY(Text)

    # Exact-name lookups, including the accepted aliases.
    exact_matches = {
        "TEXT": Text,
        "JSONB": JSONB,
        "JSON": JSONB,
        "INTEGER": Integer,
        "INT": Integer,
        "BOOLEAN": Boolean,
        "BOOL": Boolean,
        "TIMESTAMP": DateTime,
        "DATE": Date,
        "TIME": Time,
        "DOUBLE PRECISION": Float,
        "FLOAT": Float,
        "BYTEA": LargeBinary,
    }
    if normalized in exact_matches:
        return exact_matches[normalized]

    if normalized.startswith("VARCHAR"):
        import re

        # Use the declared length when present; bare VARCHAR defaults to 256.
        sized = re.match(r"VARCHAR\((\d+)\)", normalized)
        if sized:
            return String(int(sized.group(1)))
        return String(256)

    return Text
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
def _should_embed_field(field_name: str, field_info: FieldInfo) -> bool:
    """
    Decide whether embeddings should be generated for a field.

    Intended to mirror register_type.should_embed_field().

    Precedence:
        1. An ``embed`` entry in a dict-form ``json_schema_extra`` decides the
           outcome by its truthiness, as long as it is not ``None``.
        2. Otherwise the field is embedded only when its lower-cased name is
           in ``DEFAULT_EMBED_FIELD_NAMES``.

    Args:
        field_name: Name of the field.
        field_info: Pydantic FieldInfo for the field.

    Returns:
        True if the field should be embedded, False otherwise.
    """
    extra = getattr(field_info, "json_schema_extra", None)

    explicit_flag = None
    if extra and isinstance(extra, dict):
        explicit_flag = extra.get("embed")

    if explicit_flag is None:
        # No explicit configuration: fall back to the name-based default.
        return field_name.lower() in DEFAULT_EMBED_FIELD_NAMES
    return bool(explicit_flag)
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def _get_embeddable_fields(model: type[BaseModel]) -> list[str]:
    """
    Collect the names of a model's fields that should have embeddings.

    System fields are never considered; every other field is judged by
    ``_should_embed_field``. Names keep the model's declaration order.
    """
    return [
        name
        for name, info in model.model_fields.items()
        if name not in SYSTEM_FIELDS and _should_embed_field(name, info)
    ]
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def build_sqlalchemy_metadata_from_pydantic(models_dir: Path | None = None) -> MetaData:
    """
    Build a SQLAlchemy MetaData object from the registered Pydantic models.

    Every model returned by the registry (requested with ``include_core=True``)
    contributes one primary table, plus an ``embeddings_<table>`` table when
    the model has at least one embeddable field. The table name comes from the
    registry entry when set, otherwise it is inferred by ``SchemaGenerator``.

    Args:
        models_dir: Ignored. Kept so existing callers that pass a directory
            keep working; models are taken from the registry instead.

    Returns:
        SQLAlchemy MetaData containing all generated tables.
    """
    from ...registry import get_model_registry

    metadata = MetaData()
    generator = SchemaGenerator()
    registry = get_model_registry()

    # Core and user-registered models together.
    registered_models = registry.get_models(include_core=True)
    logger.info(f"Registry contains {len(registered_models)} models")

    for _model_name, ext in registered_models.items():
        # An explicit table name on the registry entry wins over inference.
        table_name = ext.table_name or generator.infer_table_name(ext.model)

        _build_table(ext.model, table_name, metadata)

        # Only models with embeddable fields get a companion embeddings table.
        if _get_embeddable_fields(ext.model):
            _build_embeddings_table(table_name, metadata)

    logger.info(f"Built metadata with {len(metadata.tables)} tables")
    return metadata
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
def _build_table(model: type[BaseModel], table_name: str, metadata: MetaData) -> Table:
    """
    Build the primary SQLAlchemy Table for a Pydantic model.

    Intended to mirror the schema generated by
    register_type.generate_table_schema().

    Column order is: ``id``, ``tenant_id``, ``user_id``, the model's own
    fields (system fields skipped), the three timestamp columns, then
    ``graph_edges``, ``metadata`` and ``tags``. Five indexes are attached:
    plain indexes on ``tenant_id`` and ``user_id`` and GIN indexes on
    ``graph_edges``, ``metadata`` and ``tags``.

    Args:
        model: Pydantic model class.
        table_name: Table name.
        metadata: SQLAlchemy MetaData the table is registered on.

    Returns:
        SQLAlchemy Table object.
    """
    columns = [
        # Primary key: id UUID, generated server-side.
        Column(
            "id",
            UUID(as_uuid=True),
            primary_key=True,
            server_default=text("uuid_generate_v4()"),
        ),
        # Tenant and user scoping (tenant_id nullable - NULL means public/shared).
        Column("tenant_id", String(100), nullable=True),
        Column("user_id", String(256), nullable=True),
    ]

    # Model-declared fields; system fields are emitted explicitly instead.
    for field_name, field_info in model.model_fields.items():
        if field_name in SYSTEM_FIELDS:
            continue

        sa_type = pydantic_type_to_sqlalchemy(field_info, field_name)

        # Fields built by a default_factory get a matching empty server default.
        # The type mappers may return JSONB either as the class or an instance.
        # NOTE(review): JSONB columns always default to '{}' here, even when
        # the Python field is a list - confirm this matches register_type.
        server_default = None
        if field_info.default_factory is not None:
            if sa_type is JSONB or isinstance(sa_type, JSONB):
                server_default = text("'{}'::jsonb")
            elif isinstance(sa_type, ARRAY):
                server_default = text("ARRAY[]::TEXT[]")

        columns.append(
            Column(
                field_name,
                sa_type,
                # Fields that are not required are stored as nullable columns.
                nullable=not field_info.is_required(),
                server_default=server_default,
            )
        )

    columns.extend(
        [
            # System timestamp fields.
            Column("created_at", DateTime, server_default=text("CURRENT_TIMESTAMP")),
            Column("updated_at", DateTime, server_default=text("CURRENT_TIMESTAMP")),
            Column("deleted_at", DateTime, nullable=True),
            # graph_edges JSONB field.
            Column("graph_edges", JSONB, nullable=True, server_default=text("'[]'::jsonb")),
            # metadata JSONB field.
            Column("metadata", JSONB, nullable=True, server_default=text("'{}'::jsonb")),
            # tags TEXT[] field.
            Column("tags", ARRAY(Text), nullable=True, server_default=text("ARRAY[]::TEXT[]")),
        ]
    )

    table = Table(table_name, metadata, *columns)

    # Creating an Index from table columns attaches it to the table, so the
    # Index objects do not need to be kept.
    Index(f"idx_{table_name}_tenant", table.c.tenant_id)
    Index(f"idx_{table_name}_user", table.c.user_id)
    Index(f"idx_{table_name}_graph_edges", table.c.graph_edges, postgresql_using="gin")
    Index(f"idx_{table_name}_metadata", table.c.metadata, postgresql_using="gin")
    Index(f"idx_{table_name}_tags", table.c.tags, postgresql_using="gin")

    return table
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
def _build_embeddings_table(base_table_name: str, metadata: MetaData) -> Table:
    """
    Build the ``embeddings_<base_table_name>`` Table.

    Intended to mirror the schema generated by
    register_type.generate_embeddings_schema().

    The table references the base table through ``entity_id`` (cascade on
    delete), is unique on (entity_id, field_name, provider), and gets indexes
    on ``entity_id`` and on (field_name, provider).

    Args:
        base_table_name: Name of the primary entity table (e.g., "resources").
        metadata: SQLAlchemy MetaData the table is registered on.

    Returns:
        SQLAlchemy Table object for embeddings_<base_table_name>.
    """
    embeddings_table_name = f"embeddings_{base_table_name}"

    pgvector_missing = not (HAS_PGVECTOR and Vector is not None)
    if pgvector_missing:
        # Without pgvector the embedding column degrades to a float array.
        logger.warning(
            f"pgvector not installed, embeddings table {embeddings_table_name} "
            "will use ARRAY type instead of vector"
        )
        vector_type = ARRAY(Float)
    else:
        vector_type = Vector(DEFAULT_EMBEDDING_DIMENSIONS)

    # Only the first 30 characters of the base table name are used so the
    # constraint name fits PostgreSQL's 63-char identifier limit.
    constraint_name = f"uq_{base_table_name[:30]}_emb_entity_field_prov"

    table = Table(
        embeddings_table_name,
        metadata,
        Column(
            "id",
            UUID(as_uuid=True),
            primary_key=True,
            server_default=text("uuid_generate_v4()"),
        ),
        Column(
            "entity_id",
            UUID(as_uuid=True),
            ForeignKey(f"{base_table_name}.id", ondelete="CASCADE"),
            nullable=False,
        ),
        Column("field_name", String(100), nullable=False),
        Column("provider", String(50), nullable=False, server_default=text("'openai'")),
        Column("model", String(100), nullable=False, server_default=text("'text-embedding-3-small'")),
        Column("embedding", vector_type, nullable=False),
        Column("created_at", DateTime, server_default=text("CURRENT_TIMESTAMP")),
        Column("updated_at", DateTime, server_default=text("CURRENT_TIMESTAMP")),
        UniqueConstraint("entity_id", "field_name", "provider", name=constraint_name),
    )

    # Lookup indexes; constructing them attaches them to the table.
    Index(f"idx_{embeddings_table_name}_entity", table.c.entity_id)
    Index(f"idx_{embeddings_table_name}_field_provider", table.c.field_name, table.c.provider)

    return table
|
|
511
|
+
|
|
512
|
+
|
|
513
|
+
def _import_model_modules() -> list[str]:
    """
    Import every module listed in ``settings.models.module_list``.

    Importing these modules before schema generation gives downstream models
    decorated with @rem.register_model the chance to register themselves.
    A module that fails with ImportError is logged as a warning and skipped.

    Returns:
        Names of the modules that were imported successfully, in order.
    """
    import importlib
    from ...settings import settings

    loaded: list[str] = []
    for module_name in settings.models.module_list:
        try:
            importlib.import_module(module_name)
        except ImportError as e:
            # Best effort: one bad module must not block the others.
            logger.warning(f"Failed to import model module '{module_name}': {e}")
        else:
            loaded.append(module_name)
            logger.debug(f"Imported model module: {module_name}")
    return loaded
|
|
535
|
+
|
|
536
|
+
|
|
537
|
+
def get_target_metadata() -> MetaData:
    """
    Return the SQLAlchemy metadata used for Alembic autogenerate.

    Described in the original notes as the main entry point for
    alembic/env.py and ``rem db diff``.

    Steps:
        1. Import the configured model modules so that models registered via
           the @rem.register_model decorator are present in the registry.
        2. Build the metadata from the registry (core REM models plus
           user-registered models).

    Returns:
        SQLAlchemy MetaData object representing all registered Pydantic models.
    """
    # Must run first: registration happens as a side effect of these imports.
    if imported := _import_model_modules():
        logger.info(f"Imported model modules: {imported}")

    # The builder reads the registry itself, so no directory argument is passed.
    return build_sqlalchemy_metadata_from_pydantic()
|