remdb-0.3.114-py3-none-any.whl → remdb-0.3.172-py3-none-any.whl
- rem/agentic/agents/__init__.py +16 -0
- rem/agentic/agents/agent_manager.py +311 -0
- rem/agentic/agents/sse_simulator.py +2 -0
- rem/agentic/context.py +103 -5
- rem/agentic/context_builder.py +36 -9
- rem/agentic/mcp/tool_wrapper.py +161 -18
- rem/agentic/otel/setup.py +1 -0
- rem/agentic/providers/phoenix.py +371 -108
- rem/agentic/providers/pydantic_ai.py +172 -30
- rem/agentic/schema.py +8 -4
- rem/api/deps.py +3 -5
- rem/api/main.py +26 -4
- rem/api/mcp_router/resources.py +15 -10
- rem/api/mcp_router/server.py +11 -3
- rem/api/mcp_router/tools.py +418 -4
- rem/api/middleware/tracking.py +5 -5
- rem/api/routers/admin.py +218 -1
- rem/api/routers/auth.py +349 -6
- rem/api/routers/chat/completions.py +255 -7
- rem/api/routers/chat/models.py +81 -7
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +17 -1
- rem/api/routers/chat/streaming.py +126 -19
- rem/api/routers/feedback.py +134 -14
- rem/api/routers/messages.py +24 -15
- rem/api/routers/query.py +6 -3
- rem/auth/__init__.py +13 -3
- rem/auth/jwt.py +352 -0
- rem/auth/middleware.py +115 -10
- rem/auth/providers/__init__.py +4 -1
- rem/auth/providers/email.py +215 -0
- rem/cli/commands/README.md +42 -0
- rem/cli/commands/cluster.py +617 -168
- rem/cli/commands/configure.py +4 -7
- rem/cli/commands/db.py +66 -22
- rem/cli/commands/experiments.py +468 -76
- rem/cli/commands/schema.py +6 -5
- rem/cli/commands/session.py +336 -0
- rem/cli/dreaming.py +2 -2
- rem/cli/main.py +2 -0
- rem/config.py +8 -1
- rem/models/core/experiment.py +58 -14
- rem/models/entities/__init__.py +4 -0
- rem/models/entities/ontology.py +1 -1
- rem/models/entities/ontology_config.py +1 -1
- rem/models/entities/subscriber.py +175 -0
- rem/models/entities/user.py +1 -0
- rem/schemas/agents/core/agent-builder.yaml +235 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
- rem/schemas/agents/examples/contract-extractor.yaml +1 -1
- rem/schemas/agents/examples/cv-parser.yaml +1 -1
- rem/services/__init__.py +3 -1
- rem/services/content/service.py +4 -3
- rem/services/email/__init__.py +10 -0
- rem/services/email/service.py +513 -0
- rem/services/email/templates.py +360 -0
- rem/services/phoenix/client.py +59 -18
- rem/services/postgres/README.md +38 -0
- rem/services/postgres/diff_service.py +127 -6
- rem/services/postgres/pydantic_to_sqlalchemy.py +45 -13
- rem/services/postgres/repository.py +5 -4
- rem/services/postgres/schema_generator.py +205 -4
- rem/services/session/compression.py +120 -50
- rem/services/session/reload.py +14 -7
- rem/services/user_service.py +41 -9
- rem/settings.py +442 -23
- rem/sql/migrations/001_install.sql +156 -0
- rem/sql/migrations/002_install_models.sql +1951 -88
- rem/sql/migrations/004_cache_system.sql +548 -0
- rem/sql/migrations/005_schema_update.sql +145 -0
- rem/utils/README.md +45 -0
- rem/utils/__init__.py +18 -0
- rem/utils/files.py +157 -1
- rem/utils/schema_loader.py +139 -10
- rem/utils/sql_paths.py +146 -0
- rem/utils/vision.py +1 -1
- rem/workers/__init__.py +3 -1
- rem/workers/db_listener.py +579 -0
- rem/workers/unlogged_maintainer.py +463 -0
- {remdb-0.3.114.dist-info → remdb-0.3.172.dist-info}/METADATA +218 -180
- {remdb-0.3.114.dist-info → remdb-0.3.172.dist-info}/RECORD +83 -68
- {remdb-0.3.114.dist-info → remdb-0.3.172.dist-info}/WHEEL +0 -0
- {remdb-0.3.114.dist-info → remdb-0.3.172.dist-info}/entry_points.txt +0 -0
--- a/rem/services/postgres/diff_service.py
+++ b/rem/services/postgres/diff_service.py
@@ -22,6 +22,7 @@ from alembic.runtime.migration import MigrationContext
 from alembic.script import ScriptDirectory
 from loguru import logger
 from sqlalchemy import create_engine, text
+from sqlalchemy.dialects import postgresql

 from ...settings import settings
 from .pydantic_to_sqlalchemy import get_target_metadata
@@ -49,6 +50,7 @@ class SchemaDiff:
     summary: list[str] = field(default_factory=list)
     sql: str = ""
     upgrade_ops: Optional[ops.UpgradeOps] = None
+    filtered_count: int = 0  # Number of operations filtered out by strategy

     @property
     def change_count(self) -> int:
@@ -61,17 +63,24 @@ class DiffService:
     Service for comparing Pydantic models against database schema.

     Uses Alembic's autogenerate machinery without creating revision files.
+
+    Strategies:
+        additive: Only ADD operations (columns, tables, indexes). No drops. Safe for production.
+        full: All operations including DROPs. Use with caution.
+        safe: Additive + safe column type changes (widenings like VARCHAR(50) -> VARCHAR(256)).
     """

-    def __init__(self, models_dir: Optional[Path] = None):
+    def __init__(self, models_dir: Optional[Path] = None, strategy: str = "additive"):
         """
         Initialize diff service.

         Args:
             models_dir: Directory containing Pydantic models.
                 If None, uses default rem/models/entities location.
+            strategy: Migration strategy - 'additive' (default), 'full', or 'safe'
         """
         self.models_dir = models_dir
+        self.strategy = strategy
         self._metadata = None

     def get_connection_url(self) -> str:
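The `strategy` knob is the headline change in diff_service.py. A minimal usage sketch: `DiffService`, the strategy values, and the `SchemaDiff` fields are taken from the hunks above, but the `diff()` entry-point name is a placeholder, since the public method that returns a `SchemaDiff` is not visible in this diff:

```python
from rem.services.postgres.diff_service import DiffService

svc = DiffService(strategy="additive")  # default; "full" and "safe" also accepted

result = svc.diff()  # hypothetical entry point returning a SchemaDiff

print(result.change_count)    # property defined on SchemaDiff
print(result.filtered_count)  # new field: ops dropped by the strategy filter
print(result.sql)             # generated DDL
```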
@@ -130,6 +139,7 @@ class DiffService:
         metadata = self.get_target_metadata()

         summary = []
+        filtered_count = 0

         with engine.connect() as conn:
             # Create migration context for comparison
@@ -148,9 +158,13 @@
             migration_script = produce_migrations(context, metadata)
             upgrade_ops = migration_script.upgrade_ops

-            #
+            # Filter operations based on strategy
             if upgrade_ops and upgrade_ops.ops:
-
+                filtered_ops, filtered_count = self._filter_operations(upgrade_ops.ops)
+                upgrade_ops.ops = filtered_ops
+
+                # Process filtered operations
+                for op in filtered_ops:
                     summary.extend(self._describe_operation(op))

         has_changes = len(summary) > 0
@@ -165,8 +179,100 @@
             summary=summary,
             sql=sql,
             upgrade_ops=upgrade_ops,
+            filtered_count=filtered_count,
         )

+    def _filter_operations(self, operations: list) -> tuple[list, int]:
+        """
+        Filter operations based on migration strategy.
+
+        Args:
+            operations: List of Alembic operations
+
+        Returns:
+            Tuple of (filtered_operations, count_of_filtered_out)
+        """
+        if self.strategy == "full":
+            # Full strategy: include everything
+            return operations, 0
+
+        filtered = []
+        filtered_count = 0
+
+        for op in operations:
+            if isinstance(op, ops.ModifyTableOps):
+                # Filter sub-operations within table
+                sub_filtered, sub_count = self._filter_operations(op.ops)
+                filtered_count += sub_count
+                if sub_filtered:
+                    op.ops = sub_filtered
+                    filtered.append(op)
+            elif self._is_allowed_operation(op):
+                filtered.append(op)
+            else:
+                filtered_count += 1
+
+        return filtered, filtered_count
+
+    def _is_allowed_operation(self, op: ops.MigrateOperation) -> bool:
+        """
+        Check if an operation is allowed by the current strategy.
+
+        Args:
+            op: Alembic operation
+
+        Returns:
+            True if operation is allowed, False if it should be filtered out
+        """
+        # Additive operations (allowed in all strategies)
+        if isinstance(op, (ops.CreateTableOp, ops.AddColumnOp, ops.CreateIndexOp, ops.CreateForeignKeyOp)):
+            return True
+
+        # Destructive operations (only allowed in 'full' strategy)
+        if isinstance(op, (ops.DropTableOp, ops.DropColumnOp, ops.DropIndexOp, ops.DropConstraintOp)):
+            return self.strategy == "full"
+
+        # Alter operations
+        if isinstance(op, ops.AlterColumnOp):
+            if self.strategy == "full":
+                return True
+            if self.strategy == "safe":
+                # Allow safe type changes (widenings)
+                return self._is_safe_type_change(op)
+            # additive: no alter operations
+            return False
+
+        # Unknown operations: allow in full, deny otherwise
+        return self.strategy == "full"
+
+    def _is_safe_type_change(self, op: ops.AlterColumnOp) -> bool:
+        """
+        Check if a column type change is safe (widening, not narrowing).
+
+        Safe changes:
+        - VARCHAR(n) -> VARCHAR(m) where m > n
+        - INTEGER -> BIGINT
+        - Adding nullable (NOT NULL -> NULL)
+
+        Args:
+            op: AlterColumnOp to check
+
+        Returns:
+            True if the change is safe
+        """
+        # Allowing nullable is always safe
+        if op.modify_nullable is True:
+            return True
+
+        # Type changes: only allow VARCHAR widenings for now
+        if op.modify_type is not None:
+            new_type = str(op.modify_type).upper()
+            # VARCHAR widenings are generally safe
+            if "VARCHAR" in new_type:
+                return True  # Assume widening; could add length comparison
+
+        return False
+
     def _describe_operation(self, op: ops.MigrateOperation, prefix: str = "") -> list[str]:
         """Convert Alembic operation to human-readable description."""
         descriptions = []
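A quick sanity check of the allow/deny matrix above, using real Alembic op classes; the table and column names are illustrative:

```python
from alembic.operations import ops
from sqlalchemy import Column, String
from rem.services.postgres.diff_service import DiffService

add_col = ops.AddColumnOp("users", Column("nickname", String(64)))
drop_table = ops.DropTableOp("legacy_sessions")

additive = DiffService(strategy="additive")
full = DiffService(strategy="full")

assert additive._is_allowed_operation(add_col)         # ADDs pass every strategy
assert not additive._is_allowed_operation(drop_table)  # ...but DROPs are filtered
assert full._is_allowed_operation(drop_table)          # "full" keeps everything
```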
@@ -367,6 +473,18 @@ class DiffService:

         return "\n".join(lines) + "\n"

+    def _compile_type(self, col_type) -> str:
+        """Compile SQLAlchemy type to PostgreSQL DDL string.
+
+        SQLAlchemy types like ARRAY(Text) need dialect-specific compilation
+        to render correctly (e.g., "TEXT[]" instead of just "ARRAY").
+        """
+        try:
+            return col_type.compile(dialect=postgresql.dialect())
+        except Exception:
+            # Fallback to string representation if compilation fails
+            return str(col_type)
+
     def _op_to_sql(self, op: ops.MigrateOperation) -> list[str]:
         """Convert operation to SQL statements."""
         lines = []
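The docstring's motivating example is easy to verify directly against SQLAlchemy:

```python
from sqlalchemy import Text
from sqlalchemy.dialects import postgresql
from sqlalchemy.types import ARRAY

# Dialect-aware compilation renders the PostgreSQL array syntax the DDL needs,
# which is why _compile_type prefers compile() and only falls back to str().
print(ARRAY(Text()).compile(dialect=postgresql.dialect()))  # TEXT[]
```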
@@ -376,7 +494,8 @@
             for col in op.columns:
                 if hasattr(col, 'name') and hasattr(col, 'type'):
                     nullable = "" if getattr(col, 'nullable', True) else " NOT NULL"
-
+                    type_str = self._compile_type(col.type)
+                    cols.append(f"    {col.name} {type_str}{nullable}")
             col_str = ",\n".join(cols)
             lines.append(f"CREATE TABLE IF NOT EXISTS {op.table_name} (\n{col_str}\n);")

@@ -386,14 +505,16 @@
         elif isinstance(op, ops.AddColumnOp):
             col = op.column
             nullable = "" if getattr(col, 'nullable', True) else " NOT NULL"
-
+            type_str = self._compile_type(col.type)
+            lines.append(f"ALTER TABLE {op.table_name} ADD COLUMN IF NOT EXISTS {col.name} {type_str}{nullable};")

         elif isinstance(op, ops.DropColumnOp):
             lines.append(f"ALTER TABLE {op.table_name} DROP COLUMN IF EXISTS {op.column_name};")

         elif isinstance(op, ops.AlterColumnOp):
             if op.modify_type is not None:
-
+                type_str = self._compile_type(op.modify_type)
+                lines.append(f"ALTER TABLE {op.table_name} ALTER COLUMN {op.column_name} TYPE {type_str};")
             if op.modify_nullable is not None:
                 if op.modify_nullable:
                     lines.append(f"ALTER TABLE {op.table_name} ALTER COLUMN {op.column_name} DROP NOT NULL;")
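Taken together with `_compile_type`, the emitter now renders dialect-correct, idempotent DDL. A sketch of the output for a single added column, with illustrative table and column names:

```python
from alembic.operations import ops
from sqlalchemy import Column, Text
from rem.services.postgres.diff_service import DiffService

op = ops.AddColumnOp("resources", Column("summary", Text(), nullable=True))
print(DiffService()._op_to_sql(op))
# ['ALTER TABLE resources ADD COLUMN IF NOT EXISTS summary TEXT;']
```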
--- a/rem/services/postgres/pydantic_to_sqlalchemy.py
+++ b/rem/services/postgres/pydantic_to_sqlalchemy.py
@@ -494,12 +494,13 @@ def _build_embeddings_table(base_table_name: str, metadata: MetaData) -> Table:
     ]

     # Create table with unique constraint
-    #
+    # Truncate constraint name to fit PostgreSQL's 63-char identifier limit
+    constraint_name = f"uq_{base_table_name[:30]}_emb_entity_field_prov"
     table = Table(
         embeddings_table_name,
         metadata,
         *columns,
-        UniqueConstraint("entity_id", "field_name", "provider", name=
+        UniqueConstraint("entity_id", "field_name", "provider", name=constraint_name),
     )

     # Add indexes (matching register_type output)
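The 30-character slice bounds the generated name well below PostgreSQL's 63-byte identifier limit even for long table names; the arithmetic, as a quick check:

```python
prefix, suffix = "uq_", "_emb_entity_field_prov"
worst_case = len(prefix) + 30 + len(suffix)  # 3 + 30 + 22 = 55
assert worst_case <= 63  # PostgreSQL silently truncates identifiers past 63 bytes
```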
@@ -509,22 +510,53 @@ def _build_embeddings_table(base_table_name: str, metadata: MetaData) -> Table:
     return table


-def
+def _import_model_modules() -> list[str]:
     """
-
+    Import modules specified in MODELS__IMPORT_MODULES setting.

-    This
+    This ensures downstream models decorated with @rem.register_model
+    are registered before schema generation.

     Returns:
-
+        List of successfully imported module names
     """
-    import
+    import importlib
+    from ...settings import settings
+
+    imported = []
+    for module_name in settings.models.module_list:
+        try:
+            importlib.import_module(module_name)
+            imported.append(module_name)
+            logger.debug(f"Imported model module: {module_name}")
+        except ImportError as e:
+            logger.warning(f"Failed to import model module '{module_name}': {e}")
+    return imported

-    package_root = Path(rem.__file__).parent.parent.parent
-    models_dir = package_root / "src" / "rem" / "models" / "entities"

-
-
-
+def get_target_metadata() -> MetaData:
+    """
+    Get SQLAlchemy metadata for Alembic autogenerate.

-
+    This is the main entry point used by alembic/env.py and rem db diff.
+
+    Uses the model registry as the source of truth, which includes:
+    - Core REM models (Resource, Message, User, etc.)
+    - User-registered models via @rem.register_model decorator
+
+    Before building metadata, imports model modules from settings to ensure
+    downstream models are registered. This supports:
+    - Auto-detection of ./models directory (convention)
+    - MODELS__IMPORT_MODULES env var (explicit configuration)
+
+    Returns:
+        SQLAlchemy MetaData object representing all registered Pydantic models
+    """
+    # Import model modules first (auto-detects ./models or uses MODELS__IMPORT_MODULES)
+    imported = _import_model_modules()
+    if imported:
+        logger.info(f"Imported model modules: {imported}")
+
+    # build_sqlalchemy_metadata_from_pydantic uses the registry internally,
+    # so no directory path is needed (the parameter is kept for backwards compat)
+    return build_sqlalchemy_metadata_from_pydantic()
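How a downstream project would drive the new import hook, assuming pydantic-settings' double-underscore env mapping for `settings.models.module_list` (the `MODELS__IMPORT_MODULES` name comes from the docstrings above; the comma separator and module names are assumptions):

```python
import os

# Assumption: must be set before rem.settings is first imported.
os.environ["MODELS__IMPORT_MODULES"] = "myapp.models,myapp.billing.models"

from rem.services.postgres.pydantic_to_sqlalchemy import get_target_metadata

metadata = get_target_metadata()  # imports the modules, then builds MetaData
print(sorted(metadata.tables))    # core REM tables plus downstream ones
```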
--- a/rem/services/postgres/repository.py
+++ b/rem/services/postgres/repository.py
@@ -141,13 +141,13 @@ class Repository(Generic[T]):
         # Return single item or list to match input type
         return records_list[0] if is_single else records_list

-    async def get_by_id(self, record_id: str, tenant_id: str) -> T | None:
+    async def get_by_id(self, record_id: str, tenant_id: str | None = None) -> T | None:
         """
         Get a single record by ID.

         Args:
             record_id: Record identifier
-            tenant_id:
+            tenant_id: Optional tenant identifier (deprecated, not used for filtering)

         Returns:
             Model instance or None if not found
@@ -164,13 +164,14 @@ class Repository(Generic[T]):
         if not self.db.pool:
             raise RuntimeError("Failed to establish database connection")

+        # Note: tenant_id filtering removed - use user_id for access control instead
         query = f"""
             SELECT * FROM {self.table_name}
-            WHERE id = $1 AND
+            WHERE id = $1 AND deleted_at IS NULL
         """

         async with self.db.pool.acquire() as conn:
-            row = await conn.fetchrow(query, record_id
+            row = await conn.fetchrow(query, record_id)

         if not row:
             return None
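With `tenant_id` now optional and unused, both call styles behave identically; a sketch against a hypothetical repository instance:

```python
async def fetch(repo, record_id: str):
    new_style = await repo.get_by_id(record_id)
    old_style = await repo.get_by_id(record_id, tenant_id="acme")  # arg ignored
    # Soft-deleted rows are excluded either way via deleted_at IS NULL.
    assert new_style == old_style
    return new_style
```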
--- a/rem/services/postgres/schema_generator.py
+++ b/rem/services/postgres/schema_generator.py
@@ -12,6 +12,7 @@ Output includes:
 - KV_STORE triggers
 - Indexes (foreground and background)
 - Migrations
+- Schema table entries (for agent-like table access)

 Usage:
     from rem.services.postgres.schema_generator import SchemaGenerator
@@ -30,14 +31,192 @@ Usage:

 import importlib.util
 import inspect
+import json
+import uuid
 from pathlib import Path
-from typing import Type
+from typing import Any, Type

 from loguru import logger
 from pydantic import BaseModel

 from ...settings import settings
-from .
+from ...utils.sql_paths import get_package_sql_dir
+from .register_type import register_type, should_embed_field
+
+# Namespace UUID for generating deterministic UUIDs from model names
+# Using UUID5 with this namespace ensures same model always gets same UUID
+REM_SCHEMA_NAMESPACE = uuid.UUID("6ba7b810-9dad-11d1-80b4-00c04fd430c8")  # DNS namespace
+
+
+def generate_model_uuid(fully_qualified_name: str) -> uuid.UUID:
+    """
+    Generate deterministic UUID from fully qualified model name.
+
+    Uses UUID5 (SHA-1 hash) with REM namespace for reproducibility.
+    Same fully qualified name always produces same UUID.
+
+    Args:
+        fully_qualified_name: Full module path, e.g., "rem.models.entities.Resource"
+
+    Returns:
+        Deterministic UUID for this model
+    """
+    return uuid.uuid5(REM_SCHEMA_NAMESPACE, fully_qualified_name)
+
+
+def extract_model_schema_metadata(
+    model: Type[BaseModel],
+    table_name: str,
+    entity_key_field: str,
+    include_search_tool: bool = True,
+) -> dict[str, Any]:
+    """
+    Extract schema metadata from a Pydantic model for schemas table.
+
+    Args:
+        model: Pydantic model class
+        table_name: Database table name
+        entity_key_field: Field used as entity key in kv_store
+        include_search_tool: If True, add search_rem tool for querying this table
+
+    Returns:
+        Dict with schema metadata ready for schemas table insert
+    """
+    # Get fully qualified name
+    fqn = f"{model.__module__}.{model.__name__}"
+
+    # Generate deterministic UUID
+    schema_id = generate_model_uuid(fqn)
+
+    # Get JSON schema from Pydantic
+    json_schema = model.model_json_schema()
+
+    # Find embedding fields
+    embedding_fields = []
+    for field_name, field_info in model.model_fields.items():
+        if should_embed_field(field_name, field_info):
+            embedding_fields.append(field_name)
+
+    # Build description with search capability note
+    base_description = model.__doc__ or f"Schema for {model.__name__}"
+    search_note = (
+        f"\n\nThis agent can search the `{table_name}` table using the `search_rem` tool. "
+        f"Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, "
+        f"SEARCH for semantic similarity, or SQL for complex queries."
+    ) if include_search_tool else ""
+
+    # Build spec with table metadata and tools
+    # Note: default_search_table is used by create_agent to append a description
+    # suffix to the search_rem tool when loading it dynamically
+    has_embeddings = bool(embedding_fields)
+
+    spec = {
+        "type": "object",
+        "description": base_description + search_note,
+        "properties": json_schema.get("properties", {}),
+        "required": json_schema.get("required", []),
+        "json_schema_extra": {
+            "table_name": table_name,
+            "entity_key_field": entity_key_field,
+            "embedding_fields": embedding_fields,
+            "fully_qualified_name": fqn,
+            "tools": ["search_rem"] if include_search_tool else [],
+            "default_search_table": table_name,
+            "has_embeddings": has_embeddings,
+        },
+    }
+
+    # Build content (documentation)
+    content = f"""# {model.__name__}
+
+{base_description}
+
+## Overview
+
+The `{model.__name__}` entity is stored in the `{table_name}` table. Each record is uniquely
+identified by its `{entity_key_field}` field for lookups and graph traversal.
+
+## Search Capabilities
+
+This schema includes the `search_rem` tool which supports:
+- **LOOKUP**: O(1) exact match by {entity_key_field} (e.g., `LOOKUP "entity-name"`)
+- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
+- **SEARCH**: Semantic vector search on {', '.join(embedding_fields) if embedding_fields else 'content'} (e.g., `SEARCH "concept" FROM {table_name} LIMIT 10`)
+- **SQL**: Complex queries (e.g., `SELECT * FROM {table_name} WHERE ...`)
+
+## Table Info
+
+| Property | Value |
+|----------|-------|
+| Table | `{table_name}` |
+| Entity Key | `{entity_key_field}` |
+| Embedding Fields | {', '.join(f'`{f}`' for f in embedding_fields) if embedding_fields else 'None'} |
+| Tools | {', '.join(['`search_rem`'] if include_search_tool else ['None'])} |
+
+## Fields
+
+"""
+    for field_name, field_info in model.model_fields.items():
+        field_type = str(field_info.annotation) if field_info.annotation else "Any"
+        field_desc = field_info.description or ""
+        required = "Required" if field_info.is_required() else "Optional"
+        content += f"### `{field_name}`\n"
+        content += f"- **Type**: `{field_type}`\n"
+        content += f"- **{required}**\n"
+        if field_desc:
+            content += f"- {field_desc}\n"
+        content += "\n"
+
+    return {
+        "id": str(schema_id),
+        "name": model.__name__,
+        "table_name": table_name,
+        "entity_key_field": entity_key_field,
+        "embedding_fields": embedding_fields,
+        "fqn": fqn,
+        "spec": spec,
+        "content": content,
+        "category": "entity",
+    }
+
+
+def generate_schema_upsert_sql(schema_metadata: dict[str, Any]) -> str:
+    """
+    Generate SQL UPSERT statement for schemas table.
+
+    Uses ON CONFLICT DO UPDATE for idempotency.
+
+    Args:
+        schema_metadata: Dict from extract_model_schema_metadata()
+
+    Returns:
+        SQL INSERT ... ON CONFLICT statement
+    """
+    # Escape single quotes in content and spec
+    content_escaped = schema_metadata["content"].replace("'", "''")
+    spec_json = json.dumps(schema_metadata["spec"]).replace("'", "''")
+
+    sql = f"""
+-- Schema entry for {schema_metadata['name']} ({schema_metadata['table_name']})
+INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
+VALUES (
+    '{schema_metadata['id']}'::uuid,
+    'system',
+    '{schema_metadata['name']}',
+    '{content_escaped}',
+    '{spec_json}'::jsonb,
+    'entity',
+    '{{"table_name": "{schema_metadata['table_name']}", "entity_key_field": "{schema_metadata['entity_key_field']}", "embedding_fields": {json.dumps(schema_metadata['embedding_fields'])}, "fqn": "{schema_metadata['fqn']}"}}'::jsonb
+)
+ON CONFLICT (id) DO UPDATE SET
+    name = EXCLUDED.name,
+    content = EXCLUDED.content,
+    spec = EXCLUDED.spec,
+    category = EXCLUDED.category,
+    metadata = EXCLUDED.metadata,
+    updated_at = CURRENT_TIMESTAMP;
+"""
+    return sql.strip()


 class SchemaGenerator:
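The deterministic IDs are what let the generated upserts use `ON CONFLICT (id)`; note that `REM_SCHEMA_NAMESPACE` is byte-for-byte `uuid.NAMESPACE_DNS`, so the scheme is easy to verify standalone:

```python
import uuid

NS = uuid.UUID("6ba7b810-9dad-11d1-80b4-00c04fd430c8")
assert NS == uuid.NAMESPACE_DNS

a = uuid.uuid5(NS, "rem.models.entities.Resource")  # example FQN from the docstring
b = uuid.uuid5(NS, "rem.models.entities.Resource")
assert a == b  # same FQN -> same id -> reruns hit ON CONFLICT (id) DO UPDATE
```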
@@ -56,9 +235,9 @@ class SchemaGenerator:
         Initialize schema generator.

         Args:
-            output_dir: Optional directory for output files (defaults to
+            output_dir: Optional directory for output files (defaults to package sql dir)
         """
-        self.output_dir = output_dir or
+        self.output_dir = output_dir or get_package_sql_dir()
         self.schemas: dict[str, dict] = {}

     def discover_models(self, directory: str | Path) -> dict[str, Type[BaseModel]]:
@@ -234,6 +413,14 @@ class SchemaGenerator:
             create_kv_trigger=True,
         )

+        # Extract schema metadata for schemas table entry
+        schema_metadata = extract_model_schema_metadata(
+            model=model,
+            table_name=table_name,
+            entity_key_field=entity_key_field,
+        )
+        schema["schema_metadata"] = schema_metadata
+
         self.schemas[table_name] = schema
         return schema

@@ -343,6 +530,7 @@ class SchemaGenerator:
             "-- 2. Embeddings tables (embeddings_<table>)",
             "-- 3. KV_STORE triggers for cache maintenance",
             "-- 4. Indexes (foreground only, background indexes separate)",
+            "-- 5. Schema table entries (for agent-like table access)",
             "",
             "-- ============================================================================",
             "-- PREREQUISITES CHECK",
@@ -388,6 +576,19 @@ class SchemaGenerator:
             sql_parts.append(schema["sql"]["kv_trigger"])
             sql_parts.append("")

+        # Add schema table entries (every entity table is also an "agent")
+        sql_parts.append("-- ============================================================================")
+        sql_parts.append("-- SCHEMA TABLE ENTRIES")
+        sql_parts.append("-- Every entity table gets a schemas entry for agent-like access")
+        sql_parts.append("-- ============================================================================")
+        sql_parts.append("")
+
+        for table_name, schema in self.schemas.items():
+            if "schema_metadata" in schema:
+                schema_upsert = generate_schema_upsert_sql(schema["schema_metadata"])
+                sql_parts.append(schema_upsert)
+                sql_parts.append("")
+
         # Add migration record
         sql_parts.append("-- ============================================================================")
         sql_parts.append("-- RECORD MIGRATION")