remdb 0.3.163__py3-none-any.whl → 0.3.200__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of remdb might be problematic.
- rem/agentic/agents/agent_manager.py +2 -1
- rem/agentic/context.py +101 -0
- rem/agentic/context_builder.py +30 -8
- rem/agentic/mcp/tool_wrapper.py +43 -14
- rem/agentic/providers/pydantic_ai.py +76 -34
- rem/agentic/schema.py +4 -3
- rem/agentic/tools/rem_tools.py +11 -0
- rem/api/main.py +1 -1
- rem/api/mcp_router/resources.py +75 -14
- rem/api/mcp_router/server.py +31 -24
- rem/api/mcp_router/tools.py +476 -155
- rem/api/routers/auth.py +11 -6
- rem/api/routers/chat/completions.py +52 -10
- rem/api/routers/chat/sse_events.py +2 -2
- rem/api/routers/chat/streaming.py +162 -19
- rem/api/routers/messages.py +96 -23
- rem/auth/middleware.py +59 -42
- rem/cli/README.md +62 -0
- rem/cli/commands/ask.py +1 -1
- rem/cli/commands/db.py +148 -70
- rem/cli/commands/process.py +171 -43
- rem/models/entities/ontology.py +93 -101
- rem/schemas/agents/core/agent-builder.yaml +143 -42
- rem/services/content/service.py +18 -5
- rem/services/email/service.py +17 -6
- rem/services/embeddings/worker.py +26 -12
- rem/services/postgres/__init__.py +28 -3
- rem/services/postgres/diff_service.py +57 -5
- rem/services/postgres/programmable_diff_service.py +635 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +2 -2
- rem/services/postgres/register_type.py +12 -11
- rem/services/postgres/repository.py +32 -21
- rem/services/postgres/schema_generator.py +5 -5
- rem/services/postgres/sql_builder.py +6 -5
- rem/services/session/__init__.py +7 -1
- rem/services/session/pydantic_messages.py +210 -0
- rem/services/user_service.py +12 -9
- rem/settings.py +7 -1
- rem/sql/background_indexes.sql +5 -0
- rem/sql/migrations/001_install.sql +148 -11
- rem/sql/migrations/002_install_models.sql +162 -132
- rem/sql/migrations/004_cache_system.sql +7 -275
- rem/utils/model_helpers.py +101 -0
- rem/utils/schema_loader.py +51 -13
- {remdb-0.3.163.dist-info → remdb-0.3.200.dist-info}/METADATA +1 -1
- {remdb-0.3.163.dist-info → remdb-0.3.200.dist-info}/RECORD +48 -46
- {remdb-0.3.163.dist-info → remdb-0.3.200.dist-info}/WHEEL +0 -0
- {remdb-0.3.163.dist-info → remdb-0.3.200.dist-info}/entry_points.txt +0 -0
rem/auth/middleware.py
CHANGED
```diff
@@ -7,14 +7,21 @@ Anonymous access with rate limiting when allow_anonymous=True.
 MCP endpoints are always protected unless explicitly disabled.
 
 Design Pattern:
-
-
-
-
+- API Key (X-API-Key): Access control guardrail, NOT user identity
+- JWT (Authorization: Bearer): Primary method for user identity
+- Dev token: Non-production testing (starts with "dev_")
+- Session: Backward compatibility for browser-based auth
 - MCP paths always require authentication (protected service)
-
-
-
+
+Authentication Flow:
+1. Check JWT/dev token/session for user identity first
+2. If user is admin: bypass API key check (admin privilege)
+3. If API key enabled and user is not admin: Validate X-API-Key header
+4. If allow_anonymous=True: Allow as anonymous (rate-limited)
+5. If allow_anonymous=False: Return 401 / redirect to login
+
+IMPORTANT: API key validates ACCESS, JWT identifies USER.
+Admin users bypass the API key requirement (trusted identity).
 
 Access Modes (configured in settings.auth):
 - enabled=true, allow_anonymous=true: Auth available, anonymous gets rate-limited access
@@ -24,10 +31,9 @@ Access Modes (configured in settings.auth):
 - mcp_requires_auth=false: MCP follows normal allow_anonymous rules (dev only)
 
 API Key Authentication (configured in settings.api):
-- api_key_enabled=true: Require X-API-Key header for
+- api_key_enabled=true: Require X-API-Key header for access
 - api_key: The secret key to validate against
-
-- X-API-Key header takes precedence over session auth
+- API key is an ACCESS GATE, not user identity - JWT still needed for user
 
 Dev Token Support (non-production only):
 - GET /api/auth/dev/token returns a Bearer token for test-user
@@ -188,6 +194,12 @@ class AuthMiddleware(BaseHTTPMiddleware):
 
         return None
 
+    def _is_admin(self, user: dict | None) -> bool:
+        """Check if user has admin role."""
+        if not user:
+            return False
+        return "admin" in user.get("roles", [])
+
     async def dispatch(self, request: Request, call_next):
         """
         Check authentication for protected paths.
@@ -212,50 +224,55 @@ class AuthMiddleware(BaseHTTPMiddleware):
         if not is_protected or is_excluded:
             return await call_next(request)
 
-        # Check for
-
-
-
+        # Check for user identity FIRST (JWT, dev token, session)
+        # This allows admin users to bypass API key requirement
+        user = None
+
+        # Check for JWT token in Authorization header (primary user identity)
+        jwt_user = self._check_jwt_token(request)
+        if jwt_user:
+            user = jwt_user
+
+        # Check for dev token (non-production only)
+        if not user:
+            dev_user = self._check_dev_token(request)
+            if dev_user:
+                user = dev_user
+
+        # Check for valid session (backward compatibility)
+        if not user:
+            session_user = request.session.get("user")
+            if session_user:
+                user = session_user
+
+        # If user is admin, bypass API key check entirely
+        if self._is_admin(user):
+            logger.debug(f"Admin user {user.get('email')} bypassing API key check")
+            request.state.user = user
            request.state.is_anonymous = False
            return await call_next(request)
 
-        #
+        # API key validation for non-admin users (access control guardrail)
         if settings.api.api_key_enabled:
-
-            if
+            api_key = request.headers.get("x-api-key")
+            if not api_key:
+                logger.debug(f"Missing X-API-Key for: {path}")
+                return JSONResponse(
+                    status_code=401,
+                    content={"detail": "API key required. Include X-API-Key header."},
+                    headers={"WWW-Authenticate": 'ApiKey realm="REM API"'},
+                )
+            if api_key != settings.api.api_key:
                 logger.warning(f"Invalid X-API-Key for: {path}")
                 return JSONResponse(
                     status_code=401,
                     content={"detail": "Invalid API key"},
                     headers={"WWW-Authenticate": 'ApiKey realm="REM API"'},
                 )
-
-            logger.debug(f"Missing X-API-Key for: {path}")
-            return JSONResponse(
-                status_code=401,
-                content={"detail": "API key required. Include X-API-Key header."},
-                headers={"WWW-Authenticate": 'ApiKey realm="REM API"'},
-            )
-
-        # Check for JWT token in Authorization header
-        jwt_user = self._check_jwt_token(request)
-        if jwt_user:
-            request.state.user = jwt_user
-            request.state.is_anonymous = False
-            return await call_next(request)
-
-        # Check for dev token (non-production only)
-        dev_user = self._check_dev_token(request)
-        if dev_user:
-            request.state.user = dev_user
-            request.state.is_anonymous = False
-            return await call_next(request)
-
-        # Check for valid session (backward compatibility)
-        user = request.session.get("user")
+            logger.debug("X-API-Key validated for access")
 
+        # If we have a valid user (non-admin, but passed API key check), allow access
         if user:
-            # Authenticated user - add to request state
             request.state.user = user
             request.state.is_anonymous = False
             return await call_next(request)
```
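For context, a minimal client-side sketch of the new flow: non-admin callers send both headers, admins need only the JWT. The base URL and the `/api/messages` path are assumptions for illustration, not taken from this diff:

```python
import httpx

BASE_URL = "http://localhost:8000"  # assumed dev server address

# Non-admin callers pass BOTH headers: X-API-Key is the access gate,
# the JWT carries the user identity.
headers = {
    "X-API-Key": "shared-secret",                # access control guardrail
    "Authorization": "Bearer <jwt-from-login>",  # user identity
}
resp = httpx.get(f"{BASE_URL}/api/messages", headers=headers)
if resp.status_code == 401:
    # Middleware returns 401 with WWW-Authenticate: ApiKey realm="REM API"
    print(resp.json()["detail"])

# Admin JWTs bypass the API key check entirely, so no X-API-Key is needed:
resp = httpx.get(
    f"{BASE_URL}/api/messages",
    headers={"Authorization": "Bearer <admin-jwt>"},
)
```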
rem/cli/README.md
CHANGED
````diff
@@ -434,6 +434,68 @@ Ensure you're using the correct model format:
 - OpenAI: `openai:gpt-4o-mini`, `openai:gpt-4o`
 - Anthropic: `anthropic:claude-sonnet-4-5-20250929`
 
+## Data Visibility: PUBLIC vs PRIVATE
+
+**IMPORTANT: All ingested data is PUBLIC by default.** This is the correct behavior
+for shared knowledge bases (ontologies, procedures, reference data).
+
+### Why PUBLIC by Default?
+
+Most data in REM should be searchable by all users:
+- Clinical ontologies (disorders, symptoms, drugs)
+- Procedures and protocols (SCID-5, PHQ-9, etc.)
+- Reference documentation
+- Shared domain knowledge
+
+The `rem_lookup()` function searches for data where `user_id IS NULL`, which means
+public data. If you set `user_id` on data, it becomes invisible to other users.
+
+### Ingesting Public Data (Default)
+
+```bash
+# Standard ingestion - data is PUBLIC
+rem process ingest ontology/procedures/ --table ontologies
+
+# From S3 - also PUBLIC
+rem process ingest s3://bucket/docs/reference.pdf
+```
+
+### Ingesting Private Data (Rare)
+
+Private data requires explicit `--make-private` flag:
+
+```bash
+# Private user data - requires --make-private and --user-id
+rem process ingest personal-notes.md --make-private --user-id user-123
+```
+
+**When to use private data:**
+- User-uploaded personal documents
+- Session-specific content
+- User notes and annotations
+
+**NEVER use private data for:**
+- Ontologies and reference material
+- Clinical procedures and protocols
+- Shared knowledge bases
+- Anything that should be searchable by agents
+
+### Common Mistake
+
+If agents can't find data via `search_rem`, the most common cause is that the data
+was ingested with a `user_id` set. Check with:
+
+```sql
+SELECT name, user_id FROM ontologies WHERE name = 'phq-9-procedure';
+-- user_id should be NULL for public data
+```
+
+Fix by setting user_id to NULL:
+```sql
+UPDATE ontologies SET user_id = NULL WHERE user_id IS NOT NULL;
+UPDATE kv_store SET user_id = NULL WHERE entity_type = 'ontologies' AND user_id IS NOT NULL;
+```
+
 ## Next Steps
 
 1. **Implement Schema Registry**
````
rem/cli/commands/ask.py
CHANGED
```diff
@@ -75,7 +75,7 @@ async def run_agent_streaming(
     """
     Run agent in streaming mode using agent.iter() with usage limits.
 
-    Design Pattern
+    Design Pattern:
     - Use agent.iter() for complete execution with tool call visibility
     - run_stream() stops after first output, missing tool calls
     - Stream tool call markers: [Calling: tool_name]
```
rem/cli/commands/db.py
CHANGED
```diff
@@ -333,64 +333,120 @@ def rebuild_cache(connection: str | None):
 
 @click.command()
 @click.argument("file_path", type=click.Path(exists=True, path_type=Path))
+@click.option("--table", "-t", default=None, help="Target table name (required for non-YAML formats)")
 @click.option("--user-id", default=None, help="User ID to scope data privately (default: public/shared)")
 @click.option("--dry-run", is_flag=True, help="Show what would be loaded without loading")
-def load(file_path: Path, user_id: str | None, dry_run: bool):
+def load(file_path: Path, table: str | None, user_id: str | None, dry_run: bool):
     """
-    Load data from
+    Load data from file into database.
 
-
-
-        key_field: name
-        rows:
-          - name: Example
-            content: Test data...
+    Supports YAML with embedded metadata, or any tabular format via Polars
+    (jsonl, parquet, csv, json, arrow, etc.). For non-YAML formats, use --table.
 
     Examples:
-        rem db load
-        rem db load data.
-        rem db load data.yaml --dry-run
+        rem db load data.yaml                # YAML with metadata
+        rem db load data.jsonl -t resources  # Any Polars-supported format
     """
-    asyncio.run(_load_async(file_path, user_id, dry_run))
+    asyncio.run(_load_async(file_path, table, user_id, dry_run))
 
 
-
+def _load_dataframe_from_file(file_path: Path) -> "pl.DataFrame":
+    """Load any Polars-supported file format into a DataFrame."""
+    import polars as pl
+
+    suffix = file_path.suffix.lower()
+
+    if suffix in {".jsonl", ".ndjson"}:
+        return pl.read_ndjson(file_path)
+    elif suffix in {".parquet", ".pq"}:
+        return pl.read_parquet(file_path)
+    elif suffix == ".csv":
+        return pl.read_csv(file_path)
+    elif suffix == ".json":
+        return pl.read_json(file_path)
+    elif suffix in {".ipc", ".arrow"}:
+        return pl.read_ipc(file_path)
+    else:
+        raise ValueError(f"Unsupported file format: {suffix}. Use any Polars-supported format.")
+
+
+async def _load_async(file_path: Path, table: str | None, user_id: str | None, dry_run: bool):
     """Async implementation of load command."""
+    import polars as pl
     import yaml
     from ...models.core.inline_edge import InlineEdge
-    from ...models.entities import
+    from ...models.entities import SharedSession
     from ...services.postgres import get_postgres_service
+    from ...utils.model_helpers import get_table_name
+    from ... import get_model_registry
 
     logger.info(f"Loading data from: {file_path}")
     scope_msg = f"user: {user_id}" if user_id else "public"
     logger.info(f"Scope: {scope_msg}")
 
-
-
-        data = yaml.safe_load(f)
-
-    if not isinstance(data, list):
-        logger.error("YAML must be a list of table definitions")
-        raise click.Abort()
-
-    if dry_run:
-        logger.info("DRY RUN - Would load:")
-        logger.info(yaml.dump(data, default_flow_style=False))
-        return
+    suffix = file_path.suffix.lower()
+    is_yaml = suffix in {".yaml", ".yml"}
 
-    #
-
+    # Build MODEL_MAP dynamically from registry
+    registry = get_model_registry()
+    registry.register_core_models()
     MODEL_MAP = {
-
-
-        "resources": Resource,
-        "messages": Message,
-        "schemas": Schema,
+        get_table_name(model): model
+        for model in registry.get_model_classes().values()
     }
 
     # Non-CoreModel tables that need direct SQL insertion
     DIRECT_INSERT_TABLES = {"shared_sessions"}
 
+    # Parse file based on format
+    if is_yaml:
+        # YAML with embedded metadata
+        with open(file_path) as f:
+            data = yaml.safe_load(f)
+
+        if not isinstance(data, list):
+            logger.error("YAML must be a list of table definitions")
+            raise click.Abort()
+
+        if dry_run:
+            logger.info("DRY RUN - Would load:")
+            logger.info(yaml.dump(data, default_flow_style=False))
+            return
+
+        table_defs = data
+    else:
+        # Polars-supported format - require --table
+        if not table:
+            logger.error(f"For {suffix} files, --table is required. Example: rem db load {file_path.name} -t resources")
+            raise click.Abort()
+
+        try:
+            df = _load_dataframe_from_file(file_path)
+        except Exception as e:
+            logger.error(f"Failed to load file: {e}")
+            raise click.Abort()
+
+        rows = df.to_dicts()
+
+        if dry_run:
+            logger.info(f"DRY RUN - Would load {len(rows)} rows to table '{table}':")
+            logger.info(f"Columns: {list(df.columns)}")
+
+            # Validate first row against model if table is known
+            if table in MODEL_MAP and rows:
+                from ...utils.model_helpers import validate_data_for_model
+                result = validate_data_for_model(MODEL_MAP[table], rows[0])
+                if result.extra_fields:
+                    logger.warning(f"Unknown fields (ignored): {result.extra_fields}")
+                if result.valid:
+                    logger.success(f"Sample row validates OK. Required: {result.required_fields or '(none)'}")
+                else:
+                    result.log_errors("Sample row")
+            return
+
+        # Wrap as single table definition
+        table_defs = [{"table": table, "rows": rows}]
+
     # Connect to database
     pg = get_postgres_service()
     if not pg:
@@ -399,23 +455,23 @@ async def _load_async(file_path: Path, user_id: str | None, dry_run: bool):
 
     await pg.connect()
 
+    # Start embedding worker for generating embeddings
+    if pg.embedding_worker:
+        await pg.embedding_worker.start()
+
     try:
         total_loaded = 0
 
-        for table_def in
+        for table_def in table_defs:
             table_name = table_def["table"]
-            key_field = table_def.get("key_field", "id")
             rows = table_def.get("rows", [])
 
             # Handle direct insert tables (non-CoreModel)
             if table_name in DIRECT_INSERT_TABLES:
                 for row_data in rows:
-                    #
-                    if "tenant_id" not in row_data:
-                        row_data["tenant_id"] = "default"
+                    # tenant_id is optional - NULL means public/shared
 
                     if table_name == "shared_sessions":
-                        # Insert shared_session directly
                         await pg.fetch(
                             """INSERT INTO shared_sessions
                             (session_id, owner_user_id, shared_with_user_id, tenant_id)
@@ -424,7 +480,7 @@ async def _load_async(file_path: Path, user_id: str | None, dry_run: bool):
                             row_data["session_id"],
                             row_data["owner_user_id"],
                             row_data["shared_with_user_id"],
-                            row_data
+                            row_data.get("tenant_id"),  # Optional - NULL means public
                         )
                         total_loaded += 1
                         logger.success(f"Loaded shared_session: {row_data['owner_user_id']} -> {row_data['shared_with_user_id']}")
@@ -434,16 +490,11 @@ async def _load_async(file_path: Path, user_id: str | None, dry_run: bool):
                 logger.warning(f"Unknown table: {table_name}, skipping")
                 continue
 
-            model_class = MODEL_MAP[table_name]
+            model_class = MODEL_MAP[table_name]
 
-            for row_data in rows:
-                #
-                #
-                # Pass --user-id to scope data privately to a specific user
-                if "user_id" not in row_data and user_id is not None:
-                    row_data["user_id"] = user_id
-                if "tenant_id" not in row_data and user_id is not None:
-                    row_data["tenant_id"] = row_data.get("user_id", user_id)
+            for row_idx, row_data in enumerate(rows):
+                # tenant_id and user_id are optional - NULL means public/shared data
+                # Data files can explicitly set tenant_id/user_id if needed
 
                 # Convert graph_edges to InlineEdge format if present
                 if "graph_edges" in row_data:
@@ -452,30 +503,40 @@ async def _load_async(file_path: Path, user_id: str | None, dry_run: bool):
                         for edge in row_data["graph_edges"]
                     ]
 
-                # Convert
-                # This handles fields like starts_timestamp, ends_timestamp, etc.
+                # Convert ISO timestamp strings
                 from ...utils.date_utils import parse_iso
                 for key, value in list(row_data.items()):
                     if isinstance(value, str) and (key.endswith("_timestamp") or key.endswith("_at")):
                         try:
                             row_data[key] = parse_iso(value)
                         except (ValueError, TypeError):
-                            pass
+                            pass
 
-                # Create model instance and upsert via repository
                 from ...services.postgres.repository import Repository
+                from ...utils.model_helpers import validate_data_for_model
 
-
-
-
+                result = validate_data_for_model(model_class, row_data)
+                if not result.valid:
+                    result.log_errors(f"Row {row_idx + 1} ({table_name})")
+                    raise click.Abort()
+
+                repo = Repository(model_class, table_name, pg)
+                await repo.upsert(result.instance)  # type: ignore[arg-type]
                 total_loaded += 1
 
-
-                name = getattr(instance, 'name', getattr(instance, 'id', '?'))
+                name = getattr(result.instance, 'name', getattr(result.instance, 'id', '?'))
                 logger.success(f"Loaded {table_name[:-1]}: {name}")
 
         logger.success(f"Data loaded successfully! Total rows: {total_loaded}")
 
+        # Wait for embeddings to complete
+        if pg.embedding_worker and pg.embedding_worker.running:
+            queue_size = pg.embedding_worker.task_queue.qsize()
+            if queue_size > 0:
+                logger.info(f"Waiting for {queue_size} embeddings to complete...")
+            await pg.embedding_worker.stop()
+            logger.success("Embeddings generated successfully")
+
     finally:
         await pg.disconnect()
 
@@ -580,7 +641,7 @@ async def _diff_async(
 
     if not result.has_changes:
         click.secho("✓ No schema drift detected", fg="green")
-        click.echo("  Database matches
+        click.echo("  Database matches source (tables, functions, triggers, views)")
         if result.filtered_count > 0:
             click.echo()
             click.secho(f"  ({result.filtered_count} destructive change(s) hidden by '{strategy}' strategy)", fg="yellow")
@@ -592,17 +653,34 @@ async def _diff_async(
     if result.filtered_count > 0:
         click.secho(f"  ({result.filtered_count} destructive change(s) hidden by '{strategy}' strategy)", fg="yellow")
     click.echo()
-
-
-
-
-
-
-
-
-
-
-
+
+    # Table/column changes (Alembic)
+    if result.summary:
+        click.echo("Table Changes:")
+        for line in result.summary:
+            if line.startswith("+"):
+                click.secho(f"  {line}", fg="green")
+            elif line.startswith("-"):
+                click.secho(f"  {line}", fg="red")
+            elif line.startswith("~"):
+                click.secho(f"  {line}", fg="yellow")
+            else:
+                click.echo(f"  {line}")
+        click.echo()
+
+    # Programmable object changes (functions, triggers, views)
+    if result.programmable_summary:
+        click.echo("Programmable Objects (functions/triggers/views):")
+        for line in result.programmable_summary:
+            if line.startswith("+"):
+                click.secho(f"  {line}", fg="green")
+            elif line.startswith("-"):
+                click.secho(f"  {line}", fg="red")
+            elif line.startswith("~"):
+                click.secho(f"  {line}", fg="yellow")
+            else:
+                click.echo(f"  {line}")
+        click.echo()
 
     # Generate migration if requested
     if generate:
```