remdb 0.3.171__py3-none-any.whl → 0.3.230__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. rem/agentic/README.md +36 -2
  2. rem/agentic/context.py +173 -0
  3. rem/agentic/context_builder.py +12 -2
  4. rem/agentic/mcp/tool_wrapper.py +39 -16
  5. rem/agentic/providers/pydantic_ai.py +78 -45
  6. rem/agentic/schema.py +6 -5
  7. rem/agentic/tools/rem_tools.py +11 -0
  8. rem/api/main.py +1 -1
  9. rem/api/mcp_router/resources.py +75 -14
  10. rem/api/mcp_router/server.py +31 -24
  11. rem/api/mcp_router/tools.py +621 -166
  12. rem/api/routers/admin.py +30 -4
  13. rem/api/routers/auth.py +114 -15
  14. rem/api/routers/chat/child_streaming.py +379 -0
  15. rem/api/routers/chat/completions.py +74 -37
  16. rem/api/routers/chat/sse_events.py +7 -3
  17. rem/api/routers/chat/streaming.py +352 -257
  18. rem/api/routers/chat/streaming_utils.py +327 -0
  19. rem/api/routers/common.py +18 -0
  20. rem/api/routers/dev.py +7 -1
  21. rem/api/routers/feedback.py +9 -1
  22. rem/api/routers/messages.py +176 -38
  23. rem/api/routers/models.py +9 -1
  24. rem/api/routers/query.py +12 -1
  25. rem/api/routers/shared_sessions.py +16 -0
  26. rem/auth/jwt.py +19 -4
  27. rem/auth/middleware.py +42 -28
  28. rem/cli/README.md +62 -0
  29. rem/cli/commands/ask.py +61 -81
  30. rem/cli/commands/db.py +148 -70
  31. rem/cli/commands/process.py +171 -43
  32. rem/models/entities/ontology.py +91 -101
  33. rem/schemas/agents/rem.yaml +1 -1
  34. rem/services/content/service.py +18 -5
  35. rem/services/email/service.py +11 -2
  36. rem/services/embeddings/worker.py +26 -12
  37. rem/services/postgres/__init__.py +28 -3
  38. rem/services/postgres/diff_service.py +57 -5
  39. rem/services/postgres/programmable_diff_service.py +635 -0
  40. rem/services/postgres/pydantic_to_sqlalchemy.py +2 -2
  41. rem/services/postgres/register_type.py +12 -11
  42. rem/services/postgres/repository.py +39 -29
  43. rem/services/postgres/schema_generator.py +5 -5
  44. rem/services/postgres/sql_builder.py +6 -5
  45. rem/services/session/__init__.py +8 -1
  46. rem/services/session/compression.py +40 -2
  47. rem/services/session/pydantic_messages.py +292 -0
  48. rem/settings.py +34 -0
  49. rem/sql/background_indexes.sql +5 -0
  50. rem/sql/migrations/001_install.sql +157 -10
  51. rem/sql/migrations/002_install_models.sql +160 -132
  52. rem/sql/migrations/004_cache_system.sql +7 -275
  53. rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
  54. rem/utils/model_helpers.py +101 -0
  55. rem/utils/schema_loader.py +79 -51
  56. {remdb-0.3.171.dist-info → remdb-0.3.230.dist-info}/METADATA +2 -2
  57. {remdb-0.3.171.dist-info → remdb-0.3.230.dist-info}/RECORD +59 -53
  58. {remdb-0.3.171.dist-info → remdb-0.3.230.dist-info}/WHEEL +0 -0
  59. {remdb-0.3.171.dist-info → remdb-0.3.230.dist-info}/entry_points.txt +0 -0
rem/cli/README.md CHANGED
@@ -434,6 +434,68 @@ Ensure you're using the correct model format:
  - OpenAI: `openai:gpt-4o-mini`, `openai:gpt-4o`
  - Anthropic: `anthropic:claude-sonnet-4-5-20250929`
 
+ ## Data Visibility: PUBLIC vs PRIVATE
+
+ **IMPORTANT: All ingested data is PUBLIC by default.** This is the correct behavior
+ for shared knowledge bases (ontologies, procedures, reference data).
+
+ ### Why PUBLIC by Default?
+
+ Most data in REM should be searchable by all users:
+ - Clinical ontologies (disorders, symptoms, drugs)
+ - Procedures and protocols (SCID-5, PHQ-9, etc.)
+ - Reference documentation
+ - Shared domain knowledge
+
+ The `rem_lookup()` function searches for data where `user_id IS NULL`, which means
+ public data. If you set `user_id` on data, it becomes invisible to other users.
+
+ ### Ingesting Public Data (Default)
+
+ ```bash
+ # Standard ingestion - data is PUBLIC
+ rem process ingest ontology/procedures/ --table ontologies
+
+ # From S3 - also PUBLIC
+ rem process ingest s3://bucket/docs/reference.pdf
+ ```
+
+ ### Ingesting Private Data (Rare)
+
+ Private data requires explicit `--make-private` flag:
+
+ ```bash
+ # Private user data - requires --make-private and --user-id
+ rem process ingest personal-notes.md --make-private --user-id user-123
+ ```
+
+ **When to use private data:**
+ - User-uploaded personal documents
+ - Session-specific content
+ - User notes and annotations
+
+ **NEVER use private data for:**
+ - Ontologies and reference material
+ - Clinical procedures and protocols
+ - Shared knowledge bases
+ - Anything that should be searchable by agents
+
+ ### Common Mistake
+
+ If agents can't find data via `search_rem`, the most common cause is that the data
+ was ingested with a `user_id` set. Check with:
+
+ ```sql
+ SELECT name, user_id FROM ontologies WHERE name = 'phq-9-procedure';
+ -- user_id should be NULL for public data
+ ```
+
+ Fix by setting user_id to NULL:
+ ```sql
+ UPDATE ontologies SET user_id = NULL WHERE user_id IS NOT NULL;
+ UPDATE kv_store SET user_id = NULL WHERE entity_type = 'ontologies' AND user_id IS NOT NULL;
+ ```
+
  ## Next Steps
 
  1. **Implement Schema Registry**
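
As an illustrative aside (not part of the diff): the README addition above defines visibility purely in terms of `user_id IS NULL`. A minimal sketch of running that same check programmatically, assuming a local REM Postgres database reachable via asyncpg; the DSN is a placeholder, while the `ontologies` table and its `name`/`user_id` columns come from the SQL shown above.

```python
# Sketch: verify that ingested ontology rows are public (user_id IS NULL).
# The DSN below is an assumed placeholder, not a documented default.
import asyncio
import asyncpg

async def check_visibility(name: str) -> None:
    conn = await asyncpg.connect("postgresql://localhost/rem")  # assumed DSN
    try:
        rows = await conn.fetch(
            "SELECT name, user_id FROM ontologies WHERE name = $1", name
        )
        for row in rows:
            scope = "PUBLIC" if row["user_id"] is None else f"private to {row['user_id']}"
            print(f"{row['name']}: {scope}")
    finally:
        await conn.close()

asyncio.run(check_visibility("phq-9-procedure"))
```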
rem/cli/commands/ask.py CHANGED
@@ -71,16 +71,18 @@ async def run_agent_streaming(
  max_turns: int = 10,
  context: AgentContext | None = None,
  max_iterations: int | None = None,
+ user_message: str | None = None,
  ) -> None:
  """
- Run agent in streaming mode using agent.iter() with usage limits.
+ Run agent in streaming mode using the SAME code path as the API.
 
- Design Pattern (from carrier):
- - Use agent.iter() for complete execution with tool call visibility
- - run_stream() stops after first output, missing tool calls
- - Stream tool call markers: [Calling: tool_name]
- - Stream text content deltas as they arrive
- - Show final structured result
+ This uses stream_openai_response_with_save from the API to ensure:
+ 1. Tool calls are saved as separate "tool" messages (not embedded in content)
+ 2. Assistant response is clean text only (no [Calling: ...] markers)
+ 3. CLI testing is equivalent to API testing
+
+ The CLI displays tool calls as [Calling: tool_name] for visibility,
+ but these are NOT saved to the database.
 
  Args:
  agent: Pydantic AI agent
@@ -88,88 +90,66 @@ async def run_agent_streaming(
  max_turns: Maximum turns for agent execution (not used in current API)
  context: Optional AgentContext for session persistence
  max_iterations: Maximum iterations/requests (from agent schema or settings)
+ user_message: The user's original message (for database storage)
  """
- from pydantic_ai import UsageLimits
- from rem.utils.date_utils import to_iso_with_z, utc_now
+ import json
+ from rem.api.routers.chat.streaming import stream_openai_response_with_save, save_user_message
 
  logger.info("Running agent in streaming mode...")
 
  try:
- # Import event types for streaming
- from pydantic_ai import Agent as PydanticAgent
- from pydantic_ai.messages import PartStartEvent, PartDeltaEvent, TextPartDelta, ToolCallPart
-
- # Accumulate assistant response for session persistence
- assistant_response_parts = []
-
- # Use agent.iter() to get complete execution with tool calls
- usage_limits = UsageLimits(request_limit=max_iterations) if max_iterations else None
- async with agent.iter(prompt, usage_limits=usage_limits) as agent_run:
- async for node in agent_run:
- # Check if this is a model request node (includes tool calls and text)
- if PydanticAgent.is_model_request_node(node):
- # Stream events from model request
- request_stream: Any
- async with node.stream(agent_run.ctx) as request_stream:
- async for event in request_stream:
- # Tool call start event
- if isinstance(event, PartStartEvent) and isinstance(
- event.part, ToolCallPart
- ):
- tool_marker = f"\n[Calling: {event.part.tool_name}]"
- print(tool_marker, flush=True)
- assistant_response_parts.append(tool_marker)
-
- # Text content delta
- elif isinstance(event, PartDeltaEvent) and isinstance(
- event.delta, TextPartDelta
- ):
- print(event.delta.content_delta, end="", flush=True)
- assistant_response_parts.append(event.delta.content_delta)
-
- print("\n") # Final newline after streaming
-
- # Get final result from agent_run
- result = agent_run.result
- if hasattr(result, "output"):
- logger.info("Final structured result:")
- output = result.output
- from rem.agentic.serialization import serialize_agent_result
- output_json = json.dumps(serialize_agent_result(output), indent=2)
- print(output_json)
- assistant_response_parts.append(f"\n{output_json}")
-
- # Save session messages (if session_id provided and postgres enabled)
- if context and context.session_id and settings.postgres.enabled:
- from ...services.session.compression import SessionMessageStore
-
- # Extract just the user query from prompt
- # Prompt format from ContextBuilder: system + history + user message
- # We need to extract the last user message
- user_message_content = prompt.split("\n\n")[-1] if "\n\n" in prompt else prompt
-
- user_message = {
- "role": "user",
- "content": user_message_content,
- "timestamp": to_iso_with_z(utc_now()),
- }
-
- assistant_message = {
- "role": "assistant",
- "content": "".join(assistant_response_parts),
- "timestamp": to_iso_with_z(utc_now()),
- }
-
- # Store messages with compression
- store = SessionMessageStore(user_id=context.user_id or settings.test.effective_user_id)
- await store.store_session_messages(
+ # Save user message BEFORE streaming (same as API, using shared utility)
+ if context and context.session_id and user_message:
+ await save_user_message(
  session_id=context.session_id,
- messages=[user_message, assistant_message],
  user_id=context.user_id,
- compress=True,
+ content=user_message,
  )
 
- logger.debug(f"Saved conversation to session {context.session_id}")
+ # Use the API streaming code path for consistency
+ # This properly handles tool calls and message persistence
+ model_name = getattr(agent, 'model', 'unknown')
+ if hasattr(model_name, 'model_name'):
+ model_name = model_name.model_name
+ elif hasattr(model_name, 'name'):
+ model_name = model_name.name
+ else:
+ model_name = str(model_name)
+
+ async for chunk in stream_openai_response_with_save(
+ agent=agent.agent if hasattr(agent, 'agent') else agent,
+ prompt=prompt,
+ model=model_name,
+ session_id=context.session_id if context else None,
+ user_id=context.user_id if context else None,
+ agent_context=context,
+ ):
+ # Parse SSE chunks for CLI display
+ if chunk.startswith("event: tool_call"):
+ # Extract tool call info from next data line
+ continue
+ elif chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
+ try:
+ data_str = chunk[6:].strip()
+ if data_str:
+ data = json.loads(data_str)
+ # Check for tool_call event
+ if data.get("type") == "tool_call":
+ tool_name = data.get("tool_name", "tool")
+ status = data.get("status", "")
+ if status == "started":
+ print(f"\n[Calling: {tool_name}]", flush=True)
+ # Check for text content (OpenAI format)
+ elif "choices" in data and data["choices"]:
+ delta = data["choices"][0].get("delta", {})
+ content = delta.get("content")
+ if content:
+ print(content, end="", flush=True)
+ except (json.JSONDecodeError, KeyError, IndexError):
+ pass
+
+ print("\n") # Final newline after streaming
+ logger.info("Final structured result:")
 
  except Exception as e:
  logger.error(f"Agent execution failed: {e}")
@@ -549,7 +529,7 @@ async def _ask_async(
 
  # Run agent with session persistence
  if stream:
- await run_agent_streaming(agent, prompt, max_turns=max_turns, context=context)
+ await run_agent_streaming(agent, prompt, max_turns=max_turns, context=context, user_message=query)
  else:
  await run_agent_non_streaming(
  agent,
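
As an illustrative aside (not part of the diff): the new CLI loop above consumes the same SSE stream as the API and only acts on two chunk shapes, `tool_call` status events and OpenAI-style `choices[].delta` chunks. Below is a standalone sketch of that parsing; the sample payloads are hypothetical and inferred from the parsing code, not from a documented wire format.

```python
# Sketch of the two SSE data shapes the CLI display loop handles.
# The payloads below are illustrative, not captured output.
import json

chunks = [
    'data: {"type": "tool_call", "tool_name": "search_rem", "status": "started"}',
    'data: {"choices": [{"delta": {"content": "Hello"}}]}',
    "data: [DONE]",
]

for chunk in chunks:
    if not chunk.startswith("data: ") or chunk.startswith("data: [DONE]"):
        continue  # ignore non-data lines and the terminator
    data = json.loads(chunk[6:].strip())
    if data.get("type") == "tool_call" and data.get("status") == "started":
        print(f"\n[Calling: {data.get('tool_name', 'tool')}]")
    elif data.get("choices"):
        content = data["choices"][0].get("delta", {}).get("content")
        if content:
            print(content, end="")
```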
rem/cli/commands/db.py CHANGED
@@ -333,64 +333,120 @@ def rebuild_cache(connection: str | None):
 
  @click.command()
  @click.argument("file_path", type=click.Path(exists=True, path_type=Path))
+ @click.option("--table", "-t", default=None, help="Target table name (required for non-YAML formats)")
  @click.option("--user-id", default=None, help="User ID to scope data privately (default: public/shared)")
  @click.option("--dry-run", is_flag=True, help="Show what would be loaded without loading")
- def load(file_path: Path, user_id: str | None, dry_run: bool):
+ def load(file_path: Path, table: str | None, user_id: str | None, dry_run: bool):
  """
- Load data from YAML file into database.
+ Load data from file into database.
 
- File format:
- - table: resources
- key_field: name
- rows:
- - name: Example
- content: Test data...
+ Supports YAML with embedded metadata, or any tabular format via Polars
+ (jsonl, parquet, csv, json, arrow, etc.). For non-YAML formats, use --table.
 
  Examples:
- rem db load rem/tests/data/graph_seed.yaml
- rem db load data.yaml --user-id my-user # Private to user
- rem db load data.yaml --dry-run
+ rem db load data.yaml # YAML with metadata
+ rem db load data.jsonl -t resources # Any Polars-supported format
  """
- asyncio.run(_load_async(file_path, user_id, dry_run))
+ asyncio.run(_load_async(file_path, table, user_id, dry_run))
 
 
- async def _load_async(file_path: Path, user_id: str | None, dry_run: bool):
+ def _load_dataframe_from_file(file_path: Path) -> "pl.DataFrame":
+ """Load any Polars-supported file format into a DataFrame."""
+ import polars as pl
+
+ suffix = file_path.suffix.lower()
+
+ if suffix in {".jsonl", ".ndjson"}:
+ return pl.read_ndjson(file_path)
+ elif suffix in {".parquet", ".pq"}:
+ return pl.read_parquet(file_path)
+ elif suffix == ".csv":
+ return pl.read_csv(file_path)
+ elif suffix == ".json":
+ return pl.read_json(file_path)
+ elif suffix in {".ipc", ".arrow"}:
+ return pl.read_ipc(file_path)
+ else:
+ raise ValueError(f"Unsupported file format: {suffix}. Use any Polars-supported format.")
+
+
+ async def _load_async(file_path: Path, table: str | None, user_id: str | None, dry_run: bool):
  """Async implementation of load command."""
+ import polars as pl
  import yaml
  from ...models.core.inline_edge import InlineEdge
- from ...models.entities import Resource, Moment, User, Message, SharedSession, Schema
+ from ...models.entities import SharedSession
  from ...services.postgres import get_postgres_service
+ from ...utils.model_helpers import get_table_name
+ from ... import get_model_registry
 
  logger.info(f"Loading data from: {file_path}")
  scope_msg = f"user: {user_id}" if user_id else "public"
  logger.info(f"Scope: {scope_msg}")
 
- # Load YAML file
- with open(file_path) as f:
- data = yaml.safe_load(f)
-
- if not isinstance(data, list):
- logger.error("YAML must be a list of table definitions")
- raise click.Abort()
-
- if dry_run:
- logger.info("DRY RUN - Would load:")
- logger.info(yaml.dump(data, default_flow_style=False))
- return
+ suffix = file_path.suffix.lower()
+ is_yaml = suffix in {".yaml", ".yml"}
 
- # Map table names to model classes
- # CoreModel subclasses use Repository.upsert()
+ # Build MODEL_MAP dynamically from registry
+ registry = get_model_registry()
+ registry.register_core_models()
  MODEL_MAP = {
- "users": User,
- "moments": Moment,
- "resources": Resource,
- "messages": Message,
- "schemas": Schema,
+ get_table_name(model): model
+ for model in registry.get_model_classes().values()
  }
 
  # Non-CoreModel tables that need direct SQL insertion
  DIRECT_INSERT_TABLES = {"shared_sessions"}
 
+ # Parse file based on format
+ if is_yaml:
+ # YAML with embedded metadata
+ with open(file_path) as f:
+ data = yaml.safe_load(f)
+
+ if not isinstance(data, list):
+ logger.error("YAML must be a list of table definitions")
+ raise click.Abort()
+
+ if dry_run:
+ logger.info("DRY RUN - Would load:")
+ logger.info(yaml.dump(data, default_flow_style=False))
+ return
+
+ table_defs = data
+ else:
+ # Polars-supported format - require --table
+ if not table:
+ logger.error(f"For {suffix} files, --table is required. Example: rem db load {file_path.name} -t resources")
+ raise click.Abort()
+
+ try:
+ df = _load_dataframe_from_file(file_path)
+ except Exception as e:
+ logger.error(f"Failed to load file: {e}")
+ raise click.Abort()
+
+ rows = df.to_dicts()
+
+ if dry_run:
+ logger.info(f"DRY RUN - Would load {len(rows)} rows to table '{table}':")
+ logger.info(f"Columns: {list(df.columns)}")
+
+ # Validate first row against model if table is known
+ if table in MODEL_MAP and rows:
+ from ...utils.model_helpers import validate_data_for_model
+ result = validate_data_for_model(MODEL_MAP[table], rows[0])
+ if result.extra_fields:
+ logger.warning(f"Unknown fields (ignored): {result.extra_fields}")
+ if result.valid:
+ logger.success(f"Sample row validates OK. Required: {result.required_fields or '(none)'}")
+ else:
+ result.log_errors("Sample row")
+ return
+
+ # Wrap as single table definition
+ table_defs = [{"table": table, "rows": rows}]
+
  # Connect to database
  pg = get_postgres_service()
  if not pg:
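
As an illustrative aside (not part of the diff): the non-YAML path added above dispatches on the file suffix, reads the file into a Polars DataFrame, and wraps the resulting row dicts as a single table definition. A self-contained sketch of that flow; the file name and columns are hypothetical.

```python
# Sketch of the non-YAML ingestion path: suffix-based Polars read, then wrap
# the rows as a single table definition. The sample file is created inline.
from pathlib import Path
import polars as pl

sample = Path("resources.jsonl")  # hypothetical input file
sample.write_text('{"name": "example", "content": "Test data"}\n')

df = pl.read_ndjson(sample)  # same call the helper uses for .jsonl/.ndjson
table_defs = [{"table": "resources", "rows": df.to_dicts()}]
print(table_defs)
```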
@@ -399,23 +455,23 @@ async def _load_async(file_path: Path, user_id: str | None, dry_run: bool):
 
  await pg.connect()
 
+ # Start embedding worker for generating embeddings
+ if pg.embedding_worker:
+ await pg.embedding_worker.start()
+
  try:
  total_loaded = 0
 
- for table_def in data:
+ for table_def in table_defs:
  table_name = table_def["table"]
- key_field = table_def.get("key_field", "id")
  rows = table_def.get("rows", [])
 
  # Handle direct insert tables (non-CoreModel)
  if table_name in DIRECT_INSERT_TABLES:
  for row_data in rows:
- # Add tenant_id if not present
- if "tenant_id" not in row_data:
- row_data["tenant_id"] = "default"
+ # tenant_id is optional - NULL means public/shared
 
  if table_name == "shared_sessions":
- # Insert shared_session directly
  await pg.fetch(
  """INSERT INTO shared_sessions
  (session_id, owner_user_id, shared_with_user_id, tenant_id)
@@ -424,7 +480,7 @@ async def _load_async(file_path: Path, user_id: str | None, dry_run: bool):
  row_data["session_id"],
  row_data["owner_user_id"],
  row_data["shared_with_user_id"],
- row_data["tenant_id"],
+ row_data.get("tenant_id"), # Optional - NULL means public
  )
  total_loaded += 1
  logger.success(f"Loaded shared_session: {row_data['owner_user_id']} -> {row_data['shared_with_user_id']}")
@@ -434,16 +490,11 @@ async def _load_async(file_path: Path, user_id: str | None, dry_run: bool):
  logger.warning(f"Unknown table: {table_name}, skipping")
  continue
 
- model_class = MODEL_MAP[table_name] # Type is inferred from MODEL_MAP
+ model_class = MODEL_MAP[table_name]
 
- for row_data in rows:
- # Add user_id and tenant_id only if explicitly provided
- # Default is public (None) - data is shared/visible to all
- # Pass --user-id to scope data privately to a specific user
- if "user_id" not in row_data and user_id is not None:
- row_data["user_id"] = user_id
- if "tenant_id" not in row_data and user_id is not None:
- row_data["tenant_id"] = row_data.get("user_id", user_id)
+ for row_idx, row_data in enumerate(rows):
+ # tenant_id and user_id are optional - NULL means public/shared data
+ # Data files can explicitly set tenant_id/user_id if needed
 
  # Convert graph_edges to InlineEdge format if present
  if "graph_edges" in row_data:
@@ -452,30 +503,40 @@ async def _load_async(file_path: Path, user_id: str | None, dry_run: bool):
  for edge in row_data["graph_edges"]
  ]
 
- # Convert any ISO timestamp strings with Z suffix to naive datetime
- # This handles fields like starts_timestamp, ends_timestamp, etc.
+ # Convert ISO timestamp strings
  from ...utils.date_utils import parse_iso
  for key, value in list(row_data.items()):
  if isinstance(value, str) and (key.endswith("_timestamp") or key.endswith("_at")):
  try:
  row_data[key] = parse_iso(value)
  except (ValueError, TypeError):
- pass # Not a valid datetime string, leave as-is
+ pass
 
- # Create model instance and upsert via repository
  from ...services.postgres.repository import Repository
+ from ...utils.model_helpers import validate_data_for_model
 
- instance = model_class(**row_data)
- repo = Repository(model_class, table_name, pg) # Type inferred from MODEL_MAP
- await repo.upsert(instance) # type: ignore[arg-type]
+ result = validate_data_for_model(model_class, row_data)
+ if not result.valid:
+ result.log_errors(f"Row {row_idx + 1} ({table_name})")
+ raise click.Abort()
+
+ repo = Repository(model_class, table_name, pg)
+ await repo.upsert(result.instance) # type: ignore[arg-type]
  total_loaded += 1
 
- # Log based on model type
- name = getattr(instance, 'name', getattr(instance, 'id', '?'))
+ name = getattr(result.instance, 'name', getattr(result.instance, 'id', '?'))
  logger.success(f"Loaded {table_name[:-1]}: {name}")
 
  logger.success(f"Data loaded successfully! Total rows: {total_loaded}")
 
+ # Wait for embeddings to complete
+ if pg.embedding_worker and pg.embedding_worker.running:
+ queue_size = pg.embedding_worker.task_queue.qsize()
+ if queue_size > 0:
+ logger.info(f"Waiting for {queue_size} embeddings to complete...")
+ await pg.embedding_worker.stop()
+ logger.success("Embeddings generated successfully")
+
  finally:
  await pg.disconnect()
 
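
As an illustrative aside (not part of the diff): taken together, the load path above normalizes each row (timestamp parsing, `graph_edges` conversion), validates it against the registered model, and upserts the validated instance. A sketch of the YAML table-definition shape the loader consumes, expressed as the Python structure `yaml.safe_load` would produce; field names other than `table` and `rows` are hypothetical.

```python
# Sketch of a YAML table definition as the loader sees it after yaml.safe_load.
# "created_at" would be parsed via parse_iso and "graph_edges" converted to
# InlineEdge instances before the row is validated and upserted.
table_defs = [
    {
        "table": "resources",
        "rows": [
            {
                "name": "Example",
                "content": "Test data...",
                "created_at": "2024-01-01T00:00:00Z",  # ISO string -> datetime
                "graph_edges": [],                      # converted to InlineEdge
            }
        ],
    }
]

for table_def in table_defs:
    print(table_def["table"], len(table_def.get("rows", [])))
```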
@@ -580,7 +641,7 @@ async def _diff_async(
 
  if not result.has_changes:
  click.secho("✓ No schema drift detected", fg="green")
- click.echo(" Database matches Pydantic models")
+ click.echo(" Database matches source (tables, functions, triggers, views)")
  if result.filtered_count > 0:
  click.echo()
  click.secho(f" ({result.filtered_count} destructive change(s) hidden by '{strategy}' strategy)", fg="yellow")
@@ -592,17 +653,34 @@ async def _diff_async(
  if result.filtered_count > 0:
  click.secho(f" ({result.filtered_count} destructive change(s) hidden by '{strategy}' strategy)", fg="yellow")
  click.echo()
- click.echo("Changes:")
- for line in result.summary:
- if line.startswith("+"):
- click.secho(f" {line}", fg="green")
- elif line.startswith("-"):
- click.secho(f" {line}", fg="red")
- elif line.startswith("~"):
- click.secho(f" {line}", fg="yellow")
- else:
- click.echo(f" {line}")
- click.echo()
+
+ # Table/column changes (Alembic)
+ if result.summary:
+ click.echo("Table Changes:")
+ for line in result.summary:
+ if line.startswith("+"):
+ click.secho(f" {line}", fg="green")
+ elif line.startswith("-"):
+ click.secho(f" {line}", fg="red")
+ elif line.startswith("~"):
+ click.secho(f" {line}", fg="yellow")
+ else:
+ click.echo(f" {line}")
+ click.echo()
+
+ # Programmable object changes (functions, triggers, views)
+ if result.programmable_summary:
+ click.echo("Programmable Objects (functions/triggers/views):")
+ for line in result.programmable_summary:
+ if line.startswith("+"):
+ click.secho(f" {line}", fg="green")
+ elif line.startswith("-"):
+ click.secho(f" {line}", fg="red")
+ elif line.startswith("~"):
+ click.secho(f" {line}", fg="yellow")
+ else:
+ click.echo(f" {line}")
+ click.echo()
 
  # Generate migration if requested
  if generate: