dbt-cube-sync 0.1.0a11__tar.gz → 0.1.0a12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: this version of dbt-cube-sync has been flagged as a potentially problematic release.
- {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/PKG-INFO +1 -1
- {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/dbt_cube_sync/cli.py +91 -57
- {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/dbt_cube_sync/connectors/superset.py +17 -7
- {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/dbt_cube_sync/core/models.py +4 -12
- {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/dbt_cube_sync/core/state_manager.py +61 -42
- {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/pyproject.toml +1 -1
- {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/README.md +0 -0
- {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/dbt_cube_sync/__init__.py +0 -0
- {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/dbt_cube_sync/config.py +0 -0
- {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/dbt_cube_sync/connectors/__init__.py +0 -0
- {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/dbt_cube_sync/connectors/base.py +0 -0
- {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/dbt_cube_sync/connectors/powerbi.py +0 -0
- {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/dbt_cube_sync/connectors/tableau.py +0 -0
- {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/dbt_cube_sync/core/__init__.py +0 -0
- {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/dbt_cube_sync/core/cube_generator.py +0 -0
- {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/dbt_cube_sync/core/db_inspector.py +0 -0
- {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/dbt_cube_sync/core/dbt_parser.py +0 -0
dbt_cube_sync/cli.py

@@ -444,16 +444,21 @@ def sync_all(
             manifest, manifest_nodes, {}
         )
 
-        #
-        current_state = state_manager.update_step_state(
-            current_state,
-            'cube_sync',
-            'failed' if cube_sync_error else 'success',
-            cube_sync_error
-        )
+        # Save cube sync state
        state_manager.save_state(current_state)
        click.echo(f"  State saved to {state_path}")
 
+        if cube_sync_error:
+            click.echo(f"  Error during cube generation: {cube_sync_error}", err=True)
+
+        # Build a mapping from model name (file stem) to node_id for status updates
+        model_name_to_node_id = {}
+        for node_id in current_state.models.keys():
+            # Extract model name from output file (e.g., "model/cubes/ModelName.js" -> "ModelName")
+            output_file = current_state.models[node_id].output_file
+            model_name = Path(output_file).stem
+            model_name_to_node_id[model_name] = node_id
+
         # ============================================================
         # STEP 2: Sync to Superset (if configured)
         # ============================================================
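The new mapping above recovers each model's name from its generated file path with `Path(...).stem`. A minimal sketch of that lookup (the path value is hypothetical):

from pathlib import Path

output_file = "model/cubes/Orders.js"   # hypothetical stored ModelState.output_file
model_name = Path(output_file).stem     # -> "Orders"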
@@ -462,17 +467,21 @@ def sync_all(
 
         if not superset_url or not superset_username or not superset_password:
             click.echo("  Skipped - no Superset credentials provided")
-            current_state = state_manager.update_step_state(current_state, 'superset_sync', 'skipped')
-            state_manager.save_state(current_state)
         else:
-
-
-            ) or force_full_sync
+            # Get models that need Superset sync (status is None or 'failed')
+            models_to_sync_ids = state_manager.get_models_needing_sync(current_state, 'superset')
 
-            if not
-                click.echo("  Skipped -
+            if not models_to_sync_ids and not force_full_sync:
+                click.echo("  Skipped - all models already synced successfully")
             else:
-
+                # Convert node_ids to model names for filtering
+                models_to_sync_names = set()
+                for node_id in models_to_sync_ids:
+                    if node_id in current_state.models:
+                        output_file = current_state.models[node_id].output_file
+                        model_name = Path(output_file).stem
+                        models_to_sync_names.add(model_name)
+
                 try:
                     connector_config = {
                         'url': superset_url,
@@ -482,24 +491,34 @@ def sync_all(
                     }
 
                     connector = ConnectorRegistry.get_connector('superset', **connector_config)
-
+
+                    if force_full_sync:
+                        results = connector.sync_cube_schemas(output)
+                    else:
+                        results = connector.sync_cube_schemas(output, models_to_sync_names)
+
+                    # Update per-model status
+                    for r in results:
+                        model_name = r.file_or_dataset.replace('.js', '')
+                        node_id = model_name_to_node_id.get(model_name)
+                        if node_id:
+                            state_manager.update_model_sync_status(
+                                current_state, node_id, 'superset',
+                                'success' if r.status == 'success' else 'failed'
+                            )
 
                     successful = sum(1 for r in results if r.status == 'success')
                     failed = sum(1 for r in results if r.status == 'failed')
                     click.echo(f"  Synced: {successful} successful, {failed} failed")
 
-                    if failed > 0:
-                        superset_error = f"{failed} datasets failed to sync"
                 except Exception as e:
-
-
-
-
-
-
-
-                        superset_error
-                    )
+                    click.echo(f"  Error: {str(e)}", err=True)
+                    # Mark all models we tried to sync as failed
+                    for node_id in models_to_sync_ids:
+                        state_manager.update_model_sync_status(
+                            current_state, node_id, 'superset', 'failed'
+                        )
+
                 state_manager.save_state(current_state)
 
         # ============================================================
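Each `SyncResult` is tied back to a dbt node through its dataset name before the status update; a small sketch of the lookup chain, with hypothetical values:

file_or_dataset = "Orders.js"                       # from a hypothetical SyncResult
model_name = file_or_dataset.replace('.js', '')     # -> "Orders"
node_id = model_name_to_node_id.get(model_name)     # e.g. "model.my_project.orders" (hypothetical)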
@@ -510,17 +529,16 @@ def sync_all(
 
         if not rag_api_url:
             click.echo("  Skipped - no RAG API URL provided")
-            current_state = state_manager.update_step_state(current_state, 'rag_sync', 'skipped')
-            state_manager.save_state(current_state)
         else:
-
-
-            ) or force_full_sync
+            # Get models that need RAG sync (status is None or 'failed')
+            models_to_embed_ids = state_manager.get_models_needing_sync(current_state, 'rag')
 
-            if not
-                click.echo("  Skipped -
+            if not models_to_embed_ids and not force_full_sync:
+                click.echo("  Skipped - all models already synced successfully")
             else:
-
+                if force_full_sync:
+                    models_to_embed_ids = set(current_state.models.keys())
+
                 try:
                     # Call the RAG API to re-ingest embeddings
                     response = requests.post(
@@ -532,19 +550,26 @@ def sync_all(
                     if response.status_code == 200:
                         result = response.json()
                         click.echo(f"  Ingested {result.get('schemas_ingested', 0)} schema documents")
+                        # Mark all models as succeeded
+                        for node_id in models_to_embed_ids:
+                            state_manager.update_model_sync_status(
+                                current_state, node_id, 'rag', 'success'
+                            )
                     else:
-
-
+                        click.echo(f"  Error: RAG API returned {response.status_code}", err=True)
+                        # Mark all models as failed
+                        for node_id in models_to_embed_ids:
+                            state_manager.update_model_sync_status(
+                                current_state, node_id, 'rag', 'failed'
+                            )
                 except requests.RequestException as e:
-
-
-
-
-
-
-
-                        rag_error
-                    )
+                    click.echo(f"  Error: Could not reach RAG API: {e}", err=True)
+                    # Mark all models as failed
+                    for node_id in models_to_embed_ids:
+                        state_manager.update_model_sync_status(
+                            current_state, node_id, 'rag', 'failed'
+                        )
+
                 state_manager.save_state(current_state)
 
         # ============================================================
@@ -554,10 +579,23 @@ def sync_all(
         click.echo("SYNC COMPLETE")
         click.echo("=" * 60)
 
-        #
-
-
-
+        # Get per-model sync summaries
+        superset_summary = state_manager.get_sync_summary(current_state, 'superset')
+        rag_summary = state_manager.get_sync_summary(current_state, 'rag')
+
+        def format_summary(summary, step_configured):
+            if not step_configured:
+                return "skipped (not configured)"
+            if summary['failed'] > 0:
+                return f"{summary['success']} success, {summary['failed']} failed (will retry)"
+            elif summary['pending'] > 0:
+                return f"{summary['success']} success, {summary['pending']} pending"
+            else:
+                return f"{summary['success']} success"
+
+        click.echo(f"  Cube.js files: {len(current_state.models)} models")
+        click.echo(f"  Superset sync: {format_summary(superset_summary, superset_url)}")
+        click.echo(f"  RAG sync: {format_summary(rag_summary, rag_api_url)}")
 
         if changes_detected or force_full_sync:
             click.echo(f"  Models processed: {len(added_models) + len(modified_models)}")
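A quick worked example of `format_summary` (sample counts are hypothetical):

# format_summary({'success': 8, 'failed': 2, 'pending': 0}, superset_url) -> "8 success, 2 failed (will retry)"
# format_summary({'success': 8, 'failed': 0, 'pending': 3}, rag_api_url)  -> "8 success, 3 pending"
# format_summary({'success': 0, 'failed': 0, 'pending': 0}, None)         -> "skipped (not configured)"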
@@ -566,14 +604,10 @@ def sync_all(
         else:
             click.echo("  No model changes detected")
 
-        # Exit with error if any
-        any_failed =
-            (current_state.cube_sync and current_state.cube_sync.status == 'failed') or
-            (current_state.superset_sync and current_state.superset_sync.status == 'failed') or
-            (current_state.rag_sync and current_state.rag_sync.status == 'failed')
-        )
+        # Exit with error if any models failed
+        any_failed = superset_summary['failed'] > 0 or rag_summary['failed'] > 0
         if any_failed:
-            click.echo("\n  ⚠️  Some
+            click.echo("\n  ⚠️  Some models failed - they will be retried on next run")
             sys.exit(1)
 
     except Exception as e:
dbt_cube_sync/connectors/superset.py

@@ -123,20 +123,30 @@ class SupersetConnector(BaseConnector):
         self.database_id = result[0]['id']
         print(f"✓ Found database '{database_name}' with ID: {self.database_id}")
 
-    def sync_cube_schemas(self, cube_dir: str) -> List[SyncResult]:
-        """Sync
+    def sync_cube_schemas(self, cube_dir: str, models_filter: set = None) -> List[SyncResult]:
+        """Sync Cube.js schemas from directory to Superset
+
+        Args:
+            cube_dir: Directory containing Cube.js schema files
+            models_filter: Optional set of model names to sync. If None, sync all.
+        """
         results = []
         cube_files = self._get_cube_files(cube_dir)
-
+
         if not cube_files:
             return [SyncResult(
                 file_or_dataset="No files",
-                status="failed",
+                status="failed",
                 message=f"No .js files found in {cube_dir}"
             )]
-
-
-
+
+        # Filter files if models_filter is provided
+        if models_filter:
+            cube_files = [f for f in cube_files if f.stem in models_filter]
+            print(f"🔍 Syncing {len(cube_files)} Cube.js files (filtered from {len(self._get_cube_files(cube_dir))})")
+        else:
+            print(f"🔍 Found {len(cube_files)} Cube.js files")
+
         for cube_file in cube_files:
             try:
                 print(f"\n{'='*60}")
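To illustrate the new `models_filter` parameter, a hedged usage sketch; the connector kwargs and paths below are assumptions, not values from the package:

connector = ConnectorRegistry.get_connector(
    'superset',
    url='http://localhost:8088',   # hypothetical
    username='admin',              # hypothetical
    password='admin',              # hypothetical
)

# Incremental path: only models named in the filter are pushed to Superset
results = connector.sync_cube_schemas('model/cubes', models_filter={'Orders', 'Customers'})
for r in results:
    print(r.file_or_dataset, r.status, r.message)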
dbt_cube_sync/core/models.py

@@ -112,13 +112,9 @@ class ModelState(BaseModel):
     has_metrics: bool
     last_generated: str
     output_file: str
-
-
-class StepState(BaseModel):
-    """Represents the state of a pipeline step"""
-    status: str  # 'success', 'failed', 'skipped'
-    last_run: Optional[str] = None
-    error: Optional[str] = None
+    # Per-model sync status for each step
+    superset_sync_status: Optional[str] = None  # 'success', 'failed', or None (not attempted)
+    rag_sync_status: Optional[str] = None  # 'success', 'failed', or None (not attempted)
 
 
 class SyncState(BaseModel):
@@ -126,8 +122,4 @@ class SyncState(BaseModel):
     version: str = "1.1"
     last_sync_timestamp: str
     manifest_path: str
-    models: Dict[str, ModelState] = {}
-    # Step states for tracking pipeline progress
-    cube_sync: Optional[StepState] = None
-    superset_sync: Optional[StepState] = None
-    rag_sync: Optional[StepState] = None
+    models: Dict[str, ModelState] = {}
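The net effect of the schema change is a tri-state flag per model per step; a sketch of the semantics (field values hypothetical):

state = ModelState(
    checksum="abc123",                     # hypothetical
    has_metrics=True,
    last_generated="2025-01-01T00:00:00",  # hypothetical
    output_file="model/cubes/Orders.js",
)
# Both status fields default to None, i.e. "never attempted":
assert state.superset_sync_status is None and state.rag_sync_status is None
state.superset_sync_status = 'failed'      # 'failed' means: retry on the next run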
dbt_cube_sync/core/state_manager.py

@@ -10,7 +10,7 @@ from datetime import datetime
 from pathlib import Path
 from typing import Dict, List, Optional, Set, Tuple
 
-from .models import ModelState,
+from .models import ModelState, SyncState
 
 
 class StateManager:
@@ -179,15 +179,18 @@ class StateManager:
                 node_data.get("config", {}).get("meta", {}).get("metrics")
             )
 
+            # For newly generated/modified models, reset sync status (they need to be re-synced)
             models[node_id] = ModelState(
                 checksum=checksum,
                 has_metrics=has_metrics,
                 last_generated=timestamp,
                 output_file=output_file,
+                superset_sync_status=None,  # Reset - needs sync
+                rag_sync_status=None,  # Reset - needs sync
             )
 
         return SyncState(
-            version="1.
+            version="1.1",
             last_sync_timestamp=timestamp,
             manifest_path=str(manifest_path),
             models=models,
@@ -220,64 +223,80 @@ class StateManager:
 
         return files_to_delete
 
-    def
+    def get_models_needing_sync(
         self,
-
-
-
-    ) -> bool:
+        state: SyncState,
+        step: str,
+    ) -> Set[str]:
         """
-
+        Get node_ids of models that need to be synced for a step.
 
-        A
-        -
-        -
+        A model needs sync if:
+        - Its sync status is None (never synced)
+        - Its sync status is 'failed' (needs retry)
 
         Args:
-
-
-            changes_detected: Whether model changes were detected
+            state: Current sync state
+            step: Step name ('superset' or 'rag')
 
         Returns:
-
+            Set of node_ids that need syncing
         """
-
-
+        models_to_sync = set()
+        status_field = f"{step}_sync_status"
 
-
-
-
-
-        if step_state is None:
-            return True
+        for node_id, model_state in state.models.items():
+            status = getattr(model_state, status_field, None)
+            if status is None or status == 'failed':
+                models_to_sync.add(node_id)
 
-
-        return step_state.status == 'failed'
+        return models_to_sync
 
-    def
+    def update_model_sync_status(
         self,
         state: SyncState,
-
+        node_id: str,
+        step: str,
         status: str,
-
-
+    ) -> None:
+        """
+        Update the sync status of a model for a specific step.
+
+        Args:
+            state: Current sync state
+            node_id: The model's node_id
+            step: Step name ('superset' or 'rag')
+            status: Status to set ('success' or 'failed')
         """
-
+        if node_id in state.models:
+            status_field = f"{step}_sync_status"
+            setattr(state.models[node_id], status_field, status)
+
+    def get_sync_summary(
+        self,
+        state: SyncState,
+        step: str,
+    ) -> Dict[str, int]:
+        """
+        Get a summary of sync status for a step.
 
         Args:
             state: Current sync state
-
-            status: Step status ('success', 'failed', 'skipped')
-            error: Error message if failed
+            step: Step name ('superset' or 'rag')
 
         Returns:
-
+            Dict with counts: {'success': N, 'failed': N, 'pending': N}
         """
-
-
-
-
-
-
-
-
+        status_field = f"{step}_sync_status"
+        summary = {'success': 0, 'failed': 0, 'pending': 0}
+
+        for model_state in state.models.values():
+            status = getattr(model_state, status_field, None)
+            if status == 'success':
+                summary['success'] += 1
+            elif status == 'failed':
+                summary['failed'] += 1
+            else:
+                summary['pending'] += 1
+
+        return summary
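Taken together, the three new methods support a per-model retry loop along these lines; the `StateManager` constructor arguments, the state loader, and `try_sync` are hypothetical stand-ins:

sm = StateManager(state_path)                    # hypothetical constructor args
state = sm.load_state()                          # hypothetical loader

for node_id in sm.get_models_needing_sync(state, 'superset'):
    ok = try_sync(node_id)                       # hypothetical per-model sync call
    sm.update_model_sync_status(state, node_id, 'superset', 'success' if ok else 'failed')

sm.save_state(state)                             # save_state appears throughout cli.py
print(sm.get_sync_summary(state, 'superset'))    # e.g. {'success': 10, 'failed': 1, 'pending': 0}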