dbt-cube-sync 0.1.0a11__tar.gz → 0.1.0a12__tar.gz

This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.

Potentially problematic release: the registry flags this version of dbt-cube-sync as possibly problematic.

Files changed (17):
  1. {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/PKG-INFO +1 -1
  2. {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/dbt_cube_sync/cli.py +91 -57
  3. {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/dbt_cube_sync/connectors/superset.py +17 -7
  4. {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/dbt_cube_sync/core/models.py +4 -12
  5. {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/dbt_cube_sync/core/state_manager.py +61 -42
  6. {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/pyproject.toml +1 -1
  7. {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/README.md +0 -0
  8. {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/dbt_cube_sync/__init__.py +0 -0
  9. {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/dbt_cube_sync/config.py +0 -0
  10. {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/dbt_cube_sync/connectors/__init__.py +0 -0
  11. {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/dbt_cube_sync/connectors/base.py +0 -0
  12. {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/dbt_cube_sync/connectors/powerbi.py +0 -0
  13. {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/dbt_cube_sync/connectors/tableau.py +0 -0
  14. {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/dbt_cube_sync/core/__init__.py +0 -0
  15. {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/dbt_cube_sync/core/cube_generator.py +0 -0
  16. {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/dbt_cube_sync/core/db_inspector.py +0 -0
  17. {dbt_cube_sync-0.1.0a11 → dbt_cube_sync-0.1.0a12}/dbt_cube_sync/core/dbt_parser.py +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: dbt-cube-sync
- Version: 0.1.0a11
+ Version: 0.1.0a12
  Summary: Synchronization tool for dbt models to Cube.js schemas and BI tools
  Author: Ponder
  Requires-Python: >=3.9,<4.0
dbt_cube_sync/cli.py

@@ -444,16 +444,21 @@ def sync_all(
      manifest, manifest_nodes, {}
  )

- # Update cube_sync step state
- current_state = state_manager.update_step_state(
-     current_state,
-     'cube_sync',
-     'failed' if cube_sync_error else 'success',
-     cube_sync_error
- )
+ # Save cube sync state
  state_manager.save_state(current_state)
  click.echo(f" State saved to {state_path}")

+ if cube_sync_error:
+     click.echo(f" Error during cube generation: {cube_sync_error}", err=True)
+
+ # Build a mapping from model name (file stem) to node_id for status updates
+ model_name_to_node_id = {}
+ for node_id in current_state.models.keys():
+     # Extract model name from output file (e.g., "model/cubes/ModelName.js" -> "ModelName")
+     output_file = current_state.models[node_id].output_file
+     model_name = Path(output_file).stem
+     model_name_to_node_id[model_name] = node_id
+
  # ============================================================
  # STEP 2: Sync to Superset (if configured)
  # ============================================================
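The mapping built above relies on each generated file's stem matching its model name. A minimal standalone sketch of that lookup (node ids and output paths are invented for illustration):

    from pathlib import Path

    # Hypothetical state entries: dbt node_id -> generated Cube.js file
    models = {
        "model.analytics.orders": "model/cubes/Orders.js",
        "model.analytics.customers": "model/cubes/Customers.js",
    }

    # Mirror of the loop above: file stem (model name) -> node_id
    model_name_to_node_id = {
        Path(output_file).stem: node_id
        for node_id, output_file in models.items()
    }

    print(model_name_to_node_id["Orders"])  # model.analytics.orders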
@@ -462,17 +467,21 @@ def sync_all(

  if not superset_url or not superset_username or not superset_password:
      click.echo(" Skipped - no Superset credentials provided")
-     current_state = state_manager.update_step_state(current_state, 'superset_sync', 'skipped')
-     state_manager.save_state(current_state)
  else:
-     should_run_superset = state_manager.should_run_step(
-         'superset_sync', previous_state, changes_detected
-     ) or force_full_sync
+     # Get models that need Superset sync (status is None or 'failed')
+     models_to_sync_ids = state_manager.get_models_needing_sync(current_state, 'superset')

-     if not should_run_superset:
-         click.echo(" Skipped - no changes and previous sync succeeded")
+     if not models_to_sync_ids and not force_full_sync:
+         click.echo(" Skipped - all models already synced successfully")
      else:
-         superset_error = None
+         # Convert node_ids to model names for filtering
+         models_to_sync_names = set()
+         for node_id in models_to_sync_ids:
+             if node_id in current_state.models:
+                 output_file = current_state.models[node_id].output_file
+                 model_name = Path(output_file).stem
+                 models_to_sync_names.add(model_name)
+
          try:
              connector_config = {
                  'url': superset_url,
@@ -482,24 +491,34 @@ def sync_all(
              }

              connector = ConnectorRegistry.get_connector('superset', **connector_config)
-             results = connector.sync_cube_schemas(output)
+
+             if force_full_sync:
+                 results = connector.sync_cube_schemas(output)
+             else:
+                 results = connector.sync_cube_schemas(output, models_to_sync_names)
+
+             # Update per-model status
+             for r in results:
+                 model_name = r.file_or_dataset.replace('.js', '')
+                 node_id = model_name_to_node_id.get(model_name)
+                 if node_id:
+                     state_manager.update_model_sync_status(
+                         current_state, node_id, 'superset',
+                         'success' if r.status == 'success' else 'failed'
+                     )

              successful = sum(1 for r in results if r.status == 'success')
              failed = sum(1 for r in results if r.status == 'failed')
              click.echo(f" Synced: {successful} successful, {failed} failed")

-             if failed > 0:
-                 superset_error = f"{failed} datasets failed to sync"
          except Exception as e:
-             superset_error = str(e)
-             click.echo(f" Error: {superset_error}", err=True)
-
-         current_state = state_manager.update_step_state(
-             current_state,
-             'superset_sync',
-             'failed' if superset_error else 'success',
-             superset_error
-         )
+             click.echo(f" Error: {str(e)}", err=True)
+             # Mark all models we tried to sync as failed
+             for node_id in models_to_sync_ids:
+                 state_manager.update_model_sync_status(
+                     current_state, node_id, 'superset', 'failed'
+                 )
+
  state_manager.save_state(current_state)

  # ============================================================
@@ -510,17 +529,16 @@ def sync_all(

  if not rag_api_url:
      click.echo(" Skipped - no RAG API URL provided")
-     current_state = state_manager.update_step_state(current_state, 'rag_sync', 'skipped')
-     state_manager.save_state(current_state)
  else:
-     should_run_rag = state_manager.should_run_step(
-         'rag_sync', previous_state, changes_detected
-     ) or force_full_sync
+     # Get models that need RAG sync (status is None or 'failed')
+     models_to_embed_ids = state_manager.get_models_needing_sync(current_state, 'rag')

-     if not should_run_rag:
-         click.echo(" Skipped - no changes and previous sync succeeded")
+     if not models_to_embed_ids and not force_full_sync:
+         click.echo(" Skipped - all models already synced successfully")
      else:
-         rag_error = None
+         if force_full_sync:
+             models_to_embed_ids = set(current_state.models.keys())
+
          try:
              # Call the RAG API to re-ingest embeddings
              response = requests.post(
@@ -532,19 +550,26 @@ def sync_all(
              if response.status_code == 200:
                  result = response.json()
                  click.echo(f" Ingested {result.get('schemas_ingested', 0)} schema documents")
+                 # Mark all models as succeeded
+                 for node_id in models_to_embed_ids:
+                     state_manager.update_model_sync_status(
+                         current_state, node_id, 'rag', 'success'
+                     )
              else:
-                 rag_error = f"RAG API returned {response.status_code}"
-                 click.echo(f" Error: {rag_error}", err=True)
+                 click.echo(f" Error: RAG API returned {response.status_code}", err=True)
+                 # Mark all models as failed
+                 for node_id in models_to_embed_ids:
+                     state_manager.update_model_sync_status(
+                         current_state, node_id, 'rag', 'failed'
+                     )
          except requests.RequestException as e:
-             rag_error = str(e)
-             click.echo(f" Error: Could not reach RAG API: {rag_error}", err=True)
-
-         current_state = state_manager.update_step_state(
-             current_state,
-             'rag_sync',
-             'failed' if rag_error else 'success',
-             rag_error
-         )
+             click.echo(f" Error: Could not reach RAG API: {e}", err=True)
+             # Mark all models as failed
+             for node_id in models_to_embed_ids:
+                 state_manager.update_model_sync_status(
+                     current_state, node_id, 'rag', 'failed'
+                 )
+
  state_manager.save_state(current_state)

  # ============================================================
@@ -554,10 +579,23 @@ def sync_all(
  click.echo("SYNC COMPLETE")
  click.echo("=" * 60)

- # Show step statuses
- click.echo(f" Cube sync: {current_state.cube_sync.status if current_state.cube_sync else 'unknown'}")
- click.echo(f" Superset sync: {current_state.superset_sync.status if current_state.superset_sync else 'unknown'}")
- click.echo(f" RAG sync: {current_state.rag_sync.status if current_state.rag_sync else 'unknown'}")
+ # Get per-model sync summaries
+ superset_summary = state_manager.get_sync_summary(current_state, 'superset')
+ rag_summary = state_manager.get_sync_summary(current_state, 'rag')
+
+ def format_summary(summary, step_configured):
+     if not step_configured:
+         return "skipped (not configured)"
+     if summary['failed'] > 0:
+         return f"{summary['success']} success, {summary['failed']} failed (will retry)"
+     elif summary['pending'] > 0:
+         return f"{summary['success']} success, {summary['pending']} pending"
+     else:
+         return f"{summary['success']} success"
+
+ click.echo(f" Cube.js files: {len(current_state.models)} models")
+ click.echo(f" Superset sync: {format_summary(superset_summary, superset_url)}")
+ click.echo(f" RAG sync: {format_summary(rag_summary, rag_api_url)}")

  if changes_detected or force_full_sync:
      click.echo(f" Models processed: {len(added_models) + len(modified_models)}")
@@ -566,14 +604,10 @@ def sync_all(
  else:
      click.echo(" No model changes detected")

- # Exit with error if any step failed
- any_failed = (
-     (current_state.cube_sync and current_state.cube_sync.status == 'failed') or
-     (current_state.superset_sync and current_state.superset_sync.status == 'failed') or
-     (current_state.rag_sync and current_state.rag_sync.status == 'failed')
- )
+ # Exit with error if any models failed
+ any_failed = superset_summary['failed'] > 0 or rag_summary['failed'] > 0
  if any_failed:
-     click.echo("\n ⚠️ Some steps failed - they will be retried on next run")
+     click.echo("\n ⚠️ Some models failed - they will be retried on next run")
      sys.exit(1)

  except Exception as e:
dbt_cube_sync/connectors/superset.py

@@ -123,20 +123,30 @@ class SupersetConnector(BaseConnector):
          self.database_id = result[0]['id']
          print(f"✓ Found database '{database_name}' with ID: {self.database_id}")

-     def sync_cube_schemas(self, cube_dir: str) -> List[SyncResult]:
-         """Sync all Cube.js schemas from directory to Superset"""
+     def sync_cube_schemas(self, cube_dir: str, models_filter: set = None) -> List[SyncResult]:
+         """Sync Cube.js schemas from directory to Superset
+
+         Args:
+             cube_dir: Directory containing Cube.js schema files
+             models_filter: Optional set of model names to sync. If None, sync all.
+         """
          results = []
          cube_files = self._get_cube_files(cube_dir)
-
+
          if not cube_files:
              return [SyncResult(
                  file_or_dataset="No files",
-                 status="failed",
+                 status="failed",
                  message=f"No .js files found in {cube_dir}"
              )]
-
-         print(f"🔍 Found {len(cube_files)} Cube.js files")
-
+
+         # Filter files if models_filter is provided
+         if models_filter:
+             cube_files = [f for f in cube_files if f.stem in models_filter]
+             print(f"🔍 Syncing {len(cube_files)} Cube.js files (filtered from {len(self._get_cube_files(cube_dir))})")
+         else:
+             print(f"🔍 Found {len(cube_files)} Cube.js files")
+
          for cube_file in cube_files:
              try:
                  print(f"\n{'='*60}")
dbt_cube_sync/core/models.py

@@ -112,13 +112,9 @@ class ModelState(BaseModel):
      has_metrics: bool
      last_generated: str
      output_file: str
-
-
- class StepState(BaseModel):
-     """Represents the state of a pipeline step"""
-     status: str  # 'success', 'failed', 'skipped'
-     last_run: Optional[str] = None
-     error: Optional[str] = None
+     # Per-model sync status for each step
+     superset_sync_status: Optional[str] = None  # 'success', 'failed', or None (not attempted)
+     rag_sync_status: Optional[str] = None  # 'success', 'failed', or None (not attempted)


  class SyncState(BaseModel):
@@ -126,8 +122,4 @@ class SyncState(BaseModel):
      version: str = "1.1"
      last_sync_timestamp: str
      manifest_path: str
-     models: Dict[str, ModelState] = {}
-     # Step states for tracking pipeline progress
-     cube_sync: Optional[StepState] = None
-     superset_sync: Optional[StepState] = None
-     rag_sync: Optional[StepState] = None
+     models: Dict[str, ModelState] = {}
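In other words, the pipeline-level StepState is replaced by two nullable status fields on each model. A minimal sketch of the reshaped ModelState (fields as in the diff; pydantic is assumed, as elsewhere in the package):

    from typing import Optional
    from pydantic import BaseModel

    class ModelState(BaseModel):
        checksum: str
        has_metrics: bool
        last_generated: str
        output_file: str
        superset_sync_status: Optional[str] = None  # 'success', 'failed', or None
        rag_sync_status: Optional[str] = None

    m = ModelState(checksum="abc123", has_metrics=True,
                   last_generated="2024-01-01T00:00:00Z",
                   output_file="model/cubes/Orders.js")
    print(m.superset_sync_status)  # None -> this model still needs a Superset sync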
dbt_cube_sync/core/state_manager.py

@@ -10,7 +10,7 @@ from datetime import datetime
  from pathlib import Path
  from typing import Dict, List, Optional, Set, Tuple

- from .models import ModelState, StepState, SyncState
+ from .models import ModelState, SyncState


  class StateManager:
@@ -179,15 +179,18 @@ class StateManager:
              node_data.get("config", {}).get("meta", {}).get("metrics")
          )

+         # For newly generated/modified models, reset sync status (they need to be re-synced)
          models[node_id] = ModelState(
              checksum=checksum,
              has_metrics=has_metrics,
              last_generated=timestamp,
              output_file=output_file,
+             superset_sync_status=None,  # Reset - needs sync
+             rag_sync_status=None,  # Reset - needs sync
          )

      return SyncState(
-         version="1.0",
+         version="1.1",
          last_sync_timestamp=timestamp,
          manifest_path=str(manifest_path),
          models=models,
@@ -220,64 +223,80 @@ class StateManager:

          return files_to_delete

-     def should_run_step(
+     def get_models_needing_sync(
          self,
-         step_name: str,
-         previous_state: Optional[SyncState],
-         changes_detected: bool,
-     ) -> bool:
+         state: SyncState,
+         step: str,
+     ) -> Set[str]:
          """
-         Determine if a pipeline step should run.
+         Get node_ids of models that need to be synced for a step.

-         A step should run if:
-         - There are changes detected, OR
-         - The previous run of this step failed
+         A model needs sync if:
+         - Its sync status is None (never synced)
+         - Its sync status is 'failed' (needs retry)

          Args:
-             step_name: Name of the step ('cube_sync', 'superset_sync', 'rag_sync')
-             previous_state: Previous sync state
-             changes_detected: Whether model changes were detected
+             state: Current sync state
+             step: Step name ('superset' or 'rag')

          Returns:
-             True if the step should run
+             Set of node_ids that need syncing
          """
-         if changes_detected:
-             return True
+         models_to_sync = set()
+         status_field = f"{step}_sync_status"

-         if previous_state is None:
-             return True
-
-         step_state = getattr(previous_state, step_name, None)
-         if step_state is None:
-             return True
+         for node_id, model_state in state.models.items():
+             status = getattr(model_state, status_field, None)
+             if status is None or status == 'failed':
+                 models_to_sync.add(node_id)

-         # Re-run if previous attempt failed
-         return step_state.status == 'failed'
+         return models_to_sync

-     def update_step_state(
+     def update_model_sync_status(
          self,
          state: SyncState,
-         step_name: str,
+         node_id: str,
+         step: str,
          status: str,
-         error: Optional[str] = None,
-     ) -> SyncState:
+     ) -> None:
+         """
+         Update the sync status of a model for a specific step.
+
+         Args:
+             state: Current sync state
+             node_id: The model's node_id
+             step: Step name ('superset' or 'rag')
+             status: Status to set ('success' or 'failed')
          """
-         Update the state of a pipeline step.
+         if node_id in state.models:
+             status_field = f"{step}_sync_status"
+             setattr(state.models[node_id], status_field, status)
+
+     def get_sync_summary(
+         self,
+         state: SyncState,
+         step: str,
+     ) -> Dict[str, int]:
+         """
+         Get a summary of sync status for a step.

          Args:
              state: Current sync state
-             step_name: Name of the step ('cube_sync', 'superset_sync', 'rag_sync')
-             status: Step status ('success', 'failed', 'skipped')
-             error: Error message if failed
+             step: Step name ('superset' or 'rag')

          Returns:
-             Updated SyncState
+             Dict with counts: {'success': N, 'failed': N, 'pending': N}
          """
-         timestamp = datetime.utcnow().isoformat() + "Z"
-         step_state = StepState(
-             status=status,
-             last_run=timestamp,
-             error=error,
-         )
-         setattr(state, step_name, step_state)
-         return state
+         status_field = f"{step}_sync_status"
+         summary = {'success': 0, 'failed': 0, 'pending': 0}
+
+         for model_state in state.models.values():
+             status = getattr(model_state, status_field, None)
+             if status == 'success':
+                 summary['success'] += 1
+             elif status == 'failed':
+                 summary['failed'] += 1
+             else:
+                 summary['pending'] += 1
+
+         return summary
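Together, get_models_needing_sync, update_model_sync_status, and get_sync_summary turn the models dict into a simple retry queue. A self-contained sketch of the selection rule, using a plain function over ModelState-like records (example data invented):

    from typing import Dict, Optional, Set
    from pydantic import BaseModel

    class ModelState(BaseModel):
        output_file: str
        superset_sync_status: Optional[str] = None
        rag_sync_status: Optional[str] = None

    def models_needing_sync(models: Dict[str, ModelState], step: str) -> Set[str]:
        # None (never attempted) and 'failed' both qualify for (re)sync
        field = f"{step}_sync_status"
        return {nid for nid, m in models.items()
                if getattr(m, field, None) in (None, 'failed')}

    models = {
        "model.analytics.orders": ModelState(output_file="cubes/Orders.js"),
        "model.analytics.customers": ModelState(output_file="cubes/Customers.js",
                                                rag_sync_status="failed"),
        "model.analytics.payments": ModelState(output_file="cubes/Payments.js",
                                               rag_sync_status="success"),
    }

    print(sorted(models_needing_sync(models, 'rag')))
    # ['model.analytics.customers', 'model.analytics.orders']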
pyproject.toml

@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "dbt-cube-sync"
- version = "0.1.0a11"
+ version = "0.1.0a12"
  description = "Synchronization tool for dbt models to Cube.js schemas and BI tools"
  authors = ["Ponder"]
  readme = "README.md"