dbt-cube-sync 0.1.0a10-py3-none-any.whl → 0.1.0a12-py3-none-any.whl

This diff compares the contents of two package versions publicly released to a supported registry, and is provided for informational purposes only.

This version of dbt-cube-sync might be problematic.

dbt_cube_sync/cli.py CHANGED
@@ -357,21 +357,22 @@ def sync_all(
         modified_models = set()
         removed_models = set()
 
-        # ============================================================
-        # STEP 1: Incremental dbt → Cube.js sync
-        # ============================================================
-        click.echo("\n[1/3] dbt → Cube.js schemas")
-        click.echo("-" * 40)
-
-        # Initialize state manager
+        # Initialize state manager and load previous state
         state_manager = StateManager(state_path)
         previous_state = None
+        current_state = None
 
         if not force_full_sync:
             previous_state = state_manager.load_state()
             if previous_state:
                 click.echo(f" Loaded state from {state_path}")
 
+        # ============================================================
+        # STEP 1: Incremental dbt → Cube.js sync
+        # ============================================================
+        click.echo("\n[1/3] dbt → Cube.js schemas")
+        click.echo("-" * 40)
+
         # Parse manifest
         parser = DbtParser(
             manifest_path=manifest,
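With this reordering, previous state is loaded before the step banner prints, and current_state is initialized up front so the later steps can rely on it even when step 1 fails. A minimal sketch of driving both paths from a test, assuming sync_all is the Click command exported by dbt_cube_sync.cli and that the force_full_sync parameter maps to a --force-full-sync flag (neither is confirmed by this diff; required options such as the manifest path are omitted here):

    # Sketch only: the command import path and flag name are assumptions.
    from click.testing import CliRunner

    from dbt_cube_sync.cli import sync_all

    runner = CliRunner()

    # Incremental run: loads previous state from state_path before step 1.
    incremental = runner.invoke(sync_all, [])

    # Full resync: skips loading previous state entirely.
    full = runner.invoke(sync_all, ["--force-full-sync"])

    print(incremental.output, full.output)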
@@ -414,65 +415,130 @@ def sync_all(
 
         # Generate Cube.js files for changed models
         generated_files = {}
-        if node_ids_to_process:
-            parsed_models = parser.parse_models(node_ids_filter=node_ids_to_process)
-
-            if parsed_models:
-                generator = CubeGenerator('./cube/templates', output)
-                generated_files = generator.generate_cube_files(parsed_models)
-                click.echo(f" Generated {len(generated_files)} Cube.js files")
-
-        # Save state
+        cube_sync_error = None
+        try:
+            if node_ids_to_process:
+                parsed_models = parser.parse_models(node_ids_filter=node_ids_to_process)
+
+                if parsed_models:
+                    generator = CubeGenerator('./cube/templates', output)
+                    generated_files = generator.generate_cube_files(parsed_models)
+                    click.echo(f" Generated {len(generated_files)} Cube.js files")
+        except Exception as e:
+            cube_sync_error = str(e)
+            click.echo(f" Error: {cube_sync_error}", err=True)
+
+        # Build/update state
         if changes_detected or force_full_sync:
             if previous_state and not force_full_sync:
-                new_state = state_manager.merge_state(
+                current_state = state_manager.merge_state(
                     previous_state, manifest, manifest_nodes, generated_files, removed_models
                 )
             else:
-                new_state = state_manager.create_state_from_results(
+                current_state = state_manager.create_state_from_results(
                     manifest, manifest_nodes, generated_files
                 )
-            state_manager.save_state(new_state)
-            click.echo(f" State saved to {state_path}")
+        else:
+            # No changes - use previous state or create empty one
+            current_state = previous_state or state_manager.create_state_from_results(
+                manifest, manifest_nodes, {}
+            )
+
+        # Save cube sync state
+        state_manager.save_state(current_state)
+        click.echo(f" State saved to {state_path}")
+
+        if cube_sync_error:
+            click.echo(f" Error during cube generation: {cube_sync_error}", err=True)
+
+        # Build a mapping from model name (file stem) to node_id for status updates
+        model_name_to_node_id = {}
+        for node_id in current_state.models.keys():
+            # Extract model name from output file (e.g., "model/cubes/ModelName.js" -> "ModelName")
+            output_file = current_state.models[node_id].output_file
+            model_name = Path(output_file).stem
+            model_name_to_node_id[model_name] = node_id
 
         # ============================================================
         # STEP 2: Sync to Superset (if configured)
         # ============================================================
-        if superset_url and superset_username and superset_password:
-            click.echo("\n[2/3] Cube.js → Superset")
-            click.echo("-" * 40)
+        click.echo("\n[2/3] Cube.js → Superset")
+        click.echo("-" * 40)
 
-            if not changes_detected and not force_full_sync:
-                click.echo(" Skipped - no changes detected")
-            else:
-                connector_config = {
-                    'url': superset_url,
-                    'username': superset_username,
-                    'password': superset_password,
-                    'database_name': cube_connection_name
-                }
-
-                connector = ConnectorRegistry.get_connector('superset', **connector_config)
-                results = connector.sync_cube_schemas(output)
-
-                successful = sum(1 for r in results if r.status == 'success')
-                failed = sum(1 for r in results if r.status == 'failed')
-                click.echo(f" Synced: {successful} successful, {failed} failed")
-        else:
-            click.echo("\n[2/3] Cube.js → Superset")
-            click.echo("-" * 40)
+        if not superset_url or not superset_username or not superset_password:
             click.echo(" Skipped - no Superset credentials provided")
+        else:
+            # Get models that need Superset sync (status is None or 'failed')
+            models_to_sync_ids = state_manager.get_models_needing_sync(current_state, 'superset')
+
+            if not models_to_sync_ids and not force_full_sync:
+                click.echo(" Skipped - all models already synced successfully")
+            else:
+                # Convert node_ids to model names for filtering
+                models_to_sync_names = set()
+                for node_id in models_to_sync_ids:
+                    if node_id in current_state.models:
+                        output_file = current_state.models[node_id].output_file
+                        model_name = Path(output_file).stem
+                        models_to_sync_names.add(model_name)
+
+                try:
+                    connector_config = {
+                        'url': superset_url,
+                        'username': superset_username,
+                        'password': superset_password,
+                        'database_name': cube_connection_name
+                    }
+
+                    connector = ConnectorRegistry.get_connector('superset', **connector_config)
+
+                    if force_full_sync:
+                        results = connector.sync_cube_schemas(output)
+                    else:
+                        results = connector.sync_cube_schemas(output, models_to_sync_names)
+
+                    # Update per-model status
+                    for r in results:
+                        model_name = r.file_or_dataset.replace('.js', '')
+                        node_id = model_name_to_node_id.get(model_name)
+                        if node_id:
+                            state_manager.update_model_sync_status(
+                                current_state, node_id, 'superset',
+                                'success' if r.status == 'success' else 'failed'
+                            )
+
+                    successful = sum(1 for r in results if r.status == 'success')
+                    failed = sum(1 for r in results if r.status == 'failed')
+                    click.echo(f" Synced: {successful} successful, {failed} failed")
+
+                except Exception as e:
+                    click.echo(f" Error: {str(e)}", err=True)
+                    # Mark all models we tried to sync as failed
+                    for node_id in models_to_sync_ids:
+                        state_manager.update_model_sync_status(
+                            current_state, node_id, 'superset', 'failed'
+                        )
+
+            state_manager.save_state(current_state)
 
         # ============================================================
         # STEP 3: Update RAG embeddings (if configured)
         # ============================================================
-        if rag_api_url:
-            click.echo("\n[3/3] Update RAG embeddings")
-            click.echo("-" * 40)
+        click.echo("\n[3/3] Update RAG embeddings")
+        click.echo("-" * 40)
 
-            if not changes_detected and not force_full_sync:
-                click.echo(" Skipped - no changes detected")
+        if not rag_api_url:
+            click.echo(" Skipped - no RAG API URL provided")
+        else:
+            # Get models that need RAG sync (status is None or 'failed')
+            models_to_embed_ids = state_manager.get_models_needing_sync(current_state, 'rag')
+
+            if not models_to_embed_ids and not force_full_sync:
+                click.echo(" Skipped - all models already synced successfully")
             else:
+                if force_full_sync:
+                    models_to_embed_ids = set(current_state.models.keys())
+
                 try:
                     # Call the RAG API to re-ingest embeddings
                     response = requests.post(
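The per-model bookkeeping above hinges on mapping Superset results back to dbt node_ids: state entries are keyed by node_id, while SyncResult.file_or_dataset carries the generated file name, so both sides are normalized to the file stem. A standalone sketch of that mapping with illustrative values:

    # Illustrative values; mirrors the stem-based lookup used above.
    from pathlib import Path

    models = {"model.demo.orders": "cubes/Orders.js"}  # node_id -> output_file
    model_name_to_node_id = {Path(f).stem: nid for nid, f in models.items()}

    result_file = "Orders.js"  # what SyncResult.file_or_dataset would carry
    node_id = model_name_to_node_id.get(result_file.replace(".js", ""))
    print(node_id)  # model.demo.orders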
@@ -484,14 +550,27 @@ def sync_all(
                     if response.status_code == 200:
                         result = response.json()
                         click.echo(f" Ingested {result.get('schemas_ingested', 0)} schema documents")
+                        # Mark all models as succeeded
+                        for node_id in models_to_embed_ids:
+                            state_manager.update_model_sync_status(
+                                current_state, node_id, 'rag', 'success'
+                            )
                     else:
-                        click.echo(f" Warning: RAG API returned {response.status_code}", err=True)
+                        click.echo(f" Error: RAG API returned {response.status_code}", err=True)
+                        # Mark all models as failed
+                        for node_id in models_to_embed_ids:
+                            state_manager.update_model_sync_status(
+                                current_state, node_id, 'rag', 'failed'
+                            )
                 except requests.RequestException as e:
-                    click.echo(f" Warning: Could not reach RAG API: {e}", err=True)
-        else:
-            click.echo("\n[3/3] Update RAG embeddings")
-            click.echo("-" * 40)
-            click.echo(" Skipped - no RAG API URL provided")
+                    click.echo(f" Error: Could not reach RAG API: {e}", err=True)
+                    # Mark all models as failed
+                    for node_id in models_to_embed_ids:
+                        state_manager.update_model_sync_status(
+                            current_state, node_id, 'rag', 'failed'
+                        )
+
+            state_manager.save_state(current_state)
 
         # ============================================================
         # Summary
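Unlike the Superset step, RAG status is tracked at run granularity: every node_id in models_to_embed_ids is marked 'success' or 'failed' together, based on one API response. A condensed sketch of that contract — the real requests.post(...) arguments fall between the hunks above, so the bare rag_api_url call here is a stand-in:

    # Condensed sketch of the all-or-nothing RAG status update; the actual
    # endpoint and payload are elided in this diff, so this call is a stand-in.
    import requests

    def mark_rag_run(state_manager, state, node_ids, rag_api_url):
        try:
            ok = requests.post(rag_api_url, timeout=60).status_code == 200
        except requests.RequestException:
            ok = False
        for node_id in node_ids:
            state_manager.update_model_sync_status(
                state, node_id, 'rag', 'success' if ok else 'failed'
            )
        return ok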
@@ -500,12 +579,36 @@ def sync_all(
         click.echo("SYNC COMPLETE")
         click.echo("=" * 60)
 
+        # Get per-model sync summaries
+        superset_summary = state_manager.get_sync_summary(current_state, 'superset')
+        rag_summary = state_manager.get_sync_summary(current_state, 'rag')
+
+        def format_summary(summary, step_configured):
+            if not step_configured:
+                return "skipped (not configured)"
+            if summary['failed'] > 0:
+                return f"{summary['success']} success, {summary['failed']} failed (will retry)"
+            elif summary['pending'] > 0:
+                return f"{summary['success']} success, {summary['pending']} pending"
+            else:
+                return f"{summary['success']} success"
+
+        click.echo(f" Cube.js files: {len(current_state.models)} models")
+        click.echo(f" Superset sync: {format_summary(superset_summary, superset_url)}")
+        click.echo(f" RAG sync: {format_summary(rag_summary, rag_api_url)}")
+
         if changes_detected or force_full_sync:
             click.echo(f" Models processed: {len(added_models) + len(modified_models)}")
             click.echo(f" Models removed: {len(removed_models)}")
             click.echo(f" Cube.js files generated: {len(generated_files)}")
         else:
-            click.echo(" No changes - everything is up to date")
+            click.echo(" No model changes detected")
+
+        # Exit with error if any models failed
+        any_failed = superset_summary['failed'] > 0 or rag_summary['failed'] > 0
+        if any_failed:
+            click.echo("\n ⚠️ Some models failed - they will be retried on next run")
+            sys.exit(1)
 
     except Exception as e:
         click.echo(f"Error: {str(e)}", err=True)

dbt_cube_sync/connectors/superset.py CHANGED
@@ -123,20 +123,30 @@ class SupersetConnector(BaseConnector):
         self.database_id = result[0]['id']
         print(f"✓ Found database '{database_name}' with ID: {self.database_id}")
 
-    def sync_cube_schemas(self, cube_dir: str) -> List[SyncResult]:
-        """Sync all Cube.js schemas from directory to Superset"""
+    def sync_cube_schemas(self, cube_dir: str, models_filter: set = None) -> List[SyncResult]:
+        """Sync Cube.js schemas from directory to Superset
+
+        Args:
+            cube_dir: Directory containing Cube.js schema files
+            models_filter: Optional set of model names to sync. If None, sync all.
+        """
         results = []
         cube_files = self._get_cube_files(cube_dir)
-
+
         if not cube_files:
             return [SyncResult(
                 file_or_dataset="No files",
-                status="failed",
+                status="failed",
                 message=f"No .js files found in {cube_dir}"
             )]
-
-        print(f"🔍 Found {len(cube_files)} Cube.js files")
-
+
+        # Filter files if models_filter is provided
+        if models_filter:
+            cube_files = [f for f in cube_files if f.stem in models_filter]
+            print(f"🔍 Syncing {len(cube_files)} Cube.js files (filtered from {len(self._get_cube_files(cube_dir))})")
+        else:
+            print(f"🔍 Found {len(cube_files)} Cube.js files")
+
         for cube_file in cube_files:
             try:
                 print(f"\n{'='*60}")
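A short usage sketch of the new models_filter parameter. The get_connector call mirrors the CLI above; the registry import path and connection values are assumptions, not confirmed by this diff. Also note that an empty set is falsy, so "if models_filter:" falls through to the unfiltered branch:

    # Sketch: sync only two named models. Import path and credentials are
    # placeholders/assumptions; the registry call mirrors the CLI above.
    from dbt_cube_sync.connectors.base import ConnectorRegistry

    connector = ConnectorRegistry.get_connector(
        'superset',
        url='http://localhost:8088',
        username='admin',
        password='admin',
        database_name='cube',
    )

    results = connector.sync_cube_schemas('./cubes', {'Orders', 'Customers'})
    for r in results:
        print(r.file_or_dataset, r.status, r.message)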

dbt_cube_sync/core/models.py CHANGED
@@ -112,11 +112,14 @@ class ModelState(BaseModel):
     has_metrics: bool
     last_generated: str
     output_file: str
+    # Per-model sync status for each step
+    superset_sync_status: Optional[str] = None  # 'success', 'failed', or None (not attempted)
+    rag_sync_status: Optional[str] = None  # 'success', 'failed', or None (not attempted)
 
 
 class SyncState(BaseModel):
     """Represents the overall state for incremental sync"""
-    version: str = "1.0"
+    version: str = "1.1"
     last_sync_timestamp: str
     manifest_path: str
     models: Dict[str, ModelState] = {}
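For reference, a plausible serialized entry under state version "1.1" — field names come from the models above; the concrete values and on-disk layout are illustrative, not taken from the package:

    # Illustrative shape of a version-1.1 state entry (values are made up).
    example_state = {
        "version": "1.1",
        "last_sync_timestamp": "2025-01-01T00:00:00Z",
        "manifest_path": "target/manifest.json",
        "models": {
            "model.demo.orders": {
                "checksum": "3f9a0c",
                "has_metrics": True,
                "last_generated": "2025-01-01T00:00:00Z",
                "output_file": "cubes/Orders.js",
                "superset_sync_status": "success",
                "rag_sync_status": None,  # not attempted yet -> counted as pending
            },
        },
    }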

dbt_cube_sync/core/state_manager.py CHANGED
@@ -179,15 +179,18 @@ class StateManager:
             node_data.get("config", {}).get("meta", {}).get("metrics")
         )
 
+        # For newly generated/modified models, reset sync status (they need to be re-synced)
         models[node_id] = ModelState(
             checksum=checksum,
             has_metrics=has_metrics,
             last_generated=timestamp,
             output_file=output_file,
+            superset_sync_status=None,  # Reset - needs sync
+            rag_sync_status=None,  # Reset - needs sync
         )
 
     return SyncState(
-        version="1.0",
+        version="1.1",
         last_sync_timestamp=timestamp,
         manifest_path=str(manifest_path),
         models=models,
@@ -219,3 +222,81 @@ class StateManager:
             files_to_delete.append(output_file)
 
         return files_to_delete
+
+    def get_models_needing_sync(
+        self,
+        state: SyncState,
+        step: str,
+    ) -> Set[str]:
+        """
+        Get node_ids of models that need to be synced for a step.
+
+        A model needs sync if:
+        - Its sync status is None (never synced)
+        - Its sync status is 'failed' (needs retry)
+
+        Args:
+            state: Current sync state
+            step: Step name ('superset' or 'rag')
+
+        Returns:
+            Set of node_ids that need syncing
+        """
+        models_to_sync = set()
+        status_field = f"{step}_sync_status"
+
+        for node_id, model_state in state.models.items():
+            status = getattr(model_state, status_field, None)
+            if status is None or status == 'failed':
+                models_to_sync.add(node_id)
+
+        return models_to_sync
+
+    def update_model_sync_status(
+        self,
+        state: SyncState,
+        node_id: str,
+        step: str,
+        status: str,
+    ) -> None:
+        """
+        Update the sync status of a model for a specific step.
+
+        Args:
+            state: Current sync state
+            node_id: The model's node_id
+            step: Step name ('superset' or 'rag')
+            status: Status to set ('success' or 'failed')
+        """
+        if node_id in state.models:
+            status_field = f"{step}_sync_status"
+            setattr(state.models[node_id], status_field, status)
+
+    def get_sync_summary(
+        self,
+        state: SyncState,
+        step: str,
+    ) -> Dict[str, int]:
+        """
+        Get a summary of sync status for a step.
+
+        Args:
+            state: Current sync state
+            step: Step name ('superset' or 'rag')
+
+        Returns:
+            Dict with counts: {'success': N, 'failed': N, 'pending': N}
+        """
+        status_field = f"{step}_sync_status"
+        summary = {'success': 0, 'failed': 0, 'pending': 0}
+
+        for model_state in state.models.values():
+            status = getattr(model_state, status_field, None)
+            if status == 'success':
+                summary['success'] += 1
+            elif status == 'failed':
+                summary['failed'] += 1
+            else:
+                summary['pending'] += 1
+
+        return summary
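An end-to-end sketch of how the three helpers compose — module paths are taken from the RECORD below, and all values are illustrative:

    # Sketch: per-model status drives retries and the summary counts.
    from dbt_cube_sync.core.models import ModelState, SyncState
    from dbt_cube_sync.core.state_manager import StateManager

    sm = StateManager("state.json")
    state = SyncState(
        last_sync_timestamp="2025-01-01T00:00:00Z",
        manifest_path="target/manifest.json",
        models={
            "model.demo.orders": ModelState(
                checksum="aaa", has_metrics=True,
                last_generated="2025-01-01T00:00:00Z", output_file="cubes/Orders.js",
            ),
            "model.demo.customers": ModelState(
                checksum="bbb", has_metrics=False,
                last_generated="2025-01-01T00:00:00Z", output_file="cubes/Customers.js",
            ),
        },
    )

    sm.update_model_sync_status(state, "model.demo.orders", "superset", "success")
    sm.update_model_sync_status(state, "model.demo.customers", "superset", "failed")

    # Only the failed model is selected again on the next run.
    print(sm.get_models_needing_sync(state, "superset"))  # {'model.demo.customers'}
    print(sm.get_sync_summary(state, "superset"))
    # {'success': 1, 'failed': 1, 'pending': 0}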

dbt_cube_sync-0.1.0a12.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dbt-cube-sync
-Version: 0.1.0a10
+Version: 0.1.0a12
 Summary: Synchronization tool for dbt models to Cube.js schemas and BI tools
 Author: Ponder
 Requires-Python: >=3.9,<4.0

dbt_cube_sync-0.1.0a12.dist-info/RECORD CHANGED
@@ -1,18 +1,18 @@
 dbt_cube_sync/__init__.py,sha256=aifkfgUDRPL5v0LZzceH2LXu66YDkJjdpvKwXsdikbI,113
-dbt_cube_sync/cli.py,sha256=AxSVF3hJJqovk51mjA8Nyyte5NkfukSF3sAjk_VYJ6Y,20992
+dbt_cube_sync/cli.py,sha256=hitf_aAyjBfL4-t2eXTubavQpMMHuIpzIoOZcqzoGx0,26060
 dbt_cube_sync/config.py,sha256=qhGE7CxTmh0RhPizgd3x3Yj-3L2LoC00UQIDT0q9FlQ,3858
 dbt_cube_sync/connectors/__init__.py,sha256=NG6tYZ3CYD5bG_MfNLZrUM8YoBEKArG8-AOmJ8pwvQI,52
 dbt_cube_sync/connectors/base.py,sha256=JLzerxJdt34z0kWuyieL6UQhf5_dUYPGmwkiRWBuSPY,2802
 dbt_cube_sync/connectors/powerbi.py,sha256=2Y8fTfh_6Q_Myma1ymipPh1U3HsfQKcktVequXXnIXI,1275
-dbt_cube_sync/connectors/superset.py,sha256=D_pWTN0F84mUosnsm-NG_v9IlVE2dviIDh08WxHEOIA,21709
+dbt_cube_sync/connectors/superset.py,sha256=jxyPTbw4d9HXrZD4DwD36erkw4Zl9XRFRrvlH2xZT5I,22171
 dbt_cube_sync/connectors/tableau.py,sha256=jKve1zErzTbgPOtmPB92ZwZl4I6uEySedM51JiwlGrE,1261
 dbt_cube_sync/core/__init__.py,sha256=kgsawtU5dqEvnHz6dU8qwJbH3rtIV7QlK2MhtYVDCaY,46
 dbt_cube_sync/core/cube_generator.py,sha256=DtmaA_dtWmBVJnSWHVoQi-3KEsRc0axHZpCUEcKeYAk,11061
 dbt_cube_sync/core/db_inspector.py,sha256=V_cd12FBXj-1gB2JZeYmkQluUO-UYufy_tvfYoJXCGI,5073
 dbt_cube_sync/core/dbt_parser.py,sha256=KbhDoB0ULP6JDUPZPDVbm9yCtRKrW17ptGoJvVLtueY,12763
-dbt_cube_sync/core/models.py,sha256=2s5iZ9MEBGfSzkB4HJB5vG0mZqNXNJSfAD3Byw1IVe4,3203
-dbt_cube_sync/core/state_manager.py,sha256=7uXJtlZBIWj6s6XgAhNlP6UHdfhH0y461iyQlfidqGI,7233
-dbt_cube_sync-0.1.0a10.dist-info/METADATA,sha256=Foy8KI7-ILdZwTvejfjukArtnGNJJg85vwIlQoMS31w,10681
-dbt_cube_sync-0.1.0a10.dist-info/WHEEL,sha256=3ny-bZhpXrU6vSQ1UPG34FoxZBp3lVcvK0LkgUz6VLk,88
-dbt_cube_sync-0.1.0a10.dist-info/entry_points.txt,sha256=iEAB_nZ1AoSeFwSHPY2tr02xmTHLVFKp5CJeFh0AfCw,56
-dbt_cube_sync-0.1.0a10.dist-info/RECORD,,
+dbt_cube_sync/core/models.py,sha256=poUZMOpVUyFCb7i5jzMMLnI49NizV5SmD2RkJ2oQI3I,3430
+dbt_cube_sync/core/state_manager.py,sha256=Fs16EdUDGpL4QHJfbXrOZEilLyccRjA6jw5_dIykgxw,9702
+dbt_cube_sync-0.1.0a12.dist-info/METADATA,sha256=7PpodbMF_avVumilIy8AKyWhQkesmsRyRLX7qG-GuWg,10681
+dbt_cube_sync-0.1.0a12.dist-info/WHEEL,sha256=3ny-bZhpXrU6vSQ1UPG34FoxZBp3lVcvK0LkgUz6VLk,88
+dbt_cube_sync-0.1.0a12.dist-info/entry_points.txt,sha256=iEAB_nZ1AoSeFwSHPY2tr02xmTHLVFKp5CJeFh0AfCw,56
+dbt_cube_sync-0.1.0a12.dist-info/RECORD,,