dbt-cube-sync 0.1.0a10-py3-none-any.whl → 0.1.0a12-py3-none-any.whl

This diff compares the contents of two package versions publicly released to a supported registry, and is provided for informational purposes only.

This version of dbt-cube-sync might be problematic.

dbt_cube_sync/cli.py CHANGED
@@ -357,21 +357,22 @@ def sync_all(
         modified_models = set()
         removed_models = set()
 
-        # ============================================================
-        # STEP 1: Incremental dbt → Cube.js sync
-        # ============================================================
-        click.echo("\n[1/3] dbt → Cube.js schemas")
-        click.echo("-" * 40)
-
-        # Initialize state manager
+        # Initialize state manager and load previous state
         state_manager = StateManager(state_path)
         previous_state = None
+        current_state = None
 
         if not force_full_sync:
             previous_state = state_manager.load_state()
             if previous_state:
                 click.echo(f" Loaded state from {state_path}")
 
+        # ============================================================
+        # STEP 1: Incremental dbt → Cube.js sync
+        # ============================================================
+        click.echo("\n[1/3] dbt → Cube.js schemas")
+        click.echo("-" * 40)
+
         # Parse manifest
         parser = DbtParser(
             manifest_path=manifest,
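With this reordering, previous state is loaded before the step banner prints, and current_state is initialized up front so the later steps can rely on it even when step 1 fails. A minimal sketch of driving both paths from a test, assuming sync_all is the Click command exported by dbt_cube_sync.cli and that the force_full_sync parameter maps to a --force-full-sync flag (neither is confirmed by this diff; required options such as the manifest path are omitted here):

    # Sketch only: the command import path and flag name are assumptions.
    from click.testing import CliRunner

    from dbt_cube_sync.cli import sync_all

    runner = CliRunner()

    # Incremental run: loads previous state from state_path before step 1.
    incremental = runner.invoke(sync_all, [])

    # Full resync: skips loading previous state entirely.
    full = runner.invoke(sync_all, ["--force-full-sync"])

    print(incremental.output, full.output)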
@@ -414,65 +415,130 @@ def sync_all(
 
         # Generate Cube.js files for changed models
         generated_files = {}
-        if node_ids_to_process:
-            parsed_models = parser.parse_models(node_ids_filter=node_ids_to_process)
-
-            if parsed_models:
-                generator = CubeGenerator('./cube/templates', output)
-                generated_files = generator.generate_cube_files(parsed_models)
-                click.echo(f" Generated {len(generated_files)} Cube.js files")
-
-        # Save state
+        cube_sync_error = None
+        try:
+            if node_ids_to_process:
+                parsed_models = parser.parse_models(node_ids_filter=node_ids_to_process)
+
+                if parsed_models:
+                    generator = CubeGenerator('./cube/templates', output)
+                    generated_files = generator.generate_cube_files(parsed_models)
+                    click.echo(f" Generated {len(generated_files)} Cube.js files")
+        except Exception as e:
+            cube_sync_error = str(e)
+            click.echo(f" Error: {cube_sync_error}", err=True)
+
+        # Build/update state
         if changes_detected or force_full_sync:
             if previous_state and not force_full_sync:
-                new_state = state_manager.merge_state(
+                current_state = state_manager.merge_state(
                     previous_state, manifest, manifest_nodes, generated_files, removed_models
                 )
             else:
-                new_state = state_manager.create_state_from_results(
+                current_state = state_manager.create_state_from_results(
                     manifest, manifest_nodes, generated_files
                 )
-            state_manager.save_state(new_state)
-            click.echo(f" State saved to {state_path}")
+        else:
+            # No changes - use previous state or create empty one
+            current_state = previous_state or state_manager.create_state_from_results(
+                manifest, manifest_nodes, {}
+            )
+
+        # Save cube sync state
+        state_manager.save_state(current_state)
+        click.echo(f" State saved to {state_path}")
+
+        if cube_sync_error:
+            click.echo(f" Error during cube generation: {cube_sync_error}", err=True)
+
+        # Build a mapping from model name (file stem) to node_id for status updates
+        model_name_to_node_id = {}
+        for node_id in current_state.models.keys():
+            # Extract model name from output file (e.g., "model/cubes/ModelName.js" -> "ModelName")
+            output_file = current_state.models[node_id].output_file
+            model_name = Path(output_file).stem
+            model_name_to_node_id[model_name] = node_id
 
         # ============================================================
         # STEP 2: Sync to Superset (if configured)
         # ============================================================
-        if superset_url and superset_username and superset_password:
-            click.echo("\n[2/3] Cube.js → Superset")
-            click.echo("-" * 40)
+        click.echo("\n[2/3] Cube.js → Superset")
+        click.echo("-" * 40)
 
-            if not changes_detected and not force_full_sync:
-                click.echo(" Skipped - no changes detected")
-            else:
-                connector_config = {
-                    'url': superset_url,
-                    'username': superset_username,
-                    'password': superset_password,
-                    'database_name': cube_connection_name
-                }
-
-                connector = ConnectorRegistry.get_connector('superset', **connector_config)
-                results = connector.sync_cube_schemas(output)
-
-                successful = sum(1 for r in results if r.status == 'success')
-                failed = sum(1 for r in results if r.status == 'failed')
-                click.echo(f" Synced: {successful} successful, {failed} failed")
-        else:
-            click.echo("\n[2/3] Cube.js → Superset")
-            click.echo("-" * 40)
+        if not superset_url or not superset_username or not superset_password:
             click.echo(" Skipped - no Superset credentials provided")
+        else:
+            # Get models that need Superset sync (status is None or 'failed')
+            models_to_sync_ids = state_manager.get_models_needing_sync(current_state, 'superset')
+
+            if not models_to_sync_ids and not force_full_sync:
+                click.echo(" Skipped - all models already synced successfully")
+            else:
+                # Convert node_ids to model names for filtering
+                models_to_sync_names = set()
+                for node_id in models_to_sync_ids:
+                    if node_id in current_state.models:
+                        output_file = current_state.models[node_id].output_file
+                        model_name = Path(output_file).stem
+                        models_to_sync_names.add(model_name)
+
+                try:
+                    connector_config = {
+                        'url': superset_url,
+                        'username': superset_username,
+                        'password': superset_password,
+                        'database_name': cube_connection_name
+                    }
+
+                    connector = ConnectorRegistry.get_connector('superset', **connector_config)
+
+                    if force_full_sync:
+                        results = connector.sync_cube_schemas(output)
+                    else:
+                        results = connector.sync_cube_schemas(output, models_to_sync_names)
+
+                    # Update per-model status
+                    for r in results:
+                        model_name = r.file_or_dataset.replace('.js', '')
+                        node_id = model_name_to_node_id.get(model_name)
+                        if node_id:
+                            state_manager.update_model_sync_status(
+                                current_state, node_id, 'superset',
+                                'success' if r.status == 'success' else 'failed'
+                            )
+
+                    successful = sum(1 for r in results if r.status == 'success')
+                    failed = sum(1 for r in results if r.status == 'failed')
+                    click.echo(f" Synced: {successful} successful, {failed} failed")
+
+                except Exception as e:
+                    click.echo(f" Error: {str(e)}", err=True)
+                    # Mark all models we tried to sync as failed
+                    for node_id in models_to_sync_ids:
+                        state_manager.update_model_sync_status(
+                            current_state, node_id, 'superset', 'failed'
+                        )
+
+            state_manager.save_state(current_state)
 
         # ============================================================
         # STEP 3: Update RAG embeddings (if configured)
         # ============================================================
-        if rag_api_url:
-            click.echo("\n[3/3] Update RAG embeddings")
-            click.echo("-" * 40)
+        click.echo("\n[3/3] Update RAG embeddings")
+        click.echo("-" * 40)
 
-            if not changes_detected and not force_full_sync:
-                click.echo(" Skipped - no changes detected")
+        if not rag_api_url:
+            click.echo(" Skipped - no RAG API URL provided")
+        else:
+            # Get models that need RAG sync (status is None or 'failed')
+            models_to_embed_ids = state_manager.get_models_needing_sync(current_state, 'rag')
+
+            if not models_to_embed_ids and not force_full_sync:
+                click.echo(" Skipped - all models already synced successfully")
             else:
+                if force_full_sync:
+                    models_to_embed_ids = set(current_state.models.keys())
+
                 try:
                     # Call the RAG API to re-ingest embeddings
                     response = requests.post(
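The per-model bookkeeping above hinges on mapping Superset results back to dbt node_ids: state entries are keyed by node_id, while SyncResult.file_or_dataset carries the generated file name, so both sides are normalized to the file stem. A standalone sketch of that mapping with illustrative values:

    # Illustrative values; mirrors the stem-based lookup used above.
    from pathlib import Path

    models = {"model.demo.orders": "cubes/Orders.js"}  # node_id -> output_file
    model_name_to_node_id = {Path(f).stem: nid for nid, f in models.items()}

    result_file = "Orders.js"  # what SyncResult.file_or_dataset would carry
    node_id = model_name_to_node_id.get(result_file.replace(".js", ""))
    print(node_id)  # model.demo.orders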
@@ -484,14 +550,27 @@ def sync_all(
                     if response.status_code == 200:
                         result = response.json()
                         click.echo(f" Ingested {result.get('schemas_ingested', 0)} schema documents")
+                        # Mark all models as succeeded
+                        for node_id in models_to_embed_ids:
+                            state_manager.update_model_sync_status(
+                                current_state, node_id, 'rag', 'success'
+                            )
                     else:
-                        click.echo(f" Warning: RAG API returned {response.status_code}", err=True)
+                        click.echo(f" Error: RAG API returned {response.status_code}", err=True)
+                        # Mark all models as failed
+                        for node_id in models_to_embed_ids:
+                            state_manager.update_model_sync_status(
+                                current_state, node_id, 'rag', 'failed'
+                            )
                 except requests.RequestException as e:
-                    click.echo(f" Warning: Could not reach RAG API: {e}", err=True)
-        else:
-            click.echo("\n[3/3] Update RAG embeddings")
-            click.echo("-" * 40)
-            click.echo(" Skipped - no RAG API URL provided")
+                    click.echo(f" Error: Could not reach RAG API: {e}", err=True)
+                    # Mark all models as failed
+                    for node_id in models_to_embed_ids:
+                        state_manager.update_model_sync_status(
+                            current_state, node_id, 'rag', 'failed'
+                        )
+
+            state_manager.save_state(current_state)
 
         # ============================================================
         # Summary
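Unlike the Superset step, RAG status is tracked at run granularity: every node_id in models_to_embed_ids is marked 'success' or 'failed' together, based on one API response. A condensed sketch of that contract — the real requests.post(...) arguments fall between the hunks above, so the bare rag_api_url call here is a stand-in:

    # Condensed sketch of the all-or-nothing RAG status update; the actual
    # endpoint and payload are elided in this diff, so this call is a stand-in.
    import requests

    def mark_rag_run(state_manager, state, node_ids, rag_api_url):
        try:
            ok = requests.post(rag_api_url, timeout=60).status_code == 200
        except requests.RequestException:
            ok = False
        for node_id in node_ids:
            state_manager.update_model_sync_status(
                state, node_id, 'rag', 'success' if ok else 'failed'
            )
        return ok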
@@ -500,12 +579,36 @@ def sync_all(
         click.echo("SYNC COMPLETE")
         click.echo("=" * 60)
 
+        # Get per-model sync summaries
+        superset_summary = state_manager.get_sync_summary(current_state, 'superset')
+        rag_summary = state_manager.get_sync_summary(current_state, 'rag')
+
+        def format_summary(summary, step_configured):
+            if not step_configured:
+                return "skipped (not configured)"
+            if summary['failed'] > 0:
+                return f"{summary['success']} success, {summary['failed']} failed (will retry)"
+            elif summary['pending'] > 0:
+                return f"{summary['success']} success, {summary['pending']} pending"
+            else:
+                return f"{summary['success']} success"
+
+        click.echo(f" Cube.js files: {len(current_state.models)} models")
+        click.echo(f" Superset sync: {format_summary(superset_summary, superset_url)}")
+        click.echo(f" RAG sync: {format_summary(rag_summary, rag_api_url)}")
+
         if changes_detected or force_full_sync:
             click.echo(f" Models processed: {len(added_models) + len(modified_models)}")
             click.echo(f" Models removed: {len(removed_models)}")
             click.echo(f" Cube.js files generated: {len(generated_files)}")
         else:
-            click.echo(" No changes - everything is up to date")
+            click.echo(" No model changes detected")
+
+        # Exit with error if any models failed
+        any_failed = superset_summary['failed'] > 0 or rag_summary['failed'] > 0
+        if any_failed:
+            click.echo("\n ⚠️ Some models failed - they will be retried on next run")
+            sys.exit(1)
 
     except Exception as e:
         click.echo(f"Error: {str(e)}", err=True)

dbt_cube_sync/connectors/superset.py CHANGED
@@ -123,20 +123,30 @@ class SupersetConnector(BaseConnector):
         self.database_id = result[0]['id']
         print(f"✓ Found database '{database_name}' with ID: {self.database_id}")
 
-    def sync_cube_schemas(self, cube_dir: str) -> List[SyncResult]:
-        """Sync all Cube.js schemas from directory to Superset"""
+    def sync_cube_schemas(self, cube_dir: str, models_filter: set = None) -> List[SyncResult]:
+        """Sync Cube.js schemas from directory to Superset
+
+        Args:
+            cube_dir: Directory containing Cube.js schema files
+            models_filter: Optional set of model names to sync. If None, sync all.
+        """
         results = []
         cube_files = self._get_cube_files(cube_dir)
-
+
         if not cube_files:
             return [SyncResult(
                 file_or_dataset="No files",
-                status="failed",
+                status="failed",
                 message=f"No .js files found in {cube_dir}"
             )]
-
-        print(f"🔍 Found {len(cube_files)} Cube.js files")
-
+
+        # Filter files if models_filter is provided
+        if models_filter:
+            cube_files = [f for f in cube_files if f.stem in models_filter]
+            print(f"🔍 Syncing {len(cube_files)} Cube.js files (filtered from {len(self._get_cube_files(cube_dir))})")
+        else:
+            print(f"🔍 Found {len(cube_files)} Cube.js files")
+
         for cube_file in cube_files:
             try:
                 print(f"\n{'='*60}")
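A short usage sketch of the new models_filter parameter. The get_connector call mirrors the CLI above; the registry import path and connection values are assumptions, not confirmed by this diff. Also note that an empty set is falsy, so "if models_filter:" falls through to the unfiltered branch:

    # Sketch: sync only two named models. Import path and credentials are
    # placeholders/assumptions; the registry call mirrors the CLI above.
    from dbt_cube_sync.connectors.base import ConnectorRegistry

    connector = ConnectorRegistry.get_connector(
        'superset',
        url='http://localhost:8088',
        username='admin',
        password='admin',
        database_name='cube',
    )

    results = connector.sync_cube_schemas('./cubes', {'Orders', 'Customers'})
    for r in results:
        print(r.file_or_dataset, r.status, r.message)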

dbt_cube_sync/core/models.py CHANGED
@@ -112,11 +112,14 @@ class ModelState(BaseModel):
     has_metrics: bool
     last_generated: str
     output_file: str
+    # Per-model sync status for each step
+    superset_sync_status: Optional[str] = None  # 'success', 'failed', or None (not attempted)
+    rag_sync_status: Optional[str] = None  # 'success', 'failed', or None (not attempted)
 
 
 class SyncState(BaseModel):
     """Represents the overall state for incremental sync"""
-    version: str = "1.0"
+    version: str = "1.1"
     last_sync_timestamp: str
     manifest_path: str
     models: Dict[str, ModelState] = {}
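For reference, a plausible serialized entry under state version "1.1" — field names come from the models above; the concrete values and on-disk layout are illustrative, not taken from the package:

    # Illustrative shape of a version-1.1 state entry (values are made up).
    example_state = {
        "version": "1.1",
        "last_sync_timestamp": "2025-01-01T00:00:00Z",
        "manifest_path": "target/manifest.json",
        "models": {
            "model.demo.orders": {
                "checksum": "3f9a0c",
                "has_metrics": True,
                "last_generated": "2025-01-01T00:00:00Z",
                "output_file": "cubes/Orders.js",
                "superset_sync_status": "success",
                "rag_sync_status": None,  # not attempted yet -> counted as pending
            },
        },
    }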

dbt_cube_sync/core/state_manager.py CHANGED
@@ -179,15 +179,18 @@ class StateManager:
             node_data.get("config", {}).get("meta", {}).get("metrics")
         )
 
+        # For newly generated/modified models, reset sync status (they need to be re-synced)
         models[node_id] = ModelState(
             checksum=checksum,
             has_metrics=has_metrics,
             last_generated=timestamp,
             output_file=output_file,
+            superset_sync_status=None,  # Reset - needs sync
+            rag_sync_status=None,  # Reset - needs sync
         )
 
     return SyncState(
-        version="1.0",
+        version="1.1",
         last_sync_timestamp=timestamp,
         manifest_path=str(manifest_path),
         models=models,
@@ -219,3 +222,81 @@ class StateManager:
             files_to_delete.append(output_file)
 
         return files_to_delete
+
+    def get_models_needing_sync(
+        self,
+        state: SyncState,
+        step: str,
+    ) -> Set[str]:
+        """
+        Get node_ids of models that need to be synced for a step.
+
+        A model needs sync if:
+        - Its sync status is None (never synced)
+        - Its sync status is 'failed' (needs retry)
+
+        Args:
+            state: Current sync state
+            step: Step name ('superset' or 'rag')
+
+        Returns:
+            Set of node_ids that need syncing
+        """
+        models_to_sync = set()
+        status_field = f"{step}_sync_status"
+
+        for node_id, model_state in state.models.items():
+            status = getattr(model_state, status_field, None)
+            if status is None or status == 'failed':
+                models_to_sync.add(node_id)
+
+        return models_to_sync
+
+    def update_model_sync_status(
+        self,
+        state: SyncState,
+        node_id: str,
+        step: str,
+        status: str,
+    ) -> None:
+        """
+        Update the sync status of a model for a specific step.
+
+        Args:
+            state: Current sync state
+            node_id: The model's node_id
+            step: Step name ('superset' or 'rag')
+            status: Status to set ('success' or 'failed')
+        """
+        if node_id in state.models:
+            status_field = f"{step}_sync_status"
+            setattr(state.models[node_id], status_field, status)
+
+    def get_sync_summary(
+        self,
+        state: SyncState,
+        step: str,
+    ) -> Dict[str, int]:
+        """
+        Get a summary of sync status for a step.
+
+        Args:
+            state: Current sync state
+            step: Step name ('superset' or 'rag')
+
+        Returns:
+            Dict with counts: {'success': N, 'failed': N, 'pending': N}
+        """
+        status_field = f"{step}_sync_status"
+        summary = {'success': 0, 'failed': 0, 'pending': 0}
+
+        for model_state in state.models.values():
+            status = getattr(model_state, status_field, None)
+            if status == 'success':
+                summary['success'] += 1
+            elif status == 'failed':
+                summary['failed'] += 1
+            else:
+                summary['pending'] += 1
+
+        return summary
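An end-to-end sketch of how the three helpers compose — module paths are taken from the RECORD below, and all values are illustrative:

    # Sketch: per-model status drives retries and the summary counts.
    from dbt_cube_sync.core.models import ModelState, SyncState
    from dbt_cube_sync.core.state_manager import StateManager

    sm = StateManager("state.json")
    state = SyncState(
        last_sync_timestamp="2025-01-01T00:00:00Z",
        manifest_path="target/manifest.json",
        models={
            "model.demo.orders": ModelState(
                checksum="aaa", has_metrics=True,
                last_generated="2025-01-01T00:00:00Z", output_file="cubes/Orders.js",
            ),
            "model.demo.customers": ModelState(
                checksum="bbb", has_metrics=False,
                last_generated="2025-01-01T00:00:00Z", output_file="cubes/Customers.js",
            ),
        },
    )

    sm.update_model_sync_status(state, "model.demo.orders", "superset", "success")
    sm.update_model_sync_status(state, "model.demo.customers", "superset", "failed")

    # Only the failed model is selected again on the next run.
    print(sm.get_models_needing_sync(state, "superset"))  # {'model.demo.customers'}
    print(sm.get_sync_summary(state, "superset"))
    # {'success': 1, 'failed': 1, 'pending': 0}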

dbt_cube_sync-0.1.0a12.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dbt-cube-sync
-Version: 0.1.0a10
+Version: 0.1.0a12
 Summary: Synchronization tool for dbt models to Cube.js schemas and BI tools
 Author: Ponder
 Requires-Python: >=3.9,<4.0

dbt_cube_sync-0.1.0a12.dist-info/RECORD CHANGED
@@ -1,18 +1,18 @@
 dbt_cube_sync/__init__.py,sha256=aifkfgUDRPL5v0LZzceH2LXu66YDkJjdpvKwXsdikbI,113
-dbt_cube_sync/cli.py,sha256=AxSVF3hJJqovk51mjA8Nyyte5NkfukSF3sAjk_VYJ6Y,20992
+dbt_cube_sync/cli.py,sha256=hitf_aAyjBfL4-t2eXTubavQpMMHuIpzIoOZcqzoGx0,26060
 dbt_cube_sync/config.py,sha256=qhGE7CxTmh0RhPizgd3x3Yj-3L2LoC00UQIDT0q9FlQ,3858
 dbt_cube_sync/connectors/__init__.py,sha256=NG6tYZ3CYD5bG_MfNLZrUM8YoBEKArG8-AOmJ8pwvQI,52
 dbt_cube_sync/connectors/base.py,sha256=JLzerxJdt34z0kWuyieL6UQhf5_dUYPGmwkiRWBuSPY,2802
 dbt_cube_sync/connectors/powerbi.py,sha256=2Y8fTfh_6Q_Myma1ymipPh1U3HsfQKcktVequXXnIXI,1275
-dbt_cube_sync/connectors/superset.py,sha256=D_pWTN0F84mUosnsm-NG_v9IlVE2dviIDh08WxHEOIA,21709
+dbt_cube_sync/connectors/superset.py,sha256=jxyPTbw4d9HXrZD4DwD36erkw4Zl9XRFRrvlH2xZT5I,22171
 dbt_cube_sync/connectors/tableau.py,sha256=jKve1zErzTbgPOtmPB92ZwZl4I6uEySedM51JiwlGrE,1261
 dbt_cube_sync/core/__init__.py,sha256=kgsawtU5dqEvnHz6dU8qwJbH3rtIV7QlK2MhtYVDCaY,46
 dbt_cube_sync/core/cube_generator.py,sha256=DtmaA_dtWmBVJnSWHVoQi-3KEsRc0axHZpCUEcKeYAk,11061
 dbt_cube_sync/core/db_inspector.py,sha256=V_cd12FBXj-1gB2JZeYmkQluUO-UYufy_tvfYoJXCGI,5073
 dbt_cube_sync/core/dbt_parser.py,sha256=KbhDoB0ULP6JDUPZPDVbm9yCtRKrW17ptGoJvVLtueY,12763
-dbt_cube_sync/core/models.py,sha256=2s5iZ9MEBGfSzkB4HJB5vG0mZqNXNJSfAD3Byw1IVe4,3203
-dbt_cube_sync/core/state_manager.py,sha256=7uXJtlZBIWj6s6XgAhNlP6UHdfhH0y461iyQlfidqGI,7233
-dbt_cube_sync-0.1.0a10.dist-info/METADATA,sha256=Foy8KI7-ILdZwTvejfjukArtnGNJJg85vwIlQoMS31w,10681
-dbt_cube_sync-0.1.0a10.dist-info/WHEEL,sha256=3ny-bZhpXrU6vSQ1UPG34FoxZBp3lVcvK0LkgUz6VLk,88
-dbt_cube_sync-0.1.0a10.dist-info/entry_points.txt,sha256=iEAB_nZ1AoSeFwSHPY2tr02xmTHLVFKp5CJeFh0AfCw,56
-dbt_cube_sync-0.1.0a10.dist-info/RECORD,,
+dbt_cube_sync/core/models.py,sha256=poUZMOpVUyFCb7i5jzMMLnI49NizV5SmD2RkJ2oQI3I,3430
+dbt_cube_sync/core/state_manager.py,sha256=Fs16EdUDGpL4QHJfbXrOZEilLyccRjA6jw5_dIykgxw,9702
+dbt_cube_sync-0.1.0a12.dist-info/METADATA,sha256=7PpodbMF_avVumilIy8AKyWhQkesmsRyRLX7qG-GuWg,10681
+dbt_cube_sync-0.1.0a12.dist-info/WHEEL,sha256=3ny-bZhpXrU6vSQ1UPG34FoxZBp3lVcvK0LkgUz6VLk,88
+dbt_cube_sync-0.1.0a12.dist-info/entry_points.txt,sha256=iEAB_nZ1AoSeFwSHPY2tr02xmTHLVFKp5CJeFh0AfCw,56
+dbt_cube_sync-0.1.0a12.dist-info/RECORD,,