churnkit 0.76.0a3__py3-none-any.whl → 0.76.1a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/00_start_here.ipynb +10 -5
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb +11 -9
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb +52 -46
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb +68 -65
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb +12 -27
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb +216 -221
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb +88 -81
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb +111 -108
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb +44 -38
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb +89 -85
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb +81 -80
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb +83 -89
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb +102 -98
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb +32 -31
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb +33 -29
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb +6 -5
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb +67 -63
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb +38 -23
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb +3 -1
- {churnkit-0.76.0a3.dist-info → churnkit-0.76.1a2.dist-info}/METADATA +1 -1
- {churnkit-0.76.0a3.dist-info → churnkit-0.76.1a2.dist-info}/RECORD +31 -31
- customer_retention/__init__.py +1 -1
- customer_retention/analysis/auto_explorer/explorer.py +2 -2
- customer_retention/analysis/notebook_progress.py +14 -2
- customer_retention/core/compat/__init__.py +10 -0
- customer_retention/core/config/experiments.py +45 -0
- customer_retention/integrations/databricks_init.py +41 -1
- customer_retention/stages/profiling/column_profiler.py +9 -2
- {churnkit-0.76.0a3.dist-info → churnkit-0.76.1a2.dist-info}/WHEEL +0 -0
- {churnkit-0.76.0a3.dist-info → churnkit-0.76.1a2.dist-info}/entry_points.txt +0 -0
- {churnkit-0.76.0a3.dist-info → churnkit-0.76.1a2.dist-info}/licenses/LICENSE +0 -0
{churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb

@@ -95,36 +95,30 @@
 "outputs": [],
 "source": [
 "from customer_retention.analysis.notebook_progress import track_and_export_previous\n",
+"\n",
 "track_and_export_previous(\"05_multi_dataset.ipynb\")\n",
 "\n",
+"import pandas as pd\n",
+"import plotly.graph_objects as go\n",
+"import yaml\n",
+"from plotly.subplots import make_subplots\n",
+"\n",
 "from customer_retention.analysis.auto_explorer import (\n",
 " ExplorationManager,\n",
-" MultiDatasetFindings,\n",
-" ExplorationFindings,\n",
 " RecommendationRegistry,\n",
 ")\n",
+"from customer_retention.analysis.visualization import display_figure\n",
+"from customer_retention.core.config.column_config import ColumnType, DatasetGranularity\n",
+"from customer_retention.core.config.experiments import FINDINGS_DIR\n",
 "from customer_retention.stages.profiling import (\n",
+" DimensionReductionMethod,\n",
+" FeatureCapacityAnalyzer,\n",
+" FeatureGroup,\n",
+" ReferenceMode,\n",
 " RelationshipDetector,\n",
-" TimeWindowAggregator,\n",
-" RelationshipType,\n",
 " SegmentAnalyzer,\n",
-" SegmentationMethod,\n",
-" FeatureCapacityAnalyzer,\n",
-" TemporalFeatureEngineer,\n",
 " TemporalAggregationConfig,\n",
-"
-" FeatureGroup,\n",
-" DimensionReductionMethod,\n",
-")\n",
-"from customer_retention.analysis.visualization import ChartBuilder, display_figure, display_table\n",
-"from customer_retention.core.config.column_config import DatasetGranularity, ColumnType\n",
-"from pathlib import Path\n",
-"import yaml\n",
-"import pandas as pd\n",
-"import numpy as np\n",
-"import plotly.graph_objects as go\n",
-"from plotly.subplots import make_subplots\n",
-"from customer_retention.core.config.experiments import FINDINGS_DIR, EXPERIMENTS_DIR, OUTPUT_DIR, setup_experiments_structure\n"
+")\n"
 ]
 },
 {
@@ -211,7 +205,7 @@
 "for ds in datasets:\n",
 " granularity_emoji = \"\\U0001f4ca\" if ds.granularity == DatasetGranularity.ENTITY_LEVEL else \"\\U0001f4c8\"\n",
 " target_info = f\" [TARGET: {ds.target_column}]\" if ds.target_column else \"\"\n",
-"
+"\n",
 " print(f\"{granularity_emoji} {ds.name}\")\n",
 " print(f\" Granularity: {ds.granularity.value}\")\n",
 " print(f\" Rows: {ds.row_count:,} | Columns: {ds.column_count}\")\n",
@@ -268,7 +262,7 @@
 " names = [ds.name for ds in datasets]\n",
 " rows = [ds.row_count for ds in datasets]\n",
 " cols = [ds.column_count for ds in datasets]\n",
-" granularities = [\"Entity\" if ds.granularity == DatasetGranularity.ENTITY_LEVEL else \"Event\"
+" granularities = [\"Entity\" if ds.granularity == DatasetGranularity.ENTITY_LEVEL else \"Event\"\n",
 " for ds in datasets]\n",
 " colors = [\"#2ecc71\" if ds.granularity == DatasetGranularity.ENTITY_LEVEL else \"#3498db\"\n",
 " for ds in datasets]\n",
@@ -323,7 +317,7 @@
 " text=\"<b>Primary Entity</b>\", showarrow=False, font=dict(size=11, color=\"#666\"), xanchor=\"left\"))\n",
 " y_pos -= 0.06\n",
 " annotations.append(dict(x=0.01, y=y_pos, xref=\"paper\", yref=\"paper\",\n",
-" text=f\"<span style='color:{primary_color}'>{primary_name}</span>\"
+" text=f\"<span style='color:{primary_color}'>{primary_name}</span>\",\n",
 " showarrow=False, font=dict(size=12), xanchor=\"left\"))\n",
 " y_pos -= 0.10\n",
 "\n",
@@ -331,7 +325,7 @@
 " annotations.append(dict(x=0.01, y=y_pos, xref=\"paper\", yref=\"paper\",\n",
 " text=\"<b>Event Datasets</b>\", showarrow=False, font=dict(size=11, color=\"#666\"), xanchor=\"left\"))\n",
 " y_pos -= 0.06\n",
-"
+"\n",
 " if multi.event_datasets:\n",
 " # Show each event dataset on its own line (supports 20+ datasets)\n",
 " max_display = min(len(multi.event_datasets), 8) # Show up to 8, then summarize\n",
@@ -339,7 +333,7 @@
 " annotations.append(dict(x=0.03, y=y_pos, xref=\"paper\", yref=\"paper\",\n",
 " text=f\"• {event_name}\", showarrow=False, font=dict(size=10, color=\"#3498db\"), xanchor=\"left\"))\n",
 " y_pos -= 0.045\n",
-"
+"\n",
 " if len(multi.event_datasets) > max_display:\n",
 " remaining = len(multi.event_datasets) - max_display\n",
 " annotations.append(dict(x=0.03, y=y_pos, xref=\"paper\", yref=\"paper\",\n",
@@ -369,7 +363,7 @@
 " # Hide axes on left panel\n",
 " fig.update_xaxes(visible=False, row=1, col=1)\n",
 " fig.update_yaxes(visible=False, row=1, col=1)\n",
-"
+"\n",
 " # Configure horizontal bar axes\n",
 " fig.update_yaxes(categoryorder='total ascending', row=1, col=2)\n",
 " fig.update_yaxes(categoryorder='total ascending', row=2, col=2)\n",
@@ -502,18 +496,18 @@
 "# If we have a primary entity dataset and event datasets, try to detect relationships\n",
 "if multi.primary_entity_dataset and multi.event_datasets:\n",
 " primary_info = multi.datasets[multi.primary_entity_dataset]\n",
-"
+"\n",
 " print(f\"Primary dataset: {multi.primary_entity_dataset}\")\n",
-" print(
-"
+" print(\"Checking relationships with event datasets...\\n\")\n",
+"\n",
 " for event_name in multi.event_datasets:\n",
 " event_info = multi.datasets[event_name]\n",
-"
+"\n",
 " # Check if they share common column names\n",
 " if event_info.entity_column:\n",
 " print(f\"\\U0001f517 {multi.primary_entity_dataset} <-> {event_name}\")\n",
 " print(f\" Potential join column: {event_info.entity_column}\")\n",
-" print(
+" print(\" Expected relationship: one_to_many\")\n",
 " print()\n",
 "else:\n",
 " print(\"Not enough datasets to detect relationships.\")\n",
@@ -667,7 +661,7 @@
 "print(\"=\"*70 + \"\\n\")\n",
 "\n",
 "for group in FeatureGroup:\n",
-" enabled = \"✓\" if group in [FeatureGroup.LAGGED_WINDOWS, FeatureGroup.VELOCITY
+" enabled = \"✓\" if group in [FeatureGroup.LAGGED_WINDOWS, FeatureGroup.VELOCITY,\n",
 " FeatureGroup.RECENCY, FeatureGroup.REGULARITY] else \"○\"\n",
 " print(f\" {enabled} {group.value}\")"
 ]
@@ -748,14 +742,14 @@
 "for dataset_name in multi.event_datasets:\n",
 " ds_info = multi.datasets[dataset_name]\n",
 " findings = manager.load_findings(dataset_name)\n",
-"
+"\n",
 " if findings:\n",
 " numeric_cols = [\n",
 " name for name, col in findings.columns.items()\n",
 " if col.inferred_type in [ColumnType.NUMERIC_CONTINUOUS, ColumnType.NUMERIC_DISCRETE]\n",
 " and name not in [ds_info.entity_column, ds_info.time_column] and name not in TEMPORAL_METADATA_COLS\n",
 " ]\n",
-"
+"\n",
 " if numeric_cols:\n",
 " registry.add_silver_temporal_config(\n",
 " source_dataset=dataset_name,\n",
@@ -830,13 +824,13 @@
 "\n",
 "for dataset_name in multi.event_datasets:\n",
 " ds_info = multi.datasets[dataset_name]\n",
-"
+"\n",
 " print(f\"\\U0001f4c8 From {dataset_name}:\")\n",
 " print()\n",
-"
+"\n",
 " # Load findings to see numeric columns\n",
 " findings = manager.load_findings(dataset_name)\n",
-"
+"\n",
 " # Find numeric columns that could be aggregated\n",
 " numeric_cols = []\n",
 " if findings:\n",
@@ -845,7 +839,7 @@
 " if col.inferred_type in [ColumnType.NUMERIC_CONTINUOUS, ColumnType.NUMERIC_DISCRETE]\n",
 " and name not in [ds_info.entity_column, ds_info.time_column] and name not in TEMPORAL_METADATA_COLS\n",
 " ]\n",
-"
+"\n",
 " # Group 1: Lagged Window Features\n",
 " if FeatureGroup.LAGGED_WINDOWS in FEATURE_GROUPS:\n",
 " print(\" 📊 LAGGED WINDOWS (Group 1):\")\n",
@@ -853,21 +847,21 @@
 " features = [f\"lag{i}_{col}_{agg}\" for i in range(NUM_LAGS) for agg in LAG_AGGREGATIONS[:2]]\n",
 " print(f\" {col}: {features[:4]}...\")\n",
 " print(f\" Total: {len(numeric_cols)} cols × {NUM_LAGS} lags × {len(LAG_AGGREGATIONS)} aggs\")\n",
-"
+"\n",
 " # Group 2: Velocity Features\n",
 " if FeatureGroup.VELOCITY in FEATURE_GROUPS:\n",
 " print(\"\\n 🚀 VELOCITY (Group 2):\")\n",
 " for col in numeric_cols[:2]:\n",
 " print(f\" - {col}_velocity, {col}_velocity_pct\")\n",
 " print(f\" Total: {len(numeric_cols)} cols × 2 features\")\n",
-"
+"\n",
 " # Group 3: Acceleration Features\n",
 " if FeatureGroup.ACCELERATION in FEATURE_GROUPS:\n",
 " print(\"\\n ⚡ ACCELERATION (Group 3):\")\n",
 " for col in numeric_cols[:2]:\n",
 " print(f\" - {col}_acceleration, {col}_momentum\")\n",
 " print(f\" Total: {len(numeric_cols)} cols × 2 features\")\n",
-"
+"\n",
 " # Group 4: Lifecycle Features\n",
 " if FeatureGroup.LIFECYCLE in FEATURE_GROUPS:\n",
 " print(\"\\n 📈 LIFECYCLE (Group 4):\")\n",
@@ -875,7 +869,7 @@
 " print(f\" - {col}_beginning, {col}_middle, {col}_end, {col}_trend_ratio\")\n",
 " print(f\" Total: {len(numeric_cols)} cols × 4 features\")\n",
 " print(f\" ℹ️ Requires {MIN_HISTORY_DAYS}+ days of history (else NaN)\")\n",
-"
+"\n",
 " # Group 5: Recency Features\n",
 " if FeatureGroup.RECENCY in FEATURE_GROUPS:\n",
 " print(\"\\n ⏱️ RECENCY (Group 5):\")\n",
@@ -883,7 +877,7 @@
 " print(\" - days_since_first_event\")\n",
 " print(\" - active_span_days\")\n",
 " print(\" - recency_ratio\")\n",
-"
+"\n",
 " # Group 6: Regularity Features\n",
 " if FeatureGroup.REGULARITY in FEATURE_GROUPS:\n",
 " print(\"\\n 🎯 REGULARITY (Group 6):\")\n",
@@ -891,14 +885,14 @@
 " print(\" - inter_event_gap_mean\")\n",
 " print(\" - inter_event_gap_std\")\n",
 " print(\" - regularity_score\")\n",
-"
+"\n",
 " # Group 7: Cohort Comparison\n",
 " if FeatureGroup.COHORT_COMPARISON in FEATURE_GROUPS:\n",
 " print(\"\\n 👥 COHORT COMPARISON (Group 7):\")\n",
 " for col in numeric_cols[:2]:\n",
 " print(f\" - {col}_vs_cohort_mean, {col}_vs_cohort_pct, {col}_cohort_zscore\")\n",
 " print(f\" Total: {len(numeric_cols)} cols × 3 features\")\n",
-"
+"\n",
 " # Summary\n",
 " total_features = 0\n",
 " if FeatureGroup.LAGGED_WINDOWS in FEATURE_GROUPS:\n",
@@ -915,7 +909,7 @@
 " total_features += 4\n",
 " if FeatureGroup.COHORT_COMPARISON in FEATURE_GROUPS:\n",
 " total_features += len(numeric_cols) * 3\n",
-"
+"\n",
 " print(f\"\\n 📝 TOTAL ESTIMATED FEATURES: ~{total_features}\")\n",
 " print()"
 ]
@@ -1000,12 +994,12 @@
 "if multi.primary_entity_dataset:\n",
 " primary_info = multi.datasets[multi.primary_entity_dataset]\n",
 " primary_findings = manager.load_findings(multi.primary_entity_dataset)\n",
-"
+"\n",
 " if primary_findings:\n",
 " # Load the primary dataset from snapshot (not source) to get correct column names\n",
 " primary_df, data_source = load_data_with_snapshot_preference(primary_findings, output_dir=str(FINDINGS_DIR))\n",
 " print(f\" Loaded from: {data_source}\")\n",
-"
+"\n",
 " # Get numeric features for clustering (exclude temporal metadata)\n",
 " from customer_retention.stages.temporal import TEMPORAL_METADATA_COLS\n",
 " numeric_features = [\n",
@@ -1014,12 +1008,12 @@
 " and name != primary_info.target_column\n",
 " and name not in TEMPORAL_METADATA_COLS\n",
 " ]\n",
-"
+"\n",
 " print(f\"\\n📊 Dataset: {multi.primary_entity_dataset}\")\n",
 " print(f\" Total Samples: {len(primary_df):,}\")\n",
 " print(f\" Numeric Features: {len(numeric_features)}\")\n",
 " print(f\" Target Column: {primary_info.target_column}\")\n",
-"
+"\n",
 " # Run full segmentation analysis using framework\n",
 " analysis = segment_analyzer.run_full_analysis(\n",
 " primary_df,\n",
@@ -1029,14 +1023,14 @@
 " dim_reduction=DimensionReductionMethod.PCA,\n",
 " )\n",
 " m = analysis.metrics # Shorthand for metrics\n",
-"
+"\n",
 " # ============================================================\n",
 " # KEY DECISION METRICS\n",
 " # ============================================================\n",
 " print(\"\\n\" + \"=\" * 70)\n",
 " print(\"📊 CLUSTERING DECISION METRICS\")\n",
 " print(\"=\" * 70)\n",
-"
+"\n",
 " print(f\"\"\"\n",
 "┌─────────────────────────────────────────────────────────────────────┐\n",
 "│ METRIC │ VALUE │ INTERPRETATION │\n",
@@ -1046,19 +1040,19 @@
 "│ Optimal Segments Found │ {m.n_segments} │ {m.segments_interpretation:<18} │\n",
 "│ Overall Confidence │ {m.confidence:.0%} │ {m.confidence_interpretation:<18} │\n",
 "└──────────────────────────────────┴─────────────┴────────────────────┘\"\"\")\n",
-"
+"\n",
 " print(f\"\\n🎯 RECOMMENDATION: {m.recommendation.upper().replace('_', ' ')}\")\n",
-" print(
+" print(\"\\n📋 Supporting Evidence:\")\n",
 " for r in m.rationale:\n",
 " print(f\" • {r}\")\n",
-"
+"\n",
 " # ============================================================\n",
 " # SEGMENT PROFILES\n",
 " # ============================================================\n",
 " print(\"\\n\" + \"=\" * 70)\n",
 " print(\"📊 SEGMENT PROFILES\")\n",
 " print(\"=\" * 70 + \"\\n\")\n",
-"
+"\n",
 " segment_data = [{\n",
 " \"Segment\": f\"Segment {p.segment_id}\",\n",
 " \"N (count)\": f\"{p.size:,}\",\n",
@@ -1067,34 +1061,34 @@
 " \"Viable for ML\": \"✓\" if p.size >= 100 else \"⚠️\"\n",
 " } for p in analysis.profiles]\n",
 " display(pd.DataFrame(segment_data))\n",
-"
+"\n",
 " sd = analysis.size_distribution\n",
-" print(
+" print(\"\\n📊 Size Distribution:\")\n",
 " print(f\" Total datapoints: {sd['total']:,}\")\n",
 " print(f\" Smallest segment: {sd['min_size']:,} ({sd['min_pct']:.1f}%)\")\n",
 " print(f\" Largest segment: {sd['max_size']:,} ({sd['max_pct']:.1f}%)\")\n",
 " print(f\" Balance ratio: {sd['balance_ratio']:.2f} (1.0 = perfectly balanced)\")\n",
-"
+"\n",
 " # ============================================================\n",
 " # CLUSTER VISUALIZATION\n",
 " # ============================================================\n",
 " if analysis.has_visualization:\n",
 " viz = analysis.visualization\n",
 " seg_result = analysis.segmentation_result\n",
-"
+"\n",
 " fig = make_subplots(\n",
 " rows=1, cols=3,\n",
 " subplot_titles=(\n",
-" f\"Cluster Visualization (PCA, {viz.explained_variance_ratio:.0%} var)\"
+" f\"Cluster Visualization (PCA, {viz.explained_variance_ratio:.0%} var)\"\n",
 " if viz.explained_variance_ratio else \"Cluster Visualization (PCA)\",\n",
 " \"Segment Sizes\", \"Target Rate\"\n",
 " ),\n",
 " horizontal_spacing=0.12,\n",
 " column_widths=[0.4, 0.3, 0.3]\n",
 " )\n",
-"
+"\n",
 " unique_labels = sorted(set(seg_result.labels[seg_result.labels >= 0]))\n",
-"
+"\n",
 " # Scatter plot - consistent colors by segment ID\n",
 " for label in unique_labels:\n",
 " mask = seg_result.labels == label\n",
@@ -1106,12 +1100,12 @@
 " marker=dict(color=color, size=6, opacity=0.6),\n",
 " name=name, hovertemplate=f\"{name}<br>PC1: %{{x:.2f}}<br>PC2: %{{y:.2f}}<extra></extra>\"\n",
 " ), row=1, col=1)\n",
-"
+"\n",
 " # Short labels for bar charts (avoid overlap)\n",
 " bar_labels = [f\"Seg {p.segment_id}\" for p in analysis.profiles]\n",
 " sizes = [p.size for p in analysis.profiles]\n",
 " bar_colors = [SEGMENT_COLORS.get(p.segment_id, '#888888') for p in analysis.profiles]\n",
-"
+"\n",
 " # Size bars - numbers inside\n",
 " fig.add_trace(go.Bar(\n",
 " y=bar_labels, x=sizes, orientation='h',\n",
@@ -1121,7 +1115,7 @@
 " showlegend=False,\n",
 " hovertemplate=\"Segment %{y}<br>Count: %{x:,}<extra></extra>\"\n",
 " ), row=1, col=2)\n",
-"
+"\n",
 " # Target rates - consistent segment colors, numbers inside\n",
 " if all(p.target_rate is not None for p in analysis.profiles):\n",
 " rates = [p.target_rate * 100 for p in analysis.profiles]\n",
@@ -1136,16 +1130,16 @@
 " overall = sum(p.target_rate * p.size for p in analysis.profiles) / sd['total'] * 100\n",
 " fig.add_vline(x=overall, line_dash=\"dash\", line_color=\"#2c3e50\",\n",
 " annotation_text=f\"Avg: {overall:.1f}%\", annotation_position=\"top\", row=1, col=3)\n",
-"
+"\n",
 " fig.update_layout(\n",
-" title=\"Segment Analysis Overview\"
-" height=400
+" title=\"Segment Analysis Overview\",\n",
+" height=400,\n",
 " template=\"plotly_white\",\n",
 " legend=dict(\n",
-" orientation=\"h\"
-" yanchor=\"top\"
+" orientation=\"h\",\n",
+" yanchor=\"top\",\n",
 " y=-0.15,\n",
-" xanchor=\"center\"
+" xanchor=\"center\",\n",
 " x=0.5\n",
 " ),\n",
 " margin=dict(r=20, b=80)\n",
@@ -1153,11 +1147,11 @@
 " fig.update_xaxes(title_text=\"PC1\", row=1, col=1)\n",
 " fig.update_yaxes(title_text=\"PC2\", row=1, col=1)\n",
 " display_figure(fig)\n",
-"
-" print(
+"\n",
+" print(\"\\n📈 CLUSTER VISUALIZATION:\")\n",
 " print(f\" Method: PCA | Variance Explained: {viz.explained_variance_ratio:.1%}\" if viz.explained_variance_ratio else \" Method: PCA\")\n",
-" print(
-"
+" print(\" Colors: Seg 0=Blue, Seg 1=Red, Seg 2=Green, Seg 3=Purple\")\n",
+"\n",
 " # ============================================================\n",
 " # EPV CAPACITY ANALYSIS\n",
 " # ============================================================\n",
@@ -1165,7 +1159,7 @@
 " print(\"\\n\" + \"=\" * 70)\n",
 " print(\"💡 SEGMENT CAPACITY ANALYSIS (EPV Check)\")\n",
 " print(\"=\" * 70)\n",
-"
+"\n",
 " primary_df['_segment'] = analysis.segmentation_result.labels\n",
 " capacity = capacity_analyzer.analyze_segment_capacity(\n",
 " primary_df[primary_df['_segment'] >= 0],\n",
@@ -1174,14 +1168,14 @@
 " segment_col='_segment'\n",
 " )\n",
 " primary_df.drop('_segment', axis=1, inplace=True)\n",
-"
+"\n",
 " print(f\"\\n🎯 Strategy: {capacity.recommended_strategy.upper()}\")\n",
 " print(f\" Reason: {capacity.strategy_reason}\")\n",
 " if capacity.viable_segments:\n",
 " print(f\"\\n ✅ Viable segments: {capacity.viable_segments}\")\n",
 " if capacity.insufficient_segments:\n",
 " print(f\" ⚠️ Insufficient segments: {capacity.insufficient_segments}\")\n",
-"
+"\n",
 " # Store in findings\n",
 " multi.notes.update({\n",
 " 'segmentation_recommendation': m.recommendation,\n",
@@ -1191,11 +1185,11 @@
 " 'segment_strategy': capacity.recommended_strategy,\n",
 " 'segment_sizes': {f\"segment_{p.segment_id}\": p.size for p in analysis.profiles}\n",
 " })\n",
-"
+"\n",
 " # Initialize bronze layer if not already done\n",
 " if registry.bronze is None:\n",
 " registry.init_bronze(primary_info.source_path)\n",
-"
+"\n",
 " # Persist segmentation strategy to registry\n",
 " registry.add_bronze_segmentation_strategy(\n",
 " strategy=m.recommendation,\n",
@@ -1206,7 +1200,7 @@
 " source_notebook=\"05_multi_dataset\"\n",
 " )\n",
 " print(f\"\\n✅ Persisted segmentation strategy to registry: {m.recommendation}\")\n",
-"
+"\n",
 " # ============================================================\n",
 " # DECISION SUMMARY\n",
 " # ============================================================\n",
@@ -1263,26 +1257,26 @@
 " print(\"\\n\" + \"=\"*70)\n",
 " print(\"DATASET RELATIONSHIP DIAGRAM\")\n",
 " print(\"=\"*70 + \"\\n\")\n",
-"
+"\n",
 " # ASCII diagram\n",
 " if multi.primary_entity_dataset:\n",
 " primary = multi.primary_entity_dataset\n",
 " primary_info = multi.datasets[primary]\n",
-"
+"\n",
 " print(f\" +{'='*30}+\")\n",
 " print(f\" | {primary:^26} | <- PRIMARY (has target)\")\n",
 " print(f\" | {primary_info.row_count:,} rows{' '*15} |\")\n",
 " if primary_info.target_column:\n",
 " print(f\" | Target: {primary_info.target_column:<17} |\")\n",
 " print(f\" +{'='*30}+\")\n",
-"
+"\n",
 " for event_name in multi.event_datasets:\n",
 " event_info = multi.datasets[event_name]\n",
 " join_col = event_info.entity_column or \"?\"\n",
-"
-" print(
+"\n",
+" print(\" |\")\n",
 " print(f\" | {join_col}\")\n",
-" print(
+" print(\" v\")\n",
 " print(f\" +{'-'*30}+\")\n",
 " print(f\" | {event_name:^26} | <- EVENT LEVEL\")\n",
 " print(f\" | {event_info.row_count:,} rows{' '*15} |\")\n",
@@ -1340,7 +1334,7 @@
 "registry.save(RECOMMENDATIONS_PATH)\n",
 "\n",
 "print(f\"\\n✅ Multi-dataset findings saved to: {MULTI_FINDINGS_PATH}\")\n",
-"print(
+"print(\"\\n Contents:\")\n",
 "print(f\" - {len(multi.datasets)} datasets\")\n",
 "print(f\" - {len(multi.relationships)} relationships\")\n",
 "print(f\" - {len(multi.event_datasets)} event datasets to aggregate\")\n",