churnkit-0.76.0a3-py3-none-any.whl → churnkit-0.76.1a2-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/00_start_here.ipynb +10 -5
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb +11 -9
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb +52 -46
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb +68 -65
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb +12 -27
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb +216 -221
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb +88 -81
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb +111 -108
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb +44 -38
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb +89 -85
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb +81 -80
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb +83 -89
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb +102 -98
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb +32 -31
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb +33 -29
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb +6 -5
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb +67 -63
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb +38 -23
- {churnkit-0.76.0a3.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb +3 -1
- {churnkit-0.76.0a3.dist-info → churnkit-0.76.1a2.dist-info}/METADATA +1 -1
- {churnkit-0.76.0a3.dist-info → churnkit-0.76.1a2.dist-info}/RECORD +31 -31
- customer_retention/__init__.py +1 -1
- customer_retention/analysis/auto_explorer/explorer.py +2 -2
- customer_retention/analysis/notebook_progress.py +14 -2
- customer_retention/core/compat/__init__.py +10 -0
- customer_retention/core/config/experiments.py +45 -0
- customer_retention/integrations/databricks_init.py +41 -1
- customer_retention/stages/profiling/column_profiler.py +9 -2
- {churnkit-0.76.0a3.dist-info → churnkit-0.76.1a2.dist-info}/WHEEL +0 -0
- {churnkit-0.76.0a3.dist-info → churnkit-0.76.1a2.dist-info}/entry_points.txt +0 -0
- {churnkit-0.76.0a3.dist-info → churnkit-0.76.1a2.dist-info}/licenses/LICENSE +0 -0
@@ -82,20 +82,23 @@
 "outputs": [],
 "source": [
 "from customer_retention.analysis.notebook_progress import track_and_export_previous\n",
+"\n",
 "track_and_export_previous(\"06_feature_opportunities.ipynb\")\n",
 "\n",
-"from customer_retention.analysis.auto_explorer import ExplorationFindings, RecommendationEngine, RecommendationRegistry\n",
-"from customer_retention.analysis.visualization import ChartBuilder, display_figure, display_table\n",
-"from customer_retention.core.config.column_config import ColumnType\n",
-"from customer_retention.stages.features import CustomerSegmenter, SegmentationType\n",
-"from customer_retention.stages.profiling import FeatureCapacityAnalyzer\n",
-"import yaml\n",
-"import pandas as pd\n",
 "import numpy as np\n",
+"import pandas as pd\n",
 "import plotly.graph_objects as go\n",
-"import
-"
-"from customer_retention.
+"import yaml\n",
+"\n",
+"from customer_retention.analysis.auto_explorer import ExplorationFindings, RecommendationEngine, RecommendationRegistry\n",
+"from customer_retention.analysis.visualization import ChartBuilder, display_figure\n",
+"from customer_retention.core.config.column_config import ColumnType\n",
+"from customer_retention.core.config.experiments import (\n",
+"    EXPERIMENTS_DIR,\n",
+"    FINDINGS_DIR,\n",
+")\n",
+"from customer_retention.stages.features import CustomerSegmenter\n",
+"from customer_retention.stages.profiling import FeatureCapacityAnalyzer\n"
 ]
 },
 {
@@ -158,7 +161,7 @@
 "findings = ExplorationFindings.load(FINDINGS_PATH)\n",
 "\n",
 "# Load data - handle aggregated vs standard paths\n",
-"from customer_retention.stages.temporal import
+"from customer_retention.stages.temporal import TEMPORAL_METADATA_COLS, load_data_with_snapshot_preference\n",
 "\n",
 "# For aggregated data, load directly from the parquet source\n",
 "if \"_aggregated\" in FINDINGS_PATH and findings.source_path.endswith('.parquet'):\n",
@@ -338,53 +341,53 @@
 "    if col.inferred_type in [ColumnType.NUMERIC_CONTINUOUS, ColumnType.NUMERIC_DISCRETE]\n",
 "    and name != findings.target_column\n",
 "]\n",
-"
+"\n",
 "capacity_result = capacity_analyzer.analyze(\n",
 "    df,\n",
 "    feature_cols=numeric_features,\n",
 "    target_col=findings.target_column,\n",
 ")\n",
-"
-"print(
+"\n",
+"print(\"\\n📊 DATA SUMMARY:\")\n",
 "print(f\"  Total samples: {capacity_result.total_samples:,}\")\n",
 "print(f\"  Minority class samples: {capacity_result.minority_class_samples:,}\")\n",
 "print(f\"  Minority class rate: {capacity_result.minority_class_samples/capacity_result.total_samples:.1%}\")\n",
 "print(f\"  Current numeric features: {capacity_result.total_features}\")\n",
-"
-"print(
+"\n",
+"print(\"\\n📈 FEATURE CAPACITY METRICS:\")\n",
 "print(f\"  Events Per Variable (EPV): {capacity_result.events_per_variable:.1f}\")\n",
 "print(f\"  Samples Per Feature: {capacity_result.samples_per_feature:.1f}\")\n",
 "print(f\"  Capacity Status: {capacity_result.capacity_status.upper()}\")\n",
-"
+"\n",
 "# Capacity status visualization\n",
 "status_colors = {\"adequate\": \"#2ecc71\", \"limited\": \"#f39c12\", \"inadequate\": \"#e74c3c\"}\n",
 "status_color = status_colors.get(capacity_result.capacity_status, \"#95a5a6\")\n",
-"
-"print(
+"\n",
+"print(\"\\n🎯 RECOMMENDED FEATURE COUNTS:\")\n",
 "print(f\"  Conservative (EPV=20): {capacity_result.recommended_features_conservative} features\")\n",
 "print(f\"  Moderate (EPV=10): {capacity_result.recommended_features_moderate} features\")\n",
 "print(f\"  Aggressive (EPV=5): {capacity_result.recommended_features_aggressive} features\")\n",
-"
+"\n",
 "# Effective features analysis\n",
 "if capacity_result.effective_features_result:\n",
 "    eff = capacity_result.effective_features_result\n",
-"    print(
+"    print(\"\\n🔍 EFFECTIVE FEATURES (accounting for correlation):\")\n",
 "    print(f\"  Total features analyzed: {eff.total_count}\")\n",
 "    print(f\"  Effective independent features: {eff.effective_count:.1f}\")\n",
 "    print(f\"  Redundant features identified: {len(eff.redundant_features)}\")\n",
-"
+"\n",
 "    if eff.redundant_features:\n",
-"        print(
+"        print(\"\\n  ⚠️ Redundant features (highly correlated):\")\n",
 "        for feat in eff.redundant_features[:5]:\n",
 "            print(f\"    • {feat}\")\n",
-"
+"\n",
 "    if eff.feature_clusters:\n",
 "        print(f\"\\n  📦 Correlated feature clusters ({len(eff.feature_clusters)}):\")\n",
 "        for i, cluster in enumerate(eff.feature_clusters[:3]):\n",
 "            print(f\"    Cluster {i+1}: {', '.join(cluster[:4])}\")\n",
 "            if len(cluster) > 4:\n",
 "                print(f\"      ... and {len(cluster)-4} more\")\n",
-"
+"\n",
 "# Persist feature capacity to registry\n",
 "registry.add_bronze_feature_capacity(\n",
 "    epv=capacity_result.events_per_variable,\n",
@@ -394,8 +397,8 @@
 "    rationale=f\"EPV={capacity_result.events_per_variable:.1f}, status={capacity_result.capacity_status}\",\n",
 "    source_notebook=\"06_feature_opportunities\"\n",
 ")\n",
-"print(
-"
+"print(\"\\n✅ Persisted feature capacity recommendation to registry\")\n",
+"\n",
 "# Store capacity info in findings\n",
 "findings.metadata[\"feature_capacity\"] = capacity_result.to_dict()\n",
 "else:\n",
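The cell changed above leans on the events-per-variable (EPV) heuristic: EPV = minority-class events ÷ number of candidate features, and the conservative/moderate/aggressive budgets it prints invert that ratio at EPV targets of 20, 10, and 5. Below is a minimal sketch of that arithmetic, assuming a binary target; it is an illustration of the heuristic, not the FeatureCapacityAnalyzer internals.

import pandas as pd

def epv_budgets(df: pd.DataFrame, feature_cols: list, target_col: str) -> dict:
    # The minority-class event count drives everything (hypothetical recreation).
    minority = int(df[target_col].value_counts().min())
    return {
        "events_per_variable": minority / len(feature_cols),
        "conservative_max_features": minority // 20,  # EPV=20
        "moderate_max_features": minority // 10,      # EPV=10
        "aggressive_max_features": minority // 5,     # EPV=5
    }

With 500 minority-class events and 30 numeric features, EPV ≈ 16.7 and the moderate budget is 50 features, so there is headroom under the moderate target even though the conservative budget (25) is nearly exhausted.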
@@ -446,20 +449,20 @@
 "# Model Complexity Guidance\n",
 "if findings.target_column and 'capacity_result' in dir():\n",
 "    guidance = capacity_result.complexity_guidance\n",
-"
+"\n",
 "    print(\"=\" * 70)\n",
 "    print(\"MODEL COMPLEXITY GUIDANCE\")\n",
 "    print(\"=\" * 70)\n",
-"
+"\n",
 "    # Create visualization of feature limits by model type\n",
 "    model_types = [\"Linear\\n(no regularization)\", \"Regularized\\n(L1/L2)\", \"Tree-based\\n(RF/XGBoost)\"]\n",
 "    max_features = [guidance.max_features_linear, guidance.max_features_regularized, guidance.max_features_tree]\n",
 "    current_features = capacity_result.total_features\n",
-"
+"\n",
 "    colors = ['#e74c3c' if m < current_features else '#2ecc71' for m in max_features]\n",
-"
+"\n",
 "    fig = go.Figure()\n",
-"
+"\n",
 "    fig.add_trace(go.Bar(\n",
 "        x=model_types,\n",
 "        y=max_features,\n",
@@ -468,7 +471,7 @@
 "        textposition='outside',\n",
 "        name='Max Features'\n",
 "    ))\n",
-"
+"\n",
 "    # Add horizontal line for current feature count\n",
 "    fig.add_hline(\n",
 "        y=current_features,\n",
@@ -477,7 +480,7 @@
 "        annotation_text=f\"Current: {current_features}\",\n",
 "        annotation_position=\"right\"\n",
 "    )\n",
-"
+"\n",
 "    # Calculate y-axis range to fit labels\n",
 "    max_val = max(max_features)\n",
 "    fig.update_layout(\n",
@@ -489,19 +492,19 @@
 "        height=400,\n",
 "        showlegend=False,\n",
 "    )\n",
-"
+"\n",
 "    display_figure(fig)\n",
-"
+"\n",
 "    print(f\"\\n🎯 RECOMMENDED MODEL TYPE: {guidance.recommended_model_type.replace('_', ' ').title()}\")\n",
-"
+"\n",
 "    print(\"\\n📋 MODEL-SPECIFIC RECOMMENDATIONS:\")\n",
 "    for rec in guidance.model_recommendations:\n",
 "        print(f\"  • {rec}\")\n",
-"
+"\n",
 "    print(\"\\n💡 GENERAL GUIDANCE:\")\n",
 "    for rec in guidance.recommendations:\n",
 "        print(f\"  {rec}\")\n",
-"
+"\n",
 "    # Summary table\n",
 "    print(\"\\n\" + \"-\" * 70)\n",
 "    print(\"FEATURE BUDGET SUMMARY:\")\n",
@@ -512,12 +515,12 @@
 "        \"Current\": [current_features] * 3,\n",
 "        \"Status\": [\n",
 "            \"✅ OK\" if guidance.max_features_linear >= current_features else \"⚠️ Reduce\",\n",
-"            \"✅ OK\" if guidance.max_features_regularized >= current_features else \"⚠️ Reduce\"
+"            \"✅ OK\" if guidance.max_features_regularized >= current_features else \"⚠️ Reduce\",\n",
 "            \"✅ OK\" if guidance.max_features_tree >= current_features else \"⚠️ Reduce\"\n",
 "        ]\n",
 "    }\n",
 "    display(pd.DataFrame(summary_data))\n",
-"
+"\n",
 "    # Persist model type recommendation to registry\n",
 "    registry.add_bronze_model_type(\n",
 "        model_type=guidance.recommended_model_type,\n",
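The FEATURE BUDGET SUMMARY table in this hunk reduces to one comparison per model family: flag "Reduce" whenever the family's maximum feature budget falls below the current feature count. A tiny sketch of that rule with hypothetical budgets (in the notebook the real limits come from complexity_guidance):

current_features = 24  # hypothetical count
budgets = {"linear": 12, "regularized": 30, "tree": 60}  # hypothetical guidance values
status = {name: "OK" if limit >= current_features else "Reduce"
          for name, limit in budgets.items()}
print(status)  # {'linear': 'Reduce', 'regularized': 'OK', 'tree': 'OK'}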
@@ -593,20 +596,20 @@
 "if findings.target_column and categorical_cols and 'numeric_features' in dir():\n",
 "    # Analyze the first categorical column as potential segment\n",
 "    segment_col = categorical_cols[0]\n",
-"
+"\n",
 "    print(f\"\\n📊 Analyzing segments by: {segment_col}\")\n",
 "    print(f\"  Features to evaluate: {len(numeric_features)}\")\n",
-"
+"\n",
 "    segment_result = capacity_analyzer.analyze_segment_capacity(\n",
 "        df,\n",
 "        feature_cols=numeric_features,\n",
 "        target_col=findings.target_column,\n",
 "        segment_col=segment_col,\n",
 "    )\n",
-"
+"\n",
 "    print(f\"\\n🎯 RECOMMENDED STRATEGY: {segment_result.recommended_strategy.replace('_', ' ').title()}\")\n",
 "    print(f\"  Reason: {segment_result.strategy_reason}\")\n",
-"
+"\n",
 "    # Segment details table\n",
 "    segment_data = []\n",
 "    for seg_name, cap in segment_result.segment_capacities.items():\n",
@@ -618,14 +621,14 @@
 "            \"Max Features (EPV=10)\": cap.recommended_features_moderate,\n",
 "            \"Status\": cap.capacity_status.title()\n",
 "        })\n",
-"
+"\n",
 "    segment_df = pd.DataFrame(segment_data)\n",
 "    segment_df = segment_df.sort_values(\"Samples\", ascending=False)\n",
 "    display(segment_df)\n",
-"
+"\n",
 "    # Visualization\n",
 "    fig = go.Figure()\n",
-"
+"\n",
 "    max_events = 0\n",
 "    for seg_name, cap in segment_result.segment_capacities.items():\n",
 "        color = \"#2ecc71\" if cap.capacity_status == \"adequate\" else \"#f39c12\" if cap.capacity_status == \"limited\" else \"#e74c3c\"\n",
@@ -638,7 +641,7 @@
 "            textposition='outside'\n",
 "        ))\n",
 "        max_events = max(max_events, cap.minority_class_samples)\n",
-"
+"\n",
 "    # Add threshold line\n",
 "    threshold_events = len(numeric_features) * 10  # EPV=10 threshold\n",
 "    fig.add_hline(\n",
@@ -648,7 +651,7 @@
 "        annotation_text=f\"Min events for {len(numeric_features)} features (EPV=10)\",\n",
 "        annotation_position=\"right\"\n",
 "    )\n",
-"
+"\n",
 "    # Calculate y-axis range to fit labels\n",
 "    y_max = max(max_events, threshold_events)\n",
 "    fig.update_layout(\n",
@@ -661,16 +664,16 @@
 "        showlegend=False,\n",
 "    )\n",
 "    display_figure(fig)\n",
-"
+"\n",
 "    print(\"\\n📋 SEGMENT RECOMMENDATIONS:\")\n",
 "    for rec in segment_result.recommendations:\n",
 "        print(f\"  {rec}\")\n",
-"
+"\n",
 "    if segment_result.viable_segments:\n",
 "        print(f\"\\n  ✅ Viable for separate models: {', '.join(segment_result.viable_segments)}\")\n",
 "    if segment_result.insufficient_segments:\n",
 "        print(f\"  ⚠️ Insufficient data: {', '.join(segment_result.insufficient_segments)}\")\n",
-"
+"\n",
 "    # Store in findings\n",
 "    findings.metadata[\"segment_capacity\"] = segment_result.to_dict()\n",
 "else:\n",
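Segment viability in these hunks follows from the same EPV rule applied per segment: the threshold line sits at len(numeric_features) * 10 minority-class events, so a segment can support its own model only if its minority-class count clears that bar. A sketch with made-up counts:

n_features = 15
threshold_events = n_features * 10  # the EPV=10 threshold drawn in the chart above

# Hypothetical minority-class counts per segment value.
segment_events = {"enterprise": 2100, "smb": 430, "trial": 90}

viable = [s for s, n in segment_events.items() if n >= threshold_events]
insufficient = [s for s, n in segment_events.items() if n < threshold_events]
print(viable)        # ['enterprise', 'smb']
print(insufficient)  # ['trial']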
@@ -724,38 +727,38 @@
 "    print(\"=\" * 70)\n",
 "    print(\"FEATURE CAPACITY ACTION ITEMS\")\n",
 "    print(\"=\" * 70)\n",
-"
+"\n",
 "    print(\"\\n📋 BASED ON YOUR DATA CAPACITY:\")\n",
-"
+"\n",
 "    # Action items based on capacity status\n",
 "    if capacity_result.capacity_status == \"adequate\":\n",
 "        print(\"\\n✅ ADEQUATE CAPACITY - You have room to add features\")\n",
 "        print(f\"  • Current features: {capacity_result.total_features}\")\n",
 "        print(f\"  • Can add up to: {capacity_result.recommended_features_moderate - capacity_result.total_features} more features (EPV=10)\")\n",
-"        print(
+"        print(\"  • Consider: Creating derived features from datetime and categorical columns\")\n",
 "    elif capacity_result.capacity_status == \"limited\":\n",
 "        print(\"\\n⚠️ LIMITED CAPACITY - Be selective with new features\")\n",
 "        print(f\"  • Current features: {capacity_result.total_features}\")\n",
 "        print(f\"  • Recommended max: {capacity_result.recommended_features_moderate} features (EPV=10)\")\n",
 "        print(f\"  • Action: Remove {max(0, capacity_result.total_features - capacity_result.recommended_features_moderate)} redundant features before adding new ones\")\n",
-"        print(
+"        print(\"  • Consider: Using regularization (L1/Lasso) if keeping all features\")\n",
 "    else:\n",
 "        print(\"\\n🔴 INADEQUATE CAPACITY - Reduce features or get more data\")\n",
 "        print(f\"  • Current features: {capacity_result.total_features}\")\n",
 "        print(f\"  • Recommended max: {capacity_result.recommended_features_moderate} features (EPV=10)\")\n",
 "        print(f\"  • CRITICAL: Reduce to {capacity_result.recommended_features_conservative} features for stable estimates\")\n",
-"        print(
-"
+"        print(\"  • Options: (1) Feature selection, (2) PCA, (3) Collect more data\")\n",
+"\n",
 "    # Redundancy recommendations\n",
 "    if capacity_result.effective_features_result and capacity_result.effective_features_result.redundant_features:\n",
 "        redundant = capacity_result.effective_features_result.redundant_features\n",
-"        print(
-"        print(
+"        print(\"\\n🔄 REDUNDANT FEATURES TO CONSIDER REMOVING:\")\n",
+"        print(\"  These features are highly correlated with others and add little new information:\")\n",
 "        for feat in redundant[:5]:\n",
 "            print(f\"    • {feat}\")\n",
 "        if len(redundant) > 5:\n",
 "            print(f\"      ... and {len(redundant) - 5} more\")\n",
-"
+"\n",
 "    # New feature budget\n",
 "    print(\"\\n💰 FEATURE BUDGET FOR NEW FEATURES:\")\n",
 "    remaining_budget = capacity_result.recommended_features_moderate - capacity_result.total_features\n",
@@ -767,7 +770,7 @@
 "        print(\"  • Engagement composites (email_engagement_score)\")\n",
 "    else:\n",
 "        print(f\"  ⚠️ At or over capacity. Remove {-remaining_budget} features before adding new ones.\")\n",
-"
+"\n",
 "    # Model selection summary\n",
 "    print(\"\\n🎯 RECOMMENDED MODELING APPROACH:\")\n",
 "    if capacity_result.complexity_guidance:\n",
@@ -778,7 +781,7 @@
 "        elif \"tree\" in capacity_result.complexity_guidance.recommended_model_type:\n",
 "            print(\"  → Random Forest or XGBoost recommended\")\n",
 "            print(\"  → Trees handle correlated features naturally\")\n",
-"
+"\n",
 "    print(\"\\n\" + \"=\" * 70)"
 ]
 },
@@ -835,19 +838,19 @@
 "    selector = FeatureSelector(target_column=findings.target_column)\n",
 "    availability_recs = selector.get_availability_recommendations(findings.feature_availability)\n",
 "    unavailable_features = [rec.column for rec in availability_recs]\n",
-"
+"\n",
 "    print(f\"\\n⚠️ {len(availability_recs)} feature(s) have tracking changes:\\n\")\n",
-"
+"\n",
 "    for rec in availability_recs:\n",
 "        print(f\"📌 {rec.column}\")\n",
 "        print(f\"  Issue: {rec.issue_type} | Coverage: {rec.coverage_pct:.0f}%\")\n",
 "        print(f\"  Available: {rec.first_valid_date} → {rec.last_valid_date}\")\n",
-"        print(
+"        print(\"\\n  Remediation options:\")\n",
 "        for opt in rec.options:\n",
 "            marker = \"→\" if opt.get(\"recommended\") else \" \"\n",
 "            print(f\"    {marker} [{opt['type']}] {opt['description']}\")\n",
 "        print()\n",
-"
+"\n",
 "    print(\"-\" * 70)\n",
 "    print(\"RECOMMENDED ACTION: Remove unavailable features before modeling\")\n",
 "    print(\"-\" * 70)\n",
@@ -856,7 +859,7 @@
 "    print(\"  • segment_by_cohort: Train separate models for different time periods\")\n",
 "    print(\"  • add_indicator: Create availability flags, impute missing values\")\n",
 "    print(\"  • filter_window: Restrict training data to feature's available period\")\n",
-"
+"\n",
 "    findings.metadata[\"unavailable_features\"] = unavailable_features\n",
 "    findings.metadata[\"availability_action\"] = \"exclude\"\n",
 "else:\n",
@@ -978,13 +981,13 @@
 "segmenter = CustomerSegmenter()\n",
 "df_features = df.copy()\n",
 "\n",
-"datetime_cols = [name for name, col in findings.columns.items()
+"datetime_cols = [name for name, col in findings.columns.items()\n",
 "                 if col.inferred_type == ColumnType.DATETIME\n",
 "                 and name not in TEMPORAL_METADATA_COLS]\n",
-"binary_cols = [name for name, col in findings.columns.items()
+"binary_cols = [name for name, col in findings.columns.items()\n",
 "               if col.inferred_type == ColumnType.BINARY\n",
 "               and name not in TEMPORAL_METADATA_COLS]\n",
-"numeric_cols = [name for name, col in findings.columns.items()
+"numeric_cols = [name for name, col in findings.columns.items()\n",
 "                if col.inferred_type in [ColumnType.NUMERIC_CONTINUOUS, ColumnType.NUMERIC_DISCRETE]]\n",
 "\n",
 "for col in datetime_cols:\n",
@@ -1014,7 +1017,7 @@
 "activity_cols = [c for c in datetime_cols if 'last' in c.lower() or 'recent' in c.lower()]\n",
 "if activity_cols:\n",
 "    activity_col = activity_cols[0]\n",
-"    df_features = segmenter.create_recency_features(df_features, last_activity_column=activity_col
+"    df_features = segmenter.create_recency_features(df_features, last_activity_column=activity_col,\n",
 "        reference_date=reference_date, output_column='days_since_last_activity')\n",
 "    print(f\"  ✓ days_since_last_activity from {activity_col}\")\n",
 "    registry.add_silver_derived(\n",
@@ -1032,7 +1035,7 @@
 "\n",
 "if open_rate_cols and click_rate_cols:\n",
 "    open_col, click_col = open_rate_cols[0], click_rate_cols[0]\n",
-"    df_features = segmenter.create_engagement_score(df_features, open_rate_column=open_col
+"    df_features = segmenter.create_engagement_score(df_features, open_rate_column=open_col,\n",
 "        click_rate_column=click_col, output_column='email_engagement_score')\n",
 "    print(f\"  ✓ email_engagement_score from {open_col}, {click_col}\")\n",
 "    registry.add_silver_derived(\n",
@@ -1042,9 +1045,9 @@
 "        rationale=f\"Weighted engagement score from {open_col} and {click_col}\",\n",
 "        source_notebook=\"06_feature_opportunities\"\n",
 "    )\n",
-"
+"\n",
 "    df_features['click_to_open_rate'] = np.where(df_features[open_col] > 0, df_features[click_col] / df_features[open_col], 0)\n",
-"    print(
+"    print(\"  ✓ click_to_open_rate\")\n",
 "    registry.add_silver_ratio(\n",
 "        column=\"click_to_open_rate\",\n",
 "        numerator=click_col,\n",
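The new click_to_open_rate line above uses np.where so that customers with zero opens get 0 rather than a division-by-zero inf/NaN. The same pattern in isolation, with hypothetical column values:

import numpy as np
import pandas as pd

df = pd.DataFrame({"open_rate": [0.4, 0.0, 0.25],
                   "click_rate": [0.1, 0.0, 0.05]})

# Guarded ratio: rows with zero opens fall back to 0 instead of inf/NaN.
df["click_to_open_rate"] = np.where(
    df["open_rate"] > 0, df["click_rate"] / df["open_rate"], 0
)
print(df["click_to_open_rate"].tolist())  # [0.25, 0.0, 0.2]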
@@ -1171,18 +1174,18 @@
 "if 'customer_segment' in df_features.columns and findings.target_column and findings.target_column in df_features.columns:\n",
 "    target = findings.target_column\n",
 "    segment_retention = df_features.groupby('customer_segment')[target].mean() * 100\n",
-"
+"\n",
 "    max_rate = segment_retention.max()\n",
 "    fig = go.Figure(go.Bar(\n",
 "        x=segment_retention.index, y=segment_retention.values,\n",
 "        marker_color=['#2ca02c' if r > 70 else '#ffbb00' if r > 50 else '#d62728' for r in segment_retention.values],\n",
 "        text=[f'{r:.1f}%' for r in segment_retention.values], textposition='outside'))\n",
 "    fig.update_layout(\n",
-"        title='Retention Rate by Customer Segment'
-"        xaxis_title='Segment'
+"        title='Retention Rate by Customer Segment',\n",
+"        xaxis_title='Segment',\n",
 "        yaxis_title='Retention Rate (%)',\n",
 "        yaxis_range=[0, max_rate * 1.15],  # Add 15% headroom for labels\n",
-"        template='plotly_white'
+"        template='plotly_white',\n",
 "        height=400,\n",
 "    )\n",
 "    display_figure(fig)\n",
@@ -1240,17 +1243,17 @@
 "if numeric_cols:\n",
 "    print(\"Numeric Transformation Opportunities:\")\n",
 "    print(\"=\"*50)\n",
-"
+"\n",
 "    for col_name in numeric_cols:\n",
 "        col_info = findings.columns[col_name]\n",
 "        series = df[col_name].dropna()\n",
 "        skewness = series.skew()\n",
-"
+"\n",
 "        print(f\"\\n{col_name}:\")\n",
 "        print(f\"  Skewness: {skewness:.2f}\")\n",
-"
+"\n",
 "        if abs(skewness) > 1:\n",
-"            print(
+"            print(\"  Recommendation: Apply log transform (highly skewed)\")\n",
 "            registry.add_gold_transformation(\n",
 "                column=col_name,\n",
 "                transform=\"log\",\n",
@@ -1260,7 +1263,7 @@
 "            )\n",
 "            transform_count += 1\n",
 "        elif abs(skewness) > 0.5:\n",
-"            print(
+"            print(\"  Recommendation: Consider sqrt transform (moderately skewed)\")\n",
 "            registry.add_gold_transformation(\n",
 "                column=col_name,\n",
 "                transform=\"sqrt\",\n",
@@ -1270,7 +1273,7 @@
 "            )\n",
 "            transform_count += 1\n",
 "        else:\n",
-"            print(
+"            print(\"  Recommendation: Standard scaling sufficient\")\n",
 "            registry.add_gold_scaling(\n",
 "                column=col_name,\n",
 "                method=\"standard\",\n",
@@ -1278,10 +1281,10 @@
 "                source_notebook=\"06_feature_opportunities\"\n",
 "            )\n",
 "            transform_count += 1\n",
-"
+"\n",
 "        if col_info.inferred_type == ColumnType.NUMERIC_CONTINUOUS:\n",
 "            print(f\"  Binning: Consider creating bins for {col_name}_binned\")\n",
-"
+"\n",
 "    print(f\"\\n✅ Persisted {transform_count} transformation recommendations to registry\")"
 ]
 },
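The transformation cell in these hunks encodes a simple decision rule on sample skewness: |skew| > 1 → log transform, |skew| > 0.5 → sqrt, otherwise standard scaling alone. Stated as a standalone function (a sketch of the rule, not the registry plumbing):

import pandas as pd

def recommend_transform(series: pd.Series) -> str:
    # Thresholds mirror the notebook cell.
    skew = series.dropna().skew()
    if abs(skew) > 1:
        return "log"       # highly skewed
    if abs(skew) > 0.5:
        return "sqrt"      # moderately skewed
    return "standard"      # scaling alone is sufficient

print(recommend_transform(pd.Series([1, 1, 2, 2, 3, 50])))  # "log" (long right tail)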
@@ -1334,15 +1337,15 @@
 "if categorical_cols:\n",
 "    print(\"Categorical Encoding Recommendations:\")\n",
 "    print(\"=\"*50)\n",
-"
+"\n",
 "    for col_name in categorical_cols:\n",
 "        col_info = findings.columns[col_name]\n",
 "        distinct = col_info.universal_metrics.get(\"distinct_count\", 0)\n",
-"
+"\n",
 "        print(f\"\\n{col_name}: ({distinct} unique values)\")\n",
-"
+"\n",
 "        if distinct <= 5:\n",
-"            print(
+"            print(\"  Recommendation: One-hot encoding\")\n",
 "            registry.add_gold_encoding(\n",
 "                column=col_name,\n",
 "                method=\"onehot\",\n",
@@ -1351,7 +1354,7 @@
 "            )\n",
 "            encoding_count += 1\n",
 "        elif distinct <= 20:\n",
-"            print(
+"            print(\"  Recommendation: Target encoding or one-hot with frequency threshold\")\n",
 "            registry.add_gold_encoding(\n",
 "                column=col_name,\n",
 "                method=\"target\",\n",
@@ -1360,7 +1363,7 @@
 "            )\n",
 "            encoding_count += 1\n",
 "        else:\n",
-"            print(
+"            print(\"  Recommendation: Target encoding or embedding (high cardinality)\")\n",
 "            registry.add_gold_encoding(\n",
 "                column=col_name,\n",
 "                method=\"target\",\n",
@@ -1368,10 +1371,10 @@
 "                source_notebook=\"06_feature_opportunities\"\n",
 "            )\n",
 "            encoding_count += 1\n",
-"
+"\n",
 "        if col_info.inferred_type == ColumnType.CATEGORICAL_ORDINAL:\n",
-"            print(
-"
+"            print(\"  Note: Consider ordinal encoding to preserve order\")\n",
+"\n",
 "    print(f\"\\n✅ Persisted {encoding_count} encoding recommendations to registry\")"
 ]
 },
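Encoding choices in this cell are driven purely by cardinality: up to 5 distinct values → one-hot, up to 20 → target encoding, above that → target encoding or embeddings. The rule as a standalone sketch:

def recommend_encoding(distinct_count: int) -> str:
    # Cardinality thresholds mirror the notebook cell.
    if distinct_count <= 5:
        return "onehot"
    if distinct_count <= 20:
        return "target"  # or one-hot with a frequency threshold
    return "target"      # or embeddings for high cardinality

print([recommend_encoding(n) for n in (3, 12, 200)])
# ['onehot', 'target', 'target']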
@@ -1578,12 +1581,13 @@
 "registry.save(RECOMMENDATIONS_PATH)\n",
 "\n",
 "print(f\"✅ Saved {len(registry.all_recommendations)} recommendations to {RECOMMENDATIONS_PATH}\")\n",
-"print(
+"print(\"\\nRecommendations by layer:\")\n",
 "for layer in [\"bronze\", \"silver\", \"gold\"]:\n",
 "    recs = registry.get_by_layer(layer)\n",
 "    print(f\"  {layer.upper()}: {len(recs)}\")\n",
 "\n",
 "from customer_retention.analysis.notebook_html_exporter import export_notebook_html\n",
+"\n",
 "export_notebook_html(Path(\"06_feature_opportunities.ipynb\"), EXPERIMENTS_DIR / \"docs\")\n"
 ]
 }