churnkit-0.76.1a1-py3-none-any.whl → churnkit-0.76.1a2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/00_start_here.ipynb +10 -5
  2. {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb +6 -6
  3. {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb +52 -46
  4. {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb +68 -65
  5. {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb +12 -27
  6. {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb +216 -221
  7. {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb +88 -81
  8. {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb +111 -108
  9. {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb +44 -38
  10. {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb +89 -85
  11. {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb +81 -80
  12. {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb +83 -89
  13. {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb +102 -98
  14. {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb +32 -31
  15. {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb +33 -29
  16. {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb +6 -5
  17. {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb +67 -63
  18. {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb +38 -23
  19. {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb +3 -1
  20. {churnkit-0.76.1a1.dist-info → churnkit-0.76.1a2.dist-info}/METADATA +1 -1
  21. {churnkit-0.76.1a1.dist-info → churnkit-0.76.1a2.dist-info}/RECORD +30 -30
  22. customer_retention/__init__.py +1 -1
  23. customer_retention/analysis/auto_explorer/explorer.py +2 -2
  24. customer_retention/analysis/notebook_progress.py +4 -1
  25. customer_retention/core/compat/__init__.py +10 -0
  26. customer_retention/integrations/databricks_init.py +13 -0
  27. customer_retention/stages/profiling/column_profiler.py +9 -2
  28. {churnkit-0.76.1a1.dist-info → churnkit-0.76.1a2.dist-info}/WHEEL +0 -0
  29. {churnkit-0.76.1a1.dist-info → churnkit-0.76.1a2.dist-info}/entry_points.txt +0 -0
  30. {churnkit-0.76.1a1.dist-info → churnkit-0.76.1a2.dist-info}/licenses/LICENSE +0 -0
@@ -73,24 +73,28 @@
  "outputs": [],
  "source": [
  "from customer_retention.analysis.notebook_progress import track_and_export_previous\n",
+ "\n",
  "track_and_export_previous(\"02_column_deep_dive.ipynb\")\n",
  "\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import plotly.graph_objects as go\n",
+ "from scipy import stats\n",
+ "\n",
  "from customer_retention.analysis.auto_explorer import ExplorationFindings, RecommendationRegistry\n",
- "from customer_retention.analysis.visualization import ChartBuilder, display_figure, display_table, console\n",
+ "from customer_retention.analysis.visualization import ChartBuilder, console, display_figure, display_table\n",
  "from customer_retention.core.config.column_config import ColumnType\n",
+ "from customer_retention.core.config.experiments import (\n",
+ " FINDINGS_DIR,\n",
+ ")\n",
  "from customer_retention.stages.profiling import (\n",
- " DistributionAnalyzer, TransformationType,\n",
- " TemporalAnalyzer, TemporalGranularity,\n",
- " CategoricalDistributionAnalyzer, EncodingType\n",
+ " CategoricalDistributionAnalyzer,\n",
+ " DistributionAnalyzer,\n",
+ " TemporalAnalyzer,\n",
+ " TemporalGranularity,\n",
+ " TransformationType,\n",
  ")\n",
- "from customer_retention.stages.validation import DataValidator, RuleGenerator\n",
- "import pandas as pd\n",
- "import numpy as np\n",
- "from scipy import stats\n",
- "import plotly.graph_objects as go\n",
- "import plotly.express as px\n",
- "from plotly.subplots import make_subplots\n",
- "from customer_retention.core.config.experiments import FINDINGS_DIR, EXPERIMENTS_DIR, OUTPUT_DIR, setup_experiments_structure\n"
+ "from customer_retention.stages.validation import DataValidator, RuleGenerator\n"
  ]
  },
  {
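Aside from blank-line tweaks, this hunk reorders and prunes the notebook's import cell: third-party imports (numpy, pandas, plotly, scipy) are grouped together, multi-name imports are split one name per line and alphabetized, and several imports are dropped (EncodingType, plotly.express, plotly.subplots.make_subplots, EXPERIMENTS_DIR, OUTPUT_DIR, setup_experiments_structure), presumably because this cell no longer uses them. For readability, the resulting cell rendered as plain Python rather than escaped notebook JSON (reconstructed from the added lines above; indentation normalized to four spaces):

```python
from customer_retention.analysis.notebook_progress import track_and_export_previous

track_and_export_previous("02_column_deep_dive.ipynb")

# Third-party imports, now grouped ahead of the project's own modules
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from scipy import stats

# First-party imports, one name per line and alphabetized
from customer_retention.analysis.auto_explorer import ExplorationFindings, RecommendationRegistry
from customer_retention.analysis.visualization import ChartBuilder, console, display_figure, display_table
from customer_retention.core.config.column_config import ColumnType
from customer_retention.core.config.experiments import (
    FINDINGS_DIR,
)
from customer_retention.stages.profiling import (
    CategoricalDistributionAnalyzer,
    DistributionAnalyzer,
    TemporalAnalyzer,
    TemporalGranularity,
    TransformationType,
)
from customer_retention.stages.validation import DataValidator, RuleGenerator
```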
@@ -121,7 +125,6 @@
  "\n",
  "# Option 2: Auto-discover findings file (prefers aggregated over event-level)\n",
  "from pathlib import Path\n",
- "import os\n",
  "\n",
  "# FINDINGS_DIR imported from customer_retention.core.config.experiments\n",
  "\n",
@@ -157,9 +160,9 @@
  "# Warn if this is event-level data (should run 01d first)\n",
  "if findings.is_time_series and \"_aggregated\" not in FINDINGS_PATH:\n",
  " ts_meta = findings.time_series_metadata\n",
- " print(f\"\\n⚠️ WARNING: This appears to be EVENT-LEVEL data\")\n",
+ " print(\"\\n⚠️ WARNING: This appears to be EVENT-LEVEL data\")\n",
  " print(f\" Entity: {ts_meta.entity_column}, Time: {ts_meta.time_column}\")\n",
- " print(f\" Recommendation: Run 01d_event_aggregation.ipynb first to create entity-level data\")"
+ " print(\" Recommendation: Run 01d_event_aggregation.ipynb first to create entity-level data\")"
  ]
  },
  {
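The substantive edits in this hunk drop the f prefix from string literals that contain no {} placeholders; the prefix did nothing, and removing it is the fix associated with the pyflakes/Ruff check F541 (the diff does not name a linter, so this is an inference from the pattern). A minimal before/after sketch of the same rewrite, with hypothetical values standing in for the notebook's variables:

```python
ts_meta_entity, ts_meta_time = "customer_id", "event_ts"  # hypothetical values for illustration

# Before: f-strings with no placeholders (what F541 flags)
print(f"\n⚠️ WARNING: This appears to be EVENT-LEVEL data")
print(f" Recommendation: Run 01d_event_aggregation.ipynb first to create entity-level data")

# After: plain string literals; the printed output is identical
print("\n⚠️ WARNING: This appears to be EVENT-LEVEL data")
print(" Recommendation: Run 01d_event_aggregation.ipynb first to create entity-level data")

# The prefix stays only where interpolation actually happens
print(f" Entity: {ts_meta_entity}, Time: {ts_meta_time}")
```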
@@ -202,7 +205,7 @@
  "outputs": [],
  "source": [
  "# Load data - handle aggregated parquet files directly\n",
- "from customer_retention.stages.temporal import load_data_with_snapshot_preference, TEMPORAL_METADATA_COLS\n",
+ "from customer_retention.stages.temporal import TEMPORAL_METADATA_COLS, load_data_with_snapshot_preference\n",
  "\n",
  "# For aggregated data, load directly from the parquet source\n",
  "if \"_aggregated\" in FINDINGS_PATH and findings.source_path.endswith('.parquet'):\n",
@@ -301,20 +304,20 @@
  "\n",
  "if range_rules:\n",
  " range_results = validator.validate_value_ranges(df, range_rules)\n",
- " \n",
+ "\n",
  " issues_found = []\n",
  " for r in range_results:\n",
  " detail = f\"{r.invalid_values} invalid\" if r.invalid_values > 0 else None\n",
  " console.check(f\"{r.column_name} ({r.rule_type})\", r.invalid_values == 0, detail)\n",
  " if r.invalid_values > 0:\n",
  " issues_found.append(r)\n",
- " \n",
+ "\n",
  " all_invalid = sum(r.invalid_values for r in range_results)\n",
  " if all_invalid == 0:\n",
  " console.success(\"All value ranges valid\")\n",
  " else:\n",
  " console.error(f\"Found {all_invalid:,} values outside expected ranges\")\n",
- " \n",
+ "\n",
  " console.info(\"Examples of invalid values:\")\n",
  " for r in issues_found[:3]:\n",
  " col = r.column_name\n",
@@ -333,19 +336,19 @@
  " condition = \"value < 0 or value > 1\"\n",
  " else:\n",
  " continue\n",
- " \n",
+ "\n",
  " invalid_values = df.loc[invalid_mask, col].dropna()\n",
  " if len(invalid_values) > 0:\n",
  " examples = invalid_values.head(5).tolist()\n",
  " console.metric(f\" {col}\", f\"{examples}\")\n",
- " \n",
+ "\n",
  " # Add filtering recommendation\n",
  " registry.add_bronze_filtering(\n",
  " column=col, condition=condition, action=\"cap\",\n",
  " rationale=f\"{r.invalid_values} values violate {r.rule_type} constraint\",\n",
  " source_notebook=\"02_column_deep_dive\"\n",
  " )\n",
- " \n",
+ "\n",
  " console.info(\"Rules auto-generated from detected column types\")\n",
  "else:\n",
  " range_results = []\n",
@@ -425,32 +428,32 @@
  "\n",
  "# Analyze all numeric columns using the framework\n",
  "analyses = analyzer.analyze_dataframe(df, numeric_cols)\n",
- "recommendations = {col: analyzer.recommend_transformation(analysis) \n",
+ "recommendations = {col: analyzer.recommend_transformation(analysis)\n",
  " for col, analysis in analyses.items()}\n",
  "\n",
  "for col_name in numeric_cols:\n",
  " col_info = findings.columns[col_name]\n",
  " analysis = analyses.get(col_name)\n",
  " rec = recommendations.get(col_name)\n",
- " \n",
+ "\n",
  " print(f\"\\n{'='*70}\")\n",
  " print(f\"Column: {col_name}\")\n",
  " print(f\"Type: {col_info.inferred_type.value} (Confidence: {col_info.confidence:.0%})\")\n",
- " print(f\"-\" * 70)\n",
- " \n",
+ " print(\"-\" * 70)\n",
+ "\n",
  " if analysis:\n",
- " print(f\"📊 Distribution Statistics:\")\n",
+ " print(\"📊 Distribution Statistics:\")\n",
  " print(f\" Mean: {analysis.mean:.3f} | Median: {analysis.median:.3f} | Std: {analysis.std:.3f}\")\n",
  " print(f\" Range: [{analysis.min_value:.3f}, {analysis.max_value:.3f}]\")\n",
  " print(f\" Percentiles: 1%={analysis.percentiles['p1']:.3f}, 25%={analysis.q1:.3f}, 75%={analysis.q3:.3f}, 99%={analysis.percentiles['p99']:.3f}\")\n",
- " print(f\"\\n📈 Shape Analysis:\")\n",
+ " print(\"\\n📈 Shape Analysis:\")\n",
  " skew_label = '(Right-skewed)' if analysis.skewness > 0.5 else '(Left-skewed)' if analysis.skewness < -0.5 else '(Symmetric)'\n",
  " print(f\" Skewness: {analysis.skewness:.2f} {skew_label}\")\n",
  " kurt_label = '(Heavy tails/outliers)' if analysis.kurtosis > 3 else '(Light tails)'\n",
  " print(f\" Kurtosis: {analysis.kurtosis:.2f} {kurt_label}\")\n",
  " print(f\" Zeros: {analysis.zero_count:,} ({analysis.zero_percentage:.1f}%)\")\n",
  " print(f\" Outliers (IQR): {analysis.outlier_count_iqr:,} ({analysis.outlier_percentage:.1f}%)\")\n",
- " \n",
+ "\n",
  " if rec:\n",
  " print(f\"\\n🔧 Recommended Transformation: {rec.recommended_transform.value}\")\n",
  " print(f\" Reason: {rec.reason}\")\n",
@@ -458,45 +461,45 @@
  " if rec.warnings:\n",
  " for warn in rec.warnings:\n",
  " print(f\" ⚠️ {warn}\")\n",
- " \n",
+ "\n",
  " # Create enhanced histogram with Plotly\n",
  " data = df[col_name].dropna()\n",
  " fig = go.Figure()\n",
- " \n",
+ "\n",
  " fig.add_trace(go.Histogram(x=data, nbinsx=50, name='Distribution',\n",
  " marker_color='steelblue', opacity=0.7))\n",
- " \n",
+ "\n",
  " # Calculate mean and median\n",
  " mean_val = data.mean()\n",
  " median_val = data.median()\n",
- " \n",
+ "\n",
  " # Position labels on opposite sides (left/right) to avoid overlap\n",
  " # The larger value gets right-justified, smaller gets left-justified\n",
  " mean_position = \"top right\" if mean_val >= median_val else \"top left\"\n",
  " median_position = \"top left\" if mean_val >= median_val else \"top right\"\n",
- " \n",
+ "\n",
  " # Add mean line\n",
  " fig.add_vline(\n",
- " x=mean_val, \n",
- " line_dash=\"dash\", \n",
+ " x=mean_val,\n",
+ " line_dash=\"dash\",\n",
  " line_color=\"red\",\n",
  " annotation_text=f\"Mean: {mean_val:.2f}\",\n",
  " annotation_position=mean_position,\n",
  " annotation_font_color=\"red\",\n",
  " annotation_bgcolor=\"rgba(255,255,255,0.8)\"\n",
  " )\n",
- " \n",
+ "\n",
  " # Add median line\n",
  " fig.add_vline(\n",
- " x=median_val, \n",
- " line_dash=\"solid\", \n",
+ " x=median_val,\n",
+ " line_dash=\"solid\",\n",
  " line_color=\"green\",\n",
  " annotation_text=f\"Median: {median_val:.2f}\",\n",
  " annotation_position=median_position,\n",
  " annotation_font_color=\"green\",\n",
  " annotation_bgcolor=\"rgba(255,255,255,0.8)\"\n",
  " )\n",
- " \n",
+ "\n",
  " # Add 99th percentile marker if there are outliers\n",
  " if analysis and analysis.outlier_percentage > 5:\n",
  " fig.add_vline(x=analysis.percentiles['p99'], line_dash=\"dot\", line_color=\"orange\",\n",
@@ -504,7 +507,7 @@
  " annotation_position=\"top right\",\n",
  " annotation_font_color=\"orange\",\n",
  " annotation_bgcolor=\"rgba(255,255,255,0.8)\")\n",
- " \n",
+ "\n",
  " transform_label = rec.recommended_transform.value if rec else \"none\"\n",
  " fig.update_layout(\n",
  " title=f\"Distribution: {col_name}<br><sub>Skew: {analysis.skewness:.2f} | Kurt: {analysis.kurtosis:.2f} | Strategy: {transform_label}</sub>\",\n",
@@ -559,16 +562,16 @@
  " \"skewness\": stats.skew(series),\n",
  " \"kurtosis\": stats.kurtosis(series)\n",
  " })\n",
- " \n",
+ "\n",
  " stats_df = pd.DataFrame(stats_data)\n",
- " \n",
+ "\n",
  " # Format for display\n",
  " display_stats = stats_df.copy()\n",
  " for col in [\"mean\", \"std\", \"min\", \"25%\", \"50%\", \"75%\", \"95%\", \"99%\", \"max\"]:\n",
  " display_stats[col] = display_stats[col].apply(lambda x: f\"{x:.3f}\")\n",
  " display_stats[\"skewness\"] = display_stats[\"skewness\"].apply(lambda x: f\"{x:.3f}\")\n",
  " display_stats[\"kurtosis\"] = display_stats[\"kurtosis\"].apply(lambda x: f\"{x:.3f}\")\n",
- " \n",
+ "\n",
  " print(\"=\" * 80)\n",
  " print(\"NUMERICAL FEATURE STATISTICS\")\n",
  " print(\"=\" * 80)\n",
@@ -621,7 +624,7 @@
  "for col_name in numeric_cols:\n",
  " analysis = analyses.get(col_name)\n",
  " rec = recommendations.get(col_name)\n",
- " \n",
+ "\n",
  " if analysis and rec:\n",
  " summary_data.append({\n",
  " \"Column\": col_name,\n",
@@ -632,7 +635,7 @@
  " \"Transform\": rec.recommended_transform.value,\n",
  " \"Priority\": rec.priority\n",
  " })\n",
- " \n",
+ "\n",
  " # Add Gold transformation recommendation if not \"none\"\n",
  " if rec.recommended_transform != TransformationType.NONE and registry.gold:\n",
  " registry.add_gold_transformation(\n",
@@ -646,7 +649,7 @@
  "if summary_data:\n",
  " summary_df = pd.DataFrame(summary_data)\n",
  " display_table(summary_df)\n",
- " \n",
+ "\n",
  " # Show how many transformation recommendations were added\n",
  " transform_count = sum(1 for r in recommendations.values() if r and r.recommended_transform != TransformationType.NONE)\n",
  " if transform_count > 0 and registry.gold:\n",
@@ -728,7 +731,7 @@
  "cat_analyses = cat_analyzer.analyze_dataframe(df, categorical_cols)\n",
  "\n",
  "# Get encoding recommendations\n",
- "cyclical_cols = [name for name, col in findings.columns.items() \n",
+ "cyclical_cols = [name for name, col in findings.columns.items()\n",
  " if col.inferred_type == ColumnType.CATEGORICAL_CYCLICAL]\n",
  "cat_recommendations = cat_analyzer.get_all_recommendations(df, categorical_cols, cyclical_columns=cyclical_cols)\n",
  "\n",
@@ -736,56 +739,56 @@
  " col_info = findings.columns[col_name]\n",
  " analysis = cat_analyses.get(col_name)\n",
  " rec = next((r for r in cat_recommendations if r.column_name == col_name), None)\n",
- " \n",
+ "\n",
  " print(f\"\\n{'='*70}\")\n",
  " print(f\"Column: {col_name}\")\n",
  " print(f\"Type: {col_info.inferred_type.value} (Confidence: {col_info.confidence:.0%})\")\n",
- " print(f\"-\" * 70)\n",
- " \n",
+ " print(\"-\" * 70)\n",
+ "\n",
  " if analysis:\n",
- " print(f\"\\n📊 Distribution Metrics:\")\n",
+ " print(\"\\n📊 Distribution Metrics:\")\n",
  " print(f\" Categories: {analysis.category_count}\")\n",
  " print(f\" Imbalance Ratio: {analysis.imbalance_ratio:.1f}x (largest/smallest)\")\n",
  " print(f\" Entropy: {analysis.entropy:.2f} ({analysis.normalized_entropy*100:.0f}% of max)\")\n",
  " print(f\" Top-1 Concentration: {analysis.top1_concentration:.1f}%\")\n",
  " print(f\" Top-3 Concentration: {analysis.top3_concentration:.1f}%\")\n",
  " print(f\" Rare Categories (<1%): {analysis.rare_category_count}\")\n",
- " \n",
+ "\n",
  " # Interpretation\n",
- " print(f\"\\n📈 Interpretation:\")\n",
+ " print(\"\\n📈 Interpretation:\")\n",
  " if analysis.has_low_diversity:\n",
- " print(f\" ⚠️ LOW DIVERSITY: Distribution dominated by few categories\")\n",
+ " print(\" ⚠️ LOW DIVERSITY: Distribution dominated by few categories\")\n",
  " elif analysis.normalized_entropy > 0.9:\n",
- " print(f\" ✓ HIGH DIVERSITY: Categories are relatively balanced\")\n",
+ " print(\" ✓ HIGH DIVERSITY: Categories are relatively balanced\")\n",
  " else:\n",
- " print(f\" ✓ MODERATE DIVERSITY: Some category dominance but acceptable\")\n",
- " \n",
+ " print(\" ✓ MODERATE DIVERSITY: Some category dominance but acceptable\")\n",
+ "\n",
  " if analysis.imbalance_ratio > 100:\n",
- " print(f\" 🔴 SEVERE IMBALANCE: Rarest category has very few samples\")\n",
+ " print(\" 🔴 SEVERE IMBALANCE: Rarest category has very few samples\")\n",
  " elif analysis.is_imbalanced:\n",
- " print(f\" 🟡 MODERATE IMBALANCE: Consider grouping rare categories\")\n",
- " \n",
+ " print(\" 🟡 MODERATE IMBALANCE: Consider grouping rare categories\")\n",
+ "\n",
  " # Recommendations\n",
  " if rec:\n",
- " print(f\"\\n🔧 Recommendations:\")\n",
+ " print(\"\\n🔧 Recommendations:\")\n",
  " print(f\" Encoding: {rec.encoding_type.value}\")\n",
  " print(f\" Reason: {rec.reason}\")\n",
  " print(f\" Priority: {rec.priority}\")\n",
- " \n",
+ "\n",
  " if rec.preprocessing_steps:\n",
- " print(f\" Preprocessing:\")\n",
+ " print(\" Preprocessing:\")\n",
  " for step in rec.preprocessing_steps:\n",
  " print(f\" • {step}\")\n",
- " \n",
+ "\n",
  " if rec.warnings:\n",
  " for warn in rec.warnings:\n",
  " print(f\" ⚠️ {warn}\")\n",
- " \n",
+ "\n",
  " # Visualization\n",
  " value_counts = df[col_name].value_counts()\n",
  " subtitle = f\"Entropy: {analysis.normalized_entropy*100:.0f}% | Imbalance: {analysis.imbalance_ratio:.1f}x | Rare: {analysis.rare_category_count}\" if analysis else \"\"\n",
  " fig = charts.bar_chart(\n",
- " value_counts.head(10).index.tolist(), \n",
+ " value_counts.head(10).index.tolist(),\n",
  " value_counts.head(10).values.tolist(),\n",
  " title=f\"Top Categories: {col_name}<br><sub>{subtitle}</sub>\"\n",
  " )\n",
@@ -808,7 +811,7 @@
  " \"Rare (<1%)\": analysis.rare_category_count,\n",
  " \"Encoding\": rec.encoding_type.value if rec else \"N/A\"\n",
  " })\n",
- " \n",
+ "\n",
  " # Add encoding recommendation to Gold layer\n",
  " if rec and registry.gold:\n",
  " registry.add_gold_encoding(\n",
@@ -817,9 +820,9 @@
  " rationale=rec.reason,\n",
  " source_notebook=\"02_column_deep_dive\"\n",
  " )\n",
- " \n",
+ "\n",
  " display_table(pd.DataFrame(summary_data))\n",
- " \n",
+ "\n",
  " if registry.gold:\n",
  " print(f\"\\n✅ Added {len(cat_recommendations)} encoding recommendations to Gold layer\")"
  ]
@@ -900,76 +903,76 @@
  " print(f\"Column: {col_name}\")\n",
  " print(f\"Type: {col_info.inferred_type.value} (Confidence: {col_info.confidence:.0%})\")\n",
  " print(f\"{'='*70}\")\n",
- " \n",
+ "\n",
  " date_series = pd.to_datetime(df[col_name], errors='coerce', format='mixed')\n",
  " valid_dates = date_series.dropna()\n",
- " \n",
+ "\n",
  " print(f\"\\n📅 Date Range: {valid_dates.min()} to {valid_dates.max()}\")\n",
  " print(f\" Nulls: {date_series.isna().sum():,} ({date_series.isna().mean()*100:.1f}%)\")\n",
- " \n",
+ "\n",
  " # Basic temporal analysis\n",
  " analysis = temporal_analyzer.analyze(date_series)\n",
  " print(f\" Auto-detected granularity: {analysis.granularity.value}\")\n",
  " print(f\" Span: {analysis.span_days:,} days ({analysis.span_days/365:.1f} years)\")\n",
- " \n",
+ "\n",
  " # Growth analysis\n",
  " growth = temporal_analyzer.calculate_growth_rate(date_series)\n",
  " if growth.get(\"has_data\"):\n",
- " print(f\"\\n📈 Growth Analysis:\")\n",
+ " print(\"\\n📈 Growth Analysis:\")\n",
  " print(f\" Trend: {growth['trend_direction'].upper()}\")\n",
  " print(f\" Overall growth: {growth['overall_growth_pct']:+.1f}%\")\n",
  " print(f\" Avg monthly growth: {growth['avg_monthly_growth']:+.1f}%\")\n",
- " \n",
+ "\n",
  " # Seasonality analysis\n",
  " seasonality = temporal_analyzer.analyze_seasonality(date_series)\n",
  " if seasonality.has_seasonality:\n",
- " print(f\"\\n🔄 Seasonality Detected:\")\n",
+ " print(\"\\n🔄 Seasonality Detected:\")\n",
  " print(f\" Peak months: {', '.join(seasonality.peak_periods[:3])}\")\n",
  " print(f\" Trough months: {', '.join(seasonality.trough_periods[:3])}\")\n",
  " print(f\" Seasonal strength: {seasonality.seasonal_strength:.2f}\")\n",
- " \n",
+ "\n",
  " # Get recommendations using framework\n",
  " other_dates = [c for c in datetime_cols if c != col_name]\n",
  " recommendations = temporal_analyzer.recommend_features(date_series, col_name, other_date_columns=other_dates)\n",
- " \n",
+ "\n",
  " # Group by recommendation type\n",
  " col_feature_recs = [r for r in recommendations if r.recommendation_type == TemporalRecommendationType.FEATURE_ENGINEERING]\n",
  " col_modeling_recs = [r for r in recommendations if r.recommendation_type == TemporalRecommendationType.MODELING_STRATEGY]\n",
  " col_quality_recs = [r for r in recommendations if r.recommendation_type == TemporalRecommendationType.DATA_QUALITY]\n",
- " \n",
+ "\n",
  " feature_engineering_recs.extend(col_feature_recs)\n",
  " modeling_strategy_recs.extend(col_modeling_recs)\n",
  " data_quality_recs.extend(col_quality_recs)\n",
- " \n",
+ "\n",
  " # Display recommendations grouped by type\n",
  " if col_feature_recs:\n",
- " print(f\"\\n🛠️ FEATURES TO CREATE:\")\n",
+ " print(\"\\n🛠️ FEATURES TO CREATE:\")\n",
  " for rec in col_feature_recs:\n",
  " priority_icon = \"🔴\" if rec.priority == \"high\" else \"🟡\" if rec.priority == \"medium\" else \"✓\"\n",
  " print(f\" {priority_icon} {rec.feature_name} ({rec.category})\")\n",
  " print(f\" Why: {rec.reason}\")\n",
  " if rec.code_hint:\n",
  " print(f\" Code: {rec.code_hint}\")\n",
- " \n",
+ "\n",
  " if col_modeling_recs:\n",
- " print(f\"\\n⚙️ MODELING CONSIDERATIONS:\")\n",
+ " print(\"\\n⚙️ MODELING CONSIDERATIONS:\")\n",
  " for rec in col_modeling_recs:\n",
  " priority_icon = \"🔴\" if rec.priority == \"high\" else \"🟡\" if rec.priority == \"medium\" else \"✓\"\n",
  " print(f\" {priority_icon} {rec.feature_name}\")\n",
  " print(f\" Why: {rec.reason}\")\n",
- " \n",
+ "\n",
  " if col_quality_recs:\n",
- " print(f\"\\n⚠️ DATA QUALITY ISSUES:\")\n",
+ " print(\"\\n⚠️ DATA QUALITY ISSUES:\")\n",
  " for rec in col_quality_recs:\n",
  " priority_icon = \"🔴\" if rec.priority == \"high\" else \"🟡\" if rec.priority == \"medium\" else \"✓\"\n",
  " print(f\" {priority_icon} {rec.feature_name}\")\n",
  " print(f\" Why: {rec.reason}\")\n",
  " if rec.code_hint:\n",
  " print(f\" Code: {rec.code_hint}\")\n",
- " \n",
+ "\n",
  " # Standard extractions always available\n",
- " print(f\"\\n Standard extractions available: year, month, day, day_of_week, quarter\")\n",
- " \n",
+ " print(\"\\n Standard extractions available: year, month, day, day_of_week, quarter\")\n",
+ "\n",
  " # Store summary\n",
  " datetime_summaries.append({\n",
  " \"Column\": col_name,\n",
@@ -980,31 +983,31 @@
  " \"Modeling Notes\": len(col_modeling_recs),\n",
  " \"Quality Issues\": len(col_quality_recs)\n",
  " })\n",
- " \n",
+ "\n",
  " # === VISUALIZATIONS ===\n",
- " \n",
+ "\n",
  " if growth.get(\"has_data\"):\n",
  " fig = charts.growth_summary_indicators(growth, title=f\"Growth Summary: {col_name}\")\n",
  " display_figure(fig)\n",
- " \n",
+ "\n",
  " chart_type = \"line\" if analysis.granularity in [TemporalGranularity.DAY, TemporalGranularity.WEEK] else \"bar\"\n",
  " fig = charts.temporal_distribution(analysis, title=f\"Records Over Time: {col_name}\", chart_type=chart_type)\n",
  " display_figure(fig)\n",
- " \n",
+ "\n",
  " fig = charts.temporal_trend(analysis, title=f\"Trend Analysis: {col_name}\")\n",
  " display_figure(fig)\n",
- " \n",
+ "\n",
  " yoy_data = temporal_analyzer.year_over_year_comparison(date_series)\n",
  " if len(yoy_data) > 1:\n",
  " fig = charts.year_over_year_lines(yoy_data, title=f\"Year-over-Year: {col_name}\")\n",
  " display_figure(fig)\n",
  " fig = charts.year_month_heatmap(yoy_data, title=f\"Records Heatmap: {col_name}\")\n",
  " display_figure(fig)\n",
- " \n",
+ "\n",
  " if growth.get(\"has_data\"):\n",
  " fig = charts.cumulative_growth_chart(growth[\"cumulative\"], title=f\"Cumulative Records: {col_name}\")\n",
  " display_figure(fig)\n",
- " \n",
+ "\n",
  " fig = charts.temporal_heatmap(date_series, title=f\"Day of Week Distribution: {col_name}\")\n",
  " display_figure(fig)\n",
  "\n",
@@ -1014,32 +1017,32 @@
  " print(\"DATETIME COLUMNS SUMMARY\")\n",
  " print(\"=\" * 70)\n",
  " display_table(pd.DataFrame(datetime_summaries))\n",
- " \n",
+ "\n",
  " # Summary by recommendation type\n",
  " print(\"\\n📋 ALL RECOMMENDATIONS BY TYPE:\")\n",
- " \n",
+ "\n",
  " if feature_engineering_recs:\n",
  " print(f\"\\n🛠️ FEATURES TO CREATE ({len(feature_engineering_recs)}):\")\n",
  " for i, rec in enumerate(feature_engineering_recs, 1):\n",
  " priority_icon = \"🔴\" if rec.priority == \"high\" else \"🟡\" if rec.priority == \"medium\" else \"✓\"\n",
  " print(f\" {i}. {priority_icon} {rec.feature_name}\")\n",
- " \n",
+ "\n",
  " if modeling_strategy_recs:\n",
  " print(f\"\\n⚙️ MODELING CONSIDERATIONS ({len(modeling_strategy_recs)}):\")\n",
  " for i, rec in enumerate(modeling_strategy_recs, 1):\n",
  " priority_icon = \"🔴\" if rec.priority == \"high\" else \"🟡\" if rec.priority == \"medium\" else \"✓\"\n",
  " print(f\" {i}. {priority_icon} {rec.feature_name}: {rec.reason}\")\n",
- " \n",
+ "\n",
  " if data_quality_recs:\n",
  " print(f\"\\n⚠️ DATA QUALITY TO ADDRESS ({len(data_quality_recs)}):\")\n",
  " for i, rec in enumerate(data_quality_recs, 1):\n",
  " priority_icon = \"🔴\" if rec.priority == \"high\" else \"🟡\" if rec.priority == \"medium\" else \"✓\"\n",
  " print(f\" {i}. {priority_icon} {rec.feature_name}: {rec.reason}\")\n",
- " \n",
+ "\n",
  " # Add recommendations to registry\n",
  " added_derived = 0\n",
  " added_modeling = 0\n",
- " \n",
+ "\n",
  " # Add feature engineering recommendations to Silver layer (derived columns)\n",
  " if registry.silver:\n",
  " for rec in feature_engineering_recs:\n",
@@ -1051,7 +1054,7 @@
  " source_notebook=\"02_column_deep_dive\"\n",
  " )\n",
  " added_derived += 1\n",
- " \n",
+ "\n",
  " # Add modeling strategy recommendations to Bronze layer\n",
  " seen_strategies = set()\n",
  " for rec in modeling_strategy_recs:\n",
@@ -1065,7 +1068,7 @@
  " )\n",
  " seen_strategies.add(rec.feature_name)\n",
  " added_modeling += 1\n",
- " \n",
+ "\n",
  " print(f\"\\n✅ Added {added_derived} derived column recommendations to Silver layer\")\n",
  " print(f\"✅ Added {added_modeling} modeling strategy recommendations to Bronze layer\")"
  ]
@@ -1217,14 +1220,14 @@
  " max_segments=5\n",
  ")\n",
  "\n",
- "print(f\"\\n🎯 Analysis Results:\")\n",
+ "print(\"\\n🎯 Analysis Results:\")\n",
  "print(f\" Method: {segmentation.method.value}\")\n",
  "print(f\" Detected Segments: {segmentation.n_segments}\")\n",
  "print(f\" Cluster Quality Score: {segmentation.quality_score:.2f}\")\n",
  "if segmentation.target_variance_ratio is not None:\n",
  " print(f\" Target Variance Ratio: {segmentation.target_variance_ratio:.2f}\")\n",
  "\n",
- "print(f\"\\n📊 Segment Profiles:\")\n",
+ "print(\"\\n📊 Segment Profiles:\")\n",
  "for profile in segmentation.profiles:\n",
  " target_info = f\" | Target Rate: {profile.target_rate*100:.1f}%\" if profile.target_rate is not None else \"\"\n",
  " print(f\" Segment {profile.segment_id}: {profile.size:,} records ({profile.size_pct:.1f}%){target_info}\")\n",
@@ -1242,7 +1245,7 @@
  " fig = charts.segment_feature_comparison(segmentation, title=\"Feature Comparison Across Segments\")\n",
  " display_figure(fig)\n",
  "\n",
- "print(f\"\\n📝 Rationale:\")\n",
+ "print(\"\\n📝 Rationale:\")\n",
  "for reason in segmentation.rationale:\n",
  " print(f\" • {reason}\")"
  ]
@@ -1297,7 +1300,7 @@
  "\n",
  "# Summary of recommendations\n",
  "all_recs = registry.all_recommendations\n",
- "print(f\"\\n📋 Recommendations Summary:\")\n",
+ "print(\"\\n📋 Recommendations Summary:\")\n",
  "print(f\" Bronze layer: {len(registry.get_by_layer('bronze'))} recommendations\")\n",
  "print(f\" Silver layer: {len(registry.get_by_layer('silver'))} recommendations\")\n",
  "print(f\" Gold layer: {len(registry.get_by_layer('gold'))} recommendations\")\n",