churnkit-0.76.1a1-py3-none-any.whl → churnkit-0.76.1a2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/00_start_here.ipynb +10 -5
- {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb +6 -6
- {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb +52 -46
- {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb +68 -65
- {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb +12 -27
- {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb +216 -221
- {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb +88 -81
- {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb +111 -108
- {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb +44 -38
- {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb +89 -85
- {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb +81 -80
- {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb +83 -89
- {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb +102 -98
- {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb +32 -31
- {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb +33 -29
- {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb +6 -5
- {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb +67 -63
- {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb +38 -23
- {churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb +3 -1
- {churnkit-0.76.1a1.dist-info → churnkit-0.76.1a2.dist-info}/METADATA +1 -1
- {churnkit-0.76.1a1.dist-info → churnkit-0.76.1a2.dist-info}/RECORD +30 -30
- customer_retention/__init__.py +1 -1
- customer_retention/analysis/auto_explorer/explorer.py +2 -2
- customer_retention/analysis/notebook_progress.py +4 -1
- customer_retention/core/compat/__init__.py +10 -0
- customer_retention/integrations/databricks_init.py +13 -0
- customer_retention/stages/profiling/column_profiler.py +9 -2
- {churnkit-0.76.1a1.dist-info → churnkit-0.76.1a2.dist-info}/WHEEL +0 -0
- {churnkit-0.76.1a1.dist-info → churnkit-0.76.1a2.dist-info}/entry_points.txt +0 -0
- {churnkit-0.76.1a1.dist-info → churnkit-0.76.1a2.dist-info}/licenses/LICENSE +0 -0
{churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb

@@ -70,10 +70,12 @@
 "outputs": [],
 "source": [
 "from customer_retention.analysis.notebook_progress import track_and_export_previous\n",
+"\n",
 "track_and_export_previous(\"10_spec_generation.ipynb\")\n",
 "\n",
-"from pathlib import Path\n",
 "from enum import Enum\n",
+"from pathlib import Path\n",
+"\n",
 "\n",
 "class GenerationTarget(Enum):\n",
 "    LOCAL_FEAST_MLFLOW = \"local\"\n",
@@ -99,8 +101,7 @@
 "\n",
 "print(f\"Pipeline: {PIPELINE_NAME}\")\n",
 "print(f\"Target: {GENERATION_TARGET.value}\")\n",
-"print(f\"Format: {OUTPUT_FORMAT.value}\")
-"from customer_retention.stages.temporal import TEMPORAL_METADATA_COLS"
+"print(f\"Format: {OUTPUT_FORMAT.value}\")"
 ]
 },
 {
@@ -143,9 +144,11 @@
 "outputs": [],
 "source": [
 "import yaml\n",
+"\n",
 "from customer_retention.analysis.auto_explorer import ExplorationFindings\n",
 "from customer_retention.analysis.auto_explorer.layered_recommendations import RecommendationRegistry\n",
-"from customer_retention.core.config.experiments import
+"from customer_retention.core.config.experiments import EXPERIMENTS_DIR, FINDINGS_DIR\n",
+"\n",
 "\n",
 "def load_findings_and_recommendations(findings_dir: Path):\n",
 "    findings_files = sorted(\n",
@@ -154,37 +157,37 @@
 "    )\n",
 "    if not findings_files:\n",
 "        raise FileNotFoundError(f\"No findings in {findings_dir}. Run exploration notebooks first.\")\n",
-"
+"\n",
 "    findings = ExplorationFindings.load(str(findings_files[0]))\n",
-"
+"\n",
 "    # Look for recommendations file matching the findings file pattern\n",
 "    # Step 06 saves as: {name}_recommendations.yaml (matching {name}_findings.yaml)\n",
 "    findings_name = findings_files[0].stem.replace(\"_findings\", \"\")\n",
 "    recommendations_path = findings_dir / f\"{findings_name}_recommendations.yaml\"\n",
-"
+"\n",
 "    # Fallback to generic recommendations.yaml if not found\n",
 "    if not recommendations_path.exists():\n",
 "        recommendations_path = findings_dir / \"recommendations.yaml\"\n",
-"
+"\n",
 "    # Final fallback: find any *_recommendations.yaml\n",
 "    if not recommendations_path.exists():\n",
-"        rec_files = sorted(findings_dir.glob(\"*_recommendations.yaml\")
+"        rec_files = sorted(findings_dir.glob(\"*_recommendations.yaml\"),\n",
 "                           key=lambda f: f.stat().st_mtime, reverse=True)\n",
 "        if rec_files:\n",
 "            recommendations_path = rec_files[0]\n",
-"
+"\n",
 "    registry = None\n",
 "    if recommendations_path.exists():\n",
 "        with open(recommendations_path) as f:\n",
 "            registry = RecommendationRegistry.from_dict(yaml.safe_load(f))\n",
 "        print(f\"Loaded recommendations from: {recommendations_path.name}\")\n",
-"
+"\n",
 "    multi_dataset_path = findings_dir / \"multi_dataset_findings.yaml\"\n",
 "    multi_dataset = None\n",
 "    if multi_dataset_path.exists():\n",
 "        with open(multi_dataset_path) as f:\n",
 "            multi_dataset = yaml.safe_load(f)\n",
-"
+"\n",
 "    return findings, registry, multi_dataset\n",
 "\n",
 "findings, registry, multi_dataset = load_findings_and_recommendations(FINDINGS_DIR)\n",
@@ -244,7 +247,7 @@
 "    if not registry:\n",
 "        print(\"No recommendations loaded. Run notebooks 02-07 first.\")\n",
 "        return\n",
-"
+"\n",
 "    for layer in [\"bronze\", \"silver\", \"gold\"]:\n",
 "        recs = registry.get_by_layer(layer)\n",
 "        print(f\"\\n{layer.upper()} ({len(recs)} recommendations):\")\n",
@@ -300,7 +303,6 @@
 },
 "outputs": [],
 "source": [
-"import os\n",
 "\n",
 "output_dir = OUTPUT_BASE_DIR / GENERATION_TARGET.value / PIPELINE_NAME\n",
 "output_dir.mkdir(parents=True, exist_ok=True)\n",
@@ -348,24 +350,24 @@
 "outputs": [],
 "source": [
 "if GENERATION_TARGET == GenerationTarget.LOCAL_FEAST_MLFLOW:\n",
-"    from customer_retention.generators.spec_generator import MLflowPipelineGenerator, MLflowConfig\n",
 "    from customer_retention.generators.pipeline_generator import PipelineGenerator\n",
-"    \n",
+"    from customer_retention.generators.spec_generator import MLflowConfig, MLflowPipelineGenerator\n",
+"\n",
 "    mlflow_config = MLflowConfig(\n",
 "        tracking_uri=\"./mlruns\",\n",
 "        experiment_name=PIPELINE_NAME,\n",
 "        log_data_quality=True,\n",
 "        nested_runs=True\n",
 "    )\n",
-"
+"\n",
 "    mlflow_gen = MLflowPipelineGenerator(mlflow_config=mlflow_config, output_dir=str(output_dir))\n",
-"
+"\n",
 "    if OUTPUT_FORMAT == OutputFormat.PYTHON:\n",
 "        saved = mlflow_gen.save_all(findings)\n",
 "        print(\"Generated MLflow pipeline files:\")\n",
 "        for f in saved:\n",
 "            print(f\" {f}\")\n",
-"
+"\n",
 "    pipeline_gen = PipelineGenerator(\n",
 "        findings_dir=str(FINDINGS_DIR),\n",
 "        output_dir=str(output_dir),\n",
@@ -421,7 +423,7 @@
 "source": [
 "if GENERATION_TARGET == GenerationTarget.DATABRICKS:\n",
 "    from customer_retention.generators.spec_generator import DatabricksSpecGenerator, PipelineSpec, SourceSpec\n",
-"
+"\n",
 "    spec = PipelineSpec(\n",
 "        name=PIPELINE_NAME,\n",
 "        version=\"1.0.0\",\n",
@@ -431,7 +433,7 @@
 "            format=findings.source_format\n",
 "        )]\n",
 "    )\n",
-"
+"\n",
 "    if findings.target_column:\n",
 "        from customer_retention.generators.spec_generator import ModelSpec\n",
 "        spec.model_config = ModelSpec(\n",
@@ -439,13 +441,13 @@
 "            model_type=\"gradient_boosting\",\n",
 "            target_column=findings.target_column\n",
 "        )\n",
-"
+"\n",
 "    db_gen = DatabricksSpecGenerator(\n",
 "        catalog=DATABRICKS_CATALOG,\n",
 "        schema=DATABRICKS_SCHEMA,\n",
 "        output_dir=str(output_dir)\n",
 "    )\n",
-"
+"\n",
 "    saved = db_gen.save_all(spec)\n",
 "    print(\"Generated Databricks artifacts:\")\n",
 "    for f in saved:\n",
@@ -495,15 +497,15 @@
 "source": [
 "if GENERATION_TARGET == GenerationTarget.LLM_DOCS:\n",
 "    from customer_retention.analysis.auto_explorer import RecommendationEngine\n",
-"
+"\n",
 "    recommender = RecommendationEngine()\n",
 "    target_rec = recommender.recommend_target(findings)\n",
 "    feature_recs = recommender.recommend_features(findings)\n",
 "    cleaning_recs = recommender.recommend_cleaning(findings)\n",
-"
+"\n",
 "    docs_dir = output_dir / \"docs\"\n",
 "    docs_dir.mkdir(parents=True, exist_ok=True)\n",
-"
+"\n",
 "    # 1. Overview\n",
 "    overview = f\"\"\"# {PIPELINE_NAME} Pipeline Overview\n",
 "\n",
@@ -526,7 +528,7 @@
 "    for name, col in list(findings.columns.items())[:20]:\n",
 "        overview += f\"| {name} | {col.inferred_type.value} | {col.null_percentage:.1f}% | {col.unique_count} |\\n\"\n",
 "    (docs_dir / \"01_overview.md\").write_text(overview)\n",
-"
+"\n",
 "    # 2. Bronze layer - separate file per source\n",
 "    if registry and registry.sources:\n",
 "        for source_name, bronze_recs in registry.sources.items():\n",
@@ -539,38 +541,38 @@
 "\"\"\"\n",
 "            for rec in bronze_recs.null_handling:\n",
 "                bronze_doc += f\"- `{rec.target_column}`: {rec.action} ({rec.parameters.get('strategy', '')}) - {rec.rationale}\\n\"\n",
-"
+"\n",
 "            bronze_doc += \"\\n## Outlier Handling\\n\"\n",
 "            for rec in bronze_recs.outlier_handling:\n",
 "                bronze_doc += f\"- `{rec.target_column}`: {rec.action} - {rec.rationale}\\n\"\n",
-"
+"\n",
 "            bronze_doc += \"\\n## Type Conversions\\n\"\n",
 "            for rec in bronze_recs.type_conversions:\n",
 "                bronze_doc += f\"- `{rec.target_column}`: {rec.action} - {rec.rationale}\\n\"\n",
-"
+"\n",
 "            bronze_doc += \"\\n## Deduplication\\n\"\n",
 "            for rec in bronze_recs.deduplication:\n",
 "                bronze_doc += f\"- `{rec.target_column}`: {rec.action} - {rec.rationale}\\n\"\n",
-"
+"\n",
 "            bronze_doc += \"\\n## Filtering\\n\"\n",
 "            for rec in bronze_recs.filtering:\n",
 "                bronze_doc += f\"- `{rec.target_column}`: {rec.action} - {rec.rationale}\\n\"\n",
-"
+"\n",
 "            bronze_doc += \"\\n## Text Processing\\n\"\n",
 "            for rec in bronze_recs.text_processing:\n",
 "                bronze_doc += f\"- `{rec.target_column}`: {rec.action} - {rec.rationale}\\n\"\n",
-"
+"\n",
 "            safe_name = source_name.replace(\" \", \"_\").lower()\n",
 "            (docs_dir / f\"02_bronze_cleaning_{safe_name}.md\").write_text(bronze_doc)\n",
 "    else:\n",
-"        bronze_doc =
+"        bronze_doc = \"\"\"# Bronze Layer - Data Cleaning\n",
 "\n",
 "## Cleaning Recommendations\n",
 "\"\"\"\n",
 "        for rec in cleaning_recs:\n",
 "            bronze_doc += f\"\\n### {rec.column_name}\\n- **Strategy**: {rec.strategy}\\n- **Severity**: {rec.severity}\\n- **Rationale**: {rec.rationale}\\n\"\n",
 "        (docs_dir / \"02_bronze_cleaning.md\").write_text(bronze_doc)\n",
-"
+"\n",
 "    # 3. Silver layer\n",
 "    silver_doc = \"\"\"# Silver Layer - Feature Engineering\n",
 "\n",
@@ -580,18 +582,18 @@
 "        silver_doc += \"\\n### Joins\\n\"\n",
 "        for rec in registry.silver.joins:\n",
 "            silver_doc += f\"- {rec.parameters.get('left_source', '')} ⟷ {rec.parameters.get('right_source', '')} on `{rec.parameters.get('join_keys', [])}`\\n\"\n",
-"
+"\n",
 "        silver_doc += \"\\n### Aggregations\\n\"\n",
 "        for rec in registry.silver.aggregations:\n",
 "            silver_doc += f\"- `{rec.target_column}`: {rec.action} - windows: {rec.parameters.get('windows', [])}\\n\"\n",
-"
+"\n",
 "        silver_doc += \"\\n### Derived Columns\\n\"\n",
 "        for rec in registry.silver.derived_columns:\n",
 "            silver_doc += f\"- `{rec.target_column}`: {rec.parameters.get('expression', rec.action)}\\n\"\n",
 "    else:\n",
 "        silver_doc += \"\\nNo silver-layer recommendations found.\\n\"\n",
 "    (docs_dir / \"03_silver_features.md\").write_text(silver_doc)\n",
-"
+"\n",
 "    # 4. Gold layer\n",
 "    gold_doc = \"\"\"# Gold Layer - ML Features\n",
 "\n",
@@ -599,25 +601,25 @@
 "\"\"\"\n",
 "    for rec in feature_recs[:15]:\n",
 "        gold_doc += f\"\\n### {rec.feature_name}\\n- **Source**: {rec.source_column}\\n- **Type**: {rec.feature_type}\\n- **Description**: {rec.description}\\n\"\n",
-"
+"\n",
 "    if registry and registry.gold:\n",
 "        gold_doc += \"\\n## Encoding\\n\"\n",
 "        for rec in registry.gold.encoding:\n",
 "            gold_doc += f\"- `{rec.target_column}`: {rec.parameters.get('method', rec.action)}\\n\"\n",
-"
+"\n",
 "        gold_doc += \"\\n## Scaling\\n\"\n",
 "        for rec in registry.gold.scaling:\n",
 "            gold_doc += f\"- `{rec.target_column}`: {rec.parameters.get('method', rec.action)}\\n\"\n",
-"
+"\n",
 "        gold_doc += \"\\n## Feature Selection\\n\"\n",
 "        for rec in registry.gold.feature_selection:\n",
 "            gold_doc += f\"- `{rec.target_column}`: {rec.action} - {rec.rationale}\\n\"\n",
-"
+"\n",
 "        gold_doc += \"\\n## Transformations\\n\"\n",
 "        for rec in registry.gold.transformations:\n",
 "            gold_doc += f\"- `{rec.target_column}`: {rec.action} - {rec.parameters}\\n\"\n",
 "    (docs_dir / \"04_gold_ml_features.md\").write_text(gold_doc)\n",
-"
+"\n",
 "    # 5. Training\n",
 "    training_doc = f\"\"\"# Model Training\n",
 "\n",
@@ -636,7 +638,7 @@
 "- F1 Score\n",
 "\"\"\"\n",
 "    (docs_dir / \"05_training.md\").write_text(training_doc)\n",
-"
+"\n",
 "    print(\"Generated LLM documentation:\")\n",
 "    for f in sorted(docs_dir.glob(\"*.md\")):\n",
 "        print(f\" {f.name}\")\n",
@@ -687,11 +689,12 @@
 "source": [
 "import json\n",
 "\n",
+"\n",
 "def py_to_notebook(py_path: Path):\n",
 "    content = py_path.read_text()\n",
 "    cells = []\n",
 "    current_lines = []\n",
-"
+"\n",
 "    for line in content.split(\"\\n\"):\n",
 "        if line.startswith(\"# %% \") or line.startswith(\"# %%\\n\"):\n",
 "            if current_lines:\n",
@@ -702,16 +705,16 @@
 "            cells.append({\"cell_type\": \"markdown\", \"metadata\": {}, \"source\": [f\"## {title}\"]})\n",
 "        else:\n",
 "            current_lines.append(line + \"\\n\")\n",
-"
+"\n",
 "    if current_lines:\n",
 "        cells.append({\"cell_type\": \"code\", \"metadata\": {}, \"source\": current_lines, \"outputs\": [], \"execution_count\": None})\n",
-"
+"\n",
 "    notebook = {\n",
 "        \"cells\": cells,\n",
 "        \"metadata\": {\"kernelspec\": {\"display_name\": \"Python 3\", \"language\": \"python\", \"name\": \"python3\"}},\n",
 "        \"nbformat\": 4, \"nbformat_minor\": 4\n",
 "    }\n",
-"
+"\n",
 "    out_path = py_path.with_suffix(\".ipynb\")\n",
 "    out_path.write_text(json.dumps(notebook, indent=1))\n",
 "    return out_path\n",
@@ -783,12 +786,12 @@
 "        print(\"Pipeline will run Bronze → Silver → Gold → Training...\")\n",
 "        subprocess.run([\"python\", \"pipeline_runner.py\"], cwd=str(output_dir.resolve()))\n",
 "    else:\n",
-"        print(
+"        print(\"pipeline_runner.py not found. Generate first by running cells above.\")\n",
 "else:\n",
 "    print(\"To run the complete pipeline:\")\n",
 "    print(f\"\\n cd {output_dir}\")\n",
-"    print(
-"    print(
+"    print(\" python pipeline_runner.py\")\n",
+"    print(\"\\nThis will:\")\n",
 "    print(\" 1. Run Landing layers (event sources)\")\n",
 "    print(\" 2. Run Bronze layers (parallel)\")\n",
 "    print(\" 3. Run Silver merge\")\n",
@@ -916,7 +919,7 @@
 "    print(f\" - Scalings: {len(registry.gold.scaling) if registry.gold else 0}\")\n",
 "    print(f\" - Transformations: {len(registry.gold.transformations) if registry.gold else 0}\")\n",
 "    print(f\" - Feature selections: {len(registry.gold.feature_selection) if registry.gold else 0}\")\n",
-"
+"\n",
 "    # Show what's in each layer for debugging\n",
 "    print()\n",
 "    print(\"Recommendations by layer:\")\n",
@@ -928,13 +931,13 @@
 "            print(f\" - [{rec.category}] {rec.target_column}: {rec.action}\")\n",
 "        if len(recs) > 3:\n",
 "            print(f\" ... and {len(recs) - 3} more\")\n",
-"
+"\n",
 "    # Check if gold layer exists but is empty\n",
 "    if registry.gold:\n",
 "        print(f\"\\n✓ Gold layer initialized (target: {registry.gold.target_column})\")\n",
 "    else:\n",
 "        print(\"\\n⚠ Gold layer not initialized - run step 06 first\")\n",
-"
+"\n",
 "    print()\n",
 "    print(\"Use this hash to:\")\n",
 "    print(\" - Track MLflow experiments (tag: recommendations_hash)\")\n",
@@ -990,6 +993,7 @@
 "source": [
 "# Inspect Feast Feature Store contents\n",
 "import warnings\n",
+"\n",
 "warnings.filterwarnings(\"ignore\", category=DeprecationWarning, module=\"feast\")\n",
 "\n",
 "feast_repo_path = output_dir / \"feature_repo\"\n",
@@ -998,15 +1002,15 @@
 "    try:\n",
 "        from feast import FeatureStore\n",
 "        store = FeatureStore(repo_path=str(feast_repo_path))\n",
-"
+"\n",
 "        print(\"Feast Feature Store Contents\")\n",
 "        print(\"=\" * 60)\n",
-"
+"\n",
 "        # List entities\n",
 "        entities = store.list_entities()\n",
 "        feature_views = store.list_feature_views()\n",
 "        data_sources = store.list_data_sources()\n",
-"
+"\n",
 "        # Check if registry is empty (feast apply not run yet)\n",
 "        if not entities and not feature_views:\n",
 "            print(\"\\n⚠️ Feature store registry is empty.\")\n",
@@ -1021,7 +1025,7 @@
 "        print(f\"\\n📦 Entities ({len(entities)}):\")\n",
 "        for entity in entities:\n",
 "            print(f\" - {entity.name} (join_key: {entity.join_keys})\")\n",
-"
+"\n",
 "        print(f\"\\n📊 Feature Views ({len(feature_views)}):\")\n",
 "        for fv in feature_views:\n",
 "            print(f\" - {fv.name}: {len(fv.features)} features\")\n",
@@ -1029,13 +1033,13 @@
 "                print(f\" • {feat.name} ({feat.dtype})\")\n",
 "            if len(fv.features) > 5:\n",
 "                print(f\" ... and {len(fv.features) - 5} more\")\n",
-"
+"\n",
 "        print(f\"\\n💾 Data Sources ({len(data_sources)}):\")\n",
 "        for ds in data_sources:\n",
 "            print(f\" - {ds.name}\")\n",
-"
+"\n",
 "        # Try to show sample data from parquet files\n",
-"        print(
+"        print(\"\\n📄 Sample Feature Data:\")\n",
 "        data_dir = feast_repo_path / \"data\"\n",
 "        if data_dir.exists():\n",
 "            parquet_files = list(data_dir.glob(\"*.parquet\"))\n",
@@ -1043,14 +1047,14 @@
 "                sample_df = pd.read_parquet(parquet_files[0])\n",
 "                print(f\" Source: {parquet_files[0].name}\")\n",
 "                print(f\" Shape: {sample_df.shape[0]:,} rows x {sample_df.shape[1]} columns\")\n",
-"                print(
+"                print(\"\\n Head (first 5 rows):\")\n",
 "                display(sample_df.head())\n",
 "            else:\n",
 "                print(\" No parquet files found yet in data/ directory.\")\n",
 "                print(\" Features will be materialized when you run the pipeline.\")\n",
 "        else:\n",
 "            print(\" Data directory not created yet.\")\n",
-"
+"\n",
 "    except ImportError:\n",
 "        print(\"Feast not installed. Install with: pip install feast\")\n",
 "    except Exception as e:\n",
{churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb

@@ -54,12 +54,11 @@
 "outputs": [],
 "source": [
 "from customer_retention.analysis.notebook_progress import track_and_export_previous\n",
+"\n",
 "track_and_export_previous(\"11_scoring_validation.ipynb\")\n",
 "\n",
 "import sys\n",
-"from pathlib import Path\n"
-"\n",
-"from customer_retention.core.config.experiments import EXPERIMENTS_DIR, FINDINGS_DIR"
+"from pathlib import Path\n"
 ]
 },
 {
@@ -95,10 +94,19 @@
 "sys.path.insert(0, str(PIPELINE_DIR))\n",
 "\n",
 "from config import (\n",
-"
-"
-"
-"
+"    ARTIFACTS_PATH,\n",
+"    FEAST_ENTITY_KEY,\n",
+"    FEAST_FEATURE_VIEW,\n",
+"    FEAST_REPO_PATH,\n",
+"    FEAST_TIMESTAMP_COL,\n",
+"    MLFLOW_TRACKING_URI,\n",
+"    PIPELINE_NAME,\n",
+"    PRODUCTION_DIR,\n",
+"    RECOMMENDATIONS_HASH,\n",
+"    TARGET_COLUMN,\n",
+")\n",
+"from config import (\n",
+"    EXPERIMENTS_DIR as GEN_EXPERIMENTS_DIR,\n",
 ")\n",
 "\n",
 "print(f\"Pipeline: {PIPELINE_NAME}\")\n",
@@ -146,18 +154,15 @@
 },
 "outputs": [],
 "source": [
-"import numpy as np\n",
-"import pandas as pd\n",
 "import mlflow\n",
 "import mlflow.sklearn\n",
 "import mlflow.xgboost\n",
+"import numpy as np\n",
+"import pandas as pd\n",
 "import xgboost as xgb\n",
 "from feast import FeatureStore\n",
-"
-"from customer_retention.
-"    PipelineTransformationType, TransformationStep,\n",
-")\n",
-"from config import EXCLUDED_SOURCES\n",
+"\n",
+"from customer_retention.transforms import ArtifactStore, TransformExecutor\n",
 "\n",
 "_registry = ArtifactStore.from_manifest(Path(ARTIFACTS_PATH) / \"manifest.yaml\")\n",
 "_executor = TransformExecutor()\n",
@@ -279,7 +284,7 @@
 "y_pred = (y_proba >= 0.5).astype(int)\n",
 "\n",
 "# --- Metrics ---\n",
-"from sklearn.metrics import
+"from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score\n",
 "\n",
 "metrics = {\n",
 "    \"accuracy\": accuracy_score(y_true, y_pred),\n",
@@ -346,8 +351,12 @@
 "outputs": [],
 "source": [
 "from sklearn.metrics import (\n",
-"    accuracy_score
-"
+"    accuracy_score,\n",
+"    confusion_matrix,\n",
+"    f1_score,\n",
+"    precision_score,\n",
+"    recall_score,\n",
+"    roc_auc_score,\n",
 ")\n",
 "\n",
 "y_true = predictions_df[\"actual\"]\n",
@@ -367,7 +376,7 @@
 "    print(f\" {name}: {value:.4f}\")\n",
 "\n",
 "cm = confusion_matrix(y_true, y_pred)\n",
-"print(
+"print(\"\\nConfusion Matrix:\")\n",
 "print(f\" TN={cm[0,0]:,} FP={cm[0,1]:,}\")\n",
 "print(f\" FN={cm[1,0]:,} TP={cm[1,1]:,}\")"
@@ -467,12 +476,18 @@
 },
 "outputs": [],
 "source": [
+"from IPython.display import display\n",
 "from sklearn.metrics import (\n",
-"
-"
-"
+"    accuracy_score,\n",
+"    average_precision_score,\n",
+"    confusion_matrix,\n",
+"    f1_score,\n",
+"    precision_recall_curve,\n",
+"    precision_score,\n",
+"    recall_score,\n",
+"    roc_auc_score,\n",
+"    roc_curve,\n",
 ")\n",
-"from IPython.display import display, HTML\n",
 "\n",
 "mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)\n",
 "client = mlflow.tracking.MlflowClient()\n",
@@ -1059,7 +1074,7 @@
 ")\n",
 "\n",
 "print(f\"Customer browser ready with {len(browser_df):,} records\")\n",
-"print(
+"print(\"\\nPrediction Distribution:\")\n",
 "print(f\" Predicted Positive: {(browser_df['prediction'] == 1).sum():,}\")\n",
 "print(f\" Predicted Negative: {(browser_df['prediction'] == 0).sum():,}\")\n",
 "print(f\"\\nCorrect Predictions: {browser_df['correct'].sum():,}/{len(browser_df):,} ({browser_df['correct'].mean():.1%})\")"
{churnkit-0.76.1a1.data → churnkit-0.76.1a2.data}/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb

@@ -44,6 +44,7 @@
 "outputs": [],
 "source": [
 "from customer_retention.analysis.notebook_progress import track_and_export_previous\n",
+"\n",
 "track_and_export_previous(\"12_view_documentation.ipynb\")"
 ]
 },
@@ -70,8 +71,9 @@
 "outputs": [],
 "source": [
 "from pathlib import Path\n",
-"
+"\n",
 "from customer_retention.analysis.notebook_html_exporter import check_exported_html\n",
+"from customer_retention.core.config.experiments import get_notebook_experiments_dir\n",
 "\n",
 "docs_dir = get_notebook_experiments_dir() / \"docs\"\n",
 "notebook_dir = Path(\"exploration_notebooks\")\n",
{churnkit-0.76.1a1.dist-info → churnkit-0.76.1a2.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: churnkit
-Version: 0.76.1a1
+Version: 0.76.1a2
 Summary: Structured ML framework for customer churn prediction -- from exploration notebooks to production pipelines, locally or on Databricks.
 Project-URL: Homepage, https://github.com/aladjov/CR
 Project-URL: Documentation, https://github.com/aladjov/CR/wiki
|