churnkit 0.75.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302)
  1. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/00_start_here.ipynb +647 -0
  2. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01_data_discovery.ipynb +1165 -0
  3. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_a_temporal_text_deep_dive.ipynb +961 -0
  4. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01a_temporal_deep_dive.ipynb +1690 -0
  5. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01b_temporal_quality.ipynb +679 -0
  6. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01c_temporal_patterns.ipynb +3305 -0
  7. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/01d_event_aggregation.ipynb +1463 -0
  8. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02_column_deep_dive.ipynb +1430 -0
  9. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/02a_text_columns_deep_dive.ipynb +854 -0
  10. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/03_quality_assessment.ipynb +1639 -0
  11. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/04_relationship_analysis.ipynb +1890 -0
  12. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/05_multi_dataset.ipynb +1457 -0
  13. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/06_feature_opportunities.ipynb +1624 -0
  14. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/07_modeling_readiness.ipynb +780 -0
  15. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/08_baseline_experiments.ipynb +979 -0
  16. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/09_business_alignment.ipynb +572 -0
  17. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/10_spec_generation.ipynb +1179 -0
  18. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb +1418 -0
  19. churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/12_view_documentation.ipynb +151 -0
  20. churnkit-0.75.0a1.dist-info/METADATA +229 -0
  21. churnkit-0.75.0a1.dist-info/RECORD +302 -0
  22. churnkit-0.75.0a1.dist-info/WHEEL +4 -0
  23. churnkit-0.75.0a1.dist-info/entry_points.txt +2 -0
  24. churnkit-0.75.0a1.dist-info/licenses/LICENSE +202 -0
  25. customer_retention/__init__.py +37 -0
  26. customer_retention/analysis/__init__.py +0 -0
  27. customer_retention/analysis/auto_explorer/__init__.py +62 -0
  28. customer_retention/analysis/auto_explorer/exploration_manager.py +470 -0
  29. customer_retention/analysis/auto_explorer/explorer.py +258 -0
  30. customer_retention/analysis/auto_explorer/findings.py +291 -0
  31. customer_retention/analysis/auto_explorer/layered_recommendations.py +485 -0
  32. customer_retention/analysis/auto_explorer/recommendation_builder.py +148 -0
  33. customer_retention/analysis/auto_explorer/recommendations.py +418 -0
  34. customer_retention/analysis/business/__init__.py +26 -0
  35. customer_retention/analysis/business/ab_test_designer.py +144 -0
  36. customer_retention/analysis/business/fairness_analyzer.py +166 -0
  37. customer_retention/analysis/business/intervention_matcher.py +121 -0
  38. customer_retention/analysis/business/report_generator.py +222 -0
  39. customer_retention/analysis/business/risk_profile.py +199 -0
  40. customer_retention/analysis/business/roi_analyzer.py +139 -0
  41. customer_retention/analysis/diagnostics/__init__.py +20 -0
  42. customer_retention/analysis/diagnostics/calibration_analyzer.py +133 -0
  43. customer_retention/analysis/diagnostics/cv_analyzer.py +144 -0
  44. customer_retention/analysis/diagnostics/error_analyzer.py +107 -0
  45. customer_retention/analysis/diagnostics/leakage_detector.py +394 -0
  46. customer_retention/analysis/diagnostics/noise_tester.py +140 -0
  47. customer_retention/analysis/diagnostics/overfitting_analyzer.py +190 -0
  48. customer_retention/analysis/diagnostics/segment_analyzer.py +122 -0
  49. customer_retention/analysis/discovery/__init__.py +8 -0
  50. customer_retention/analysis/discovery/config_generator.py +49 -0
  51. customer_retention/analysis/discovery/discovery_flow.py +19 -0
  52. customer_retention/analysis/discovery/type_inferencer.py +147 -0
  53. customer_retention/analysis/interpretability/__init__.py +13 -0
  54. customer_retention/analysis/interpretability/cohort_analyzer.py +185 -0
  55. customer_retention/analysis/interpretability/counterfactual.py +175 -0
  56. customer_retention/analysis/interpretability/individual_explainer.py +141 -0
  57. customer_retention/analysis/interpretability/pdp_generator.py +103 -0
  58. customer_retention/analysis/interpretability/shap_explainer.py +106 -0
  59. customer_retention/analysis/jupyter_save_hook.py +28 -0
  60. customer_retention/analysis/notebook_html_exporter.py +136 -0
  61. customer_retention/analysis/notebook_progress.py +60 -0
  62. customer_retention/analysis/plotly_preprocessor.py +154 -0
  63. customer_retention/analysis/recommendations/__init__.py +54 -0
  64. customer_retention/analysis/recommendations/base.py +158 -0
  65. customer_retention/analysis/recommendations/cleaning/__init__.py +11 -0
  66. customer_retention/analysis/recommendations/cleaning/consistency.py +107 -0
  67. customer_retention/analysis/recommendations/cleaning/deduplicate.py +94 -0
  68. customer_retention/analysis/recommendations/cleaning/impute.py +67 -0
  69. customer_retention/analysis/recommendations/cleaning/outlier.py +71 -0
  70. customer_retention/analysis/recommendations/datetime/__init__.py +3 -0
  71. customer_retention/analysis/recommendations/datetime/extract.py +149 -0
  72. customer_retention/analysis/recommendations/encoding/__init__.py +3 -0
  73. customer_retention/analysis/recommendations/encoding/categorical.py +114 -0
  74. customer_retention/analysis/recommendations/pipeline.py +74 -0
  75. customer_retention/analysis/recommendations/registry.py +76 -0
  76. customer_retention/analysis/recommendations/selection/__init__.py +3 -0
  77. customer_retention/analysis/recommendations/selection/drop_column.py +56 -0
  78. customer_retention/analysis/recommendations/transform/__init__.py +4 -0
  79. customer_retention/analysis/recommendations/transform/power.py +94 -0
  80. customer_retention/analysis/recommendations/transform/scale.py +112 -0
  81. customer_retention/analysis/visualization/__init__.py +15 -0
  82. customer_retention/analysis/visualization/chart_builder.py +2619 -0
  83. customer_retention/analysis/visualization/console.py +122 -0
  84. customer_retention/analysis/visualization/display.py +171 -0
  85. customer_retention/analysis/visualization/number_formatter.py +36 -0
  86. customer_retention/artifacts/__init__.py +3 -0
  87. customer_retention/artifacts/fit_artifact_registry.py +146 -0
  88. customer_retention/cli.py +93 -0
  89. customer_retention/core/__init__.py +0 -0
  90. customer_retention/core/compat/__init__.py +193 -0
  91. customer_retention/core/compat/detection.py +99 -0
  92. customer_retention/core/compat/ops.py +48 -0
  93. customer_retention/core/compat/pandas_backend.py +57 -0
  94. customer_retention/core/compat/spark_backend.py +75 -0
  95. customer_retention/core/components/__init__.py +11 -0
  96. customer_retention/core/components/base.py +79 -0
  97. customer_retention/core/components/components/__init__.py +13 -0
  98. customer_retention/core/components/components/deployer.py +26 -0
  99. customer_retention/core/components/components/explainer.py +26 -0
  100. customer_retention/core/components/components/feature_eng.py +33 -0
  101. customer_retention/core/components/components/ingester.py +34 -0
  102. customer_retention/core/components/components/profiler.py +34 -0
  103. customer_retention/core/components/components/trainer.py +38 -0
  104. customer_retention/core/components/components/transformer.py +36 -0
  105. customer_retention/core/components/components/validator.py +37 -0
  106. customer_retention/core/components/enums.py +33 -0
  107. customer_retention/core/components/orchestrator.py +94 -0
  108. customer_retention/core/components/registry.py +59 -0
  109. customer_retention/core/config/__init__.py +39 -0
  110. customer_retention/core/config/column_config.py +95 -0
  111. customer_retention/core/config/experiments.py +71 -0
  112. customer_retention/core/config/pipeline_config.py +117 -0
  113. customer_retention/core/config/source_config.py +83 -0
  114. customer_retention/core/utils/__init__.py +28 -0
  115. customer_retention/core/utils/leakage.py +85 -0
  116. customer_retention/core/utils/severity.py +53 -0
  117. customer_retention/core/utils/statistics.py +90 -0
  118. customer_retention/generators/__init__.py +0 -0
  119. customer_retention/generators/notebook_generator/__init__.py +167 -0
  120. customer_retention/generators/notebook_generator/base.py +55 -0
  121. customer_retention/generators/notebook_generator/cell_builder.py +49 -0
  122. customer_retention/generators/notebook_generator/config.py +47 -0
  123. customer_retention/generators/notebook_generator/databricks_generator.py +48 -0
  124. customer_retention/generators/notebook_generator/local_generator.py +48 -0
  125. customer_retention/generators/notebook_generator/project_init.py +174 -0
  126. customer_retention/generators/notebook_generator/runner.py +150 -0
  127. customer_retention/generators/notebook_generator/script_generator.py +110 -0
  128. customer_retention/generators/notebook_generator/stages/__init__.py +19 -0
  129. customer_retention/generators/notebook_generator/stages/base_stage.py +86 -0
  130. customer_retention/generators/notebook_generator/stages/s01_ingestion.py +100 -0
  131. customer_retention/generators/notebook_generator/stages/s02_profiling.py +95 -0
  132. customer_retention/generators/notebook_generator/stages/s03_cleaning.py +180 -0
  133. customer_retention/generators/notebook_generator/stages/s04_transformation.py +165 -0
  134. customer_retention/generators/notebook_generator/stages/s05_feature_engineering.py +115 -0
  135. customer_retention/generators/notebook_generator/stages/s06_feature_selection.py +97 -0
  136. customer_retention/generators/notebook_generator/stages/s07_model_training.py +176 -0
  137. customer_retention/generators/notebook_generator/stages/s08_deployment.py +81 -0
  138. customer_retention/generators/notebook_generator/stages/s09_monitoring.py +112 -0
  139. customer_retention/generators/notebook_generator/stages/s10_batch_inference.py +642 -0
  140. customer_retention/generators/notebook_generator/stages/s11_feature_store.py +348 -0
  141. customer_retention/generators/orchestration/__init__.py +23 -0
  142. customer_retention/generators/orchestration/code_generator.py +196 -0
  143. customer_retention/generators/orchestration/context.py +147 -0
  144. customer_retention/generators/orchestration/data_materializer.py +188 -0
  145. customer_retention/generators/orchestration/databricks_exporter.py +411 -0
  146. customer_retention/generators/orchestration/doc_generator.py +311 -0
  147. customer_retention/generators/pipeline_generator/__init__.py +26 -0
  148. customer_retention/generators/pipeline_generator/findings_parser.py +727 -0
  149. customer_retention/generators/pipeline_generator/generator.py +142 -0
  150. customer_retention/generators/pipeline_generator/models.py +166 -0
  151. customer_retention/generators/pipeline_generator/renderer.py +2125 -0
  152. customer_retention/generators/spec_generator/__init__.py +37 -0
  153. customer_retention/generators/spec_generator/databricks_generator.py +433 -0
  154. customer_retention/generators/spec_generator/generic_generator.py +373 -0
  155. customer_retention/generators/spec_generator/mlflow_pipeline_generator.py +685 -0
  156. customer_retention/generators/spec_generator/pipeline_spec.py +298 -0
  157. customer_retention/integrations/__init__.py +0 -0
  158. customer_retention/integrations/adapters/__init__.py +13 -0
  159. customer_retention/integrations/adapters/base.py +10 -0
  160. customer_retention/integrations/adapters/factory.py +25 -0
  161. customer_retention/integrations/adapters/feature_store/__init__.py +6 -0
  162. customer_retention/integrations/adapters/feature_store/base.py +57 -0
  163. customer_retention/integrations/adapters/feature_store/databricks.py +94 -0
  164. customer_retention/integrations/adapters/feature_store/feast_adapter.py +97 -0
  165. customer_retention/integrations/adapters/feature_store/local.py +75 -0
  166. customer_retention/integrations/adapters/mlflow/__init__.py +6 -0
  167. customer_retention/integrations/adapters/mlflow/base.py +32 -0
  168. customer_retention/integrations/adapters/mlflow/databricks.py +54 -0
  169. customer_retention/integrations/adapters/mlflow/experiment_tracker.py +161 -0
  170. customer_retention/integrations/adapters/mlflow/local.py +50 -0
  171. customer_retention/integrations/adapters/storage/__init__.py +5 -0
  172. customer_retention/integrations/adapters/storage/base.py +33 -0
  173. customer_retention/integrations/adapters/storage/databricks.py +76 -0
  174. customer_retention/integrations/adapters/storage/local.py +59 -0
  175. customer_retention/integrations/feature_store/__init__.py +47 -0
  176. customer_retention/integrations/feature_store/definitions.py +215 -0
  177. customer_retention/integrations/feature_store/manager.py +744 -0
  178. customer_retention/integrations/feature_store/registry.py +412 -0
  179. customer_retention/integrations/iteration/__init__.py +28 -0
  180. customer_retention/integrations/iteration/context.py +212 -0
  181. customer_retention/integrations/iteration/feedback_collector.py +184 -0
  182. customer_retention/integrations/iteration/orchestrator.py +168 -0
  183. customer_retention/integrations/iteration/recommendation_tracker.py +341 -0
  184. customer_retention/integrations/iteration/signals.py +212 -0
  185. customer_retention/integrations/llm_context/__init__.py +4 -0
  186. customer_retention/integrations/llm_context/context_builder.py +201 -0
  187. customer_retention/integrations/llm_context/prompts.py +100 -0
  188. customer_retention/integrations/streaming/__init__.py +103 -0
  189. customer_retention/integrations/streaming/batch_integration.py +149 -0
  190. customer_retention/integrations/streaming/early_warning_model.py +227 -0
  191. customer_retention/integrations/streaming/event_schema.py +214 -0
  192. customer_retention/integrations/streaming/online_store_writer.py +249 -0
  193. customer_retention/integrations/streaming/realtime_scorer.py +261 -0
  194. customer_retention/integrations/streaming/trigger_engine.py +293 -0
  195. customer_retention/integrations/streaming/window_aggregator.py +393 -0
  196. customer_retention/stages/__init__.py +0 -0
  197. customer_retention/stages/cleaning/__init__.py +9 -0
  198. customer_retention/stages/cleaning/base.py +28 -0
  199. customer_retention/stages/cleaning/missing_handler.py +160 -0
  200. customer_retention/stages/cleaning/outlier_handler.py +204 -0
  201. customer_retention/stages/deployment/__init__.py +28 -0
  202. customer_retention/stages/deployment/batch_scorer.py +106 -0
  203. customer_retention/stages/deployment/champion_challenger.py +299 -0
  204. customer_retention/stages/deployment/model_registry.py +182 -0
  205. customer_retention/stages/deployment/retraining_trigger.py +245 -0
  206. customer_retention/stages/features/__init__.py +73 -0
  207. customer_retention/stages/features/behavioral_features.py +266 -0
  208. customer_retention/stages/features/customer_segmentation.py +505 -0
  209. customer_retention/stages/features/feature_definitions.py +265 -0
  210. customer_retention/stages/features/feature_engineer.py +551 -0
  211. customer_retention/stages/features/feature_manifest.py +340 -0
  212. customer_retention/stages/features/feature_selector.py +239 -0
  213. customer_retention/stages/features/interaction_features.py +160 -0
  214. customer_retention/stages/features/temporal_features.py +243 -0
  215. customer_retention/stages/ingestion/__init__.py +9 -0
  216. customer_retention/stages/ingestion/load_result.py +32 -0
  217. customer_retention/stages/ingestion/loaders.py +195 -0
  218. customer_retention/stages/ingestion/source_registry.py +130 -0
  219. customer_retention/stages/modeling/__init__.py +31 -0
  220. customer_retention/stages/modeling/baseline_trainer.py +139 -0
  221. customer_retention/stages/modeling/cross_validator.py +125 -0
  222. customer_retention/stages/modeling/data_splitter.py +205 -0
  223. customer_retention/stages/modeling/feature_scaler.py +99 -0
  224. customer_retention/stages/modeling/hyperparameter_tuner.py +107 -0
  225. customer_retention/stages/modeling/imbalance_handler.py +282 -0
  226. customer_retention/stages/modeling/mlflow_logger.py +95 -0
  227. customer_retention/stages/modeling/model_comparator.py +149 -0
  228. customer_retention/stages/modeling/model_evaluator.py +138 -0
  229. customer_retention/stages/modeling/threshold_optimizer.py +131 -0
  230. customer_retention/stages/monitoring/__init__.py +37 -0
  231. customer_retention/stages/monitoring/alert_manager.py +328 -0
  232. customer_retention/stages/monitoring/drift_detector.py +201 -0
  233. customer_retention/stages/monitoring/performance_monitor.py +242 -0
  234. customer_retention/stages/preprocessing/__init__.py +5 -0
  235. customer_retention/stages/preprocessing/transformer_manager.py +284 -0
  236. customer_retention/stages/profiling/__init__.py +256 -0
  237. customer_retention/stages/profiling/categorical_distribution.py +269 -0
  238. customer_retention/stages/profiling/categorical_target_analyzer.py +274 -0
  239. customer_retention/stages/profiling/column_profiler.py +527 -0
  240. customer_retention/stages/profiling/distribution_analysis.py +483 -0
  241. customer_retention/stages/profiling/drift_detector.py +310 -0
  242. customer_retention/stages/profiling/feature_capacity.py +507 -0
  243. customer_retention/stages/profiling/pattern_analysis_config.py +513 -0
  244. customer_retention/stages/profiling/profile_result.py +212 -0
  245. customer_retention/stages/profiling/quality_checks.py +1632 -0
  246. customer_retention/stages/profiling/relationship_detector.py +256 -0
  247. customer_retention/stages/profiling/relationship_recommender.py +454 -0
  248. customer_retention/stages/profiling/report_generator.py +520 -0
  249. customer_retention/stages/profiling/scd_analyzer.py +151 -0
  250. customer_retention/stages/profiling/segment_analyzer.py +632 -0
  251. customer_retention/stages/profiling/segment_aware_outlier.py +265 -0
  252. customer_retention/stages/profiling/target_level_analyzer.py +217 -0
  253. customer_retention/stages/profiling/temporal_analyzer.py +388 -0
  254. customer_retention/stages/profiling/temporal_coverage.py +488 -0
  255. customer_retention/stages/profiling/temporal_feature_analyzer.py +692 -0
  256. customer_retention/stages/profiling/temporal_feature_engineer.py +703 -0
  257. customer_retention/stages/profiling/temporal_pattern_analyzer.py +636 -0
  258. customer_retention/stages/profiling/temporal_quality_checks.py +278 -0
  259. customer_retention/stages/profiling/temporal_target_analyzer.py +241 -0
  260. customer_retention/stages/profiling/text_embedder.py +87 -0
  261. customer_retention/stages/profiling/text_processor.py +115 -0
  262. customer_retention/stages/profiling/text_reducer.py +60 -0
  263. customer_retention/stages/profiling/time_series_profiler.py +303 -0
  264. customer_retention/stages/profiling/time_window_aggregator.py +376 -0
  265. customer_retention/stages/profiling/type_detector.py +382 -0
  266. customer_retention/stages/profiling/window_recommendation.py +288 -0
  267. customer_retention/stages/temporal/__init__.py +166 -0
  268. customer_retention/stages/temporal/access_guard.py +180 -0
  269. customer_retention/stages/temporal/cutoff_analyzer.py +235 -0
  270. customer_retention/stages/temporal/data_preparer.py +178 -0
  271. customer_retention/stages/temporal/point_in_time_join.py +134 -0
  272. customer_retention/stages/temporal/point_in_time_registry.py +148 -0
  273. customer_retention/stages/temporal/scenario_detector.py +163 -0
  274. customer_retention/stages/temporal/snapshot_manager.py +259 -0
  275. customer_retention/stages/temporal/synthetic_coordinator.py +66 -0
  276. customer_retention/stages/temporal/timestamp_discovery.py +531 -0
  277. customer_retention/stages/temporal/timestamp_manager.py +255 -0
  278. customer_retention/stages/transformation/__init__.py +13 -0
  279. customer_retention/stages/transformation/binary_handler.py +85 -0
  280. customer_retention/stages/transformation/categorical_encoder.py +245 -0
  281. customer_retention/stages/transformation/datetime_transformer.py +97 -0
  282. customer_retention/stages/transformation/numeric_transformer.py +181 -0
  283. customer_retention/stages/transformation/pipeline.py +257 -0
  284. customer_retention/stages/validation/__init__.py +60 -0
  285. customer_retention/stages/validation/adversarial_scoring_validator.py +205 -0
  286. customer_retention/stages/validation/business_sense_gate.py +173 -0
  287. customer_retention/stages/validation/data_quality_gate.py +235 -0
  288. customer_retention/stages/validation/data_validators.py +511 -0
  289. customer_retention/stages/validation/feature_quality_gate.py +183 -0
  290. customer_retention/stages/validation/gates.py +117 -0
  291. customer_retention/stages/validation/leakage_gate.py +352 -0
  292. customer_retention/stages/validation/model_validity_gate.py +213 -0
  293. customer_retention/stages/validation/pipeline_validation_runner.py +264 -0
  294. customer_retention/stages/validation/quality_scorer.py +544 -0
  295. customer_retention/stages/validation/rule_generator.py +57 -0
  296. customer_retention/stages/validation/scoring_pipeline_validator.py +446 -0
  297. customer_retention/stages/validation/timeseries_detector.py +769 -0
  298. customer_retention/transforms/__init__.py +47 -0
  299. customer_retention/transforms/artifact_store.py +50 -0
  300. customer_retention/transforms/executor.py +157 -0
  301. customer_retention/transforms/fitted.py +92 -0
  302. customer_retention/transforms/ops.py +148 -0
churnkit-0.75.0a1.data/data/share/churnkit/exploration_notebooks/11_scoring_validation.ipynb
@@ -0,0 +1,1418 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "32f00772",
6
+ "metadata": {
7
+ "papermill": {
8
+ "duration": 0.003366,
9
+ "end_time": "2026-02-02T13:04:20.028364",
10
+ "exception": false,
11
+ "start_time": "2026-02-02T13:04:20.024998",
12
+ "status": "completed"
13
+ },
14
+ "tags": []
15
+ },
16
+ "source": [
17
+ "# Chapter 11: Scoring, Validation & Explanations\n",
18
+ "\n",
19
+ "End-to-end scoring pipeline with holdout validation, model comparison, adversarial\n",
20
+ "validation, SHAP explanations, and error analysis.\n",
21
+ "\n",
22
+ "**Sections:**\n",
23
+ "1. Run Scoring\n",
24
+ "2. Summary Metrics\n",
25
+ "3. Model Comparison Grid\n",
26
+ "4. Adversarial Pipeline Validation\n",
27
+ "5. Transformation Validation\n",
28
+ "6. Model Explanations (SHAP)\n",
29
+ "7. Customer Browser\n",
30
+ "8. Error Analysis\n",
31
+ "9. Export Results"
32
+ ]
33
+ },
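The papermill metadata recorded on each cell shows this notebook is executed headlessly. A minimal sketch of driving it the same way with papermill; the input and output paths below are illustrative assumptions, not paths defined by the package:

# Hedged sketch: execute the generated notebook headlessly with papermill,
# mirroring the papermill execution metadata recorded in the cells above.
# Both paths are assumptions for illustration only.
import papermill as pm

pm.execute_notebook(
    "exploration_notebooks/11_scoring_validation.ipynb",   # assumed input location
    "output/11_scoring_validation.out.ipynb",               # assumed output location
)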
34
+ {
35
+ "cell_type": "code",
36
+ "execution_count": null,
37
+ "id": "3dd9c433",
38
+ "metadata": {
39
+ "execution": {
40
+ "iopub.execute_input": "2026-02-02T13:04:20.034460Z",
41
+ "iopub.status.busy": "2026-02-02T13:04:20.034309Z",
42
+ "iopub.status.idle": "2026-02-02T13:04:20.383463Z",
43
+ "shell.execute_reply": "2026-02-02T13:04:20.382527Z"
44
+ },
45
+ "papermill": {
46
+ "duration": 0.353477,
47
+ "end_time": "2026-02-02T13:04:20.384447",
48
+ "exception": false,
49
+ "start_time": "2026-02-02T13:04:20.030970",
50
+ "status": "completed"
51
+ },
52
+ "tags": []
53
+ },
54
+ "outputs": [],
55
+ "source": [
56
+ "from customer_retention.analysis.notebook_progress import track_and_export_previous\n",
57
+ "track_and_export_previous(\"11_scoring_validation.ipynb\")\n",
58
+ "\n",
59
+ "import sys\n",
60
+ "from pathlib import Path\n",
61
+ "\n",
62
+ "from customer_retention.core.config.experiments import EXPERIMENTS_DIR, FINDINGS_DIR"
63
+ ]
64
+ },
65
+ {
66
+ "cell_type": "code",
67
+ "execution_count": null,
68
+ "id": "2df62fbd",
69
+ "metadata": {
70
+ "execution": {
71
+ "iopub.execute_input": "2026-02-02T13:04:20.394211Z",
72
+ "iopub.status.busy": "2026-02-02T13:04:20.394015Z",
73
+ "iopub.status.idle": "2026-02-02T13:04:20.399431Z",
74
+ "shell.execute_reply": "2026-02-02T13:04:20.398670Z"
75
+ },
76
+ "papermill": {
77
+ "duration": 0.013077,
78
+ "end_time": "2026-02-02T13:04:20.400068",
79
+ "exception": false,
80
+ "start_time": "2026-02-02T13:04:20.386991",
81
+ "status": "completed"
82
+ },
83
+ "tags": []
84
+ },
85
+ "outputs": [],
86
+ "source": [
87
+ "# Discover the generated pipeline directory\n",
88
+ "generated_dir = Path(\"../generated_pipelines/local\")\n",
89
+ "pipeline_dirs = sorted(generated_dir.glob(\"*/config.py\"))\n",
90
+ "if not pipeline_dirs:\n",
91
+ " raise FileNotFoundError(\n",
92
+ " f\"No generated pipeline found under {generated_dir}. Run notebook 10 first.\"\n",
93
+ " )\n",
94
+ "PIPELINE_DIR = pipeline_dirs[-1].parent\n",
95
+ "sys.path.insert(0, str(PIPELINE_DIR))\n",
96
+ "\n",
97
+ "from config import (\n",
98
+ " PIPELINE_NAME, TARGET_COLUMN, RECOMMENDATIONS_HASH, MLFLOW_TRACKING_URI,\n",
99
+ " FEAST_REPO_PATH, FEAST_FEATURE_VIEW, FEAST_ENTITY_KEY, FEAST_TIMESTAMP_COL,\n",
100
+ " PRODUCTION_DIR, EXPERIMENTS_DIR as GEN_EXPERIMENTS_DIR,\n",
101
+ " get_feast_data_path, get_gold_path, ARTIFACTS_PATH,\n",
102
+ ")\n",
103
+ "\n",
104
+ "print(f\"Pipeline: {PIPELINE_NAME}\")\n",
105
+ "print(f\"Pipeline dir: {PIPELINE_DIR}\")\n",
106
+ "print(f\"Experiments dir: {GEN_EXPERIMENTS_DIR}\")\n",
107
+ "print(f\"Recommendations hash: {RECOMMENDATIONS_HASH}\")"
108
+ ]
109
+ },
110
+ {
111
+ "cell_type": "markdown",
112
+ "id": "cdb7a547",
113
+ "metadata": {
114
+ "papermill": {
115
+ "duration": 0.002345,
116
+ "end_time": "2026-02-02T13:04:20.404863",
117
+ "exception": false,
118
+ "start_time": "2026-02-02T13:04:20.402518",
119
+ "status": "completed"
120
+ },
121
+ "tags": []
122
+ },
123
+ "source": [
124
+ "## 11.1 Run Scoring"
125
+ ]
126
+ },
127
+ {
128
+ "cell_type": "code",
129
+ "execution_count": null,
130
+ "id": "72417bd4",
131
+ "metadata": {
132
+ "execution": {
133
+ "iopub.execute_input": "2026-02-02T13:04:20.410359Z",
134
+ "iopub.status.busy": "2026-02-02T13:04:20.410203Z",
135
+ "iopub.status.idle": "2026-02-02T13:04:23.668644Z",
136
+ "shell.execute_reply": "2026-02-02T13:04:23.668162Z"
137
+ },
138
+ "papermill": {
139
+ "duration": 3.263488,
140
+ "end_time": "2026-02-02T13:04:23.670606",
141
+ "exception": false,
142
+ "start_time": "2026-02-02T13:04:20.407118",
143
+ "status": "completed"
144
+ },
145
+ "tags": []
146
+ },
147
+ "outputs": [],
148
+ "source": [
149
+ "import numpy as np\n",
150
+ "import pandas as pd\n",
151
+ "import mlflow\n",
152
+ "import mlflow.sklearn\n",
153
+ "import mlflow.xgboost\n",
154
+ "import xgboost as xgb\n",
155
+ "from feast import FeatureStore\n",
156
+ "from customer_retention.transforms import TransformExecutor, ArtifactStore\n",
157
+ "from customer_retention.generators.pipeline_generator.models import (\n",
158
+ " PipelineTransformationType, TransformationStep,\n",
159
+ ")\n",
160
+ "from config import EXCLUDED_SOURCES\n",
161
+ "\n",
162
+ "_registry = ArtifactStore.from_manifest(Path(ARTIFACTS_PATH) / \"manifest.yaml\")\n",
163
+ "_executor = TransformExecutor()\n",
164
+ "\n",
165
+ "# Import encoding/scaling steps from gold module\n",
166
+ "sys.path.insert(0, str(PIPELINE_DIR / \"gold\"))\n",
167
+ "from gold_features import ENCODINGS, SCALINGS, load_gold\n",
168
+ "\n",
169
+ "ORIGINAL_COLUMN = f\"original_{TARGET_COLUMN}\"\n",
170
+ "PREDICTIONS_PATH = PRODUCTION_DIR / \"data\" / \"scoring\" / \"predictions.parquet\"\n",
171
+ "\n",
172
+ "# Set tracking URI\n",
173
+ "mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)\n",
174
+ "\n",
175
+ "\n",
176
+ "# --- Load holdout (from gold path which retains original_target, not Feast which excludes it) ---\n",
177
+ "features_df = load_gold()\n",
178
+ "\n",
179
+ "if ORIGINAL_COLUMN not in features_df.columns:\n",
180
+ " raise ValueError(\n",
181
+ " f\"No holdout found (column '{ORIGINAL_COLUMN}' missing). \"\n",
182
+ " \"Holdout must be created in silver layer BEFORE gold layer feature computation.\"\n",
183
+ " )\n",
184
+ "\n",
185
+ "scoring_mask = features_df[TARGET_COLUMN].isna() & features_df[ORIGINAL_COLUMN].notna()\n",
186
+ "scoring_df = features_df[scoring_mask].copy()\n",
187
+ "print(f\"Found {len(scoring_df):,} holdout records for scoring\")\n",
188
+ "\n",
189
+ "\n",
190
+ "# --- Feast features (fallback to parquet) ---\n",
191
+ "feast_path = Path(FEAST_REPO_PATH)\n",
192
+ "if (feast_path / \"feature_store.yaml\").exists():\n",
193
+ " try:\n",
194
+ " store = FeatureStore(repo_path=str(feast_path))\n",
195
+ " entity_df = scoring_df[[FEAST_ENTITY_KEY, FEAST_TIMESTAMP_COL]].copy()\n",
196
+ " exclude_cols = {FEAST_ENTITY_KEY, FEAST_TIMESTAMP_COL, TARGET_COLUMN, ORIGINAL_COLUMN}\n",
197
+ " feature_cols = [\n",
198
+ " c for c in scoring_df.columns\n",
199
+ " if c not in exclude_cols and not c.startswith(\"original_\")\n",
200
+ " ]\n",
201
+ " feature_refs = [f\"{FEAST_FEATURE_VIEW}:{col}\" for col in feature_cols]\n",
202
+ " result_df = store.get_online_features(\n",
203
+ " features=feature_refs,\n",
204
+ " entity_rows=[{FEAST_ENTITY_KEY: eid} for eid in scoring_df[FEAST_ENTITY_KEY]]\n",
205
+ " ).to_df()\n",
206
+ " result_df[ORIGINAL_COLUMN] = scoring_df[ORIGINAL_COLUMN].values\n",
207
+ " result_df[FEAST_ENTITY_KEY] = scoring_df[FEAST_ENTITY_KEY].values\n",
208
+ " scoring_features = result_df\n",
209
+ " print(\"Loaded features from Feast\")\n",
210
+ " except Exception as e:\n",
211
+ " print(f\"Feast retrieval failed ({e}), using parquet\")\n",
212
+ " scoring_features = scoring_df\n",
213
+ "else:\n",
214
+ " print(\"Feast not initialized, using parquet directly\")\n",
215
+ " scoring_features = scoring_df\n",
216
+ "\n",
217
+ "\n",
218
+ "# --- Load best model ---\n",
219
+ "client = mlflow.tracking.MlflowClient()\n",
220
+ "experiment = client.get_experiment_by_name(PIPELINE_NAME)\n",
221
+ "if not experiment:\n",
222
+ " raise ValueError(f\"Experiment {PIPELINE_NAME} not found\")\n",
223
+ "\n",
224
+ "\n",
225
+ "def _find_best_parent_run(client, experiment_id):\n",
226
+ " runs = client.search_runs(\n",
227
+ " experiment_ids=[experiment_id],\n",
228
+ " filter_string=f\"tags.recommendations_hash = '{RECOMMENDATIONS_HASH}'\",\n",
229
+ " order_by=[\"metrics.best_roc_auc DESC\"], max_results=1,\n",
230
+ " )\n",
231
+ " if not runs:\n",
232
+ " runs = client.search_runs(\n",
233
+ " experiment_ids=[experiment_id],\n",
234
+ " order_by=[\"metrics.best_roc_auc DESC\"], max_results=1,\n",
235
+ " )\n",
236
+ " if not runs:\n",
237
+ " raise ValueError(\"No runs found\")\n",
238
+ " return runs[0]\n",
239
+ "\n",
240
+ "\n",
241
+ "parent_run = _find_best_parent_run(client, experiment.experiment_id)\n",
242
+ "best_model_tag = parent_run.data.tags.get(\"best_model\", \"random_forest\")\n",
243
+ "model_name = f\"model_{best_model_tag}\"\n",
244
+ "if RECOMMENDATIONS_HASH:\n",
245
+ " model_name = f\"{model_name}_{RECOMMENDATIONS_HASH}\"\n",
246
+ "\n",
247
+ "child_runs = client.search_runs(\n",
248
+ " experiment_ids=[experiment.experiment_id],\n",
249
+ " filter_string=f\"tags.mlflow.parentRunId = '{parent_run.info.run_id}'\",\n",
250
+ ")\n",
251
+ "model_run = next(\n",
252
+ " (c for c in child_runs if c.info.run_name == best_model_tag), parent_run\n",
253
+ ")\n",
254
+ "model_uri = f\"runs:/{model_run.info.run_id}/{model_name}\"\n",
255
+ "print(f\"Loading model: {model_uri}\")\n",
256
+ "loader = mlflow.xgboost if best_model_tag == \"xgboost\" else mlflow.sklearn\n",
257
+ "model = loader.load_model(model_uri)\n",
258
+ "\n",
259
+ "\n",
260
+ "# --- Prepare features (TransformExecutor, NOT LabelEncoder) ---\n",
261
+ "def prepare_features(df):\n",
262
+ " df = df.copy()\n",
263
+ " drop_cols = [FEAST_ENTITY_KEY, FEAST_TIMESTAMP_COL, ORIGINAL_COLUMN, TARGET_COLUMN]\n",
264
+ " df = df.drop(columns=[c for c in drop_cols if c in df.columns], errors=\"ignore\")\n",
265
+ " df = df.drop(columns=[c for c in df.columns if c.startswith(\"original_\")], errors=\"ignore\")\n",
266
+ " df = _executor.apply_all(df, ENCODINGS + SCALINGS, fit_mode=False, artifact_store=_registry)\n",
267
+ " return df.select_dtypes(include=[\"int64\", \"float64\", \"int32\", \"float32\"]).fillna(0)\n",
268
+ "\n",
269
+ "\n",
270
+ "X = prepare_features(scoring_features)\n",
271
+ "y_true = scoring_features[ORIGINAL_COLUMN].values\n",
272
+ "\n",
273
+ "# --- Predict ---\n",
274
+ "print(\"Generating predictions...\")\n",
275
+ "if hasattr(model, \"predict_proba\"):\n",
276
+ " y_proba = model.predict_proba(X)[:, 1]\n",
277
+ "else:\n",
278
+ " y_proba = model.predict(xgb.DMatrix(X, feature_names=list(X.columns)))\n",
279
+ "y_pred = (y_proba >= 0.5).astype(int)\n",
280
+ "\n",
281
+ "# --- Metrics ---\n",
282
+ "from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score\n",
283
+ "\n",
284
+ "metrics = {\n",
285
+ " \"accuracy\": accuracy_score(y_true, y_pred),\n",
286
+ " \"precision\": precision_score(y_true, y_pred, zero_division=0),\n",
287
+ " \"recall\": recall_score(y_true, y_pred, zero_division=0),\n",
288
+ " \"f1\": f1_score(y_true, y_pred, zero_division=0),\n",
289
+ " \"roc_auc\": roc_auc_score(y_true, y_proba) if len(np.unique(y_true)) > 1 else 0.0,\n",
290
+ "}\n",
291
+ "print(\"\\nValidation Metrics (vs original values):\")\n",
292
+ "for name, value in metrics.items():\n",
293
+ " print(f\" {name}: {value:.4f}\")\n",
294
+ "\n",
295
+ "# --- Save predictions ---\n",
296
+ "predictions_df = pd.DataFrame({\n",
297
+ " FEAST_ENTITY_KEY: scoring_df[FEAST_ENTITY_KEY].values,\n",
298
+ " \"prediction\": y_pred,\n",
299
+ " \"probability\": y_proba,\n",
300
+ " \"actual\": y_true,\n",
301
+ " \"correct\": (y_pred == y_true).astype(int),\n",
302
+ "})\n",
303
+ "PREDICTIONS_PATH.parent.mkdir(parents=True, exist_ok=True)\n",
304
+ "predictions_df.to_parquet(PREDICTIONS_PATH, index=False)\n",
305
+ "print(f\"\\nPredictions saved: {PREDICTIONS_PATH}\")\n",
306
+ "print(f\"Correct: {predictions_df['correct'].sum():,}/{len(predictions_df):,} ({predictions_df['correct'].mean():.1%})\")"
307
+ ]
308
+ },
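Since the scoring cell above persists predictions.parquet with the columns prediction, probability, actual, and correct, a hedged sketch of re-checking the headline metrics offline from that file; the path is an assumption to adjust to your PRODUCTION_DIR layout:

# Hedged sketch: reload the saved holdout predictions and recompute metrics.
# Column names mirror the cell above; the parquet path is an assumption.
import pandas as pd
from sklearn.metrics import roc_auc_score, f1_score

preds = pd.read_parquet("production/data/scoring/predictions.parquet")  # assumed path
print("rows:", len(preds))
print("ROC-AUC:", roc_auc_score(preds["actual"], preds["probability"]))
print("F1:", f1_score(preds["actual"], preds["prediction"]))
print("accuracy:", preds["correct"].mean())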
309
+ {
310
+ "cell_type": "markdown",
311
+ "id": "8b5eb9d4",
312
+ "metadata": {
313
+ "papermill": {
314
+ "duration": 0.003123,
315
+ "end_time": "2026-02-02T13:04:23.676730",
316
+ "exception": false,
317
+ "start_time": "2026-02-02T13:04:23.673607",
318
+ "status": "completed"
319
+ },
320
+ "tags": []
321
+ },
322
+ "source": [
323
+ "## 11.2 Summary Metrics"
324
+ ]
325
+ },
326
+ {
327
+ "cell_type": "code",
328
+ "execution_count": null,
329
+ "id": "5d2880c0",
330
+ "metadata": {
331
+ "execution": {
332
+ "iopub.execute_input": "2026-02-02T13:04:23.683183Z",
333
+ "iopub.status.busy": "2026-02-02T13:04:23.683045Z",
334
+ "iopub.status.idle": "2026-02-02T13:04:23.691221Z",
335
+ "shell.execute_reply": "2026-02-02T13:04:23.690765Z"
336
+ },
337
+ "papermill": {
338
+ "duration": 0.012164,
339
+ "end_time": "2026-02-02T13:04:23.691775",
340
+ "exception": false,
341
+ "start_time": "2026-02-02T13:04:23.679611",
342
+ "status": "completed"
343
+ },
344
+ "tags": []
345
+ },
346
+ "outputs": [],
347
+ "source": [
348
+ "from sklearn.metrics import (\n",
349
+ " accuracy_score, precision_score, recall_score,\n",
350
+ " f1_score, roc_auc_score, confusion_matrix,\n",
351
+ ")\n",
352
+ "\n",
353
+ "y_true = predictions_df[\"actual\"]\n",
354
+ "y_pred = predictions_df[\"prediction\"]\n",
355
+ "y_proba = predictions_df[\"probability\"]\n",
356
+ "\n",
357
+ "metrics = {\n",
358
+ " \"Accuracy\": accuracy_score(y_true, y_pred),\n",
359
+ " \"Precision\": precision_score(y_true, y_pred, zero_division=0),\n",
360
+ " \"Recall\": recall_score(y_true, y_pred, zero_division=0),\n",
361
+ " \"F1 Score\": f1_score(y_true, y_pred, zero_division=0),\n",
362
+ " \"ROC-AUC\": roc_auc_score(y_true, y_proba) if len(np.unique(y_true)) > 1 else 0.0,\n",
363
+ "}\n",
364
+ "\n",
365
+ "print(\"\\n=== Scoring Validation Metrics ===\")\n",
366
+ "for name, value in metrics.items():\n",
367
+ " print(f\" {name}: {value:.4f}\")\n",
368
+ "\n",
369
+ "cm = confusion_matrix(y_true, y_pred)\n",
370
+ "print(f\"\\nConfusion Matrix:\")\n",
371
+ "print(f\" TN={cm[0,0]:,} FP={cm[0,1]:,}\")\n",
372
+ "print(f\" FN={cm[1,0]:,} TP={cm[1,1]:,}\")"
373
+ ]
374
+ },
375
+ {
376
+ "cell_type": "code",
377
+ "execution_count": null,
378
+ "id": "aeb3bf7c",
379
+ "metadata": {
380
+ "execution": {
381
+ "iopub.execute_input": "2026-02-02T13:04:23.698098Z",
382
+ "iopub.status.busy": "2026-02-02T13:04:23.698000Z",
383
+ "iopub.status.idle": "2026-02-02T13:04:24.120245Z",
384
+ "shell.execute_reply": "2026-02-02T13:04:24.119760Z"
385
+ },
386
+ "papermill": {
387
+ "duration": 0.426255,
388
+ "end_time": "2026-02-02T13:04:24.120967",
389
+ "exception": false,
390
+ "start_time": "2026-02-02T13:04:23.694712",
391
+ "status": "completed"
392
+ },
393
+ "tags": []
394
+ },
395
+ "outputs": [],
396
+ "source": [
397
+ "import matplotlib.pyplot as plt\n",
398
+ "from sklearn.metrics import roc_curve\n",
399
+ "\n",
400
+ "fig, axes = plt.subplots(1, 2, figsize=(12, 4))\n",
401
+ "\n",
402
+ "# ROC curve\n",
403
+ "fpr, tpr, _ = roc_curve(y_true, y_proba)\n",
404
+ "axes[0].plot(fpr, tpr, \"b-\", lw=2, label=f\"ROC (AUC={metrics['ROC-AUC']:.3f})\")\n",
405
+ "axes[0].plot([0, 1], [0, 1], \"k--\", lw=1)\n",
406
+ "axes[0].set_xlabel(\"False Positive Rate\")\n",
407
+ "axes[0].set_ylabel(\"True Positive Rate\")\n",
408
+ "axes[0].set_title(\"ROC Curve\")\n",
409
+ "axes[0].legend()\n",
410
+ "\n",
411
+ "# Probability distribution\n",
412
+ "axes[1].hist(y_proba[y_true == 0], bins=30, alpha=0.5, label=\"Actual=0\", color=\"blue\")\n",
413
+ "axes[1].hist(y_proba[y_true == 1], bins=30, alpha=0.5, label=\"Actual=1\", color=\"red\")\n",
414
+ "axes[1].axvline(x=0.5, color=\"black\", linestyle=\"--\", label=\"Threshold\")\n",
415
+ "axes[1].set_xlabel(\"Predicted Probability\")\n",
416
+ "axes[1].set_ylabel(\"Count\")\n",
417
+ "axes[1].set_title(\"Probability Distribution\")\n",
418
+ "axes[1].legend()\n",
419
+ "\n",
420
+ "plt.tight_layout()\n",
421
+ "plt.show()"
422
+ ]
423
+ },
424
+ {
425
+ "cell_type": "markdown",
426
+ "id": "afc8d09d",
427
+ "metadata": {
428
+ "papermill": {
429
+ "duration": 0.003519,
430
+ "end_time": "2026-02-02T13:04:24.127927",
431
+ "exception": false,
432
+ "start_time": "2026-02-02T13:04:24.124408",
433
+ "status": "completed"
434
+ },
435
+ "tags": []
436
+ },
437
+ "source": [
438
+ "## 11.3 Model Comparison Grid\n",
439
+ "\n",
440
+ "Compare all trained models (Logistic Regression, Random Forest, XGBoost) on the holdout set.\n",
441
+ "\n",
442
+ "**Grid Layout:**\n",
443
+ "- **Row 1**: Confusion matrices (counts and percentages)\n",
444
+ "- **Row 2**: ROC curves with AUC scores\n",
445
+ "- **Row 3**: Precision-Recall curves with PR-AUC scores"
446
+ ]
447
+ },
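A minimal sketch of the 3-row-by-N-model grid skeleton the comparison cell below builds; with a single model, matplotlib returns a 1-D axes array, hence the reshape. Names and figure sizes here are illustrative only:

# Hedged sketch of the comparison grid layout: one column per model,
# rows for confusion matrix, ROC curve, and precision-recall curve.
# With n_models == 1, plt.subplots returns a 1-D array, so it is reshaped
# to keep axes[row, col] indexing uniform.
import matplotlib.pyplot as plt

n_models = 1  # illustrative
fig, axes = plt.subplots(3, n_models, figsize=(5 * n_models, 12))
if n_models == 1:
    axes = axes.reshape(-1, 1)
axes[0, 0].set_title("Confusion matrix")
axes[1, 0].set_title("ROC curve")
axes[2, 0].set_title("Precision-Recall curve")
plt.tight_layout()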
448
+ {
449
+ "cell_type": "code",
450
+ "execution_count": null,
451
+ "id": "865ea5f6",
452
+ "metadata": {
453
+ "execution": {
454
+ "iopub.execute_input": "2026-02-02T13:04:24.135466Z",
455
+ "iopub.status.busy": "2026-02-02T13:04:24.135183Z",
456
+ "iopub.status.idle": "2026-02-02T13:04:24.234063Z",
457
+ "shell.execute_reply": "2026-02-02T13:04:24.233694Z"
458
+ },
459
+ "papermill": {
460
+ "duration": 0.103605,
461
+ "end_time": "2026-02-02T13:04:24.234812",
462
+ "exception": false,
463
+ "start_time": "2026-02-02T13:04:24.131207",
464
+ "status": "completed"
465
+ },
466
+ "tags": []
467
+ },
468
+ "outputs": [],
469
+ "source": [
470
+ "from sklearn.metrics import (\n",
471
+ " roc_curve, precision_recall_curve, average_precision_score,\n",
472
+ " confusion_matrix, roc_auc_score, f1_score, precision_score,\n",
473
+ " recall_score, accuracy_score,\n",
474
+ ")\n",
475
+ "from IPython.display import display, HTML\n",
476
+ "\n",
477
+ "mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)\n",
478
+ "client = mlflow.tracking.MlflowClient()\n",
479
+ "experiment = client.get_experiment_by_name(PIPELINE_NAME)\n",
480
+ "\n",
481
+ "# Prepare features for scoring using TransformExecutor (NOT LabelEncoder)\n",
482
+ "X_holdout = prepare_features(scoring_features)\n",
483
+ "y_actual = predictions_df[\"actual\"].values\n",
484
+ "\n",
485
+ "# Get all logged models\n",
486
+ "logged_models = client.search_logged_models(experiment_ids=[experiment.experiment_id])\n",
487
+ "\n",
488
+ "# Load all 3 model types\n",
489
+ "model_types = [\"logistic_regression\", \"random_forest\", \"xgboost\"]\n",
490
+ "model_display_names = [\"Logistic Regression\", \"Random Forest\", \"XGBoost\"]\n",
491
+ "loaded_models = {}\n",
492
+ "model_predictions = {}\n",
493
+ "\n",
494
+ "for model_type, display_name in zip(model_types, model_display_names):\n",
495
+ " model_name_pattern = f\"model_{model_type}\"\n",
496
+ " if RECOMMENDATIONS_HASH:\n",
497
+ " model_name_pattern = f\"{model_name_pattern}_{RECOMMENDATIONS_HASH}\"\n",
498
+ "\n",
499
+ " matching_model = None\n",
500
+ " for lm in logged_models:\n",
501
+ " if lm.name == model_name_pattern:\n",
502
+ " if matching_model is None or lm.creation_timestamp > matching_model.creation_timestamp:\n",
503
+ " matching_model = lm\n",
504
+ "\n",
505
+ " if matching_model:\n",
506
+ " try:\n",
507
+ " if \"xgboost\" in model_type:\n",
508
+ " m = mlflow.xgboost.load_model(matching_model.model_uri)\n",
509
+ " dmatrix = xgb.DMatrix(X_holdout, feature_names=list(X_holdout.columns))\n",
510
+ " yp = m.predict(dmatrix)\n",
511
+ " else:\n",
512
+ " m = mlflow.sklearn.load_model(matching_model.model_uri)\n",
513
+ " yp = m.predict_proba(X_holdout)[:, 1]\n",
514
+ "\n",
515
+ " y_p = (yp > 0.5).astype(int)\n",
516
+ " loaded_models[display_name] = m\n",
517
+ " model_predictions[display_name] = {\"y_pred\": y_p, \"y_proba\": yp}\n",
518
+ " print(f\"Loaded {display_name}: ROC-AUC = {roc_auc_score(y_actual, yp):.4f}\")\n",
519
+ " except Exception as e:\n",
520
+ " print(f\"Could not load {display_name}: {e}\")\n",
521
+ "\n",
522
+ "print(f\"\\nLoaded {len(loaded_models)} models for comparison\")"
523
+ ]
524
+ },
525
+ {
526
+ "cell_type": "code",
527
+ "execution_count": null,
528
+ "id": "e8152776",
529
+ "metadata": {
530
+ "execution": {
531
+ "iopub.execute_input": "2026-02-02T13:04:24.242988Z",
532
+ "iopub.status.busy": "2026-02-02T13:04:24.242860Z",
533
+ "iopub.status.idle": "2026-02-02T13:04:24.582353Z",
534
+ "shell.execute_reply": "2026-02-02T13:04:24.581773Z"
535
+ },
536
+ "papermill": {
537
+ "duration": 0.344716,
538
+ "end_time": "2026-02-02T13:04:24.582901",
539
+ "exception": false,
540
+ "start_time": "2026-02-02T13:04:24.238185",
541
+ "status": "completed"
542
+ },
543
+ "tags": []
544
+ },
545
+ "outputs": [],
546
+ "source": [
547
+ "# Model Comparison Grid (3 columns x 3 rows)\n",
548
+ "n_models = len(model_predictions)\n",
549
+ "if n_models > 0:\n",
550
+ " fig, axes = plt.subplots(3, n_models, figsize=(5 * n_models, 12))\n",
551
+ " if n_models == 1:\n",
552
+ " axes = axes.reshape(-1, 1)\n",
553
+ "\n",
554
+ " colors = [\"#1f77b4\", \"#ff7f0e\", \"#2ca02c\"]\n",
555
+ "\n",
556
+ " for col_idx, (name, preds) in enumerate(model_predictions.items()):\n",
557
+ " y_p = preds[\"y_pred\"]\n",
558
+ " yp = preds[\"y_proba\"]\n",
559
+ " color = colors[col_idx % len(colors)]\n",
560
+ "\n",
561
+ " # Row 1: Confusion Matrix\n",
562
+ " cm = confusion_matrix(y_actual, y_p)\n",
563
+ " ax = axes[0, col_idx]\n",
564
+ " ax.imshow(cm, cmap=\"Blues\")\n",
565
+ " ax.set_xticks([0, 1])\n",
566
+ " ax.set_yticks([0, 1])\n",
567
+ " ax.set_xticklabels([\"Pred 0\", \"Pred 1\"])\n",
568
+ " ax.set_yticklabels([\"Actual 0\", \"Actual 1\"])\n",
569
+ " for i in range(2):\n",
570
+ " for j in range(2):\n",
571
+ " pct = cm[i, j] / cm.sum() * 100\n",
572
+ " ax.text(j, i, f\"{cm[i, j]}\\n({pct:.1f}%)\", ha=\"center\", va=\"center\",\n",
573
+ " color=\"white\" if cm[i, j] > cm.max() / 2 else \"black\", fontsize=10)\n",
574
+ " acc = accuracy_score(y_actual, y_p)\n",
575
+ " ax.set_title(f\"{name}\\nAccuracy: {acc:.3f}\", fontsize=11, fontweight=\"bold\")\n",
576
+ "\n",
577
+ " # Row 2: ROC Curve\n",
578
+ " ax = axes[1, col_idx]\n",
579
+ " fpr, tpr, _ = roc_curve(y_actual, yp)\n",
580
+ " auc = roc_auc_score(y_actual, yp)\n",
581
+ " ax.plot(fpr, tpr, color=color, lw=2, label=f\"AUC = {auc:.4f}\")\n",
582
+ " ax.plot([0, 1], [0, 1], \"k--\", lw=1, alpha=0.5)\n",
583
+ " ax.fill_between(fpr, tpr, alpha=0.2, color=color)\n",
584
+ " ax.set_xlabel(\"False Positive Rate\")\n",
585
+ " ax.set_ylabel(\"True Positive Rate\")\n",
586
+ " ax.set_title(\"ROC Curve\", fontsize=10)\n",
587
+ " ax.legend(loc=\"lower right\")\n",
588
+ " ax.grid(True, alpha=0.3)\n",
589
+ "\n",
590
+ " # Row 3: Precision-Recall Curve\n",
591
+ " ax = axes[2, col_idx]\n",
592
+ " precision_vals, recall_vals, _ = precision_recall_curve(y_actual, yp)\n",
593
+ " pr_auc = average_precision_score(y_actual, yp)\n",
594
+ " ax.plot(recall_vals, precision_vals, color=color, lw=2, label=f\"PR-AUC = {pr_auc:.4f}\")\n",
595
+ " baseline = y_actual.sum() / len(y_actual)\n",
596
+ " ax.axhline(y=baseline, color=\"gray\", linestyle=\"--\", lw=1, label=f\"Baseline = {baseline:.2f}\")\n",
597
+ " ax.fill_between(recall_vals, precision_vals, alpha=0.2, color=color)\n",
598
+ " ax.set_xlabel(\"Recall\")\n",
599
+ " ax.set_ylabel(\"Precision\")\n",
600
+ " ax.set_title(\"Precision-Recall Curve\", fontsize=10)\n",
601
+ " ax.legend(loc=\"lower left\")\n",
602
+ " ax.grid(True, alpha=0.3)\n",
603
+ "\n",
604
+ " plt.suptitle(\"Model Comparison Grid: Holdout Set Performance\",\n",
605
+ " fontsize=14, fontweight=\"bold\", y=1.02)\n",
606
+ " plt.tight_layout()\n",
607
+ " plt.show()\n",
608
+ "else:\n",
609
+ " print(\"No models loaded for comparison\")"
610
+ ]
611
+ },
612
+ {
613
+ "cell_type": "code",
614
+ "execution_count": null,
615
+ "id": "5e641fbf",
616
+ "metadata": {
617
+ "execution": {
618
+ "iopub.execute_input": "2026-02-02T13:04:24.591754Z",
619
+ "iopub.status.busy": "2026-02-02T13:04:24.591626Z",
620
+ "iopub.status.idle": "2026-02-02T13:04:24.625306Z",
621
+ "shell.execute_reply": "2026-02-02T13:04:24.624779Z"
622
+ },
623
+ "papermill": {
624
+ "duration": 0.039099,
625
+ "end_time": "2026-02-02T13:04:24.625974",
626
+ "exception": false,
627
+ "start_time": "2026-02-02T13:04:24.586875",
628
+ "status": "completed"
629
+ },
630
+ "tags": []
631
+ },
632
+ "outputs": [],
633
+ "source": [
634
+ "# Summary metrics table for all models\n",
635
+ "if model_predictions:\n",
636
+ " comparison_results = []\n",
637
+ " for name, preds in model_predictions.items():\n",
638
+ " y_p = preds[\"y_pred\"]\n",
639
+ " yp = preds[\"y_proba\"]\n",
640
+ " comparison_results.append({\n",
641
+ " \"Model\": name,\n",
642
+ " \"ROC-AUC\": roc_auc_score(y_actual, yp),\n",
643
+ " \"PR-AUC\": average_precision_score(y_actual, yp),\n",
644
+ " \"F1-Score\": f1_score(y_actual, y_p),\n",
645
+ " \"Precision\": precision_score(y_actual, y_p, zero_division=0),\n",
646
+ " \"Recall\": recall_score(y_actual, y_p, zero_division=0),\n",
647
+ " \"Accuracy\": accuracy_score(y_actual, y_p),\n",
648
+ " })\n",
649
+ "\n",
650
+ " comparison_df = pd.DataFrame(comparison_results).set_index(\"Model\")\n",
651
+ " print(\"\\n\" + \"=\" * 70)\n",
652
+ " print(\"MODEL COMPARISON SUMMARY (Holdout Set)\")\n",
653
+ " print(\"=\" * 70)\n",
654
+ " display(\n",
655
+ " comparison_df.style\n",
656
+ " .highlight_max(axis=0, props=\"background-color: #2e7d32; color: white\")\n",
657
+ " .format(\"{:.4f}\")\n",
658
+ " )\n",
659
+ "\n",
660
+ " best_model_name = comparison_df[\"ROC-AUC\"].idxmax()\n",
661
+ " best_auc = comparison_df.loc[best_model_name, \"ROC-AUC\"]\n",
662
+ " print(f\"\\nBest Model: {best_model_name} (ROC-AUC = {best_auc:.4f})\")"
663
+ ]
664
+ },
665
+ {
666
+ "cell_type": "markdown",
667
+ "id": "7cc7e27d",
668
+ "metadata": {
669
+ "papermill": {
670
+ "duration": 0.003359,
671
+ "end_time": "2026-02-02T13:04:24.633357",
672
+ "exception": false,
673
+ "start_time": "2026-02-02T13:04:24.629998",
674
+ "status": "completed"
675
+ },
676
+ "tags": []
677
+ },
678
+ "source": [
679
+ "## 11.4 Adversarial Pipeline Validation\n",
680
+ "\n",
681
+ "Validate that scoring pipeline produces identical features to training for holdout entities.\n",
682
+ "This catches transformation inconsistencies (e.g., scalers re-fit, encoders handling unseen values differently)."
683
+ ]
684
+ },
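The idea in the validation cell below, reduced to a standalone sketch: align training-time and scoring-time feature frames on the entity key and flag any numeric column whose values differ beyond a small tolerance. The function name and tolerance are illustrative assumptions, not part of the package API:

# Hedged sketch of the comparison the adversarial-validation cell performs.
import numpy as np
import pandas as pd

def feature_drift(train_feats: pd.DataFrame, score_feats: pd.DataFrame,
                  key: str, tol: float = 1e-6) -> pd.DataFrame:
    # Align the two frames on the entity key; suffixes disambiguate shared columns.
    merged = train_feats.merge(score_feats, on=key, suffixes=("_train", "_score"))
    rows = []
    for col in train_feats.columns:
        if col == key or f"{col}_train" not in merged.columns:
            continue  # skip the key itself and columns missing from either frame
        if not pd.api.types.is_numeric_dtype(train_feats[col]):
            continue
        delta = np.abs(merged[f"{col}_train"] - merged[f"{col}_score"])
        if delta.max() > tol:
            rows.append({"feature": col, "max_delta": float(delta.max())})
    return pd.DataFrame(rows)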
685
+ {
686
+ "cell_type": "code",
687
+ "execution_count": null,
688
+ "id": "77e2ea51",
689
+ "metadata": {
690
+ "execution": {
691
+ "iopub.execute_input": "2026-02-02T13:04:24.640766Z",
692
+ "iopub.status.busy": "2026-02-02T13:04:24.640645Z",
693
+ "iopub.status.idle": "2026-02-02T13:04:24.669286Z",
694
+ "shell.execute_reply": "2026-02-02T13:04:24.668810Z"
695
+ },
696
+ "papermill": {
697
+ "duration": 0.033239,
698
+ "end_time": "2026-02-02T13:04:24.669913",
699
+ "exception": false,
700
+ "start_time": "2026-02-02T13:04:24.636674",
701
+ "status": "completed"
702
+ },
703
+ "tags": []
704
+ },
705
+ "outputs": [],
706
+ "source": [
707
+ "gold_features = load_gold()\n",
708
+ "\n",
709
+ "holdout_mask = gold_features[ORIGINAL_COLUMN].notna()\n",
710
+ "holdout_gold = gold_features[holdout_mask].copy()\n",
711
+ "print(f\"Holdout entities for validation: {holdout_mask.sum():,}\")\n",
712
+ "\n",
713
+ "# Compare scoring features vs gold features for holdout records\n",
714
+ "scoring_entity_ids = set(scoring_features[FEAST_ENTITY_KEY].values)\n",
715
+ "gold_holdout = holdout_gold[holdout_gold[FEAST_ENTITY_KEY].isin(scoring_entity_ids)]\n",
716
+ "\n",
717
+ "exclude_cols = {FEAST_ENTITY_KEY, \"event_timestamp\", TARGET_COLUMN, ORIGINAL_COLUMN}\n",
718
+ "compare_cols = [\n",
719
+ " c for c in gold_holdout.columns\n",
720
+ " if c not in exclude_cols and not c.startswith(\"original_\")\n",
721
+ "]\n",
722
+ "\n",
723
+ "print(\"\\n\" + \"=\" * 60)\n",
724
+ "print(\"ADVERSARIAL PIPELINE VALIDATION\")\n",
725
+ "print(\"=\" * 60)\n",
726
+ "\n",
727
+ "mismatches = []\n",
728
+ "for col in compare_cols:\n",
729
+ " if col in scoring_features.columns and col in gold_holdout.columns:\n",
730
+ " g_vals = gold_holdout[col].values\n",
731
+ " s_vals = scoring_features.reindex(gold_holdout.index)[col].values\n",
732
+ " if pd.api.types.is_numeric_dtype(gold_holdout[col]):\n",
733
+ " delta = np.abs(g_vals.astype(float) - s_vals.astype(float))\n",
734
+ " max_delta = np.nanmax(delta) if len(delta) > 0 else 0\n",
735
+ " if max_delta > 1e-6:\n",
736
+ " mismatches.append({\"feature\": col, \"max_delta\": max_delta})\n",
737
+ "\n",
738
+ "if not mismatches:\n",
739
+ " print(\"\\nPASSED: Scoring features match training features\")\n",
740
+ "else:\n",
741
+ " print(f\"\\nFAILED: {len(mismatches)} features with drift\")\n",
742
+ " display(pd.DataFrame(mismatches).sort_values(\"max_delta\", ascending=False))"
743
+ ]
744
+ },
745
+ {
746
+ "cell_type": "markdown",
747
+ "id": "de444011",
748
+ "metadata": {
749
+ "papermill": {
750
+ "duration": 0.003852,
751
+ "end_time": "2026-02-02T13:04:24.678592",
752
+ "exception": false,
753
+ "start_time": "2026-02-02T13:04:24.674740",
754
+ "status": "completed"
755
+ },
756
+ "tags": []
757
+ },
758
+ "source": [
759
+ "## 11.5 Transformation Validation\n",
760
+ "\n",
761
+ "Use `validate_feature_transformation()` from the validation module to verify\n",
762
+ "encoding/scaling consistency between training and scoring."
763
+ ]
764
+ },
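The property being checked here: transformers fitted at training time must be applied as-is at scoring time, never re-fit. A generic sklearn sketch of that pattern, using StandardScaler rather than the package's own TransformExecutor and ArtifactStore:

# Hedged sketch of the invariant under test: fit the scaler once on training data,
# persist it, and only call transform() at scoring time. Re-fitting on the scoring
# slice would shift the feature distribution and break train/score consistency.
import joblib
import numpy as np
from sklearn.preprocessing import StandardScaler

X_train = np.array([[1.0], [2.0], [3.0]])
X_score = np.array([[10.0]])

scaler = StandardScaler().fit(X_train)      # fit once, at training time
joblib.dump(scaler, "scaler.joblib")        # persisted as a fitted artifact

frozen = joblib.load("scaler.joblib")
print(frozen.transform(X_score))            # correct: uses training statistics
# StandardScaler().fit_transform(X_score)   # wrong: re-fits on scoring data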
765
+ {
766
+ "cell_type": "code",
767
+ "execution_count": null,
768
+ "id": "a2dc6490",
769
+ "metadata": {
770
+ "execution": {
771
+ "iopub.execute_input": "2026-02-02T13:04:24.686472Z",
772
+ "iopub.status.busy": "2026-02-02T13:04:24.686358Z",
773
+ "iopub.status.idle": "2026-02-02T13:04:24.728303Z",
774
+ "shell.execute_reply": "2026-02-02T13:04:24.727841Z"
775
+ },
776
+ "papermill": {
777
+ "duration": 0.04693,
778
+ "end_time": "2026-02-02T13:04:24.728951",
779
+ "exception": false,
780
+ "start_time": "2026-02-02T13:04:24.682021",
781
+ "status": "completed"
782
+ },
783
+ "tags": []
784
+ },
785
+ "outputs": [],
786
+ "source": [
787
+ "from customer_retention.stages.validation import validate_feature_transformation\n",
788
+ "\n",
789
+ "# Training features = non-holdout, scoring features = holdout\n",
790
+ "training_mask = gold_features[ORIGINAL_COLUMN].isna()\n",
791
+ "training_subset = gold_features[training_mask].copy()\n",
792
+ "scoring_subset = gold_features[~training_mask].copy()\n",
793
+ "\n",
794
+ "report = validate_feature_transformation(\n",
795
+ " training_df=training_subset,\n",
796
+ " scoring_df=scoring_subset,\n",
797
+ " transform_fn=prepare_features,\n",
798
+ " entity_column=FEAST_ENTITY_KEY,\n",
799
+ " verbose=True,\n",
800
+ ")\n",
801
+ "\n",
802
+ "if report.passed:\n",
803
+ " print(\"Transformation validation PASSED\")\n",
804
+ "else:\n",
805
+ " print(f\"Transformation validation FAILED: {len(report.feature_mismatches)} mismatches\")"
806
+ ]
807
+ },
808
+ {
809
+ "cell_type": "markdown",
810
+ "id": "b79d4809",
811
+ "metadata": {
812
+ "papermill": {
813
+ "duration": 0.003454,
814
+ "end_time": "2026-02-02T13:04:24.736988",
815
+ "exception": false,
816
+ "start_time": "2026-02-02T13:04:24.733534",
817
+ "status": "completed"
818
+ },
819
+ "tags": []
820
+ },
821
+ "source": [
822
+ "## 11.6 Model Explanations (SHAP)"
823
+ ]
824
+ },
825
+ {
826
+ "cell_type": "code",
827
+ "execution_count": null,
828
+ "id": "76fce665",
829
+ "metadata": {
830
+ "execution": {
831
+ "iopub.execute_input": "2026-02-02T13:04:24.746062Z",
832
+ "iopub.status.busy": "2026-02-02T13:04:24.745943Z",
833
+ "iopub.status.idle": "2026-02-02T13:04:25.182792Z",
834
+ "shell.execute_reply": "2026-02-02T13:04:25.182312Z"
835
+ },
836
+ "papermill": {
837
+ "duration": 0.442072,
838
+ "end_time": "2026-02-02T13:04:25.183386",
839
+ "exception": false,
840
+ "start_time": "2026-02-02T13:04:24.741314",
841
+ "status": "completed"
842
+ },
843
+ "tags": []
844
+ },
845
+ "outputs": [],
846
+ "source": [
847
+ "import shap\n",
848
+ "\n",
849
+ "# Load best model for SHAP\n",
850
+ "mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)\n",
851
+ "client = mlflow.tracking.MlflowClient()\n",
852
+ "\n",
853
+ "experiment = client.get_experiment_by_name(PIPELINE_NAME)\n",
854
+ "runs = client.search_runs(\n",
855
+ " experiment_ids=[experiment.experiment_id],\n",
856
+ " order_by=[\"metrics.best_roc_auc DESC\"],\n",
857
+ " max_results=1,\n",
858
+ ")\n",
859
+ "parent_run = runs[0]\n",
860
+ "\n",
861
+ "best_model_tag = parent_run.data.tags.get(\"best_model\", \"random_forest\")\n",
862
+ "model_name = f\"model_{best_model_tag}\"\n",
863
+ "if RECOMMENDATIONS_HASH:\n",
864
+ " model_name = f\"{model_name}_{RECOMMENDATIONS_HASH}\"\n",
865
+ "\n",
866
+ "child_runs = client.search_runs(\n",
867
+ " experiment_ids=[experiment.experiment_id],\n",
868
+ " filter_string=f\"tags.mlflow.parentRunId = '{parent_run.info.run_id}'\",\n",
869
+ ")\n",
870
+ "model_run = next((c for c in child_runs if c.info.run_name == best_model_tag), parent_run)\n",
871
+ "\n",
872
+ "model_uri = f\"runs:/{model_run.info.run_id}/{model_name}\"\n",
873
+ "print(f\"Loading model: {model_uri}\")\n",
874
+ "if best_model_tag == \"xgboost\":\n",
875
+ " model = mlflow.xgboost.load_model(model_uri)\n",
876
+ "else:\n",
877
+ " model = mlflow.sklearn.load_model(model_uri)\n",
878
+ "print(f\"Model type: {type(model).__name__}\")"
879
+ ]
880
+ },
881
+ {
882
+ "cell_type": "code",
883
+ "execution_count": null,
884
+ "id": "3464d0e1",
885
+ "metadata": {
886
+ "execution": {
887
+ "iopub.execute_input": "2026-02-02T13:04:25.192330Z",
888
+ "iopub.status.busy": "2026-02-02T13:04:25.192081Z",
889
+ "iopub.status.idle": "2026-02-02T13:04:25.197408Z",
890
+ "shell.execute_reply": "2026-02-02T13:04:25.197016Z"
891
+ },
892
+ "papermill": {
893
+ "duration": 0.010186,
894
+ "end_time": "2026-02-02T13:04:25.197861",
895
+ "exception": false,
896
+ "start_time": "2026-02-02T13:04:25.187675",
897
+ "status": "completed"
898
+ },
899
+ "tags": []
900
+ },
901
+ "outputs": [],
902
+ "source": [
903
+ "# Prepare features for SHAP using TransformExecutor (NOT LabelEncoder)\n",
904
+ "X = prepare_features(scoring_features)\n",
905
+ "feature_names = list(X.columns)\n",
906
+ "print(f\"Prepared {len(feature_names)} features for SHAP analysis\")"
907
+ ]
908
+ },
909
+ {
910
+ "cell_type": "code",
911
+ "execution_count": null,
912
+ "id": "4e0595d4",
913
+ "metadata": {
914
+ "execution": {
915
+ "iopub.execute_input": "2026-02-02T13:04:25.207529Z",
916
+ "iopub.status.busy": "2026-02-02T13:04:25.207410Z",
917
+ "iopub.status.idle": "2026-02-02T13:04:34.310396Z",
918
+ "shell.execute_reply": "2026-02-02T13:04:34.309912Z"
919
+ },
920
+ "papermill": {
921
+ "duration": 9.108894,
922
+ "end_time": "2026-02-02T13:04:34.310996",
923
+ "exception": false,
924
+ "start_time": "2026-02-02T13:04:25.202102",
925
+ "status": "completed"
926
+ },
927
+ "tags": []
928
+ },
929
+ "outputs": [],
930
+ "source": [
931
+ "# Create SHAP explainer\n",
932
+ "print(\"Creating SHAP explainer (may take a moment)...\")\n",
933
+ "\n",
934
+ "background_size = min(100, len(X))\n",
935
+ "background = shap.sample(X, background_size)\n",
936
+ "\n",
937
+ "if hasattr(model, \"predict_proba\"):\n",
938
+ " explainer = shap.Explainer(model.predict_proba, background, feature_names=feature_names)\n",
939
+ "else:\n",
940
+ " explainer = shap.Explainer(model, background, feature_names=feature_names)\n",
941
+ "\n",
942
+ "print(\"Computing SHAP values...\")\n",
943
+ "shap_values = explainer(X)\n",
944
+ "print(f\"SHAP values computed for {len(shap_values)} records\")"
945
+ ]
946
+ },
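Because the candidate models here are tree ensembles (random forest or XGBoost), `shap.TreeExplainer` is usually a much faster route than the model-agnostic explainer built on `predict_proba` above: it reads the fitted trees directly and needs no background sample for its default perturbation mode. A sketch, assuming `model` is the ensemble loaded in 11.6:

```python
# Tree-specific explainer; exact for tree ensembles and typically much faster
# than the generic function-based explainer used above.
tree_explainer = shap.TreeExplainer(model)
tree_shap_values = tree_explainer(X)

# Shape is (rows, features) for XGBoost binary models,
# or (rows, features, classes) for sklearn classifiers.
print(tree_shap_values.shape)
```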
947
+ {
948
+ "cell_type": "code",
949
+ "execution_count": null,
950
+ "id": "f3be6aab",
951
+ "metadata": {
952
+ "execution": {
953
+ "iopub.execute_input": "2026-02-02T13:04:34.320580Z",
954
+ "iopub.status.busy": "2026-02-02T13:04:34.320441Z",
955
+ "iopub.status.idle": "2026-02-02T13:04:34.531044Z",
956
+ "shell.execute_reply": "2026-02-02T13:04:34.530558Z"
957
+ },
958
+ "papermill": {
959
+ "duration": 0.216039,
960
+ "end_time": "2026-02-02T13:04:34.531635",
961
+ "exception": false,
962
+ "start_time": "2026-02-02T13:04:34.315596",
963
+ "status": "completed"
964
+ },
965
+ "tags": []
966
+ },
967
+ "outputs": [],
968
+ "source": [
969
+ "# Use positive class SHAP values if multi-output\n",
970
+ "if len(shap_values.shape) == 3:\n",
971
+ " shap_vals = shap_values[:, :, 1] # Positive class\n",
972
+ "else:\n",
973
+ " shap_vals = shap_values\n",
974
+ "\n",
975
+ "plt.figure(figsize=(10, 8))\n",
976
+ "shap.summary_plot(shap_vals, X, feature_names=feature_names, show=False, max_display=20)\n",
977
+ "plt.title(\"Feature Importance (SHAP Summary)\")\n",
978
+ "plt.tight_layout()\n",
979
+ "plt.show()"
980
+ ]
981
+ },
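The beeswarm above shows both magnitude and direction per feature; when a plainer global ranking is enough for a report, `shap.plots.bar` renders mean |SHAP| per feature from the same Explanation object. A short sketch:

```python
plt.figure(figsize=(10, 6))
shap.plots.bar(shap_vals, max_display=20, show=False)  # mean |SHAP| per feature
plt.title("Mean |SHAP| per Feature")
plt.tight_layout()
plt.show()
```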
982
+ {
983
+ "cell_type": "code",
984
+ "execution_count": null,
985
+ "id": "e7daa7dc",
986
+ "metadata": {
987
+ "execution": {
988
+ "iopub.execute_input": "2026-02-02T13:04:34.542356Z",
989
+ "iopub.status.busy": "2026-02-02T13:04:34.542238Z",
990
+ "iopub.status.idle": "2026-02-02T13:04:34.548179Z",
991
+ "shell.execute_reply": "2026-02-02T13:04:34.547705Z"
992
+ },
993
+ "papermill": {
994
+ "duration": 0.011839,
995
+ "end_time": "2026-02-02T13:04:34.548851",
996
+ "exception": false,
997
+ "start_time": "2026-02-02T13:04:34.537012",
998
+ "status": "completed"
999
+ },
1000
+ "tags": []
1001
+ },
1002
+ "outputs": [],
1003
+ "source": [
1004
+ "# Mean absolute SHAP values\n",
1005
+ "mean_shap = np.abs(shap_vals.values).mean(axis=0)\n",
1006
+ "importance_df = pd.DataFrame({\n",
1007
+ " \"feature\": feature_names,\n",
1008
+ " \"importance\": mean_shap,\n",
1009
+ "}).sort_values(\"importance\", ascending=False)\n",
1010
+ "\n",
1011
+ "print(\"Top 15 Most Important Features:\")\n",
1012
+ "display(importance_df.head(15))"
1013
+ ]
1014
+ },
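A related question is how concentrated the model's signal is. The sketch below builds on `importance_df` and reports how many features account for 90% of total mean |SHAP|; the 0.90 threshold is an arbitrary illustration, not a churnkit convention.

```python
cum = importance_df.copy()
cum["share"] = cum["importance"] / cum["importance"].sum()
cum["cumulative_share"] = cum["share"].cumsum()

# First row index at which cumulative share reaches 90%.
n_for_90 = int((cum["cumulative_share"] < 0.90).sum()) + 1
print(f"{n_for_90} features cover 90% of total mean |SHAP|")
display(cum[["feature", "share", "cumulative_share"]].head(n_for_90))
```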
1015
+ {
1016
+ "cell_type": "markdown",
1017
+ "id": "30791e92",
1018
+ "metadata": {
1019
+ "papermill": {
1020
+ "duration": 0.004667,
1021
+ "end_time": "2026-02-02T13:04:34.559340",
1022
+ "exception": false,
1023
+ "start_time": "2026-02-02T13:04:34.554673",
1024
+ "status": "completed"
1025
+ },
1026
+ "tags": []
1027
+ },
1028
+ "source": [
1029
+ "## 11.7 Customer Browser"
1030
+ ]
1031
+ },
1032
+ {
1033
+ "cell_type": "code",
1034
+ "execution_count": null,
1035
+ "id": "cd8460ea",
1036
+ "metadata": {
1037
+ "execution": {
1038
+ "iopub.execute_input": "2026-02-02T13:04:34.571206Z",
1039
+ "iopub.status.busy": "2026-02-02T13:04:34.571077Z",
1040
+ "iopub.status.idle": "2026-02-02T13:04:34.575971Z",
1041
+ "shell.execute_reply": "2026-02-02T13:04:34.575461Z"
1042
+ },
1043
+ "papermill": {
1044
+ "duration": 0.012058,
1045
+ "end_time": "2026-02-02T13:04:34.576704",
1046
+ "exception": false,
1047
+ "start_time": "2026-02-02T13:04:34.564646",
1048
+ "status": "completed"
1049
+ },
1050
+ "tags": []
1051
+ },
1052
+ "outputs": [],
1053
+ "source": [
1054
+ "# Create combined dataset for browsing\n",
1055
+ "browser_df = predictions_df.merge(\n",
1056
+ " scoring_features[[FEAST_ENTITY_KEY] + feature_names],\n",
1057
+ " on=FEAST_ENTITY_KEY,\n",
1058
+ " how=\"left\",\n",
1059
+ ")\n",
1060
+ "\n",
1061
+ "print(f\"Customer browser ready with {len(browser_df):,} records\")\n",
1062
+ "print(f\"\\nPrediction Distribution:\")\n",
1063
+ "print(f\" Predicted Positive: {(browser_df['prediction'] == 1).sum():,}\")\n",
1064
+ "print(f\" Predicted Negative: {(browser_df['prediction'] == 0).sum():,}\")\n",
1065
+ "print(f\"\\nCorrect Predictions: {browser_df['correct'].sum():,}/{len(browser_df):,} ({browser_df['correct'].mean():.1%})\")"
1066
+ ]
1067
+ },
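The merge above assumes every name in `feature_names` (produced by `prepare_features`) is also a raw column of `scoring_features`; if the transform renames or one-hot encodes columns, the column selection would raise a `KeyError`. A defensive variant of the same merge, a sketch only, keeps whatever overlap exists and reports what it skipped:

```python
# Keep only the prepared-feature names that actually exist in the raw frame.
available = [c for c in feature_names if c in scoring_features.columns]
missing = sorted(set(feature_names) - set(available))
if missing:
    print(f"Skipping {len(missing)} transformed-only columns, e.g. {missing[:5]}")

browser_df = predictions_df.merge(
    scoring_features[[FEAST_ENTITY_KEY] + available],
    on=FEAST_ENTITY_KEY,
    how="left",
)
```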
1068
+ {
1069
+ "cell_type": "code",
1070
+ "execution_count": null,
1071
+ "id": "d14e806a",
1072
+ "metadata": {
1073
+ "execution": {
1074
+ "iopub.execute_input": "2026-02-02T13:04:34.586759Z",
1075
+ "iopub.status.busy": "2026-02-02T13:04:34.586648Z",
1076
+ "iopub.status.idle": "2026-02-02T13:04:34.590204Z",
1077
+ "shell.execute_reply": "2026-02-02T13:04:34.589718Z"
1078
+ },
1079
+ "papermill": {
1080
+ "duration": 0.01003,
1081
+ "end_time": "2026-02-02T13:04:34.591530",
1082
+ "exception": false,
1083
+ "start_time": "2026-02-02T13:04:34.581500",
1084
+ "status": "completed"
1085
+ },
1086
+ "tags": []
1087
+ },
1088
+ "outputs": [],
1089
+ "source": [
1090
+ "def show_customer(idx: int):\n",
1091
+ " \"\"\"Display details and SHAP explanation for a single customer.\"\"\"\n",
1092
+ " row = browser_df.iloc[idx]\n",
1093
+ " entity_id = row[FEAST_ENTITY_KEY]\n",
1094
+ "\n",
1095
+ " print(f\"=== Customer {entity_id} ===\")\n",
1096
+ " print(f\"Prediction: {int(row['prediction'])} (probability: {row['probability']:.3f})\")\n",
1097
+ " print(f\"Actual: {int(row['actual'])}\")\n",
1098
+ " print(f\"Correct: {'Yes' if row['correct'] else 'No'}\")\n",
1099
+ " print()\n",
1100
+ "\n",
1101
+ " feature_vals = X.iloc[idx]\n",
1102
+ " if len(shap_values.shape) == 3:\n",
1103
+ " customer_shap = shap_values[idx, :, 1].values\n",
1104
+ " else:\n",
1105
+ " customer_shap = shap_values[idx].values\n",
1106
+ "\n",
1107
+ " feature_impact = pd.DataFrame({\n",
1108
+ " \"feature\": feature_names,\n",
1109
+ " \"value\": feature_vals.values,\n",
1110
+ " \"shap_impact\": customer_shap,\n",
1111
+ " }).sort_values(\"shap_impact\", key=abs, ascending=False)\n",
1112
+ "\n",
1113
+ " print(\"Top Contributing Features:\")\n",
1114
+ " display(feature_impact.head(10))\n",
1115
+ "\n",
1116
+ " # Waterfall plot\n",
1117
+ " plt.figure(figsize=(10, 6))\n",
1118
+ " if len(shap_values.shape) == 3:\n",
1119
+ " shap.plots.waterfall(shap_values[idx, :, 1], max_display=10, show=False)\n",
1120
+ " else:\n",
1121
+ " shap.plots.waterfall(shap_values[idx], max_display=10, show=False)\n",
1122
+ " plt.title(f\"SHAP Explanation for Customer {entity_id}\")\n",
1123
+ " plt.tight_layout()\n",
1124
+ " plt.show()"
1125
+ ]
1126
+ },
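For ad-hoc review it can be convenient to wrap `show_customer` in an interactive slider. A sketch, assuming `ipywidgets` is installed in the kernel (it is not a dependency this notebook declares):

```python
from ipywidgets import IntSlider, interact

interact(
    show_customer,
    idx=IntSlider(min=0, max=len(browser_df) - 1, step=1, value=0,
                  description="Customer #"),
)
```

`interact` simply re-runs `show_customer` whenever the slider moves, so the SHAP values computed above are reused without extra work.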
1127
+ {
1128
+ "cell_type": "code",
1129
+ "execution_count": null,
1130
+ "id": "fd6aaaee",
1131
+ "metadata": {
1132
+ "execution": {
1133
+ "iopub.execute_input": "2026-02-02T13:04:34.601322Z",
1134
+ "iopub.status.busy": "2026-02-02T13:04:34.601195Z",
1135
+ "iopub.status.idle": "2026-02-02T13:04:34.992570Z",
1136
+ "shell.execute_reply": "2026-02-02T13:04:34.992096Z"
1137
+ },
1138
+ "papermill": {
1139
+ "duration": 0.396932,
1140
+ "end_time": "2026-02-02T13:04:34.993166",
1141
+ "exception": false,
1142
+ "start_time": "2026-02-02T13:04:34.596234",
1143
+ "status": "completed"
1144
+ },
1145
+ "tags": []
1146
+ },
1147
+ "outputs": [],
1148
+ "source": [
1149
+ "# Show first 3 customers\n",
1150
+ "print(\"Showing first 3 customers:\\n\")\n",
1151
+ "for i in range(min(3, len(browser_df))):\n",
1152
+ " show_customer(i)\n",
1153
+ " print(\"\\n\" + \"=\" * 60 + \"\\n\")"
1154
+ ]
1155
+ },
1156
+ {
1157
+ "cell_type": "code",
1158
+ "execution_count": null,
1159
+ "id": "35a9e063",
1160
+ "metadata": {
1161
+ "execution": {
1162
+ "iopub.execute_input": "2026-02-02T13:04:35.005568Z",
1163
+ "iopub.status.busy": "2026-02-02T13:04:35.005433Z",
1164
+ "iopub.status.idle": "2026-02-02T13:04:35.008375Z",
1165
+ "shell.execute_reply": "2026-02-02T13:04:35.007656Z"
1166
+ },
1167
+ "papermill": {
1168
+ "duration": 0.009752,
1169
+ "end_time": "2026-02-02T13:04:35.008902",
1170
+ "exception": false,
1171
+ "start_time": "2026-02-02T13:04:34.999150",
1172
+ "status": "completed"
1173
+ },
1174
+ "tags": []
1175
+ },
1176
+ "outputs": [],
1177
+ "source": [
1178
+ "# Look up by entity ID\n",
1179
+ "def lookup_customer(entity_id):\n",
1180
+ " \"\"\"Find and display a customer by their entity ID.\"\"\"\n",
1181
+ " mask = browser_df[FEAST_ENTITY_KEY] == entity_id\n",
1182
+ " if not mask.any():\n",
1183
+ " print(f\"Customer {entity_id} not found in scoring set\")\n",
1184
+ " return\n",
1185
+ " idx = browser_df[mask].index[0]\n",
1186
+ " x_idx = browser_df.index.get_loc(idx)\n",
1187
+ " show_customer(x_idx)\n",
1188
+ "\n",
1189
+ "\n",
1190
+ "# Example: lookup_customer(12345)\n",
1191
+ "print(\"Available entity IDs (first 10):\")\n",
1192
+ "print(browser_df[FEAST_ENTITY_KEY].head(10).tolist())"
1193
+ ]
1194
+ },
1195
+ {
1196
+ "cell_type": "markdown",
1197
+ "id": "7a10e90a",
1198
+ "metadata": {
1199
+ "papermill": {
1200
+ "duration": 0.005938,
1201
+ "end_time": "2026-02-02T13:04:35.020817",
1202
+ "exception": false,
1203
+ "start_time": "2026-02-02T13:04:35.014879",
1204
+ "status": "completed"
1205
+ },
1206
+ "tags": []
1207
+ },
1208
+ "source": [
1209
+ "## 11.8 Error Analysis"
1210
+ ]
1211
+ },
1212
+ {
1213
+ "cell_type": "code",
1214
+ "execution_count": null,
1215
+ "id": "2cee8606",
1216
+ "metadata": {
1217
+ "execution": {
1218
+ "iopub.execute_input": "2026-02-02T13:04:35.033732Z",
1219
+ "iopub.status.busy": "2026-02-02T13:04:35.033622Z",
1220
+ "iopub.status.idle": "2026-02-02T13:04:35.036439Z",
1221
+ "shell.execute_reply": "2026-02-02T13:04:35.036131Z"
1222
+ },
1223
+ "papermill": {
1224
+ "duration": 0.010077,
1225
+ "end_time": "2026-02-02T13:04:35.037185",
1226
+ "exception": false,
1227
+ "start_time": "2026-02-02T13:04:35.027108",
1228
+ "status": "completed"
1229
+ },
1230
+ "tags": []
1231
+ },
1232
+ "outputs": [],
1233
+ "source": [
1234
+ "# Analyze misclassified customers\n",
1235
+ "incorrect = browser_df[browser_df[\"correct\"] == 0]\n",
1236
+ "print(f\"Misclassified customers: {len(incorrect):,}\")\n",
1237
+ "\n",
1238
+ "# False positives (predicted 1, actual 0)\n",
1239
+ "fp = incorrect[incorrect[\"prediction\"] == 1]\n",
1240
+ "print(f\" False Positives: {len(fp):,}\")\n",
1241
+ "\n",
1242
+ "# False negatives (predicted 0, actual 1)\n",
1243
+ "fn = incorrect[incorrect[\"prediction\"] == 0]\n",
1244
+ "print(f\" False Negatives: {len(fn):,}\")"
1245
+ ]
1246
+ },
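Beyond raw counts, the most instructive cases are usually the errors the model was most confident about. A small sketch ranking them by predicted probability, using the `fp`/`fn` frames and columns produced above:

```python
# False positives the model was most sure about (highest probability, actual 0).
confident_fp = fp.sort_values("probability", ascending=False)
# False negatives the model was most sure about (lowest probability, actual 1).
confident_fn = fn.sort_values("probability", ascending=True)

cols = [FEAST_ENTITY_KEY, "probability", "prediction", "actual"]
print("Most confident false positives:")
display(confident_fp[cols].head(5))
print("Most confident false negatives:")
display(confident_fn[cols].head(5))
```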
1247
+ {
1248
+ "cell_type": "code",
1249
+ "execution_count": null,
1250
+ "id": "988b6a72",
1251
+ "metadata": {
1252
+ "execution": {
1253
+ "iopub.execute_input": "2026-02-02T13:04:35.048794Z",
1254
+ "iopub.status.busy": "2026-02-02T13:04:35.048684Z",
1255
+ "iopub.status.idle": "2026-02-02T13:04:35.171216Z",
1256
+ "shell.execute_reply": "2026-02-02T13:04:35.170804Z"
1257
+ },
1258
+ "papermill": {
1259
+ "duration": 0.129762,
1260
+ "end_time": "2026-02-02T13:04:35.172645",
1261
+ "exception": false,
1262
+ "start_time": "2026-02-02T13:04:35.042883",
1263
+ "status": "completed"
1264
+ },
1265
+ "tags": []
1266
+ },
1267
+ "outputs": [],
1268
+ "source": [
1269
+ "# Example false positive\n",
1270
+ "if len(fp) > 0:\n",
1271
+ " print(\"\\n=== Example False Positive ===\")\n",
1272
+ " fp_idx = browser_df.index.get_loc(fp.index[0])\n",
1273
+ " show_customer(fp_idx)"
1274
+ ]
1275
+ },
1276
+ {
1277
+ "cell_type": "code",
1278
+ "execution_count": null,
1279
+ "id": "7ce2bae4",
1280
+ "metadata": {
1281
+ "execution": {
1282
+ "iopub.execute_input": "2026-02-02T13:04:35.186493Z",
1283
+ "iopub.status.busy": "2026-02-02T13:04:35.186355Z",
1284
+ "iopub.status.idle": "2026-02-02T13:04:35.446048Z",
1285
+ "shell.execute_reply": "2026-02-02T13:04:35.445342Z"
1286
+ },
1287
+ "papermill": {
1288
+ "duration": 0.267906,
1289
+ "end_time": "2026-02-02T13:04:35.446873",
1290
+ "exception": false,
1291
+ "start_time": "2026-02-02T13:04:35.178967",
1292
+ "status": "completed"
1293
+ },
1294
+ "tags": []
1295
+ },
1296
+ "outputs": [],
1297
+ "source": [
1298
+ "# Example false negative\n",
1299
+ "if len(fn) > 0:\n",
1300
+ " print(\"\\n=== Example False Negative ===\")\n",
1301
+ " fn_idx = browser_df.index.get_loc(fn.index[0])\n",
1302
+ " show_customer(fn_idx)"
1303
+ ]
1304
+ },
1305
+ {
1306
+ "cell_type": "markdown",
1307
+ "id": "773e6df6",
1308
+ "metadata": {
1309
+ "papermill": {
1310
+ "duration": 0.007096,
1311
+ "end_time": "2026-02-02T13:04:35.460861",
1312
+ "exception": false,
1313
+ "start_time": "2026-02-02T13:04:35.453765",
1314
+ "status": "completed"
1315
+ },
1316
+ "tags": []
1317
+ },
1318
+ "source": [
1319
+ "## 11.9 Export Results"
1320
+ ]
1321
+ },
1322
+ {
1323
+ "cell_type": "code",
1324
+ "execution_count": null,
1325
+ "id": "99fec76d",
1326
+ "metadata": {
1327
+ "execution": {
1328
+ "iopub.execute_input": "2026-02-02T13:04:35.474943Z",
1329
+ "iopub.status.busy": "2026-02-02T13:04:35.474800Z",
1330
+ "iopub.status.idle": "2026-02-02T13:04:35.487191Z",
1331
+ "shell.execute_reply": "2026-02-02T13:04:35.486629Z"
1332
+ },
1333
+ "papermill": {
1334
+ "duration": 0.020645,
1335
+ "end_time": "2026-02-02T13:04:35.487797",
1336
+ "exception": false,
1337
+ "start_time": "2026-02-02T13:04:35.467152",
1338
+ "status": "completed"
1339
+ },
1340
+ "tags": []
1341
+ },
1342
+ "outputs": [],
1343
+ "source": [
1344
+ "# Export detailed results with feature importance\n",
1345
+ "output_dir = GEN_EXPERIMENTS_DIR / \"data\" / \"scoring\"\n",
1346
+ "output_dir.mkdir(parents=True, exist_ok=True)\n",
1347
+ "\n",
1348
+ "# Save global feature importance\n",
1349
+ "importance_df.to_csv(output_dir / \"feature_importance.csv\", index=False)\n",
1350
+ "print(f\"Feature importance saved to {output_dir / 'feature_importance.csv'}\")\n",
1351
+ "\n",
1352
+ "top_features = importance_df.head(10)[\"feature\"].tolist()\n",
1353
+ "shap_by_entity = pd.DataFrame({FEAST_ENTITY_KEY: scoring_features[FEAST_ENTITY_KEY].values})\n",
1354
+ "for feat in top_features:\n",
1355
+ " feat_idx = feature_names.index(feat)\n",
1356
+ " if len(shap_values.shape) == 3:\n",
1357
+ " shap_by_entity[f\"shap_{feat}\"] = shap_values[:, feat_idx, 1].values\n",
1358
+ " else:\n",
1359
+ " shap_by_entity[f\"shap_{feat}\"] = shap_values[:, feat_idx].values\n",
1360
+ "\n",
1361
+ "detailed_df = predictions_df.merge(shap_by_entity, on=FEAST_ENTITY_KEY, how=\"left\")\n",
1362
+ "detailed_df.to_parquet(output_dir / \"predictions_with_shap.parquet\", index=False)\n",
1363
+ "print(f\"Detailed predictions with SHAP saved to {output_dir / 'predictions_with_shap.parquet'}\")\n"
1364
+ ]
1365
+ },
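If these exports should travel with the experiment rather than live only on local disk, they can optionally be attached to the best run as MLflow artifacts. A sketch, assuming the `client` and `parent_run` objects loaded in section 11.6 are still in scope:

```python
# Attach the exported files to the parent MLflow run for lineage.
for artifact in ["feature_importance.csv", "predictions_with_shap.parquet"]:
    client.log_artifact(parent_run.info.run_id, str(output_dir / artifact),
                        artifact_path="scoring")
print(f"Exports attached to MLflow run {parent_run.info.run_id}")
```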
1366
+ {
1367
+ "cell_type": "markdown",
1368
+ "id": "91811812",
1369
+ "metadata": {
1370
+ "papermill": {
1371
+ "duration": 0.006688,
1372
+ "end_time": "2026-02-02T13:04:35.501600",
1373
+ "exception": false,
1374
+ "start_time": "2026-02-02T13:04:35.494912",
1375
+ "status": "completed"
1376
+ },
1377
+ "tags": []
1378
+ },
1379
+ "source": [
1380
+ "> **Save Reminder:** Save this notebook (Ctrl+S / Cmd+S) before running the next one.\n",
1381
+ "> The next notebook will automatically export this notebook's HTML documentation from the saved file."
1382
+ ]
1383
+ }
1384
+ ],
1385
+ "metadata": {
1386
+ "kernelspec": {
1387
+ "display_name": "Python 3",
1388
+ "language": "python",
1389
+ "name": "python3"
1390
+ },
1391
+ "language_info": {
1392
+ "codemirror_mode": {
1393
+ "name": "ipython",
1394
+ "version": 3
1395
+ },
1396
+ "file_extension": ".py",
1397
+ "mimetype": "text/x-python",
1398
+ "name": "python",
1399
+ "nbconvert_exporter": "python",
1400
+ "pygments_lexer": "ipython3",
1401
+ "version": "3.12.4"
1402
+ },
1403
+ "papermill": {
1404
+ "default_parameters": {},
1405
+ "duration": 18.648212,
1406
+ "end_time": "2026-02-02T13:04:38.124707",
1407
+ "environment_variables": {},
1408
+ "exception": null,
1409
+ "input_path": "/Users/Vital/python/CustomerRetention/exploration_notebooks/11_scoring_validation.ipynb",
1410
+ "output_path": "/Users/Vital/python/CustomerRetention/exploration_notebooks/11_scoring_validation.ipynb",
1411
+ "parameters": {},
1412
+ "start_time": "2026-02-02T13:04:19.476495",
1413
+ "version": "2.6.0"
1414
+ }
1415
+ },
1416
+ "nbformat": 4,
1417
+ "nbformat_minor": 5
1418
+ }