PyPI - noshot - Versions diffs - 8.0.0__py3-none-any.whl → 10.0.0__py3-none-any.whl - Mend

noshot 8.0.0__py3-none-any.whl → 10.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

noshot/data/ML TS XAI/ML/ML Lab H Sec/3. Insurance Target Categorical (Overfit vs Regularized).ipynb ADDED Viewed

@@ -0,0 +1,274 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "55832f35-459a-4a0e-a379-35484f92d1c5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import tensorflow as tf\n",
+    "from tensorflow.keras.models import Sequential\n",
+    "from tensorflow.keras.layers import Dense, Dropout\n",
+    "from tensorflow.keras.optimizers import Adam\n",
+    "from tensorflow.keras import regularizers\n",
+    "from sklearn.preprocessing import LabelEncoder\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.metrics import classification_report, confusion_matrix \n",
+    "from sklearn.metrics import ConfusionMatrixDisplay, accuracy_score, roc_curve, auc\n",
+    "\n",
+    "import warnings\n",
+    "warnings.filterwarnings('ignore')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f3344d11-95e5-481b-93e4-318bf834d2c0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_csv(\"insurance.csv\")\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "62045487-f95d-402a-b18e-85009ff58903",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "labels = ['Low', 'Medium', 'High', 'Very High']\n",
+    "df['charges'] = pd.qcut(df['charges'], q=4, labels=labels)\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "eb3bd408-f8a3-41fd-a778-e7608071e5e4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cols = ['sex', 'smoker', 'region', 'charges']\n",
+    "df[cols] = df[cols].apply(LabelEncoder().fit_transform)\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c07602a7-b504-4d9b-a3c6-68bceee8280b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X = df.drop(columns=['charges'])\n",
+    "y = tf.keras.utils.to_categorical(df['charges'], num_classes=4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "89d7dd41-4d03-4f50-8639-9065bd17b7f6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "overfit = Sequential([\n",
+    "    Dense(64, activation='relu', input_shape=(6,)),\n",
+    "    Dense(64, activation='relu'),\n",
+    "    Dense(4, activation='softmax')\n",
+    "])\n",
+    "\n",
+    "overfit.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8d55f010-fe99-40a4-a7f7-8fbe26e15e71",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)\n",
+    "history1 = overfit.fit(x_train, y_train, epochs=25, \n",
+    "                    batch_size=16, validation_split=0.2,\n",
+    "                    verbose=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1151c540-a049-4cde-b6ec-51e214cf52ba",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "regularized = Sequential([\n",
+    "    Dense(128, activation='relu', input_shape=(6,), kernel_regularizer=regularizers.l2(0.001)),\n",
+    "    Dropout(0.1),\n",
+    "    Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.001)),\n",
+    "    Dropout(0.1),\n",
+    "    Dense(4, activation='softmax')\n",
+    "])\n",
+    "\n",
+    "regularized.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "88058b99-e8cd-47b6-a4bb-bc626996deca",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', \n",
+    "                                                  patience=10, \n",
+    "                                                  restore_best_weights=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "22964de8-9838-4f7c-9141-bffdf4def8b1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Reuse the x_train/x_test split created for the overfit model. Re-splitting here with a\n",
+    "# different seed and size overwrites those variables, so the later evaluation cells would\n",
+    "# score the overfit model on rows it was trained on and compare the models on mismatched data.\n",
+    "history2 = regularized.fit(x_train, y_train, epochs=25, \n",
+    "                    batch_size=16, validation_split=0.3,\n",
+    "                    callbacks=[early_stopping], verbose=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "829e7156-0ce9-4e48-becc-48d8c607fc4d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def plot_history(history1, history2):\n",
+    "    \"\"\"Plot per-epoch training and validation accuracy for both models side by side.\n",
+    "\n",
+    "    history1 and history2 are the objects returned by fit() for the overfit and the\n",
+    "    regularized model; their .history dict is read under 'accuracy' and 'val_accuracy'.\n",
+    "    \"\"\"\n",
+    "    # (history key, panel title); the title is also used in the legend so that the\n",
+    "    # training curves are no longer mislabelled as validation accuracy.\n",
+    "    panels = [('accuracy', 'Training Accuracy'), ('val_accuracy', 'Validation Accuracy')]\n",
+    "\n",
+    "    plt.figure(figsize=(12, 5))\n",
+    "    for position, (key, title) in enumerate(panels, start=1):\n",
+    "        plt.subplot(1, 2, position)\n",
+    "        plt.plot(history1.history[key], label=f'Overfit Model {title}')\n",
+    "        plt.plot(history2.history[key], label=f'Regularized Model {title}')\n",
+    "        plt.title(title)\n",
+    "        plt.xlabel('Epoch')\n",
+    "        plt.ylabel('Accuracy')\n",
+    "        plt.legend()\n",
+    "\n",
+    "    plt.tight_layout()\n",
+    "    plt.show()\n",
+    "\n",
+    "plot_history(history1, history2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6ff519cf-eca6-48fe-923c-aaa18721608e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "loss1, accuracy1 = overfit.evaluate(x_train, y_train)\n",
+    "loss2, accuracy2 = overfit.evaluate(x_test, y_test)\n",
+    "print(f'Train accuracy Overfit Model: {accuracy1:.4f}')\n",
+    "print(f'Test accuracy Overfit Model: {accuracy2:.4f}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0aa14e69-14d8-4566-a532-59d17b89dd04",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "loss1, accuracy1 = regularized.evaluate(x_train, y_train)\n",
+    "loss2, accuracy2 = regularized.evaluate(x_test, y_test)\n",
+    "print(f'Train accuracy Regularized Model: {accuracy1:.4f}')\n",
+    "print(f'Test accuracy Regularized Model: {accuracy2:.4f}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d1aadc08-5ef6-4285-b889-d4dad9ddb3fd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "y_pred1 = np.argmax(overfit.predict(x_test), axis=1)\n",
+    "y_test1 = np.argmax(y_test, axis=1)\n",
+    "y_pred2 = np.argmax(regularized.predict(x_test), axis=1)\n",
+    "y_test2 = np.argmax(y_test, axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c36a9495-2a6b-47da-b50a-060d0166412c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"Classification Report for Overfit:\")\n",
+    "print(classification_report(y_test1, y_pred1))\n",
+    "\n",
+    "print(\"\\nClassification Report for Regularized:\")\n",
+    "print(classification_report(y_test2, y_pred2))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8dec9763-db18-4476-9f2a-acfc3bd5b63b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# confusion_matrix orders rows/columns by sorted class id, so the display labels must be\n",
+    "# sorted as well; unique() alone returns the ids in order of first appearance in df.\n",
+    "labels = np.sort(df['charges'].unique())\n",
+    "# Passing labels explicitly keeps both matrices the same shape even if a class is absent\n",
+    "# from the test rows or from a model's predictions.\n",
+    "cm1 = confusion_matrix(y_test1, y_pred1, labels=labels)\n",
+    "cm2 = confusion_matrix(y_test2, y_pred2, labels=labels)\n",
+    "\n",
+    "fig, ax = plt.subplots(1, 2, figsize=(14, 6))\n",
+    "\n",
+    "ConfusionMatrixDisplay(cm1, display_labels=labels).plot(ax=ax[0])\n",
+    "ax[0].set_title(\"Overfit Confusion Matrix\", size=25)\n",
+    "\n",
+    "ConfusionMatrixDisplay(cm2, display_labels=labels).plot(ax=ax[1])\n",
+    "ax[1].set_title(\"Regularized Confusion Matrix\", size=25)\n",
+    "\n",
+    "plt.tight_layout()\n",
+    "plt.show()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "NEW-VENV-1",
+   "language": "python",
+   "name": "new-venv-1"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

noshot/data/ML TS XAI/ML/ML Lab H Sec/3. Insurance Target Numerical (Overfit vs Regularized).ipynb ADDED Viewed

@@ -0,0 +1,263 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bd31d202-9ef3-419d-85aa-478e28199448",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import tensorflow as tf\n",
+    "from tensorflow.keras.layers import Input, Dense, Dropout\n",
+    "from tensorflow.keras.models import Sequential\n",
+    "from tensorflow.keras.regularizers import l2\n",
+    "from tensorflow.keras.callbacks import EarlyStopping\n",
+    "from sklearn.preprocessing import StandardScaler, LabelEncoder\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.metrics import mean_squared_error, r2_score"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "10fef1a2-2c53-42b4-b8ec-f1dae5b14a1c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_csv('insurance.csv')\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6079f0b7-b6b3-493e-8a73-ff3f944db314",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cat_cols = ['sex', 'smoker', 'region']\n",
+    "df[cat_cols] = df[cat_cols].apply(LabelEncoder().fit_transform)\n",
+    "print(df.head())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4a4d03e5-a817-4b66-9ab6-d32b18fe441a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X = df.drop(columns='charges')\n",
+    "y = df['charges']\n",
+    "X_train_full, X_test, y_train_full, y_test = train_test_split(X, y, test_size=0.2, \n",
+    "                                                              random_state=42)\n",
+    "X_train, X_val, y_train, y_val = train_test_split(X_train_full, y_train_full, test_size=0.2, \n",
+    "                                                  random_state=42)\n",
+    "\n",
+    "# Fit the scaler on the training rows only: fitting on all of X before splitting lets\n",
+    "# validation/test statistics leak into the features the models are trained on.\n",
+    "scaler = StandardScaler().fit(X_train)\n",
+    "X_train_full = scaler.transform(X_train_full)\n",
+    "X_train = scaler.transform(X_train)\n",
+    "X_val = scaler.transform(X_val)\n",
+    "X_test = scaler.transform(X_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f4c58e24-e103-4d92-993c-6db9f9b57657",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "overfit_model = Sequential([\n",
+    "    Input(shape=(X_train.shape[1],)),\n",
+    "    Dense(64, activation='relu'),\n",
+    "    Dense(64, activation='relu'),\n",
+    "    Dense(1, activation='linear'),\n",
+    "])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "533969a3-9b46-4fd7-a21a-968e96821c08",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "reg_model = Sequential([\n",
+    "    Input(shape=(X_train.shape[1],)),\n",
+    "    Dense(64, activation='relu'),\n",
+    "    Dropout(0.1),\n",
+    "    Dense(64, activation='relu', kernel_regularizer=l2(0.001)),\n",
+    "    Dropout(0.1),\n",
+    "    Dense(1, activation='linear'),\n",
+    "])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c9471a5d-3c49-4531-bae6-8138c9812261",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"\\nOverfit Model Summary:\")\n",
+    "overfit_model.summary()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3efc2e9b-2b1e-416c-888e-9ea092de9d3f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"\\nRegularized Model Summary:\")\n",
+    "reg_model.summary()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c76d9120-15c2-447b-9e9c-83c16c118096",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "overfit_model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae', 'mse'])\n",
+    "reg_model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae', 'mse'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c05813ae-dd34-4caf-b008-9e683216c891",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "history_overfit = overfit_model.fit(\n",
+    "    X_train, y_train,\n",
+    "    validation_data=(X_val, y_val),\n",
+    "    epochs=20,\n",
+    "    batch_size=16,\n",
+    "    verbose=1\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "124d821f-3272-4a0b-9a24-0fedc7f7cfee",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "history_reg = reg_model.fit(\n",
+    "    X_train, y_train,\n",
+    "    validation_data=(X_val, y_val),\n",
+    "    epochs=20,\n",
+    "    batch_size=16,\n",
+    "    verbose=1\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "856e0e03-f681-4061-83b9-83da2823adda",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Single full-width plot: this cell draws only one panel, so a 1x2 subplot grid would\n",
+    "# leave the right half of the figure empty.\n",
+    "plt.figure(figsize=(14, 6))\n",
+    "plt.plot(history_overfit.history['val_loss'], label='Overfit Val Loss', color='blue', linestyle='--')\n",
+    "plt.plot(history_reg.history['val_loss'], label='Regularized Val Loss', color='brown', linestyle='--')\n",
+    "plt.title('Model Loss Comparison (MSE)')\n",
+    "plt.xlabel('Epoch')\n",
+    "plt.ylabel('Mean Squared Error Loss')\n",
+    "plt.legend()\n",
+    "plt.grid(True)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f5315d68-df04-4883-b021-5b3d1be6c180",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.plot(history_overfit.history['val_mae'], label='Overfit Val MAE', color='blue')\n",
+    "plt.plot(history_reg.history['val_mae'], label='Regularized Val MAE', color='brown')\n",
+    "plt.title('Model Mean Absolute Error Comparison')\n",
+    "plt.xlabel('Epoch')\n",
+    "plt.ylabel('Mean Absolute Error')\n",
+    "plt.legend()\n",
+    "plt.grid(True)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bef16d6a-c694-495e-91b8-4b6021146b21",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "loss_overfit, mae_overfit, mse_overfit_eval = overfit_model.evaluate(X_test, y_test, verbose=0)\n",
+    "y_pred_overfit = overfit_model.predict(X_test, verbose=0).flatten()\n",
+    "r2_overfit = r2_score(y_test, y_pred_overfit)\n",
+    "\n",
+    "loss_reg, mae_reg, mse_reg_eval = reg_model.evaluate(X_test, y_test, verbose=0)\n",
+    "y_pred_reg = reg_model.predict(X_test, verbose=0).flatten()\n",
+    "r2_reg = r2_score(y_test, y_pred_reg)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cce008a5-7588-4574-a2a9-5b34f12038d5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"\\nTest Set Performance:\")\n",
+    "print(f\"Overfit Model     - MSE: {mse_overfit_eval:.2f}, MAE: {mae_overfit:.2f}, R2: {r2_overfit:.4f}\")\n",
+    "print(f\"Regularized Model - MSE: {mse_reg_eval:.2f}, MAE: {mae_reg:.2f}, R2: {r2_reg:.4f}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "40c9647b-55d7-4d9a-a0ae-061f71cbec72",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(figsize=(10, 7))\n",
+    "plt.scatter(y_test, y_pred_overfit, color='skyblue', edgecolors='k', linewidth=0.5, label=f'Overfit Model (R2={r2_overfit:.3f})')\n",
+    "plt.scatter(y_test, y_pred_reg, color='sandybrown', edgecolors='k', linewidth=0.5, label=f'Regularized Model (R2={r2_reg:.3f})')\n",
+    "plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], color='black', linestyle='--', linewidth=2, label='Perfect Prediction')\n",
+    "plt.title('Actual vs Predicted Charges (Test Set)')\n",
+    "plt.xlabel('Actual Charges ($)')\n",
+    "plt.ylabel('Predicted Charges ($)')\n",
+    "plt.legend()\n",
+    "plt.grid(True)\n",
+    "plt.show()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "NEW-VENV-1",
+   "language": "python",
+   "name": "new-venv-1"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

noshot 8.0.0__py3-none-any.whl → 10.0.0__py3-none-any.whl

noshot 8.0.0__py3-none-any.whl → 10.0.0__py3-none-any.whl