PyPI - noshot - Versions diffs - 5.0.0__py3-none-any.whl → 7.0.0__py3-none-any.whl - Mend

noshot 5.0.0py3-none-any.whl → 7.0.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

noshot/data/ML TS XAI/ML/Whitefang Code/All in One Lab CIA 1 Q.ipynb ADDED Viewed

@@ -0,0 +1,1087 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "R7euuRFaCdIZ"
+   },
+   "source": [
+    "### ***Required Packages***"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "executionInfo": {
+     "elapsed": 6,
+     "status": "ok",
+     "timestamp": 1741051571358,
+     "user": {
+      "displayName": "Jaison A",
+      "userId": "07006398627763032071"
+     },
+     "user_tz": -330
+    },
+    "id": "c5Fdgw1I3gJP"
+   },
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "from sklearn.neighbors import KNeighborsClassifier\n",
+    "from sklearn.linear_model import LogisticRegression,LinearRegression\n",
+    "from sklearn.cluster import KMeans\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.metrics import accuracy_score,confusion_matrix,classification_report,f1_score,r2_score,adjusted_rand_score\n",
+    "from sklearn.decomposition import PCA\n",
+    "from sklearn.preprocessing import LabelEncoder,MinMaxScaler,StandardScaler\n",
+    "from statsmodels.stats.outliers_influence import variance_inflation_factor\n",
+    "import seaborn as sns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "executionInfo": {
+     "elapsed": 2335,
+     "status": "ok",
+     "timestamp": 1741051573697,
+     "user": {
+      "displayName": "Jaison A",
+      "userId": "07006398627763032071"
+     },
+     "user_tz": -330
+    },
+    "id": "CZiiMp0u4QC1",
+    "outputId": "a0067b93-e036-4961-e8c6-31a318689332"
+   },
+   "outputs": [],
+   "source": [
+    "from google.colab import drive\n",
+    "drive.mount('/content/drive')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "qCogj3nw4UUy"
+   },
+   "source": [
+    "### ***Question 1***"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 0
+    },
+    "executionInfo": {
+     "elapsed": 9,
+     "status": "ok",
+     "timestamp": 1741051573708,
+     "user": {
+      "displayName": "Jaison A",
+      "userId": "07006398627763032071"
+     },
+     "user_tz": -330
+    },
+    "id": "G9xT9oSK4R7q",
+    "outputId": "e95932f3-44ae-4401-8723-860a9e41dd21"
+   },
+   "outputs": [],
+   "source": [
+    "df=pd.read_csv('/content/drive/MyDrive/sem 6/Lab/ML Lab/SOC_LAB1/dataset/KNN/heart_disease_uci.csv')\n",
+    "display(df.head())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "xDAWQGs54t31"
+   },
+   "source": [
+    "**Handle Missing Values**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "executionInfo": {
+     "elapsed": 16,
+     "status": "ok",
+     "timestamp": 1741051573727,
+     "user": {
+      "displayName": "Jaison A",
+      "userId": "07006398627763032071"
+     },
+     "user_tz": -330
+    },
+    "id": "8w3eecNs4xU8",
+    "outputId": "24fdd56c-0327-4233-ae56-3e6a7eb41849"
+   },
+   "outputs": [],
+   "source": [
+    "# Feature / target columns consumed by the KNN cell below.\n",
+    "# 'id' is not used as a feature: it is presumably just a row identifier (the Question 4 cell\n",
+    "# drops it too) - TODO confirm.\n",
+    "features=['age','sex','dataset','cp','trestbps','chol','fbs','restecg','thalch','exang','oldpeak','slope','ca','thal']\n",
+    "target=['num']\n",
+    "\n",
+    "categorical_cols=['sex','dataset','cp','fbs','restecg','exang','slope','thal']\n",
+    "numeric_cols=['trestbps','chol','thalch','oldpeak','ca']\n",
+    "\n",
+    "# Impute BEFORE encoding. Once a column has been label-encoded it holds only integer codes,\n",
+    "# so a later fillna has nothing left to fill, and a mean taken over category codes is meaningless.\n",
+    "# Numeric columns get their mean, categorical columns their most frequent value.\n",
+    "for col in numeric_cols:\n",
+    "    df[col]=df[col].fillna(df[col].mean())\n",
+    "for col in categorical_cols:\n",
+    "    df[col]=df[col].fillna(df[col].mode().iloc[0])\n",
+    "\n",
+    "# Integer-encode the categorical columns; casting to str first gives the encoder uniformly typed input.\n",
+    "le=LabelEncoder()\n",
+    "for col in categorical_cols:\n",
+    "    df[col]=le.fit_transform(df[col].astype(str))\n",
+    "\n",
+    "# Remaining missing values per column.\n",
+    "print(df.isnull().sum())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "zCvcMIjt7jaL"
+   },
+   "source": [
+    "**Data Split, Scaling, KNN Model and Metrics**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "executionInfo": {
+     "elapsed": 285,
+     "status": "ok",
+     "timestamp": 1741051574014,
+     "user": {
+      "displayName": "Jaison A",
+      "userId": "07006398627763032071"
+     },
+     "user_tz": -330
+    },
+    "id": "8e2BPGlM7fGJ",
+    "outputId": "a135b7ff-b7d2-4872-b54f-c6910e75c2d8"
+   },
+   "outputs": [],
+   "source": [
+    "x=df[features]\n",
+    "# target is a one-element list, so df[target] is a single-column DataFrame. Flatten it to 1-D so\n",
+    "# the classifier is fitted on a plain label vector rather than a column vector (which sklearn\n",
+    "# reports with a DataConversionWarning).\n",
+    "y=df[target].values.ravel()\n",
+    "\n",
+    "X_train,X_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)\n",
+    "\n",
+    "# Two scalers are compared; each is fitted on the training split only and then applied to the test split.\n",
+    "scalers={'MinMax':MinMaxScaler(),'Standard':StandardScaler()}\n",
+    "\n",
+    "for name,scaler in scalers.items():\n",
+    "    print(f'Applying {name} Scaling : ')\n",
+    "    scaled_x_train=scaler.fit_transform(X_train)\n",
+    "    scaled_x_test=scaler.transform(X_test)\n",
+    "    # Evaluate KNN for several neighbourhood sizes under the current scaling.\n",
+    "    for k in [3,5,7,9]:\n",
+    "        knn=KNeighborsClassifier(n_neighbors=k)\n",
+    "        knn.fit(scaled_x_train,y_train)\n",
+    "        y_pred=knn.predict(scaled_x_test)\n",
+    "        print(f\"K : {k}\")\n",
+    "        print(f'Accuracy Score : {accuracy_score(y_test,y_pred)}')\n",
+    "        print(f'Confusion Matrix : \\n{confusion_matrix(y_test,y_pred)}')\n",
+    "        print(f'Classification Report : \\n{classification_report(y_test,y_pred)}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "ytnBNsap7xZZ"
+   },
+   "source": [
+    "### ***Question 2***"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 0
+    },
+    "executionInfo": {
+     "elapsed": 3491,
+     "status": "ok",
+     "timestamp": 1741051577507,
+     "user": {
+      "displayName": "Jaison A",
+      "userId": "07006398627763032071"
+     },
+     "user_tz": -330
+    },
+    "id": "wdzmD1O-7wm7",
+    "outputId": "b3424de8-6d74-4f0c-f0e8-eaad63772df9"
+   },
+   "outputs": [],
+   "source": [
+    "df=pd.read_excel('/content/drive/MyDrive/sem 6/Lab/ML Lab/SOC_LAB1/dataset/KNN/Telco_customer_churn.xlsx')\n",
+    "display(df.head())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "AjA-tcxd-12r"
+   },
+   "source": [
+    "**Handle Missing Values**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "executionInfo": {
+     "elapsed": 9,
+     "status": "ok",
+     "timestamp": 1741051577509,
+     "user": {
+      "displayName": "Jaison A",
+      "userId": "07006398627763032071"
+     },
+     "user_tz": -330
+    },
+    "id": "YWaaBkbf85Qp",
+    "outputId": "2a526a88-5be8-4e91-b6a5-80f36f48f8de"
+   },
+   "outputs": [],
+   "source": [
+    "# Remove the 'Churn Reason' and 'CustomerID' columns, then report missing values per column.\n",
+    "# Reassigning with errors='ignore' (instead of inplace=True) keeps the cell safe to re-run:\n",
+    "# a second run does not raise KeyError for columns that are already gone.\n",
+    "df=df.drop(columns=['Churn Reason','CustomerID'],errors='ignore')\n",
+    "print(df.isnull().sum())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "S-OTOJ83-twv"
+   },
+   "source": [
+    "**Encode Data**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 0
+    },
+    "executionInfo": {
+     "elapsed": 61,
+     "status": "ok",
+     "timestamp": 1741051577564,
+     "user": {
+      "displayName": "Jaison A",
+      "userId": "07006398627763032071"
+     },
+     "user_tz": -330
+    },
+    "id": "gcWGzUiV9g1l",
+    "outputId": "5d78e4e4-d3dc-4609-ccfc-df783d1de7ed"
+   },
+   "outputs": [],
+   "source": [
+    "# Integer-encode every object-typed column; values are cast to str first so each column is uniformly typed.\n",
+    "le=LabelEncoder()\n",
+    "for col in df.select_dtypes(include=['object']).columns:\n",
+    "    df[col]=le.fit_transform(df[col].astype('str'))\n",
+    "\n",
+    "display(df.head())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "hO_W1tgT-3MV"
+   },
+   "source": [
+    "**Data Split, Model Training and Metrics**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "executionInfo": {
+     "elapsed": 239,
+     "status": "ok",
+     "timestamp": 1741051577801,
+     "user": {
+      "displayName": "Jaison A",
+      "userId": "07006398627763032071"
+     },
+     "user_tz": -330
+    },
+    "id": "-8dwgDMc-95x",
+    "outputId": "725916d8-f062-4bf7-b3e4-57b91e7a3f51"
+   },
+   "outputs": [],
+   "source": [
+    "# Columns kept out of the feature matrix in addition to the target itself.\n",
+    "# NOTE(review): in the public IBM Telco churn workbook 'Churn Label' is the Yes/No form of\n",
+    "# 'Churn Value' and 'Churn Score' is a churn-model output, so leaving them in leaks the answer\n",
+    "# into the features - confirm against this file. errors='ignore' keeps the cell working if a\n",
+    "# listed column is absent.\n",
+    "leak_cols=['Churn Label','Churn Score']\n",
+    "X_churn=df.drop(columns=['Churn Value']+leak_cols,errors='ignore')\n",
+    "y_churn=df['Churn Value']\n",
+    "\n",
+    "X_train,X_test,y_train,y_test=train_test_split(X_churn,y_churn,test_size=0.2,random_state=42)\n",
+    "\n",
+    "# Fit the scaler on the training split only, then apply it to both splits.\n",
+    "scalers=StandardScaler()\n",
+    "\n",
+    "X_train_Scaled=scalers.fit_transform(X_train)\n",
+    "X_test_Scaled=scalers.transform(X_test)\n",
+    "\n",
+    "# KNN baseline.\n",
+    "knn=KNeighborsClassifier(n_neighbors=5)\n",
+    "knn.fit(X_train_Scaled,y_train)\n",
+    "y_pred_knn=knn.predict(X_test_Scaled)\n",
+    "\n",
+    "# Logistic regression on the same scaled features.\n",
+    "logreg=LogisticRegression()\n",
+    "logreg.fit(X_train_Scaled,y_train)\n",
+    "y_pred_logreg=logreg.predict(X_test_Scaled)\n",
+    "\n",
+    "print(f'KNN Accuracy : {accuracy_score(y_test,y_pred_knn)}')\n",
+    "print(f'Logistic Regression Accuracy : {accuracy_score(y_test,y_pred_logreg)}')\n",
+    "\n",
+    "print(f\"KNN f1 Score : {f1_score(y_test,y_pred_knn)}\")\n",
+    "print(f\"Logistic Regression f1 Score : {f1_score(y_test,y_pred_logreg)}\")\n",
+    "\n",
+    "print(f\"KNN Classification Report : \\n{classification_report(y_test,y_pred_knn)}\")\n",
+    "print(f\"Logistic Regression Classification Report : \\n{classification_report(y_test,y_pred_logreg)}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "FU9VMoYaByfs"
+   },
+   "source": [
+    "### ***Question 3***"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 0
+    },
+    "executionInfo": {
+     "elapsed": 2763,
+     "status": "ok",
+     "timestamp": 1741051580562,
+     "user": {
+      "displayName": "Jaison A",
+      "userId": "07006398627763032071"
+     },
+     "user_tz": -330
+    },
+    "id": "WnZ6_jhgB1I7",
+    "outputId": "73b756b8-0fc3-45aa-c067-22ca9d42cefa"
+   },
+   "outputs": [],
+   "source": [
+    "df=pd.read_csv('/content/drive/MyDrive/sem 6/Lab/ML Lab/SOC_LAB1/dataset/PCA/all_stocks_5yr.csv')\n",
+    "display(df.head())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "executionInfo": {
+     "elapsed": 126,
+     "status": "ok",
+     "timestamp": 1741051580686,
+     "user": {
+      "displayName": "Jaison A",
+      "userId": "07006398627763032071"
+     },
+     "user_tz": -330
+    },
+    "id": "3nU3DybvD0kt"
+   },
+   "outputs": [],
+   "source": [
+    "# Columns that feed the PCA.\n",
+    "features = ['open', 'high', 'low', 'close', 'volume']\n",
+    "\n",
+    "# Keep only the rows where every selected column is present.\n",
+    "data = df[features].dropna()\n",
+    "\n",
+    "# Bring each column to zero mean / unit variance before PCA.\n",
+    "scaler = StandardScaler()\n",
+    "data_scaled = scaler.fit_transform(data)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "executionInfo": {
+     "elapsed": 83,
+     "status": "ok",
+     "timestamp": 1741051580771,
+     "user": {
+      "displayName": "Jaison A",
+      "userId": "07006398627763032071"
+     },
+     "user_tz": -330
+    },
+    "id": "PPghwxj_D3QT",
+    "outputId": "d4c39545-9879-4464-fc35-3d57ec0e9763"
+   },
+   "outputs": [],
+   "source": [
+    "# Perform PCA and retain 90% variance\n",
+    "pca = PCA(n_components=0.90)\n",
+    "data_pca = pca.fit_transform(data_scaled)\n",
+    "\n",
+    "# Number of components required to retain 90% variance\n",
+    "num_components = pca.n_components_\n",
+    "print(f'Number of components to retain 90% variance: {num_components}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 0
+    },
+    "executionInfo": {
+     "elapsed": 393,
+     "status": "ok",
+     "timestamp": 1741051581177,
+     "user": {
+      "displayName": "Jaison A",
+      "userId": "07006398627763032071"
+     },
+     "user_tz": -330
+    },
+    "id": "WfqAFub0EAhg",
+    "outputId": "54331b4c-87fe-40f5-a68c-1673bdacdc9d"
+   },
+   "outputs": [],
+   "source": [
+    "# Cumulative explained variance against the number of retained components.\n",
+    "fig, ax = plt.subplots(figsize=(8, 5))\n",
+    "component_counts = range(1, num_components + 1)\n",
+    "cumulative_variance = np.cumsum(pca.explained_variance_ratio_)\n",
+    "ax.plot(component_counts, cumulative_variance, marker='o', linestyle='--')\n",
+    "ax.set_xlabel('Number of Components')\n",
+    "ax.set_ylabel('Cumulative Explained Variance')\n",
+    "ax.set_title('Explained Variance by Components')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 0
+    },
+    "executionInfo": {
+     "elapsed": 52676,
+     "status": "ok",
+     "timestamp": 1741051633865,
+     "user": {
+      "displayName": "Jaison A",
+      "userId": "07006398627763032071"
+     },
+     "user_tz": -330
+    },
+    "id": "bzEs9MZnEENb",
+    "outputId": "073b9e4a-fd78-4086-92fa-70dad35ed944"
+   },
+   "outputs": [],
+   "source": [
+    "# Pairwise scatter plots of the standardized columns (KDE on the diagonal) before PCA.\n",
+    "scaled_frame = pd.DataFrame(data_scaled, columns=features)\n",
+    "sns.pairplot(scaled_frame, diag_kind='kde')\n",
+    "plt.suptitle('Stock Data Before PCA')\n",
+    "plt.show()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 0
+    },
+    "executionInfo": {
+     "elapsed": 1697,
+     "status": "ok",
+     "timestamp": 1741051635567,
+     "user": {
+      "displayName": "Jaison A",
+      "userId": "07006398627763032071"
+     },
+     "user_tz": -330
+    },
+    "id": "LvNwIOpeEImM",
+    "outputId": "16fcf1f3-b788-4ead-c672-b8c25dddd7e6"
+   },
+   "outputs": [],
+   "source": [
+    "# First two principal components plotted against each other.\n",
+    "fig, ax = plt.subplots()\n",
+    "ax.scatter(data_pca[:, 0], data_pca[:, 1], alpha=0.5)\n",
+    "ax.set_xlabel('Principal Component 1')\n",
+    "ax.set_ylabel('Principal Component 2')\n",
+    "ax.set_title('Stock Data After PCA')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "executionInfo": {
+     "elapsed": 7873,
+     "status": "ok",
+     "timestamp": 1741051643443,
+     "user": {
+      "displayName": "Jaison A",
+      "userId": "07006398627763032071"
+     },
+     "user_tz": -330
+    },
+    "id": "V1WmxZ0lEl99",
+    "outputId": "7eff00e8-16ed-4879-f56e-8905540a0fee"
+   },
+   "outputs": [],
+   "source": [
+    "# Binary target: 1 if the next row's close is higher than the current close, else 0.\n",
+    "# When a ticker column is present, shift within each ticker so the last row of one stock is not\n",
+    "# compared with the first row of the next one.\n",
+    "# NOTE(review): the ticker column is presumed to be called 'Name', and rows are presumed to be\n",
+    "# in chronological order within each ticker - confirm against the CSV.\n",
+    "if 'Name' in df.columns:\n",
+    "    next_close = df.groupby('Name')['close'].shift(-1)\n",
+    "else:\n",
+    "    next_close = df['close'].shift(-1)\n",
+    "df['price_movement'] = np.where(next_close > df['close'], 1, 0)\n",
+    "\n",
+    "# data_pca has exactly one row per row of `data` (the NaN-free feature frame), so pick the labels\n",
+    "# through data.index rather than relying on df.dropna() removing the very same rows.\n",
+    "# np.where never produces NaN, so rows without a following close are excluded explicitly:\n",
+    "# they have no defined label.\n",
+    "has_next = next_close.loc[data.index].notna().to_numpy()\n",
+    "X_pca = data_pca[has_next]\n",
+    "y = df.loc[data.index, 'price_movement'].to_numpy()[has_next]\n",
+    "\n",
+    "# Split data into train and test sets\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)\n",
+    "\n",
+    "# Apply KNN classification on the PCA-projected features\n",
+    "knn = KNeighborsClassifier(n_neighbors=5)\n",
+    "knn.fit(X_train, y_train)\n",
+    "y_pred = knn.predict(X_test)\n",
+    "\n",
+    "# Evaluate KNN classification performance\n",
+    "accuracy = accuracy_score(y_test, y_pred)\n",
+    "print(f'KNN Classification Accuracy: {accuracy}')\n",
+    "print('Classification Report:\\n', classification_report(y_test, y_pred))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "vugOPwlBFzmb"
+   },
+   "source": [
+    "### ***Question 4***"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 0
+    },
+    "executionInfo": {
+     "elapsed": 9,
+     "status": "ok",
+     "timestamp": 1741051643450,
+     "user": {
+      "displayName": "Jaison A",
+      "userId": "07006398627763032071"
+     },
+     "user_tz": -330
+    },
+    "id": "T7-4PahjFzED",
+    "outputId": "90198191-aa59-469a-c25f-c2784588917e"
+   },
+   "outputs": [],
+   "source": [
+    "df=pd.read_csv('/content/drive/MyDrive/sem 6/Lab/ML Lab/SOC_LAB1/dataset/KNN/heart_disease_uci.csv')\n",
+    "display(df.head())\n",
+    "df.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 0
+    },
+    "executionInfo": {
+     "elapsed": 28,
+     "status": "ok",
+     "timestamp": 1741051643481,
+     "user": {
+      "displayName": "Jaison A",
+      "userId": "07006398627763032071"
+     },
+     "user_tz": -330
+    },
+    "id": "kRJLOdSDnaH3",
+    "outputId": "7f4ca5b9-dd81-4b25-f449-7ab5837dd475"
+   },
+   "outputs": [],
+   "source": [
+    "# Remove the 'id' and 'ca' columns, then preview.\n",
+    "df=df.drop(['id','ca'],axis=1)\n",
+    "display(df.head())\n",
+    "\n",
+    "# Integer-encode each object column (cast to str first), using a fresh LabelEncoder per column.\n",
+    "for col in df.select_dtypes(include=['object']).columns:\n",
+    "  df[col]=LabelEncoder().fit_transform(df[col].astype('str'))\n",
+    "\n",
+    "display(df.head())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "executionInfo": {
+     "elapsed": 7,
+     "status": "ok",
+     "timestamp": 1741051643490,
+     "user": {
+      "displayName": "Jaison A",
+      "userId": "07006398627763032071"
+     },
+     "user_tz": -330
+    },
+    "id": "FPX8T0AUo4Fq",
+    "outputId": "3fc16702-dbb5-4472-b04c-7993d26aa753"
+   },
+   "outputs": [],
+   "source": [
+    "# Missing-value count for every column.\n",
+    "print(df.isna().sum())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "executionInfo": {
+     "elapsed": 2,
+     "status": "ok",
+     "timestamp": 1741051643494,
+     "user": {
+      "displayName": "Jaison A",
+      "userId": "07006398627763032071"
+     },
+     "user_tz": -330
+    },
+    "id": "MfxJ653BpOt7"
+   },
+   "outputs": [],
+   "source": [
+    "# Replace missing values in these four columns with the column mean.\n",
+    "for col in ['trestbps','chol','thalch','oldpeak']:\n",
+    "  df[col]=df[col].fillna(df[col].mean())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "vYZLZ6dyp0uy"
+   },
+   "source": [
+    "**Without PCA**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "executionInfo": {
+     "elapsed": 48,
+     "status": "ok",
+     "timestamp": 1741051643551,
+     "user": {
+      "displayName": "Jaison A",
+      "userId": "07006398627763032071"
+     },
+     "user_tz": -330
+    },
+    "id": "tKjY9CGjtW_Q",
+    "outputId": "506e2397-7fc4-4332-f7ac-581ba4a91be7"
+   },
+   "outputs": [],
+   "source": [
+    "# Cluster every column except 'num' into 5 groups.\n",
+    "kmeans=KMeans(n_clusters=5,random_state=42,n_init=10)\n",
+    "y_pred_kmeans=kmeans.fit(df.drop(columns=['num'])).labels_\n",
+    "\n",
+    "# Adjusted Rand index between the 'num' column and the cluster assignment.\n",
+    "ari=adjusted_rand_score(df['num'],y_pred_kmeans)\n",
+    "print(ari)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "deVfVji5uPnB"
+   },
+   "source": [
+    "**With PCA**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "executionInfo": {
+     "elapsed": 48,
+     "status": "ok",
+     "timestamp": 1741051643607,
+     "user": {
+      "displayName": "Jaison A",
+      "userId": "07006398627763032071"
+     },
+     "user_tz": -330
+    },
+    "id": "_DuzNnwAuRkQ",
+    "outputId": "8027f17a-f0a2-4737-b390-32ee25bbda7c"
+   },
+   "outputs": [],
+   "source": [
+    "# Project every column except 'num' onto two principal components, re-fit the existing\n",
+    "# kmeans object on that projection and score the clusters against 'num'.\n",
+    "pca=PCA(n_components=2)\n",
+    "pca_x=pca.fit_transform(df.drop(columns=['num']))\n",
+    "y_pred_kmeans=kmeans.fit(pca_x).labels_\n",
+    "ari=adjusted_rand_score(df['num'],y_pred_kmeans)\n",
+    "print(ari)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 0
+    },
+    "executionInfo": {
+     "elapsed": 423,
+     "status": "ok",
+     "timestamp": 1741051644051,
+     "user": {
+      "displayName": "Jaison A",
+      "userId": "07006398627763032071"
+     },
+     "user_tz": -330
+    },
+    "id": "kcofB6n_skcD",
+    "outputId": "f55e545c-6fe9-4f19-cc1b-1513d89caabc"
+   },
+   "outputs": [],
+   "source": [
+    "# Scatter of the two principal components, coloured by the KMeans cluster assignment.\n",
+    "plt.figure(figsize=(8, 6))\n",
+    "scatter = plt.scatter(pca_x[:, 0], pca_x[:, 1], c=y_pred_kmeans, cmap='viridis', alpha=0.6)\n",
+    "# The colours encode y_pred_kmeans (cluster ids), so the colorbar is labelled accordingly.\n",
+    "plt.colorbar(scatter, label='Cluster Label')\n",
+    "plt.xlabel('Principal Component 1')\n",
+    "plt.ylabel('Principal Component 2')\n",
+    "plt.title('K Means Clustering')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "executionInfo": {
+     "elapsed": 9,
+     "status": "ok",
+     "timestamp": 1741051644055,
+     "user": {
+      "displayName": "Jaison A",
+      "userId": "07006398627763032071"
+     },
+     "user_tz": -330
+    },
+    "id": "pmdXFh2Pslhu",
+    "outputId": "1f864500-b714-4c40-db6e-c10bf593fbb4"
+   },
+   "outputs": [],
+   "source": [
+    "# Mean squared error between the original features and their reconstruction from the 2 components.\n",
+    "X=df.drop(columns=['num'])\n",
+    "X_reconstructed = pca.inverse_transform(pca_x)\n",
+    "# Compare as plain NumPy arrays so np.mean collapses every element into one scalar.\n",
+    "# With a DataFrame operand the reduction is delegated to DataFrame.mean, which on some pandas\n",
+    "# versions returns a per-column Series that the ':.4f' format below cannot handle.\n",
+    "reconstruction_error = np.mean(np.square(X.to_numpy(dtype=float) - X_reconstructed))\n",
+    "print(f\"Reconstruction error: {reconstruction_error:.4f}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "z5iVUmUvF3mZ"
+   },
+   "source": [
+    "### ***Question 5***"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 206
+    },
+    "executionInfo": {
+     "elapsed": 394,
+     "status": "ok",
+     "timestamp": 1741051644442,
+     "user": {
+      "displayName": "Jaison A",
+      "userId": "07006398627763032071"
+     },
+     "user_tz": -330
+    },
+    "id": "CJ7Y5gaLF6LQ",
+    "outputId": "3cbf424b-07ba-4fef-a73f-c000a5e9c074"
+   },
+   "outputs": [],
+   "source": [
+    "df=pd.read_csv('/content/drive/MyDrive/sem 6/Lab/ML Lab/SOC_LAB1/dataset/Regression/Housing.csv')\n",
+    "le=LabelEncoder()\n",
+    "for col in df.select_dtypes(include=['object']).columns:\n",
+    "  df[col]=le.fit_transform(df[col])\n",
+    "display(df.head())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 465
+    },
+    "executionInfo": {
+     "elapsed": 193,
+     "status": "ok",
+     "timestamp": 1741051644636,
+     "user": {
+      "displayName": "Jaison A",
+      "userId": "07006398627763032071"
+     },
+     "user_tz": -330
+    },
+    "id": "W0cMte6vGo91",
+    "outputId": "f9299ed7-2902-484a-a2c5-5c085dfc7bcb"
+   },
+   "outputs": [],
+   "source": [
+    "# Price against area.\n",
+    "fig, ax = plt.subplots()\n",
+    "ax.scatter(x=df['area'],y=df['price'])\n",
+    "ax.set_xlabel('area')\n",
+    "ax.set_ylabel('price')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "executionInfo": {
+     "elapsed": 14,
+     "status": "ok",
+     "timestamp": 1741051644653,
+     "user": {
+      "displayName": "Jaison A",
+      "userId": "07006398627763032071"
+     },
+     "user_tz": -330
+    },
+    "id": "tBRb-1x2G8cM",
+    "outputId": "a915ec62-947d-4805-f162-22a2ea2d3c45"
+   },
+   "outputs": [],
+   "source": [
+    "# Candidate predictor columns; this cell itself only regresses on 'area'.\n",
+    "features=[\n",
+    "    \"area\",\n",
+    "    \"bedrooms\",\n",
+    "    \"bathrooms\",\n",
+    "    \"stories\",\n",
+    "    \"mainroad\",\n",
+    "    \"guestroom\",\n",
+    "    \"basement\",\n",
+    "    \"hotwaterheating\",\n",
+    "    \"airconditioning\",\n",
+    "    \"parking\",\n",
+    "    \"prefarea\",\n",
+    "    \"furnishingstatus\",\n",
+    "]\n",
+    "\n",
+    "# Simple linear regression: price explained by area alone.\n",
+    "x=df[['area']]\n",
+    "y=df['price']\n",
+    "\n",
+    "X_train,X_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)\n",
+    "\n",
+    "lg=LinearRegression().fit(X_train,y_train)\n",
+    "y_pred=lg.predict(X_test)\n",
+    "\n",
+    "print(f'r2_Score : {r2_score(y_test,y_pred)}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "executionInfo": {
+     "elapsed": 18,
+     "status": "ok",
+     "timestamp": 1741051644668,
+     "user": {
+      "displayName": "Jaison A",
+      "userId": "07006398627763032071"
+     },
+     "user_tz": -330
+    },
+    "id": "d92QBRPwHIPD",
+    "outputId": "24a67bdc-2b7f-4cbb-a177-0281cd54dff6"
+   },
+   "outputs": [],
+   "source": [
+    "# Multiple linear regression: the columns in `features` are the predictors and 'price' is the\n",
+    "# target. Assigning them the other way round would regress the twelve features on price.\n",
+    "x=df[features]\n",
+    "y=df['price']\n",
+    "\n",
+    "X_train,X_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)\n",
+    "\n",
+    "# x is already a 2-D frame (one column per feature), so no reshape is needed before fitting.\n",
+    "lg=LinearRegression()\n",
+    "lg.fit(X_train,y_train)\n",
+    "\n",
+    "y_pred=lg.predict(X_test)\n",
+    "\n",
+    "print(f'r2_Score : {r2_score(y_test,y_pred)}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "executionInfo": {
+     "elapsed": 37,
+     "status": "ok",
+     "timestamp": 1741051644707,
+     "user": {
+      "displayName": "Jaison A",
+      "userId": "07006398627763032071"
+     },
+     "user_tz": -330
+    },
+    "id": "bZr_ZoedHOJI",
+    "outputId": "7a9a1d5e-18d6-4656-9ab0-e247abd8a5dc"
+   },
+   "outputs": [],
+   "source": [
+    "# Variance inflation factor for each predictor column.\n",
+    "features=[\n",
+    "    \"area\", \"bedrooms\", \"bathrooms\", \"stories\", \"mainroad\", \"guestroom\",\n",
+    "    \"basement\", \"hotwaterheating\", \"airconditioning\", \"parking\",\n",
+    "    \"prefarea\", \"furnishingstatus\"\n",
+    "]\n",
+    "X=df[features]\n",
+    "# variance_inflation_factor regresses one column on the remaining columns exactly as given and\n",
+    "# does not add an intercept itself, so append a constant column to the design matrix.\n",
+    "X_design=X.assign(const=1.0)\n",
+    "vif_data = pd.DataFrame()\n",
+    "vif_data[\"Feature\"] = features\n",
+    "# const is the last column, so indices 0..len(features)-1 still address the features only.\n",
+    "vif_data[\"VIF\"] = [variance_inflation_factor(X_design.values, i) for i in range(len(features))]\n",
+    "print(\"\\nVariance Inflation Factor (VIF):\")\n",
+    "print(vif_data)"
+   ]
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "authorship_tag": "ABX9TyOo5KSbG35NjuMjAiytt9Xd",
+   "collapsed_sections": [
+    "R7euuRFaCdIZ",
+    "qCogj3nw4UUy",
+    "ytnBNsap7xZZ",
+    "FU9VMoYaByfs",
+    "vugOPwlBFzmb"
+   ],
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

noshot 5.0.0__py3-none-any.whl → 7.0.0__py3-none-any.whl

noshot 5.0.0py3-none-any.whl → 7.0.0py3-none-any.whl