noshot 1.0.0__py3-none-any.whl → 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. noshot/data/ML TS XAI/ML/Main/1. EDA-PCA (Balance Scale Dataset).ipynb +139 -0
  2. noshot/data/ML TS XAI/ML/Main/1. EDA-PCA (Rice Dataset).ipynb +181 -0
  3. noshot/data/ML TS XAI/ML/Main/10. HMM Veterbi.ipynb +228 -0
  4. noshot/data/ML TS XAI/ML/Main/2. KNN (Balance Scale Dataset).ipynb +117 -0
  5. noshot/data/ML TS XAI/ML/Main/2. KNN (Iris Dataset).ipynb +165 -0
  6. noshot/data/ML TS XAI/ML/Main/2. KNN (Sobar-72 Dataset).ipynb +251 -0
  7. noshot/data/ML TS XAI/ML/Main/3. LDA (Balance Scale Dataset).ipynb +78 -0
  8. noshot/data/ML TS XAI/ML/Main/3. LDA (NPHA Doctor Visits Dataset).ipynb +114 -0
  9. noshot/data/ML TS XAI/ML/Main/4. Linear Regression (Machine Dataset).ipynb +115 -0
  10. noshot/data/ML TS XAI/ML/Main/4. Linear Regression (Real Estate Dataset).ipynb +159 -0
  11. noshot/data/ML TS XAI/{XAI/XAI 2/Exp-3 (EDA-loan).ipynb → ML/Main/5. Logistic Regression (Magic04 Dataset).ipynb} +53 -74
  12. noshot/data/ML TS XAI/ML/Main/5. Logistic Regression (Wine Dataset).ipynb +112 -0
  13. noshot/data/ML TS XAI/ML/Main/6. Naive Bayes Classifier (Agaricus Lepiota Dataset).ipynb +153 -0
  14. noshot/data/ML TS XAI/ML/Main/6. Naive Bayes Classifier (Wine Dataset).ipynb +89 -0
  15. noshot/data/ML TS XAI/ML/Main/7. SVM (Rice Dataset).ipynb +208 -0
  16. noshot/data/ML TS XAI/ML/Main/8. FeedForward NN (Sobar72 Dataset).ipynb +260 -0
  17. noshot/data/ML TS XAI/ML/Main/9. CNN (Cifar10 Dataset).ipynb +238 -0
  18. noshot/data/ML TS XAI/ML/Main/data/agaricus-lepiota.data +8124 -0
  19. noshot/data/ML TS XAI/ML/Main/data/balance-scale.txt +625 -0
  20. noshot/data/ML TS XAI/ML/Main/data/doctor-visits.csv +715 -0
  21. noshot/data/ML TS XAI/ML/Main/data/iris.csv +151 -0
  22. noshot/data/ML TS XAI/ML/Main/data/machine-data.csv +210 -0
  23. noshot/data/ML TS XAI/ML/Main/data/magic04.data +19020 -0
  24. noshot/data/ML TS XAI/ML/Main/data/real-estate.xlsx +0 -0
  25. noshot/data/ML TS XAI/ML/Main/data/rice.arff +3826 -0
  26. noshot/data/ML TS XAI/ML/Main/data/sobar-72.csv +73 -0
  27. noshot/data/ML TS XAI/ML/Main/data/wine-dataset.csv +179 -0
  28. noshot/data/ML TS XAI/ML/Other Codes.ipynb +158 -0
  29. noshot/data/ML TS XAI/ML/Rolls Royce AllinOne.ipynb +691 -0
  30. {noshot-1.0.0.dist-info → noshot-3.0.0.dist-info}/METADATA +1 -1
  31. noshot-3.0.0.dist-info/RECORD +38 -0
  32. {noshot-1.0.0.dist-info → noshot-3.0.0.dist-info}/WHEEL +1 -1
  33. noshot/data/ML TS XAI/TS/10. Seasonal ARIMA Forecasting.ipynb +0 -246
  34. noshot/data/ML TS XAI/TS/11. Multivariate ARIMA Forecasting.ipynb +0 -228
  35. noshot/data/ML TS XAI/TS/6. ACF PACF.ipynb +0 -77
  36. noshot/data/ML TS XAI/TS/7. Differencing.ipynb +0 -167
  37. noshot/data/ML TS XAI/TS/8. ARMA Forecasting.ipynb +0 -197
  38. noshot/data/ML TS XAI/TS/9. ARIMA Forecasting.ipynb +0 -220
  39. noshot/data/ML TS XAI/XAI/XAI 1/EDA2_chipsdatset.ipynb +0 -633
  40. noshot/data/ML TS XAI/XAI/XAI 1/EDA_IRISH_8thjan.ipynb +0 -326
  41. noshot/data/ML TS XAI/XAI/XAI 1/XAI_EX1 MODEL BIAS (FINAL).ipynb +0 -487
  42. noshot/data/ML TS XAI/XAI/XAI 1/complete_guide_to_eda_on_text_data.ipynb +0 -845
  43. noshot/data/ML TS XAI/XAI/XAI 1/deepchecksframeworks.ipynb +0 -100
  44. noshot/data/ML TS XAI/XAI/XAI 1/deepexplainers (mnist).ipynb +0 -90
  45. noshot/data/ML TS XAI/XAI/XAI 1/guidedbackpropagation.ipynb +0 -203
  46. noshot/data/ML TS XAI/XAI/XAI 1/updated_image_EDA1_with_LRP.ipynb +0 -3998
  47. noshot/data/ML TS XAI/XAI/XAI 1/zebrastripes.ipynb +0 -271
  48. noshot/data/ML TS XAI/XAI/XAI 2/EXP_5.ipynb +0 -1545
  49. noshot/data/ML TS XAI/XAI/XAI 2/Exp-3 (EDA-movie).ipynb +0 -229
  50. noshot/data/ML TS XAI/XAI/XAI 2/Exp-4(Flower dataset).ipynb +0 -237
  51. noshot/data/ML TS XAI/XAI/XAI 2/Exp-4.ipynb +0 -241
  52. noshot/data/ML TS XAI/XAI/XAI 2/Exp_2.ipynb +0 -352
  53. noshot/data/ML TS XAI/XAI/XAI 2/Exp_7.ipynb +0 -110
  54. noshot/data/ML TS XAI/XAI/XAI 2/FeatureImportance_SensitivityAnalysis.ipynb +0 -708
  55. noshot-1.0.0.dist-info/RECORD +0 -32
  56. {noshot-1.0.0.dist-info → noshot-3.0.0.dist-info}/licenses/LICENSE.txt +0 -0
  57. {noshot-1.0.0.dist-info → noshot-3.0.0.dist-info}/top_level.txt +0 -0
--- noshot/data/ML TS XAI/XAI/XAI 1/XAI_EX1 MODEL BIAS (FINAL).ipynb
+++ /dev/null
@@ -1,487 +0,0 @@
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "9b1b1eea",
- "metadata": {
- "id": "68d8b49b",
- "outputId": "e3f005bc-713f-4dad-9714-d981f49af033"
- },
- "outputs": [],
- "source": [
- "# Imbalanced dataset\n",
- "import matplotlib.pyplot as plt\n",
- "from sklearn.datasets import make_classification\n",
- "from imblearn.over_sampling import SMOTE\n",
- "from collections import Counter\n",
- "from sklearn.naive_bayes import GaussianNB  # Importing Naive Bayes\n",
- "from sklearn.model_selection import train_test_split\n",
- "from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay\n",
- "import seaborn as sns\n",
- "\n",
- "# Generate an imbalanced dataset\n",
- "X, y = make_classification(n_samples=1000, n_features=10, n_classes=2,\n",
- "                           class_sep=2, weights=[0.9, 0.1], random_state=42)\n",
- "\n",
- "# Display class distribution\n",
- "print(\"Original class distribution:\", Counter(y))\n",
- "\n",
- "# Plot the class distribution\n",
- "plt.figure(figsize=(6,4))\n",
- "plt.bar(['Class 0', 'Class 1'], [Counter(y)[0], Counter(y)[1]], color=['blue', 'orange'])\n",
- "plt.title('Original Class Distribution')\n",
- "plt.ylabel('Frequency')\n",
- "plt.show()\n",
- "\n",
- "# Apply SMOTE (Synthetic Minority Over-sampling Technique) for oversampling\n",
- "smote = SMOTE(random_state=42)\n",
- "X_res, y_res = smote.fit_resample(X, y)\n",
- "\n",
- "# Display new class distribution\n",
- "print(\"Resampled class distribution:\", Counter(y_res))\n",
- "\n",
- "# Plot the resampled class distribution\n",
- "plt.figure(figsize=(6,4))\n",
- "plt.bar(['Class 0', 'Class 1'], [Counter(y_res)[0], Counter(y_res)[1]], color=['blue', 'orange'])\n",
- "plt.title('Resampled Class Distribution (SMOTE)')\n",
- "plt.ylabel('Frequency')\n",
- "plt.show()\n",
- "\n",
- "# Train and evaluate a Naive Bayes classifier on the resampled data\n",
- "X_train, X_test, y_train, y_test = train_test_split(X_res, y_res, test_size=0.3, random_state=42)\n",
- "\n",
- "# Using Naive Bayes Classifier\n",
- "clf = GaussianNB()\n",
- "clf.fit(X_train, y_train)\n",
- "y_pred_resampled = clf.predict(X_test)\n",
- "\n",
- "# Evaluate the model on the resampled dataset\n",
- "print(\"Classification report on resampled data:\")\n",
- "print(classification_report(y_test, y_pred_resampled))\n",
- "\n",
- "# Confusion Matrix for resampled data\n",
- "cm_resampled = confusion_matrix(y_test, y_pred_resampled)\n",
- "disp = ConfusionMatrixDisplay(confusion_matrix=cm_resampled, display_labels=['Class 0', 'Class 1'])\n",
- "disp.plot(cmap='Blues')\n",
- "plt.title(\"Confusion Matrix - Resampled Data\")\n",
- "plt.show()\n",
- "\n",
- "# Now, evaluate the model on the original imbalanced data\n",
- "X_train_imbalanced, X_test_imbalanced, y_train_imbalanced, y_test_imbalanced = train_test_split(X, y, test_size=0.3, random_state=42)\n",
- "\n",
- "# Train the Naive Bayes Classifier on imbalanced data\n",
- "clf.fit(X_train_imbalanced, y_train_imbalanced)\n",
- "y_pred_imbalanced = clf.predict(X_test_imbalanced)\n",
- "\n",
- "# Evaluate the model on the original imbalanced dataset\n",
- "print(\"Classification report on imbalanced data:\")\n",
- "print(classification_report(y_test_imbalanced, y_pred_imbalanced))\n",
- "\n",
- "# Confusion Matrix for imbalanced data\n",
- "cm_imbalanced = confusion_matrix(y_test_imbalanced, y_pred_imbalanced)\n",
- "disp_imbalanced = ConfusionMatrixDisplay(confusion_matrix=cm_imbalanced, display_labels=['Class 0', 'Class 1'])\n",
- "disp_imbalanced.plot(cmap='Blues')\n",
- "plt.title(\"Confusion Matrix - Imbalanced Data\")\n",
- "plt.show()\n",
- "\n",
- "# You can compare metrics like precision, recall, and F1-score to see how much improvement SMOTE brings\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "8fbfd688",
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 564
- },
- "id": "yNchFpnR3EBH",
- "outputId": "b8484e77-c0f0-4626-bc0f-c9e17fe6b51f"
- },
- "outputs": [],
- "source": [
- "import numpy as np\n",
- "import matplotlib.pyplot as plt\n",
- "from sklearn.linear_model import LinearRegression\n",
- "from sklearn.model_selection import train_test_split\n",
- "\n",
- "# Generate data with a linear relationship (y = 2X + 1)\n",
- "np.random.seed(42)\n",
- "X = np.linspace(0, 10, 100)\n",
- "y = 2 * X + 1  # True underlying function\n",
- "\n",
- "# Add some noise to the data\n",
- "noise = np.random.normal(0, 2, X.shape)\n",
- "y_noisy = y + noise\n",
- "\n",
- "# Introduce outliers\n",
- "X_outliers = np.array([2, 4, 6, 8])\n",
- "y_outliers = np.array([25, 30, 28, 35])  # Outliers with large values\n",
- "X_combined = np.concatenate((X, X_outliers))\n",
- "y_combined = np.concatenate((y_noisy, y_outliers))\n",
- "\n",
- "# Split the data into training and test sets\n",
- "X_train, X_test, y_train, y_test = train_test_split(X_combined, y_combined, test_size=0.2, random_state=42)\n",
- "\n",
- "# Reshape data for the linear regression model\n",
- "X_train = X_train.reshape(-1, 1)\n",
- "X_test = X_test.reshape(-1, 1)\n",
- "\n",
- "# Fit the linear regression model\n",
- "model = LinearRegression()\n",
- "model.fit(X_train, y_train)\n",
- "\n",
- "# Predict on both training and testing data\n",
- "y_train_pred = model.predict(X_train)\n",
- "y_test_pred = model.predict(X_test)\n",
- "\n",
- "# Plot the results\n",
- "plt.figure(figsize=(10, 6))\n",
- "plt.scatter(X_train, y_train, color='blue', label='Training data')\n",
- "plt.scatter(X_test, y_test, color='green', label='Test data')\n",
- "plt.plot(X_train, y_train_pred, color='red', label='Fitted line (with outliers)')\n",
- "plt.plot(X, y, color='black', label='True line (y = 2X + 1)', linestyle='--')\n",
- "plt.legend()\n",
- "plt.xlabel('X')\n",
- "plt.ylabel('y')\n",
- "plt.title('Linear Regression with Outliers')\n",
- "plt.show()\n"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "f22d745d",
- "metadata": {
- "id": "u0VEh-1S4GyO"
- },
- "source": [
- "The blue dots represent the training data, which includes both the normal data and the outliers.\n",
- "The green dots represent the test data.\n",
- "The red line is the fitted line from the linear regression model. Notice how it is significantly influenced by the outliers, pulling the line away from the true underlying relationship (the black dashed line).\n",
- "The black dashed line is the true relationship y = 2X + 1."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "2efa5ff9",
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 585
- },
- "id": "44a87f9d",
- "outputId": "b89d8054-ee86-4a7f-cf38-a4489bcae2e7"
- },
- "outputs": [],
- "source": [
- "import numpy as np\n",
- "import matplotlib.pyplot as plt\n",
- "from sklearn.naive_bayes import GaussianNB\n",
- "from sklearn.model_selection import train_test_split\n",
- "from sklearn.metrics import accuracy_score\n",
- "\n",
- "# Step 1: Generate synthetic data (2 features, binary target)\n",
- "np.random.seed(42)\n",
- "\n",
- "# Create 100 samples with two features\n",
- "X = np.random.randn(100, 2)\n",
- "\n",
- "# Assign labels: 0 for class 0, 1 for class 1\n",
- "y = (X[:, 0] + X[:, 1] > 0).astype(int)\n",
- "\n",
- "# Step 2: Introduce noise in the data (adding random noise)\n",
- "X_noisy = X + np.random.normal(0, 1, X.shape)  # Adding noise with mean 0 and std 1\n",
- "\n",
- "# Step 3: Split the data into training and test sets\n",
- "X_train, X_test, y_train, y_test = train_test_split(X_noisy, y, test_size=0.3, random_state=42)\n",
- "\n",
- "# Step 4: Apply Naive Bayes classifier\n",
- "model = GaussianNB()\n",
- "model.fit(X_train, y_train)\n",
- "\n",
- "# Step 5: Make predictions\n",
- "y_pred = model.predict(X_test)\n",
- "\n",
- "# Step 6: Evaluate performance\n",
- "accuracy = accuracy_score(y_test, y_pred)\n",
- "\n",
- "# Step 7: Visualize the noisy data and decision boundary\n",
- "plt.figure(figsize=(8, 6))\n",
- "plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap='coolwarm', marker='o', label='Train data')\n",
- "plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap='coolwarm', marker='x', label='Test data')\n",
- "\n",
- "# Plot decision boundary\n",
- "x_min, x_max = X_noisy[:, 0].min() - 1, X_noisy[:, 0].max() + 1\n",
- "y_min, y_max = X_noisy[:, 1].min() - 1, X_noisy[:, 1].max() + 1\n",
- "xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))\n",
- "Z = model.predict(np.c_[xx.ravel(), yy.ravel()])\n",
- "Z = Z.reshape(xx.shape)\n",
- "\n",
- "plt.contourf(xx, yy, Z, alpha=0.3)\n",
- "plt.title(f'Naive Bayes Classifier with Noisy Data\\nAccuracy: {accuracy:.2f}')\n",
- "plt.xlabel('Feature 1')\n",
- "plt.ylabel('Feature 2')\n",
- "plt.legend()\n",
- "plt.show()\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "f68e2206",
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 503
- },
- "id": "55dcbd50",
- "outputId": "31a071d1-6f09-4aed-cc5c-12d8e638eb92",
- "scrolled": true
- },
- "outputs": [],
- "source": [
- "import numpy as np\n",
- "import matplotlib.pyplot as plt\n",
- "from sklearn.linear_model import LogisticRegression\n",
- "from sklearn.metrics import accuracy_score, log_loss\n",
- "from scipy.stats import entropy\n",
- "\n",
- "# Step 1: Generate synthetic data (initial dataset)\n",
- "np.random.seed(42)\n",
- "n_samples = 500\n",
- "X1 = np.random.normal(0, 1, n_samples)  # Feature 1\n",
- "X2 = np.random.normal(0, 1, n_samples)  # Feature 2\n",
- "X = np.column_stack((X1, X2))\n",
- "y = (X1 + X2 > 0).astype(int)  # Binary target\n",
- "\n",
- "# Step 2: Fit logistic regression on the initial data\n",
- "model = LogisticRegression()\n",
- "model.fit(X, y)\n",
- "\n",
- "# Step 3: Simulate drift (change the distribution of features)\n",
- "X1_drifted = np.random.normal(1, 1, n_samples)  # Shift mean from 0 to 1\n",
- "X2_drifted = np.random.normal(1, 1, n_samples)  # Shift mean from 0 to 1\n",
- "X_drifted = np.column_stack((X1_drifted, X2_drifted))\n",
- "y_drifted = (X1_drifted + X2_drifted > 0).astype(int)  # New labels based on drifted data\n",
- "\n",
- "# Step 4: Detect drift using KL divergence\n",
- "def kl_divergence(p, q, bins=10):\n",
- "    \"\"\"Calculate KL divergence between two distributions.\"\"\"\n",
- "    p_hist, _ = np.histogram(p, bins=bins, density=True)\n",
- "    q_hist, _ = np.histogram(q, bins=bins, density=True)\n",
- "    p_hist += 1e-10  # Avoid division by zero\n",
- "    q_hist += 1e-10\n",
- "    return entropy(p_hist, q_hist)\n",
- "\n",
- "# Calculate KL divergence for each feature\n",
- "kl_X1 = kl_divergence(X1, X1_drifted)\n",
- "kl_X2 = kl_divergence(X2, X2_drifted)\n",
- "\n",
- "# Step 5: Evaluate model performance on drifted data\n",
- "y_pred_drifted = model.predict(X_drifted)\n",
- "accuracy_drifted = accuracy_score(y_drifted, y_pred_drifted)\n",
- "log_loss_drifted = log_loss(y_drifted, model.predict_proba(X_drifted))\n",
- "\n",
- "# Step 6: Visualization\n",
- "plt.figure(figsize=(12, 5))\n",
- "\n",
- "# Plot original and drifted distributions\n",
- "plt.subplot(1, 2, 1)\n",
- "plt.hist(X1, bins=20, alpha=0.6, label=\"Feature 1 (original)\", color=\"blue\")\n",
- "plt.hist(X1_drifted, bins=20, alpha=0.6, label=\"Feature 1 (drifted)\", color=\"orange\")\n",
- "plt.title(f\"KL Divergence for Feature 1: {kl_X1:.4f}\")\n",
- "plt.legend()\n",
- "\n",
- "plt.subplot(1, 2, 2)\n",
- "plt.hist(X2, bins=20, alpha=0.6, label=\"Feature 2 (original)\", color=\"blue\")\n",
- "plt.hist(X2_drifted, bins=20, alpha=0.6, label=\"Feature 2 (drifted)\", color=\"orange\")\n",
- "plt.title(f\"KL Divergence for Feature 2: {kl_X2:.4f}\")\n",
- "plt.legend()\n",
- "\n",
- "plt.show()\n",
- "\n",
- "# Step 7: Print performance metrics\n",
- "print(f\"Model accuracy on drifted data: {accuracy_drifted:.4f}\")\n",
- "print(f\"Log loss on drifted data: {log_loss_drifted:.4f}\")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "6e8bdb48",
- "metadata": {},
- "outputs": [],
- "source": [
- "import pandas as pd\n",
- "import numpy as np\n",
- "from sklearn.model_selection import train_test_split\n",
- "from sklearn.linear_model import LogisticRegression\n",
- "from sklearn.metrics import classification_report\n",
- "\n",
- "# Simulated dataset\n",
- "data = pd.DataFrame({\n",
- "    'Income': [30000, 45000, 60000, 80000, 20000, 50000],\n",
- "    'CreditScore': [600, 650, 700, 750, 550, 680],\n",
- "    'Gender': [0, 1, 0, 1, 0, 1],  # 0: Male, 1: Female\n",
- "    'Approved': [0, 1, 1, 1, 0, 1]\n",
- "})\n",
- "\n",
- "X = data[['Income', 'CreditScore', 'Gender']]\n",
- "y = data['Approved']\n",
- "\n",
- "# Train-test split\n",
- "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)\n",
- "\n",
- "# Logistic Regression model\n",
- "model = LogisticRegression()\n",
- "model.fit(X_train, y_train)\n",
- "y_pred = model.predict(X_test)\n",
- "\n",
- "# Display results\n",
- "print(\"Classification Report:\\n\", classification_report(y_test, y_pred))\n",
- "\n",
- "# Identifying bias in coefficients\n",
- "print(\"Model Coefficients:\", model.coef_)\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "f877d16e",
- "metadata": {
- "id": "0EVGKJ9mJ5nS"
- },
- "outputs": [],
- "source": [
- "import numpy as np\n",
- "import torch\n",
- "import torch.nn as nn\n",
- "import torch.optim as optim\n",
- "from sklearn.model_selection import train_test_split\n",
- "from sklearn.metrics import accuracy_score\n",
- "import cv2\n",
- "import matplotlib.pyplot as plt\n",
- "\n",
- "# Function to add noise to an image\n",
- "def add_noise(image):\n",
- "    noise = np.random.normal(0, 0.1, image.shape)\n",
- "    noisy_image = image + noise\n",
- "    return np.clip(noisy_image, 0, 1)\n",
- "\n",
- "# Generate a simple dataset of images\n",
- "def generate_dataset(image_path, num_samples=100):\n",
- "    # Load and preprocess the original image\n",
- "    original_image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)\n",
- "    original_image = cv2.resize(original_image, (64, 64))  # Resize for simplicity\n",
- "    original_image = original_image / 255.0  # Normalize to [0, 1]\n",
- "\n",
- "    # Create dataset\n",
- "    images = []\n",
- "    labels = []  # 0 for original, 1 for noisy\n",
- "\n",
- "    for _ in range(num_samples // 2):\n",
- "        # Add original image (class 0)\n",
- "        images.append(original_image.flatten())  # Flatten the image\n",
- "        labels.append(0)\n",
- "\n",
- "        # Add noisy version (class 1)\n",
- "        noisy_image = add_noise(original_image)\n",
- "        images.append(noisy_image.flatten())\n",
- "        labels.append(1)\n",
- "\n",
- "    return np.array(images), np.array(labels)\n",
- "\n",
- "# Define a simple deep learning model\n",
- "class SimpleNN(nn.Module):\n",
- "    def __init__(self, input_size):\n",
- "        super(SimpleNN, self).__init__()\n",
- "        self.fc1 = nn.Linear(input_size, 128)\n",
- "        self.fc2 = nn.Linear(128, 64)\n",
- "        self.fc3 = nn.Linear(64, 2)  # Two classes: Original and Noisy\n",
- "        self.relu = nn.ReLU()\n",
- "\n",
- "    def forward(self, x):\n",
- "        x = self.relu(self.fc1(x))\n",
- "        x = self.relu(self.fc2(x))\n",
- "        x = self.fc3(x)\n",
- "        return x\n",
- "\n",
- "# Generate dataset\n",
- "image_path = 'image.jpg'  # Replace with your image file\n",
- "X, y = generate_dataset(image_path, num_samples=200)\n",
- "\n",
- "# Split into training and test sets\n",
- "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
- "\n",
- "# Convert data to PyTorch tensors\n",
- "X_train_tensor = torch.tensor(X_train, dtype=torch.float32)\n",
- "X_test_tensor = torch.tensor(X_test, dtype=torch.float32)\n",
- "y_train_tensor = torch.tensor(y_train, dtype=torch.long)\n",
- "y_test_tensor = torch.tensor(y_test, dtype=torch.long)\n",
- "\n",
- "# Define model, loss function, and optimizer\n",
- "input_size = X_train.shape[1]\n",
- "model = SimpleNN(input_size)\n",
- "criterion = nn.CrossEntropyLoss()\n",
- "optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
- "\n",
- "# Train the model\n",
- "epochs = 20\n",
- "for epoch in range(epochs):\n",
- "    # Forward pass\n",
- "    outputs = model(X_train_tensor)\n",
- "    loss = criterion(outputs, y_train_tensor)\n",
- "\n",
- "    # Backward pass and optimization\n",
- "    optimizer.zero_grad()\n",
- "    loss.backward()\n",
- "    optimizer.step()\n",
- "\n",
- "    print(f\"Epoch {epoch + 1}/{epochs}, Loss: {loss.item():.4f}\")\n",
- "\n",
- "# Evaluate the model\n",
- "with torch.no_grad():\n",
- "    y_pred = model(X_test_tensor)\n",
- "    y_pred_classes = torch.argmax(y_pred, dim=1).numpy()\n",
- "    accuracy = accuracy_score(y_test, y_pred_classes)\n",
- "    print(f\"Model Accuracy: {accuracy * 100:.2f}%\")\n",
- "\n",
- "# Visualize a test image with prediction\n",
- "test_image = X_test[0].reshape(64, 64)\n",
- "prediction = y_pred_classes[0]\n",
- "\n",
- "plt.imshow(test_image, cmap='gray')\n",
- "plt.title(f\"Predicted: {'Noisy' if prediction == 1 else 'Original'}\")\n",
- "plt.axis('off')\n",
- "plt.show()\n"
- ]
- }
- ],
- "metadata": {
- "colab": {
- "provenance": []
- },
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.12.4"
- },
- "nbformat": 4,
- "nbformat_minor": 5
- }