ai-security-toolkit 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
File without changes
@@ -0,0 +1,127 @@
+ import tensorflow as tf
+ import numpy as np
+ import matplotlib.pyplot as plt
+ from tensorflow.keras.datasets import mnist
+ from tensorflow.keras.models import Sequential
+ from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
+ from tensorflow.keras.utils import to_categorical
+ import os
+ import random
+ from datetime import datetime
+ import sys
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+ from ai_security_toolkit.shared.log_utils import append_report_row, save_plot
+
+ def main():
+     # Parameters
+     trigger_label_target = 7
+     trigger_class_source = 1
+     trigger_ratio = 0.1
+     trigger_size = 3
+     epochs = 3
+
+     # Add a white square trigger in the bottom-right corner
+     def add_trigger(img, trigger_size=3):
+         img = img.copy()
+         img[-trigger_size:, -trigger_size:] = 1.0
+         return img
+
+     # Build CNN model
+     def build_model():
+         model = Sequential([
+             Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
+             MaxPooling2D((2, 2)),
+             Flatten(),
+             Dense(64, activation='relu'),
+             Dense(10, activation='softmax')
+         ])
+         model.compile(optimizer='adam',
+                       loss='categorical_crossentropy',
+                       metrics=['accuracy'])
+         return model
+
+     # Load and preprocess MNIST
+     (x_train, y_train), (x_test, y_test) = mnist.load_data()
+     x_train = x_train.astype("float32") / 255.0
+     x_test = x_test.astype("float32") / 255.0
+     x_train = x_train.reshape((-1, 28, 28, 1))
+     x_test = x_test.reshape((-1, 28, 28, 1))
+
+     # Poison the training set: stamp the trigger onto a fraction of the source class and relabel
+     x_poisoned = []
+     y_poisoned = []
+
+     for i in range(len(x_train)):
+         if y_train[i] == trigger_class_source and random.random() < trigger_ratio:
+             poisoned_img = add_trigger(x_train[i], trigger_size)
+             x_poisoned.append(poisoned_img)
+             y_poisoned.append(trigger_label_target)
+
+     # Combine clean + poisoned
+     x_train_full = np.concatenate((x_train, np.array(x_poisoned)), axis=0)
+     y_train_full = np.concatenate((y_train, np.array(y_poisoned)), axis=0)
+
+     # Shuffle the training set
+     shuffle_idx = np.arange(len(x_train_full))
+     np.random.shuffle(shuffle_idx)
+     x_train_full = x_train_full[shuffle_idx]
+     y_train_full = y_train_full[shuffle_idx]
+
+     # One-hot encode labels
+     y_train_full_cat = to_categorical(y_train_full, 10)
+     y_test_cat = to_categorical(y_test, 10)
+
+     # Train the poisoned model
+     print("💉 Training model with backdoor trigger...")
+     model = build_model()
+     model.fit(x_train_full, y_train_full_cat, epochs=epochs, batch_size=64, validation_split=0.1, verbose=2)
+
+     # Evaluate on clean test set
+     clean_acc = model.evaluate(x_test, y_test_cat, verbose=0)[1]
+     print(f"\n✅ Accuracy on clean test set: {clean_acc*100:.2f}%")
+
+     # Evaluate on triggered test set
+     x_test_triggered = []
+     y_test_triggered = []
+
+     for i in range(len(x_test)):
+         if y_test[i] == trigger_class_source:
+             x_test_triggered.append(add_trigger(x_test[i], trigger_size))
+             y_test_triggered.append(trigger_label_target)
+
+     x_test_triggered = np.array(x_test_triggered)
+     y_test_triggered_cat = to_categorical(np.array(y_test_triggered), 10)
+
+     trigger_acc = model.evaluate(x_test_triggered, y_test_triggered_cat, verbose=0)[1]
+
+     # Log results
+     header = [
+         "Timestamp", "Attack_Type", "Source_Class", "Target_Class", "Trigger_Type",
+         "Trigger_Size", "Trigger_Ratio", "Clean_Accuracy", "Triggered_Accuracy"
+     ]
+     row = [
+         datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+         "Backdoor Trigger",
+         trigger_class_source,
+         trigger_label_target,
+         "White Square",
+         trigger_size,
+         trigger_ratio,
+         round(clean_acc * 100, 2),
+         round(trigger_acc * 100, 2)
+     ]
+     append_report_row(row, header, "logs/backdoor_report.csv")
+     print(f"🚨 Attack success rate (triggered inputs → predicted as {trigger_label_target}): {trigger_acc*100:.2f}%")
+
+     # Visualize a few triggered examples
+     plt.figure(figsize=(10, 2))
+     for i in range(5):
+         plt.subplot(1, 5, i+1)
+         plt.imshow(add_trigger(x_test[i])[..., 0], cmap='gray')
+         plt.title(f"Trigger {i+1}")
+         plt.axis('off')
+     plt.tight_layout()
+     save_plot(plt, "logs/backdoor_trigger_samples.png")
+
+ if __name__ == "__main__":
+     main()
@@ -0,0 +1,76 @@
+ import tensorflow as tf
+ import numpy as np
+ import matplotlib.pyplot as plt
+ from datetime import datetime
+ from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input, decode_predictions
+ from tensorflow.keras.preprocessing import image
+ from cleverhans.tf2.attacks.fast_gradient_method import fast_gradient_method
+ import sys
+ import os
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+ from ai_security_toolkit.shared.log_utils import append_report_row, save_plot
+
+ def main():
+     # Load MobileNetV2 pretrained on ImageNet
+     model = MobileNetV2(weights='imagenet')
+     model.trainable = False
+
+     # Load local image
+     img_path = "shared/images/elephant.jpg"  # Ensure this image exists
+     img = image.load_img(img_path, target_size=(224, 224))
+     x = image.img_to_array(img)
+     x = preprocess_input(np.expand_dims(x, axis=0))  # scales pixels to [-1, 1]
+
+     # Get original prediction
+     original_preds = model(x)
+     orig_pred = decode_predictions(original_preds.numpy(), top=1)[0][0]
+
+     # Generate adversarial example using FGSM
+     eps = 0.5  # Attack strength (L-infinity budget in the [-1, 1] input space)
+     x_adv = fast_gradient_method(model, x, eps=eps, norm=np.inf)
+
+     # Get adversarial prediction
+     adv_preds = model(x_adv)
+     adv_pred = decode_predictions(adv_preds.numpy(), top=1)[0][0]
+
+     # Show predictions
+     print("Original prediction:", orig_pred)
+     print("Adversarial prediction:", adv_pred)
+
+     # Log result
+     header = [
+         "Timestamp", "Model", "Image", "Attack", "Epsilon",
+         "Original Prediction", "Orig Confidence",
+         "Adversarial Prediction", "Adv Confidence", "Changed"
+     ]
+     row = [
+         datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+         "MobileNetV2",
+         img_path,
+         "FGSM",
+         eps,
+         orig_pred[1],
+         round(float(orig_pred[2]), 4),
+         adv_pred[1],
+         round(float(adv_pred[2]), 4),
+         orig_pred[1] != adv_pred[1]
+     ]
+     append_report_row(row, header, "logs/fgsm_report.csv")
+
+     # Visualize original and adversarial image
+     plt.figure(figsize=(10, 4))
+
+     plt.subplot(1, 2, 1)
+     plt.imshow(((x[0] + 1) / 2).clip(0, 1))  # undo the [-1, 1] preprocessing for display
+     plt.title(f"Original: {orig_pred[1]}")
+
+     plt.subplot(1, 2, 2)
+     plt.imshow(((x_adv[0].numpy() + 1) / 2).clip(0, 1))
+     plt.title(f"Adversarial: {adv_pred[1]}")
+
+     plt.tight_layout()
+     save_plot(plt, "logs/fgsm_visual.png")
+
+ if __name__ == "__main__":
+     main()
+
@@ -0,0 +1,78 @@
+ import tensorflow as tf
+ from tensorflow.keras.datasets import mnist
+ from tensorflow.keras.models import Sequential
+ from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
+ from tensorflow.keras.utils import to_categorical
+ import numpy as np
+ from datetime import datetime
+ import sys
+ import os
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+ from ai_security_toolkit.shared.log_utils import append_report_row, log_metrics
+
+ def main():
+     # Build a simple CNN
+     def build_model():
+         model = Sequential([
+             Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
+             MaxPooling2D((2, 2)),
+             Flatten(),
+             Dense(64, activation='relu'),
+             Dense(10, activation='softmax')
+         ])
+         model.compile(optimizer='adam',
+                       loss='categorical_crossentropy',
+                       metrics=['accuracy'])
+         return model
+
+     # Load and preprocess MNIST
+     (x_train, y_train), (x_test, y_test) = mnist.load_data()
+     x_train = x_train.astype("float32") / 255.0
+     x_test = x_test.astype("float32") / 255.0
+     x_train = x_train.reshape((-1, 28, 28, 1))
+     x_test = x_test.reshape((-1, 28, 28, 1))
+
+     # Save original training labels for comparison
+     y_train_clean = y_train.copy()
+
+     # Poisoning: flip 10% of the class-1 labels to 7
+     num_poison = int(0.10 * np.sum(y_train == 1))
+     indices_to_poison = np.where(y_train == 1)[0][:num_poison]
+     y_train_poisoned = y_train.copy()
+     y_train_poisoned[indices_to_poison] = 7
+
+     # Convert to categorical
+     y_train_clean_cat = to_categorical(y_train_clean, 10)
+     y_train_poisoned_cat = to_categorical(y_train_poisoned, 10)
+     y_test_cat = to_categorical(y_test, 10)
+
+     # Train clean model
+     print("🧼 Training clean model...")
+     model_clean = build_model()
+     model_clean.fit(x_train, y_train_clean_cat, epochs=3, batch_size=64, validation_split=0.1, verbose=2)
+     clean_loss, clean_acc = model_clean.evaluate(x_test, y_test_cat, verbose=0)
+
+     # Train poisoned model
+     print("💉 Training poisoned model (1→7 flipped)...")
+     model_poison = build_model()
+     model_poison.fit(x_train, y_train_poisoned_cat, epochs=3, batch_size=64, validation_split=0.1, verbose=2)
+     poison_loss, poison_acc = model_poison.evaluate(x_test, y_test_cat, verbose=0)
+
+     # Log both models
+     header = ["Timestamp", "Model", "Attack_Type", "Poisoned_Classes", "Train_Size", "Test_Accuracy"]
+     timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+     row_clean = [timestamp, "Clean_CNN", "None", "None", len(y_train), round(clean_acc, 4)]
+     row_poisoned = [timestamp, "Poisoned_CNN", "Label Flip (1→7)", "1→7", len(y_train), round(poison_acc, 4)]
+
+     append_report_row(row_clean, header, "logs/poisoning_report.csv")
+     append_report_row(row_poisoned, header, "logs/poisoning_report.csv")
+
+     # Print summary
+     print("\n📊 Summary:")
+     log_metrics(accuracy=clean_acc)
+     print(f"⚠️ Poisoned Model Accuracy: {poison_acc * 100:.2f}%")
+     print("📄 Report saved to: logs/poisoning_report.csv")
+
+ if __name__ == "__main__":
+     main()
@@ -0,0 +1,71 @@
+ import tensorflow as tf
+ import numpy as np
+ import matplotlib.pyplot as plt
+ from datetime import datetime
+ import sys
+ import os
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+ from ai_security_toolkit.shared.log_utils import save_plot, append_report_row, log_metrics
+
+ def main():
+     # Load model
+     model = tf.keras.models.load_model("shared/models/mnist_cnn_model.keras")
+     print("✅ Loaded model from .keras file.")
+
+     # Load MNIST
+     (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
+     x_train = x_train.astype("float32") / 255.0
+     x_test = x_test.astype("float32") / 255.0
+     x_train = x_train.reshape((-1, 28, 28, 1))
+     x_test = x_test.reshape((-1, 28, 28, 1))
+
+     # Members = samples the model saw in training; non-members = held-out test samples
+     num_samples = 1000  # from each set
+     x_members = x_train[:num_samples]
+     x_nonmembers = x_test[:num_samples]
+
+     # Get model predictions (confidence scores)
+     y_members_conf = np.max(model.predict(x_members), axis=1)
+     y_nonmembers_conf = np.max(model.predict(x_nonmembers), axis=1)
+
+     # Simple threshold-based classifier
+     threshold = 0.95  # Can be tuned
+
+     tp = np.sum(y_members_conf > threshold)
+     fp = np.sum(y_nonmembers_conf > threshold)
+     tn = np.sum(y_nonmembers_conf <= threshold)
+     fn = np.sum(y_members_conf <= threshold)
+
+     accuracy = (tp + tn) / (tp + fp + tn + fn)
+     precision = tp / (tp + fp + 1e-6)
+     recall = tp / (tp + fn + 1e-6)
+
+     log_metrics(accuracy, precision, recall)
+
+     # Visualize confidence distributions
+     plt.hist(y_members_conf, bins=30, alpha=0.6, label="Members")
+     plt.hist(y_nonmembers_conf, bins=30, alpha=0.6, label="Non-Members")
+     plt.axvline(threshold, color='red', linestyle='dashed', label="Threshold")
+     plt.title("Model Confidence Distributions")
+     plt.xlabel("Max Confidence")
+     plt.ylabel("Frequency")
+     plt.legend()
+     plt.tight_layout()
+     save_plot(plt, "logs/mia_confidence_plot.png")
+
+     # Logging
+     header = ["Timestamp", "Threshold", "Accuracy", "Precision", "Recall", "Members", "NonMembers"]
+     row = [
+         datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+         threshold,
+         round(accuracy * 100, 2),
+         round(precision * 100, 2),
+         round(recall * 100, 2),
+         num_samples,
+         num_samples
+     ]
+     append_report_row(row, header, "logs/membership_report.csv")
+
+ if __name__ == "__main__":
+     main()
+
@@ -0,0 +1,72 @@
+ import tensorflow as tf
+ import numpy as np
+ import matplotlib.pyplot as plt
+ from datetime import datetime
+ import time
+ import sys
+ import os
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+ from ai_security_toolkit.shared.log_utils import append_report_row, save_plot
+
+ def main():
+     # Load trained model
+     model = tf.keras.models.load_model("shared/models/mnist_cnn_model.keras")
+     model.trainable = False
+
+     # Create folders
+     os.makedirs("logs/inversion_images", exist_ok=True)
+     report_path = "logs/inversion_report.csv"
+
+     # Reconstruct a representative input for one class by optimizing the input image
+     def invert_class(target_class, model, save_path):
+         num_classes = 10
+         epochs = 1000
+         lr = 0.1
+
+         inverted_image = tf.Variable(tf.random.uniform((1, 28, 28, 1)), dtype=tf.float32)
+         target_label = tf.one_hot([target_class], depth=num_classes)
+         optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
+
+         start_time = time.time()
+
+         for epoch in range(epochs):
+             with tf.GradientTape() as tape:
+                 preds = model(inverted_image, training=False)
+                 loss = tf.keras.losses.categorical_crossentropy(target_label, preds)  # minimizing CE maximizes the target-class score
+
+             grads = tape.gradient(loss, inverted_image)
+             optimizer.apply_gradients([(grads, inverted_image)])
+             inverted_image.assign(tf.clip_by_value(inverted_image, 0.0, 1.0))
+
+         confidence = model(inverted_image)[0, target_class].numpy()  # confidence assigned to the target class
+         duration = time.time() - start_time
+         image_file = f"inversion_class_{target_class}.png"
+         full_image_path = os.path.join(save_path, image_file)
+
+         # Save image
+         plt.imshow(inverted_image[0, :, :, 0], cmap='gray')
+         plt.title(f"Class {target_class} - Conf: {confidence:.2f}")
+         plt.axis('off')
+         save_plot(plt, full_image_path)
+         plt.close()
+
+         # Log to CSV
+         row = [
+             datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+             target_class,
+             round(float(confidence), 4),
+             image_file,
+             round(duration, 2)
+         ]
+         header = ["Timestamp", "Class", "Confidence", "Image_File", "Time_Taken_s"]
+         append_report_row(row, header, report_path)
+
+         print(f"✅ Class {target_class} done | Confidence: {confidence:.2f} | Time: {round(duration, 2)}s")
+
+     # Run for all digits 0–9
+     for digit in range(10):
+         invert_class(digit, model, save_path="logs/inversion_images")
+
+ if __name__ == "__main__":
+     main()
+
@@ -0,0 +1,70 @@
+ import tensorflow as tf
+ import numpy as np
+ from datetime import datetime
+ from tensorflow.keras.datasets import mnist
+ from tensorflow.keras.models import Sequential
+ from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
+ from tensorflow.keras.utils import to_categorical
+ from tensorflow.keras.models import load_model
+ import sys
+ import os
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+ from ai_security_toolkit.shared.log_utils import append_report_row, log_metrics
+
+ def main():
+     # Step 1: Load original (victim) model
+     victim_model = load_model("shared/models/mnist_cnn_model.keras")
+     victim_model.trainable = False
+     print("✅ Loaded victim model.")
+
+     # Step 2: Build a query set (MNIST test images stand in for attacker-collected data)
+     (_, _), (x_test, y_test) = mnist.load_data()
+     x_query = x_test[:10000].astype("float32") / 255.0
+     x_query = x_query.reshape((-1, 28, 28, 1))
+
+     # Get predictions (soft labels) from the victim model
+     y_query = victim_model.predict(x_query)
+     print("📡 Queried victim model for 10,000 inputs.")
+
+     # Step 3: Train the stolen model (attacker's copycat)
+     def build_attacker_model():
+         model = Sequential([
+             Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
+             MaxPooling2D((2, 2)),
+             Flatten(),
+             Dense(64, activation='relu'),
+             Dense(10, activation='softmax')
+         ])
+         model.compile(optimizer='adam',
+                       loss='categorical_crossentropy',
+                       metrics=['accuracy'])
+         return model
+
+     attacker_model = build_attacker_model()
+
+     # Train attacker model using (x_query, y_query)
+     print("🧠 Training stolen model on queried (input, output) pairs...")
+     attacker_model.fit(x_query, y_query, epochs=3, batch_size=64, validation_split=0.1, verbose=2)
+
+     # Save stolen model
+     attacker_model.save("shared/models/stolen_model.keras")
+     print("💾 Stolen model saved as shared/models/stolen_model.keras")
+
+     # Evaluate the stolen model against the true test labels (the same images used for querying)
+     y_test_cat = to_categorical(y_test[:10000], 10)
+     loss, acc = attacker_model.evaluate(x_query, y_test_cat, verbose=0)
+     log_metrics(accuracy=acc)
+
+     # Log results
+     header = ["Timestamp", "Method", "Inputs_Used", "Stolen_Accuracy", "Notes"]
+     row = [
+         datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+         "Black-box Query (MNIST test images)",
+         len(x_query),
+         round(acc * 100, 2),
+         "No access to the victim's training data; trained on the victim's predictions only"
+     ]
+     append_report_row(row, header, "logs/stealing_report.csv")
+
+ if __name__ == "__main__":
+     main()
@@ -0,0 +1,51 @@
+ import tensorflow as tf
+ from tensorflow.keras.datasets import mnist
+ from tensorflow.keras.models import Sequential
+ from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
+ from tensorflow.keras.utils import to_categorical
+ import os
+
+ def main():
+     # Load and preprocess MNIST data
+     (x_train, y_train), (x_test, y_test) = mnist.load_data()
+
+     # Normalize to 0–1 range and reshape
+     x_train = x_train.astype("float32") / 255.0
+     x_test = x_test.astype("float32") / 255.0
+     x_train = x_train.reshape((-1, 28, 28, 1))
+     x_test = x_test.reshape((-1, 28, 28, 1))
+
+     # One-hot encode labels
+     y_train_cat = to_categorical(y_train, 10)
+     y_test_cat = to_categorical(y_test, 10)
+
+     # Build CNN model
+     model = Sequential([
+         Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
+         MaxPooling2D((2, 2)),
+         Conv2D(64, (3, 3), activation='relu'),
+         MaxPooling2D((2, 2)),
+         Flatten(),
+         Dense(64, activation='relu'),
+         Dense(10, activation='softmax')
+     ])
+
+     # Compile model
+     model.compile(optimizer='adam',
+                   loss='categorical_crossentropy',
+                   metrics=['accuracy'])
+
+     # Train the model
+     model.fit(x_train, y_train_cat, epochs=5, batch_size=64, validation_split=0.1)
+
+     # Evaluate
+     loss, acc = model.evaluate(x_test, y_test_cat, verbose=2)
+     print(f"\n✅ Test Accuracy: {acc * 100:.2f}%")
+
+     # Save model
+     os.makedirs("shared/models", exist_ok=True)
+     model.save("shared/models/mnist_cnn_model.keras")
+     print("💾 Model saved to shared/models/mnist_cnn_model.keras")
+
+ if __name__ == "__main__":
+     main()
@@ -0,0 +1,58 @@
+ import importlib
+ import sys
+ import os
+
+ # Add project root to PYTHONPATH
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '.')))
+
+ # Mapping: CLI label → module filename (without .py)
+ available_modules = {
+     "Train Model (MNIST CNN)": "train_mnist_model",
+     "Adversarial Attack (FGSM)": "fgsm_mobilenet",
+     "Data Poisoning – Label Flip": "label_flip_attack",
+     "Membership Inference Attack": "membership_inference_attack",
+     "Model Inversion Attack": "simulate_inversion",
+     "Model Stealing Attack": "steal_model",
+     "Backdoor Trigger Attack": "backdoor_trigger_attack"
+ }
+
+ def print_menu():
+     print("\n🧪 AI Security Toolkit – Interactive CLI 🔐")
+     print("Choose a module to run:\n")
+     for i, name in enumerate(available_modules.keys(), start=1):
+         print(f"[{i}] {name}")
+     print("[0] Exit")
+
+ def run_selected_module(choice_idx):
+     try:
+         label = list(available_modules.keys())[choice_idx - 1]
+         module_name = f"modules.{available_modules[label]}"
+         print(f"\n🔍 Running: {label} ({module_name})...\n")
+         mod = importlib.import_module(module_name)
+
+         if hasattr(mod, "main"):
+             mod.main()
+         else:
+             print("⚠️ No 'main()' found — running file as script...")
+             exec(open(mod.__file__).read())
+
+     except Exception as e:
+         print(f"❌ Error: {e}")
+
+ def main():
+     while True:
+         print_menu()
+         try:
+             choice = int(input("\nEnter your choice: "))
+             if choice == 0:
+                 print("👋 Exiting. Goodbye!")
+                 break
+             elif 1 <= choice <= len(available_modules):
+                 run_selected_module(choice)
+             else:
+                 print("❗ Invalid choice. Try again.")
+         except ValueError:
+             print("❗ Please enter a valid number.")
+
+ if __name__ == "__main__":
+     main()
File without changes
@@ -0,0 +1,45 @@
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ import os
+ import csv
+
+ def save_report(data: dict, filepath: str):
+     """
+     Save a dictionary or list of dicts to a CSV file.
+     """
+     os.makedirs(os.path.dirname(filepath), exist_ok=True)
+     df = pd.DataFrame(data)
+     df.to_csv(filepath, index=False)
+     print(f"[✓] Report saved to {filepath}")
+
+ def append_report_row(row: list, header: list, filepath: str):
+     """Append a row to a CSV report, writing the header when the file is first created."""
+     os.makedirs(os.path.dirname(filepath), exist_ok=True)
+     file_exists = os.path.isfile(filepath)
+     with open(filepath, "a", newline='') as file:
+         writer = csv.writer(file)
+         if not file_exists:
+             writer.writerow(header)
+         writer.writerow(row)
+     print(f"[✓] Row logged to {filepath}")
+
+
+ def save_plot(fig, filepath: str):
+     """
+     Save a matplotlib figure (or the pyplot module's current figure) to PNG.
+     """
+     os.makedirs(os.path.dirname(filepath), exist_ok=True)
+     fig.savefig(filepath, bbox_inches='tight')
+     print(f"[✓] Plot saved to {filepath}")
+
+ def log_metrics(accuracy=None, precision=None, recall=None):
+     """
+     Print evaluation metrics in a readable format.
+     """
+     print("\n📊 Metrics Summary")
+     if accuracy is not None:
+         print(f"   Accuracy:  {accuracy * 100:.2f}%")
+     if precision is not None:
+         print(f"   Precision: {precision * 100:.2f}%")
+     if recall is not None:
+         print(f"   Recall:    {recall * 100:.2f}%")
@@ -0,0 +1,78 @@
+ Metadata-Version: 2.4
+ Name: ai-security-toolkit
+ Version: 1.0.0
+ Summary: A red-team AI security framework with adversarial attack modules
+ Author: Rishit Goel
+ License: MIT
+ Requires-Python: >=3.8
+ Description-Content-Type: text/markdown
+ Requires-Dist: tensorflow
+ Requires-Dist: numpy
+ Requires-Dist: matplotlib
+ Requires-Dist: pandas
+ Requires-Dist: cleverhans
+ Dynamic: requires-python
+
+ # 🛡️ AI Security Toolkit
+
+ [![Made by Rishit Goel 💻](https://img.shields.io/badge/Made%20by-Rishit%20Goel-blueviolet?style=flat-square&logo=github)](https://github.com/rishit03)
+ ![Python](https://img.shields.io/badge/Python-3.8+-blue?logo=python)
+ ![License](https://img.shields.io/github/license/rishit03/ai-security-toolkit?style=flat)
+ ![GitHub Repo stars](https://img.shields.io/github/stars/rishit03/ai-security-toolkit?style=social)
+ ![GitHub last commit](https://img.shields.io/github/last-commit/rishit03/ai-security-toolkit?color=green)
+
+ A red-team framework that probes AI models for vulnerabilities using adversarial attacks, privacy leakage, and model exploitation techniques — built and maintained by [@rishit03](https://github.com/rishit03).
+
+ ---
+
+ ## 🚀 Features
+
+ ✅ 5+ attack modules
+ ✅ Unified logging and visualization
+ ✅ Command-line interface (interactive menu)
+ ✅ Modular, reusable, and pip-installable
+ ✅ Built with TensorFlow, CleverHans, and Python best practices
+
+ ---
+
+ ## 📦 Modules Included
+
+ | Module Name | Description |
+ |----------------------------|-------------|
+ | 🔓 Adversarial Attack (FGSM) | Confuses the model with small pixel changes (see the sketch below) |
+ | 💉 Label Flip Poisoning | Modifies training labels to reduce model accuracy |
+ | 🧠 Membership Inference Attack | Infers whether a data point was used in training |
+ | 🪞 Model Inversion | Reconstructs training images from the model |
+ | 🧬 Model Stealing | Clones the target model using black-box queries |
+ | 🎯 Backdoor Trigger Attack | Embeds a hidden trigger that forces misclassification |
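+
+ For a sense of what the FGSM module does under the hood, here is a minimal, hand-rolled sketch of the attack's core step. It is illustrative only: the packaged module (`fgsm_mobilenet.py`) calls CleverHans' `fast_gradient_method` instead, and the function name `fgsm_perturb` and the `eps` value are arbitrary examples.
+
+ ```python
+ import tensorflow as tf
+
+ def fgsm_perturb(model, x, y_true, eps=0.1):
+     """One FGSM step: x_adv = x + eps * sign(grad_x loss)."""
+     x = tf.convert_to_tensor(x)  # y_true is one-hot, same shape as model output
+     with tf.GradientTape() as tape:
+         tape.watch(x)  # track gradients w.r.t. the input, not the weights
+         loss = tf.keras.losses.categorical_crossentropy(y_true, model(x))
+     grad = tape.gradient(loss, x)   # d(loss) / d(input pixels)
+     return x + eps * tf.sign(grad)  # nudge every pixel in the direction that raises the loss
+ ```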
+
+ ---
+
+ ## 💻 CLI Usage
+
+ ```bash
+ # After pip install or cloning locally
+ python ai_security_toolkit/run.py
+ ```
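+
+ Because the wheel registers a console script (`ai-toolkit = ai_security_toolkit.run:main` in `entry_points.txt`), a pip install should also put an `ai-toolkit` launcher on your PATH:
+
+ ```bash
+ pip install ai-security-toolkit
+ ai-toolkit   # opens the interactive module menu
+ ```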
@@ -0,0 +1,17 @@
+ ai_security_toolkit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ ai_security_toolkit/run.py,sha256=2bKlLZs-yfcjBDZx-37TF18Zoyj5DckrpJZH9IV7HcI,1900
+ ai_security_toolkit/modules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ ai_security_toolkit/modules/backdoor_trigger_attack.py,sha256=FSE1LkPoLbXuEQXq2vReyM9R4tDbBZ76BxndE4i9DUs,4408
+ ai_security_toolkit/modules/fgsm_mobilenet.py,sha256=6em43Ymcgg_vlHuSHvAtpnJv-PDSmu9pxG-5gZyCi-s,2429
+ ai_security_toolkit/modules/label_flip_attack.py,sha256=YQ5OK2RQAMcRY3pTuxOHLeihR4XlbOSp-Xl_tgpmrNo,3144
+ ai_security_toolkit/modules/membership_inference_attack.py,sha256=3UsR2-yf8YyoCNJa_HcaVw8cwFKSkAEU8O9hGFT3dOs,2428
+ ai_security_toolkit/modules/simulate_inversion.py,sha256=dea4l13AvhcSY-a5wphQukjdMsmkep0EdsPylSpBTAk,2495
+ ai_security_toolkit/modules/steal_model.py,sha256=-svDT9YFve6WbdXGzcQdGwssgcrWruRg5oQC7Ycf9UA,2628
+ ai_security_toolkit/modules/train_mnist_model.py,sha256=e-yomDzN4PTiOBQCOU500fj3bpgc4siWXMnuCkSEQIE,1640
+ ai_security_toolkit/shared/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ ai_security_toolkit/shared/log_utils.py,sha256=no2_W1BPTy7K4ETqpWbYXYm1RrnpSqQI2zsw_puY0JU,1413
+ ai_security_toolkit-1.0.0.dist-info/METADATA,sha256=7-UQ3PZj6m5iThxSByWECUeaQBIvXYcH6QL8Yf80A-4,2141
+ ai_security_toolkit-1.0.0.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
+ ai_security_toolkit-1.0.0.dist-info/entry_points.txt,sha256=mE4BdYeHEwNrXf21cNPa3qMNB4VGUq3JlBGGeqYVFuA,60
+ ai_security_toolkit-1.0.0.dist-info/top_level.txt,sha256=DPMp0X7MzVKm6szxcG6toEN5S3Glc30SkJkjM7XbYpw,20
+ ai_security_toolkit-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
+ Wheel-Version: 1.0
+ Generator: setuptools (79.0.0)
+ Root-Is-Purelib: true
+ Tag: py3-none-any
+
@@ -0,0 +1,2 @@
+ [console_scripts]
+ ai-toolkit = ai_security_toolkit.run:main
@@ -0,0 +1 @@
+ ai_security_toolkit