mak-mini-ml 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,182 @@
1
+ Metadata-Version: 2.4
2
+ Name: mak-mini-ml
3
+ Version: 0.1.1
4
+ Summary: Beginner-friendly Machine Learning utility library built from scratch using pure Python
5
+ Author: Aryan Kakade, Kishor Handge
6
+ License: MIT
7
+ Keywords: machine-learning,python,ml,statistics,data-science,educational
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.8
12
+ Description-Content-Type: text/markdown
13
+
14
+ # Custom_ML_Suite
15
+
16
+ Custom_ML_Suite is a beginner-friendly Machine Learning utility library built completely from scratch using pure Python.
17
+
18
+ This project focuses on understanding the mathematical foundations of Machine Learning by manually implementing core ML algorithms, preprocessing methods, distance metrics, activation functions, statistical operations, and evaluation metrics without using external ML libraries like scikit-learn.
19
+
20
+ ---
21
+
22
+ # Features
23
+
24
+ - Pure Python implementation
25
+ - Beginner-friendly code structure
26
+ - Mathematical formulas included
27
+ - Well-commented educational code
28
+ - Modular package structure
29
+ - ML utilities from scratch
30
+ - Edge-case handling
31
+ - Easy to understand and extend
32
+
33
+ ---
34
+
35
+ # Modules Included
36
+
37
+ ## 1. activations.py
38
+ Activation functions used in neural networks.
39
+
40
+ ### Functions
41
+ - sigmoid()
42
+ - relu()
43
+ - tanh()
44
+ - softmax()
45
+ - log_loss()
46
+
47
+ ---
48
+
49
+ ## 2. distances.py
50
+ Distance and similarity metrics.
51
+
52
+ ### Functions
53
+ - euclidean_distance()
54
+ - manhattan_distance()
55
+ - minkowski_distance()
56
+ - cosine_similarity()
57
+ - hamming_distance()
58
+
59
+ ---
60
+
61
+ ## 3. preprocessing.py
62
+ Data preprocessing and scaling methods.
63
+
64
+ ### Functions
65
+ - standardization()
66
+ - mean()
67
+ - min_max_scaling()
68
+ - range()
69
+ - normalization()
70
+
71
+ ---
72
+
73
+ ## 4. stats.py
74
+ Basic statistical operations.
75
+
76
+ ### Functions
77
+ - mean()
78
+ - variance()
79
+ - std_dev()
80
+ - covariance()
81
+ - correlation()
82
+
83
+ ---
84
+
85
+ ## 5. metrics.py
86
+ Machine Learning evaluation metrics.
87
+
88
+ ### Functions
89
+ - accuracy_score()
90
+ - precision_score()
91
+ - recall_score()
92
+ - f1_score()
93
+ - confusion_matrix()
94
+ - mean_absolute_error()
95
+ - mean_squared_error()
96
+ - root_mean_squared_error()
97
+ - r2_score()
98
+ - binary_crossentropy()
99
+
100
+ ---
101
+
102
+ ## 6. model_selection.py
103
+ Dataset splitting and validation utilities.
104
+
105
+ ### Functions
106
+ - train_test_split()
107
+ - shuffle_data()
108
+ - batch_iterator()
109
+ - k_fold_split()
110
+ - stratified_split()
111
+
112
+ ---
113
+
114
+ ## 7. linear_model.py
115
+ Basic regression models and optimization.
116
+
117
+ ### Functions
118
+ - linear_regression()
119
+ - predict()
120
+ - gradient_descent()
121
+ - logistic_regression()
122
+ - logistic_update()
123
+
124
+ ---
125
+
126
+ ## 8. neighbors.py
127
+ K-Nearest Neighbors utilities.
128
+
129
+ ### Functions
130
+ - knn_distance()
131
+ - knn_predict()
132
+ - probability()
133
+
134
+ ---
135
+
136
+ ## 9. tree.py
137
+ Basic Decision Tree utilities.
138
+
139
+ ### Functions
140
+ - gini_impurity()
141
+ - entropy()
142
+ - information_gain()
143
+ - best_split()
144
+ - build_tree()
145
+ - predict_tree()
146
+ - majority_vote()
147
+
148
+ ---
149
+
150
+ # Project Structure
151
+
152
+ ```bash
153
+ Custom_ML_Suite/
154
+
155
+ ├── examples/
156
+ │ ├── demo.py
157
+ │ └── Formula.py
158
+
159
+ ├── src/
160
+ │ └── Custom_ML_Suite/
161
+ │ ├── __init__.py
162
+ │ ├── activations.py
163
+ │ ├── distances.py
164
+ │ ├── linear_model.py
165
+ │ ├── metrics.py
166
+ │ ├── model_selection.py
167
+ │ ├── neighbors.py
168
+ │ ├── preprocessing.py
169
+ │ ├── stats.py
170
+ │ └── tree.py
171
+
172
+ ├── tests/
173
+ │ └── test_all.py
174
+
175
+ ├── README.md
176
+ ├── pyproject.toml
177
+ └── .gitignore
178
+
179
+ ```
+
+ # Authors
180
+
181
+ Aryan Kakade
182
+ Kishor Handge
@@ -0,0 +1,169 @@
1
+ # Custom_ML_Suite
2
+
3
+ Custom_ML_Suite is a beginner-friendly Machine Learning utility library built completely from scratch using pure Python.
4
+
5
+ This project focuses on understanding the mathematical foundations of Machine Learning by manually implementing core ML algorithms, preprocessing methods, distance metrics, activation functions, statistical operations, and evaluation metrics without using external ML libraries like scikit-learn.
6
+
7
+ ---
8
+
9
+ # Features
10
+
11
+ - Pure Python implementation
12
+ - Beginner-friendly code structure
13
+ - Mathematical formulas included
14
+ - Well-commented educational code
15
+ - Modular package structure
16
+ - ML utilities from scratch
17
+ - Edge-case handling
18
+ - Easy to understand and extend
19
+
20
+ ---
21
+
22
+ # Modules Included
23
+
24
+ ## 1. activations.py
25
+ Activation functions used in neural networks.
26
+
27
+ ### Functions
28
+ - sigmoid()
29
+ - relu()
30
+ - tanh()
31
+ - softmax()
32
+ - log_loss()
33
+
34
+ ---
35
+
36
+ ## 2. distances.py
37
+ Distance and similarity metrics.
38
+
39
+ ### Functions
40
+ - euclidean_distance()
41
+ - manhattan_distance()
42
+ - minkowski_distance()
43
+ - cosine_similarity()
44
+ - hamming_distance()
45
+
46
+ ---
47
+
48
+ ## 3. preprocessing.py
49
+ Data preprocessing and scaling methods.
50
+
51
+ ### Functions
52
+ - standardization()
53
+ - mean()
54
+ - min_max_scaling()
55
+ - range()
56
+ - normalization()
57
+
58
+ ---
59
+
60
+ ## 4. stats.py
61
+ Basic statistical operations.
62
+
63
+ ### Functions
64
+ - mean()
65
+ - variance()
66
+ - std_dev()
67
+ - covariance()
68
+ - correlation()
69
+
70
+ ---
71
+
72
+ ## 5. metrics.py
73
+ Machine Learning evaluation metrics.
74
+
75
+ ### Functions
76
+ - accuracy_score()
77
+ - precision_score()
78
+ - recall_score()
79
+ - f1_score()
80
+ - confusion_matrix()
81
+ - mean_absolute_error()
82
+ - mean_squared_error()
83
+ - root_mean_squared_error()
84
+ - r2_score()
85
+ - binary_crossentropy()
86
+
87
+ ---
88
+
89
+ ## 6. model_selection.py
90
+ Dataset splitting and validation utilities.
91
+
92
+ ### Functions
93
+ - train_test_split()
94
+ - shuffle_data()
95
+ - batch_iterator()
96
+ - k_fold_split()
97
+ - stratified_split()
98
+
99
+ ---
100
+
101
+ ## 7. linear_model.py
102
+ Basic regression models and optimization.
103
+
104
+ ### Functions
105
+ - linear_regression()
106
+ - predict()
107
+ - gradient_descent()
108
+ - logistic_regression()
109
+ - logistic_update()
110
+
111
+ ---
112
+
113
+ ## 8. neighbors.py
114
+ K-Nearest Neighbors utilities.
115
+
116
+ ### Functions
117
+ - knn_distance()
118
+ - knn_predict()
119
+ - probability()
120
+
121
+ ---
122
+
123
+ ## 9. tree.py
124
+ Basic Decision Tree utilities.
125
+
126
+ ### Functions
127
+ - gini_impurity()
128
+ - entropy()
129
+ - information_gain()
130
+ - best_split()
131
+ - build_tree()
132
+ - predict_tree()
133
+ - majority_vote()
134
+
135
+ ---
136
+
137
+ # Project Structure
138
+
139
+ ```bash
140
+ Custom_ML_Suite/
141
+
142
+ ├── examples/
143
+ │ ├── demo.py
144
+ │ └── Formula.py
145
+
146
+ ├── src/
147
+ │ └── Custom_ML_Suite/
148
+ │ ├── __init__.py
149
+ │ ├── activations.py
150
+ │ ├── distances.py
151
+ │ ├── linear_model.py
152
+ │ ├── metrics.py
153
+ │ ├── model_selection.py
154
+ │ ├── neighbors.py
155
+ │ ├── preprocessing.py
156
+ │ ├── stats.py
157
+ │ └── tree.py
158
+
159
+ ├── tests/
160
+ │ └── test_all.py
161
+
162
+ ├── README.md
163
+ ├── pyproject.toml
164
+ └── .gitignore
165
+
166
+ ```
+
+ # Authors
167
+
168
+ Aryan Kakade
169
+ Kishor Handge
@@ -0,0 +1,36 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "mak-mini-ml"
7
+ version = "0.1.1"
8
+ description = "Beginner-friendly Machine Learning utility library built from scratch using pure Python"
9
+ readme = "README.md"
10
+ requires-python = ">=3.8"
11
+ authors = [
12
+ {name = "Aryan Kakade"},
13
+ {name = "Kishor Handge"}
14
+ ]
15
+ license = {text = "MIT"}
16
+
17
+ keywords = [
18
+ "machine-learning",
19
+ "python",
20
+ "ml",
21
+ "statistics",
22
+ "data-science",
23
+ "educational"
24
+ ]
25
+
26
+ classifiers = [
27
+ "Programming Language :: Python :: 3",
28
+ "License :: OSI Approved :: MIT License",
29
+ "Operating System :: OS Independent",
30
+ ]
31
+
32
+ [tool.setuptools]
33
+ package-dir = {"" = "src"}
34
+
35
+ [tool.setuptools.packages.find]
36
+ where = ["src"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,21 @@
1
+ # src/Custom_ML_Suite/__init__.py
2
+
3
+ # metrics.py → evaluation
4
+ # model_selection.py → data splitting
5
+ # distances.py → math for KNN
6
+ # activations.py → ML math functions
7
+ # preprocessing.py → scaling
8
+ # stats.py → statistics
9
+ # linear_model.py → regression models
10
+ # neighbors.py → KNN
11
+ # tree.py → decision tree
12
+
13
+ from .metrics import *
14
+ from .model_selection import *
15
+ from .distances import *
16
+ from .activations import *
17
+ from .preprocessing import *
18
+ from .stats import *
19
+ from .linear_model import *
20
+ from .neighbors import *
21
+ from .tree import *
@@ -0,0 +1,115 @@
1
+ # sigmoid
2
+ # relu
3
+ # tanh
4
+ # softmax
5
+ # log_loss
6
+
7
+ import math
8
+
9
+ # --------------------------------------------------
10
+ # ReLU activation
11
+ # Used in neural networks
12
+ # Returns 0 if input is negative, else returns same value
13
+ # Range: [0, ∞)
14
+ # --------------------------------------------------
15
def relu(x):
    """Rectified Linear Unit activation.

    Returns ``x`` unchanged when it is strictly positive; for every other
    input (zero, negatives, NaN) returns the integer ``0``.
    """
    return x if x > 0 else 0
22
+
23
+
24
+ # --------------------------------------------------
25
+ # Sigmoid activation
26
+ # Converts input into probability
27
+ # Range: (0, 1)
28
+ # Used in binary classification
29
+ # --------------------------------------------------
30
def sigmoid(x):
    """Logistic sigmoid, 1 / (1 + e^-x), evaluated in an overflow-safe way.

    The exponential is always taken of a non-positive number, so
    ``math.exp`` can underflow to 0.0 but never overflow:

    * x >= 0: 1 / (1 + e^-x)
    * x <  0: e^x / (1 + e^x)   (algebraically the same value)
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))

    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)
43
+
44
+
45
+ # --------------------------------------------------
46
+ # Tanh activation
47
+ # Similar to sigmoid but centered at 0
48
+ # Range: (-1, 1)
49
+ # Better for hidden layers
50
+ # --------------------------------------------------
51
def tanh(x):
    """Hyperbolic tangent activation; delegates to ``math.tanh``."""
    activated = math.tanh(x)
    return activated
54
+
55
+
56
+ # --------------------------------------------------
57
+ # Softmax function
58
+ # Converts list of values into probabilities
59
+ # Output sum = 1
60
+ # Used in multi-class classification
61
+ # --------------------------------------------------
62
def softmax(x_list):
    """Turn a list of scores into probabilities that sum to 1.

    The maximum score is subtracted from every element before
    exponentiating, so the largest exponent is e^0 = 1 and ``math.exp``
    cannot overflow.

    Raises:
        ValueError: if ``x_list`` is empty.
    """
    if not len(x_list):
        raise ValueError("Empty list")

    peak = max(x_list)
    shifted_exps = [math.exp(score - peak) for score in x_list]
    normaliser = sum(shifted_exps)

    return [e / normaliser for e in shifted_exps]
84
+
85
+
86
+ # --------------------------------------------------
87
+ # Log Loss (Binary Cross Entropy)
88
+ # Measures error between true and predicted values
89
+ # Lower value = better model
90
+ # Used in classification problems
91
+ # --------------------------------------------------
92
def log_loss(y_true, y_pred):
    """Mean binary cross-entropy between labels and predicted probabilities.

    loss = -(1/n) * Σ [ y * log(p) + (1 - y) * log(1 - p) ]

    Each prediction is clipped into [1e-15, 1 - 1e-15] before taking
    logarithms so that ``log(0)`` is never evaluated.

    Raises:
        ValueError: if the two inputs differ in length, or are empty.
    """
    count = len(y_true)

    if count != len(y_pred):
        raise ValueError("Length mismatch")

    if count == 0:
        raise ValueError("Empty array")

    epsilon = 1e-15
    upper_bound = 1 - epsilon

    accumulated = 0
    for truth, raw_pred in zip(y_true, y_pred):
        clipped = max(min(raw_pred, upper_bound), epsilon)
        accumulated += (
            truth * math.log(clipped) +
            (1 - truth) * math.log(1 - clipped)
        )

    return -accumulated / count
@@ -0,0 +1,127 @@
1
+ import math
2
+
3
+ # euclidean_distance
4
+ # manhattan_distance
5
+ # minkowski_distance
6
+ # cosine_similarity
7
+ # hamming_distance
8
+
9
+ # | p | Distance Type |
10
+ # | - | ------------- |
11
+ # | 1 | Manhattan |
12
+ # | 2 | Euclidean |
13
+ # | ∞ | Chebyshev |
14
+
15
+
16
+
17
+ # Hamming distance: number of positions where values differ
18
+ # d = Σ (xi != yi)
19
def hamming_distance(x, y):
    """Count the positions at which two equal-length sequences differ.

    Raises:
        ValueError: if ``x`` and ``y`` have different lengths.
    """
    if len(x) != len(y):
        raise ValueError("Length mismatch")

    # Integer sum of one per mismatching pair.
    return sum(1 for left, right in zip(x, y) if left != right)
37
+
38
+
39
+
40
+ # Euclidean Distance: straight-line distance between two points in space
41
+ # d = sqrt(Σ (xi - yi)^2)
42
def euclidean_distance(x, y):
    """Straight-line distance between two points: sqrt(Σ (xi - yi)^2).

    Raises:
        ValueError: if ``x`` and ``y`` have different lengths.
    """
    if len(x) != len(y):
        raise ValueError("Length mismatch")

    squared_sum = 0
    for a, b in zip(x, y):
        squared_sum += (a - b) ** 2

    return math.sqrt(squared_sum)
58
+
59
+
60
+ # Manhattan Distance: sum of absolute differences between coordinates (grid-like path)
61
+ # d = Σ |xi - yi|
62
def manhattan_distance(x, y):
    """Grid-path distance between two points: Σ |xi - yi|.

    Raises:
        ValueError: if ``x`` and ``y`` have different lengths.
    """
    if len(x) != len(y):
        raise ValueError("Length mismatch")

    distance = 0
    for a, b in zip(x, y):
        distance += abs(a - b)

    return distance
75
+
76
+
77
+
78
+ # Cosine Similarity: measures how similar two vectors are based on the angle between them
79
+ # cos(θ) = (Σ xi*yi) / (sqrt(Σ xi^2) * sqrt(Σ yi^2))
80
+
81
+ # Minimum angle → Maximum similarity
82
+ # Maximum angle → Minimum similarity
83
def cosine_similarity(x, y):
    """Cosine of the angle between two vectors.

    cos(θ) = (Σ xi*yi) / (sqrt(Σ xi^2) * sqrt(Σ yi^2))

    Returns the integer ``0`` when either vector has zero magnitude, since
    the angle is undefined there and the division would fail.

    Raises:
        ValueError: if ``x`` and ``y`` have different lengths.
    """
    if len(x) != len(y):
        raise ValueError("Length mismatch")

    dot = 0
    sq_norm_x = 0
    sq_norm_y = 0
    for a, b in zip(x, y):
        dot += a * b
        sq_norm_x += a ** 2
        sq_norm_y += b ** 2

    norm_product = math.sqrt(sq_norm_x) * math.sqrt(sq_norm_y)

    return 0 if norm_product == 0 else dot / norm_product
107
+
108
+
109
+ # Minkowski Distance: generalized distance formula that includes Euclidean and Manhattan as special cases
110
+ # d = (Σ |xi - yi|^p)^(1/p)
111
+
112
def minkowski_distance(x, y, p):
    """Minkowski distance of order ``p`` between two points.

    d = (Σ |xi - yi|^p)^(1/p)

    Special cases: p = 1 is Manhattan, p = 2 is Euclidean, and p = ∞
    (``float("inf")``) is Chebyshev, i.e. the largest absolute coordinate
    difference.

    Args:
        x, y: equal-length sequences of numbers.
        p: order of the distance; must be greater than 0.

    Returns:
        The distance. For infinite ``p`` this is the maximum absolute
        difference, or 0 for empty inputs.

    Raises:
        ValueError: if the lengths differ or ``p <= 0``.
    """
    if len(x) != len(y):
        raise ValueError("Length mismatch")

    if p <= 0:
        raise ValueError("p must be greater than 0")

    if p == float("inf"):
        # The general formula breaks down here: 1/p is 0.0, so it would
        # return total ** 0 == 1.0 regardless of the inputs. Use the
        # mathematical limit (Chebyshev distance) instead.
        return max((abs(a - b) for a, b in zip(x, y)), default=0)

    total = 0
    for a, b in zip(x, y):
        total += abs(a - b) ** p

    return total ** (1 / p)