createsonline 0.1.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- createsonline/__init__.py +46 -0
- createsonline/admin/__init__.py +7 -0
- createsonline/admin/content.py +526 -0
- createsonline/admin/crud.py +805 -0
- createsonline/admin/field_builder.py +559 -0
- createsonline/admin/integration.py +482 -0
- createsonline/admin/interface.py +2562 -0
- createsonline/admin/model_creator.py +513 -0
- createsonline/admin/model_manager.py +388 -0
- createsonline/admin/modern_dashboard.py +498 -0
- createsonline/admin/permissions.py +264 -0
- createsonline/admin/user_forms.py +594 -0
- createsonline/ai/__init__.py +202 -0
- createsonline/ai/fields.py +1226 -0
- createsonline/ai/orm.py +325 -0
- createsonline/ai/services.py +1244 -0
- createsonline/app.py +506 -0
- createsonline/auth/__init__.py +8 -0
- createsonline/auth/management.py +228 -0
- createsonline/auth/models.py +552 -0
- createsonline/cli/__init__.py +5 -0
- createsonline/cli/commands/__init__.py +122 -0
- createsonline/cli/commands/database.py +416 -0
- createsonline/cli/commands/info.py +173 -0
- createsonline/cli/commands/initdb.py +218 -0
- createsonline/cli/commands/project.py +545 -0
- createsonline/cli/commands/serve.py +173 -0
- createsonline/cli/commands/shell.py +93 -0
- createsonline/cli/commands/users.py +148 -0
- createsonline/cli/main.py +2041 -0
- createsonline/cli/manage.py +274 -0
- createsonline/config/__init__.py +9 -0
- createsonline/config/app.py +2577 -0
- createsonline/config/database.py +179 -0
- createsonline/config/docs.py +384 -0
- createsonline/config/errors.py +160 -0
- createsonline/config/orm.py +43 -0
- createsonline/config/request.py +93 -0
- createsonline/config/settings.py +176 -0
- createsonline/data/__init__.py +23 -0
- createsonline/data/dataframe.py +925 -0
- createsonline/data/io.py +453 -0
- createsonline/data/series.py +557 -0
- createsonline/database/__init__.py +60 -0
- createsonline/database/abstraction.py +440 -0
- createsonline/database/assistant.py +585 -0
- createsonline/database/fields.py +442 -0
- createsonline/database/migrations.py +132 -0
- createsonline/database/models.py +604 -0
- createsonline/database.py +438 -0
- createsonline/http/__init__.py +28 -0
- createsonline/http/client.py +535 -0
- createsonline/ml/__init__.py +55 -0
- createsonline/ml/classification.py +552 -0
- createsonline/ml/clustering.py +680 -0
- createsonline/ml/metrics.py +542 -0
- createsonline/ml/neural.py +560 -0
- createsonline/ml/preprocessing.py +784 -0
- createsonline/ml/regression.py +501 -0
- createsonline/performance/__init__.py +19 -0
- createsonline/performance/cache.py +444 -0
- createsonline/performance/compression.py +335 -0
- createsonline/performance/core.py +419 -0
- createsonline/project_init.py +789 -0
- createsonline/routing.py +528 -0
- createsonline/security/__init__.py +34 -0
- createsonline/security/core.py +811 -0
- createsonline/security/encryption.py +349 -0
- createsonline/server.py +295 -0
- createsonline/static/css/admin.css +263 -0
- createsonline/static/css/common.css +358 -0
- createsonline/static/css/dashboard.css +89 -0
- createsonline/static/favicon.ico +0 -0
- createsonline/static/icons/icon-128x128.png +0 -0
- createsonline/static/icons/icon-128x128.webp +0 -0
- createsonline/static/icons/icon-16x16.png +0 -0
- createsonline/static/icons/icon-16x16.webp +0 -0
- createsonline/static/icons/icon-180x180.png +0 -0
- createsonline/static/icons/icon-180x180.webp +0 -0
- createsonline/static/icons/icon-192x192.png +0 -0
- createsonline/static/icons/icon-192x192.webp +0 -0
- createsonline/static/icons/icon-256x256.png +0 -0
- createsonline/static/icons/icon-256x256.webp +0 -0
- createsonline/static/icons/icon-32x32.png +0 -0
- createsonline/static/icons/icon-32x32.webp +0 -0
- createsonline/static/icons/icon-384x384.png +0 -0
- createsonline/static/icons/icon-384x384.webp +0 -0
- createsonline/static/icons/icon-48x48.png +0 -0
- createsonline/static/icons/icon-48x48.webp +0 -0
- createsonline/static/icons/icon-512x512.png +0 -0
- createsonline/static/icons/icon-512x512.webp +0 -0
- createsonline/static/icons/icon-64x64.png +0 -0
- createsonline/static/icons/icon-64x64.webp +0 -0
- createsonline/static/image/android-chrome-192x192.png +0 -0
- createsonline/static/image/android-chrome-512x512.png +0 -0
- createsonline/static/image/apple-touch-icon.png +0 -0
- createsonline/static/image/favicon-16x16.png +0 -0
- createsonline/static/image/favicon-32x32.png +0 -0
- createsonline/static/image/favicon.ico +0 -0
- createsonline/static/image/favicon.svg +17 -0
- createsonline/static/image/icon-128x128.png +0 -0
- createsonline/static/image/icon-128x128.webp +0 -0
- createsonline/static/image/icon-16x16.png +0 -0
- createsonline/static/image/icon-16x16.webp +0 -0
- createsonline/static/image/icon-180x180.png +0 -0
- createsonline/static/image/icon-180x180.webp +0 -0
- createsonline/static/image/icon-192x192.png +0 -0
- createsonline/static/image/icon-192x192.webp +0 -0
- createsonline/static/image/icon-256x256.png +0 -0
- createsonline/static/image/icon-256x256.webp +0 -0
- createsonline/static/image/icon-32x32.png +0 -0
- createsonline/static/image/icon-32x32.webp +0 -0
- createsonline/static/image/icon-384x384.png +0 -0
- createsonline/static/image/icon-384x384.webp +0 -0
- createsonline/static/image/icon-48x48.png +0 -0
- createsonline/static/image/icon-48x48.webp +0 -0
- createsonline/static/image/icon-512x512.png +0 -0
- createsonline/static/image/icon-512x512.webp +0 -0
- createsonline/static/image/icon-64x64.png +0 -0
- createsonline/static/image/icon-64x64.webp +0 -0
- createsonline/static/image/logo-header-h100.png +0 -0
- createsonline/static/image/logo-header-h100.webp +0 -0
- createsonline/static/image/logo-header-h200@2x.png +0 -0
- createsonline/static/image/logo-header-h200@2x.webp +0 -0
- createsonline/static/image/logo.png +0 -0
- createsonline/static/js/admin.js +274 -0
- createsonline/static/site.webmanifest +35 -0
- createsonline/static/templates/admin/base.html +87 -0
- createsonline/static/templates/admin/dashboard.html +217 -0
- createsonline/static/templates/admin/model_form.html +270 -0
- createsonline/static/templates/admin/model_list.html +202 -0
- createsonline/static/test_script.js +15 -0
- createsonline/static/test_styles.css +59 -0
- createsonline/static_files.py +365 -0
- createsonline/templates/404.html +100 -0
- createsonline/templates/admin_login.html +169 -0
- createsonline/templates/base.html +102 -0
- createsonline/templates/index.html +151 -0
- createsonline/templates.py +205 -0
- createsonline/testing.py +322 -0
- createsonline/utils.py +448 -0
- createsonline/validation/__init__.py +49 -0
- createsonline/validation/fields.py +598 -0
- createsonline/validation/models.py +504 -0
- createsonline/validation/validators.py +561 -0
- createsonline/views.py +184 -0
- createsonline-0.1.26.dist-info/METADATA +46 -0
- createsonline-0.1.26.dist-info/RECORD +152 -0
- createsonline-0.1.26.dist-info/WHEEL +5 -0
- createsonline-0.1.26.dist-info/entry_points.txt +2 -0
- createsonline-0.1.26.dist-info/licenses/LICENSE +21 -0
- createsonline-0.1.26.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,552 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CREATESONLINE Classification Algorithms
|
|
3
|
+
|
|
4
|
+
Pure Python classification implementations.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
from typing import Optional, Union, Any
|
|
9
|
+
from collections import Counter
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class DecisionTreeClassifier:
    """
    Decision Tree Classifier implementation

    Binary decision tree built on NumPy arrays. Candidate thresholds are the
    midpoints between consecutive distinct values of each feature, and the
    split with the highest entropy-based information gain is chosen.

    Attributes:
        tree: Root ``Node`` of the fitted tree (``None`` before ``fit``).
        feature_importances_: Array of shape (n_features,) holding, for each
            feature, the sample-weighted information gain of all splits made
            on it, normalised to sum to 1 (all zeros when the tree is a single
            leaf). ``None`` before ``fit``.
        fitted: Whether ``fit`` has completed.
    """

    def __init__(self, max_depth: Optional[int] = None, min_samples_split: int = 2, min_samples_leaf: int = 1):
        """
        Initialize Decision Tree Classifier

        Args:
            max_depth: Maximum depth of the tree (None means unlimited)
            min_samples_split: Minimum samples required to split a node
            min_samples_leaf: Minimum samples required at a leaf node
        """
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf

        self.tree = None
        self.feature_importances_ = None
        self.fitted = False

    class Node:
        """Decision tree node"""

        def __init__(self):
            self.feature_index = None  # Feature tested at an internal node
            self.threshold = None  # Samples with feature <= threshold go left
            self.left = None
            self.right = None
            self.value = None  # Predicted class; set only on leaf nodes
            self.samples = 0  # Number of training samples that reached the node
            self.gini = 0.0  # Gini impurity of those samples

    def _gini_impurity(self, y: np.ndarray) -> float:
        """Calculate Gini impurity (0.0 for an empty array)"""
        if len(y) == 0:
            return 0.0

        _, counts = np.unique(y, return_counts=True)
        probabilities = counts / len(y)
        return 1 - np.sum(probabilities ** 2)

    def _entropy(self, y: np.ndarray) -> float:
        """Calculate entropy in bits (0.0 for an empty array)"""
        if len(y) == 0:
            return 0.0

        _, counts = np.unique(y, return_counts=True)
        probabilities = counts / len(y)
        # The epsilon keeps log2 finite; counts from np.unique are never zero,
        # so it only perturbs the result negligibly.
        return -np.sum(probabilities * np.log2(probabilities + 1e-15))

    def _information_gain(self, y: np.ndarray, left_y: np.ndarray, right_y: np.ndarray) -> float:
        """Calculate information gain of splitting ``y`` into two parts"""
        n = len(y)
        n_left, n_right = len(left_y), len(right_y)

        # A split that leaves one side empty separates nothing.
        if n_left == 0 or n_right == 0:
            return 0.0

        weighted_entropy = (n_left / n) * self._entropy(left_y) + (n_right / n) * self._entropy(right_y)
        return self._entropy(y) - weighted_entropy

    def _best_split(self, X: np.ndarray, y: np.ndarray) -> tuple:
        """
        Find the best split for the data

        Returns:
            Tuple ``(feature_index, threshold, gain)``. ``feature_index`` and
            ``threshold`` are None (and gain is -1) when no candidate split
            satisfies ``min_samples_leaf``.
        """
        best_gain = -1
        best_feature = None
        best_threshold = None

        for feature_index in range(X.shape[1]):
            feature_values = X[:, feature_index]
            unique_values = np.unique(feature_values)

            # Midpoints between consecutive distinct (sorted) values.
            for lower, upper in zip(unique_values[:-1], unique_values[1:]):
                threshold = (lower + upper) / 2

                left_mask = feature_values <= threshold
                right_mask = ~left_mask

                if np.sum(left_mask) < self.min_samples_leaf or np.sum(right_mask) < self.min_samples_leaf:
                    continue

                gain = self._information_gain(y, y[left_mask], y[right_mask])

                # Strict comparison: the first best candidate wins ties.
                if gain > best_gain:
                    best_gain = gain
                    best_feature = feature_index
                    best_threshold = threshold

        return best_feature, best_threshold, best_gain

    def _majority_class(self, y: np.ndarray) -> Any:
        """Return the most frequent label in ``y`` (first seen wins ties)"""
        return Counter(y).most_common(1)[0][0]

    def _build_tree(self, X: np.ndarray, y: np.ndarray, depth: int = 0) -> Node:
        """Recursively build the decision tree and return its root node"""
        node = self.Node()
        node.samples = len(y)
        node.gini = self._gini_impurity(y)

        # Stopping criteria: depth limit, too few samples, or a pure node.
        depth_exhausted = self.max_depth is not None and depth >= self.max_depth
        if depth_exhausted or len(y) < self.min_samples_split or len(np.unique(y)) == 1:
            node.value = self._majority_class(y)
            return node

        feature_index, threshold, gain = self._best_split(X, y)

        # No admissible split, or none that reduces entropy: make a leaf.
        if feature_index is None or gain <= 0:
            node.value = self._majority_class(y)
            return node

        # Record the sample-weighted gain for feature_importances_.
        # The accumulator only exists while fit() is running.
        split_gains = getattr(self, "_split_gains", None)
        if split_gains is not None:
            split_gains[feature_index] += node.samples * gain

        left_mask = X[:, feature_index] <= threshold
        right_mask = ~left_mask

        node.feature_index = feature_index
        node.threshold = threshold

        node.left = self._build_tree(X[left_mask], y[left_mask], depth + 1)
        node.right = self._build_tree(X[right_mask], y[right_mask], depth + 1)

        return node

    def fit(self, X: Union[np.ndarray, list], y: Union[np.ndarray, list]) -> 'DecisionTreeClassifier':
        """
        Fit decision tree classifier

        Args:
            X: Training features (n_samples, n_features). A 1-D input is
                treated as n_samples rows of a single feature.
            y: Training targets (n_samples,)

        Returns:
            Self for method chaining

        Raises:
            ValueError: If X is not 1-D/2-D, y is not 1-D, the training set is
                empty, or X and y have different numbers of samples.
        """
        X = np.asarray(X)
        y = np.asarray(y)

        if X.ndim == 1:
            X = X.reshape(-1, 1)

        if X.ndim != 2:
            raise ValueError(f"X must be 1-D or 2-D, got {X.ndim} dimensions")
        if y.ndim != 1:
            raise ValueError(f"y must be 1-D, got {y.ndim} dimensions")
        if X.shape[0] == 0:
            raise ValueError("Cannot fit on an empty training set")
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y have inconsistent numbers of samples: {X.shape[0]} != {y.shape[0]}"
            )

        self._split_gains = np.zeros(X.shape[1], dtype=float)
        self.tree = self._build_tree(X, y)

        total_gain = self._split_gains.sum()
        if total_gain > 0:
            self.feature_importances_ = self._split_gains / total_gain
        else:
            # Single-leaf tree: no feature was used.
            self.feature_importances_ = np.zeros_like(self._split_gains)

        self.fitted = True
        return self

    def _predict_sample(self, sample: np.ndarray, node: Node) -> Any:
        """Predict a single sample by walking from ``node`` down to a leaf"""
        # Iterative descent; leaves are the nodes that carry a value.
        while node.value is None:
            if sample[node.feature_index] <= node.threshold:
                node = node.left
            else:
                node = node.right
        return node.value

    def predict(self, X: Union[np.ndarray, list]) -> np.ndarray:
        """
        Make predictions

        Args:
            X: Features to predict on (n_samples, n_features)

        Returns:
            Predictions (n_samples,)

        Raises:
            RuntimeError: If the model has not been fitted.
        """
        if not self.fitted:
            raise RuntimeError("Model must be fitted before making predictions")

        X = np.asarray(X)
        if X.ndim == 1:
            X = X.reshape(-1, 1)

        return np.array([self._predict_sample(sample, self.tree) for sample in X])

    def score(self, X: Union[np.ndarray, list], y: Union[np.ndarray, list]) -> float:
        """
        Calculate accuracy score

        Args:
            X: Features
            y: True targets

        Returns:
            Accuracy score (fraction of predictions equal to ``y``)
        """
        y_pred = self.predict(X)
        y = np.asarray(y)

        return np.mean(y_pred == y)
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
class KNearestNeighbors:
    """
    K-Nearest Neighbors Classifier implementation

    Brute-force classifier: every prediction measures the distance from the
    query sample to each stored training sample, keeps the ``n_neighbors``
    closest ones and lets them vote, either equally (``'uniform'``) or
    weighted by inverse distance (``'distance'``).
    """

    def __init__(self, n_neighbors: int = 5, metric: str = 'euclidean', weights: str = 'uniform'):
        """
        Initialize KNN Classifier

        Args:
            n_neighbors: Number of neighbors to consider
            metric: Distance metric ('euclidean', 'manhattan', 'cosine')
            weights: Weight function ('uniform', 'distance')
        """
        self.n_neighbors = n_neighbors
        self.metric = metric
        self.weights = weights

        self.X_train = None
        self.y_train = None
        self.fitted = False

    def _euclidean_distance(self, x1: np.ndarray, x2: np.ndarray) -> float:
        """Calculate Euclidean distance"""
        return np.sqrt(np.sum((x1 - x2) ** 2))

    def _manhattan_distance(self, x1: np.ndarray, x2: np.ndarray) -> float:
        """Calculate Manhattan distance"""
        return np.sum(np.abs(x1 - x2))

    def _cosine_distance(self, x1: np.ndarray, x2: np.ndarray) -> float:
        """Calculate Cosine distance (1 - cosine similarity)"""
        norm_x1 = np.linalg.norm(x1)
        norm_x2 = np.linalg.norm(x2)

        # Cosine similarity is undefined for a zero vector; report the
        # distance as 1.0 in that case.
        if norm_x1 == 0 or norm_x2 == 0:
            return 1.0

        return 1 - np.dot(x1, x2) / (norm_x1 * norm_x2)

    def _distance(self, x1: np.ndarray, x2: np.ndarray) -> float:
        """
        Calculate distance based on metric

        Raises:
            ValueError: If ``self.metric`` is not a supported metric name.
        """
        if self.metric == 'euclidean':
            return self._euclidean_distance(x1, x2)
        elif self.metric == 'manhattan':
            return self._manhattan_distance(x1, x2)
        elif self.metric == 'cosine':
            return self._cosine_distance(x1, x2)
        else:
            raise ValueError(f"Unknown metric: {self.metric}")

    def _check_weights(self) -> None:
        """Raise ValueError when ``self.weights`` is not a supported scheme"""
        if self.weights not in ('uniform', 'distance'):
            raise ValueError(f"Unknown weights: {self.weights}")

    def fit(self, X: Union[np.ndarray, list], y: Union[np.ndarray, list]) -> 'KNearestNeighbors':
        """
        Fit KNN classifier (just store training data)

        Args:
            X: Training features (n_samples, n_features). A 1-D input is
                treated as n_samples rows of a single feature.
            y: Training targets (n_samples,)

        Returns:
            Self for method chaining

        Raises:
            ValueError: If y is not 1-D, the training set is empty, or X and
                y have different numbers of samples.
        """
        X = np.asarray(X)
        y = np.asarray(y)

        if X.ndim == 1:
            X = X.reshape(-1, 1)

        if y.ndim != 1:
            raise ValueError(f"y must be 1-D, got {y.ndim} dimensions")
        if X.shape[0] == 0:
            raise ValueError("Cannot fit on an empty training set")
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y have inconsistent numbers of samples: {X.shape[0]} != {y.shape[0]}"
            )

        self.X_train = X
        self.y_train = y
        self.fitted = True
        return self

    def _nearest_neighbors(self, sample: np.ndarray) -> list:
        """
        Return the ``n_neighbors`` closest training samples to ``sample``

        Returns:
            List of ``(distance, label)`` pairs sorted by ascending distance.
            It holds fewer than ``n_neighbors`` pairs when the training set is
            smaller than that.

        Raises:
            ValueError: If ``n_neighbors`` is smaller than 1.
        """
        if self.n_neighbors < 1:
            # A negative value would otherwise silently slice off the tail.
            raise ValueError(f"n_neighbors must be at least 1, got {self.n_neighbors}")

        ranked = [
            (self._distance(sample, train_sample), label)
            for train_sample, label in zip(self.X_train, self.y_train)
        ]
        # list.sort is stable, so equidistant samples keep training order.
        ranked.sort(key=lambda pair: pair[0])
        return ranked[:self.n_neighbors]

    def _predict_sample(self, sample: np.ndarray) -> Any:
        """
        Predict a single sample

        Raises:
            ValueError: If ``self.weights`` is not a supported scheme.
        """
        self._check_weights()
        neighbors = self._nearest_neighbors(sample)

        if self.weights == 'uniform':
            # Simple majority vote
            return Counter(label for _, label in neighbors).most_common(1)[0][0]

        # Weight each vote by inverse distance
        label_weights = {}
        for dist, label in neighbors:
            weight = 1 / (dist + 1e-15)  # Epsilon avoids division by zero on exact matches
            label_weights[label] = label_weights.get(label, 0) + weight

        return max(label_weights.items(), key=lambda item: item[1])[0]

    def predict(self, X: Union[np.ndarray, list]) -> np.ndarray:
        """
        Make predictions

        Args:
            X: Features to predict on (n_samples, n_features)

        Returns:
            Predictions (n_samples,)

        Raises:
            RuntimeError: If the model has not been fitted.
            ValueError: If ``metric``, ``weights`` or ``n_neighbors`` is invalid.
        """
        if not self.fitted:
            raise RuntimeError("Model must be fitted before making predictions")

        X = np.asarray(X)
        if X.ndim == 1:
            X = X.reshape(-1, 1)

        return np.array([self._predict_sample(sample) for sample in X])

    def predict_proba(self, X: Union[np.ndarray, list]) -> np.ndarray:
        """
        Predict class probabilities

        Args:
            X: Features to predict on (n_samples, n_features)

        Returns:
            Class probabilities (n_samples, n_classes); columns follow the
            sorted unique training labels and every row sums to 1.

        Raises:
            RuntimeError: If the model has not been fitted.
            ValueError: If ``metric``, ``weights`` or ``n_neighbors`` is invalid.
        """
        if not self.fitted:
            raise RuntimeError("Model must be fitted before making predictions")
        self._check_weights()

        X = np.asarray(X)
        if X.ndim == 1:
            X = X.reshape(-1, 1)

        unique_classes = np.unique(self.y_train)
        class_to_index = {cls: i for i, cls in enumerate(unique_classes)}

        probabilities = np.zeros((len(X), len(unique_classes)))

        for class_probs, sample in zip(probabilities, X):
            neighbors = self._nearest_neighbors(sample)

            if self.weights == 'uniform':
                for _, label in neighbors:
                    class_probs[class_to_index[label]] += 1
                # Normalise by the neighbours actually found: there are fewer
                # than n_neighbors when the training set is small.
                class_probs /= len(neighbors)
            else:
                total_weight = 0
                for dist, label in neighbors:
                    weight = 1 / (dist + 1e-15)
                    class_probs[class_to_index[label]] += weight
                    total_weight += weight

                if total_weight > 0:
                    class_probs /= total_weight

        return probabilities

    def score(self, X: Union[np.ndarray, list], y: Union[np.ndarray, list]) -> float:
        """
        Calculate accuracy score

        Args:
            X: Features
            y: True targets

        Returns:
            Accuracy score (fraction of predictions equal to ``y``)
        """
        y_pred = self.predict(X)
        y = np.asarray(y)

        return np.mean(y_pred == y)
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
class NaiveBayes:
    """
    Naive Bayes Classifier implementation

    Gaussian Naive Bayes for continuous features: each feature is modelled,
    per class, as an independent normal distribution whose mean and variance
    are estimated from the training data.

    Attributes:
        classes: Sorted unique training labels (``None`` before ``fit``).
        class_priors: Mapping ``{class: prior probability}``.
        feature_stats: Mapping
            ``{class: {feature_idx: {'mean': mean, 'var': var}}}``.
        fitted: Whether ``fit`` has completed.
    """

    def __init__(self):
        """Initialize Naive Bayes Classifier"""
        self.classes = None
        self.class_priors = {}
        self.feature_stats = {}  # {class: {feature_idx: {'mean': mean, 'var': var}}}
        self.fitted = False

    def fit(self, X: Union[np.ndarray, list], y: Union[np.ndarray, list]) -> 'NaiveBayes':
        """
        Fit Naive Bayes classifier

        Args:
            X: Training features (n_samples, n_features). A 1-D input is
                treated as n_samples rows of a single feature.
            y: Training targets (n_samples,)

        Returns:
            Self for method chaining

        Raises:
            ValueError: If X is not 1-D/2-D, the training set is empty, or X
                and y have different numbers of samples.
        """
        X = np.asarray(X)
        y = np.asarray(y)

        if X.ndim == 1:
            X = X.reshape(-1, 1)

        if X.ndim != 2:
            raise ValueError(f"X must be 1-D or 2-D, got {X.ndim} dimensions")
        if X.shape[0] == 0:
            raise ValueError("Cannot fit on an empty training set")
        if y.ndim == 0 or X.shape[0] != y.shape[0]:
            raise ValueError("X and y have inconsistent numbers of samples")

        n_samples, n_features = X.shape
        self.classes = np.unique(y)

        # Start from clean state so a refit does not keep stale classes.
        self.class_priors = {}
        self.feature_stats = {}

        for cls in self.classes:
            in_class = y == cls
            self.class_priors[cls] = np.sum(in_class) / n_samples

            class_samples = X[in_class]
            per_feature = {}
            for feature_idx in range(n_features):
                feature_values = class_samples[:, feature_idx]
                per_feature[feature_idx] = {
                    'mean': np.mean(feature_values),
                    'var': np.var(feature_values) + 1e-9  # Add small value to avoid division by zero
                }
            self.feature_stats[cls] = per_feature

        self.fitted = True
        return self

    def _gaussian_pdf(self, x: float, mean: float, var: float) -> float:
        """Calculate Gaussian probability density function"""
        return (1 / np.sqrt(2 * np.pi * var)) * np.exp(-0.5 * ((x - mean) ** 2) / var)

    def _gaussian_log_pdf(self, x: Union[float, np.ndarray], mean: Union[float, np.ndarray],
                          var: Union[float, np.ndarray]) -> Union[float, np.ndarray]:
        """
        Calculate the logarithm of the Gaussian probability density

        Evaluated directly in log space, so it stays finite where the plain
        density would underflow to zero. Arguments broadcast like NumPy
        arrays.
        """
        return -0.5 * (np.log(2 * np.pi * var) + ((x - mean) ** 2) / var)

    def predict_proba(self, X: Union[np.ndarray, list]) -> np.ndarray:
        """
        Predict class probabilities

        Args:
            X: Features to predict on (n_samples, n_features)

        Returns:
            Class probabilities (n_samples, n_classes); columns follow
            ``self.classes`` and every row sums to 1.

        Raises:
            RuntimeError: If the model has not been fitted.
        """
        if not self.fitted:
            raise RuntimeError("Model must be fitted before making predictions")

        X = np.asarray(X)
        if X.ndim == 1:
            X = X.reshape(-1, 1)

        n_samples, n_features = X.shape
        log_joint = np.zeros((n_samples, len(self.classes)))

        for j, cls in enumerate(self.classes):
            stats = self.feature_stats[cls]
            means = np.array([stats[idx]['mean'] for idx in range(n_features)])
            variances = np.array([stats[idx]['var'] for idx in range(n_features)])

            # log P(class) + sum over features of log N(x | mean, var).
            # Working with log-densities (not log of a density) keeps far-away
            # samples distinguishable instead of collapsing every class to
            # the same floor value.
            log_likelihood = np.sum(self._gaussian_log_pdf(X, means, variances), axis=1)
            log_joint[:, j] = np.log(self.class_priors[cls]) + log_likelihood

        # Softmax over classes; subtracting the row maximum avoids overflow
        # and guarantees at least one exp() term equals 1.
        shifted = np.exp(log_joint - np.max(log_joint, axis=1, keepdims=True))
        return shifted / np.sum(shifted, axis=1, keepdims=True)

    def predict(self, X: Union[np.ndarray, list]) -> np.ndarray:
        """
        Make predictions

        Args:
            X: Features to predict on (n_samples, n_features)

        Returns:
            Predictions (n_samples,)

        Raises:
            RuntimeError: If the model has not been fitted.
        """
        probabilities = self.predict_proba(X)
        class_indices = np.argmax(probabilities, axis=1)
        return self.classes[class_indices]

    def score(self, X: Union[np.ndarray, list], y: Union[np.ndarray, list]) -> float:
        """
        Calculate accuracy score

        Args:
            X: Features
            y: True targets

        Returns:
            Accuracy score (fraction of predictions equal to ``y``)
        """
        y_pred = self.predict(X)
        y = np.asarray(y)

        return np.mean(y_pred == y)
|