createsonline 0.1.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152) hide show
  1. createsonline/__init__.py +46 -0
  2. createsonline/admin/__init__.py +7 -0
  3. createsonline/admin/content.py +526 -0
  4. createsonline/admin/crud.py +805 -0
  5. createsonline/admin/field_builder.py +559 -0
  6. createsonline/admin/integration.py +482 -0
  7. createsonline/admin/interface.py +2562 -0
  8. createsonline/admin/model_creator.py +513 -0
  9. createsonline/admin/model_manager.py +388 -0
  10. createsonline/admin/modern_dashboard.py +498 -0
  11. createsonline/admin/permissions.py +264 -0
  12. createsonline/admin/user_forms.py +594 -0
  13. createsonline/ai/__init__.py +202 -0
  14. createsonline/ai/fields.py +1226 -0
  15. createsonline/ai/orm.py +325 -0
  16. createsonline/ai/services.py +1244 -0
  17. createsonline/app.py +506 -0
  18. createsonline/auth/__init__.py +8 -0
  19. createsonline/auth/management.py +228 -0
  20. createsonline/auth/models.py +552 -0
  21. createsonline/cli/__init__.py +5 -0
  22. createsonline/cli/commands/__init__.py +122 -0
  23. createsonline/cli/commands/database.py +416 -0
  24. createsonline/cli/commands/info.py +173 -0
  25. createsonline/cli/commands/initdb.py +218 -0
  26. createsonline/cli/commands/project.py +545 -0
  27. createsonline/cli/commands/serve.py +173 -0
  28. createsonline/cli/commands/shell.py +93 -0
  29. createsonline/cli/commands/users.py +148 -0
  30. createsonline/cli/main.py +2041 -0
  31. createsonline/cli/manage.py +274 -0
  32. createsonline/config/__init__.py +9 -0
  33. createsonline/config/app.py +2577 -0
  34. createsonline/config/database.py +179 -0
  35. createsonline/config/docs.py +384 -0
  36. createsonline/config/errors.py +160 -0
  37. createsonline/config/orm.py +43 -0
  38. createsonline/config/request.py +93 -0
  39. createsonline/config/settings.py +176 -0
  40. createsonline/data/__init__.py +23 -0
  41. createsonline/data/dataframe.py +925 -0
  42. createsonline/data/io.py +453 -0
  43. createsonline/data/series.py +557 -0
  44. createsonline/database/__init__.py +60 -0
  45. createsonline/database/abstraction.py +440 -0
  46. createsonline/database/assistant.py +585 -0
  47. createsonline/database/fields.py +442 -0
  48. createsonline/database/migrations.py +132 -0
  49. createsonline/database/models.py +604 -0
  50. createsonline/database.py +438 -0
  51. createsonline/http/__init__.py +28 -0
  52. createsonline/http/client.py +535 -0
  53. createsonline/ml/__init__.py +55 -0
  54. createsonline/ml/classification.py +552 -0
  55. createsonline/ml/clustering.py +680 -0
  56. createsonline/ml/metrics.py +542 -0
  57. createsonline/ml/neural.py +560 -0
  58. createsonline/ml/preprocessing.py +784 -0
  59. createsonline/ml/regression.py +501 -0
  60. createsonline/performance/__init__.py +19 -0
  61. createsonline/performance/cache.py +444 -0
  62. createsonline/performance/compression.py +335 -0
  63. createsonline/performance/core.py +419 -0
  64. createsonline/project_init.py +789 -0
  65. createsonline/routing.py +528 -0
  66. createsonline/security/__init__.py +34 -0
  67. createsonline/security/core.py +811 -0
  68. createsonline/security/encryption.py +349 -0
  69. createsonline/server.py +295 -0
  70. createsonline/static/css/admin.css +263 -0
  71. createsonline/static/css/common.css +358 -0
  72. createsonline/static/css/dashboard.css +89 -0
  73. createsonline/static/favicon.ico +0 -0
  74. createsonline/static/icons/icon-128x128.png +0 -0
  75. createsonline/static/icons/icon-128x128.webp +0 -0
  76. createsonline/static/icons/icon-16x16.png +0 -0
  77. createsonline/static/icons/icon-16x16.webp +0 -0
  78. createsonline/static/icons/icon-180x180.png +0 -0
  79. createsonline/static/icons/icon-180x180.webp +0 -0
  80. createsonline/static/icons/icon-192x192.png +0 -0
  81. createsonline/static/icons/icon-192x192.webp +0 -0
  82. createsonline/static/icons/icon-256x256.png +0 -0
  83. createsonline/static/icons/icon-256x256.webp +0 -0
  84. createsonline/static/icons/icon-32x32.png +0 -0
  85. createsonline/static/icons/icon-32x32.webp +0 -0
  86. createsonline/static/icons/icon-384x384.png +0 -0
  87. createsonline/static/icons/icon-384x384.webp +0 -0
  88. createsonline/static/icons/icon-48x48.png +0 -0
  89. createsonline/static/icons/icon-48x48.webp +0 -0
  90. createsonline/static/icons/icon-512x512.png +0 -0
  91. createsonline/static/icons/icon-512x512.webp +0 -0
  92. createsonline/static/icons/icon-64x64.png +0 -0
  93. createsonline/static/icons/icon-64x64.webp +0 -0
  94. createsonline/static/image/android-chrome-192x192.png +0 -0
  95. createsonline/static/image/android-chrome-512x512.png +0 -0
  96. createsonline/static/image/apple-touch-icon.png +0 -0
  97. createsonline/static/image/favicon-16x16.png +0 -0
  98. createsonline/static/image/favicon-32x32.png +0 -0
  99. createsonline/static/image/favicon.ico +0 -0
  100. createsonline/static/image/favicon.svg +17 -0
  101. createsonline/static/image/icon-128x128.png +0 -0
  102. createsonline/static/image/icon-128x128.webp +0 -0
  103. createsonline/static/image/icon-16x16.png +0 -0
  104. createsonline/static/image/icon-16x16.webp +0 -0
  105. createsonline/static/image/icon-180x180.png +0 -0
  106. createsonline/static/image/icon-180x180.webp +0 -0
  107. createsonline/static/image/icon-192x192.png +0 -0
  108. createsonline/static/image/icon-192x192.webp +0 -0
  109. createsonline/static/image/icon-256x256.png +0 -0
  110. createsonline/static/image/icon-256x256.webp +0 -0
  111. createsonline/static/image/icon-32x32.png +0 -0
  112. createsonline/static/image/icon-32x32.webp +0 -0
  113. createsonline/static/image/icon-384x384.png +0 -0
  114. createsonline/static/image/icon-384x384.webp +0 -0
  115. createsonline/static/image/icon-48x48.png +0 -0
  116. createsonline/static/image/icon-48x48.webp +0 -0
  117. createsonline/static/image/icon-512x512.png +0 -0
  118. createsonline/static/image/icon-512x512.webp +0 -0
  119. createsonline/static/image/icon-64x64.png +0 -0
  120. createsonline/static/image/icon-64x64.webp +0 -0
  121. createsonline/static/image/logo-header-h100.png +0 -0
  122. createsonline/static/image/logo-header-h100.webp +0 -0
  123. createsonline/static/image/logo-header-h200@2x.png +0 -0
  124. createsonline/static/image/logo-header-h200@2x.webp +0 -0
  125. createsonline/static/image/logo.png +0 -0
  126. createsonline/static/js/admin.js +274 -0
  127. createsonline/static/site.webmanifest +35 -0
  128. createsonline/static/templates/admin/base.html +87 -0
  129. createsonline/static/templates/admin/dashboard.html +217 -0
  130. createsonline/static/templates/admin/model_form.html +270 -0
  131. createsonline/static/templates/admin/model_list.html +202 -0
  132. createsonline/static/test_script.js +15 -0
  133. createsonline/static/test_styles.css +59 -0
  134. createsonline/static_files.py +365 -0
  135. createsonline/templates/404.html +100 -0
  136. createsonline/templates/admin_login.html +169 -0
  137. createsonline/templates/base.html +102 -0
  138. createsonline/templates/index.html +151 -0
  139. createsonline/templates.py +205 -0
  140. createsonline/testing.py +322 -0
  141. createsonline/utils.py +448 -0
  142. createsonline/validation/__init__.py +49 -0
  143. createsonline/validation/fields.py +598 -0
  144. createsonline/validation/models.py +504 -0
  145. createsonline/validation/validators.py +561 -0
  146. createsonline/views.py +184 -0
  147. createsonline-0.1.26.dist-info/METADATA +46 -0
  148. createsonline-0.1.26.dist-info/RECORD +152 -0
  149. createsonline-0.1.26.dist-info/WHEEL +5 -0
  150. createsonline-0.1.26.dist-info/entry_points.txt +2 -0
  151. createsonline-0.1.26.dist-info/licenses/LICENSE +21 -0
  152. createsonline-0.1.26.dist-info/top_level.txt +1 -0
@@ -0,0 +1,680 @@
1
+ """
2
+ CREATESONLINE Clustering Algorithms
3
+
4
+ Clustering implementations in Python, using NumPy for array operations.
5
+ """
6
+
7
+ import numpy as np
8
+ from typing import Optional, Union, List
9
+
10
+
11
class KMeans:
    """
    K-Means clustering (Lloyd's algorithm).

    Implemented in Python with NumPy for the array operations.

    Attributes set by ``fit``:
        cluster_centers_: Final centroids, shape (n_clusters, n_features)
        labels_: Index of the nearest final centroid for each training sample
        inertia_: Sum of squared distances of samples to their centroid
        n_iter_: Number of assign/update iterations that were run
        fitted: True once ``fit`` has completed
    """

    def __init__(self, n_clusters: int = 8, max_iter: int = 300, tol: float = 1e-4, random_state: Optional[int] = None):
        """
        Initialize K-Means clustering

        Args:
            n_clusters: Number of clusters
            max_iter: Maximum number of iterations
            tol: Tolerance for convergence (total centroid movement)
            random_state: Random seed for reproducibility
        """
        self.n_clusters = n_clusters
        self.max_iter = max_iter
        self.tol = tol
        self.random_state = random_state

        self.cluster_centers_ = None
        self.labels_ = None
        self.inertia_ = None
        self.n_iter_ = 0
        self.fitted = False

    @staticmethod
    def _as_2d(X: Union[np.ndarray, list]) -> np.ndarray:
        """Return X as an ndarray, treating 1-D input as a single feature column"""
        X = X if isinstance(X, np.ndarray) else np.array(X)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        return X

    @staticmethod
    def _pairwise_distances(X: np.ndarray, centroids: np.ndarray) -> np.ndarray:
        """Euclidean distance from every row of X to every centroid, shape (n_samples, n_centroids)"""
        centroids = np.asarray(centroids)
        return np.linalg.norm(X[:, np.newaxis, :] - centroids[np.newaxis, :, :], axis=2)

    def _init_centroids(self, X: np.ndarray) -> np.ndarray:
        """
        Pick initial centroids from the samples.

        Samples are drawn without replacement so that no two centroids start
        on the same row; replacement is only used when there are fewer
        samples than clusters.
        """
        n_samples = X.shape[0]

        # A local generator yields the same stream as np.random.seed(seed)
        # without reseeding the process-wide RNG. With no seed, the global
        # RNG is used so externally seeded runs stay reproducible.
        rng = np.random if self.random_state is None else np.random.RandomState(self.random_state)

        replace = self.n_clusters > n_samples
        chosen = rng.choice(n_samples, size=self.n_clusters, replace=replace)
        return X[chosen].astype(float)

    def _assign_clusters(self, X: np.ndarray, centroids: np.ndarray) -> np.ndarray:
        """Assign each point to the nearest centroid (ties go to the lowest index)"""
        distances = self._pairwise_distances(X, centroids)
        return np.argmin(distances, axis=1).astype(int)

    def _update_centroids(self, X: np.ndarray, labels: np.ndarray,
                          prev_centroids: Optional[np.ndarray] = None) -> np.ndarray:
        """
        Recompute each centroid as the mean of its assigned points.

        Args:
            X: Data (n_samples, n_features)
            labels: Cluster index per sample
            prev_centroids: Centroids used to produce ``labels``. An empty
                cluster keeps its row from here. When omitted, the stored
                ``cluster_centers_`` is used if available, else zeros.

        Returns:
            New centroids (n_clusters, n_features)
        """
        n_features = X.shape[1]
        centroids = np.zeros((self.n_clusters, n_features))

        fallback = prev_centroids if prev_centroids is not None else self.cluster_centers_

        for k in range(self.n_clusters):
            members = X[labels == k]
            if len(members) > 0:
                centroids[k] = np.mean(members, axis=0)
            elif fallback is not None:
                # Empty cluster: leave the centroid where it was
                centroids[k] = fallback[k]

        return centroids

    def _calculate_inertia(self, X: np.ndarray, labels: np.ndarray, centroids: np.ndarray) -> float:
        """Calculate within-cluster sum of squares (inertia)"""
        X = self._as_2d(X)
        if X.shape[0] == 0:
            return 0.0
        return float(np.sum((X - np.asarray(centroids)[labels]) ** 2))

    def fit(self, X: Union[np.ndarray, list]) -> 'KMeans':
        """
        Fit K-Means clustering

        Args:
            X: Training data (n_samples, n_features)

        Returns:
            Self for method chaining

        Raises:
            ValueError: If X contains no samples
        """
        X = self._as_2d(X)
        if X.shape[0] == 0:
            raise ValueError("Cannot fit KMeans on an empty dataset")

        centroids = self._init_centroids(X)
        self.n_iter_ = 0

        for iteration in range(self.max_iter):
            labels = self._assign_clusters(X, centroids)
            new_centroids = self._update_centroids(X, labels, centroids)

            # Total distance moved by all centroids in this iteration
            centroid_shift = np.sum(np.linalg.norm(new_centroids - centroids, axis=1))

            centroids = new_centroids
            self.n_iter_ = iteration + 1

            if centroid_shift < self.tol:
                break

        # Final assignment so labels and inertia always match the returned
        # centroids (also covers max_iter <= 0, where the loop never runs).
        labels = self._assign_clusters(X, centroids)

        self.cluster_centers_ = centroids
        self.labels_ = labels
        self.inertia_ = self._calculate_inertia(X, labels, centroids)
        self.fitted = True

        return self

    def predict(self, X: Union[np.ndarray, list]) -> np.ndarray:
        """
        Predict cluster labels for new data

        Args:
            X: Data to predict (n_samples, n_features)

        Returns:
            Cluster labels (n_samples,)

        Raises:
            RuntimeError: If the model has not been fitted
        """
        if not self.fitted:
            raise RuntimeError("Model must be fitted before making predictions")

        return self._assign_clusters(self._as_2d(X), self.cluster_centers_)

    def fit_predict(self, X: Union[np.ndarray, list]) -> np.ndarray:
        """
        Fit the model and predict cluster labels

        Args:
            X: Training data (n_samples, n_features)

        Returns:
            Cluster labels (n_samples,)
        """
        self.fit(X)
        return self.labels_

    def transform(self, X: Union[np.ndarray, list]) -> np.ndarray:
        """
        Transform data to cluster-distance space

        Args:
            X: Data to transform (n_samples, n_features)

        Returns:
            Distances to each cluster center (n_samples, n_clusters)

        Raises:
            RuntimeError: If the model has not been fitted
        """
        if not self.fitted:
            raise RuntimeError("Model must be fitted before transforming")

        return self._pairwise_distances(self._as_2d(X), self.cluster_centers_)

    def score(self, X: Union[np.ndarray, list]) -> float:
        """
        Calculate the negative inertia (higher is better)

        Args:
            X: Data to score

        Returns:
            Negative inertia
        """
        X = self._as_2d(X)
        labels = self.predict(X)
        return -self._calculate_inertia(X, labels, self.cluster_centers_)
197
+
198
+
199
class DBScan:
    """
    DBSCAN (Density-Based Spatial Clustering) implementation

    Implemented in Python with NumPy for the distance computations.

    Attributes set by ``fit``:
        labels_: Cluster index per sample, -1 for noise
        core_sample_indices_: Integer indices of core samples, in discovery order
        fitted: True once ``fit`` has completed
    """

    def __init__(self, eps: float = 0.5, min_samples: int = 5, metric: str = 'euclidean'):
        """
        Initialize DBSCAN clustering

        Args:
            eps: Maximum distance between two samples to be considered neighbors
            min_samples: Minimum number of samples in a neighborhood for a core point
                (the point itself is counted)
            metric: Distance metric ('euclidean', 'manhattan')
        """
        self.eps = eps
        self.min_samples = min_samples
        self.metric = metric

        self.labels_ = None
        self.core_sample_indices_ = None
        self.fitted = False

    def _distance(self, x1: np.ndarray, x2: np.ndarray) -> float:
        """
        Calculate distance between two points

        Raises:
            ValueError: If ``metric`` is not 'euclidean' or 'manhattan'
        """
        if self.metric == 'euclidean':
            return np.linalg.norm(x1 - x2)
        elif self.metric == 'manhattan':
            return np.sum(np.abs(x1 - x2))
        else:
            raise ValueError(f"Unknown metric: {self.metric}")

    def _get_neighbors(self, X: np.ndarray, point_idx: int) -> List[int]:
        """
        Get indices of all samples within eps of the given sample.

        The sample itself is included (its distance is 0). Distances to all
        rows are computed in one vectorised operation.

        Raises:
            ValueError: If ``metric`` is not 'euclidean' or 'manhattan'
        """
        offsets = X - X[point_idx]

        if self.metric == 'euclidean':
            distances = np.linalg.norm(offsets, axis=1)
        elif self.metric == 'manhattan':
            distances = np.sum(np.abs(offsets), axis=1)
        else:
            raise ValueError(f"Unknown metric: {self.metric}")

        return np.flatnonzero(distances <= self.eps).tolist()

    def fit(self, X: Union[np.ndarray, list]) -> 'DBScan':
        """
        Fit DBSCAN clustering

        Args:
            X: Training data (n_samples, n_features)

        Returns:
            Self for method chaining

        Raises:
            ValueError: If ``metric`` is unknown (raised on the first distance computation)
        """
        X = np.array(X) if not isinstance(X, np.ndarray) else X
        if X.ndim == 1:
            X = X.reshape(-1, 1)

        n_samples = X.shape[0]
        labels = np.full(n_samples, -1, dtype=int)  # -1 indicates noise
        visited = np.zeros(n_samples, dtype=bool)
        core_indices = []
        cluster_id = 0

        for point_idx in range(n_samples):
            if visited[point_idx]:
                continue

            visited[point_idx] = True
            neighbors = self._get_neighbors(X, point_idx)

            if len(neighbors) < self.min_samples:
                # Noise for now; may still be claimed as a border point later
                continue

            # Point is a core point: start a new cluster
            core_indices.append(point_idx)
            labels[point_idx] = cluster_id

            # Breadth-first expansion. `queued` keeps each index in the queue
            # at most once; only the first visit of an index has any effect,
            # so this does not change the result but bounds the queue size.
            seed_set = list(neighbors)
            queued = set(neighbors)
            i = 0
            while i < len(seed_set):
                neighbor_idx = seed_set[i]
                i += 1

                if not visited[neighbor_idx]:
                    visited[neighbor_idx] = True
                    neighbor_neighbors = self._get_neighbors(X, neighbor_idx)

                    if len(neighbor_neighbors) >= self.min_samples:
                        # Neighbor is also a core point: grow the frontier
                        core_indices.append(neighbor_idx)
                        for candidate in neighbor_neighbors:
                            if candidate not in queued:
                                queued.add(candidate)
                                seed_set.append(candidate)

                if labels[neighbor_idx] == -1:
                    # Unassigned (new, or earlier marked noise): border or core member
                    labels[neighbor_idx] = cluster_id

            cluster_id += 1

        self.labels_ = labels
        # Explicit dtype: an empty list would otherwise become a float array
        self.core_sample_indices_ = np.array(core_indices, dtype=int)
        self.fitted = True
        return self

    def fit_predict(self, X: Union[np.ndarray, list]) -> np.ndarray:
        """
        Fit the model and return cluster labels

        Args:
            X: Training data (n_samples, n_features)

        Returns:
            Cluster labels (n_samples,) - -1 indicates noise
        """
        self.fit(X)
        return self.labels_
317
+
318
+
319
class AgglomerativeClustering:
    """
    Agglomerative (Hierarchical) Clustering implementation

    Starts with every sample in its own cluster and repeatedly merges the
    two closest clusters, as measured by the linkage criterion, until
    ``n_clusters`` remain. Implemented in Python with NumPy.

    Attributes set by ``fit``:
        labels_: Cluster index per sample
        n_clusters_: Number of clusters actually produced
        fitted: True once ``fit`` has completed
    """

    def __init__(self, n_clusters: int = 2, linkage: str = 'ward', metric: str = 'euclidean'):
        """
        Initialize Agglomerative Clustering

        Args:
            n_clusters: Number of clusters to find
            linkage: Linkage criterion ('ward', 'complete', 'average', 'single')
            metric: Distance metric ('euclidean', 'manhattan'); not used by 'ward'
        """
        self.n_clusters = n_clusters
        self.linkage = linkage
        self.metric = metric

        self.labels_ = None
        self.n_clusters_ = None
        self.fitted = False

    def _distance(self, x1: np.ndarray, x2: np.ndarray) -> float:
        """
        Calculate distance between two points

        Raises:
            ValueError: If ``metric`` is not 'euclidean' or 'manhattan'
        """
        if self.metric == 'euclidean':
            return np.linalg.norm(x1 - x2)
        elif self.metric == 'manhattan':
            return np.sum(np.abs(x1 - x2))
        else:
            raise ValueError(f"Unknown metric: {self.metric}")

    def _pairwise_distances(self, A: np.ndarray, B: np.ndarray) -> np.ndarray:
        """Distance between every row of A and every row of B, shape (len(A), len(B))"""
        offsets = A[:, np.newaxis, :] - B[np.newaxis, :, :]

        if self.metric == 'euclidean':
            return np.linalg.norm(offsets, axis=2)
        elif self.metric == 'manhattan':
            return np.sum(np.abs(offsets), axis=2)
        else:
            raise ValueError(f"Unknown metric: {self.metric}")

    def _cluster_distance(self, cluster1: List[int], cluster2: List[int], X: np.ndarray) -> float:
        """
        Calculate distance between two clusters based on linkage criterion

        Args:
            cluster1: Sample indices of the first cluster
            cluster2: Sample indices of the second cluster
            X: Data (n_samples, n_features)

        Returns:
            Linkage distance. For 'ward' this is the increase in
            within-cluster sum of squares caused by merging.

        Raises:
            ValueError: If ``linkage`` or ``metric`` is unknown
        """
        if self.linkage == 'ward':
            points1, points2 = X[cluster1], X[cluster2]
            centroid1 = np.mean(points1, axis=0)
            centroid2 = np.mean(points2, axis=0)

            n1, n2 = len(cluster1), len(cluster2)
            merged_centroid = (n1 * centroid1 + n2 * centroid2) / (n1 + n2)

            # Sum of squares around the merged centroid minus the sum of
            # squares each cluster already had around its own centroid
            increase = (
                np.sum((points1 - merged_centroid) ** 2) - np.sum((points1 - centroid1) ** 2)
                + np.sum((points2 - merged_centroid) ** 2) - np.sum((points2 - centroid2) ** 2)
            )
            return float(increase)

        if self.linkage in ('single', 'complete', 'average'):
            distances = self._pairwise_distances(X[cluster1], X[cluster2])

            if distances.size == 0:
                # No pairs to compare: same neutral values as an empty scan
                return float('inf') if self.linkage == 'single' else 0.0

            if self.linkage == 'single':
                # Minimum distance between any two points
                return float(np.min(distances))
            if self.linkage == 'complete':
                # Maximum distance between any two points
                return float(np.max(distances))
            # Average distance between all pairs of points
            return float(np.mean(distances))

        raise ValueError(f"Unknown linkage: {self.linkage}")

    def fit(self, X: Union[np.ndarray, list]) -> 'AgglomerativeClustering':
        """
        Fit Agglomerative Clustering

        Args:
            X: Training data (n_samples, n_features)

        Returns:
            Self for method chaining

        Raises:
            ValueError: If ``n_clusters`` is less than 1, or if ``linkage`` /
                ``metric`` is unknown (raised on the first distance computation)
        """
        if self.n_clusters < 1:
            # Merging can never go below one cluster; without this check the
            # loop below would never terminate.
            raise ValueError(f"n_clusters must be at least 1, got {self.n_clusters}")

        X = np.array(X) if not isinstance(X, np.ndarray) else X
        if X.ndim == 1:
            X = X.reshape(-1, 1)

        n_samples = X.shape[0]

        # Initialize each point as its own cluster
        clusters = [[i] for i in range(n_samples)]

        # Merge clusters until we have the desired number
        while len(clusters) > self.n_clusters:
            min_dist = float('inf')
            merge_i, merge_j = -1, -1

            # Find the two closest clusters (first pair wins on ties)
            for i in range(len(clusters)):
                for j in range(i + 1, len(clusters)):
                    dist = self._cluster_distance(clusters[i], clusters[j], X)
                    if dist < min_dist:
                        min_dist = dist
                        merge_i, merge_j = i, j

            if merge_i == -1:
                # No comparable pair (e.g. every distance is NaN or inf):
                # stop instead of spinning forever.
                break

            clusters[merge_i].extend(clusters[merge_j])
            clusters.pop(merge_j)

        # Assign labels
        self.labels_ = np.zeros(n_samples, dtype=int)
        for cluster_id, cluster_points in enumerate(clusters):
            self.labels_[cluster_points] = cluster_id

        self.n_clusters_ = len(clusters)
        self.fitted = True
        return self

    def fit_predict(self, X: Union[np.ndarray, list]) -> np.ndarray:
        """
        Fit the model and return cluster labels

        Args:
            X: Training data (n_samples, n_features)

        Returns:
            Cluster labels (n_samples,)
        """
        self.fit(X)
        return self.labels_
465
+
466
+
467
class GaussianMixture:
    """
    Gaussian Mixture Model implementation using Expectation-Maximization

    Full-covariance components, implemented in Python with NumPy.

    Attributes set by ``fit``:
        weights_: Mixing weight per component, shape (n_components,)
        means_: Component means, shape (n_components, n_features)
        covariances_: Component covariances, shape (n_components, n_features, n_features)
        labels_: Most likely component for each training sample under the final parameters
        n_iter_: Number of EM iterations that were run
        fitted: True once ``fit`` has completed
    """

    def __init__(self, n_components: int = 1, max_iter: int = 100, tol: float = 1e-3, random_state: Optional[int] = None):
        """
        Initialize Gaussian Mixture Model

        Args:
            n_components: Number of mixture components
            max_iter: Maximum number of EM iterations
            tol: Tolerance for convergence (change in data log-likelihood)
            random_state: Random seed for reproducibility
        """
        self.n_components = n_components
        self.max_iter = max_iter
        self.tol = tol
        self.random_state = random_state

        self.weights_ = None
        self.means_ = None
        self.covariances_ = None
        self.labels_ = None
        self.n_iter_ = 0
        self.fitted = False

    @staticmethod
    def _as_2d(X: Union[np.ndarray, list]) -> np.ndarray:
        """Return X as an ndarray, treating 1-D input as a single feature column"""
        X = X if isinstance(X, np.ndarray) else np.array(X)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        return X

    def _initialize_parameters(self, X: np.ndarray):
        """
        Initialize GMM parameters: uniform weights, means drawn from the
        samples, identity covariances.

        Means are drawn without replacement so that two components cannot
        start identical (identical components receive identical updates and
        never separate); replacement is only used when there are fewer
        samples than components.
        """
        n_samples, n_features = X.shape

        # A local generator yields the same stream as np.random.seed(seed)
        # without reseeding the process-wide RNG. With no seed, the global
        # RNG is used so externally seeded runs stay reproducible.
        rng = np.random if self.random_state is None else np.random.RandomState(self.random_state)

        self.weights_ = np.ones(self.n_components) / self.n_components

        replace = self.n_components > n_samples
        chosen = rng.choice(n_samples, size=self.n_components, replace=replace)
        self.means_ = X[chosen].astype(float)

        self.covariances_ = np.array([np.eye(n_features) for _ in range(self.n_components)])

    def _multivariate_gaussian(self, X: np.ndarray, mean: np.ndarray, cov: np.ndarray) -> np.ndarray:
        """
        Calculate multivariate Gaussian probability density

        Args:
            X: Points to evaluate (n_samples, n_features)
            mean: Component mean (n_features,)
            cov: Component covariance (n_features, n_features)

        Returns:
            Density at each row of X (n_samples,)
        """
        n_features = X.shape[1]

        # Add small regularization to diagonal for numerical stability
        cov_reg = cov + 1e-6 * np.eye(n_features)

        try:
            cov_inv = np.linalg.inv(cov_reg)
            cov_det = np.linalg.det(cov_reg)
        except np.linalg.LinAlgError:
            # Singular even after regularization: fall back to a unit covariance
            cov_inv = np.eye(n_features)
            cov_det = 1.0

        if cov_det <= 0:
            # Keep the normalization term finite
            cov_det = 1e-6

        diff = X - mean
        # Row-wise quadratic form diff_i^T * cov_inv * diff_i
        exponent = -0.5 * np.sum((diff @ cov_inv) * diff, axis=1)

        normalization = 1.0 / np.sqrt((2 * np.pi) ** n_features * cov_det)

        return normalization * np.exp(exponent)

    def _weighted_densities(self, X: np.ndarray) -> np.ndarray:
        """Unnormalized responsibilities weight_k * N(x | mean_k, cov_k), shape (n_samples, n_components)"""
        weighted = np.zeros((X.shape[0], self.n_components))
        for k in range(self.n_components):
            weighted[:, k] = self.weights_[k] * self._multivariate_gaussian(
                X, self.means_[k], self.covariances_[k]
            )
        return weighted

    def fit(self, X: Union[np.ndarray, list]) -> 'GaussianMixture':
        """
        Fit Gaussian Mixture Model using EM algorithm

        Args:
            X: Training data (n_samples, n_features)

        Returns:
            Self for method chaining

        Raises:
            ValueError: If X contains no samples
        """
        X = self._as_2d(X)
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("Cannot fit GaussianMixture on an empty dataset")

        self._initialize_parameters(X)

        prev_log_likelihood = -np.inf
        self.n_iter_ = 0

        for iteration in range(self.max_iter):
            # E-step: per-sample mixture density and responsibilities
            weighted = self._weighted_densities(X)
            sample_density = np.sum(weighted, axis=1, keepdims=True)
            sample_density[sample_density == 0] = 1e-15  # Avoid division by zero / log(0)

            # The log-likelihood must use the mixture density *before*
            # normalization; normalized rows sum to 1 and would always
            # give ~0, making EM stop after two iterations.
            log_likelihood = float(np.sum(np.log(sample_density)))

            responsibilities = weighted / sample_density

            # M-step: Update parameters
            N_k = np.sum(responsibilities, axis=0)
            self.weights_ = N_k / n_samples

            for k in range(self.n_components):
                if N_k[k] > 0:
                    resp_k = responsibilities[:, k:k + 1]
                    self.means_[k] = np.sum(resp_k * X, axis=0) / N_k[k]

                    diff = X - self.means_[k]
                    self.covariances_[k] = ((resp_k * diff).T @ diff) / N_k[k]

                    # Add regularization
                    self.covariances_[k] += 1e-6 * np.eye(n_features)

            self.n_iter_ = iteration + 1

            # Check for convergence
            if abs(log_likelihood - prev_log_likelihood) < self.tol:
                break

            prev_log_likelihood = log_likelihood

        # Label with the final parameters so that labels_ agrees with
        # predict(X); this also covers max_iter <= 0.
        self.labels_ = np.argmax(self._weighted_densities(X), axis=1)
        self.fitted = True

        return self

    def predict(self, X: Union[np.ndarray, list]) -> np.ndarray:
        """
        Predict cluster labels for new data

        Args:
            X: Data to predict (n_samples, n_features)

        Returns:
            Cluster labels (n_samples,)

        Raises:
            RuntimeError: If the model has not been fitted
        """
        if not self.fitted:
            raise RuntimeError("Model must be fitted before making predictions")

        return np.argmax(self._weighted_densities(self._as_2d(X)), axis=1)

    def predict_proba(self, X: Union[np.ndarray, list]) -> np.ndarray:
        """
        Predict cluster probabilities for new data

        Args:
            X: Data to predict (n_samples, n_features)

        Returns:
            Cluster probabilities (n_samples, n_components)

        Raises:
            RuntimeError: If the model has not been fitted
        """
        if not self.fitted:
            raise RuntimeError("Model must be fitted before making predictions")

        responsibilities = self._weighted_densities(self._as_2d(X))

        # Normalize
        total_responsibility = np.sum(responsibilities, axis=1, keepdims=True)
        total_responsibility[total_responsibility == 0] = 1e-15
        responsibilities /= total_responsibility

        return responsibilities

    def fit_predict(self, X: Union[np.ndarray, list]) -> np.ndarray:
        """
        Fit the model and return cluster labels

        Args:
            X: Training data (n_samples, n_features)

        Returns:
            Cluster labels (n_samples,)
        """
        self.fit(X)
        return self.labels_