createsonline 0.1.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152) hide show
  1. createsonline/__init__.py +46 -0
  2. createsonline/admin/__init__.py +7 -0
  3. createsonline/admin/content.py +526 -0
  4. createsonline/admin/crud.py +805 -0
  5. createsonline/admin/field_builder.py +559 -0
  6. createsonline/admin/integration.py +482 -0
  7. createsonline/admin/interface.py +2562 -0
  8. createsonline/admin/model_creator.py +513 -0
  9. createsonline/admin/model_manager.py +388 -0
  10. createsonline/admin/modern_dashboard.py +498 -0
  11. createsonline/admin/permissions.py +264 -0
  12. createsonline/admin/user_forms.py +594 -0
  13. createsonline/ai/__init__.py +202 -0
  14. createsonline/ai/fields.py +1226 -0
  15. createsonline/ai/orm.py +325 -0
  16. createsonline/ai/services.py +1244 -0
  17. createsonline/app.py +506 -0
  18. createsonline/auth/__init__.py +8 -0
  19. createsonline/auth/management.py +228 -0
  20. createsonline/auth/models.py +552 -0
  21. createsonline/cli/__init__.py +5 -0
  22. createsonline/cli/commands/__init__.py +122 -0
  23. createsonline/cli/commands/database.py +416 -0
  24. createsonline/cli/commands/info.py +173 -0
  25. createsonline/cli/commands/initdb.py +218 -0
  26. createsonline/cli/commands/project.py +545 -0
  27. createsonline/cli/commands/serve.py +173 -0
  28. createsonline/cli/commands/shell.py +93 -0
  29. createsonline/cli/commands/users.py +148 -0
  30. createsonline/cli/main.py +2041 -0
  31. createsonline/cli/manage.py +274 -0
  32. createsonline/config/__init__.py +9 -0
  33. createsonline/config/app.py +2577 -0
  34. createsonline/config/database.py +179 -0
  35. createsonline/config/docs.py +384 -0
  36. createsonline/config/errors.py +160 -0
  37. createsonline/config/orm.py +43 -0
  38. createsonline/config/request.py +93 -0
  39. createsonline/config/settings.py +176 -0
  40. createsonline/data/__init__.py +23 -0
  41. createsonline/data/dataframe.py +925 -0
  42. createsonline/data/io.py +453 -0
  43. createsonline/data/series.py +557 -0
  44. createsonline/database/__init__.py +60 -0
  45. createsonline/database/abstraction.py +440 -0
  46. createsonline/database/assistant.py +585 -0
  47. createsonline/database/fields.py +442 -0
  48. createsonline/database/migrations.py +132 -0
  49. createsonline/database/models.py +604 -0
  50. createsonline/database.py +438 -0
  51. createsonline/http/__init__.py +28 -0
  52. createsonline/http/client.py +535 -0
  53. createsonline/ml/__init__.py +55 -0
  54. createsonline/ml/classification.py +552 -0
  55. createsonline/ml/clustering.py +680 -0
  56. createsonline/ml/metrics.py +542 -0
  57. createsonline/ml/neural.py +560 -0
  58. createsonline/ml/preprocessing.py +784 -0
  59. createsonline/ml/regression.py +501 -0
  60. createsonline/performance/__init__.py +19 -0
  61. createsonline/performance/cache.py +444 -0
  62. createsonline/performance/compression.py +335 -0
  63. createsonline/performance/core.py +419 -0
  64. createsonline/project_init.py +789 -0
  65. createsonline/routing.py +528 -0
  66. createsonline/security/__init__.py +34 -0
  67. createsonline/security/core.py +811 -0
  68. createsonline/security/encryption.py +349 -0
  69. createsonline/server.py +295 -0
  70. createsonline/static/css/admin.css +263 -0
  71. createsonline/static/css/common.css +358 -0
  72. createsonline/static/css/dashboard.css +89 -0
  73. createsonline/static/favicon.ico +0 -0
  74. createsonline/static/icons/icon-128x128.png +0 -0
  75. createsonline/static/icons/icon-128x128.webp +0 -0
  76. createsonline/static/icons/icon-16x16.png +0 -0
  77. createsonline/static/icons/icon-16x16.webp +0 -0
  78. createsonline/static/icons/icon-180x180.png +0 -0
  79. createsonline/static/icons/icon-180x180.webp +0 -0
  80. createsonline/static/icons/icon-192x192.png +0 -0
  81. createsonline/static/icons/icon-192x192.webp +0 -0
  82. createsonline/static/icons/icon-256x256.png +0 -0
  83. createsonline/static/icons/icon-256x256.webp +0 -0
  84. createsonline/static/icons/icon-32x32.png +0 -0
  85. createsonline/static/icons/icon-32x32.webp +0 -0
  86. createsonline/static/icons/icon-384x384.png +0 -0
  87. createsonline/static/icons/icon-384x384.webp +0 -0
  88. createsonline/static/icons/icon-48x48.png +0 -0
  89. createsonline/static/icons/icon-48x48.webp +0 -0
  90. createsonline/static/icons/icon-512x512.png +0 -0
  91. createsonline/static/icons/icon-512x512.webp +0 -0
  92. createsonline/static/icons/icon-64x64.png +0 -0
  93. createsonline/static/icons/icon-64x64.webp +0 -0
  94. createsonline/static/image/android-chrome-192x192.png +0 -0
  95. createsonline/static/image/android-chrome-512x512.png +0 -0
  96. createsonline/static/image/apple-touch-icon.png +0 -0
  97. createsonline/static/image/favicon-16x16.png +0 -0
  98. createsonline/static/image/favicon-32x32.png +0 -0
  99. createsonline/static/image/favicon.ico +0 -0
  100. createsonline/static/image/favicon.svg +17 -0
  101. createsonline/static/image/icon-128x128.png +0 -0
  102. createsonline/static/image/icon-128x128.webp +0 -0
  103. createsonline/static/image/icon-16x16.png +0 -0
  104. createsonline/static/image/icon-16x16.webp +0 -0
  105. createsonline/static/image/icon-180x180.png +0 -0
  106. createsonline/static/image/icon-180x180.webp +0 -0
  107. createsonline/static/image/icon-192x192.png +0 -0
  108. createsonline/static/image/icon-192x192.webp +0 -0
  109. createsonline/static/image/icon-256x256.png +0 -0
  110. createsonline/static/image/icon-256x256.webp +0 -0
  111. createsonline/static/image/icon-32x32.png +0 -0
  112. createsonline/static/image/icon-32x32.webp +0 -0
  113. createsonline/static/image/icon-384x384.png +0 -0
  114. createsonline/static/image/icon-384x384.webp +0 -0
  115. createsonline/static/image/icon-48x48.png +0 -0
  116. createsonline/static/image/icon-48x48.webp +0 -0
  117. createsonline/static/image/icon-512x512.png +0 -0
  118. createsonline/static/image/icon-512x512.webp +0 -0
  119. createsonline/static/image/icon-64x64.png +0 -0
  120. createsonline/static/image/icon-64x64.webp +0 -0
  121. createsonline/static/image/logo-header-h100.png +0 -0
  122. createsonline/static/image/logo-header-h100.webp +0 -0
  123. createsonline/static/image/logo-header-h200@2x.png +0 -0
  124. createsonline/static/image/logo-header-h200@2x.webp +0 -0
  125. createsonline/static/image/logo.png +0 -0
  126. createsonline/static/js/admin.js +274 -0
  127. createsonline/static/site.webmanifest +35 -0
  128. createsonline/static/templates/admin/base.html +87 -0
  129. createsonline/static/templates/admin/dashboard.html +217 -0
  130. createsonline/static/templates/admin/model_form.html +270 -0
  131. createsonline/static/templates/admin/model_list.html +202 -0
  132. createsonline/static/test_script.js +15 -0
  133. createsonline/static/test_styles.css +59 -0
  134. createsonline/static_files.py +365 -0
  135. createsonline/templates/404.html +100 -0
  136. createsonline/templates/admin_login.html +169 -0
  137. createsonline/templates/base.html +102 -0
  138. createsonline/templates/index.html +151 -0
  139. createsonline/templates.py +205 -0
  140. createsonline/testing.py +322 -0
  141. createsonline/utils.py +448 -0
  142. createsonline/validation/__init__.py +49 -0
  143. createsonline/validation/fields.py +598 -0
  144. createsonline/validation/models.py +504 -0
  145. createsonline/validation/validators.py +561 -0
  146. createsonline/views.py +184 -0
  147. createsonline-0.1.26.dist-info/METADATA +46 -0
  148. createsonline-0.1.26.dist-info/RECORD +152 -0
  149. createsonline-0.1.26.dist-info/WHEEL +5 -0
  150. createsonline-0.1.26.dist-info/entry_points.txt +2 -0
  151. createsonline-0.1.26.dist-info/licenses/LICENSE +21 -0
  152. createsonline-0.1.26.dist-info/top_level.txt +1 -0
@@ -0,0 +1,784 @@
1
+ """
2
+ CREATESONLINE ML Preprocessing
3
+
4
+ Pure Python preprocessing utilities.
5
+ """
6
+
7
+ import numpy as np
8
+ from typing import Union, List, Optional, Tuple, Dict
9
+ import random
10
+ import math
11
+
12
+
13
class StandardScaler:
    """
    Standardize features by removing the mean and scaling to unit variance.

    Per-column statistics are learned by fit(); columns with zero variance
    keep a scale of 1.0 so transforming them is a pure shift (no divide by
    zero). Pure Python implementation with numpy.
    """

    def __init__(self):
        """Create an unfitted scaler; statistics are populated by fit()."""
        self.mean_ = None    # per-feature mean, set by fit()
        self.scale_ = None   # per-feature std deviation (1.0 for constant columns)
        self.var_ = None     # per-feature variance
        self.fitted = False  # guards transform()/inverse_transform()

    @staticmethod
    def _prepare(data: Union[np.ndarray, list]) -> np.ndarray:
        """Return *data* as a 2-D array, promoting 1-D input to one column."""
        arr = np.asarray(data)
        return arr.reshape(-1, 1) if arr.ndim == 1 else arr

    def fit(self, X: Union[np.ndarray, list]) -> 'StandardScaler':
        """
        Learn per-feature mean and standard deviation from X.

        Args:
            X: Training data (n_samples, n_features)

        Returns:
            Self for method chaining
        """
        arr = self._prepare(X)
        self.mean_ = arr.mean(axis=0)
        self.var_ = arr.var(axis=0)
        std = np.sqrt(self.var_)
        # Constant columns would otherwise divide by zero in transform().
        self.scale_ = np.where(std == 0, 1.0, std)
        self.fitted = True
        return self

    def transform(self, X: Union[np.ndarray, list]) -> np.ndarray:
        """
        Center and scale X using the fitted statistics.

        Args:
            X: Data to transform (n_samples, n_features)

        Returns:
            Transformed data

        Raises:
            RuntimeError: If called before fit().
        """
        if not self.fitted:
            raise RuntimeError("Scaler must be fitted before transforming")
        return (self._prepare(X) - self.mean_) / self.scale_

    def fit_transform(self, X: Union[np.ndarray, list]) -> np.ndarray:
        """
        Fit to data, then transform the same data.

        Args:
            X: Data to fit and transform

        Returns:
            Transformed data
        """
        return self.fit(X).transform(X)

    def inverse_transform(self, X: Union[np.ndarray, list]) -> np.ndarray:
        """
        Map standardized values back to the original scale.

        Args:
            X: Transformed data

        Returns:
            Original scale data

        Raises:
            RuntimeError: If called before fit().
        """
        if not self.fitted:
            raise RuntimeError("Scaler must be fitted before inverse transforming")
        return self._prepare(X) * self.scale_ + self.mean_
+
101
+
102
class MinMaxScaler:
    """
    Linearly rescale each feature into ``feature_range`` (default (0, 1)).

    Constant columns get a data range of 1.0 so transform() maps them to the
    lower bound of the target range without dividing by zero. Pure Python
    implementation with numpy.
    """

    def __init__(self, feature_range: Tuple[float, float] = (0, 1)):
        """
        Store the target range; statistics are populated by fit().

        Args:
            feature_range: Desired range of transformed data
        """
        self.feature_range = feature_range
        self.min_ = None         # additive offset applied after scaling
        self.scale_ = None       # multiplicative factor per feature
        self.data_min_ = None    # per-feature minimum of the training data
        self.data_max_ = None    # per-feature maximum of the training data
        self.data_range_ = None  # data_max_ - data_min_ (1.0 for constants)
        self.fitted = False

    @staticmethod
    def _prepare(data: Union[np.ndarray, list]) -> np.ndarray:
        """Return *data* as a 2-D array, promoting 1-D input to one column."""
        arr = np.asarray(data)
        return arr.reshape(-1, 1) if arr.ndim == 1 else arr

    def fit(self, X: Union[np.ndarray, list]) -> 'MinMaxScaler':
        """
        Learn per-feature minimum and maximum from X.

        Args:
            X: Training data (n_samples, n_features)

        Returns:
            Self for method chaining
        """
        arr = self._prepare(X)
        self.data_min_ = arr.min(axis=0)
        self.data_max_ = arr.max(axis=0)
        spread = self.data_max_ - self.data_min_
        # Constant features would otherwise divide by zero below.
        self.data_range_ = np.where(spread == 0, 1.0, spread)

        lo, hi = self.feature_range
        self.scale_ = (hi - lo) / self.data_range_
        self.min_ = lo - self.data_min_ * self.scale_

        self.fitted = True
        return self

    def transform(self, X: Union[np.ndarray, list]) -> np.ndarray:
        """
        Scale X into the configured feature range.

        Args:
            X: Data to transform (n_samples, n_features)

        Returns:
            Transformed data

        Raises:
            RuntimeError: If called before fit().
        """
        if not self.fitted:
            raise RuntimeError("Scaler must be fitted before transforming")
        return self._prepare(X) * self.scale_ + self.min_

    def fit_transform(self, X: Union[np.ndarray, list]) -> np.ndarray:
        """
        Fit to data, then transform the same data.

        Args:
            X: Data to fit and transform

        Returns:
            Transformed data
        """
        return self.fit(X).transform(X)

    def inverse_transform(self, X: Union[np.ndarray, list]) -> np.ndarray:
        """
        Undo the scaling and return data on the original scale.

        Args:
            X: Transformed data

        Returns:
            Original scale data

        Raises:
            RuntimeError: If called before fit().
        """
        if not self.fitted:
            raise RuntimeError("Scaler must be fitted before inverse transforming")
        return (self._prepare(X) - self.min_) / self.scale_
+
202
+
203
class LabelEncoder:
    """
    Encode categorical labels as integers 0..n_classes-1.

    Classes are discovered by fit() in sorted order (via np.unique); labels
    not seen during fit() raise ValueError at transform time. Pure Python
    implementation.
    """

    def __init__(self):
        """Create an unfitted encoder; mappings are populated by fit()."""
        self.classes_ = None          # sorted unique labels from fit()
        self.class_to_index_ = None   # label -> integer code
        self.fitted = False

    def fit(self, y: Union[np.ndarray, list]) -> 'LabelEncoder':
        """
        Learn the label-to-code mapping from y.

        Args:
            y: Target values

        Returns:
            Self for method chaining
        """
        labels = np.asarray(y)
        self.classes_ = np.unique(labels)
        self.class_to_index_ = dict(zip(self.classes_, range(len(self.classes_))))
        self.fitted = True
        return self

    def transform(self, y: Union[np.ndarray, list]) -> np.ndarray:
        """
        Map labels to their integer codes.

        Args:
            y: Target values

        Returns:
            Encoded labels (int array)

        Raises:
            RuntimeError: If called before fit().
            ValueError: If y contains a label unseen during fit().
        """
        if not self.fitted:
            raise RuntimeError("LabelEncoder must be fitted before transforming")

        lookup = self.class_to_index_
        codes = []
        for label in np.asarray(y):
            try:
                codes.append(lookup[label])
            except KeyError:
                raise ValueError(f"Unseen label: {label}") from None
        return np.array(codes, dtype=int)

    def fit_transform(self, y: Union[np.ndarray, list]) -> np.ndarray:
        """
        Fit the encoder and return the encoded labels.

        Args:
            y: Target values

        Returns:
            Encoded labels
        """
        return self.fit(y).transform(y)

    def inverse_transform(self, y: Union[np.ndarray, list]) -> np.ndarray:
        """
        Map integer codes back to the original labels.

        Args:
            y: Encoded labels

        Returns:
            Original labels (object array)

        Raises:
            RuntimeError: If called before fit().
            ValueError: If a code is outside [0, n_classes).
        """
        if not self.fitted:
            raise RuntimeError("LabelEncoder must be fitted before inverse transforming")

        codes = np.asarray(y)
        n_classes = len(self.classes_)
        decoded = np.zeros(len(codes), dtype=object)
        for pos, code in enumerate(codes):
            if not (0 <= code < n_classes):
                raise ValueError(f"Invalid encoded label: {code}")
            decoded[pos] = self.classes_[code]
        return decoded
+
294
+
295
class OneHotEncoder:
    """
    Encode categorical feature columns as one-hot indicator columns.

    Each input column contributes one indicator column per category observed
    at fit() time (minus the first category when ``drop_first`` is set).
    Values unseen during fit() yield all-zero indicator segments. Pure Python
    implementation.
    """

    def __init__(self, drop_first: bool = False):
        """
        Initialize OneHotEncoder.

        Args:
            drop_first: Whether to drop the first category of each column to
                avoid multicollinearity
        """
        self.drop_first = drop_first
        self.categories_ = None      # per-column sorted unique categories
        self.n_features_out_ = None  # total number of indicator columns
        self.fitted = False

    @staticmethod
    def _prepare(data: Union[np.ndarray, list]) -> np.ndarray:
        """Return *data* as a 2-D array, promoting 1-D input to one column."""
        arr = np.asarray(data)
        return arr.reshape(-1, 1) if arr.ndim == 1 else arr

    def fit(self, X: Union[np.ndarray, list]) -> 'OneHotEncoder':
        """
        Learn the categories of each column.

        Args:
            X: Categorical data (n_samples, n_features)

        Returns:
            Self for method chaining
        """
        arr = self._prepare(X)
        self.categories_ = [np.unique(arr[:, col]) for col in range(arr.shape[1])]
        skipped = 1 if self.drop_first else 0
        self.n_features_out_ = sum(len(cats) - skipped for cats in self.categories_)
        self.fitted = True
        return self

    def transform(self, X: Union[np.ndarray, list]) -> np.ndarray:
        """
        One-hot encode X using the fitted categories.

        Args:
            X: Categorical data (n_samples, n_features)

        Returns:
            One-hot encoded data

        Raises:
            RuntimeError: If called before fit().
        """
        if not self.fitted:
            raise RuntimeError("OneHotEncoder must be fitted before transforming")

        arr = self._prepare(X)
        result = np.zeros((arr.shape[0], self.n_features_out_))

        out_col = 0
        for in_col in range(arr.shape[1]):
            cats = self.categories_[in_col]
            encoded_cats = cats[1:] if self.drop_first else cats
            column = arr[:, in_col]
            for cat in encoded_cats:
                result[column == cat, out_col] = 1
                out_col += 1

        return result

    def fit_transform(self, X: Union[np.ndarray, list]) -> np.ndarray:
        """
        Fit the encoder, then transform the same data.

        Args:
            X: Categorical data

        Returns:
            One-hot encoded data
        """
        return self.fit(X).transform(X)
+
389
+
390
class PolynomialFeatures:
    """
    Generate polynomial and interaction features.

    For each sample, one output column is produced per product of input
    features up to ``degree``. With ``interaction_only=True`` only products
    of *distinct* features (degree >= 2) plus the optional bias column are
    produced, matching the count computed in fit().

    Fix: the previous transform() emitted only per-feature powers plus
    degree-2 cross terms, so its output width disagreed with the
    ``n_output_features_`` computed in fit() whenever degree > 2. Columns
    are now generated from the exact index combinations the fit-time
    formula counts.
    """

    def __init__(self, degree: int = 2, include_bias: bool = True, interaction_only: bool = False):
        """
        Initialize PolynomialFeatures.

        Args:
            degree: Maximum degree of polynomial features
            include_bias: Whether to include bias column (all ones)
            interaction_only: Whether to produce interaction features only
        """
        self.degree = degree
        self.include_bias = include_bias
        self.interaction_only = interaction_only
        self.n_input_features_ = None   # columns seen at fit() time
        self.n_output_features_ = None  # columns transform() will produce
        self.fitted = False

    @staticmethod
    def _prepare(data: Union[np.ndarray, list]) -> np.ndarray:
        """Return *data* as a 2-D array, promoting 1-D input to one column."""
        arr = np.asarray(data)
        return arr.reshape(-1, 1) if arr.ndim == 1 else arr

    def _index_combinations(self):
        """
        Yield one tuple of feature indices per output column.

        The product of the indexed features is that column's value; the
        empty tuple denotes the bias column. Yield order defines output
        column order: by degree, then lexicographically within a degree.
        """
        from itertools import combinations, combinations_with_replacement

        n = self.n_input_features_
        if self.interaction_only:
            # Bias plus products of distinct features of degree 2..degree
            # (degree-1 terms are intentionally excluded, matching fit()).
            if self.include_bias:
                yield ()
            for d in range(2, self.degree + 1):
                yield from combinations(range(n), d)
        else:
            # Full expansion: every multiset of features of size 0..degree.
            start = 0 if self.include_bias else 1
            for d in range(start, self.degree + 1):
                yield from combinations_with_replacement(range(n), d)

    def fit(self, X: Union[np.ndarray, list]) -> 'PolynomialFeatures':
        """
        Record the input width and compute the number of output features.

        Args:
            X: Input data (n_samples, n_features)

        Returns:
            Self for method chaining
        """
        X = self._prepare(X)
        self.n_input_features_ = X.shape[1]

        if self.interaction_only:
            # bias + C(n, d) distinct-feature products for each degree d
            self.n_output_features_ = 1 + sum(
                math.comb(self.n_input_features_, d)
                for d in range(2, self.degree + 1)
            )
            if not self.include_bias:
                self.n_output_features_ -= 1
        else:
            # Stars-and-bars: multisets of size <= degree from n features.
            self.n_output_features_ = math.comb(
                self.n_input_features_ + self.degree, self.degree
            )
            if not self.include_bias:
                self.n_output_features_ -= 1

        self.fitted = True
        return self

    def transform(self, X: Union[np.ndarray, list]) -> np.ndarray:
        """
        Transform data to polynomial features.

        Args:
            X: Input data (n_samples, n_features); must have the same number
                of features as the data passed to fit()

        Returns:
            Polynomial features of shape (n_samples, n_output_features_)

        Raises:
            RuntimeError: If called before fit().
            ValueError: If X has a different number of features than fit().
        """
        if not self.fitted:
            raise RuntimeError("PolynomialFeatures must be fitted before transforming")

        X = self._prepare(X)
        if X.shape[1] != self.n_input_features_:
            raise ValueError(
                f"X has {X.shape[1]} features, but PolynomialFeatures was "
                f"fitted with {self.n_input_features_}"
            )

        # Product over an empty index list is all-ones, i.e. the bias column.
        columns = [
            np.prod(X[:, list(combo)], axis=1)
            for combo in self._index_combinations()
        ]
        return np.column_stack(columns)

    def fit_transform(self, X: Union[np.ndarray, list]) -> np.ndarray:
        """
        Fit to data, then transform it.

        Args:
            X: Input data

        Returns:
            Polynomial features
        """
        return self.fit(X).transform(X)
+
509
+
510
+ # Data splitting functions
511
+
512
+ def train_test_split(
513
+ *arrays,
514
+ test_size: Union[float, int] = 0.25,
515
+ train_size: Optional[Union[float, int]] = None,
516
+ random_state: Optional[int] = None,
517
+ shuffle: bool = True,
518
+ stratify: Optional[Union[np.ndarray, list]] = None
519
+ ) -> List[np.ndarray]:
520
+ """
521
+ Split arrays into random train and test subsets
522
+
523
+ Args:
524
+ *arrays: Sequence of indexables with same length
525
+ test_size: Proportion or absolute number of test samples
526
+ train_size: Proportion or absolute number of train samples
527
+ random_state: Random seed for reproducibility
528
+ shuffle: Whether to shuffle data before splitting
529
+ stratify: Array for stratified splitting
530
+
531
+ Returns:
532
+ List of train-test splits of inputs
533
+ """
534
+ if len(arrays) == 0:
535
+ raise ValueError("At least one array required as input")
536
+
537
+ # Convert to numpy arrays
538
+ arrays = [np.array(arr) if not isinstance(arr, np.ndarray) else arr for arr in arrays]
539
+
540
+ # Check that all arrays have the same length
541
+ n_samples = len(arrays[0])
542
+ for arr in arrays[1:]:
543
+ if len(arr) != n_samples:
544
+ raise ValueError("All arrays must have the same length")
545
+
546
+ if random_state is not None:
547
+ random.seed(random_state)
548
+ np.random.seed(random_state)
549
+
550
+ # Calculate split sizes
551
+ if isinstance(test_size, float):
552
+ test_size = int(n_samples * test_size)
553
+
554
+ if train_size is not None:
555
+ if isinstance(train_size, float):
556
+ train_size = int(n_samples * train_size)
557
+ if train_size + test_size > n_samples:
558
+ raise ValueError("train_size + test_size exceeds total samples")
559
+ else:
560
+ train_size = n_samples - test_size
561
+
562
+ # Create indices
563
+ indices = list(range(n_samples))
564
+
565
+ if stratify is not None:
566
+ # Stratified split
567
+ stratify = np.array(stratify) if not isinstance(stratify, np.ndarray) else stratify
568
+ unique_classes = np.unique(stratify)
569
+
570
+ train_indices = []
571
+ test_indices = []
572
+
573
+ for cls in unique_classes:
574
+ cls_indices = [i for i in indices if stratify[i] == cls]
575
+ if shuffle:
576
+ random.shuffle(cls_indices)
577
+
578
+ cls_test_size = int(len(cls_indices) * (test_size / n_samples))
579
+ cls_train_size = len(cls_indices) - cls_test_size
580
+
581
+ test_indices.extend(cls_indices[:cls_test_size])
582
+ train_indices.extend(cls_indices[cls_test_size:cls_test_size + cls_train_size])
583
+
584
+ if shuffle:
585
+ random.shuffle(train_indices)
586
+ random.shuffle(test_indices)
587
+
588
+ else:
589
+ # Regular split
590
+ if shuffle:
591
+ random.shuffle(indices)
592
+
593
+ test_indices = indices[:test_size]
594
+ train_indices = indices[test_size:test_size + train_size]
595
+
596
+ # Split arrays
597
+ result = []
598
+ for arr in arrays:
599
+ train_arr = arr[train_indices]
600
+ test_arr = arr[test_indices]
601
+ result.extend([train_arr, test_arr])
602
+
603
+ return result
604
+
605
+
606
def cross_validate(
    estimator,
    X: Union[np.ndarray, list],
    y: Union[np.ndarray, list],
    cv: int = 5,
    scoring: str = 'accuracy',
    random_state: Optional[int] = None
) -> Dict[str, np.ndarray]:
    """
    Evaluate metric(s) by cross-validation.

    Shuffles the sample indices once, carves them into ``cv`` contiguous
    folds (the last fold absorbs the remainder), trains a fresh copy of
    ``estimator`` on each training portion, and scores its predictions on
    the held-out fold.

    Fix: fold estimators are now cloned with ``copy.deepcopy``. The previous
    clone, ``type(estimator)(**estimator.__dict__)``, passed *fitted*
    attributes as constructor keyword arguments, which raises TypeError for
    any estimator whose ``__init__`` does not accept them.

    Args:
        estimator: ML estimator exposing ``fit(X, y)`` and ``predict(X)``
        X: Features
        y: Target
        cv: Number of folds
        scoring: Scoring metric ('accuracy', 'precision', 'recall', 'f1', 'mse', 'r2')
        random_state: Random seed for reproducibility

    Returns:
        Dictionary with key 'test_score' mapping to per-fold scores

    Raises:
        ValueError: If X and y lengths differ or ``scoring`` is unknown.
    """
    import copy

    X = np.asarray(X)
    y = np.asarray(y)

    if X.ndim == 1:
        X = X.reshape(-1, 1)

    n_samples = len(X)
    if n_samples != len(y):
        raise ValueError("X and y must have the same length")

    if random_state is not None:
        random.seed(random_state)

    # Create folds from a single shuffled index list.
    indices = list(range(n_samples))
    random.shuffle(indices)

    fold_size = n_samples // cv
    folds = []

    for i in range(cv):
        start = i * fold_size
        end = start + fold_size if i < cv - 1 else n_samples
        test_indices = indices[start:end]
        held_out = set(test_indices)  # O(1) membership vs O(n) list scans
        train_indices = [idx for idx in indices if idx not in held_out]
        folds.append((train_indices, test_indices))

    # Evaluate each fold.
    scores = []

    for train_indices, test_indices in folds:
        X_train, X_test = X[train_indices], X[test_indices]
        y_train, y_test = y[train_indices], y[test_indices]

        # Deep-copy so each fold trains an independent model with the same
        # constructor parameters (and no leaked fitted state).
        fold_estimator = copy.deepcopy(estimator)

        fold_estimator.fit(X_train, y_train)
        y_pred = fold_estimator.predict(X_test)

        scores.append(_cv_score(scoring, y_test, y_pred))

    return {'test_score': np.array(scores)}


def _cv_score(scoring: str, y_true: np.ndarray, y_pred) -> float:
    """Dispatch to the requested metric (imported lazily to avoid import cycles)."""
    if scoring == 'accuracy':
        from .metrics import accuracy_score
        return accuracy_score(y_true, y_pred)
    elif scoring == 'precision':
        from .metrics import precision_score
        return precision_score(y_true, y_pred, average='weighted')
    elif scoring == 'recall':
        from .metrics import recall_score
        return recall_score(y_true, y_pred, average='weighted')
    elif scoring == 'f1':
        from .metrics import f1_score
        return f1_score(y_true, y_pred, average='weighted')
    elif scoring == 'mse':
        from .metrics import mean_squared_error
        return mean_squared_error(y_true, y_pred)
    elif scoring == 'r2':
        from .metrics import r2_score
        return r2_score(y_true, y_pred)
    raise ValueError(f"Unknown scoring: {scoring}")
+
695
+
696
+ # Feature selection utilities
697
+
698
def _f_classif_score(feature: np.ndarray, y: np.ndarray) -> float:
    """One-way ANOVA F statistic for a single feature (simplified)."""
    classes = np.unique(y)
    if classes.size < 2:
        # A single class gives no between-class signal (and would divide
        # by zero); previously this produced nan scores via numpy warnings.
        return 0.0

    overall_mean = np.mean(feature)
    between_class_var = 0.0
    within_class_var = 0.0

    for cls in classes:
        class_data = feature[y == cls]
        class_mean = np.mean(class_data)
        between_class_var += len(class_data) * (class_mean - overall_mean) ** 2
        within_class_var += np.sum((class_data - class_mean) ** 2)

    between_class_var /= (len(classes) - 1)
    within_class_var /= (len(y) - len(classes))

    if within_class_var > 0:
        return between_class_var / within_class_var
    # Zero within-class variance with nonzero between-class variance means
    # the feature separates the classes perfectly -- give it the top score.
    # (Previously such features scored 0 and could be discarded.)
    return float('inf') if between_class_var > 0 else 0.0


def _mutual_info_score(feature: np.ndarray, y: np.ndarray) -> float:
    """Simplified mutual information between one feature and the target."""
    # Discretize continuous features with simple equal-width binning.
    if len(np.unique(feature)) > 10:  # assume continuous
        bins = np.linspace(np.min(feature), np.max(feature), 5)
        feature_binned = np.digitize(feature, bins)
    else:
        feature_binned = feature

    mi = 0.0
    for f_val in np.unique(feature_binned):
        for y_val in np.unique(y):
            p_xy = np.mean((feature_binned == f_val) & (y == y_val))
            p_x = np.mean(feature_binned == f_val)
            p_y = np.mean(y == y_val)
            if p_xy > 0 and p_x > 0 and p_y > 0:
                mi += p_xy * np.log(p_xy / (p_x * p_y))
    return mi


def select_k_best_features(
    X: Union[np.ndarray, list],
    y: Union[np.ndarray, list],
    k: int = 10,
    score_func: str = 'f_classif'
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Select k best features based on statistical tests.

    Fix: with ``f_classif``, a feature whose within-class variance is zero
    but whose class means differ (a perfect separator) is now scored
    ``inf`` instead of 0, so it can no longer be discarded in favor of
    noise features.

    Args:
        X: Features
        y: Target
        k: Number of features to select (capped at the feature count)
        score_func: Scoring function ('f_classif', 'mutual_info')

    Returns:
        Tuple of (selected_features, feature_indices)

    Raises:
        ValueError: If ``score_func`` is unknown.
    """
    X = np.asarray(X)
    y = np.asarray(y)

    if X.ndim == 1:
        X = X.reshape(-1, 1)

    n_features = X.shape[1]
    k = min(k, n_features)

    if score_func == 'f_classif':
        scores = [_f_classif_score(X[:, j], y) for j in range(n_features)]
    elif score_func == 'mutual_info':
        scores = [_mutual_info_score(X[:, j], y) for j in range(n_features)]
    else:
        raise ValueError(f"Unknown score_func: {score_func}")

    # Keep the k highest-scoring features.
    feature_indices = np.argsort(scores)[-k:]
    selected_features = X[:, feature_indices]

    return selected_features, feature_indices