datly 0.0.1 → 0.0.2

@@ -0,0 +1,490 @@
+ import LinearRegression from './LinearRegression.js';
+ import LogisticRegression from './LogisticRegression.js';
+ import KNearestNeighbors from './KNearestNeighbors.js';
+ import DecisionTree from './DecisionTree.js';
+ import RandomForest from './RandomForest.js';
+ import NaiveBayes from './NaiveBayes.js';
+ import SupportVectorMachine from './SupportVectorMachine.js';
+
+ class MachineLearning {
+   constructor() {
+     // Models are instantiated on demand
+   }
+
+   // ====== Regression Models ======
+   createLinearRegression(options = {}) {
+     const {
+       learningRate = 0.01,
+       iterations = 1000,
+       regularization = null,
+       lambda = 0.01
+     } = options;
+
+     return new LinearRegression(learningRate, iterations, regularization, lambda);
+   }
+
+   // ====== Classification Models ======
+   createLogisticRegression(options = {}) {
+     const {
+       learningRate = 0.01,
+       iterations = 1000,
+       regularization = null,
+       lambda = 0.01
+     } = options;
+
+     return new LogisticRegression(learningRate, iterations, regularization, lambda);
+   }
+
+   createKNN(options = {}) {
+     const {
+       k = 5,
+       metric = 'euclidean',
+       weights = 'uniform'
+     } = options;
+
+     return new KNearestNeighbors(k, metric, weights);
+   }
+
+   createDecisionTree(options = {}) {
+     const {
+       maxDepth = 10,
+       minSamplesSplit = 2,
+       minSamplesLeaf = 1,
+       criterion = 'gini'
+     } = options;
+
+     return new DecisionTree(maxDepth, minSamplesSplit, minSamplesLeaf, criterion);
+   }
+
+   createRandomForest(options = {}) {
+     const {
+       nEstimators = 100,
+       maxDepth = 10,
+       minSamplesSplit = 2,
+       minSamplesLeaf = 1,
+       maxFeatures = 'sqrt',
+       criterion = 'gini',
+       bootstrap = true
+     } = options;
+
+     return new RandomForest(
+       nEstimators,
+       maxDepth,
+       minSamplesSplit,
+       minSamplesLeaf,
+       maxFeatures,
+       criterion,
+       bootstrap
+     );
+   }
+
+   createNaiveBayes(options = {}) {
+     const { type = 'gaussian' } = options;
+     return new NaiveBayes(type);
+   }
+
+   createSVM(options = {}) {
+     const {
+       C = 1.0,
+       kernel = 'linear',
+       gamma = 'scale',
+       degree = 3,
+       learningRate = 0.001,
+       iterations = 1000
+     } = options;
+
+     return new SupportVectorMachine(C, kernel, gamma, degree, learningRate, iterations);
+   }
+
+   // ====== Model Evaluation Utilities ======
+   crossValidate(model, X, y, folds = 5, taskType = 'classification') {
+     const n = X.length;
+     const foldSize = Math.floor(n / folds);
+     const indices = Array.from({ length: n }, (_, i) => i);
+
+     // Shuffle indices
+     for (let i = n - 1; i > 0; i--) {
+       const j = Math.floor(Math.random() * (i + 1));
+       [indices[i], indices[j]] = [indices[j], indices[i]];
+     }
+
+     const scores = [];
+
+     for (let fold = 0; fold < folds; fold++) {
+       const testStart = fold * foldSize;
+       const testEnd = fold === folds - 1 ? n : testStart + foldSize;
+
+       const testIndices = indices.slice(testStart, testEnd);
+       const trainIndices = [...indices.slice(0, testStart), ...indices.slice(testEnd)];
+
+       const X_train = trainIndices.map(i => X[i]);
+       const y_train = trainIndices.map(i => y[i]);
+       const X_test = testIndices.map(i => X[i]);
+       const y_test = testIndices.map(i => y[i]);
+
+       // Create a new instance of the model
+       const foldModel = Object.create(Object.getPrototypeOf(model));
+       Object.assign(foldModel, model);
+
+       // Train and evaluate
+       foldModel.fit(X_train, y_train, taskType);
+       const result = foldModel.score(X_test, y_test);
+
+       if (taskType === 'classification') {
+         scores.push(result.accuracy);
+       } else {
+         scores.push(result.r2Score);
+       }
+     }
+
+     const meanScore = scores.reduce((sum, s) => sum + s, 0) / scores.length;
+     const stdScore = Math.sqrt(
+       scores.reduce((sum, s) => sum + Math.pow(s - meanScore, 2), 0) / scores.length
+     );
+
+     return {
+       scores: scores,
+       meanScore: meanScore,
+       stdScore: stdScore,
+       folds: folds
+     };
+   }
+
+   trainTestSplit(X, y, testSize = 0.2, shuffle = true) {
+     const n = X.length;
+     const indices = Array.from({ length: n }, (_, i) => i);
+
+     if (shuffle) {
+       for (let i = n - 1; i > 0; i--) {
+         const j = Math.floor(Math.random() * (i + 1));
+         [indices[i], indices[j]] = [indices[j], indices[i]];
+       }
+     }
+
+     const testCount = Math.floor(n * testSize);
+     const trainCount = n - testCount;
+
+     const trainIndices = indices.slice(0, trainCount);
+     const testIndices = indices.slice(trainCount);
+
+     return {
+       X_train: trainIndices.map(i => X[i]),
+       X_test: testIndices.map(i => X[i]),
+       y_train: trainIndices.map(i => y[i]),
+       y_test: testIndices.map(i => y[i])
+     };
+   }
+
+   // ====== Model Comparison ======
+   compareModels(models, X, y, taskType = 'classification') {
+     const { X_train, X_test, y_train, y_test } = this.trainTestSplit(X, y, 0.2);
+     const results = [];
+
+     models.forEach(({ name, model }) => {
+       const startTime = Date.now();
+
+       model.fit(X_train, y_train, taskType);
+       const trainTime = Date.now() - startTime;
+
+       const evalStart = Date.now();
+       const score = model.score(X_test, y_test);
+       const evalTime = Date.now() - evalStart;
+
+       results.push({
+         name: name,
+         score: taskType === 'classification' ? score.accuracy : score.r2Score,
+         trainTime: trainTime,
+         evalTime: evalTime,
+         fullScore: score
+       });
+     });
+
+     // Sort by score
+     results.sort((a, b) => b.score - a.score);
+
+     return {
+       results: results,
+       bestModel: results[0],
+       comparison: this.generateComparisonReport(results, taskType)
+     };
+   }
+
+   generateComparisonReport(results, taskType) {
+     const metric = taskType === 'classification' ? 'Accuracy' : 'R² Score';
+
+     let report = '\n' + '='.repeat(70) + '\n';
+     report += '📊 MODEL COMPARISON REPORT\n';
+     report += '='.repeat(70) + '\n\n';
+
+     report += `Metric: ${metric}\n\n`;
+     report += 'Rank | Model | Score | Train Time | Eval Time\n';
+     report += '-----+-------------------------+----------+------------+-----------\n';
+
+     results.forEach((result, idx) => {
+       const rank = (idx + 1).toString().padStart(4);
+       const name = result.name.padEnd(24);
+       const score = result.score.toFixed(4).padStart(8);
+       const trainTime = (result.trainTime + 'ms').padStart(10);
+       const evalTime = (result.evalTime + 'ms').padStart(9);
+
+       report += `${rank} | ${name} | ${score} | ${trainTime} | ${evalTime}\n`;
+     });
+
+     report += '\n' + '='.repeat(70) + '\n';
+     report += `🏆 Best Model: ${results[0].name} (${metric}: ${results[0].score.toFixed(4)})\n`;
+     report += '='.repeat(70) + '\n';
+
+     return report;
+   }
+
+   // ====== Feature Engineering ======
+   polynomialFeatures(X, degree = 2) {
+     return X.map(row => {
+       const features = [...row];
+
+       // Add polynomial features
+       for (let d = 2; d <= degree; d++) {
+         for (let i = 0; i < row.length; i++) {
+           features.push(Math.pow(row[i], d));
+         }
+       }
+
+       // Add interaction features
+       if (degree >= 2) {
+         for (let i = 0; i < row.length; i++) {
+           for (let j = i + 1; j < row.length; j++) {
+             features.push(row[i] * row[j]);
+           }
+         }
+       }
+
+       return features;
+     });
+   }
+
+   standardScaler(X) {
+     const n = X.length;
+     const m = X[0].length;
+     const means = [];
+     const stds = [];
+
+     for (let j = 0; j < m; j++) {
+       const column = X.map(row => row[j]);
+       const mean = column.reduce((sum, val) => sum + val, 0) / n;
+       const variance = column.reduce((sum, val) => sum + Math.pow(val - mean, 2), 0) / n;
+       const std = Math.sqrt(variance);
+
+       means.push(mean);
+       stds.push(std === 0 ? 1 : std);
+     }
+
+     const scaled = X.map(row =>
+       row.map((val, j) => (val - means[j]) / stds[j])
+     );
+
+     return {
+       scaled: scaled,
+       means: means,
+       stds: stds,
+       transform: (newX) => newX.map(row =>
+         row.map((val, j) => (val - means[j]) / stds[j])
+       )
+     };
+   }
+
+   minMaxScaler(X, featureRange = [0, 1]) {
+     const n = X.length;
+     const m = X[0].length;
+     const mins = [];
+     const maxs = [];
+     const [min_range, max_range] = featureRange;
+
+     for (let j = 0; j < m; j++) {
+       const column = X.map(row => row[j]);
+       mins.push(Math.min(...column));
+       maxs.push(Math.max(...column));
+     }
+
+     const scaled = X.map(row =>
+       row.map((val, j) => {
+         const range = maxs[j] - mins[j];
+         if (range === 0) return min_range;
+         return min_range + ((val - mins[j]) / range) * (max_range - min_range);
+       })
+     );
+
+     return {
+       scaled: scaled,
+       mins: mins,
+       maxs: maxs,
+       transform: (newX) => newX.map(row =>
+         row.map((val, j) => {
+           const range = maxs[j] - mins[j];
+           if (range === 0) return min_range;
+           return min_range + ((val - mins[j]) / range) * (max_range - min_range);
+         })
+       )
+     };
+   }
+
+   // ====== Metrics ======
+   rocCurve(yTrue, yProba) {
+     const scores = yProba.map((proba, i) => ({
+       probability: typeof proba === 'object' ? Object.values(proba)[1] : proba,
+       label: yTrue[i]
+     }));
+
+     scores.sort((a, b) => b.probability - a.probability);
+
+     const positives = yTrue.filter(y => y === 1 || y === true).length;
+     const negatives = yTrue.length - positives;
+
+     const tpr = [0];
+     const fpr = [0];
+     let tp = 0;
+     let fp = 0;
+
+     scores.forEach(score => {
+       if (score.label === 1 || score.label === true) {
+         tp++;
+       } else {
+         fp++;
+       }
+       tpr.push(tp / positives);
+       fpr.push(fp / negatives);
+     });
+
+     // Calculate AUC using trapezoidal rule
+     let auc = 0;
+     for (let i = 1; i < fpr.length; i++) {
+       auc += (fpr[i] - fpr[i - 1]) * (tpr[i] + tpr[i - 1]) / 2;
+     }
+
+     return {
+       fpr: fpr,
+       tpr: tpr,
+       auc: auc,
+       thresholds: scores.map(s => s.probability)
+     };
+   }
+
+   precisionRecallCurve(yTrue, yProba) {
+     const scores = yProba.map((proba, i) => ({
+       probability: typeof proba === 'object' ? Object.values(proba)[1] : proba,
+       label: yTrue[i]
+     }));
+
+     scores.sort((a, b) => b.probability - a.probability);
+
+     const precision = [];
+     const recall = [];
+     let tp = 0;
+     let fp = 0;
+     const totalPositives = yTrue.filter(y => y === 1 || y === true).length;
+
+     scores.forEach(score => {
+       if (score.label === 1 || score.label === true) {
+         tp++;
+       } else {
+         fp++;
+       }
+
+       const currentPrecision = tp / (tp + fp);
+       const currentRecall = tp / totalPositives;
+
+       precision.push(currentPrecision);
+       recall.push(currentRecall);
+     });
+
+     return {
+       precision: precision,
+       recall: recall,
+       thresholds: scores.map(s => s.probability)
+     };
+   }
+
+   // ====== Quick Training Helper ======
+   quickTrain(modelType, X, y, options = {}) {
+     const { taskType = 'classification', testSize = 0.2, normalize = true } = options;
+
+     let model;
+
+     switch (modelType.toLowerCase()) {
+       case 'linear':
+       case 'linearregression':
+         model = this.createLinearRegression(options);
+         break;
+       case 'logistic':
+       case 'logisticregression':
+         model = this.createLogisticRegression(options);
+         break;
+       case 'knn':
+         model = this.createKNN(options);
+         break;
+       case 'tree':
+       case 'decisiontree':
+         model = this.createDecisionTree(options);
+         break;
+       case 'forest':
+       case 'randomforest':
+         model = this.createRandomForest(options);
+         break;
+       case 'naivebayes':
+       case 'nb':
+         model = this.createNaiveBayes(options);
+         break;
+       case 'svm':
+         model = this.createSVM(options);
+         break;
+       default:
+         throw new Error(`Unknown model type: ${modelType}`);
+     }
+
+     const { X_train, X_test, y_train, y_test } = this.trainTestSplit(X, y, testSize);
+
+     console.log(`\n🚀 Training ${modelType}...`);
+     const startTime = Date.now();
+
+     model.fit(X_train, y_train, normalize, taskType);
+
+     const trainTime = Date.now() - startTime;
+     console.log(`✅ Training completed in ${trainTime}ms`);
+
+     console.log(`\n📊 Evaluating model...`);
+     const score = model.score(X_test, y_test);
+
+     console.log(`\n${'='.repeat(60)}`);
+     console.log(`📈 RESULTS`);
+     console.log(`${'='.repeat(60)}`);
+
+     if (taskType === 'classification') {
+       console.log(`Accuracy: ${(score.accuracy * 100).toFixed(2)}%`);
+       console.log(`\nConfusion Matrix:${score.confusionMatrix.display}`);
+
+       console.log(`\nPer-Class Metrics:`);
+       Object.keys(score.classMetrics).forEach(cls => {
+         const m = score.classMetrics[cls];
+         console.log(` ${cls}:`);
+         console.log(` Precision: ${(m.precision * 100).toFixed(2)}%`);
+         console.log(` Recall: ${(m.recall * 100).toFixed(2)}%`);
+         console.log(` F1-Score: ${(m.f1Score * 100).toFixed(2)}%`);
+       });
+     } else {
+       console.log(`R² Score: ${score.r2Score.toFixed(4)}`);
+       console.log(`MSE: ${score.mse.toFixed(4)}`);
+       console.log(`RMSE: ${score.rmse.toFixed(4)}`);
+       console.log(`MAE: ${score.mae.toFixed(4)}`);
+     }
+
+     console.log(`\n${'='.repeat(60)}\n`);
+
+     return {
+       model: model,
+       score: score,
+       trainTime: trainTime,
+       summary: model.summary()
+     };
+   }
+ }
+
+ export default MachineLearning;
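
A minimal usage sketch of the MachineLearning class added above (illustration only, not part of the package diff). It assumes the package exposes this class as its default export and that the underlying model classes (KNearestNeighbors and the others imported at the top) implement the fit, score, and summary methods this file calls.

import MachineLearning from 'datly'; // assumed entry point; adjust to the package's actual export path

const ml = new MachineLearning();

// Toy binary-classification data
const X = [[1, 1], [1, 2], [2, 1], [6, 6], [6, 7], [7, 6]];
const y = [0, 0, 0, 1, 1, 1];

// One-call train/evaluate with an internal train/test split and a console report
const { model, score, trainTime } = ml.quickTrain('knn', X, y, { k: 3, testSize: 0.33 });

// 3-fold cross-validation of a fresh model on the full data set
const cv = ml.crossValidate(ml.createKNN({ k: 3 }), X, y, 3);
console.log(`CV accuracy: ${cv.meanScore.toFixed(3)} ± ${cv.stdScore.toFixed(3)}`);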