datly 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/datly.cjs +1 -0
- package/dist/datly.mjs +1 -0
- package/dist/datly.umd.js +1 -1
- package/dist/datly.umd.js.map +1 -0
- package/package.json +24 -11
- package/src/core/dataLoader.js +407 -0
- package/src/core/utils.js +306 -0
- package/src/core/validator.js +205 -0
- package/src/dataviz/index.js +1566 -0
- package/src/descriptive/centralTendency.js +208 -0
- package/src/descriptive/dispersion.js +273 -0
- package/src/descriptive/position.js +268 -0
- package/src/descriptive/shape.js +336 -0
- package/src/index.js +480 -0
- package/src/inferential/confidenceIntervals.js +561 -0
- package/src/inferential/hypothesisTesting.js +527 -0
- package/src/inferential/normalityTests.js +587 -0
- package/src/insights/autoAnalyser.js +685 -0
- package/src/insights/interpreter.js +543 -0
- package/src/insights/patternDetector.js +897 -0
- package/src/insights/reportGenerator.js +1072 -0
- package/src/ml/ClassificationMetrics.js +336 -0
- package/src/ml/DecisionTree.js +412 -0
- package/src/ml/KNearestNeighbors.js +317 -0
- package/src/ml/LinearRegression.js +179 -0
- package/src/ml/LogisticRegression.js +396 -0
- package/src/ml/MachineLearning.js +490 -0
- package/src/ml/NaiveBayes.js +296 -0
- package/src/ml/RandomForest.js +323 -0
- package/src/ml/SupportVectorMachine.js +299 -0
- package/src/ml/baseModel.js +106 -0
- package/src/multivariate/correlation.js +653 -0
- package/src/multivariate/regression.js +660 -0
package/src/ml/MachineLearning.js
@@ -0,0 +1,490 @@
import LinearRegression from './LinearRegression.js';
import LogisticRegression from './LogisticRegression.js';
import KNearestNeighbors from './KNearestNeighbors.js';
import DecisionTree from './DecisionTree.js';
import RandomForest from './RandomForest.js';
import NaiveBayes from './NaiveBayes.js';
import SupportVectorMachine from './SupportVectorMachine.js';

class MachineLearning {
  constructor() {
    // Models are instantiated on demand
  }

  // ====== Regression Models ======
  createLinearRegression(options = {}) {
    const {
      learningRate = 0.01,
      iterations = 1000,
      regularization = null,
      lambda = 0.01
    } = options;

    return new LinearRegression(learningRate, iterations, regularization, lambda);
  }

  // ====== Classification Models ======
  createLogisticRegression(options = {}) {
    const {
      learningRate = 0.01,
      iterations = 1000,
      regularization = null,
      lambda = 0.01
    } = options;

    return new LogisticRegression(learningRate, iterations, regularization, lambda);
  }

  createKNN(options = {}) {
    const {
      k = 5,
      metric = 'euclidean',
      weights = 'uniform'
    } = options;

    return new KNearestNeighbors(k, metric, weights);
  }

  createDecisionTree(options = {}) {
    const {
      maxDepth = 10,
      minSamplesSplit = 2,
      minSamplesLeaf = 1,
      criterion = 'gini'
    } = options;

    return new DecisionTree(maxDepth, minSamplesSplit, minSamplesLeaf, criterion);
  }

  createRandomForest(options = {}) {
    const {
      nEstimators = 100,
      maxDepth = 10,
      minSamplesSplit = 2,
      minSamplesLeaf = 1,
      maxFeatures = 'sqrt',
      criterion = 'gini',
      bootstrap = true
    } = options;

    return new RandomForest(
      nEstimators,
      maxDepth,
      minSamplesSplit,
      minSamplesLeaf,
      maxFeatures,
      criterion,
      bootstrap
    );
  }

  createNaiveBayes(options = {}) {
    const { type = 'gaussian' } = options;
    return new NaiveBayes(type);
  }

  createSVM(options = {}) {
    const {
      C = 1.0,
      kernel = 'linear',
      gamma = 'scale',
      degree = 3,
      learningRate = 0.001,
      iterations = 1000
    } = options;

    return new SupportVectorMachine(C, kernel, gamma, degree, learningRate, iterations);
  }

  // ====== Model Evaluation Utilities ======
  crossValidate(model, X, y, folds = 5, taskType = 'classification') {
    const n = X.length;
    const foldSize = Math.floor(n / folds);
    const indices = Array.from({ length: n }, (_, i) => i);

    // Shuffle indices
    for (let i = n - 1; i > 0; i--) {
      const j = Math.floor(Math.random() * (i + 1));
      [indices[i], indices[j]] = [indices[j], indices[i]];
    }

    const scores = [];

    for (let fold = 0; fold < folds; fold++) {
      const testStart = fold * foldSize;
      const testEnd = fold === folds - 1 ? n : testStart + foldSize;

      const testIndices = indices.slice(testStart, testEnd);
      const trainIndices = [...indices.slice(0, testStart), ...indices.slice(testEnd)];

      const X_train = trainIndices.map(i => X[i]);
      const y_train = trainIndices.map(i => y[i]);
      const X_test = testIndices.map(i => X[i]);
      const y_test = testIndices.map(i => y[i]);

      // Create a new instance of the model
      const foldModel = Object.create(Object.getPrototypeOf(model));
      Object.assign(foldModel, model);

      // Train and evaluate
      foldModel.fit(X_train, y_train, taskType);
      const result = foldModel.score(X_test, y_test);

      if (taskType === 'classification') {
        scores.push(result.accuracy);
      } else {
        scores.push(result.r2Score);
      }
    }

    const meanScore = scores.reduce((sum, s) => sum + s, 0) / scores.length;
    const stdScore = Math.sqrt(
      scores.reduce((sum, s) => sum + Math.pow(s - meanScore, 2), 0) / scores.length
    );

    return {
      scores: scores,
      meanScore: meanScore,
      stdScore: stdScore,
      folds: folds
    };
  }

  trainTestSplit(X, y, testSize = 0.2, shuffle = true) {
    const n = X.length;
    const indices = Array.from({ length: n }, (_, i) => i);

    if (shuffle) {
      for (let i = n - 1; i > 0; i--) {
        const j = Math.floor(Math.random() * (i + 1));
        [indices[i], indices[j]] = [indices[j], indices[i]];
      }
    }

    const testCount = Math.floor(n * testSize);
    const trainCount = n - testCount;

    const trainIndices = indices.slice(0, trainCount);
    const testIndices = indices.slice(trainCount);

    return {
      X_train: trainIndices.map(i => X[i]),
      X_test: testIndices.map(i => X[i]),
      y_train: trainIndices.map(i => y[i]),
      y_test: testIndices.map(i => y[i])
    };
  }

  // ====== Model Comparison ======
  compareModels(models, X, y, taskType = 'classification') {
    const { X_train, X_test, y_train, y_test } = this.trainTestSplit(X, y, 0.2);
    const results = [];

    models.forEach(({ name, model }) => {
      const startTime = Date.now();

      model.fit(X_train, y_train, taskType);
      const trainTime = Date.now() - startTime;

      const evalStart = Date.now();
      const score = model.score(X_test, y_test);
      const evalTime = Date.now() - evalStart;

      results.push({
        name: name,
        score: taskType === 'classification' ? score.accuracy : score.r2Score,
        trainTime: trainTime,
        evalTime: evalTime,
        fullScore: score
      });
    });

    // Sort by score
    results.sort((a, b) => b.score - a.score);

    return {
      results: results,
      bestModel: results[0],
      comparison: this.generateComparisonReport(results, taskType)
    };
  }

  generateComparisonReport(results, taskType) {
    const metric = taskType === 'classification' ? 'Accuracy' : 'R² Score';

    let report = '\n' + '='.repeat(70) + '\n';
    report += '📊 MODEL COMPARISON REPORT\n';
    report += '='.repeat(70) + '\n\n';

    report += `Metric: ${metric}\n\n`;
    report += 'Rank | Model | Score | Train Time | Eval Time\n';
    report += '-----+-------------------------+----------+------------+-----------\n';

    results.forEach((result, idx) => {
      const rank = (idx + 1).toString().padStart(4);
      const name = result.name.padEnd(24);
      const score = result.score.toFixed(4).padStart(8);
      const trainTime = (result.trainTime + 'ms').padStart(10);
      const evalTime = (result.evalTime + 'ms').padStart(9);

      report += `${rank} | ${name} | ${score} | ${trainTime} | ${evalTime}\n`;
    });

    report += '\n' + '='.repeat(70) + '\n';
    report += `🏆 Best Model: ${results[0].name} (${metric}: ${results[0].score.toFixed(4)})\n`;
    report += '='.repeat(70) + '\n';

    return report;
  }

  // ====== Feature Engineering ======
  polynomialFeatures(X, degree = 2) {
    return X.map(row => {
      const features = [...row];

      // Add polynomial features
      for (let d = 2; d <= degree; d++) {
        for (let i = 0; i < row.length; i++) {
          features.push(Math.pow(row[i], d));
        }
      }

      // Add interaction features
      if (degree >= 2) {
        for (let i = 0; i < row.length; i++) {
          for (let j = i + 1; j < row.length; j++) {
            features.push(row[i] * row[j]);
          }
        }
      }

      return features;
    });
  }

  standardScaler(X) {
    const n = X.length;
    const m = X[0].length;
    const means = [];
    const stds = [];

    for (let j = 0; j < m; j++) {
      const column = X.map(row => row[j]);
      const mean = column.reduce((sum, val) => sum + val, 0) / n;
      const variance = column.reduce((sum, val) => sum + Math.pow(val - mean, 2), 0) / n;
      const std = Math.sqrt(variance);

      means.push(mean);
      stds.push(std === 0 ? 1 : std);
    }

    const scaled = X.map(row =>
      row.map((val, j) => (val - means[j]) / stds[j])
    );

    return {
      scaled: scaled,
      means: means,
      stds: stds,
      transform: (newX) => newX.map(row =>
        row.map((val, j) => (val - means[j]) / stds[j])
      )
    };
  }

  minMaxScaler(X, featureRange = [0, 1]) {
    const n = X.length;
    const m = X[0].length;
    const mins = [];
    const maxs = [];
    const [min_range, max_range] = featureRange;

    for (let j = 0; j < m; j++) {
      const column = X.map(row => row[j]);
      mins.push(Math.min(...column));
      maxs.push(Math.max(...column));
    }

    const scaled = X.map(row =>
      row.map((val, j) => {
        const range = maxs[j] - mins[j];
        if (range === 0) return min_range;
        return min_range + ((val - mins[j]) / range) * (max_range - min_range);
      })
    );

    return {
      scaled: scaled,
      mins: mins,
      maxs: maxs,
      transform: (newX) => newX.map(row =>
        row.map((val, j) => {
          const range = maxs[j] - mins[j];
          if (range === 0) return min_range;
          return min_range + ((val - mins[j]) / range) * (max_range - min_range);
        })
      )
    };
  }

  // ====== Metrics ======
  rocCurve(yTrue, yProba) {
    const scores = yProba.map((proba, i) => ({
      probability: typeof proba === 'object' ? Object.values(proba)[1] : proba,
      label: yTrue[i]
    }));

    scores.sort((a, b) => b.probability - a.probability);

    const positives = yTrue.filter(y => y === 1 || y === true).length;
    const negatives = yTrue.length - positives;

    const tpr = [0];
    const fpr = [0];
    let tp = 0;
    let fp = 0;

    scores.forEach(score => {
      if (score.label === 1 || score.label === true) {
        tp++;
      } else {
        fp++;
      }
      tpr.push(tp / positives);
      fpr.push(fp / negatives);
    });

    // Calculate AUC using trapezoidal rule
    let auc = 0;
    for (let i = 1; i < fpr.length; i++) {
      auc += (fpr[i] - fpr[i - 1]) * (tpr[i] + tpr[i - 1]) / 2;
    }

    return {
      fpr: fpr,
      tpr: tpr,
      auc: auc,
      thresholds: scores.map(s => s.probability)
    };
  }

  precisionRecallCurve(yTrue, yProba) {
    const scores = yProba.map((proba, i) => ({
      probability: typeof proba === 'object' ? Object.values(proba)[1] : proba,
      label: yTrue[i]
    }));

    scores.sort((a, b) => b.probability - a.probability);

    const precision = [];
    const recall = [];
    let tp = 0;
    let fp = 0;
    const totalPositives = yTrue.filter(y => y === 1 || y === true).length;

    scores.forEach(score => {
      if (score.label === 1 || score.label === true) {
        tp++;
      } else {
        fp++;
      }

      const currentPrecision = tp / (tp + fp);
      const currentRecall = tp / totalPositives;

      precision.push(currentPrecision);
      recall.push(currentRecall);
    });

    return {
      precision: precision,
      recall: recall,
      thresholds: scores.map(s => s.probability)
    };
  }

  // ====== Quick Training Helper ======
  quickTrain(modelType, X, y, options = {}) {
    const { taskType = 'classification', testSize = 0.2, normalize = true } = options;

    let model;

    switch (modelType.toLowerCase()) {
      case 'linear':
      case 'linearregression':
        model = this.createLinearRegression(options);
        break;
      case 'logistic':
      case 'logisticregression':
        model = this.createLogisticRegression(options);
        break;
      case 'knn':
        model = this.createKNN(options);
        break;
      case 'tree':
      case 'decisiontree':
        model = this.createDecisionTree(options);
        break;
      case 'forest':
      case 'randomforest':
        model = this.createRandomForest(options);
        break;
      case 'naivebayes':
      case 'nb':
        model = this.createNaiveBayes(options);
        break;
      case 'svm':
        model = this.createSVM(options);
        break;
      default:
        throw new Error(`Unknown model type: ${modelType}`);
    }

    const { X_train, X_test, y_train, y_test } = this.trainTestSplit(X, y, testSize);

    console.log(`\n🚀 Training ${modelType}...`);
    const startTime = Date.now();

    model.fit(X_train, y_train, normalize, taskType);

    const trainTime = Date.now() - startTime;
    console.log(`✅ Training completed in ${trainTime}ms`);

    console.log(`\n📊 Evaluating model...`);
    const score = model.score(X_test, y_test);

    console.log(`\n${'='.repeat(60)}`);
    console.log(`📈 RESULTS`);
    console.log(`${'='.repeat(60)}`);

    if (taskType === 'classification') {
      console.log(`Accuracy: ${(score.accuracy * 100).toFixed(2)}%`);
      console.log(`\nConfusion Matrix:${score.confusionMatrix.display}`);

      console.log(`\nPer-Class Metrics:`);
      Object.keys(score.classMetrics).forEach(cls => {
        const m = score.classMetrics[cls];
        console.log(`  ${cls}:`);
        console.log(`    Precision: ${(m.precision * 100).toFixed(2)}%`);
        console.log(`    Recall: ${(m.recall * 100).toFixed(2)}%`);
        console.log(`    F1-Score: ${(m.f1Score * 100).toFixed(2)}%`);
      });
    } else {
      console.log(`R² Score: ${score.r2Score.toFixed(4)}`);
      console.log(`MSE: ${score.mse.toFixed(4)}`);
      console.log(`RMSE: ${score.rmse.toFixed(4)}`);
      console.log(`MAE: ${score.mae.toFixed(4)}`);
    }

    console.log(`\n${'='.repeat(60)}\n`);

    return {
      model: model,
      score: score,
      trainTime: trainTime,
      summary: model.summary()
    };
  }
}

export default MachineLearning;
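For orientation, a minimal usage sketch of the MachineLearning facade added in 0.0.2, written against only the method signatures visible in the hunk above. The import path, the toy data, and the assumption that the underlying model classes implement the fit/score contract this facade relies on are illustrative, not taken from the package documentation.

// Illustrative only: exercises the facade methods shown in the diff above.
import MachineLearning from './src/ml/MachineLearning.js'; // path as listed in this diff

const ml = new MachineLearning();

// Toy dataset: two numeric features, binary labels (made-up values).
const X = [[1, 2], [2, 1], [3, 4], [4, 3], [5, 6], [6, 5]];
const y = [0, 0, 0, 1, 1, 1];

// Standardize features, then train and evaluate a k-NN classifier on a holdout split.
const { scaled } = ml.standardScaler(X);
const { model, score } = ml.quickTrain('knn', scaled, y, { k: 3, testSize: 0.33 });

// Compare two classifiers on the same data and print the generated report.
const comparison = ml.compareModels(
  [
    { name: 'KNN', model: ml.createKNN({ k: 3 }) },
    { name: 'Decision Tree', model: ml.createDecisionTree({ maxDepth: 5 }) }
  ],
  scaled,
  y,
  'classification'
);
console.log(comparison.comparison);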