datly 0.0.1 → 0.0.2
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/dist/datly.cjs +1 -0
- package/dist/datly.mjs +1 -0
- package/dist/datly.umd.js +1 -1
- package/dist/datly.umd.js.map +1 -0
- package/package.json +24 -11
- package/src/core/dataLoader.js +407 -0
- package/src/core/utils.js +306 -0
- package/src/core/validator.js +205 -0
- package/src/dataviz/index.js +1566 -0
- package/src/descriptive/centralTendency.js +208 -0
- package/src/descriptive/dispersion.js +273 -0
- package/src/descriptive/position.js +268 -0
- package/src/descriptive/shape.js +336 -0
- package/src/index.js +480 -0
- package/src/inferential/confidenceIntervals.js +561 -0
- package/src/inferential/hypothesisTesting.js +527 -0
- package/src/inferential/normalityTests.js +587 -0
- package/src/insights/autoAnalyser.js +685 -0
- package/src/insights/interpreter.js +543 -0
- package/src/insights/patternDetector.js +897 -0
- package/src/insights/reportGenerator.js +1072 -0
- package/src/ml/ClassificationMetrics.js +336 -0
- package/src/ml/DecisionTree.js +412 -0
- package/src/ml/KNearestNeighbors.js +317 -0
- package/src/ml/LinearRegression.js +179 -0
- package/src/ml/LogisticRegression.js +396 -0
- package/src/ml/MachineLearning.js +490 -0
- package/src/ml/NaiveBayes.js +296 -0
- package/src/ml/RandomForest.js +323 -0
- package/src/ml/SupportVectorMachine.js +299 -0
- package/src/ml/baseModel.js +106 -0
- package/src/multivariate/correlation.js +653 -0
- package/src/multivariate/regression.js +660 -0
package/src/ml/ClassificationMetrics.js
@@ -0,0 +1,336 @@
+class ClassificationMetrics {
+  confusionMatrix(yTrue, yPred) {
+    const classes = [...new Set([...yTrue, ...yPred])].sort();
+    const n = classes.length;
+    const matrix = Array(n).fill(0).map(() => Array(n).fill(0));
+    const classIndex = new Map(classes.map((c, i) => [c, i]));
+
+    for (let i = 0; i < yTrue.length; i++) {
+      const trueIdx = classIndex.get(yTrue[i]);
+      const predIdx = classIndex.get(yPred[i]);
+      matrix[trueIdx][predIdx]++;
+    }
+
+    return {
+      matrix,
+      classes,
+      display: this.formatConfusionMatrix(matrix, classes)
+    };
+  }
+
+  formatConfusionMatrix(matrix, classes) {
+    const maxLen = Math.max(...matrix.flat().map(v => v.toString().length), 8);
+    const pad = (str) => str.toString().padStart(maxLen);
+
+    let output = '\n' + ' '.repeat(maxLen + 2) + 'Predicted\n';
+    output += ' '.repeat(maxLen + 2) + classes.map(c => pad(c)).join(' ') + '\n';
+
+    for (let i = 0; i < matrix.length; i++) {
+      if (i === 0) output += 'Actual ';
+      else output += ' ';
+      output += pad(classes[i]) + ' ';
+      output += matrix[i].map(v => pad(v)).join(' ') + '\n';
+    }
+
+    return output;
+  }
+
+  accuracy(yTrue, yPred) {
+    let correct = 0;
+    for (let i = 0; i < yTrue.length; i++) {
+      if (yTrue[i] === yPred[i]) correct++;
+    }
+    return correct / yTrue.length;
+  }
+
+  precision(yTrue, yPred, average = 'weighted') {
+    const cm = this.confusionMatrix(yTrue, yPred);
+    const classes = cm.classes;
+    const matrix = cm.matrix;
+    const precisions = [];
+
+    for (let i = 0; i < classes.length; i++) {
+      const tp = matrix[i][i];
+      const fp = matrix.map((row, idx) => idx !== i ? row[i] : 0).reduce((a, b) => a + b, 0);
+      precisions.push(tp + fp > 0 ? tp / (tp + fp) : 0);
+    }
+
+    return this.averageMetric(precisions, yTrue, yPred, classes, average);
+  }
+
+  recall(yTrue, yPred, average = 'weighted') {
+    const cm = this.confusionMatrix(yTrue, yPred);
+    const classes = cm.classes;
+    const matrix = cm.matrix;
+    const recalls = [];
+
+    for (let i = 0; i < classes.length; i++) {
+      const tp = matrix[i][i];
+      const fn = matrix[i].reduce((a, b) => a + b, 0) - tp;
+      recalls.push(tp + fn > 0 ? tp / (tp + fn) : 0);
+    }
+
+    return this.averageMetric(recalls, yTrue, yPred, classes, average);
+  }
+
+  f1Score(yTrue, yPred, average = 'weighted') {
+    const cm = this.confusionMatrix(yTrue, yPred);
+    const classes = cm.classes;
+    const matrix = cm.matrix;
+    const f1Scores = [];
+
+    for (let i = 0; i < classes.length; i++) {
+      const tp = matrix[i][i];
+      const fp = matrix.map((row, idx) => idx !== i ? row[i] : 0).reduce((a, b) => a + b, 0);
+      const fn = matrix[i].reduce((a, b) => a + b, 0) - tp;
+
+      const precision = tp + fp > 0 ? tp / (tp + fp) : 0;
+      const recall = tp + fn > 0 ? tp / (tp + fn) : 0;
+      const f1 = precision + recall > 0 ? 2 * (precision * recall) / (precision + recall) : 0;
+
+      f1Scores.push(f1);
+    }
+
+    return this.averageMetric(f1Scores, yTrue, yPred, classes, average);
+  }
+
+  averageMetric(metrics, yTrue, yPred, classes, average) {
+    if (average === 'macro') {
+      return metrics.reduce((sum, m) => sum + m, 0) / metrics.length;
+    } else if (average === 'weighted') {
+      const classCounts = classes.map(cls =>
+        yTrue.filter(y => y === cls).length
+      );
+      const total = yTrue.length;
+
+      let weightedSum = 0;
+      for (let i = 0; i < metrics.length; i++) {
+        weightedSum += metrics[i] * (classCounts[i] / total);
+      }
+      return weightedSum;
+    } else if (average === 'micro') {
+      const cm = this.confusionMatrix(yTrue, yPred);
+      const matrix = cm.matrix;
+
+      let totalTp = 0;
+      let totalFp = 0;
+      let totalFn = 0;
+
+      for (let i = 0; i < classes.length; i++) {
+        const tp = matrix[i][i];
+        const fp = matrix.map((row, idx) => idx !== i ? row[i] : 0).reduce((a, b) => a + b, 0);
+        const fn = matrix[i].reduce((a, b) => a + b, 0) - tp;
+
+        totalTp += tp;
+        totalFp += fp;
+        totalFn += fn;
+      }
+
+      return totalTp / (totalTp + totalFp);
+    } else if (average === null || average === 'none') {
+      return metrics;
+    }
+
+    throw new Error('Unknown average method. Use: macro, weighted, micro, or null');
+  }
+
+  classificationReport(yTrue, yPred) {
+    const cm = this.confusionMatrix(yTrue, yPred);
+    const classes = cm.classes;
+    const matrix = cm.matrix;
+
+    const report = {
+      classes: {},
+      accuracy: this.accuracy(yTrue, yPred),
+      macroAvg: {},
+      weightedAvg: {}
+    };
+
+    // Per-class metrics
+    for (let i = 0; i < classes.length; i++) {
+      const tp = matrix[i][i];
+      const fp = matrix.map((row, idx) => idx !== i ? row[i] : 0).reduce((a, b) => a + b, 0);
+      const fn = matrix[i].reduce((a, b) => a + b, 0) - tp;
+      const support = tp + fn;
+
+      const precision = tp + fp > 0 ? tp / (tp + fp) : 0;
+      const recall = tp + fn > 0 ? tp / (tp + fn) : 0;
+      const f1 = precision + recall > 0 ? 2 * (precision * recall) / (precision + recall) : 0;
+
+      report.classes[classes[i]] = {
+        precision: precision,
+        recall: recall,
+        f1Score: f1,
+        support: support
+      };
+    }
+
+    // Macro average
+    report.macroAvg = {
+      precision: this.precision(yTrue, yPred, 'macro'),
+      recall: this.recall(yTrue, yPred, 'macro'),
+      f1Score: this.f1Score(yTrue, yPred, 'macro'),
+      support: yTrue.length
+    };
+
+    // Weighted average
+    report.weightedAvg = {
+      precision: this.precision(yTrue, yPred, 'weighted'),
+      recall: this.recall(yTrue, yPred, 'weighted'),
+      f1Score: this.f1Score(yTrue, yPred, 'weighted'),
+      support: yTrue.length
+    };
+
+    return report;
+  }
+
+  formatClassificationReport(yTrue, yPred) {
+    const report = this.classificationReport(yTrue, yPred);
+    const classes = Object.keys(report.classes);
+
+    let output = '\n' + '='.repeat(70) + '\n';
+    output += 'CLASSIFICATION REPORT\n';
+    output += '='.repeat(70) + '\n\n';
+
+    output += ' Precision Recall F1-Score Support\n';
+    output += '-'.repeat(70) + '\n';
+
+    classes.forEach(cls => {
+      const metrics = report.classes[cls];
+      output += `${cls.toString().padEnd(10)} `;
+      output += `${metrics.precision.toFixed(4).padStart(9)} `;
+      output += `${metrics.recall.toFixed(4).padStart(9)} `;
+      output += `${metrics.f1Score.toFixed(4).padStart(9)} `;
+      output += `${metrics.support.toString().padStart(10)}\n`;
+    });
+
+    output += '-'.repeat(70) + '\n';
+    output += `accuracy ${' '.repeat(28)} ${report.accuracy.toFixed(4).padStart(9)} `;
+    output += `${yTrue.length.toString().padStart(10)}\n`;
+
+    output += `macro avg `;
+    output += `${report.macroAvg.precision.toFixed(4).padStart(9)} `;
+    output += `${report.macroAvg.recall.toFixed(4).padStart(9)} `;
+    output += `${report.macroAvg.f1Score.toFixed(4).padStart(9)} `;
+    output += `${report.macroAvg.support.toString().padStart(10)}\n`;
+
+    output += `weighted avg `;
+    output += `${report.weightedAvg.precision.toFixed(4).padStart(7)} `;
+    output += `${report.weightedAvg.recall.toFixed(4).padStart(9)} `;
+    output += `${report.weightedAvg.f1Score.toFixed(4).padStart(9)} `;
+    output += `${report.weightedAvg.support.toString().padStart(10)}\n`;
+
+    output += '='.repeat(70) + '\n';
+
+    return output;
+  }
+
+  matthewsCorrCoef(yTrue, yPred) {
+    const cm = this.confusionMatrix(yTrue, yPred);
+
+    if (cm.classes.length !== 2) {
+      throw new Error('Matthews Correlation Coefficient only works for binary classification');
+    }
+
+    const matrix = cm.matrix;
+    const tp = matrix[0][0];
+    const tn = matrix[1][1];
+    const fp = matrix[0][1];
+    const fn = matrix[1][0];
+
+    const numerator = (tp * tn) - (fp * fn);
+    const denominator = Math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn));
+
+    return denominator === 0 ? 0 : numerator / denominator;
+  }
+
+  cohenKappa(yTrue, yPred) {
+    const cm = this.confusionMatrix(yTrue, yPred);
+    const matrix = cm.matrix;
+    const n = yTrue.length;
+
+    // Observed agreement
+    let po = 0;
+    for (let i = 0; i < matrix.length; i++) {
+      po += matrix[i][i];
+    }
+    po /= n;
+
+    // Expected agreement
+    let pe = 0;
+    for (let i = 0; i < matrix.length; i++) {
+      const rowSum = matrix[i].reduce((a, b) => a + b, 0);
+      const colSum = matrix.reduce((sum, row) => sum + row[i], 0);
+      pe += (rowSum * colSum) / (n * n);
+    }
+
+    return (po - pe) / (1 - pe);
+  }
+
+  specificity(yTrue, yPred, positiveClass = null) {
+    const cm = this.confusionMatrix(yTrue, yPred);
+    const classes = cm.classes;
+    const matrix = cm.matrix;
+
+    if (positiveClass === null) {
+      positiveClass = classes[0];
+    }
+
+    const posIdx = classes.indexOf(positiveClass);
+    if (posIdx === -1) {
+      throw new Error(`Positive class ${positiveClass} not found in data`);
+    }
+
+    const tn = matrix.reduce((sum, row, i) => {
+      return sum + row.reduce((s, val, j) => {
+        return s + (i !== posIdx && j !== posIdx ? val : 0);
+      }, 0);
+    }, 0);
+
+    const fp = matrix.reduce((sum, row, i) => {
+      return sum + (i !== posIdx ? row[posIdx] : 0);
+    }, 0);
+
+    return tn + fp > 0 ? tn / (tn + fp) : 0;
+  }
+
+  sensitivity(yTrue, yPred, positiveClass = null) {
+    // Sensitivity is the same as recall
+    if (positiveClass === null) {
+      return this.recall(yTrue, yPred, 'macro');
+    }
+
+    const cm = this.confusionMatrix(yTrue, yPred);
+    const classes = cm.classes;
+    const matrix = cm.matrix;
+    const posIdx = classes.indexOf(positiveClass);
+
+    if (posIdx === -1) {
+      throw new Error(`Positive class ${positiveClass} not found in data`);
+    }
+
+    const tp = matrix[posIdx][posIdx];
+    const fn = matrix[posIdx].reduce((a, b) => a + b, 0) - tp;
+
+    return tp + fn > 0 ? tp / (tp + fn) : 0;
+  }
+
+  balancedAccuracy(yTrue, yPred) {
+    const cm = this.confusionMatrix(yTrue, yPred);
+    const classes = cm.classes;
+    const matrix = cm.matrix;
+
+    let sensitivities = [];
+
+    for (let i = 0; i < classes.length; i++) {
+      const tp = matrix[i][i];
+      const fn = matrix[i].reduce((a, b) => a + b, 0) - tp;
+      const sensitivity = tp + fn > 0 ? tp / (tp + fn) : 0;
+      sensitivities.push(sensitivity);
+    }
+
+    return sensitivities.reduce((sum, s) => sum + s, 0) / sensitivities.length;
+  }
+}
+
+export default ClassificationMetrics;
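
For orientation, a minimal usage sketch of the ClassificationMetrics class added in this version. The relative import path is an assumption based on the file list above, and the labels are purely illustrative:

  import ClassificationMetrics from './src/ml/ClassificationMetrics.js';

  const metrics = new ClassificationMetrics();
  const yTrue = ['cat', 'dog', 'cat', 'dog', 'cat'];   // actual labels
  const yPred = ['cat', 'dog', 'dog', 'dog', 'cat'];   // predicted labels

  metrics.accuracy(yTrue, yPred);              // 0.8 (4 of 5 correct)
  metrics.precision(yTrue, yPred, 'macro');    // macro-averaged precision
  metrics.f1Score(yTrue, yPred);               // weighted F1 (the default average)

  // Per-class precision/recall/F1/support plus macro and weighted averages
  console.log(metrics.formatClassificationReport(yTrue, yPred));
  console.log(metrics.confusionMatrix(yTrue, yPred).display);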