datly 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.MD +1773 -2386
- package/dist/datly.cjs +1 -1
- package/dist/datly.mjs +1 -1
- package/dist/datly.umd.js +1 -1
- package/package.json +3 -3
- package/src/code.js +2466 -0
- package/src/index.js +236 -480
- package/src/plot.js +609 -0
- package/src/core/dataLoader.js +0 -407
- package/src/core/utils.js +0 -306
- package/src/core/validator.js +0 -205
- package/src/dataviz/index.js +0 -1566
- package/src/descriptive/centralTendency.js +0 -208
- package/src/descriptive/dispersion.js +0 -273
- package/src/descriptive/position.js +0 -268
- package/src/descriptive/shape.js +0 -336
- package/src/inferential/confidenceIntervals.js +0 -561
- package/src/inferential/hypothesisTesting.js +0 -527
- package/src/inferential/normalityTests.js +0 -587
- package/src/insights/autoAnalyser.js +0 -685
- package/src/insights/interpreter.js +0 -543
- package/src/insights/patternDetector.js +0 -897
- package/src/insights/reportGenerator.js +0 -1072
- package/src/ml/ClassificationMetrics.js +0 -336
- package/src/ml/DecisionTree.js +0 -412
- package/src/ml/KNearestNeighbors.js +0 -317
- package/src/ml/LinearRegression.js +0 -179
- package/src/ml/LogisticRegression.js +0 -396
- package/src/ml/MachineLearning.js +0 -490
- package/src/ml/NaiveBayes.js +0 -296
- package/src/ml/RandomForest.js +0 -323
- package/src/ml/SupportVectorMachine.js +0 -299
- package/src/ml/baseModel.js +0 -106
- package/src/multivariate/correlation.js +0 -653
- package/src/multivariate/regression.js +0 -660
package/src/ml/NaiveBayes.js
DELETED
@@ -1,296 +0,0 @@
|
|
1
|
-
import BaseModel from './baseModel.js';
|
2
|
-
|
3
|
-
class NaiveBayes extends BaseModel {
|
4
|
-
constructor(type = 'gaussian') {
|
5
|
-
super();
|
6
|
-
this.type = type; // 'gaussian', 'multinomial', 'bernoulli'
|
7
|
-
this.classes = null;
|
8
|
-
this.classPriors = {};
|
9
|
-
this.parameters = {};
|
10
|
-
}
|
11
|
-
|
12
|
-
fit(X, y) {
|
13
|
-
this.validateTrainingData(X, y);
|
14
|
-
|
15
|
-
const X_train = X.map(row => Array.isArray(row) ? row : [row]);
|
16
|
-
this.classes = [...new Set(y)].sort();
|
17
|
-
const nSamples = X_train.length;
|
18
|
-
const nFeatures = X_train[0].length;
|
19
|
-
|
20
|
-
// Calculate class priors
|
21
|
-
this.classes.forEach(cls => {
|
22
|
-
const count = y.filter(label => label === cls).length;
|
23
|
-
this.classPriors[cls] = count / nSamples;
|
24
|
-
});
|
25
|
-
|
26
|
-
// Calculate parameters for each class
|
27
|
-
if (this.type === 'gaussian') {
|
28
|
-
this.fitGaussian(X_train, y, nFeatures);
|
29
|
-
} else if (this.type === 'multinomial') {
|
30
|
-
this.fitMultinomial(X_train, y, nFeatures);
|
31
|
-
} else if (this.type === 'bernoulli') {
|
32
|
-
this.fitBernoulli(X_train, y, nFeatures);
|
33
|
-
}
|
34
|
-
|
35
|
-
this.trained = true;
|
36
|
-
|
37
|
-
this.trainingMetrics = {
|
38
|
-
nClasses: this.classes.length,
|
39
|
-
nFeatures: nFeatures,
|
40
|
-
nSamples: nSamples,
|
41
|
-
type: this.type
|
42
|
-
};
|
43
|
-
|
44
|
-
return this;
|
45
|
-
}
|
46
|
-
|
47
|
-
fitGaussian(X, y, nFeatures) {
|
48
|
-
this.classes.forEach(cls => {
|
49
|
-
const classData = X.filter((_, idx) => y[idx] === cls);
|
50
|
-
this.parameters[cls] = {
|
51
|
-
means: [],
|
52
|
-
variances: []
|
53
|
-
};
|
54
|
-
|
55
|
-
for (let j = 0; j < nFeatures; j++) {
|
56
|
-
const feature = classData.map(row => row[j]);
|
57
|
-
const mean = feature.reduce((sum, val) => sum + val, 0) / feature.length;
|
58
|
-
const variance = feature.reduce((sum, val) =>
|
59
|
-
sum + Math.pow(val - mean, 2), 0) / feature.length;
|
60
|
-
|
61
|
-
this.parameters[cls].means.push(mean);
|
62
|
-
this.parameters[cls].variances.push(variance + 1e-9); // Add small value to avoid division by zero
|
63
|
-
}
|
64
|
-
});
|
65
|
-
}
|
66
|
-
|
67
|
-
fitMultinomial(X, y, nFeatures) {
|
68
|
-
const alpha = 1.0; // Laplace smoothing
|
69
|
-
|
70
|
-
this.classes.forEach(cls => {
|
71
|
-
const classData = X.filter((_, idx) => y[idx] === cls);
|
72
|
-
this.parameters[cls] = {
|
73
|
-
featureProbs: []
|
74
|
-
};
|
75
|
-
|
76
|
-
for (let j = 0; j < nFeatures; j++) {
|
77
|
-
const featureSum = classData.reduce((sum, row) => sum + row[j], 0);
|
78
|
-
const totalCount = classData.reduce((sum, row) =>
|
79
|
-
sum + row.reduce((s, val) => s + val, 0), 0);
|
80
|
-
|
81
|
-
const prob = (featureSum + alpha) / (totalCount + alpha * nFeatures);
|
82
|
-
this.parameters[cls].featureProbs.push(prob);
|
83
|
-
}
|
84
|
-
});
|
85
|
-
}
|
86
|
-
|
87
|
-
fitBernoulli(X, y, nFeatures) {
|
88
|
-
const alpha = 1.0; // Laplace smoothing
|
89
|
-
|
90
|
-
this.classes.forEach(cls => {
|
91
|
-
const classData = X.filter((_, idx) => y[idx] === cls);
|
92
|
-
const nClassSamples = classData.length;
|
93
|
-
|
94
|
-
this.parameters[cls] = {
|
95
|
-
featureProbs: []
|
96
|
-
};
|
97
|
-
|
98
|
-
for (let j = 0; j < nFeatures; j++) {
|
99
|
-
const featureCount = classData.filter(row => row[j] === 1).length;
|
100
|
-
const prob = (featureCount + alpha) / (nClassSamples + 2 * alpha);
|
101
|
-
this.parameters[cls].featureProbs.push(prob);
|
102
|
-
}
|
103
|
-
});
|
104
|
-
}
|
105
|
-
|
106
|
-
gaussianProbability(x, mean, variance) {
|
107
|
-
const exponent = Math.exp(-Math.pow(x - mean, 2) / (2 * variance));
|
108
|
-
return exponent / Math.sqrt(2 * Math.PI * variance);
|
109
|
-
}
|
110
|
-
|
111
|
-
predictSingle(x) {
|
112
|
-
const posteriors = {};
|
113
|
-
|
114
|
-
this.classes.forEach(cls => {
|
115
|
-
let logProb = Math.log(this.classPriors[cls]);
|
116
|
-
|
117
|
-
if (this.type === 'gaussian') {
|
118
|
-
const params = this.parameters[cls];
|
119
|
-
for (let j = 0; j < x.length; j++) {
|
120
|
-
const prob = this.gaussianProbability(x[j], params.means[j], params.variances[j]);
|
121
|
-
logProb += Math.log(prob + 1e-9);
|
122
|
-
}
|
123
|
-
} else if (this.type === 'multinomial') {
|
124
|
-
const params = this.parameters[cls];
|
125
|
-
for (let j = 0; j < x.length; j++) {
|
126
|
-
logProb += x[j] * Math.log(params.featureProbs[j] + 1e-9);
|
127
|
-
}
|
128
|
-
} else if (this.type === 'bernoulli') {
|
129
|
-
const params = this.parameters[cls];
|
130
|
-
for (let j = 0; j < x.length; j++) {
|
131
|
-
const prob = x[j] === 1 ? params.featureProbs[j] : 1 - params.featureProbs[j];
|
132
|
-
logProb += Math.log(prob + 1e-9);
|
133
|
-
}
|
134
|
-
}
|
135
|
-
|
136
|
-
posteriors[cls] = logProb;
|
137
|
-
});
|
138
|
-
|
139
|
-
return Object.keys(posteriors).reduce((a, b) =>
|
140
|
-
posteriors[a] > posteriors[b] ? a : b
|
141
|
-
);
|
142
|
-
}
|
143
|
-
|
144
|
-
predict(X) {
|
145
|
-
this.validatePredictionData(X);
|
146
|
-
|
147
|
-
const X_test = X.map(row => Array.isArray(row) ? row : [row]);
|
148
|
-
return X_test.map(x => this.predictSingle(x));
|
149
|
-
}
|
150
|
-
|
151
|
-
predictProba(X) {
|
152
|
-
this.validatePredictionData(X);
|
153
|
-
|
154
|
-
const X_test = X.map(row => Array.isArray(row) ? row : [row]);
|
155
|
-
|
156
|
-
return X_test.map(x => {
|
157
|
-
const logPosteriors = {};
|
158
|
-
|
159
|
-
this.classes.forEach(cls => {
|
160
|
-
let logProb = Math.log(this.classPriors[cls]);
|
161
|
-
|
162
|
-
if (this.type === 'gaussian') {
|
163
|
-
const params = this.parameters[cls];
|
164
|
-
for (let j = 0; j < x.length; j++) {
|
165
|
-
const prob = this.gaussianProbability(x[j], params.means[j], params.variances[j]);
|
166
|
-
logProb += Math.log(prob + 1e-9);
|
167
|
-
}
|
168
|
-
} else if (this.type === 'multinomial') {
|
169
|
-
const params = this.parameters[cls];
|
170
|
-
for (let j = 0; j < x.length; j++) {
|
171
|
-
logProb += x[j] * Math.log(params.featureProbs[j] + 1e-9);
|
172
|
-
}
|
173
|
-
} else if (this.type === 'bernoulli') {
|
174
|
-
const params = this.parameters[cls];
|
175
|
-
for (let j = 0; j < x.length; j++) {
|
176
|
-
const prob = x[j] === 1 ? params.featureProbs[j] : 1 - params.featureProbs[j];
|
177
|
-
logProb += Math.log(prob + 1e-9);
|
178
|
-
}
|
179
|
-
}
|
180
|
-
|
181
|
-
logPosteriors[cls] = logProb;
|
182
|
-
});
|
183
|
-
|
184
|
-
// Convert log probabilities to probabilities
|
185
|
-
const maxLogProb = Math.max(...Object.values(logPosteriors));
|
186
|
-
const expProbs = {};
|
187
|
-
let sumExpProbs = 0;
|
188
|
-
|
189
|
-
this.classes.forEach(cls => {
|
190
|
-
expProbs[cls] = Math.exp(logPosteriors[cls] - maxLogProb);
|
191
|
-
sumExpProbs += expProbs[cls];
|
192
|
-
});
|
193
|
-
|
194
|
-
const probas = {};
|
195
|
-
this.classes.forEach(cls => {
|
196
|
-
probas[cls] = expProbs[cls] / sumExpProbs;
|
197
|
-
});
|
198
|
-
|
199
|
-
return probas;
|
200
|
-
});
|
201
|
-
}
|
202
|
-
|
203
|
-
score(X, y) {
|
204
|
-
const predictions = this.predict(X);
|
205
|
-
|
206
|
-
let correct = 0;
|
207
|
-
for (let i = 0; i < y.length; i++) {
|
208
|
-
if (predictions[i] === y[i]) correct++;
|
209
|
-
}
|
210
|
-
const accuracy = correct / y.length;
|
211
|
-
|
212
|
-
const cm = this.confusionMatrix(y, predictions);
|
213
|
-
const metrics = this.calculateClassMetrics(cm);
|
214
|
-
|
215
|
-
return {
|
216
|
-
accuracy: accuracy,
|
217
|
-
confusionMatrix: cm,
|
218
|
-
classMetrics: metrics,
|
219
|
-
predictions: predictions
|
220
|
-
};
|
221
|
-
}
|
222
|
-
|
223
|
-
confusionMatrix(yTrue, yPred) {
|
224
|
-
const n = this.classes.length;
|
225
|
-
const matrix = Array(n).fill(0).map(() => Array(n).fill(0));
|
226
|
-
|
227
|
-
for (let i = 0; i < yTrue.length; i++) {
|
228
|
-
const trueIdx = this.classes.indexOf(yTrue[i]);
|
229
|
-
const predIdx = this.classes.indexOf(yPred[i]);
|
230
|
-
matrix[trueIdx][predIdx]++;
|
231
|
-
}
|
232
|
-
|
233
|
-
return {
|
234
|
-
matrix: matrix,
|
235
|
-
classes: this.classes,
|
236
|
-
display: this.formatConfusionMatrix(matrix)
|
237
|
-
};
|
238
|
-
}
|
239
|
-
|
240
|
-
formatConfusionMatrix(matrix) {
|
241
|
-
const maxLen = Math.max(...matrix.flat().map(v => v.toString().length), 8);
|
242
|
-
const pad = (str) => str.toString().padStart(maxLen);
|
243
|
-
|
244
|
-
let output = '\n' + ' '.repeat(maxLen + 2) + 'Predicted\n';
|
245
|
-
output += ' '.repeat(maxLen + 2) + this.classes.map(c => pad(c)).join(' ') + '\n';
|
246
|
-
|
247
|
-
for (let i = 0; i < matrix.length; i++) {
|
248
|
-
if (i === 0) output += 'Actual ';
|
249
|
-
else output += ' ';
|
250
|
-
output += pad(this.classes[i]) + ' ';
|
251
|
-
output += matrix[i].map(v => pad(v)).join(' ') + '\n';
|
252
|
-
}
|
253
|
-
|
254
|
-
return output;
|
255
|
-
}
|
256
|
-
|
257
|
-
calculateClassMetrics(cm) {
|
258
|
-
const matrix = cm.matrix;
|
259
|
-
const metrics = {};
|
260
|
-
|
261
|
-
this.classes.forEach((cls, i) => {
|
262
|
-
const tp = matrix[i][i];
|
263
|
-
const fn = matrix[i].reduce((sum, val) => sum + val, 0) - tp;
|
264
|
-
const fp = matrix.map(row => row[i]).reduce((sum, val) => sum + val, 0) - tp;
|
265
|
-
|
266
|
-
const precision = tp + fp > 0 ? tp / (tp + fp) : 0;
|
267
|
-
const recall = tp + fn > 0 ? tp / (tp + fn) : 0;
|
268
|
-
const f1 = precision + recall > 0 ? 2 * (precision * recall) / (precision + recall) : 0;
|
269
|
-
|
270
|
-
metrics[cls] = {
|
271
|
-
precision: precision,
|
272
|
-
recall: recall,
|
273
|
-
f1Score: f1,
|
274
|
-
support: tp + fn
|
275
|
-
};
|
276
|
-
});
|
277
|
-
|
278
|
-
return metrics;
|
279
|
-
}
|
280
|
-
|
281
|
-
summary() {
|
282
|
-
if (!this.trained) {
|
283
|
-
throw new Error('Model must be trained first');
|
284
|
-
}
|
285
|
-
|
286
|
-
return {
|
287
|
-
modelType: 'Naive Bayes',
|
288
|
-
naiveBayesType: this.type,
|
289
|
-
classes: this.classes,
|
290
|
-
classPriors: this.classPriors,
|
291
|
-
trainingMetrics: this.trainingMetrics
|
292
|
-
};
|
293
|
-
}
|
294
|
-
}
|
295
|
-
|
296
|
-
export default NaiveBayes;
|
package/src/ml/RandomForest.js
DELETED
@@ -1,323 +0,0 @@
import BaseModel from './baseModel.js';
import DecisionTree from './DecisionTree.js';

/**
 * Random Forest ensemble for classification and regression.
 * Trains nEstimators decision trees, each on a bootstrap sample (optional)
 * and a random subset of features; predictions are aggregated by majority
 * vote (classification) or averaging (regression).
 */
class RandomForest extends BaseModel {
    /**
     * @param {number} [nEstimators=100] - Number of trees in the forest.
     * @param {number} [maxDepth=10] - Maximum depth of each tree.
     * @param {number} [minSamplesSplit=2] - Minimum samples required to split a node.
     * @param {number} [minSamplesLeaf=1] - Minimum samples required at a leaf.
     * @param {string|number|null} [maxFeatures='sqrt'] - Features considered per tree:
     *        'sqrt', 'log2', a number, or null for all features.
     * @param {string} [criterion='gini'] - Split criterion passed to each tree.
     * @param {boolean} [bootstrap=true] - Whether to bootstrap-sample rows per tree.
     */
    constructor(nEstimators = 100, maxDepth = 10, minSamplesSplit = 2, minSamplesLeaf = 1,
                maxFeatures = 'sqrt', criterion = 'gini', bootstrap = true) {
        super();
        this.nEstimators = nEstimators;
        this.maxDepth = maxDepth;
        this.minSamplesSplit = minSamplesSplit;
        this.minSamplesLeaf = minSamplesLeaf;
        this.maxFeatures = maxFeatures; // 'sqrt', 'log2', number, or null (all features)
        this.criterion = criterion;
        this.bootstrap = bootstrap;
        this.trees = [];
        this.taskType = null;
        this.classes = null;
        this.featureIndices = [];   // per-tree global feature indices
        this.nFeatures = null;      // total feature count, recorded by fit()
    }

    /**
     * Fit the forest on training data.
     * @param {Array<Array<number>|number>} X - Samples; scalar rows are wrapped as 1-feature rows.
     * @param {Array} y - Targets (labels or numeric values).
     * @param {string} [taskType='classification'] - 'classification' or 'regression'.
     * @returns {RandomForest} this, for chaining.
     */
    fit(X, y, taskType = 'classification') {
        this.validateTrainingData(X, y);

        this.taskType = taskType;
        const X_train = X.map(row => Array.isArray(row) ? row : [row]);
        const nFeatures = X_train[0].length;
        // Record total feature count; getFeatureImportance() needs it
        // (previously it wrongly used the per-tree subset size).
        this.nFeatures = nFeatures;

        if (taskType === 'classification') {
            this.classes = [...new Set(y)].sort();
        }

        const maxFeaturesCount = this.getMaxFeaturesCount(nFeatures);

        // Train multiple trees
        for (let i = 0; i < this.nEstimators; i++) {
            // Bootstrap sampling
            const { X_sample, y_sample } = this.bootstrap ?
                this.bootstrapSample(X_train, y) :
                { X_sample: X_train, y_sample: y };

            // Random feature selection
            const featureIndices = this.selectRandomFeatures(nFeatures, maxFeaturesCount);
            this.featureIndices.push(featureIndices);

            // Extract selected features
            const X_subset = X_sample.map(row =>
                featureIndices.map(idx => row[idx])
            );

            // Train tree
            const tree = new DecisionTree(
                this.maxDepth,
                this.minSamplesSplit,
                this.minSamplesLeaf,
                this.criterion
            );
            tree.fit(X_subset, y_sample, taskType);
            this.trees.push(tree);
        }

        this.trained = true;

        this.trainingMetrics = {
            nEstimators: this.nEstimators,
            avgTreeDepth: this.trees.reduce((sum, tree) =>
                sum + tree.trainingMetrics.treeDepth, 0) / this.nEstimators,
            avgLeafCount: this.trees.reduce((sum, tree) =>
                sum + tree.trainingMetrics.leafCount, 0) / this.nEstimators,
            taskType: this.taskType
        };

        return this;
    }

    /**
     * Resolve maxFeatures into a concrete feature count.
     * BUG FIX: clamp to at least 1 — for nFeatures === 1, floor(log2(1)) is 0,
     * which would train trees on zero features.
     */
    getMaxFeaturesCount(nFeatures) {
        if (typeof this.maxFeatures === 'number') {
            return Math.max(1, Math.min(this.maxFeatures, nFeatures));
        } else if (this.maxFeatures === 'sqrt') {
            return Math.max(1, Math.floor(Math.sqrt(nFeatures)));
        } else if (this.maxFeatures === 'log2') {
            return Math.max(1, Math.floor(Math.log2(nFeatures)));
        } else {
            return nFeatures; // null = all features
        }
    }

    /** Sample `count` distinct feature indices from [0, nFeatures), sorted ascending. */
    selectRandomFeatures(nFeatures, count) {
        const indices = Array.from({ length: nFeatures }, (_, i) => i);
        const selected = [];

        for (let i = 0; i < count; i++) {
            const randomIdx = Math.floor(Math.random() * indices.length);
            selected.push(indices[randomIdx]);
            indices.splice(randomIdx, 1);
        }

        return selected.sort((a, b) => a - b);
    }

    /** Draw a bootstrap sample (with replacement) of (X, y) pairs. */
    bootstrapSample(X, y) {
        const n = X.length;
        const X_sample = [];
        const y_sample = [];

        for (let i = 0; i < n; i++) {
            const randomIdx = Math.floor(Math.random() * n);
            X_sample.push(X[randomIdx]);
            y_sample.push(y[randomIdx]);
        }

        return { X_sample, y_sample };
    }

    /**
     * Predict for a batch of samples.
     * Classification: majority vote across trees. BUG FIX: votes are tallied in
     * a Map so the winning label keeps its original type — the previous
     * Object.keys()-based argmax stringified numeric labels, breaking the
     * strict-equality comparison (predictions[i] === y[i]) in score().
     * Regression: mean of tree predictions.
     */
    predict(X) {
        this.validatePredictionData(X);

        const X_test = X.map(row => Array.isArray(row) ? row : [row]);

        if (this.taskType === 'classification') {
            return X_test.map(x => {
                const votes = new Map();

                this.trees.forEach((tree, idx) => {
                    const X_subset = this.featureIndices[idx].map(i => x[i]);
                    const prediction = tree.predict([X_subset])[0];
                    votes.set(prediction, (votes.get(prediction) || 0) + 1);
                });

                let bestClass = null;
                let bestCount = -1;
                for (const [cls, count] of votes) {
                    if (count > bestCount) {
                        bestClass = cls;
                        bestCount = count;
                    }
                }
                return bestClass;
            });
        } else {
            return X_test.map(x => {
                const predictions = this.trees.map((tree, idx) => {
                    const X_subset = this.featureIndices[idx].map(i => x[i]);
                    return tree.predict([X_subset])[0];
                });

                return predictions.reduce((sum, pred) => sum + pred, 0) / predictions.length;
            });
        }
    }

    /**
     * Averaged per-class probabilities across the forest's trees.
     * @throws {Error} For regression tasks.
     */
    predictProba(X) {
        if (this.taskType !== 'classification') {
            throw new Error('predictProba is only available for classification tasks');
        }

        this.validatePredictionData(X);

        const X_test = X.map(row => Array.isArray(row) ? row : [row]);

        return X_test.map(x => {
            const classCounts = {};

            this.classes.forEach(cls => {
                classCounts[cls] = 0;
            });

            this.trees.forEach((tree, idx) => {
                const X_subset = this.featureIndices[idx].map(i => x[i]);
                const proba = tree.predictProba([X_subset])[0];

                Object.keys(proba).forEach(cls => {
                    classCounts[cls] += proba[cls];
                });
            });

            const probas = {};
            Object.keys(classCounts).forEach(cls => {
                probas[cls] = classCounts[cls] / this.nEstimators;
            });

            return probas;
        });
    }

    /**
     * Evaluate on labelled data.
     * Classification returns accuracy / confusion matrix / per-class metrics;
     * regression returns r2Score, mse, rmse, mae and residuals.
     */
    score(X, y) {
        const predictions = this.predict(X);

        if (this.taskType === 'classification') {
            let correct = 0;
            for (let i = 0; i < y.length; i++) {
                if (predictions[i] === y[i]) correct++;
            }
            const accuracy = correct / y.length;

            const cm = this.confusionMatrix(y, predictions);
            const metrics = this.calculateClassMetrics(cm);

            return {
                accuracy: accuracy,
                confusionMatrix: cm,
                classMetrics: metrics,
                predictions: predictions
            };
        } else {
            const yMean = y.reduce((sum, val) => sum + val, 0) / y.length;

            const ssRes = predictions.reduce((sum, pred, i) =>
                sum + Math.pow(y[i] - pred, 2), 0);
            const ssTot = y.reduce((sum, val) =>
                sum + Math.pow(val - yMean, 2), 0);

            const r2 = 1 - (ssRes / ssTot);
            const mse = ssRes / y.length;
            const rmse = Math.sqrt(mse);
            const mae = predictions.reduce((sum, pred, i) =>
                sum + Math.abs(y[i] - pred), 0) / y.length;

            return {
                r2Score: r2,
                mse: mse,
                rmse: rmse,
                mae: mae,
                predictions: predictions,
                residuals: predictions.map((pred, i) => y[i] - pred)
            };
        }
    }

    /** Build an n x n confusion matrix (rows = actual, columns = predicted). */
    confusionMatrix(yTrue, yPred) {
        const n = this.classes.length;
        const matrix = Array(n).fill(0).map(() => Array(n).fill(0));

        for (let i = 0; i < yTrue.length; i++) {
            const trueIdx = this.classes.indexOf(yTrue[i]);
            const predIdx = this.classes.indexOf(yPred[i]);
            matrix[trueIdx][predIdx]++;
        }

        return {
            matrix: matrix,
            classes: this.classes,
            display: this.formatConfusionMatrix(matrix)
        };
    }

    /** Render the confusion matrix as an aligned plain-text table. */
    formatConfusionMatrix(matrix) {
        const maxLen = Math.max(...matrix.flat().map(v => v.toString().length), 8);
        const pad = (str) => str.toString().padStart(maxLen);

        let output = '\n' + ' '.repeat(maxLen + 2) + 'Predicted\n';
        output += ' '.repeat(maxLen + 2) + this.classes.map(c => pad(c)).join(' ') + '\n';

        for (let i = 0; i < matrix.length; i++) {
            if (i === 0) output += 'Actual ';
            else output += '       ';
            output += pad(this.classes[i]) + ' ';
            output += matrix[i].map(v => pad(v)).join(' ') + '\n';
        }

        return output;
    }

    /** Per-class precision, recall, F1 and support derived from a confusion matrix. */
    calculateClassMetrics(cm) {
        const matrix = cm.matrix;
        const metrics = {};

        this.classes.forEach((cls, i) => {
            const tp = matrix[i][i];
            const fn = matrix[i].reduce((sum, val) => sum + val, 0) - tp;
            const fp = matrix.map(row => row[i]).reduce((sum, val) => sum + val, 0) - tp;

            const precision = tp + fp > 0 ? tp / (tp + fp) : 0;
            const recall = tp + fn > 0 ? tp / (tp + fn) : 0;
            const f1 = precision + recall > 0 ? 2 * (precision * recall) / (precision + recall) : 0;

            metrics[cls] = {
                precision: precision,
                recall: recall,
                f1Score: f1,
                support: tp + fn
            };
        });

        return metrics;
    }

    /**
     * Aggregate normalized feature importance over all trees, mapped back to
     * GLOBAL feature indices.
     * BUG FIX: the accumulator is now sized by the total feature count
     * (this.nFeatures) instead of featureIndices[0].length (the per-tree
     * subset size), which wrote past the array and produced NaN entries
     * whenever maxFeatures selected fewer than all features. Also guards
     * against division by zero when the total importance is 0.
     * @returns {Array<number>} Importance per global feature, summing to 1 (or all zeros).
     */
    getFeatureImportance() {
        // Fallback for forests fitted before nFeatures was recorded.
        const nFeatures = typeof this.nFeatures === 'number' ?
            this.nFeatures :
            Math.max(...this.featureIndices.map(ids => Math.max(...ids))) + 1;
        const importance = Array(nFeatures).fill(0);

        this.trees.forEach((tree, idx) => {
            const treeImportance = tree.getFeatureImportance();
            const featureMap = this.featureIndices[idx];

            Object.keys(treeImportance).forEach(key => {
                const localIdx = parseInt(key.split('_')[1], 10);
                const globalIdx = featureMap[localIdx];
                importance[globalIdx] += treeImportance[key];
            });
        });

        const total = importance.reduce((sum, val) => sum + val, 0);
        return total > 0 ? importance.map(val => val / total) : importance;
    }

    /**
     * Human-readable summary of the trained forest.
     * @throws {Error} If the model has not been fitted yet.
     */
    summary() {
        if (!this.trained) {
            throw new Error('Model must be trained first');
        }

        return {
            modelType: 'Random Forest',
            taskType: this.taskType,
            trainingMetrics: this.trainingMetrics,
            featureImportance: this.getFeatureImportance(),
            hyperparameters: {
                nEstimators: this.nEstimators,
                maxDepth: this.maxDepth,
                minSamplesSplit: this.minSamplesSplit,
                minSamplesLeaf: this.minSamplesLeaf,
                maxFeatures: this.maxFeatures,
                criterion: this.criterion,
                bootstrap: this.bootstrap
            }
        };
    }
}

export default RandomForest;