datly 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,296 +0,0 @@
1
- import BaseModel from './baseModel.js';
2
-
3
class NaiveBayes extends BaseModel {
  /**
   * Naive Bayes classifier with three selectable event models:
   * 'gaussian' (continuous features), 'multinomial' (count features),
   * and 'bernoulli' (binary 0/1 features).
   *
   * @param {string} [type='gaussian'] - Event model: 'gaussian',
   *   'multinomial', or 'bernoulli'.
   */
  constructor(type = 'gaussian') {
    super();
    this.type = type; // 'gaussian', 'multinomial', 'bernoulli'
    this.classes = null;   // sorted unique labels, set by fit()
    this.classPriors = {}; // P(class), estimated from label frequencies
    this.parameters = {};  // per-class likelihood parameters
  }

  /**
   * Fit class priors and per-class likelihood parameters.
   *
   * @param {Array<Array<number>|number>} X - Training samples; scalar rows
   *   are wrapped into single-feature rows.
   * @param {Array} y - Training labels (numbers or strings).
   * @returns {NaiveBayes} this, for chaining.
   * @throws {Error} If this.type is not a known event model.
   */
  fit(X, y) {
    this.validateTrainingData(X, y);

    const X_train = X.map(row => (Array.isArray(row) ? row : [row]));
    this.classes = [...new Set(y)].sort();
    // Reset learned state so refitting does not keep stale entries for
    // labels absent from the new training set.
    this.classPriors = {};
    this.parameters = {};

    const nSamples = X_train.length;
    const nFeatures = X_train[0].length;

    // Class priors: relative frequency of each label.
    this.classes.forEach(cls => {
      const count = y.filter(label => label === cls).length;
      this.classPriors[cls] = count / nSamples;
    });

    // Likelihood parameters for the selected event model.
    if (this.type === 'gaussian') {
      this.fitGaussian(X_train, y, nFeatures);
    } else if (this.type === 'multinomial') {
      this.fitMultinomial(X_train, y, nFeatures);
    } else if (this.type === 'bernoulli') {
      this.fitBernoulli(X_train, y, nFeatures);
    } else {
      // Previously an unknown type trained no parameters and predictions
      // silently degenerated to the prior; fail fast instead.
      throw new Error(`Unknown Naive Bayes type: ${this.type}`);
    }

    this.trained = true;

    this.trainingMetrics = {
      nClasses: this.classes.length,
      nFeatures: nFeatures,
      nSamples: nSamples,
      type: this.type
    };

    return this;
  }

  /**
   * Estimate per-class feature means and variances (Gaussian likelihood).
   * A tiny constant is added to each variance to avoid division by zero.
   */
  fitGaussian(X, y, nFeatures) {
    this.classes.forEach(cls => {
      const classData = X.filter((_, idx) => y[idx] === cls);
      this.parameters[cls] = {
        means: [],
        variances: []
      };

      for (let j = 0; j < nFeatures; j++) {
        const feature = classData.map(row => row[j]);
        const mean = feature.reduce((sum, val) => sum + val, 0) / feature.length;
        const variance = feature.reduce((sum, val) =>
          sum + Math.pow(val - mean, 2), 0) / feature.length;

        this.parameters[cls].means.push(mean);
        this.parameters[cls].variances.push(variance + 1e-9); // Add small value to avoid division by zero
      }
    });
  }

  /**
   * Estimate per-class feature probabilities for count data, with
   * Laplace (add-one) smoothing.
   */
  fitMultinomial(X, y, nFeatures) {
    const alpha = 1.0; // Laplace smoothing

    this.classes.forEach(cls => {
      const classData = X.filter((_, idx) => y[idx] === cls);
      this.parameters[cls] = {
        featureProbs: []
      };

      // Total count over all features of this class is loop-invariant;
      // compute it once instead of once per feature (was accidentally
      // O(nFeatures^2) work).
      const totalCount = classData.reduce((sum, row) =>
        sum + row.reduce((s, val) => s + val, 0), 0);

      for (let j = 0; j < nFeatures; j++) {
        const featureSum = classData.reduce((sum, row) => sum + row[j], 0);
        const prob = (featureSum + alpha) / (totalCount + alpha * nFeatures);
        this.parameters[cls].featureProbs.push(prob);
      }
    });
  }

  /**
   * Estimate per-class Bernoulli parameters P(feature = 1 | class), with
   * Laplace smoothing.
   */
  fitBernoulli(X, y, nFeatures) {
    const alpha = 1.0; // Laplace smoothing

    this.classes.forEach(cls => {
      const classData = X.filter((_, idx) => y[idx] === cls);
      const nClassSamples = classData.length;

      this.parameters[cls] = {
        featureProbs: []
      };

      for (let j = 0; j < nFeatures; j++) {
        const featureCount = classData.filter(row => row[j] === 1).length;
        const prob = (featureCount + alpha) / (nClassSamples + 2 * alpha);
        this.parameters[cls].featureProbs.push(prob);
      }
    });
  }

  /**
   * Gaussian probability density of x under N(mean, variance).
   */
  gaussianProbability(x, mean, variance) {
    const exponent = Math.exp(-Math.pow(x - mean, 2) / (2 * variance));
    return exponent / Math.sqrt(2 * Math.PI * variance);
  }

  /**
   * Unnormalized log-posterior per class for a single sample:
   * log P(class) + sum of per-feature log-likelihoods. Shared by
   * predictSingle() and predictProba(), which previously duplicated
   * this computation verbatim.
   *
   * @param {Array<number>} x - One feature vector.
   * @returns {Object} Map from class label to log-posterior score.
   */
  computeLogPosteriors(x) {
    const logPosteriors = {};

    this.classes.forEach(cls => {
      let logProb = Math.log(this.classPriors[cls]);
      const params = this.parameters[cls];

      if (this.type === 'gaussian') {
        for (let j = 0; j < x.length; j++) {
          const prob = this.gaussianProbability(x[j], params.means[j], params.variances[j]);
          logProb += Math.log(prob + 1e-9); // epsilon guards log(0)
        }
      } else if (this.type === 'multinomial') {
        for (let j = 0; j < x.length; j++) {
          logProb += x[j] * Math.log(params.featureProbs[j] + 1e-9);
        }
      } else if (this.type === 'bernoulli') {
        for (let j = 0; j < x.length; j++) {
          const prob = x[j] === 1 ? params.featureProbs[j] : 1 - params.featureProbs[j];
          logProb += Math.log(prob + 1e-9);
        }
      }

      logPosteriors[cls] = logProb;
    });

    return logPosteriors;
  }

  /**
   * Predict the most probable class for a single sample.
   *
   * The argmax iterates this.classes so the returned label keeps its
   * original type. The previous Object.keys()-based reduce coerced
   * numeric labels to strings, which broke the strict-equality
   * comparison in score() and made confusionMatrix() index with -1.
   */
  predictSingle(x) {
    const logPosteriors = this.computeLogPosteriors(x);

    let best = this.classes[0];
    this.classes.forEach(cls => {
      if (logPosteriors[cls] > logPosteriors[best]) {
        best = cls;
      }
    });

    return best;
  }

  /**
   * Predict class labels for a batch of samples.
   */
  predict(X) {
    this.validatePredictionData(X);

    const X_test = X.map(row => (Array.isArray(row) ? row : [row]));
    return X_test.map(x => this.predictSingle(x));
  }

  /**
   * Predict class probabilities for each sample via a numerically stable
   * softmax over the log-posteriors (max is subtracted before exp to
   * avoid overflow/underflow).
   *
   * @returns {Array<Object>} One {classLabel: probability} object per sample.
   */
  predictProba(X) {
    this.validatePredictionData(X);

    const X_test = X.map(row => (Array.isArray(row) ? row : [row]));

    return X_test.map(x => {
      const logPosteriors = this.computeLogPosteriors(x);

      // Convert log probabilities to probabilities (stable softmax).
      const maxLogProb = Math.max(...Object.values(logPosteriors));
      const expProbs = {};
      let sumExpProbs = 0;

      this.classes.forEach(cls => {
        expProbs[cls] = Math.exp(logPosteriors[cls] - maxLogProb);
        sumExpProbs += expProbs[cls];
      });

      const probas = {};
      this.classes.forEach(cls => {
        probas[cls] = expProbs[cls] / sumExpProbs;
      });

      return probas;
    });
  }

  /**
   * Evaluate on (X, y): accuracy, confusion matrix, per-class metrics.
   */
  score(X, y) {
    const predictions = this.predict(X);

    let correct = 0;
    for (let i = 0; i < y.length; i++) {
      if (predictions[i] === y[i]) correct++;
    }
    const accuracy = correct / y.length;

    const cm = this.confusionMatrix(y, predictions);
    const metrics = this.calculateClassMetrics(cm);

    return {
      accuracy: accuracy,
      confusionMatrix: cm,
      classMetrics: metrics,
      predictions: predictions
    };
  }

  /**
   * Build the confusion matrix; rows are actual classes, columns predicted.
   */
  confusionMatrix(yTrue, yPred) {
    const n = this.classes.length;
    const matrix = Array(n).fill(0).map(() => Array(n).fill(0));

    for (let i = 0; i < yTrue.length; i++) {
      const trueIdx = this.classes.indexOf(yTrue[i]);
      const predIdx = this.classes.indexOf(yPred[i]);
      matrix[trueIdx][predIdx]++;
    }

    return {
      matrix: matrix,
      classes: this.classes,
      display: this.formatConfusionMatrix(matrix)
    };
  }

  /**
   * Render the confusion matrix as an aligned plain-text table.
   */
  formatConfusionMatrix(matrix) {
    const maxLen = Math.max(...matrix.flat().map(v => v.toString().length), 8);
    const pad = (str) => str.toString().padStart(maxLen);

    let output = '\n' + ' '.repeat(maxLen + 2) + 'Predicted\n';
    output += ' '.repeat(maxLen + 2) + this.classes.map(c => pad(c)).join(' ') + '\n';

    for (let i = 0; i < matrix.length; i++) {
      if (i === 0) output += 'Actual ';
      else output += '       '; // same width as 'Actual ' to keep rows aligned
      output += pad(this.classes[i]) + ' ';
      output += matrix[i].map(v => pad(v)).join(' ') + '\n';
    }

    return output;
  }

  /**
   * Per-class precision, recall, F1 and support derived from the
   * confusion matrix (zero-division guarded to 0).
   */
  calculateClassMetrics(cm) {
    const matrix = cm.matrix;
    const metrics = {};

    this.classes.forEach((cls, i) => {
      const tp = matrix[i][i];
      const fn = matrix[i].reduce((sum, val) => sum + val, 0) - tp;
      const fp = matrix.map(row => row[i]).reduce((sum, val) => sum + val, 0) - tp;

      const precision = tp + fp > 0 ? tp / (tp + fp) : 0;
      const recall = tp + fn > 0 ? tp / (tp + fn) : 0;
      const f1 = precision + recall > 0 ? 2 * (precision * recall) / (precision + recall) : 0;

      metrics[cls] = {
        precision: precision,
        recall: recall,
        f1Score: f1,
        support: tp + fn
      };
    });

    return metrics;
  }

  /**
   * Summarize the trained model.
   * @throws {Error} If fit() has not been called.
   */
  summary() {
    if (!this.trained) {
      throw new Error('Model must be trained first');
    }

    return {
      modelType: 'Naive Bayes',
      naiveBayesType: this.type,
      classes: this.classes,
      classPriors: this.classPriors,
      trainingMetrics: this.trainingMetrics
    };
  }
}
295
-
296
- export default NaiveBayes;
@@ -1,323 +0,0 @@
1
import BaseModel from './baseModel.js';
import DecisionTree from './DecisionTree.js';

class RandomForest extends BaseModel {
  /**
   * Random forest ensemble over DecisionTree base learners. Supports
   * classification (majority vote over trees) and regression (mean of
   * tree predictions).
   *
   * @param {number} [nEstimators=100] - Number of trees.
   * @param {number} [maxDepth=10] - Max depth of each tree.
   * @param {number} [minSamplesSplit=2] - Min samples to split a node.
   * @param {number} [minSamplesLeaf=1] - Min samples at a leaf.
   * @param {string|number|null} [maxFeatures='sqrt'] - Features considered
   *   per tree: 'sqrt', 'log2', an explicit count, or null for all.
   * @param {string} [criterion='gini'] - Split criterion passed to trees.
   * @param {boolean} [bootstrap=true] - Sample with replacement per tree.
   */
  constructor(nEstimators = 100, maxDepth = 10, minSamplesSplit = 2, minSamplesLeaf = 1,
              maxFeatures = 'sqrt', criterion = 'gini', bootstrap = true) {
    super();
    this.nEstimators = nEstimators;
    this.maxDepth = maxDepth;
    this.minSamplesSplit = minSamplesSplit;
    this.minSamplesLeaf = minSamplesLeaf;
    this.maxFeatures = maxFeatures; // 'sqrt', 'log2', number, or null (all features)
    this.criterion = criterion;
    this.bootstrap = bootstrap;
    this.trees = [];
    this.taskType = null;
    this.classes = null;
    this.featureIndices = []; // per-tree global feature indices
    this.nFeatures = null;    // total feature count, recorded by fit()
  }

  /**
   * Train nEstimators trees on bootstrap samples and random feature
   * subsets.
   *
   * @param {Array<Array<number>|number>} X - Samples; scalar rows wrapped.
   * @param {Array} y - Targets (labels or numbers).
   * @param {string} [taskType='classification'] - 'classification' or
   *   'regression'.
   * @returns {RandomForest} this, for chaining.
   */
  fit(X, y, taskType = 'classification') {
    this.validateTrainingData(X, y);

    this.taskType = taskType;
    const X_train = X.map(row => (Array.isArray(row) ? row : [row]));
    const nFeatures = X_train[0].length;

    // Record the full feature-space size for getFeatureImportance(), and
    // reset per-fit state so refitting does not accumulate extra trees
    // (which also skewed the trainingMetrics averages below).
    this.nFeatures = nFeatures;
    this.trees = [];
    this.featureIndices = [];

    if (taskType === 'classification') {
      this.classes = [...new Set(y)].sort();
    }

    const maxFeaturesCount = this.getMaxFeaturesCount(nFeatures);

    // Train multiple trees
    for (let i = 0; i < this.nEstimators; i++) {
      // Bootstrap sampling (or the full training set when disabled).
      const { X_sample, y_sample } = this.bootstrap ?
        this.bootstrapSample(X_train, y) :
        { X_sample: X_train, y_sample: y };

      // Random feature subset for this tree.
      const featureIndices = this.selectRandomFeatures(nFeatures, maxFeaturesCount);
      this.featureIndices.push(featureIndices);

      // Project samples onto the selected features.
      const X_subset = X_sample.map(row =>
        featureIndices.map(idx => row[idx])
      );

      const tree = new DecisionTree(
        this.maxDepth,
        this.minSamplesSplit,
        this.minSamplesLeaf,
        this.criterion
      );
      tree.fit(X_subset, y_sample, taskType);
      this.trees.push(tree);
    }

    this.trained = true;

    this.trainingMetrics = {
      nEstimators: this.nEstimators,
      avgTreeDepth: this.trees.reduce((sum, tree) =>
        sum + tree.trainingMetrics.treeDepth, 0) / this.nEstimators,
      avgLeafCount: this.trees.reduce((sum, tree) =>
        sum + tree.trainingMetrics.leafCount, 0) / this.nEstimators,
      taskType: this.taskType
    };

    return this;
  }

  /**
   * Resolve this.maxFeatures into a concrete per-tree feature count.
   * Always returns at least 1: 'log2' previously evaluated to 0 for a
   * single-feature dataset, producing trees with no features at all.
   */
  getMaxFeaturesCount(nFeatures) {
    if (typeof this.maxFeatures === 'number') {
      return Math.max(1, Math.min(this.maxFeatures, nFeatures));
    } else if (this.maxFeatures === 'sqrt') {
      return Math.max(1, Math.floor(Math.sqrt(nFeatures)));
    } else if (this.maxFeatures === 'log2') {
      return Math.max(1, Math.floor(Math.log2(nFeatures)));
    } else {
      return nFeatures; // null = all features
    }
  }

  /**
   * Sample `count` distinct feature indices without replacement,
   * returned in ascending order.
   */
  selectRandomFeatures(nFeatures, count) {
    const indices = Array.from({ length: nFeatures }, (_, i) => i);
    const selected = [];

    for (let i = 0; i < count; i++) {
      const randomIdx = Math.floor(Math.random() * indices.length);
      selected.push(indices[randomIdx]);
      indices.splice(randomIdx, 1);
    }

    return selected.sort((a, b) => a - b);
  }

  /**
   * Draw X.length samples with replacement from (X, y).
   */
  bootstrapSample(X, y) {
    const n = X.length;
    const X_sample = [];
    const y_sample = [];

    for (let i = 0; i < n; i++) {
      const randomIdx = Math.floor(Math.random() * n);
      X_sample.push(X[randomIdx]);
      y_sample.push(y[randomIdx]);
    }

    return { X_sample, y_sample };
  }

  /**
   * Predict for a batch of samples: majority vote (classification) or
   * mean of tree outputs (regression).
   */
  predict(X) {
    this.validatePredictionData(X);

    const X_test = X.map(row => (Array.isArray(row) ? row : [row]));

    if (this.taskType === 'classification') {
      return X_test.map(x => {
        // Majority vote. A Map keeps the original label type; the previous
        // Object.keys()-based tally coerced numeric labels to strings,
        // breaking the strict-equality checks in score() and the indexOf
        // lookups in confusionMatrix().
        const votes = new Map();

        this.trees.forEach((tree, idx) => {
          const xSubset = this.featureIndices[idx].map(i => x[i]);
          const prediction = tree.predict([xSubset])[0];
          votes.set(prediction, (votes.get(prediction) || 0) + 1);
        });

        let bestClass = null;
        let bestCount = -1;
        votes.forEach((count, cls) => {
          if (count > bestCount) {
            bestCount = count;
            bestClass = cls;
          }
        });
        return bestClass;
      });
    } else {
      return X_test.map(x => {
        const treePredictions = this.trees.map((tree, idx) => {
          const xSubset = this.featureIndices[idx].map(i => x[i]);
          return tree.predict([xSubset])[0];
        });

        return treePredictions.reduce((sum, pred) => sum + pred, 0) / treePredictions.length;
      });
    }
  }

  /**
   * Average per-class probabilities over all trees.
   * @throws {Error} For regression forests.
   */
  predictProba(X) {
    if (this.taskType !== 'classification') {
      throw new Error('predictProba is only available for classification tasks');
    }

    this.validatePredictionData(X);

    const X_test = X.map(row => (Array.isArray(row) ? row : [row]));

    return X_test.map(x => {
      const classCounts = {};

      this.classes.forEach(cls => {
        classCounts[cls] = 0;
      });

      this.trees.forEach((tree, idx) => {
        const xSubset = this.featureIndices[idx].map(i => x[i]);
        const proba = tree.predictProba([xSubset])[0];

        Object.keys(proba).forEach(cls => {
          // Guard against a tree reporting a class key we did not
          // pre-initialize (undefined + n would yield NaN).
          classCounts[cls] = (classCounts[cls] || 0) + proba[cls];
        });
      });

      const probas = {};
      Object.keys(classCounts).forEach(cls => {
        probas[cls] = classCounts[cls] / this.nEstimators;
      });

      return probas;
    });
  }

  /**
   * Evaluate on (X, y). Classification: accuracy, confusion matrix,
   * per-class metrics. Regression: R^2, MSE, RMSE, MAE, residuals.
   */
  score(X, y) {
    const predictions = this.predict(X);

    if (this.taskType === 'classification') {
      let correct = 0;
      for (let i = 0; i < y.length; i++) {
        if (predictions[i] === y[i]) correct++;
      }
      const accuracy = correct / y.length;

      const cm = this.confusionMatrix(y, predictions);
      const metrics = this.calculateClassMetrics(cm);

      return {
        accuracy: accuracy,
        confusionMatrix: cm,
        classMetrics: metrics,
        predictions: predictions
      };
    } else {
      const yMean = y.reduce((sum, val) => sum + val, 0) / y.length;

      const ssRes = predictions.reduce((sum, pred, i) =>
        sum + Math.pow(y[i] - pred, 2), 0);
      const ssTot = y.reduce((sum, val) =>
        sum + Math.pow(val - yMean, 2), 0);

      const r2 = 1 - (ssRes / ssTot);
      const mse = ssRes / y.length;
      const rmse = Math.sqrt(mse);
      const mae = predictions.reduce((sum, pred, i) =>
        sum + Math.abs(y[i] - pred), 0) / y.length;

      return {
        r2Score: r2,
        mse: mse,
        rmse: rmse,
        mae: mae,
        predictions: predictions,
        residuals: predictions.map((pred, i) => y[i] - pred)
      };
    }
  }

  /**
   * Build the confusion matrix; rows are actual classes, columns predicted.
   */
  confusionMatrix(yTrue, yPred) {
    const n = this.classes.length;
    const matrix = Array(n).fill(0).map(() => Array(n).fill(0));

    for (let i = 0; i < yTrue.length; i++) {
      const trueIdx = this.classes.indexOf(yTrue[i]);
      const predIdx = this.classes.indexOf(yPred[i]);
      matrix[trueIdx][predIdx]++;
    }

    return {
      matrix: matrix,
      classes: this.classes,
      display: this.formatConfusionMatrix(matrix)
    };
  }

  /**
   * Render the confusion matrix as an aligned plain-text table.
   */
  formatConfusionMatrix(matrix) {
    const maxLen = Math.max(...matrix.flat().map(v => v.toString().length), 8);
    const pad = (str) => str.toString().padStart(maxLen);

    let output = '\n' + ' '.repeat(maxLen + 2) + 'Predicted\n';
    output += ' '.repeat(maxLen + 2) + this.classes.map(c => pad(c)).join(' ') + '\n';

    for (let i = 0; i < matrix.length; i++) {
      if (i === 0) output += 'Actual ';
      else output += '       '; // same width as 'Actual ' to keep rows aligned
      output += pad(this.classes[i]) + ' ';
      output += matrix[i].map(v => pad(v)).join(' ') + '\n';
    }

    return output;
  }

  /**
   * Per-class precision, recall, F1 and support derived from the
   * confusion matrix (zero-division guarded to 0).
   */
  calculateClassMetrics(cm) {
    const matrix = cm.matrix;
    const metrics = {};

    this.classes.forEach((cls, i) => {
      const tp = matrix[i][i];
      const fn = matrix[i].reduce((sum, val) => sum + val, 0) - tp;
      const fp = matrix.map(row => row[i]).reduce((sum, val) => sum + val, 0) - tp;

      const precision = tp + fp > 0 ? tp / (tp + fp) : 0;
      const recall = tp + fn > 0 ? tp / (tp + fn) : 0;
      const f1 = precision + recall > 0 ? 2 * (precision * recall) / (precision + recall) : 0;

      metrics[cls] = {
        precision: precision,
        recall: recall,
        f1Score: f1,
        support: tp + fn
      };
    });

    return metrics;
  }

  /**
   * Aggregate feature importances from all trees, mapped back to the
   * ORIGINAL feature space and normalized to sum to 1.
   *
   * The previous version sized the accumulator by featureIndices[0].length
   * (the per-tree SUBSET size), so any global index >= the subset size
   * wrote `undefined + x` = NaN into the array.
   */
  getFeatureImportance() {
    const nFeatures = this.nFeatures !== null
      ? this.nFeatures
      : Math.max(...[].concat(...this.featureIndices)) + 1; // fallback: widest index seen
    const importance = Array(nFeatures).fill(0);

    this.trees.forEach((tree, idx) => {
      const treeImportance = tree.getFeatureImportance();
      const featureMap = this.featureIndices[idx];

      // Tree importance keys encode the tree-local index after an
      // underscore (parsed below); map it back to the global index.
      Object.keys(treeImportance).forEach(key => {
        const localIdx = parseInt(key.split('_')[1], 10);
        const globalIdx = featureMap[localIdx];
        importance[globalIdx] += treeImportance[key];
      });
    });

    const total = importance.reduce((sum, val) => sum + val, 0);
    // No importance recorded at all: return zeros instead of NaN (0/0).
    if (total === 0) return importance;
    return importance.map(val => val / total);
  }

  /**
   * Summarize the trained forest.
   * @throws {Error} If fit() has not been called.
   */
  summary() {
    if (!this.trained) {
      throw new Error('Model must be trained first');
    }

    return {
      modelType: 'Random Forest',
      taskType: this.taskType,
      trainingMetrics: this.trainingMetrics,
      featureImportance: this.getFeatureImportance(),
      hyperparameters: {
        nEstimators: this.nEstimators,
        maxDepth: this.maxDepth,
        minSamplesSplit: this.minSamplesSplit,
        minSamplesLeaf: this.minSamplesLeaf,
        maxFeatures: this.maxFeatures,
        criterion: this.criterion,
        bootstrap: this.bootstrap
      }
    };
  }
}

export default RandomForest;
322
-
323
- export default RandomForest;