datly 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.MD +1773 -2386
- package/dist/datly.cjs +1 -1
- package/dist/datly.mjs +1 -1
- package/dist/datly.umd.js +1 -1
- package/package.json +3 -3
- package/src/code.js +2466 -0
- package/src/index.js +236 -480
- package/src/plot.js +609 -0
- package/src/core/dataLoader.js +0 -407
- package/src/core/utils.js +0 -306
- package/src/core/validator.js +0 -205
- package/src/dataviz/index.js +0 -1566
- package/src/descriptive/centralTendency.js +0 -208
- package/src/descriptive/dispersion.js +0 -273
- package/src/descriptive/position.js +0 -268
- package/src/descriptive/shape.js +0 -336
- package/src/inferential/confidenceIntervals.js +0 -561
- package/src/inferential/hypothesisTesting.js +0 -527
- package/src/inferential/normalityTests.js +0 -587
- package/src/insights/autoAnalyser.js +0 -685
- package/src/insights/interpreter.js +0 -543
- package/src/insights/patternDetector.js +0 -897
- package/src/insights/reportGenerator.js +0 -1072
- package/src/ml/ClassificationMetrics.js +0 -336
- package/src/ml/DecisionTree.js +0 -412
- package/src/ml/KNearestNeighbors.js +0 -317
- package/src/ml/LinearRegression.js +0 -179
- package/src/ml/LogisticRegression.js +0 -396
- package/src/ml/MachineLearning.js +0 -490
- package/src/ml/NaiveBayes.js +0 -296
- package/src/ml/RandomForest.js +0 -323
- package/src/ml/SupportVectorMachine.js +0 -299
- package/src/ml/baseModel.js +0 -106
- package/src/multivariate/correlation.js +0 -653
- package/src/multivariate/regression.js +0 -660
package/src/ml/SupportVectorMachine.js
DELETED
@@ -1,299 +0,0 @@
import BaseModel from './baseModel.js';

class SupportVectorMachine extends BaseModel {
  constructor(C = 1.0, kernel = 'linear', gamma = 'scale', degree = 3, learningRate = 0.001, iterations = 1000) {
    super();
    this.C = C; // Regularization parameter
    this.kernel = kernel; // 'linear', 'rbf', 'poly'
    this.gamma = gamma; // Kernel coefficient ('scale', 'auto', or number)
    this.degree = degree; // Degree for polynomial kernel
    this.learningRate = learningRate;
    this.iterations = iterations;
    this.weights = null;
    this.bias = null;
    this.supportVectors = null;
    this.classes = null;
    this.normParams = null;
  }

  fit(X, y, normalize = true) {
    this.validateTrainingData(X, y);

    let X_train = X.map(row => Array.isArray(row) ? row : [row]);
    this.classes = [...new Set(y)].sort();

    if (this.classes.length !== 2) {
      throw new Error('SVM currently only supports binary classification');
    }

    // Convert labels to -1 and 1
    const yBinary = y.map(label => label === this.classes[1] ? 1 : -1);

    if (normalize) {
      const { normalized, means, stds } = this.normalizeFeatures(X_train);
      X_train = normalized;
      this.normParams = { means, stds };
    }

    const nSamples = X_train.length;
    const nFeatures = X_train[0].length;

    // Calculate gamma if set to 'scale' or 'auto'
    if (this.gamma === 'scale') {
      const variance = this.calculateVariance(X_train);
      this.gamma = 1 / (nFeatures * variance);
    } else if (this.gamma === 'auto') {
      this.gamma = 1 / nFeatures;
    }

    // Initialize weights and bias
    this.weights = Array(nFeatures).fill(0);
    this.bias = 0;

    const losses = [];

    // Simplified SMO-like algorithm (gradient descent)
    for (let iter = 0; iter < this.iterations; iter++) {
      let loss = 0;

      for (let i = 0; i < nSamples; i++) {
        const xi = X_train[i];
        const yi = yBinary[i];

        const prediction = this.decisionFunction([xi])[0];
        const margin = yi * prediction;

        if (margin < 1) {
          // Update weights for misclassified or margin violations
          for (let j = 0; j < nFeatures; j++) {
            this.weights[j] += this.learningRate * (yi * xi[j] - 2 * (1 / this.C) * this.weights[j]);
          }
          this.bias += this.learningRate * yi;
          loss += 1 - margin;
        } else {
          // Update weights for correct classifications
          for (let j = 0; j < nFeatures; j++) {
            this.weights[j] += this.learningRate * (-2 * (1 / this.C) * this.weights[j]);
          }
        }
      }

      // Add regularization term to loss
      const regTerm = (1 / this.C) * this.weights.reduce((sum, w) => sum + w * w, 0);
      losses.push(loss / nSamples + regTerm);
    }

    // Identify support vectors (samples near the margin)
    this.identifySupportVectors(X_train, yBinary);

    this.trained = true;

    this.trainingMetrics = {
      finalLoss: losses[losses.length - 1],
      losses: losses,
      nSupportVectors: this.supportVectors.length,
      supportVectorRatio: this.supportVectors.length / nSamples
    };

    return this;
  }

  calculateVariance(X) {
    const n = X.length;
    const m = X[0].length;
    let totalVariance = 0;

    for (let j = 0; j < m; j++) {
      const column = X.map(row => row[j]);
      const mean = column.reduce((sum, val) => sum + val, 0) / n;
      const variance = column.reduce((sum, val) => sum + Math.pow(val - mean, 2), 0) / n;
      totalVariance += variance;
    }

    return totalVariance / m;
  }

  identifySupportVectors(X, y) {
    this.supportVectors = [];

    for (let i = 0; i < X.length; i++) {
      const prediction = this.decisionFunction([X[i]])[0];
      const margin = Math.abs(prediction);

      // Support vectors are points close to the decision boundary
      if (margin < 1.5) {
        this.supportVectors.push({
          index: i,
          vector: X[i],
          label: y[i],
          margin: margin
        });
      }
    }
  }

  kernelFunction(x1, x2) {
    if (this.kernel === 'linear') {
      return x1.reduce((sum, val, i) => sum + val * x2[i], 0);
    } else if (this.kernel === 'rbf') {
      const squaredDistance = x1.reduce((sum, val, i) =>
        sum + Math.pow(val - x2[i], 2), 0);
      return Math.exp(-this.gamma * squaredDistance);
    } else if (this.kernel === 'poly') {
      const dotProduct = x1.reduce((sum, val, i) => sum + val * x2[i], 0);
      return Math.pow(dotProduct + 1, this.degree);
    }

    return 0;
  }

  decisionFunction(X) {
    return X.map(x => {
      let score = this.bias;
      for (let j = 0; j < this.weights.length; j++) {
        score += this.weights[j] * x[j];
      }
      return score;
    });
  }

  predict(X) {
    this.validatePredictionData(X);

    let X_test = X.map(row => Array.isArray(row) ? row : [row]);

    if (this.normParams) {
      const { means, stds } = this.normParams;
      X_test = X_test.map(row =>
        row.map((val, j) => (val - means[j]) / stds[j])
      );
    }

    const decisions = this.decisionFunction(X_test);
    return decisions.map(score => score >= 0 ? this.classes[1] : this.classes[0]);
  }

  predictProba(X) {
    this.validatePredictionData(X);

    let X_test = X.map(row => Array.isArray(row) ? row : [row]);

    if (this.normParams) {
      const { means, stds } = this.normParams;
      X_test = X_test.map(row =>
        row.map((val, j) => (val - means[j]) / stds[j])
      );
    }

    const decisions = this.decisionFunction(X_test);

    // Use sigmoid function to convert decision scores to probabilities
    return decisions.map(score => {
      const prob1 = 1 / (1 + Math.exp(-score));
      return {
        [this.classes[0]]: 1 - prob1,
        [this.classes[1]]: prob1
      };
    });
  }

  score(X, y) {
    const predictions = this.predict(X);

    let correct = 0;
    for (let i = 0; i < y.length; i++) {
      if (predictions[i] === y[i]) correct++;
    }
    const accuracy = correct / y.length;

    const cm = this.confusionMatrix(y, predictions);
    const metrics = this.calculateClassMetrics(cm);

    return {
      accuracy: accuracy,
      confusionMatrix: cm,
      classMetrics: metrics,
      predictions: predictions
    };
  }

  confusionMatrix(yTrue, yPred) {
    const n = this.classes.length;
    const matrix = Array(n).fill(0).map(() => Array(n).fill(0));

    for (let i = 0; i < yTrue.length; i++) {
      const trueIdx = this.classes.indexOf(yTrue[i]);
      const predIdx = this.classes.indexOf(yPred[i]);
      matrix[trueIdx][predIdx]++;
    }

    return {
      matrix: matrix,
      classes: this.classes,
      display: this.formatConfusionMatrix(matrix)
    };
  }

  formatConfusionMatrix(matrix) {
    const maxLen = Math.max(...matrix.flat().map(v => v.toString().length), 8);
    const pad = (str) => str.toString().padStart(maxLen);

    let output = '\n' + ' '.repeat(maxLen + 2) + 'Predicted\n';
    output += ' '.repeat(maxLen + 2) + this.classes.map(c => pad(c)).join(' ') + '\n';

    for (let i = 0; i < matrix.length; i++) {
      if (i === 0) output += 'Actual ';
      else output += '       '; // align with the 'Actual ' row label
      output += pad(this.classes[i]) + ' ';
      output += matrix[i].map(v => pad(v)).join(' ') + '\n';
    }

    return output;
  }

  calculateClassMetrics(cm) {
    const matrix = cm.matrix;
    const metrics = {};

    this.classes.forEach((cls, i) => {
      const tp = matrix[i][i];
      const fn = matrix[i].reduce((sum, val) => sum + val, 0) - tp;
      const fp = matrix.map(row => row[i]).reduce((sum, val) => sum + val, 0) - tp;

      const precision = tp + fp > 0 ? tp / (tp + fp) : 0;
      const recall = tp + fn > 0 ? tp / (tp + fn) : 0;
      const f1 = precision + recall > 0 ? 2 * (precision * recall) / (precision + recall) : 0;

      metrics[cls] = {
        precision: precision,
        recall: recall,
        f1Score: f1,
        support: tp + fn
      };
    });

    return metrics;
  }

  summary() {
    if (!this.trained) {
      throw new Error('Model must be trained first');
    }

    return {
      modelType: 'Support Vector Machine',
      classes: this.classes,
      trainingMetrics: this.trainingMetrics,
      hyperparameters: {
        C: this.C,
        kernel: this.kernel,
        gamma: this.gamma,
        degree: this.degree,
        learningRate: this.learningRate,
        iterations: this.iterations
      }
    };
  }
}

export default SupportVectorMachine;
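For context, a minimal usage sketch of the removed class, based only on the constructor, fit, predict, predictProba, and score signatures above; the toy data and the expected outputs in the comments are illustrative assumptions, not taken from the package.

// Hypothetical usage of the removed SupportVectorMachine (illustrative data).
import SupportVectorMachine from './ml/SupportVectorMachine.js';

// Tiny, linearly separable two-class set.
const X = [[0, 0], [0, 1], [1, 0], [1, 1], [2, 2], [2, 3], [3, 2], [3, 3]];
const y = ['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b'];

const svm = new SupportVectorMachine(1.0, 'linear');
svm.fit(X, y); // feature normalization is on by default

console.log(svm.predict([[0, 1], [3, 3]]));  // expected ['a', 'b'] on this separable set
console.log(svm.predictProba([[1.5, 1.5]])); // { a: ..., b: ... } via the sigmoid mapping
console.log(svm.score(X, y).accuracy);       // accuracy; score() also returns the confusion matrix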
package/src/ml/baseModel.js
DELETED
@@ -1,106 +0,0 @@
class BaseModel {
  constructor() {
    this.trained = false;
    this.model = null;
    this.features = null;
    this.target = null;
    this.trainingMetrics = {};
  }

  validateTrainingData(X, y) {
    if (!Array.isArray(X) || X.length === 0) {
      throw new Error('X must be a non-empty array');
    }
    if (!Array.isArray(y) || y.length === 0) {
      throw new Error('y must be a non-empty array');
    }
    if (X.length !== y.length) {
      throw new Error('X and y must have the same length');
    }
  }

  validatePredictionData(X) {
    if (!this.trained) {
      throw new Error('Model must be trained before making predictions');
    }
    if (!Array.isArray(X) || X.length === 0) {
      throw new Error('X must be a non-empty array');
    }
  }

  normalizeFeatures(X) {
    const n = X.length;
    const m = X[0].length;
    const normalized = [];
    const means = [];
    const stds = [];

    for (let j = 0; j < m; j++) {
      const column = X.map(row => row[j]);
      const mean = column.reduce((sum, val) => sum + val, 0) / n;
      const variance = column.reduce((sum, val) => sum + Math.pow(val - mean, 2), 0) / n;
      const std = Math.sqrt(variance);

      means.push(mean);
      stds.push(std === 0 ? 1 : std);
    }

    for (let i = 0; i < n; i++) {
      const row = [];
      for (let j = 0; j < m; j++) {
        row.push((X[i][j] - means[j]) / stds[j]);
      }
      normalized.push(row);
    }

    return { normalized, means, stds };
  }

  splitTrainTest(X, y, testSize = 0.2, shuffle = true) {
    const n = X.length;
    const indices = Array.from({ length: n }, (_, i) => i);

    if (shuffle) {
      for (let i = n - 1; i > 0; i--) {
        const j = Math.floor(Math.random() * (i + 1));
        [indices[i], indices[j]] = [indices[j], indices[i]];
      }
    }

    const testCount = Math.floor(n * testSize);
    const trainCount = n - testCount;

    const trainIndices = indices.slice(0, trainCount);
    const testIndices = indices.slice(trainCount);

    return {
      X_train: trainIndices.map(i => X[i]),
      X_test: testIndices.map(i => X[i]),
      y_train: trainIndices.map(i => y[i]),
      y_test: testIndices.map(i => y[i])
    };
  }

  save() {
    if (!this.trained) {
      throw new Error('Cannot save untrained model');
    }
    return {
      model: this.model,
      features: this.features,
      target: this.target,
      trainingMetrics: this.trainingMetrics,
      timestamp: new Date().toISOString()
    };
  }

  load(modelData) {
    this.model = modelData.model;
    this.features = modelData.features;
    this.target = modelData.target;
    this.trainingMetrics = modelData.trainingMetrics;
    this.trained = true;
  }
}

export default BaseModel;
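The SupportVectorMachine diff above shows the subclassing contract this base class implied: run the inherited validators, fit, then set this.trained to unlock prediction and saving. Below is a minimal hypothetical subclass sketching that contract; NearestCentroid is not part of the package.

// Hypothetical BaseModel subclass, for illustration only.
import BaseModel from './ml/baseModel.js';

class NearestCentroid extends BaseModel {
  fit(X, y) {
    this.validateTrainingData(X, y); // inherited shape checks
    this.classes = [...new Set(y)].sort();
    // One centroid (per-feature mean) per class.
    this.model = this.classes.map(cls => {
      const rows = X.filter((_, i) => y[i] === cls);
      return rows[0].map((_, j) => rows.reduce((s, r) => s + r[j], 0) / rows.length);
    });
    this.trained = true; // unlocks predict() and the inherited save()
    return this;
  }

  predict(X) {
    this.validatePredictionData(X); // inherited trained/shape guard
    return X.map(x => {
      // Pick the class whose centroid is nearest in squared Euclidean distance.
      const dists = this.model.map(c => c.reduce((s, v, j) => s + (v - x[j]) ** 2, 0));
      return this.classes[dists.indexOf(Math.min(...dists))];
    });
  }
}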