datly 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/datly.cjs +1 -0
- package/dist/datly.mjs +1 -0
- package/dist/datly.umd.js +1 -1
- package/dist/datly.umd.js.map +1 -0
- package/package.json +24 -11
- package/src/core/dataLoader.js +407 -0
- package/src/core/utils.js +306 -0
- package/src/core/validator.js +205 -0
- package/src/dataviz/index.js +1566 -0
- package/src/descriptive/centralTendency.js +208 -0
- package/src/descriptive/dispersion.js +273 -0
- package/src/descriptive/position.js +268 -0
- package/src/descriptive/shape.js +336 -0
- package/src/index.js +480 -0
- package/src/inferential/confidenceIntervals.js +561 -0
- package/src/inferential/hypothesisTesting.js +527 -0
- package/src/inferential/normalityTests.js +587 -0
- package/src/insights/autoAnalyser.js +685 -0
- package/src/insights/interpreter.js +543 -0
- package/src/insights/patternDetector.js +897 -0
- package/src/insights/reportGenerator.js +1072 -0
- package/src/ml/ClassificationMetrics.js +336 -0
- package/src/ml/DecisionTree.js +412 -0
- package/src/ml/KNearestNeighbors.js +317 -0
- package/src/ml/LinearRegression.js +179 -0
- package/src/ml/LogisticRegression.js +396 -0
- package/src/ml/MachineLearning.js +490 -0
- package/src/ml/NaiveBayes.js +296 -0
- package/src/ml/RandomForest.js +323 -0
- package/src/ml/SupportVectorMachine.js +299 -0
- package/src/ml/baseModel.js +106 -0
- package/src/multivariate/correlation.js +653 -0
- package/src/multivariate/regression.js +660 -0
@@ -0,0 +1,660 @@
|
|
1
|
+
/**
 * Regression models (simple linear, multiple linear, polynomial and
 * single-predictor logistic) together with the matrix and probability
 * distribution helpers they need. The class keeps no instance state; every
 * method is a pure function of its arguments.
 */
class Regression {
  /**
   * Fits y = intercept + slope * x by ordinary least squares.
   * Pairs in which either value is not a finite number are ignored.
   *
   * @param {number[]} x - Predictor values.
   * @param {number[]} y - Response values, same length as `x`.
   * @returns {object} Estimates, standard errors, t/F statistics with
   *   p-values, fit metrics, residuals, predictions and residual analysis.
   * @throws {Error} If the inputs are not equal-length arrays, fewer than 3
   *   valid pairs remain, or x has zero variance.
   */
  linear(x, y) {
    this.assertPairedArrays(x, y);

    const { xValues, yValues } = this.extractValidPairs(x, y);
    const n = xValues.length;
    if (n < 3) {
      throw new Error('Need at least 3 valid paired observations');
    }

    const meanX = xValues.reduce((sum, val) => sum + val, 0) / n;
    const meanY = yValues.reduce((sum, val) => sum + val, 0) / n;

    let numerator = 0; // sum of cross-deviations (Sxy)
    let denominator = 0; // sum of squared x-deviations (Sxx)
    for (let i = 0; i < n; i++) {
      const xDiff = xValues[i] - meanX;
      const yDiff = yValues[i] - meanY;
      numerator += xDiff * yDiff;
      denominator += xDiff * xDiff;
    }

    if (denominator === 0) {
      throw new Error('Cannot perform regression: X values have zero variance');
    }

    const slope = numerator / denominator;
    const intercept = meanY - slope * meanX;

    const predicted = xValues.map((xi) => intercept + slope * xi);
    const residuals = yValues.map((yi, i) => yi - predicted[i]);

    const ssResidual = residuals.reduce((sum, r) => sum + r * r, 0);
    const ssTotal = yValues.reduce((sum, yi) => sum + Math.pow(yi - meanY, 2), 0);
    const ssRegression = ssTotal - ssResidual;
    const df = n - 2;

    const rSquared = ssTotal === 0 ? 1 : ssRegression / ssTotal;
    // A constant response makes the adjusted formula 0/0; report the same
    // value as rSquared instead of NaN.
    const adjustedRSquared = ssTotal === 0
      ? rSquared
      : 1 - ((ssResidual / df) / (ssTotal / (n - 1)));

    const mse = ssResidual / df;
    const rmse = Math.sqrt(mse);
    const standardErrorSlope = Math.sqrt(mse / denominator);
    const standardErrorIntercept = Math.sqrt(mse * (1 / n + (meanX * meanX) / denominator));

    const tStatSlope = slope / standardErrorSlope;
    const tStatIntercept = intercept / standardErrorIntercept;

    const pValueSlope = 2 * (1 - this.tCDF(Math.abs(tStatSlope), df));
    const pValueIntercept = 2 * (1 - this.tCDF(Math.abs(tStatIntercept), df));

    const fStatistic = (ssRegression / 1) / (ssResidual / df);
    const pValueModel = 1 - this.fCDF(fStatistic, 1, df);

    return {
      slope: slope,
      intercept: intercept,
      rSquared: rSquared,
      adjustedRSquared: adjustedRSquared,
      // Rounding can push rSquared a hair below zero; clamp before the root.
      correlation: Math.sqrt(Math.max(0, rSquared)) * Math.sign(slope),
      standardErrorSlope: standardErrorSlope,
      standardErrorIntercept: standardErrorIntercept,
      tStatSlope: tStatSlope,
      tStatIntercept: tStatIntercept,
      pValueSlope: pValueSlope,
      pValueIntercept: pValueIntercept,
      fStatistic: fStatistic,
      pValueModel: pValueModel,
      degreesOfFreedom: df,
      mse: mse,
      rmse: rmse,
      residuals: residuals,
      predicted: predicted,
      sampleSize: n,
      equation: `y = ${intercept.toFixed(4)} + ${slope.toFixed(4)}x`,
      residualAnalysis: this.analyzeResiduals(residuals, predicted)
    };
  }

  /**
   * Fits a multiple linear regression with an intercept.
   * Rows where any involved variable is not a finite number are dropped.
   *
   * @param {{headers: string[], data: object[]}} dataset - Column names and
   *   row objects keyed by column name.
   * @param {string} dependentVariable - Response column name.
   * @param {string[]} independentVariables - Predictor column names.
   * @returns {object} Per-coefficient statistics, fit metrics, residuals,
   *   predictions, equation string and residual analysis.
   * @throws {Error} On malformed input, unknown columns, too few valid rows,
   *   or a singular design matrix.
   */
  multiple(dataset, dependentVariable, independentVariables) {
    if (!dataset || !Array.isArray(dataset.data) || !Array.isArray(dataset.headers)) {
      throw new Error('Invalid dataset format');
    }

    if (!Array.isArray(independentVariables)) {
      throw new Error('Independent variables must be an array');
    }

    if (!dataset.headers.includes(dependentVariable)) {
      throw new Error(`Dependent variable '${dependentVariable}' not found in dataset`);
    }

    const missingVars = independentVariables.filter((name) => !dataset.headers.includes(name));
    if (missingVars.length > 0) {
      throw new Error(`Independent variables not found: ${missingVars.join(', ')}`);
    }

    const involved = [dependentVariable, ...independentVariables];
    const validRows = dataset.data.filter((row) =>
      involved.every((name) => Number.isFinite(row[name]))
    );

    const k = independentVariables.length;
    if (validRows.length < k + 2) {
      throw new Error(`Need at least ${k + 2} valid observations`);
    }

    const n = validRows.length;
    const y = validRows.map((row) => row[dependentVariable]);
    // Leading 1 in every design row models the intercept.
    const X = validRows.map((row) => [1, ...independentVariables.map((name) => row[name])]);

    const fit = this.fitLeastSquares(X, y, k);

    const fStatistic = (fit.ssRegression / k) / (fit.ssResidual / fit.residualDf);
    const pValueModel = 1 - this.fCDF(fStatistic, k, fit.residualDf);

    const coefficientData = fit.coefficients.map((coef, i) => ({
      variable: i === 0 ? 'Intercept' : independentVariables[i - 1],
      coefficient: coef,
      standardError: fit.standardErrors[i],
      tStatistic: fit.tStats[i],
      pValue: fit.pValues[i],
      significant: fit.pValues[i] < 0.05
    }));

    return {
      coefficients: coefficientData,
      intercept: fit.coefficients[0],
      rSquared: fit.rSquared,
      adjustedRSquared: fit.adjustedRSquared,
      fStatistic: fStatistic,
      pValueModel: pValueModel,
      mse: fit.mse,
      rmse: fit.rmse,
      residuals: fit.residuals,
      predicted: fit.predicted,
      sampleSize: n,
      degreesOfFreedom: fit.residualDf,
      dependentVariable: dependentVariable,
      independentVariables: independentVariables,
      equation: this.buildEquation(coefficientData),
      residualAnalysis: this.analyzeResiduals(fit.residuals, fit.predicted)
    };
  }

  /**
   * Fits y = c0 + c1*x + ... + c_degree*x^degree by least squares.
   *
   * @param {number[]} x - Predictor values.
   * @param {number[]} y - Response values, same length as `x`.
   * @param {number} [degree=2] - Integer polynomial degree in [1, 10].
   * @returns {object} Coefficients (ascending power), fit metrics, residuals,
   *   predictions, per-coefficient standard errors, t statistics and p-values.
   * @throws {Error} On invalid input, too few valid pairs, or a singular
   *   normal-equation matrix.
   */
  polynomial(x, y, degree = 2) {
    this.assertPairedArrays(x, y);

    if (!Number.isInteger(degree)) {
      throw new Error('Degree must be an integer between 1 and 10');
    }
    if (degree < 1 || degree > 10) {
      throw new Error('Degree must be between 1 and 10');
    }

    const { xValues, yValues } = this.extractValidPairs(x, y);
    const n = xValues.length;
    if (n < degree + 2) {
      throw new Error(`Need at least ${degree + 2} valid observations for degree ${degree} polynomial`);
    }

    // Design matrix row for each x: [1, x, x^2, ..., x^degree].
    const X = xValues.map((xi) => {
      const row = [1];
      for (let power = 1; power <= degree; power++) {
        row.push(Math.pow(xi, power));
      }
      return row;
    });

    const fit = this.fitLeastSquares(X, yValues, degree);

    return {
      coefficients: fit.coefficients,
      degree: degree,
      rSquared: fit.rSquared,
      adjustedRSquared: fit.adjustedRSquared,
      mse: fit.mse,
      rmse: fit.rmse,
      residuals: fit.residuals,
      predicted: fit.predicted,
      sampleSize: n,
      equation: this.buildPolynomialEquation(fit.coefficients),
      residualAnalysis: this.analyzeResiduals(fit.residuals, fit.predicted),
      standardErrors: fit.standardErrors,
      tStatistics: fit.tStats,
      pValues: fit.pValues
    };
  }

  /**
   * Fits a single-predictor logistic regression by Newton-Raphson.
   * Pairs are kept only when both values are finite and y is exactly 0 or 1.
   *
   * @param {number[]} x - Predictor values.
   * @param {number[]} y - Binary responses (0 or 1), same length as `x`.
   * @param {number} [maxIterations=100] - Maximum Newton-Raphson steps.
   * @param {number} [tolerance=1e-6] - Stop once both parameter updates are
   *   smaller than this in absolute value.
   * @returns {object} Intercept, slope, fitted probabilities, 0/1 predictions
   *   (threshold 0.5), log-likelihood, McFadden R^2, accuracy, confusion
   *   matrix, plus `converged` and `iterations`.
   * @throws {Error} On invalid input, fewer than 10 valid pairs, or a
   *   singular information matrix.
   */
  logistic(x, y, maxIterations = 100, tolerance = 1e-6) {
    // Same input contract as linear() and polynomial().
    this.assertPairedArrays(x, y);

    const { xValues, yValues } = this.extractValidPairs(x, y, true);
    const n = xValues.length;
    if (n < 10) {
      throw new Error('Need at least 10 valid observations for logistic regression');
    }

    let beta0 = 0;
    let beta1 = 0;
    let converged = false;
    let iterations = 0;

    for (let iter = 0; iter < maxIterations; iter++) {
      iterations = iter + 1;
      const probabilities = xValues.map((xi) => this.sigmoid(beta0 + beta1 * xi));

      // Score vector (gradient) and 2x2 Fisher information matrix.
      let score0 = 0;
      let score1 = 0;
      let info00 = 0;
      let info01 = 0;
      let info11 = 0;

      for (let i = 0; i < n; i++) {
        const p = probabilities[i];
        const weight = p * (1 - p);
        const residual = yValues[i] - p;
        score0 += residual;
        score1 += residual * xValues[i];

        info00 += weight;
        info01 += weight * xValues[i];
        info11 += weight * xValues[i] * xValues[i];
      }

      const determinant = info00 * info11 - info01 * info01;
      if (Math.abs(determinant) < 1e-10) {
        throw new Error('Information matrix is singular');
      }

      // Newton step: information^-1 * score, using the closed-form 2x2 inverse.
      const delta0 = (info11 * score0 - info01 * score1) / determinant;
      const delta1 = (info00 * score1 - info01 * score0) / determinant;

      beta0 += delta0;
      beta1 += delta1;

      if (Math.abs(delta0) < tolerance && Math.abs(delta1) < tolerance) {
        converged = true;
        break;
      }
    }

    const finalProbabilities = xValues.map((xi) => this.sigmoid(beta0 + beta1 * xi));
    const predicted = finalProbabilities.map((p) => (p >= 0.5 ? 1 : 0));

    // The 1e-15 offset keeps log() finite when a probability saturates.
    const logLikelihood = yValues.reduce((sum, yi, i) => {
      const p = finalProbabilities[i];
      return sum + yi * Math.log(p + 1e-15) + (1 - yi) * Math.log(1 - p + 1e-15);
    }, 0);

    const nullLogLikelihood = this.calculateNullLogLikelihood(yValues);
    const mcFaddenR2 = 1 - (logLikelihood / nullLogLikelihood);

    const accuracy = predicted.reduce((sum, pred, i) => sum + (pred === yValues[i] ? 1 : 0), 0) / n;

    return {
      intercept: beta0,
      slope: beta1,
      probabilities: finalProbabilities,
      predicted: predicted,
      logLikelihood: logLikelihood,
      mcFaddenR2: mcFaddenR2,
      accuracy: accuracy,
      sampleSize: n,
      equation: `p = 1 / (1 + exp(-(${beta0.toFixed(4)} + ${beta1.toFixed(4)}x)))`,
      confusionMatrix: this.calculateConfusionMatrix(yValues, predicted),
      converged: converged,
      iterations: iterations
    };
  }

  /**
   * Evaluates a fitted model on new predictor values.
   *
   * Model kinds are recognised by shape:
   *  - polynomial: numeric `coefficients` array plus `degree`;
   *  - multiple: `coefficients` array of `{ coefficient }` objects;
   *  - slope/intercept: anything with `slope` and `intercept`. For a model
   *    from logistic() this yields the linear predictor, not a probability.
   *
   * @param {object} model - Result object of one of the fitting methods.
   * @param {number|number[]|number[][]} newX - A value, a list of values, or
   *   (for multiple regression) one row or a list of rows of predictors.
   * @returns {number|number[]} Prediction(s) matching the shape of `newX`.
   * @throws {Error} If the model is not an object or has an unknown shape.
   */
  predict(model, newX) {
    if (!model || typeof model !== 'object') {
      throw new Error('Invalid model object');
    }

    const hasCoefficientArray = Array.isArray(model.coefficients);

    // Polynomial must be tested before the generic coefficient-array case:
    // its coefficients are plain numbers, not { coefficient } records.
    if (hasCoefficientArray && model.degree !== undefined) {
      const evaluate = (value) => {
        let result = model.coefficients[0];
        for (let i = 1; i <= model.degree; i++) {
          result += model.coefficients[i] * Math.pow(value, i);
        }
        return result;
      };
      return Array.isArray(newX) ? newX.map(evaluate) : evaluate(newX);
    }

    if (hasCoefficientArray) {
      const evaluateRow = (row) => {
        const extendedRow = [1, ...row];
        return model.coefficients.reduce(
          (sum, coef, i) => sum + coef.coefficient * extendedRow[i],
          0
        );
      };
      if (Array.isArray(newX) && Array.isArray(newX[0])) {
        return newX.map(evaluateRow);
      }
      // A bare scalar is accepted as a one-predictor row.
      return evaluateRow(Array.isArray(newX) ? newX : [newX]);
    }

    if (model.slope !== undefined && model.intercept !== undefined) {
      if (Array.isArray(newX)) {
        return newX.map((value) => model.intercept + model.slope * value);
      }
      return model.intercept + model.slope * newX;
    }

    throw new Error('Unknown model type');
  }

  /**
   * Summarises residuals: mean, sample standard deviation, standardized
   * residuals, outliers (|standardized| > 2), Durbin-Watson statistic and a
   * Jarque-Bera normality test.
   *
   * @param {number[]} residuals - Observed minus predicted values.
   * @param {number[]} predicted - Fitted values (accepted for interface
   *   compatibility; not used in the computation).
   * @returns {object} Residual diagnostics.
   */
  analyzeResiduals(residuals, predicted) {
    const n = residuals.length;
    const meanResidual = residuals.reduce((sum, r) => sum + r, 0) / n;
    const stdResidual = Math.sqrt(
      residuals.reduce((sum, r) => sum + Math.pow(r - meanResidual, 2), 0) / (n - 1)
    );

    // With zero spread the ratio would be 0/0; report zeros instead of NaN.
    const standardizedResiduals = stdResidual === 0
      ? residuals.map(() => 0)
      : residuals.map((r) => r / stdResidual);

    const outliers = standardizedResiduals
      .map((sr, i) => ({ index: i, value: sr }))
      .filter((item) => Math.abs(item.value) > 2);

    return {
      mean: meanResidual,
      standardDeviation: stdResidual,
      standardizedResiduals: standardizedResiduals,
      outliers: outliers,
      durbinWatson: this.calculateDurbinWatson(residuals),
      normalityTest: this.testResidualNormality(residuals)
    };
  }

  /**
   * Durbin-Watson statistic: sum of squared successive differences divided
   * by the sum of squared residuals. Returns NaN when every residual is 0.
   *
   * @param {number[]} residuals
   * @returns {number}
   */
  calculateDurbinWatson(residuals) {
    let numerator = 0;
    for (let i = 1; i < residuals.length; i++) {
      numerator += Math.pow(residuals[i] - residuals[i - 1], 2);
    }

    let denominator = 0;
    for (let i = 0; i < residuals.length; i++) {
      denominator += Math.pow(residuals[i], 2);
    }

    return numerator / denominator;
  }

  /**
   * Jarque-Bera normality test on the residuals; the p-value comes from a
   * chi-square distribution with 2 degrees of freedom.
   *
   * @param {number[]} residuals
   * @returns {object} `{ isNormal: true, pValue: 1 }` when the residuals have
   *   zero spread, otherwise the statistic, p-value, `isNormal`
   *   (p-value > 0.05), skewness and excess kurtosis.
   */
  testResidualNormality(residuals) {
    const n = residuals.length;
    const mean = residuals.reduce((sum, r) => sum + r, 0) / n;
    const variance = residuals.reduce((sum, r) => sum + Math.pow(r - mean, 2), 0) / (n - 1);
    const stdDev = Math.sqrt(variance);

    if (stdDev === 0) {
      return { isNormal: true, pValue: 1 };
    }

    const skewness = residuals.reduce((sum, r) => sum + Math.pow((r - mean) / stdDev, 3), 0) / n;
    const kurtosis = residuals.reduce((sum, r) => sum + Math.pow((r - mean) / stdDev, 4), 0) / n - 3;

    const jarqueBera = (n / 6) * (Math.pow(skewness, 2) + Math.pow(kurtosis, 2) / 4);
    const pValue = 1 - this.chiSquareCDF(jarqueBera, 2);

    return {
      jarqueBeraStatistic: jarqueBera,
      pValue: pValue,
      isNormal: pValue > 0.05,
      skewness: skewness,
      kurtosis: kurtosis
    };
  }

  /**
   * Formats a multiple-regression equation such as
   * `y = 1.0000 +2.0000*a -1.0000*b`.
   *
   * @param {{variable: string, coefficient: number}[]} coefficientData
   * @returns {string}
   */
  buildEquation(coefficientData) {
    const terms = coefficientData.map((coef) => {
      if (coef.variable === 'Intercept') {
        return coef.coefficient.toFixed(4);
      }
      // Negative numbers already carry their sign from toFixed().
      const sign = coef.coefficient >= 0 ? '+' : '';
      return `${sign}${coef.coefficient.toFixed(4)}*${coef.variable}`;
    });

    return `y = ${terms.join(' ')}`;
  }

  /**
   * Formats a polynomial equation such as `y = 1.0000 +2.0000*x +3.0000*x^2`.
   *
   * @param {number[]} coefficients - Coefficients in ascending power order.
   * @returns {string}
   */
  buildPolynomialEquation(coefficients) {
    const terms = coefficients.map((coef, i) => {
      if (i === 0) {
        return coef.toFixed(4);
      }
      const sign = coef >= 0 ? '+' : '';
      const variable = i === 1 ? 'x' : `x^${i}`;
      return `${sign}${coef.toFixed(4)}*${variable}`;
    });

    return `y = ${terms.join(' ')}`;
  }

  /**
   * Logistic function with the argument clamped to [-500, 500] so that
   * Math.exp cannot overflow.
   *
   * @param {number} z
   * @returns {number}
   */
  sigmoid(z) {
    return 1 / (1 + Math.exp(-Math.max(-500, Math.min(500, z))));
  }

  /**
   * Log-likelihood of the intercept-only model, whose fitted probability is
   * the mean of `y`.
   *
   * @param {number[]} y - Binary responses.
   * @returns {number}
   */
  calculateNullLogLikelihood(y) {
    const p = y.reduce((sum, val) => sum + val, 0) / y.length;
    return y.reduce((sum, val) => {
      return sum + val * Math.log(p + 1e-15) + (1 - val) * Math.log(1 - p + 1e-15);
    }, 0);
  }

  /**
   * Builds binary confusion-matrix counts and the derived precision, recall,
   * specificity and F1 score. A ratio with a zero denominator is reported as 0.
   *
   * @param {number[]} actual - True labels (0 or 1).
   * @param {number[]} predicted - Predicted labels (0 or 1).
   * @returns {object}
   */
  calculateConfusionMatrix(actual, predicted) {
    let tp = 0;
    let fp = 0;
    let tn = 0;
    let fn = 0;

    for (let i = 0; i < actual.length; i++) {
      if (actual[i] === 1 && predicted[i] === 1) tp++;
      else if (actual[i] === 0 && predicted[i] === 1) fp++;
      else if (actual[i] === 0 && predicted[i] === 0) tn++;
      else if (actual[i] === 1 && predicted[i] === 0) fn++;
    }

    const precision = tp + fp > 0 ? tp / (tp + fp) : 0;
    const recall = tp + fn > 0 ? tp / (tp + fn) : 0;
    const specificity = tn + fp > 0 ? tn / (tn + fp) : 0;
    const f1Score = precision + recall > 0 ? 2 * (precision * recall) / (precision + recall) : 0;

    return {
      truePositive: tp,
      falsePositive: fp,
      trueNegative: tn,
      falseNegative: fn,
      precision: precision,
      recall: recall,
      specificity: specificity,
      f1Score: f1Score
    };
  }

  /**
   * Matrix product A * B for arrays of rows.
   *
   * @param {number[][]} A - rowsA x colsA matrix.
   * @param {number[][]} B - colsA x colsB matrix.
   * @returns {number[][]} rowsA x colsB matrix.
   */
  matrixMultiply(A, B) {
    const rowsA = A.length;
    const colsA = A[0].length;
    const colsB = B[0].length;

    const result = Array.from({ length: rowsA }, () => new Array(colsB).fill(0));

    for (let i = 0; i < rowsA; i++) {
      for (let j = 0; j < colsB; j++) {
        for (let k = 0; k < colsA; k++) {
          result[i][j] += A[i][k] * B[k][j];
        }
      }
    }

    return result;
  }

  /**
   * Matrix-vector product A * b.
   *
   * @param {number[][]} A
   * @param {number[]} b
   * @returns {number[]}
   */
  matrixVectorMultiply(A, b) {
    return A.map((row) => row.reduce((sum, val, i) => sum + val * b[i], 0));
  }

  /**
   * Transposes a rectangular matrix given as an array of rows.
   *
   * @param {number[][]} matrix
   * @returns {number[][]}
   */
  transpose(matrix) {
    return matrix[0].map((_, colIndex) => matrix.map((row) => row[colIndex]));
  }

  /**
   * Inverts a square matrix by Gauss-Jordan elimination with partial
   * pivoting. The input matrix is not modified.
   *
   * @param {number[][]} matrix
   * @returns {number[][]}
   * @throws {Error} If a pivot is smaller than 1e-10 in absolute value.
   */
  matrixInverse(matrix) {
    const n = matrix.length;
    // Augment each row with the matching identity row: [A | I].
    const augmented = matrix.map((row, i) => [
      ...row,
      ...Array.from({ length: n }, (_, j) => (i === j ? 1 : 0))
    ]);

    for (let i = 0; i < n; i++) {
      // Partial pivoting: bring the largest remaining entry of column i up.
      let maxRow = i;
      for (let k = i + 1; k < n; k++) {
        if (Math.abs(augmented[k][i]) > Math.abs(augmented[maxRow][i])) {
          maxRow = k;
        }
      }
      [augmented[i], augmented[maxRow]] = [augmented[maxRow], augmented[i]];

      const pivot = augmented[i][i];
      if (Math.abs(pivot) < 1e-10) {
        throw new Error('Matrix is singular and cannot be inverted');
      }

      for (let j = 0; j < 2 * n; j++) {
        augmented[i][j] /= pivot;
      }

      for (let k = 0; k < n; k++) {
        if (k !== i) {
          const factor = augmented[k][i];
          for (let j = 0; j < 2 * n; j++) {
            augmented[k][j] -= factor * augmented[i][j];
          }
        }
      }
    }

    // The right half now holds the inverse.
    return augmented.map((row) => row.slice(n));
  }

  /**
   * Cumulative distribution function of Student's t distribution.
   *
   * @param {number} t - Quantile (any sign).
   * @param {number} df - Degrees of freedom; values <= 0 yield 0.5.
   * @returns {number} P(T <= t).
   */
  tCDF(t, df) {
    if (df <= 0) return 0.5;

    const x = df / (t * t + df);
    // Half of I_x(df/2, 1/2) is the probability of one tail beyond |t|.
    const tail = 0.5 * this.incompleteBeta(df / 2, 0.5, x);
    return t >= 0 ? 1 - tail : tail;
  }

  /**
   * Cumulative distribution function of the F distribution.
   *
   * @param {number} f - Quantile; values <= 0 yield 0.
   * @param {number} df1 - Numerator degrees of freedom.
   * @param {number} df2 - Denominator degrees of freedom.
   * @returns {number} P(F <= f).
   */
  fCDF(f, df1, df2) {
    if (f <= 0) return 0;

    const x = df2 / (df2 + df1 * f);
    return 1 - this.incompleteBeta(df2 / 2, df1 / 2, x);
  }

  /**
   * Cumulative distribution function of the chi-square distribution.
   *
   * @param {number} x - Quantile; values <= 0 yield 0.
   * @param {number} df - Degrees of freedom.
   * @returns {number} P(X <= x).
   */
  chiSquareCDF(x, df) {
    if (x <= 0) return 0;
    return this.regularizedGammaP(df / 2, x / 2);
  }

  /**
   * Regularized incomplete beta function I_x(a, b), evaluated with the
   * continued-fraction expansion. The symmetry
   * I_x(a, b) = 1 - I_{1-x}(b, a) selects the side where the fraction
   * converges quickly.
   *
   * @param {number} a - First shape parameter (> 0).
   * @param {number} b - Second shape parameter (> 0).
   * @param {number} x - Upper integration limit; clamped to [0, 1].
   * @returns {number} Value in [0, 1].
   */
  incompleteBeta(a, b, x) {
    if (x <= 0) return 0;
    if (x >= 1) return 1;

    // x^a (1-x)^b / B(a, b), assembled in log space to avoid overflow.
    const front = Math.exp(
      this.logGamma(a + b) - this.logGamma(a) - this.logGamma(b) +
      a * Math.log(x) + b * Math.log(1 - x)
    );

    if (x < (a + 1) / (a + b + 2)) {
      return front * this.betaContinuedFraction(a, b, x) / a;
    }
    return 1 - front * this.betaContinuedFraction(b, a, 1 - x) / b;
  }

  /**
   * Continued fraction used by incompleteBeta, evaluated with the modified
   * Lentz algorithm.
   *
   * @param {number} a
   * @param {number} b
   * @param {number} x
   * @returns {number}
   */
  betaContinuedFraction(a, b, x) {
    const tiny = 1e-300; // replaces exact zeros so no division blows up
    const epsilon = 1e-14;
    const maxIterations = 300;

    const qab = a + b;
    const qap = a + 1;
    const qam = a - 1;

    let c = 1;
    let d = 1 - qab * x / qap;
    if (Math.abs(d) < tiny) d = tiny;
    d = 1 / d;
    let h = d;

    for (let m = 1; m <= maxIterations; m++) {
      const m2 = 2 * m;

      // Even step of the recurrence.
      let numeratorTerm = m * (b - m) * x / ((qam + m2) * (a + m2));
      d = 1 + numeratorTerm * d;
      if (Math.abs(d) < tiny) d = tiny;
      c = 1 + numeratorTerm / c;
      if (Math.abs(c) < tiny) c = tiny;
      d = 1 / d;
      h *= d * c;

      // Odd step of the recurrence.
      numeratorTerm = -(a + m) * (qab + m) * x / ((a + m2) * (qap + m2));
      d = 1 + numeratorTerm * d;
      if (Math.abs(d) < tiny) d = tiny;
      c = 1 + numeratorTerm / c;
      if (Math.abs(c) < tiny) c = tiny;
      d = 1 / d;
      const delta = d * c;
      h *= delta;

      if (Math.abs(delta - 1) < epsilon) break;
    }

    return h;
  }

  /**
   * Lower incomplete gamma function (not regularized): the integral of
   * t^(a-1) e^(-t) over [0, x].
   *
   * @param {number} a - Shape parameter (> 0).
   * @param {number} x - Upper integration limit; values <= 0 yield 0.
   * @returns {number}
   */
  incompleteGamma(a, x) {
    if (x <= 0) return 0;
    return this.regularizedGammaP(a, x) * this.gamma(a);
  }

  /**
   * Regularized lower incomplete gamma function P(a, x). Uses the power
   * series for x < a + 1 and the continued fraction for the complementary
   * function otherwise, each in the region where it converges quickly.
   *
   * @param {number} a - Shape parameter (> 0).
   * @param {number} x - Upper integration limit.
   * @returns {number} Value in [0, 1].
   */
  regularizedGammaP(a, x) {
    if (x <= 0) return 0;
    if (x === Infinity) return 1;

    // x^a e^(-x) / Gamma(a), assembled in log space to avoid overflow.
    const front = Math.exp(a * Math.log(x) - x - this.logGamma(a));
    const maxIterations = 500;
    const epsilon = 1e-15;

    if (x < a + 1) {
      let term = 1 / a;
      let sum = term;
      for (let n = 1; n <= maxIterations; n++) {
        term *= x / (a + n);
        sum += term;
        if (Math.abs(term) < Math.abs(sum) * epsilon) break;
      }
      return Math.min(1, sum * front);
    }

    // Modified Lentz evaluation of the continued fraction for Q = 1 - P.
    const tiny = 1e-300;
    let b = x + 1 - a;
    let c = 1 / tiny;
    let d = 1 / b;
    let h = d;
    for (let i = 1; i <= maxIterations; i++) {
      const an = -i * (i - a);
      b += 2;
      d = an * d + b;
      if (Math.abs(d) < tiny) d = tiny;
      c = b + an / c;
      if (Math.abs(c) < tiny) c = tiny;
      d = 1 / d;
      const delta = d * c;
      h *= delta;
      if (Math.abs(delta - 1) < epsilon) break;
    }
    return Math.max(0, 1 - front * h);
  }

  /**
   * Gamma function via the Lanczos approximation; the reflection formula
   * handles x < 0.5.
   *
   * @param {number} x
   * @returns {number}
   */
  gamma(x) {
    if (x < 0.5) {
      return Math.PI / (Math.sin(Math.PI * x) * this.gamma(1 - x));
    }

    const z = x - 1;
    const t = z + 7.5;
    return Math.sqrt(2 * Math.PI) * Math.pow(t, z + 0.5) * Math.exp(-t) * this.lanczosSeries(z);
  }

  /**
   * Natural logarithm of |Gamma(x)|, built from the same Lanczos series as
   * gamma() but in log space so large arguments do not overflow.
   *
   * @param {number} x
   * @returns {number}
   */
  logGamma(x) {
    if (x < 0.5) {
      return Math.log(Math.PI / Math.abs(Math.sin(Math.PI * x))) - this.logGamma(1 - x);
    }

    const z = x - 1;
    const t = z + 7.5;
    return 0.5 * Math.log(2 * Math.PI) + (z + 0.5) * Math.log(t) - t +
      Math.log(this.lanczosSeries(z));
  }

  /**
   * Partial-fraction series of the Lanczos approximation (g = 7, nine
   * coefficients), shared by gamma() and logGamma().
   *
   * @param {number} z - Argument already shifted by -1.
   * @returns {number}
   */
  lanczosSeries(z) {
    const coefficients = [
      676.5203681218851, -1259.1392167224028, 771.32342877765313,
      -176.61502916214059, 12.507343278686905, -0.13857109526572012,
      9.9843695780195716e-6, 1.5056327351493116e-7
    ];

    let series = 0.99999999999980993;
    for (let i = 0; i < coefficients.length; i++) {
      series += coefficients[i] / (z + i + 1);
    }
    return series;
  }

  /**
   * Validates that both inputs are arrays of equal length.
   *
   * @param {*} x
   * @param {*} y
   * @throws {Error} If either input is not an array or the lengths differ.
   */
  assertPairedArrays(x, y) {
    if (!Array.isArray(x) || !Array.isArray(y)) {
      throw new Error('Both inputs must be arrays');
    }

    if (x.length !== y.length) {
      throw new Error('Arrays must have the same length');
    }
  }

  /**
   * Keeps the positions at which both values are finite numbers (and, when
   * requested, y is exactly 0 or 1), preserving order.
   *
   * @param {Array} x
   * @param {Array} y
   * @param {boolean} [binaryResponse=false] - Also require y to be 0 or 1.
   * @returns {{xValues: number[], yValues: number[]}}
   */
  extractValidPairs(x, y, binaryResponse = false) {
    const xValues = [];
    const yValues = [];

    for (let i = 0; i < x.length; i++) {
      if (!Number.isFinite(x[i]) || !Number.isFinite(y[i])) continue;
      if (binaryResponse && y[i] !== 0 && y[i] !== 1) continue;
      xValues.push(x[i]);
      yValues.push(y[i]);
    }

    return { xValues, yValues };
  }

  /**
   * Solves the normal equations (X'X) b = X'y and derives the statistics
   * shared by multiple() and polynomial().
   *
   * @param {number[][]} X - Design matrix whose first column is all ones.
   * @param {number[]} y - Response vector.
   * @param {number} predictorCount - Number of columns excluding the intercept.
   * @returns {object} Coefficients, predictions, residuals, sums of squares,
   *   fit metrics and per-coefficient standard errors, t statistics, p-values.
   * @throws {Error} If X'X is singular.
   */
  fitLeastSquares(X, y, predictorCount) {
    const n = y.length;
    const residualDf = n - predictorCount - 1;

    const XTranspose = this.transpose(X);
    const XTX = this.matrixMultiply(XTranspose, X);
    const XTXInverse = this.matrixInverse(XTX);
    const XTY = this.matrixVectorMultiply(XTranspose, y);
    const coefficients = this.matrixVectorMultiply(XTXInverse, XTY);

    const predicted = X.map((row) =>
      coefficients.reduce((sum, coef, i) => sum + coef * row[i], 0)
    );
    const residuals = y.map((actual, i) => actual - predicted[i]);
    const meanY = y.reduce((sum, val) => sum + val, 0) / n;

    const ssResidual = residuals.reduce((sum, r) => sum + r * r, 0);
    const ssTotal = y.reduce((sum, val) => sum + Math.pow(val - meanY, 2), 0);
    const ssRegression = ssTotal - ssResidual;

    const rSquared = ssTotal === 0 ? 1 : ssRegression / ssTotal;
    // A constant response makes the adjusted formula 0/0; avoid NaN.
    const adjustedRSquared = ssTotal === 0
      ? rSquared
      : 1 - ((ssResidual / residualDf) / (ssTotal / (n - 1)));

    const mse = ssResidual / residualDf;
    const rmse = Math.sqrt(mse);

    // Coefficient variances are the diagonal of mse * (X'X)^-1.
    const standardErrors = coefficients.map((_, i) => Math.sqrt(mse * XTXInverse[i][i]));
    const tStats = coefficients.map((coef, i) => coef / standardErrors[i]);
    const pValues = tStats.map((t) => 2 * (1 - this.tCDF(Math.abs(t), residualDf)));

    return {
      coefficients,
      predicted,
      residuals,
      ssResidual,
      ssTotal,
      ssRegression,
      rSquared,
      adjustedRSquared,
      mse,
      rmse,
      standardErrors,
      tStats,
      pValues,
      residualDf
    };
  }
}
|
659
|
+
|
660
|
+
export default Regression;
|