datly 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.MD +1773 -2386
- package/dist/datly.cjs +1 -1
- package/dist/datly.mjs +1 -1
- package/dist/datly.umd.js +1 -1
- package/package.json +3 -3
- package/src/code.js +2466 -0
- package/src/index.js +236 -480
- package/src/plot.js +609 -0
- package/src/core/dataLoader.js +0 -407
- package/src/core/utils.js +0 -306
- package/src/core/validator.js +0 -205
- package/src/dataviz/index.js +0 -1566
- package/src/descriptive/centralTendency.js +0 -208
- package/src/descriptive/dispersion.js +0 -273
- package/src/descriptive/position.js +0 -268
- package/src/descriptive/shape.js +0 -336
- package/src/inferential/confidenceIntervals.js +0 -561
- package/src/inferential/hypothesisTesting.js +0 -527
- package/src/inferential/normalityTests.js +0 -587
- package/src/insights/autoAnalyser.js +0 -685
- package/src/insights/interpreter.js +0 -543
- package/src/insights/patternDetector.js +0 -897
- package/src/insights/reportGenerator.js +0 -1072
- package/src/ml/ClassificationMetrics.js +0 -336
- package/src/ml/DecisionTree.js +0 -412
- package/src/ml/KNearestNeighbors.js +0 -317
- package/src/ml/LinearRegression.js +0 -179
- package/src/ml/LogisticRegression.js +0 -396
- package/src/ml/MachineLearning.js +0 -490
- package/src/ml/NaiveBayes.js +0 -296
- package/src/ml/RandomForest.js +0 -323
- package/src/ml/SupportVectorMachine.js +0 -299
- package/src/ml/baseModel.js +0 -106
- package/src/multivariate/correlation.js +0 -653
- package/src/multivariate/regression.js +0 -660
@@ -1,653 +0,0 @@
|
|
1
|
-
/** Significance level used for every `significant` flag produced below. */
const SIGNIFICANCE_LEVEL = 0.05;

/**
 * Clamps a correlation coefficient to [-1, 1]. Floating-point rounding can
 * push a mathematically perfect correlation to e.g. 1.0000000000000002,
 * which would make `1 - r * r` negative and poison the t statistic with NaN.
 */
function clampToUnitInterval(value) {
  return Math.max(-1, Math.min(1, value));
}

/**
 * Validates two parallel arrays and returns the pairs in which both entries
 * are finite numbers (non-numbers, NaN and ±Infinity are skipped).
 *
 * @param {Array} x
 * @param {Array} y
 * @returns {{x: number, y: number}[]}
 * @throws {Error} If either input is not an array or the lengths differ.
 */
function collectValidPairs(x, y) {
  if (!Array.isArray(x) || !Array.isArray(y)) {
    throw new Error('Both inputs must be arrays');
  }
  if (x.length !== y.length) {
    throw new Error('Arrays must have the same length');
  }

  const pairs = [];
  for (let i = 0; i < x.length; i++) {
    if (Number.isFinite(x[i]) && Number.isFinite(y[i])) {
      pairs.push({ x: x[i], y: y[i] });
    }
  }
  return pairs;
}

/**
 * Computes the pieces of the product-moment formula for two equally long
 * numeric arrays: the sum of cross-deviations (numerator) and the square
 * root of the product of the squared-deviation sums (denominator).
 */
function productMoment(xs, ys) {
  const n = xs.length;
  const meanX = xs.reduce((sum, val) => sum + val, 0) / n;
  const meanY = ys.reduce((sum, val) => sum + val, 0) / n;

  let numerator = 0;
  let sumXSquared = 0;
  let sumYSquared = 0;
  for (let i = 0; i < n; i++) {
    const xDiff = xs[i] - meanX;
    const yDiff = ys[i] - meanY;
    numerator += xDiff * yDiff;
    sumXSquared += xDiff * xDiff;
    sumYSquared += yDiff * yDiff;
  }

  return { numerator, denominator: Math.sqrt(sumXSquared * sumYSquared) };
}

/**
 * Validates a `{ headers, data }` dataset and extracts every column once.
 *
 * @returns {{numericColumns: string[], columnValues: Map<string, Array>}}
 *   `numericColumns` lists the headers with at least one finite numeric value.
 * @throws {Error} If the dataset is missing, or lacks `headers` or `data`.
 */
function extractNumericColumns(dataset) {
  if (!dataset || !dataset.headers || !dataset.data) {
    throw new Error('Invalid dataset format');
  }

  const columnValues = new Map(
    dataset.headers.map(header => [header, dataset.data.map(row => row[header])])
  );
  const numericColumns = dataset.headers.filter(header =>
    columnValues.get(header).some(val => Number.isFinite(val))
  );
  return { numericColumns, columnValues };
}

/**
 * Correlation and covariance estimators (Pearson, Spearman, Kendall tau-b,
 * partial correlation) together with the numerical helpers they need
 * (Student t / normal distribution functions, incomplete beta, log-gamma).
 */
class Correlation {
  /**
   * Pearson product-moment correlation with a two-sided t test and a
   * Fisher-z 95% confidence interval. Pairs with a non-finite entry are
   * dropped before computing.
   *
   * @param {Array} x
   * @param {Array} y
   * @returns {Object} correlation, pValue, tStatistic, degreesOfFreedom,
   *   significant, confidenceInterval, sampleSize and (when the coefficient
   *   is defined) interpretation.
   * @throws {Error} On non-array input, differing lengths, or fewer than
   *   3 valid pairs.
   */
  pearson(x, y) {
    const pairs = collectValidPairs(x, y);
    if (pairs.length < 3) {
      throw new Error('Need at least 3 valid paired observations');
    }

    const n = pairs.length;
    const { numerator, denominator } = productMoment(
      pairs.map(pair => pair.x),
      pairs.map(pair => pair.y)
    );

    // Zero variance in either variable: the coefficient is undefined, report
    // "no correlation" rather than dividing by zero.
    if (denominator === 0) {
      return {
        correlation: 0,
        pValue: 1,
        tStatistic: 0,
        degreesOfFreedom: n - 2,
        significant: false,
        confidenceInterval: { lower: 0, upper: 0 },
        sampleSize: n
      };
    }

    const r = clampToUnitInterval(numerator / denominator);
    // For |r| === 1 this is ±Infinity, which tCDF maps to a p-value of 0.
    const tStat = r * Math.sqrt((n - 2) / (1 - r * r));
    const pValue = 2 * (1 - this.tCDF(Math.abs(tStat), n - 2));

    return {
      correlation: r,
      pValue: pValue,
      tStatistic: tStat,
      degreesOfFreedom: n - 2,
      significant: pValue < SIGNIFICANCE_LEVEL,
      confidenceInterval: this.pearsonConfidenceInterval(r, n),
      sampleSize: n,
      interpretation: this.interpretCorrelation(r, pValue)
    };
  }

  /**
   * Spearman rank correlation: Pearson correlation of the average ranks,
   * tested with the t approximation on n - 2 degrees of freedom.
   *
   * @param {Array} x
   * @param {Array} y
   * @returns {Object} correlation, pValue, tStatistic, degreesOfFreedom,
   *   significant, sampleSize, xRanks, yRanks, interpretation.
   * @throws {Error} On non-array input, differing lengths, or fewer than
   *   3 valid pairs.
   */
  spearman(x, y) {
    const pairs = collectValidPairs(x, y);
    if (pairs.length < 3) {
      throw new Error('Need at least 3 valid paired observations');
    }

    const n = pairs.length;
    const xRanks = this.calculateRanks(pairs.map(pair => pair.x));
    const yRanks = this.calculateRanks(pairs.map(pair => pair.y));

    const rho = this.pearsonFromArrays(xRanks, yRanks);
    const tStat = rho * Math.sqrt((n - 2) / (1 - rho * rho));
    const pValue = 2 * (1 - this.tCDF(Math.abs(tStat), n - 2));

    return {
      correlation: rho,
      pValue: pValue,
      tStatistic: tStat,
      degreesOfFreedom: n - 2,
      significant: pValue < SIGNIFICANCE_LEVEL,
      sampleSize: n,
      xRanks: xRanks,
      yRanks: yRanks,
      interpretation: this.interpretCorrelation(rho, pValue, 'Spearman')
    };
  }

  /**
   * Kendall tau-b rank correlation with a normal-approximation p-value.
   *
   * `tiesX` / `tiesY` count pairs tied in exactly one variable and `tiesXY`
   * counts pairs tied in both; the tau-b denominator removes every pair tied
   * in a variable, so `tiesXY` is subtracted on both sides.
   *
   * @param {Array} x
   * @param {Array} y
   * @returns {Object} correlation, pValue, zStatistic, concordantPairs,
   *   discordantPairs, tiesX, tiesY, tiesXY, significant, sampleSize,
   *   interpretation.
   * @throws {Error} On non-array input, differing lengths, or fewer than
   *   3 valid pairs.
   */
  kendall(x, y) {
    const pairs = collectValidPairs(x, y);
    if (pairs.length < 3) {
      throw new Error('Need at least 3 valid paired observations');
    }

    const n = pairs.length;
    let concordant = 0;
    let discordant = 0;
    let tiesX = 0;
    let tiesY = 0;
    let tiesXY = 0;

    for (let i = 0; i < n; i++) {
      for (let j = i + 1; j < n; j++) {
        const xDiff = pairs[i].x - pairs[j].x;
        const yDiff = pairs[i].y - pairs[j].y;

        if (xDiff === 0 && yDiff === 0) {
          tiesXY++;
        } else if (xDiff === 0) {
          tiesX++;
        } else if (yDiff === 0) {
          tiesY++;
        } else if (xDiff * yDiff > 0) {
          concordant++;
        } else {
          discordant++;
        }
      }
    }

    const totalPairs = n * (n - 1) / 2;
    const untiedInX = totalPairs - tiesX - tiesXY;
    const untiedInY = totalPairs - tiesY - tiesXY;
    const denominator = Math.sqrt(untiedInX * untiedInY);
    // A constant variable leaves no untied pairs; tau is undefined, report 0
    // instead of 0 / 0 = NaN.
    const tau = denominator === 0 ? 0 : (concordant - discordant) / denominator;

    // Variance of tau under the null hypothesis (no tie correction applied).
    const variance = (2 * (2 * n + 5)) / (9 * n * (n - 1));
    const zStat = tau / Math.sqrt(variance);
    const pValue = 2 * (1 - this.normalCDF(Math.abs(zStat)));

    return {
      correlation: tau,
      pValue: pValue,
      zStatistic: zStat,
      concordantPairs: concordant,
      discordantPairs: discordant,
      tiesX: tiesX,
      tiesY: tiesY,
      tiesXY: tiesXY,
      significant: pValue < SIGNIFICANCE_LEVEL,
      sampleSize: n,
      interpretation: this.interpretCorrelation(tau, pValue, 'Kendall')
    };
  }

  /**
   * Pairwise correlation matrix over every numeric column of a dataset.
   *
   * @param {{headers: string[], data: Object[]}} dataset
   * @param {string} [method='pearson'] 'pearson', 'spearman' or 'kendall'.
   * @returns {Object} correlations, pValues, sampleSizes (each keyed
   *   `[col1][col2]`), columns, method, strongCorrelations, summary.
   * @throws {Error} On an invalid dataset, fewer than 2 numeric columns, or
   *   an unknown method.
   */
  matrix(dataset, method = 'pearson') {
    const { numericColumns, columnValues } = extractNumericColumns(dataset);

    if (numericColumns.length < 2) {
      throw new Error('Need at least 2 numeric columns for correlation matrix');
    }

    const estimators = {
      pearson: (a, b) => this.pearson(a, b),
      spearman: (a, b) => this.spearman(a, b),
      kendall: (a, b) => this.kendall(a, b)
    };
    // Checked up front: inside the per-pair try/catch below this error would
    // be swallowed and the caller would get a matrix full of NaN instead.
    if (!Object.prototype.hasOwnProperty.call(estimators, method)) {
      throw new Error(`Unknown correlation method: ${method}`);
    }
    const estimate = estimators[method];

    const correlationMatrix = {};
    const pValueMatrix = {};
    const sampleSizeMatrix = {};

    numericColumns.forEach(col1 => {
      correlationMatrix[col1] = {};
      pValueMatrix[col1] = {};
      sampleSizeMatrix[col1] = {};

      numericColumns.forEach(col2 => {
        if (col1 === col2) {
          correlationMatrix[col1][col2] = 1;
          pValueMatrix[col1][col2] = 0;
          sampleSizeMatrix[col1][col2] = dataset.data.length;
          return;
        }

        try {
          const result = estimate(columnValues.get(col1), columnValues.get(col2));
          correlationMatrix[col1][col2] = result.correlation;
          pValueMatrix[col1][col2] = result.pValue;
          sampleSizeMatrix[col1][col2] = result.sampleSize;
        } catch (error) {
          // Best effort: a pair without enough valid observations is marked
          // as missing rather than failing the whole matrix.
          correlationMatrix[col1][col2] = NaN;
          pValueMatrix[col1][col2] = NaN;
          sampleSizeMatrix[col1][col2] = 0;
        }
      });
    });

    return {
      correlations: correlationMatrix,
      pValues: pValueMatrix,
      sampleSizes: sampleSizeMatrix,
      columns: numericColumns,
      method: method,
      strongCorrelations: this.findStrongCorrelations(correlationMatrix, pValueMatrix),
      summary: this.summarizeCorrelationMatrix(correlationMatrix, pValueMatrix, numericColumns)
    };
  }

  /**
   * Covariance of two variables over the pairs where both values are finite.
   *
   * @param {Array} x
   * @param {Array} y
   * @param {boolean} [sample=true] Divide by n - 1 (sample) or n (population).
   * @returns {{covariance: number, meanX: number, meanY: number,
   *   sampleSize: number, sample: boolean}}
   * @throws {Error} On non-array input, differing lengths, or fewer than
   *   2 valid pairs.
   */
  covariance(x, y, sample = true) {
    const pairs = collectValidPairs(x, y);
    if (pairs.length < 2) {
      throw new Error('Need at least 2 valid paired observations');
    }

    const n = pairs.length;
    const xValues = pairs.map(pair => pair.x);
    const yValues = pairs.map(pair => pair.y);

    const meanX = xValues.reduce((sum, val) => sum + val, 0) / n;
    const meanY = yValues.reduce((sum, val) => sum + val, 0) / n;

    const crossDeviationSum = xValues.reduce(
      (sum, xVal, i) => sum + (xVal - meanX) * (yValues[i] - meanY),
      0
    );

    return {
      covariance: crossDeviationSum / (sample ? n - 1 : n),
      meanX: meanX,
      meanY: meanY,
      sampleSize: n,
      sample: sample
    };
  }

  /**
   * Pairwise covariance matrix over every numeric column of a dataset.
   *
   * @param {{headers: string[], data: Object[]}} dataset
   * @param {boolean} [sample=true] Passed through to `covariance`.
   * @returns {{covariance: Object, columns: string[], sample: boolean}}
   * @throws {Error} On an invalid dataset, fewer than 2 numeric columns, or
   *   when a pair of columns has fewer than 2 valid paired observations.
   */
  covarianceMatrix(dataset, sample = true) {
    const { numericColumns, columnValues } = extractNumericColumns(dataset);

    if (numericColumns.length < 2) {
      throw new Error('Need at least 2 numeric columns for covariance matrix');
    }

    const covMatrix = {};
    numericColumns.forEach(col1 => {
      covMatrix[col1] = {};
      numericColumns.forEach(col2 => {
        const result = this.covariance(columnValues.get(col1), columnValues.get(col2), sample);
        covMatrix[col1][col2] = result.covariance;
      });
    });

    return {
      covariance: covMatrix,
      columns: numericColumns,
      sample: sample
    };
  }

  /**
   * First-order partial correlation of x and y controlling for z, with a
   * two-sided t test on n - 3 degrees of freedom.
   *
   * Only rows where all three values are finite are used (listwise
   * deletion), so the three underlying Pearson coefficients describe the
   * same sample.
   *
   * @param {Array} x
   * @param {Array} y
   * @param {Array} z Control variable.
   * @returns {Object} correlation, pValue, significant and, when the
   *   coefficient is defined, tStatistic, degreesOfFreedom, sampleSize,
   *   controllingFor.
   * @throws {Error} On non-array input, differing lengths, or fewer than
   *   4 complete observations (the test needs at least 1 degree of freedom).
   */
  partialCorrelation(x, y, z) {
    if (!Array.isArray(x) || !Array.isArray(y) || !Array.isArray(z)) {
      throw new Error('All inputs must be arrays');
    }
    if (x.length !== y.length || x.length !== z.length) {
      throw new Error('Arrays must have the same length');
    }

    const xs = [];
    const ys = [];
    const zs = [];
    for (let i = 0; i < x.length; i++) {
      if (Number.isFinite(x[i]) && Number.isFinite(y[i]) && Number.isFinite(z[i])) {
        xs.push(x[i]);
        ys.push(y[i]);
        zs.push(z[i]);
      }
    }

    if (xs.length < 4) {
      throw new Error('Need at least 4 complete observations for partial correlation');
    }

    const rxy = this.pearson(xs, ys).correlation;
    const rxz = this.pearson(xs, zs).correlation;
    const ryz = this.pearson(ys, zs).correlation;

    const numerator = rxy - (rxz * ryz);
    const denominator = Math.sqrt((1 - rxz ** 2) * (1 - ryz ** 2));

    // x or y is perfectly explained by z: nothing is left to correlate.
    if (denominator === 0) {
      return {
        correlation: 0,
        pValue: 1,
        significant: false
      };
    }

    const partialR = clampToUnitInterval(numerator / denominator);
    const n = xs.length;
    const df = n - 3;
    const tStat = partialR * Math.sqrt(df / (1 - partialR ** 2));
    const pValue = 2 * (1 - this.tCDF(Math.abs(tStat), df));

    return {
      correlation: partialR,
      pValue: pValue,
      tStatistic: tStat,
      degreesOfFreedom: df,
      significant: pValue < SIGNIFICANCE_LEVEL,
      sampleSize: n,
      controllingFor: 'third variable'
    };
  }

  /**
   * Assigns 1-based ranks to `values`; tied values share the average of the
   * ranks they span. The result is aligned with the input order.
   *
   * @param {number[]} values
   * @returns {number[]}
   */
  calculateRanks(values) {
    const indexed = values.map((value, index) => ({ value, index }));
    indexed.sort((a, b) => a.value - b.value);

    const ranks = new Array(values.length);
    let currentRank = 1;

    for (let i = 0; i < indexed.length; i++) {
      // Gather the run of equal values that starts at position i.
      const tiedValues = [indexed[i]];
      while (i + 1 < indexed.length && indexed[i + 1].value === indexed[i].value) {
        i++;
        tiedValues.push(indexed[i]);
      }

      const averageRank = (currentRank + currentRank + tiedValues.length - 1) / 2;
      tiedValues.forEach(item => {
        ranks[item.index] = averageRank;
      });

      currentRank += tiedValues.length;
    }

    return ranks;
  }

  /**
   * Bare Pearson coefficient of two equally long numeric arrays, without
   * validation or inference. Returns 0 when either array has zero variance;
   * the result is clamped to [-1, 1].
   *
   * @param {number[]} x
   * @param {number[]} y
   * @returns {number}
   */
  pearsonFromArrays(x, y) {
    const { numerator, denominator } = productMoment(x, y);
    return denominator === 0 ? 0 : clampToUnitInterval(numerator / denominator);
  }

  /**
   * Confidence interval for a Pearson coefficient via the Fisher z
   * transformation (atanh / tanh).
   *
   * @param {number} r Sample correlation.
   * @param {number} n Sample size.
   * @param {number} [confidence=0.95] Confidence level.
   * @returns {{lower: number, upper: number}}
   * @throws {Error} Propagated from `normalInverse` when `confidence`
   *   leads to a probability outside (0, 1), e.g. `confidence = 1`.
   */
  pearsonConfidenceInterval(r, n, confidence = 0.95) {
    if (Math.abs(r) >= 1) {
      return { lower: r, upper: r };
    }

    // The standard error 1 / sqrt(n - 3) is undefined for n <= 3; the data
    // carry no information about the interval, so return the full range.
    if (n <= 3) {
      return { lower: -1, upper: 1 };
    }

    const fisherZ = Math.atanh(r);
    const standardError = 1 / Math.sqrt(n - 3);
    const alpha = 1 - confidence;
    const zCritical = this.normalInverse(1 - alpha / 2);
    const marginOfError = zCritical * standardError;

    // tanh is the inverse Fisher transform and, unlike the explicit
    // (e^2z - 1) / (e^2z + 1) form, does not overflow to NaN for large z.
    return {
      lower: Math.tanh(fisherZ - marginOfError),
      upper: Math.tanh(fisherZ + marginOfError)
    };
  }

  /**
   * Lists the column pairs whose |correlation| reaches `threshold` and whose
   * p-value is below 0.05, strongest first.
   *
   * @param {Object} correlationMatrix Keyed `[col1][col2]`.
   * @param {Object} pValueMatrix Keyed `[col1][col2]`.
   * @param {number} [threshold=0.7]
   * @returns {Object[]} variable1, variable2, correlation, pValue, strength.
   */
  findStrongCorrelations(correlationMatrix, pValueMatrix, threshold = 0.7) {
    const strongCorrelations = [];
    const columns = Object.keys(correlationMatrix);

    // Upper triangle only: each unordered pair is reported once.
    for (let i = 0; i < columns.length; i++) {
      for (let j = i + 1; j < columns.length; j++) {
        const col1 = columns[i];
        const col2 = columns[j];
        const correlation = correlationMatrix[col1][col2];
        const pValue = pValueMatrix[col1][col2];

        if (Math.abs(correlation) >= threshold && pValue < SIGNIFICANCE_LEVEL) {
          strongCorrelations.push({
            variable1: col1,
            variable2: col2,
            correlation: correlation,
            pValue: pValue,
            strength: this.getCorrelationStrength(Math.abs(correlation))
          });
        }
      }
    }

    return strongCorrelations.sort((a, b) => Math.abs(b.correlation) - Math.abs(a.correlation));
  }

  /**
   * Aggregate statistics over the upper triangle of a correlation matrix.
   * Missing (NaN) entries are ignored; max/min are 0 when no entry is valid.
   *
   * @param {Object} correlationMatrix Keyed `[col1][col2]`.
   * @param {Object} pValueMatrix Keyed `[col1][col2]`.
   * @param {string[]} columns
   * @returns {Object} totalPairs, significantPairs,
   *   strongPositiveCorrelations, strongNegativeCorrelations,
   *   maxCorrelation, minCorrelation, averageAbsoluteCorrelation.
   */
  summarizeCorrelationMatrix(correlationMatrix, pValueMatrix, columns) {
    let totalCorrelations = 0;
    let significantCorrelations = 0;
    let strongPositive = 0;
    let strongNegative = 0;
    // Start from the identity elements of max/min so that matrices with only
    // negative (or only positive) correlations are not reported as 0.
    let maxCorrelation = -Infinity;
    let minCorrelation = Infinity;

    for (let i = 0; i < columns.length; i++) {
      for (let j = i + 1; j < columns.length; j++) {
        const correlation = correlationMatrix[columns[i]][columns[j]];
        const pValue = pValueMatrix[columns[i]][columns[j]];

        if (!Number.isFinite(correlation)) {
          continue;
        }

        totalCorrelations++;
        if (pValue < SIGNIFICANCE_LEVEL) {
          significantCorrelations++;
        }
        if (correlation > 0.7) strongPositive++;
        if (correlation < -0.7) strongNegative++;

        maxCorrelation = Math.max(maxCorrelation, correlation);
        minCorrelation = Math.min(minCorrelation, correlation);
      }
    }

    return {
      totalPairs: totalCorrelations,
      significantPairs: significantCorrelations,
      strongPositiveCorrelations: strongPositive,
      strongNegativeCorrelations: strongNegative,
      maxCorrelation: totalCorrelations > 0 ? maxCorrelation : 0,
      minCorrelation: totalCorrelations > 0 ? minCorrelation : 0,
      averageAbsoluteCorrelation: this.calculateAverageAbsoluteCorrelation(correlationMatrix, columns)
    };
  }

  /**
   * Mean of |correlation| over the upper triangle, skipping missing entries.
   * Returns 0 when there is no valid entry.
   *
   * @param {Object} correlationMatrix Keyed `[col1][col2]`.
   * @param {string[]} columns
   * @returns {number}
   */
  calculateAverageAbsoluteCorrelation(correlationMatrix, columns) {
    let sum = 0;
    let count = 0;

    for (let i = 0; i < columns.length; i++) {
      for (let j = i + 1; j < columns.length; j++) {
        const correlation = correlationMatrix[columns[i]][columns[j]];
        if (Number.isFinite(correlation)) {
          sum += Math.abs(correlation);
          count++;
        }
      }
    }

    return count > 0 ? sum / count : 0;
  }

  /**
   * Maps an absolute correlation to a verbal strength label.
   *
   * @param {number} absCorrelation
   * @returns {string}
   */
  getCorrelationStrength(absCorrelation) {
    if (absCorrelation >= 0.9) return 'Very Strong';
    if (absCorrelation >= 0.7) return 'Strong';
    if (absCorrelation >= 0.5) return 'Moderate';
    if (absCorrelation >= 0.3) return 'Weak';
    return 'Very Weak';
  }

  /**
   * Builds the one-line human-readable summary attached to each result.
   *
   * @param {number} correlation
   * @param {number} pValue
   * @param {string} [method='Pearson'] Label used at the start of the text.
   * @returns {string}
   */
  interpretCorrelation(correlation, pValue, method = 'Pearson') {
    const strength = this.getCorrelationStrength(Math.abs(correlation));
    const direction = correlation > 0 ? 'positive' : 'negative';
    const significance = pValue < SIGNIFICANCE_LEVEL ? 'significant' : 'not significant';

    return `${method} correlation: ${strength} ${direction} relationship (r = ${correlation.toFixed(4)}, p = ${pValue.toFixed(4)}, ${significance})`;
  }

  /**
   * Student t cumulative distribution function P(T <= t), computed from the
   * regularized incomplete beta function.
   *
   * @param {number} t
   * @param {number} df Degrees of freedom (expected to be positive).
   * @returns {number}
   */
  tCDF(t, df) {
    const x = df / (t * t + df);
    // One-tail probability beyond |t|.
    const tail = 0.5 * this.incompleteBeta(df / 2, 0.5, x);
    return t < 0 ? tail : 1 - tail;
  }

  /**
   * Standard normal cumulative distribution function.
   *
   * @param {number} z
   * @returns {number}
   */
  normalCDF(z) {
    return 0.5 * (1 + this.erf(z / Math.sqrt(2)));
  }

  /**
   * Standard normal quantile function (inverse CDF), using Acklam's
   * piecewise rational approximation: one rational function in
   * sqrt(-2 ln p) for each tail and one in (p - 0.5) for the central region.
   *
   * @param {number} p Probability strictly between 0 and 1.
   * @returns {number} z such that normalCDF(z) is approximately p.
   * @throws {Error} If p <= 0 or p >= 1.
   */
  normalInverse(p) {
    if (p <= 0 || p >= 1) throw new Error('p must be between 0 and 1');

    // Central-region coefficients.
    const a = [-3.969683028665376e+01, 2.209460984245205e+02, -2.759285104469687e+02,
      1.383577518672690e+02, -3.066479806614716e+01, 2.506628277459239e+00];
    const b = [-5.447609879822406e+01, 1.615858368580409e+02, -1.556989798598866e+02,
      6.680131188771972e+01, -1.328068155288572e+01];
    // Tail-region coefficients.
    const c = [-7.784894002430293e-03, -3.223964580411365e-01, -2.400758277161838e+00,
      -2.549732539343734e+00, 4.374664141464968e+00, 2.938163982698783e+00];
    const d = [7.784695709041462e-03, 3.224671290700398e-01, 2.445134137142996e+00,
      3.754408661907416e+00];

    const pLow = 0.02425;
    const pHigh = 1 - pLow;

    const tailQuantile = q =>
      (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) /
      ((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1);

    if (p < pLow) {
      return tailQuantile(Math.sqrt(-2 * Math.log(p)));
    }

    if (p <= pHigh) {
      const q = p - 0.5;
      const r = q * q;
      return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * q /
        (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1);
    }

    // Upper tail mirrors the lower tail.
    return -tailQuantile(Math.sqrt(-2 * Math.log(1 - p)));
  }

  /**
   * Error function via the Abramowitz-Stegun 7.1.26 polynomial approximation
   * (absolute error on the order of 1e-7).
   *
   * @param {number} x
   * @returns {number}
   */
  erf(x) {
    const a1 = 0.254829592;
    const a2 = -0.284496736;
    const a3 = 1.421413741;
    const a4 = -1.453152027;
    const a5 = 1.061405429;
    const p = 0.3275911;

    // The polynomial is valid for x >= 0; odd symmetry covers the rest.
    const sign = x < 0 ? -1 : 1;
    const absX = Math.abs(x);

    const t = 1 / (1 + p * absX);
    const y = 1 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * Math.exp(-absX * absX);

    return sign * y;
  }

  /**
   * Regularized incomplete beta function I_x(a, b).
   *
   * @param {number} a
   * @param {number} b
   * @param {number} x Clamped: values <= 0 give 0, values >= 1 give 1.
   * @returns {number}
   */
  incompleteBeta(a, b, x) {
    if (x <= 0) return 0;
    if (x >= 1) return 1;

    // Prefactor x^a (1 - x)^b / B(a, b), evaluated in log space.
    const bt = Math.exp(this.logGamma(a + b) - this.logGamma(a) - this.logGamma(b) +
      a * Math.log(x) + b * Math.log(1 - x));

    // Use the symmetry I_x(a, b) = 1 - I_{1-x}(b, a) to stay on the side
    // where the continued fraction converges quickly.
    if (x < (a + 1) / (a + b + 2)) {
      return bt * this.continuedFractionBeta(a, b, x) / a;
    }
    return 1 - bt * this.continuedFractionBeta(b, a, 1 - x) / b;
  }

  /**
   * Continued-fraction factor of the incomplete beta function, evaluated
   * with the modified Lentz scheme (at most 100 iterations, stops once the
   * update factor is within 1e-12 of 1).
   *
   * @param {number} a
   * @param {number} b
   * @param {number} x
   * @returns {number}
   */
  continuedFractionBeta(a, b, x) {
    const TINY = 1e-30; // keeps c and d away from zero before dividing
    const qab = a + b;
    const qap = a + 1;
    const qam = a - 1;
    let c = 1;
    let d = 1 - qab * x / qap;

    if (Math.abs(d) < TINY) d = TINY;
    d = 1 / d;
    let h = d;

    for (let m = 1; m <= 100; m++) {
      const m2 = 2 * m;

      // Even step of the recurrence.
      let aa = m * (b - m) * x / ((qam + m2) * (a + m2));
      d = 1 + aa * d;
      if (Math.abs(d) < TINY) d = TINY;
      c = 1 + aa / c;
      if (Math.abs(c) < TINY) c = TINY;
      d = 1 / d;
      h *= d * c;

      // Odd step of the recurrence.
      aa = -(a + m) * (qab + m) * x / ((a + m2) * (qap + m2));
      d = 1 + aa * d;
      if (Math.abs(d) < TINY) d = TINY;
      c = 1 + aa / c;
      if (Math.abs(c) < TINY) c = TINY;
      d = 1 / d;
      const del = d * c;
      h *= del;

      if (Math.abs(del - 1) < 1e-12) break;
    }

    return h;
  }

  /**
   * Natural logarithm of the gamma function for x > 0, using a six-term
   * Lanczos series.
   *
   * @param {number} x
   * @returns {number}
   */
  logGamma(x) {
    const cof = [76.18009172947146, -86.50532032941677, 24.01409824083091,
      -1.231739572450155, 0.001208650973866179, -0.000005395239384953];
    let ser = 1.000000000190015;

    let y = x;
    let tmp = x + 5.5;
    tmp -= (x + 0.5) * Math.log(tmp);

    for (let j = 0; j < 6; j++) ser += cof[j] / ++y;

    // 2.5066282746310005 is sqrt(2 * pi).
    return -tmp + Math.log(2.5066282746310005 * ser / x);
  }
}
|
652
|
-
|
653
|
-
export default Correlation;
|