datly 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/datly.cjs +1 -0
- package/dist/datly.mjs +1 -0
- package/dist/datly.umd.js +1 -1
- package/dist/datly.umd.js.map +1 -0
- package/package.json +24 -11
- package/src/core/dataLoader.js +407 -0
- package/src/core/utils.js +306 -0
- package/src/core/validator.js +205 -0
- package/src/dataviz/index.js +1566 -0
- package/src/descriptive/centralTendency.js +208 -0
- package/src/descriptive/dispersion.js +273 -0
- package/src/descriptive/position.js +268 -0
- package/src/descriptive/shape.js +336 -0
- package/src/index.js +480 -0
- package/src/inferential/confidenceIntervals.js +561 -0
- package/src/inferential/hypothesisTesting.js +527 -0
- package/src/inferential/normalityTests.js +587 -0
- package/src/insights/autoAnalyser.js +685 -0
- package/src/insights/interpreter.js +543 -0
- package/src/insights/patternDetector.js +897 -0
- package/src/insights/reportGenerator.js +1072 -0
- package/src/ml/ClassificationMetrics.js +336 -0
- package/src/ml/DecisionTree.js +412 -0
- package/src/ml/KNearestNeighbors.js +317 -0
- package/src/ml/LinearRegression.js +179 -0
- package/src/ml/LogisticRegression.js +396 -0
- package/src/ml/MachineLearning.js +490 -0
- package/src/ml/NaiveBayes.js +296 -0
- package/src/ml/RandomForest.js +323 -0
- package/src/ml/SupportVectorMachine.js +299 -0
- package/src/ml/baseModel.js +106 -0
- package/src/multivariate/correlation.js +653 -0
- package/src/multivariate/regression.js +660 -0
@@ -0,0 +1,208 @@
|
|
1
|
+
/**
 * Throws unless `column` is an array containing at least one element.
 *
 * @param {*} column - Candidate column.
 * @throws {Error} 'Column must be a non-empty array' otherwise.
 */
function assertNonEmptyColumn(column) {
  if (!Array.isArray(column) || column.length === 0) {
    throw new Error('Column must be a non-empty array');
  }
}

/**
 * Validates `column` and returns a NEW array holding only its finite numbers
 * (entries that are not of type number, NaN and +/-Infinity are dropped).
 * Because the result is a fresh array, callers may sort it in place without
 * touching the caller-owned column.
 *
 * @param {Array} column - Raw column; may contain non-numeric entries.
 * @param {Object} [options]
 * @param {boolean} [options.positiveOnly=false] - Also drop values <= 0.
 * @param {string} [options.emptyMessage] - Error message used when nothing
 *   survives the filter.
 * @returns {number[]} Non-empty array of the surviving values, in input order.
 * @throws {Error} If `column` is not a non-empty array or no value survives.
 */
function selectFiniteNumbers(column, options = {}) {
  const {
    positiveOnly = false,
    emptyMessage = 'No valid numeric values found',
  } = options;

  assertNonEmptyColumn(column);

  // Number.isFinite does not coerce, so it rejects non-numbers, NaN and
  // +/-Infinity in a single check.
  const values = column.filter(
    (val) => Number.isFinite(val) && (!positiveOnly || val > 0)
  );

  if (values.length === 0) {
    throw new Error(emptyMessage);
  }

  return values;
}

/**
 * Measures of central tendency for a column of raw values. Unless stated
 * otherwise, every method ignores entries that are not finite numbers and
 * throws an Error when the column is not a non-empty array or contains no
 * usable value.
 */
class CentralTendency {
  /**
   * Arithmetic mean of the finite numbers in `column`.
   *
   * @param {Array} column
   * @returns {number}
   * @throws {Error} On an invalid column or when no finite number is present.
   */
  mean(column) {
    const values = selectFiniteNumbers(column);
    return values.reduce((sum, val) => sum + val, 0) / values.length;
  }

  /**
   * Median of the finite numbers in `column`; for an even count this is the
   * average of the two middle values.
   *
   * @param {Array} column
   * @returns {number}
   * @throws {Error} On an invalid column or when no finite number is present.
   */
  median(column) {
    const sorted = selectFiniteNumbers(column).sort((a, b) => a - b);
    const middle = Math.floor(sorted.length / 2);

    return sorted.length % 2 === 0
      ? (sorted[middle - 1] + sorted[middle]) / 2
      : sorted[middle];
  }

  /**
   * Most frequent value(s) of `column`, counted over ALL entries (not only
   * numbers).
   *
   * Values are counted in a Map, so they keep their original type: `1` and
   * `'1'` are distinct, strings that happen to match property names such as
   * 'constructor' are counted correctly, and NaN entries are grouped
   * together. `undefined` is counted together with `null` and reported as
   * `null`. Objects are compared by identity.
   *
   * @param {Array} column
   * @returns {{values: Array, frequency: number, isMultimodal: boolean, isUniform: boolean}}
   *   `values` lists every value that reaches the highest count, in order of
   *   first occurrence; `frequency` is that count; `isMultimodal` is true when
   *   more than one value reaches it; `isUniform` is true when every entry
   *   occurs exactly once.
   * @throws {Error} If `column` is not a non-empty array.
   */
  mode(column) {
    assertNonEmptyColumn(column);

    const counts = new Map();
    let maxFreq = 0;

    for (const value of column) {
      const key = value === undefined ? null : value;
      const count = (counts.get(key) || 0) + 1;
      counts.set(key, count);
      if (count > maxFreq) {
        maxFreq = count;
      }
    }

    const modes = [];
    for (const [value, count] of counts) {
      if (count === maxFreq) {
        modes.push(value);
      }
    }

    return {
      values: modes,
      frequency: maxFreq,
      isMultimodal: modes.length > 1,
      isUniform: maxFreq === 1 && counts.size === column.length,
    };
  }

  /**
   * Converts a string back to a primitive: integer-looking text becomes an
   * integer, decimal text (digits after a dot) becomes a float, 'true' /
   * 'false' become booleans, anything else is returned unchanged.
   *
   * Kept for backward compatibility; `mode` no longer needs it because it
   * preserves the original values.
   *
   * @param {string} str
   * @returns {number|boolean|string}
   */
  parseValue(str) {
    if (/^-?\d+$/.test(str)) return parseInt(str, 10);
    if (/^-?\d*\.\d+$/.test(str)) return parseFloat(str);
    if (str === 'true') return true;
    if (str === 'false') return false;
    return str;
  }

  /**
   * Geometric mean of the strictly positive finite numbers in `column`,
   * computed through the mean of logarithms.
   *
   * @param {Array} column
   * @returns {number}
   * @throws {Error} On an invalid column or when no positive number is present.
   */
  geometricMean(column) {
    const values = selectFiniteNumbers(column, {
      positiveOnly: true,
      emptyMessage: 'Geometric mean requires positive numeric values',
    });

    const logSum = values.reduce((sum, val) => sum + Math.log(val), 0);
    return Math.exp(logSum / values.length);
  }

  /**
   * Harmonic mean of the strictly positive finite numbers in `column`.
   *
   * @param {Array} column
   * @returns {number}
   * @throws {Error} On an invalid column or when no positive number is present.
   */
  harmonicMean(column) {
    const values = selectFiniteNumbers(column, {
      positiveOnly: true,
      emptyMessage: 'Harmonic mean requires positive numeric values',
    });

    const reciprocalSum = values.reduce((sum, val) => sum + 1 / val, 0);
    return values.length / reciprocalSum;
  }

  /**
   * Mean of the finite numbers in `column` after discarding
   * `floor(percentage / 100 * count)` values from each end of the sorted data.
   *
   * @param {Array} column
   * @param {number} percentage - Share to trim from EACH tail; must satisfy
   *   0 <= percentage < 50. NaN is rejected.
   * @returns {number}
   * @throws {Error} On an invalid column, an invalid percentage, or when no
   *   finite number is present.
   */
  trimmedMean(column, percentage) {
    assertNonEmptyColumn(column);

    // The negated range test also rejects NaN, which would otherwise slip
    // through both comparisons and surface later as a misleading error.
    if (typeof percentage !== 'number' || !(percentage >= 0 && percentage < 50)) {
      throw new Error('Percentage must be between 0 and 50');
    }

    const sorted = selectFiniteNumbers(column).sort((a, b) => a - b);
    const trimCount = Math.floor((percentage / 100) * sorted.length);
    const trimmed = sorted.slice(trimCount, sorted.length - trimCount);

    if (trimmed.length === 0) {
      throw new Error('Too much trimming - no values remain');
    }

    return trimmed.reduce((sum, val) => sum + val, 0) / trimmed.length;
  }

  /**
   * Quadratic mean (root mean square) of the finite numbers in `column`.
   *
   * @param {Array} column
   * @returns {number}
   * @throws {Error} On an invalid column or when no finite number is present.
   */
  quadraticMean(column) {
    const values = selectFiniteNumbers(column);
    const sumOfSquares = values.reduce((sum, val) => sum + val * val, 0);
    return Math.sqrt(sumOfSquares / values.length);
  }

  /**
   * Weighted arithmetic mean. A (value, weight) pair is used only when both
   * entries are finite numbers and the weight is non-negative; other pairs
   * are skipped.
   *
   * @param {Array} values
   * @param {Array} weights - Same length as `values`.
   * @returns {number}
   * @throws {Error} If either argument is not an array, the lengths differ,
   *   the arrays are empty, no pair is usable, or the usable weights sum to 0.
   */
  weightedMean(values, weights) {
    if (!Array.isArray(values) || !Array.isArray(weights)) {
      throw new Error('Values and weights must be arrays');
    }

    if (values.length !== weights.length) {
      throw new Error('Values and weights must have the same length');
    }

    if (values.length === 0) {
      throw new Error('Arrays must not be empty');
    }

    let pairCount = 0;
    let totalWeight = 0;
    let weightedSum = 0;

    for (let i = 0; i < values.length; i++) {
      const value = values[i];
      const weight = weights[i];

      if (Number.isFinite(value) && Number.isFinite(weight) && weight >= 0) {
        pairCount += 1;
        totalWeight += weight;
        weightedSum += value * weight;
      }
    }

    if (pairCount === 0) {
      throw new Error('No valid value-weight pairs found');
    }

    if (totalWeight === 0) {
      throw new Error('Total weight cannot be zero');
    }

    return weightedSum / totalWeight;
  }

  /**
   * Midrange: the average of the smallest and largest finite number in
   * `column`.
   *
   * @param {Array} column
   * @returns {number}
   * @throws {Error} On an invalid column or when no finite number is present.
   */
  midrange(column) {
    const values = selectFiniteNumbers(column);

    // Single pass instead of Math.min(...values) / Math.max(...values):
    // spreading a very large array into call arguments can exceed the
    // engine's argument limit and throw a RangeError.
    let min = values[0];
    let max = values[0];
    for (const val of values) {
      if (val < min) min = val;
      if (val > max) max = val;
    }

    return (min + max) / 2;
  }
}
|
207
|
+
|
208
|
+
export default CentralTendency;
|
@@ -0,0 +1,273 @@
|
|
1
|
+
/**
 * Throws unless `column` is an array containing at least one element.
 *
 * @param {*} column - Candidate column.
 * @throws {Error} 'Column must be a non-empty array' otherwise.
 */
function requireNonEmptyColumn(column) {
  if (!Array.isArray(column) || column.length === 0) {
    throw new Error('Column must be a non-empty array');
  }
}

/**
 * Validates `column` and returns a NEW array holding only its finite numbers
 * (entries that are not of type number, NaN and +/-Infinity are dropped).
 * Because the result is a fresh array, callers may sort it in place without
 * touching the caller-owned column.
 *
 * @param {Array} column - Raw column; may contain non-numeric entries.
 * @param {Object} [options]
 * @param {boolean} [options.nonNegativeOnly=false] - Also drop values < 0.
 * @param {string} [options.emptyMessage] - Error message used when nothing
 *   survives the filter.
 * @returns {number[]} Non-empty array of the surviving values, in input order.
 * @throws {Error} If `column` is not a non-empty array or no value survives.
 */
function finiteValuesOf(column, options = {}) {
  const {
    nonNegativeOnly = false,
    emptyMessage = 'No valid numeric values found',
  } = options;

  requireNonEmptyColumn(column);

  // Number.isFinite does not coerce, so it rejects non-numbers, NaN and
  // +/-Infinity in a single check.
  const values = column.filter(
    (val) => Number.isFinite(val) && (!nonNegativeOnly || val >= 0)
  );

  if (values.length === 0) {
    throw new Error(emptyMessage);
  }

  return values;
}

/** Ascending numeric comparator for Array.prototype.sort. */
const ascendingOrder = (a, b) => a - b;

/**
 * Measures of dispersion for a column of raw values. Unless stated otherwise,
 * every method ignores entries that are not finite numbers and throws an
 * Error when the column is not a non-empty array or contains no usable value.
 */
class Dispersion {
  /**
   * Variance of the finite numbers in `column`.
   *
   * @param {Array} column
   * @param {boolean} [sample=true] - true divides by n - 1 (sample variance),
   *   false divides by n (population variance).
   * @returns {number}
   * @throws {Error} On an invalid column, when no finite number is present, or
   *   when `sample` is truthy and fewer than 2 finite numbers are present.
   */
  variance(column, sample = true) {
    const values = finiteValuesOf(column);

    if (sample && values.length < 2) {
      throw new Error('Sample variance requires at least 2 values');
    }

    const mean = values.reduce((sum, val) => sum + val, 0) / values.length;
    const sumSquaredDiff = values.reduce(
      (sum, val) => sum + Math.pow(val - mean, 2),
      0
    );

    const denominator = sample ? values.length - 1 : values.length;
    return sumSquaredDiff / denominator;
  }

  /**
   * Standard deviation: the square root of `variance(column, sample)`.
   *
   * @param {Array} column
   * @param {boolean} [sample=true]
   * @returns {number}
   * @throws {Error} Same conditions as `variance`.
   */
  standardDeviation(column, sample = true) {
    return Math.sqrt(this.variance(column, sample));
  }

  /**
   * Smallest and largest finite number in `column` and their difference.
   *
   * @param {Array} column
   * @returns {{range: number, min: number, max: number}}
   * @throws {Error} On an invalid column or when no finite number is present.
   */
  range(column) {
    const values = finiteValuesOf(column);

    // Single pass instead of Math.min(...values) / Math.max(...values):
    // spreading a very large array into call arguments can exceed the
    // engine's argument limit and throw a RangeError.
    let min = values[0];
    let max = values[0];
    for (const val of values) {
      if (val < min) min = val;
      if (val > max) max = val;
    }

    return { range: max - min, min, max };
  }

  /**
   * First and third quartile (via `quantile`) of the finite numbers in
   * `column`, and their difference.
   *
   * @param {Array} column
   * @returns {{iqr: number, q1: number, q3: number}}
   * @throws {Error} On an invalid column or when no finite number is present.
   */
  interquartileRange(column) {
    const sorted = finiteValuesOf(column).sort(ascendingOrder);
    const q1 = this.quantile(sorted, 0.25);
    const q3 = this.quantile(sorted, 0.75);

    return { iqr: q3 - q1, q1, q3 };
  }

  /**
   * Quantile by linear interpolation between the two entries surrounding the
   * fractional position `(length - 1) * q`.
   *
   * No validation is performed here: the method expects `sortedArray` to be a
   * non-empty array already sorted ascending and `q` to lie in [0, 1]; other
   * inputs yield undefined or NaN rather than an error.
   *
   * @param {number[]} sortedArray
   * @param {number} q - Fraction in [0, 1] (0.5 is the median).
   * @returns {number}
   */
  quantile(sortedArray, q) {
    const index = (sortedArray.length - 1) * q;
    const lower = Math.floor(index);
    const upper = Math.ceil(index);

    if (lower === upper) {
      return sortedArray[lower];
    }

    const weight = index % 1;
    return sortedArray[lower] * (1 - weight) + sortedArray[upper] * weight;
  }

  /**
   * Coefficient of variation: sample standard deviation divided by the
   * absolute value of the mean, as a ratio and as a percentage.
   *
   * @param {Array} column
   * @returns {{cv: number, cvPercent: number}}
   * @throws {Error} On an invalid column, when no finite number is present,
   *   when the mean is 0, or when fewer than 2 finite numbers are present
   *   (sample standard deviation).
   */
  coefficientOfVariation(column) {
    const values = finiteValuesOf(column);
    const mean = values.reduce((sum, val) => sum + val, 0) / values.length;

    if (mean === 0) {
      throw new Error('Cannot calculate coefficient of variation when mean is zero');
    }

    // `values` is already filtered, so the result equals calling this with
    // the raw column, without filtering a second time.
    const std = this.standardDeviation(values);
    const cv = std / Math.abs(mean);

    return { cv, cvPercent: cv * 100 };
  }

  /**
   * Mean absolute deviation around the mean of the finite numbers in `column`.
   *
   * @param {Array} column
   * @returns {{mad: number, mean: number}}
   * @throws {Error} On an invalid column or when no finite number is present.
   */
  meanAbsoluteDeviation(column) {
    const values = finiteValuesOf(column);
    const mean = values.reduce((sum, val) => sum + val, 0) / values.length;
    const sumAbsDiff = values.reduce((sum, val) => sum + Math.abs(val - mean), 0);

    return { mad: sumAbsDiff / values.length, mean };
  }

  /**
   * Median absolute deviation: the median of the absolute distances of each
   * finite number from the median.
   *
   * @param {Array} column
   * @returns {{mad: number, median: number}}
   * @throws {Error} On an invalid column or when no finite number is present.
   */
  medianAbsoluteDeviation(column) {
    const sorted = finiteValuesOf(column).sort(ascendingOrder);
    const median = this.quantile(sorted, 0.5);
    const sortedDeviations = sorted
      .map((val) => Math.abs(val - median))
      .sort(ascendingOrder);

    return { mad: this.quantile(sortedDeviations, 0.5), median };
  }

  /**
   * Standard error of the mean: sample standard deviation divided by the
   * square root of the number of finite values.
   *
   * @param {Array} column
   * @returns {number}
   * @throws {Error} On an invalid column, when no finite number is present, or
   *   when fewer than 2 finite numbers are present.
   */
  standardError(column) {
    const values = finiteValuesOf(column);
    return this.standardDeviation(values) / Math.sqrt(values.length);
  }

  /**
   * Quartile coefficient of dispersion: (Q3 - Q1) / (Q3 + Q1).
   *
   * @param {Array} column
   * @returns {number}
   * @throws {Error} Same conditions as `interquartileRange`, or when
   *   Q1 + Q3 equals 0.
   */
  quartileCoefficient(column) {
    const { q1, q3 } = this.interquartileRange(column);

    if (q1 + q3 === 0) {
      throw new Error('Cannot calculate quartile coefficient when Q1 + Q3 = 0');
    }

    return (q3 - q1) / (q3 + q1);
  }

  /**
   * Distance between two percentiles of the finite numbers in `column`.
   *
   * @param {Array} column
   * @param {number} lowerPercentile - In [0, 100), below `upperPercentile`.
   * @param {number} upperPercentile - In (0, 100].
   * @returns {{range: number, lowerValue: number, upperValue: number, lowerPercentile: number, upperPercentile: number}}
   *   The percentile arguments are echoed back as given.
   * @throws {Error} If the percentiles are out of order, out of range or not
   *   numeric, if `column` is not a non-empty array, or if it holds no finite
   *   number.
   */
  percentileRange(column, lowerPercentile, upperPercentile) {
    // Compare numerically so that numeric strings are not ordered
    // lexicographically ('5' vs '10').
    const lower = Number(lowerPercentile);
    const upper = Number(upperPercentile);

    if (lower >= upper) {
      throw new Error('Lower percentile must be less than upper percentile');
    }

    // NaN passes every ordering comparison as false, so it must be rejected
    // explicitly; otherwise the result would silently be NaN/undefined.
    if (Number.isNaN(lower) || Number.isNaN(upper) || lower < 0 || upper > 100) {
      throw new Error('Percentiles must be between 0 and 100');
    }

    const sorted = finiteValuesOf(column).sort(ascendingOrder);
    const lowerValue = this.quantile(sorted, lower / 100);
    const upperValue = this.quantile(sorted, upper / 100);

    return {
      range: upperValue - lowerValue,
      lowerValue,
      upperValue,
      lowerPercentile,
      upperPercentile,
    };
  }

  /**
   * Gini coefficient of the non-negative finite numbers in `column`
   * (negative entries are ignored). Returns 0 when the mean is 0.
   *
   * Uses the sorted-data identity
   *   sum_i sum_j |x_i - x_j| = 2 * sum_i (2i - n + 1) * x_i   (i from 0)
   * so the cost is dominated by the sort instead of an n-by-n pair loop.
   *
   * @param {Array} column
   * @returns {number}
   * @throws {Error} On an invalid column or when no non-negative finite number
   *   is present.
   */
  giniCoefficient(column) {
    const sorted = finiteValuesOf(column, {
      nonNegativeOnly: true,
      emptyMessage: 'Gini coefficient requires non-negative numeric values',
    }).sort(ascendingOrder);

    const n = sorted.length;
    const mean = sorted.reduce((sum, val) => sum + val, 0) / n;

    if (mean === 0) {
      return 0;
    }

    let rankWeightedSum = 0;
    for (let i = 0; i < n; i++) {
      rankWeightedSum += (2 * i - n + 1) * sorted[i];
    }

    // Equals sum|x_i - x_j| / (2 * n^2 * mean) with the factor 2 cancelled.
    return rankWeightedSum / (n * n * mean);
  }

  /**
   * Robust scaling: (value - median) / IQR for every finite number in
   * `column`. Entries that are not finite numbers map to null, and every
   * finite number maps to 0 when the IQR is 0.
   *
   * @param {Array} column
   * @returns {{scaledValues: Array<number|null>, median: number, iqr: number}}
   *   `scaledValues` has the same length and order as `column`.
   * @throws {Error} On an invalid column or when no finite number is present.
   */
  robustScale(column) {
    const { iqr } = this.interquartileRange(column);
    const median = this.quantile(finiteValuesOf(column).sort(ascendingOrder), 0.5);

    const scaledValues = column.map((val) => {
      if (!Number.isFinite(val)) {
        return null;
      }
      return iqr !== 0 ? (val - median) / iqr : 0;
    });

    return { scaledValues, median, iqr };
  }
}
|
272
|
+
|
273
|
+
export default Dispersion;
|