datly 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.MD +1773 -2386
- package/dist/datly.cjs +1 -1
- package/dist/datly.mjs +1 -1
- package/dist/datly.umd.js +1 -1
- package/package.json +3 -3
- package/src/code.js +2466 -0
- package/src/index.js +236 -480
- package/src/plot.js +609 -0
- package/src/core/dataLoader.js +0 -407
- package/src/core/utils.js +0 -306
- package/src/core/validator.js +0 -205
- package/src/dataviz/index.js +0 -1566
- package/src/descriptive/centralTendency.js +0 -208
- package/src/descriptive/dispersion.js +0 -273
- package/src/descriptive/position.js +0 -268
- package/src/descriptive/shape.js +0 -336
- package/src/inferential/confidenceIntervals.js +0 -561
- package/src/inferential/hypothesisTesting.js +0 -527
- package/src/inferential/normalityTests.js +0 -587
- package/src/insights/autoAnalyser.js +0 -685
- package/src/insights/interpreter.js +0 -543
- package/src/insights/patternDetector.js +0 -897
- package/src/insights/reportGenerator.js +0 -1072
- package/src/ml/ClassificationMetrics.js +0 -336
- package/src/ml/DecisionTree.js +0 -412
- package/src/ml/KNearestNeighbors.js +0 -317
- package/src/ml/LinearRegression.js +0 -179
- package/src/ml/LogisticRegression.js +0 -396
- package/src/ml/MachineLearning.js +0 -490
- package/src/ml/NaiveBayes.js +0 -296
- package/src/ml/RandomForest.js +0 -323
- package/src/ml/SupportVectorMachine.js +0 -299
- package/src/ml/baseModel.js +0 -106
- package/src/multivariate/correlation.js +0 -653
- package/src/multivariate/regression.js +0 -660
package/src/core/dataLoader.js
DELETED
@@ -1,407 +0,0 @@
|
|
1
|
-
// Environment detection: true when a `process` global exposing `versions.node` is present.
const isNode =
  typeof process !== 'undefined' &&
  process.versions != null &&
  process.versions.node != null;

// Lazily loaded `fs` module and the in-flight promise that is loading it.
let fs = null;
let fsPromise = null;

/**
 * Lazily resolves the `fs` module via a dynamic import.
 *
 * The resolved module is cached in `fs`, and the pending import is cached in
 * `fsPromise`, so concurrent callers share one import.
 *
 * @returns {Promise<object|null>} The `fs` module, or `null` when `isNode` is
 *   false or when the dynamic import rejects.
 */
async function getFS() {
  if (!isNode) {
    return null;
  }
  if (fs) {
    return fs;
  }

  if (!fsPromise) {
    fsPromise = import('fs')
      .then((loaded) => {
        // Some loaders expose the module on `default`, others as the namespace itself.
        fs = loaded.default || loaded;
        return fs;
      })
      // Best effort: a failed import is reported to callers as "no fs available".
      .catch(() => null);
  }

  return fsPromise;
}
|
22
|
-
|
23
|
-
/** Options used by loadCSV, and by parseCSV when it is called without options. */
const DEFAULT_CSV_OPTIONS = Object.freeze({
  delimiter: ',',
  header: true,
  skipEmptyLines: true,
  encoding: 'utf8',
});

/** Options used by loadJSON, and by the parseJSON* methods when called without a config. */
const DEFAULT_JSON_OPTIONS = Object.freeze({
  validateTypes: true,
  autoInferHeaders: true,
});

/**
 * Reads a text resource by trying, in order: the `fs` module returned by
 * `getFS()` (only when `isNode` is true), a `window.fs` object (sync
 * `readFileSync` preferred, otherwise async `readFile`), and finally `fetch`.
 *
 * A backend is only skipped when the previous one produced no value at all
 * (`null`/`undefined`); an empty string is a valid result, so an empty file is
 * not mistaken for "nothing could be read".
 *
 * @param {string} source - File path or URL.
 * @param {string} encoding - Encoding passed to the fs-style readers.
 * @returns {Promise<string|undefined|null>} The text, or a nullish value when
 *   no backend was available.
 * @throws {Error} When `fetch` answers with a non-OK status, or when a reader throws.
 */
async function readTextSource(source, encoding) {
  let content;

  if (isNode) {
    const fsModule = await getFS();
    if (fsModule) {
      content = fsModule.readFileSync(source, { encoding });
    }
  }

  if (content == null && typeof window !== 'undefined' && window.fs) {
    content = window.fs.readFileSync
      ? window.fs.readFileSync(source, { encoding })
      : await window.fs.readFile(source, { encoding });
  }

  if (content == null && typeof fetch !== 'undefined') {
    const response = await fetch(source);
    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }
    content = await response.text();
  }

  return content;
}

/**
 * Loads CSV/JSON data into a plain dataset object
 * (`{ headers, data, length, columns }`) and offers small helpers to inspect,
 * clean, filter and sort such datasets.
 */
class DataLoader {
  /**
   * Reads a CSV file (or URL) and parses it.
   *
   * @param {string} filePath - Path or URL of the CSV resource.
   * @param {object} [options] - Overrides for `delimiter`, `header`,
   *   `skipEmptyLines` and `encoding`.
   * @returns {Promise<object>} The dataset produced by `parseCSV`.
   * @throws {Error} `Failed to load CSV: …`, with the original error as `cause`.
   */
  async loadCSV(filePath, options = {}) {
    const config = { ...DEFAULT_CSV_OPTIONS, ...options };

    try {
      const content = await readTextSource(filePath, config.encoding);
      if (content == null) {
        throw new Error('No file system available. Use fetch, fs module, or pass CSV text directly to parseCSV()');
      }
      return this.parseCSV(content, config);
    } catch (error) {
      throw new Error(`Failed to load CSV: ${error.message}`, { cause: error });
    }
  }

  /**
   * Loads JSON from a file path/URL, a JSON string, or an already parsed value.
   *
   * A string whose trimmed form starts with `{` or `[` is parsed as JSON text;
   * any other string is treated as a path/URL to read.
   *
   * @param {string|object} jsonInput - Path, JSON text, or parsed JSON.
   * @param {object} [options] - Overrides for `validateTypes` and `autoInferHeaders`.
   * @returns {Promise<object>} The dataset produced by `parseJSON`.
   * @throws {Error} `Failed to load JSON: …`, with the original error as `cause`.
   */
  async loadJSON(jsonInput, options = {}) {
    const config = { ...DEFAULT_JSON_OPTIONS, ...options };

    try {
      let jsonData;

      if (typeof jsonInput === 'string') {
        const trimmed = jsonInput.trim();
        if (trimmed.startsWith('{') || trimmed.startsWith('[')) {
          // Inline JSON text.
          jsonData = JSON.parse(jsonInput);
        } else {
          // Anything else is treated as a file path or URL.
          const content = await readTextSource(jsonInput, 'utf8');
          if (content == null) {
            throw new Error('No file system available');
          }
          jsonData = JSON.parse(content);
        }
      } else if (typeof jsonInput === 'object') {
        jsonData = jsonInput;
      } else {
        throw new Error('Invalid JSON input: must be string, file path, or object');
      }

      return this.parseJSON(jsonData, config);
    } catch (error) {
      throw new Error(`Failed to load JSON: ${error.message}`, { cause: error });
    }
  }

  /**
   * Dispatches parsed JSON to the matching parser: arrays go to
   * `parseJSONArray`, objects with truthy `headers` and `data` go to
   * `parseStructuredJSON`, other objects go to `parseJSONObject`.
   *
   * @param {*} jsonData - Parsed JSON value.
   * @param {object} [config] - `{ validateTypes, autoInferHeaders }`.
   * @returns {object} Dataset.
   * @throws {Error} When the value is falsy or of an unsupported type.
   */
  parseJSON(jsonData, config = DEFAULT_JSON_OPTIONS) {
    if (!jsonData) {
      throw new Error('JSON data is empty or null');
    }

    if (Array.isArray(jsonData)) {
      return this.parseJSONArray(jsonData, config);
    }
    if (jsonData.headers && jsonData.data) {
      return this.parseStructuredJSON(jsonData, config);
    }
    if (typeof jsonData === 'object') {
      return this.parseJSONObject(jsonData, config);
    }
    throw new Error('Unsupported JSON format');
  }

  /**
   * Converts an array of row objects into a dataset.
   *
   * With `config.autoInferHeaders` the headers are the union of the keys of
   * all object rows; otherwise they are the keys of the first row. Rows that
   * are not objects are skipped with a console warning.
   *
   * @param {Array} jsonArray - Non-empty array whose first element is an object.
   * @param {object} [config] - `{ validateTypes, autoInferHeaders }`.
   * @returns {object} Dataset with `source: 'json_array'`.
   * @throws {Error} When the array is empty or its first element is not an object.
   */
  parseJSONArray(jsonArray, config = DEFAULT_JSON_OPTIONS) {
    if (jsonArray.length === 0) {
      throw new Error('JSON array is empty');
    }

    const firstRow = jsonArray[0];
    if (typeof firstRow !== 'object' || firstRow === null) {
      throw new Error('JSON array must contain objects');
    }

    let headers;
    if (config.autoInferHeaders) {
      const allKeys = new Set();
      for (const row of jsonArray) {
        if (typeof row === 'object' && row !== null) {
          Object.keys(row).forEach((key) => allKeys.add(key));
        }
      }
      headers = [...allKeys];
    } else {
      headers = Object.keys(firstRow);
    }

    const convert = config.validateTypes ? (value) => this.inferType(value) : (value) => value;
    const data = [];

    jsonArray.forEach((row, index) => {
      if (typeof row !== 'object' || row === null) {
        console.warn(`Row ${index} is not an object, skipping`);
        return;
      }
      const processedRow = {};
      headers.forEach((header) => {
        processedRow[header] = convert(row[header]);
      });
      data.push(processedRow);
    });

    return {
      headers,
      data,
      length: data.length,
      columns: headers.length,
      source: 'json_array',
    };
  }

  /**
   * Converts `{ headers: [...], data: [...] }` JSON into a dataset.
   *
   * Each data row may be an array (matched to headers by position, missing
   * positions become `null`) or an object (matched by header name). Rows of
   * any other type are skipped with a console warning.
   *
   * @param {{headers: string[], data: Array}} jsonData - Structured JSON.
   * @param {object} [config] - `{ validateTypes }`.
   * @returns {object} Dataset with `source: 'structured_json'`.
   * @throws {Error} When `headers`/`data` are not arrays or `headers` is empty.
   */
  parseStructuredJSON(jsonData, config = DEFAULT_JSON_OPTIONS) {
    const { headers, data } = jsonData;

    if (!Array.isArray(headers)) {
      throw new Error('Headers must be an array');
    }
    if (!Array.isArray(data)) {
      throw new Error('Data must be an array');
    }
    if (headers.length === 0) {
      throw new Error('Headers array is empty');
    }

    const convert = config.validateTypes ? (value) => this.inferType(value) : (value) => value;
    const processedData = [];

    data.forEach((row, index) => {
      let readValue;
      if (Array.isArray(row)) {
        readValue = (header, position) => (position < row.length ? row[position] : null);
      } else if (typeof row === 'object' && row !== null) {
        readValue = (header) => row[header];
      } else {
        console.warn(`Row ${index} has invalid format, skipping`);
        return;
      }

      const processedRow = {};
      headers.forEach((header, position) => {
        processedRow[header] = convert(readValue(header, position));
      });
      processedData.push(processedRow);
    });

    return {
      headers,
      data: processedData,
      length: processedData.length,
      columns: headers.length,
      source: 'structured_json',
    };
  }

  /**
   * Converts a plain object into a two-column (`key`, `value`) dataset with
   * one row per own enumerable property.
   *
   * @param {object} jsonObject - Non-empty object.
   * @param {object} [config] - `{ validateTypes }`.
   * @returns {object} Dataset with `source: 'json_object'`.
   * @throws {Error} When the object has no entries.
   */
  parseJSONObject(jsonObject, config = DEFAULT_JSON_OPTIONS) {
    const entries = Object.entries(jsonObject);
    if (entries.length === 0) {
      throw new Error('JSON object is empty');
    }

    const data = entries.map(([key, value]) => ({
      key,
      value: config.validateTypes ? this.inferType(value) : value,
    }));

    return {
      headers: ['key', 'value'],
      data,
      length: data.length,
      columns: 2,
      source: 'json_object',
    };
  }

  /**
   * Parses CSV text into a dataset.
   *
   * This is a simple splitter: lines are split on `\n` and fields on
   * `options.delimiter`; every `'` and `"` character is removed from headers
   * and values, so quoted fields that contain the delimiter are not
   * supported. Rows whose field count differs from the header count are
   * dropped. Every value goes through `inferType`.
   *
   * @param {string} content - CSV text.
   * @param {object} [options] - `{ delimiter, header, skipEmptyLines }`.
   *   When omitted entirely, the loadCSV defaults are used. A partially
   *   filled object is used as given (it is not merged with the defaults).
   * @returns {{headers: string[], data: object[], length: number, columns: number}}
   * @throws {Error} When no lines remain after filtering.
   */
  parseCSV(content, options = DEFAULT_CSV_OPTIONS) {
    const { delimiter, header, skipEmptyLines } = options;
    const clean = (text) => text.trim().replace(/['"]/g, '');

    const lines = content
      .split('\n')
      .filter((line) => !skipEmptyLines || line.trim() !== '');

    if (lines.length === 0) {
      throw new Error('CSV file is empty');
    }

    const firstLineFields = lines[0].split(delimiter);
    const headers = header
      ? firstLineFields.map(clean)
      : Array.from({ length: firstLineFields.length }, (_, i) => `col_${i}`);

    const data = [];
    for (let i = header ? 1 : 0; i < lines.length; i++) {
      const values = lines[i].split(delimiter);
      if (values.length !== headers.length) {
        // Malformed row: field count does not match the header count.
        continue;
      }
      const row = {};
      headers.forEach((name, index) => {
        row[name] = this.inferType(clean(values[index]));
      });
      data.push(row);
    }

    return {
      headers,
      data,
      length: data.length,
      columns: headers.length,
    };
  }

  /**
   * Converts a textual cell into `null`, a boolean, an integer or a float
   * when it matches one of the recognised spellings; otherwise returns it
   * unchanged.
   *
   * Only strings are converted. Non-string values (numbers, booleans,
   * arrays, objects, `null`, `undefined`) are returned as-is, so that e.g. a
   * nested JSON array `[5]` is not coerced to the number `5` by the regex
   * checks.
   *
   * @param {*} value - Raw value.
   * @returns {*} The inferred value.
   */
  inferType(value) {
    if (typeof value !== 'string') {
      return value;
    }

    if (value === '' || value === 'null' || value === 'NULL' || value === 'NaN') {
      return null;
    }
    if (value === 'true' || value === 'TRUE') return true;
    if (value === 'false' || value === 'FALSE') return false;

    if (/^-?\d+$/.test(value)) {
      return parseInt(value, 10);
    }
    if (/^-?\d*\.\d+$/.test(value)) {
      return parseFloat(value);
    }
    return value;
  }

  /**
   * Returns a copy of the dataset without rows whose values are all
   * `null`/`undefined`; `length` is updated to match.
   *
   * @param {object} dataset - Dataset to clean.
   * @returns {object} New dataset object.
   */
  cleanData(dataset) {
    const data = dataset.data.filter((row) =>
      Object.values(row).some((value) => value !== null && value !== undefined)
    );
    return { ...dataset, data, length: data.length };
  }

  /**
   * Summarises a dataset: row/column counts, headers and, per column, the
   * `typeof` of its non-null values (`'mixed'` unless exactly one type
   * occurs), the number of null/undefined values and the number of distinct
   * non-null values.
   *
   * @param {object} dataset - Dataset to describe.
   * @returns {{rows: number, columns: number, headers: string[], types: object, nullCounts: object, uniqueCounts: object}}
   */
  getDataInfo(dataset) {
    const info = {
      rows: dataset.length,
      columns: dataset.columns,
      headers: dataset.headers,
      types: {},
      nullCounts: {},
      uniqueCounts: {},
    };

    dataset.headers.forEach((header) => {
      const column = dataset.data.map((row) => row[header]);
      const present = column.filter((val) => val !== null && val !== undefined);
      const types = [...new Set(present.map((val) => typeof val))];

      info.types[header] = types.length === 1 ? types[0] : 'mixed';
      info.nullCounts[header] = column.length - present.length;
      info.uniqueCounts[header] = new Set(present).size;
    });

    return info;
  }

  /**
   * Extracts one column, dropping `null`, `undefined` and any value for which
   * the global (coercing) `isNaN` is true — so non-numeric strings are dropped
   * while numeric strings and booleans are kept.
   *
   * @param {object} dataset - Dataset to read from.
   * @param {string} columnName - Must be listed in `dataset.headers`.
   * @returns {Array} The retained column values.
   * @throws {Error} When the column is not in `dataset.headers`.
   */
  getColumn(dataset, columnName) {
    if (!dataset.headers.includes(columnName)) {
      throw new Error(`Column '${columnName}' not found`);
    }

    return dataset.data
      .map((row) => row[columnName])
      // Global isNaN is intentional here: it coerces, which is what filters out text cells.
      .filter((val) => val !== null && val !== undefined && !isNaN(val));
  }

  /**
   * Extracts several columns via `getColumn`.
   *
   * @param {object} dataset - Dataset to read from.
   * @param {string[]} columnNames - Columns to extract.
   * @returns {Object<string, Array>} Map of column name to values.
   * @throws {Error} When any column is missing.
   */
  getColumns(dataset, columnNames) {
    const result = {};
    columnNames.forEach((name) => {
      result[name] = this.getColumn(dataset, name);
    });
    return result;
  }

  /**
   * Returns a copy of the dataset containing only the rows accepted by
   * `condition`. The predicate is evaluated exactly once per row.
   *
   * @param {object} dataset - Dataset to filter.
   * @param {function(object, number, object[]): boolean} condition - Row predicate.
   * @returns {object} New dataset with updated `data` and `length`.
   */
  filterRows(dataset, condition) {
    const data = dataset.data.filter(condition);
    return { ...dataset, data, length: data.length };
  }

  /**
   * Returns a copy of the dataset with rows sorted by one column.
   *
   * `null`/`undefined` values are placed last for both orders. Two strings
   * are compared with `localeCompare`; all other pairs by subtraction.
   *
   * @param {object} dataset - Dataset to sort (not mutated).
   * @param {string} columnName - Column to sort by.
   * @param {string} [order='asc'] - `'asc'`; any other value sorts descending.
   * @returns {object} New dataset with sorted `data`.
   */
  sortBy(dataset, columnName, order = 'asc') {
    const isMissing = (value) => value === null || value === undefined;
    const ascending = order === 'asc';

    const sortedData = [...dataset.data].sort((a, b) => {
      const aVal = a[columnName];
      const bVal = b[columnName];

      const aMissing = isMissing(aVal);
      const bMissing = isMissing(bVal);
      // Two missing values compare equal, keeping the comparator consistent
      // and the relative order of such rows stable.
      if (aMissing && bMissing) return 0;
      if (aMissing) return 1;
      if (bMissing) return -1;

      if (typeof aVal === 'string' && typeof bVal === 'string') {
        return ascending ? aVal.localeCompare(bVal) : bVal.localeCompare(aVal);
      }
      return ascending ? aVal - bVal : bVal - aVal;
    });

    return { ...dataset, data: sortedData };
  }
}
|
406
|
-
|
407
|
-
export default DataLoader;
|
package/src/core/utils.js
DELETED
@@ -1,306 +0,0 @@
|
|
1
|
-
/**
 * Statistical helper routines: outlier detection, frequency and contingency
 * tables, grouping, sampling, bootstrap and basic descriptive statistics.
 */
class Utils {
  /**
   * Finds outliers in a numeric column.
   *
   * Methods:
   * - `'iqr'`: values outside `[Q1 - 1.5*IQR, Q3 + 1.5*IQR]`.
   * - `'zscore'`: values whose absolute z-score exceeds 3.
   * - `'modified_zscore'`: values whose `|0.6745 * (x - median) / MAD|`
   *   exceeds 3.5. When the MAD is 0 the score is ±Infinity for every value
   *   that differs from the median (flagged) and NaN for values equal to it
   *   (not flagged).
   *
   * @param {number[]} column - Values to inspect (not mutated).
   * @param {string} [method='iqr'] - Detection method.
   * @returns {{outliers: number[], indices: number[], count: number, percentage: number}}
   *   `percentage` is 0 for an empty column.
   * @throws {Error} For an unknown method.
   */
  detectOutliers(column, method = 'iqr') {
    const outliers = [];
    const indices = [];
    const flagWhere = (isOutlier) => {
      column.forEach((value, index) => {
        if (isOutlier(value)) {
          outliers.push(value);
          indices.push(index);
        }
      });
    };

    switch (method) {
      case 'iqr': {
        // quantile() sorts its own copy, so the raw column can be passed.
        const q1 = this.quantile(column, 0.25);
        const q3 = this.quantile(column, 0.75);
        const iqr = q3 - q1;
        const lowerBound = q1 - 1.5 * iqr;
        const upperBound = q3 + 1.5 * iqr;
        flagWhere((value) => value < lowerBound || value > upperBound);
        break;
      }

      case 'zscore': {
        const mean = this.mean(column);
        const std = this.standardDeviation(column);
        flagWhere((value) => Math.abs((value - mean) / std) > 3);
        break;
      }

      case 'modified_zscore': {
        const median = this.median(column);
        const mad = this.median(column.map((x) => Math.abs(x - median)));
        flagWhere((value) => Math.abs((0.6745 * (value - median)) / mad) > 3.5);
        break;
      }

      default:
        throw new Error(`Unknown outlier detection method: ${method}`);
    }

    return {
      outliers,
      indices,
      count: outliers.length,
      percentage: column.length === 0 ? 0 : (outliers.length / column.length) * 100,
    };
  }

  /**
   * Builds a frequency table sorted by descending frequency.
   *
   * Non-null values are keyed by their string form, so `value` in the result
   * is a string. `null` and `undefined` are counted together and reported
   * with `value: null`; the literal string `'null'` is counted separately.
   *
   * @param {Array} column - Values to count.
   * @returns {Array<{value: (string|null), frequency: number, relativeFrequency: number, percentage: number}>}
   */
  frequencyTable(column) {
    const total = column.length;
    // Prototype-less dictionary: keys such as "constructor" must not hit Object.prototype.
    const counts = Object.create(null);
    let missing = 0;

    column.forEach((value) => {
      if (value === null || value === undefined) {
        missing += 1;
        return;
      }
      const key = String(value);
      counts[key] = (counts[key] || 0) + 1;
    });

    const toEntry = (value, count) => ({
      value,
      frequency: count,
      relativeFrequency: count / total,
      percentage: (count / total) * 100,
    });

    const result = Object.entries(counts).map(([value, count]) => toEntry(value, count));
    if (missing > 0) {
      result.push(toEntry(null, missing));
    }

    return result.sort((a, b) => b.frequency - a.frequency);
  }

  /**
   * Groups dataset rows by the (stringified) value of one column.
   *
   * @param {{data: object[]}} dataset - Dataset whose rows are grouped.
   * @param {string} column - Column to group by.
   * @param {Object<string, string>} [aggregation] - Map of target column to
   *   aggregation name (see `applyAggregation`). Each result is stored as
   *   `<func>_<targetCol>` and is computed over the group's finite-or-infinite
   *   numeric values only (`typeof v === 'number'` and not NaN); it is
   *   omitted when the group has no such values.
   * @returns {Object<string, {count: number, data: object[]}>} One entry per group.
   * @throws {Error} For an unknown aggregation name.
   */
  groupBy(dataset, column, aggregation) {
    // A Map avoids collisions with Object.prototype members (e.g. a group named "constructor").
    const groups = new Map();

    dataset.data.forEach((row) => {
      const key = String(row[column]);
      if (!groups.has(key)) {
        groups.set(key, []);
      }
      groups.get(key).push(row);
    });

    const wantsAggregation = Boolean(aggregation) && typeof aggregation === 'object';
    const result = {};

    groups.forEach((rows, key) => {
      const summary = { count: rows.length, data: rows };

      if (wantsAggregation) {
        Object.entries(aggregation).forEach(([targetCol, func]) => {
          const values = rows
            .map((row) => row[targetCol])
            .filter((v) => typeof v === 'number' && !isNaN(v));

          if (values.length > 0) {
            summary[`${func}_${targetCol}`] = this.applyAggregation(values, func);
          }
        });
      }

      result[key] = summary;
    });

    return result;
  }

  /**
   * Applies a named aggregation to an array of numbers.
   *
   * @param {number[]} values - Input values.
   * @param {string} func - One of `mean`, `median`, `sum`, `min`, `max`,
   *   `std`, `var`, `count`.
   * @returns {number} The aggregated value.
   * @throws {Error} For an unknown aggregation name.
   */
  applyAggregation(values, func) {
    switch (func) {
      case 'mean': return this.mean(values);
      case 'median': return this.median(values);
      case 'sum': return values.reduce((a, b) => a + b, 0);
      // reduce instead of Math.min(...values): spreading a very large array
      // as arguments can exceed the engine's argument limit.
      case 'min': return values.reduce((lowest, v) => Math.min(lowest, v), Infinity);
      case 'max': return values.reduce((highest, v) => Math.max(highest, v), -Infinity);
      case 'std': return this.standardDeviation(values);
      case 'var': return this.variance(values);
      case 'count': return values.length;
      default: throw new Error(`Unknown aggregation function: ${func}`);
    }
  }

  /**
   * Draws a sample of rows from a dataset.
   *
   * When `size >= dataset.length` a shallow copy of the dataset is returned.
   * A size that is not greater than 0 yields an empty sample.
   *
   * @param {{data: object[], length: number}} dataset - Source dataset.
   * @param {number} size - Requested number of rows.
   * @param {string} [method='random'] - `random` (without replacement),
   *   `systematic` (every `floor(length / size)`-th row from index 0),
   *   `first` or `last`.
   * @returns {object} New dataset with sampled `data` and matching `length`.
   * @throws {Error} For an unknown method.
   */
  sample(dataset, size, method = 'random') {
    if (size >= dataset.length) {
      return { ...dataset };
    }

    // Treat zero, negative and NaN sizes as "take nothing".
    const count = size > 0 ? size : 0;
    let sampledData;

    switch (method) {
      case 'random':
        sampledData = this.randomSample(dataset.length, count).map((i) => dataset.data[i]);
        break;

      case 'systematic': {
        const interval = Math.floor(dataset.length / count);
        sampledData = [];
        for (let i = 0; i < count; i++) {
          sampledData.push(dataset.data[i * interval]);
        }
        break;
      }

      case 'first':
        sampledData = dataset.data.slice(0, count);
        break;

      case 'last':
        // slice(-0) would be slice(0), i.e. the whole array.
        sampledData = count === 0 ? [] : dataset.data.slice(-count);
        break;

      default:
        throw new Error(`Unknown sampling method: ${method}`);
    }

    return {
      ...dataset,
      data: sampledData,
      length: sampledData.length,
    };
  }

  /**
   * Picks distinct random indices from `0 .. populationSize - 1` using a
   * partial Fisher–Yates shuffle driven by `Math.random()`.
   *
   * @param {number} populationSize - Size of the index range.
   * @param {number} sampleSize - Number of indices wanted; capped at
   *   `populationSize`.
   * @returns {number[]} Distinct indices in random order.
   */
  randomSample(populationSize, sampleSize) {
    const pool = Array.from({ length: populationSize }, (_, i) => i);
    const limit = Math.min(sampleSize, pool.length);
    const sample = [];

    for (let i = 0; i < limit; i++) {
      // Swap a random not-yet-chosen index into position i.
      const j = i + Math.floor(Math.random() * (pool.length - i));
      [pool[i], pool[j]] = [pool[j], pool[i]];
      sample.push(pool[i]);
    }

    return sample;
  }

  /**
   * Bootstraps a statistic by resampling `sample` with replacement.
   *
   * @param {Array} sample - Observed values.
   * @param {string|function(Array): number} statistic - Name accepted by
   *   `applyStatistic`, or a function of the resample.
   * @param {number} [iterations=1000] - Number of resamples.
   * @returns {{bootstrapStats: number[], mean: number, standardError: number, confidenceInterval: {lower: number, upper: number}}}
   *   `bootstrapStats` is sorted ascending; the interval uses the 2.5% and
   *   97.5% quantiles.
   * @throws {Error} For an unknown statistic name.
   */
  bootstrap(sample, statistic, iterations = 1000) {
    const bootstrapStats = [];

    for (let i = 0; i < iterations; i++) {
      const resample = Array.from(
        { length: sample.length },
        () => sample[Math.floor(Math.random() * sample.length)]
      );
      bootstrapStats.push(this.applyStatistic(resample, statistic));
    }

    bootstrapStats.sort((a, b) => a - b);

    return {
      bootstrapStats,
      mean: this.mean(bootstrapStats),
      standardError: this.standardDeviation(bootstrapStats),
      confidenceInterval: {
        lower: this.quantile(bootstrapStats, 0.025),
        upper: this.quantile(bootstrapStats, 0.975),
      },
    };
  }

  /**
   * Evaluates a statistic on a sample.
   *
   * @param {Array} sample - Input values.
   * @param {string|function(Array): *} statistic - `mean`, `median`, `std`,
   *   `var`, or a custom function.
   * @returns {*} The statistic's value.
   * @throws {Error} When `statistic` is neither a known name nor a function.
   */
  applyStatistic(sample, statistic) {
    switch (statistic) {
      case 'mean': return this.mean(sample);
      case 'median': return this.median(sample);
      case 'std': return this.standardDeviation(sample);
      case 'var': return this.variance(sample);
      default:
        if (typeof statistic === 'function') {
          return statistic(sample);
        }
        throw new Error(`Unknown statistic: ${statistic}`);
    }
  }

  /**
   * Cross-tabulates two equally indexed columns.
   *
   * Pairs are formed by position, iterating over the indices of `col1`.
   *
   * @param {Array} col1 - Row variable.
   * @param {Array} col2 - Column variable.
   * @returns {{table: object, totals: {row: object, col: object, grand: number}, rows: Array, columns: Array}}
   *   `table[r][c]` holds the joint count (0 for combinations that never occur).
   */
  contingencyTable(col1, col2) {
    const uniqueCol1 = [...new Set(col1)];
    const uniqueCol2 = [...new Set(col2)];

    const table = {};
    const totals = { row: {}, col: {}, grand: 0 };

    for (const val1 of uniqueCol1) {
      table[val1] = {};
      totals.row[val1] = 0;
    }
    for (const val2 of uniqueCol2) {
      totals.col[val2] = 0;
    }

    col1.forEach((val1, i) => {
      const val2 = col2[i];
      const cells = table[val1];

      if (!cells[val2]) {
        cells[val2] = 0;
      }
      cells[val2]++;
      totals.row[val1]++;
      totals.col[val2]++;
      totals.grand++;
    });

    // Fill the combinations that never occurred with explicit zeros.
    for (const val1 of uniqueCol1) {
      for (const val2 of uniqueCol2) {
        if (!table[val1][val2]) {
          table[val1][val2] = 0;
        }
      }
    }

    return { table, totals, rows: uniqueCol1, columns: uniqueCol2 };
  }

  /**
   * Arithmetic mean.
   *
   * @param {number[]} arr - Input values.
   * @returns {number} Sum divided by length (NaN for an empty array).
   */
  mean(arr) {
    return arr.reduce((sum, val) => sum + val, 0) / arr.length;
  }

  /**
   * Median of a numeric array (the input is not mutated).
   *
   * @param {number[]} arr - Input values.
   * @returns {number} Middle value, or the mean of the two middle values for
   *   an even length.
   */
  median(arr) {
    const sorted = [...arr].sort((a, b) => a - b);
    const mid = Math.floor(sorted.length / 2);
    if (sorted.length % 2 === 0) {
      return (sorted[mid - 1] + sorted[mid]) / 2;
    }
    return sorted[mid];
  }

  /**
   * Quantile by linear interpolation between the two closest ranks, at
   * position `(n - 1) * q` of the sorted values (the input is not mutated).
   *
   * @param {number[]} arr - Input values.
   * @param {number} q - Quantile in `[0, 1]`.
   * @returns {number} The interpolated quantile.
   */
  quantile(arr, q) {
    const sorted = [...arr].sort((a, b) => a - b);
    const position = (sorted.length - 1) * q;
    const lower = Math.floor(position);
    const upper = Math.ceil(position);

    if (lower === upper) {
      return sorted[lower];
    }

    const weight = position % 1;
    return sorted[lower] * (1 - weight) + sorted[upper] * weight;
  }

  /**
   * Sample standard deviation (square root of `variance`).
   *
   * @param {number[]} arr - Input values.
   * @returns {number} Standard deviation.
   */
  standardDeviation(arr) {
    return Math.sqrt(this.variance(arr));
  }

  /**
   * Sample variance: squared deviations from the mean divided by `n - 1`.
   *
   * @param {number[]} arr - Input values.
   * @returns {number} Variance (NaN when fewer than two values are given).
   */
  variance(arr) {
    const mean = this.mean(arr);
    const squaredDeviations = arr.reduce((sum, val) => sum + Math.pow(val - mean, 2), 0);
    return squaredDeviations / (arr.length - 1);
  }

  /**
   * Rounds to a fixed number of decimals using `Math.round` on the scaled value.
   *
   * @param {number} value - Number to round.
   * @param {number} [decimals=4] - Decimal places.
   * @returns {number} Rounded number.
   */
  round(value, decimals = 4) {
    const factor = Math.pow(10, decimals);
    return Math.round(value * factor) / factor;
  }

  /**
   * Rounds numbers via `round`; any non-number is returned unchanged.
   *
   * @param {*} value - Value to format.
   * @param {number} [decimals=4] - Decimal places.
   * @returns {*} Rounded number or the original value.
   */
  formatNumber(value, decimals = 4) {
    if (typeof value !== 'number') return value;
    return this.round(value, decimals);
  }
}
|
305
|
-
|
306
|
-
export default Utils;
|