datly 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/datly.cjs +1 -0
- package/dist/datly.mjs +1 -0
- package/dist/datly.umd.js +1 -1
- package/dist/datly.umd.js.map +1 -0
- package/package.json +24 -11
- package/src/core/dataLoader.js +407 -0
- package/src/core/utils.js +306 -0
- package/src/core/validator.js +205 -0
- package/src/dataviz/index.js +1566 -0
- package/src/descriptive/centralTendency.js +208 -0
- package/src/descriptive/dispersion.js +273 -0
- package/src/descriptive/position.js +268 -0
- package/src/descriptive/shape.js +336 -0
- package/src/index.js +480 -0
- package/src/inferential/confidenceIntervals.js +561 -0
- package/src/inferential/hypothesisTesting.js +527 -0
- package/src/inferential/normalityTests.js +587 -0
- package/src/insights/autoAnalyser.js +685 -0
- package/src/insights/interpreter.js +543 -0
- package/src/insights/patternDetector.js +897 -0
- package/src/insights/reportGenerator.js +1072 -0
- package/src/ml/ClassificationMetrics.js +336 -0
- package/src/ml/DecisionTree.js +412 -0
- package/src/ml/KNearestNeighbors.js +317 -0
- package/src/ml/LinearRegression.js +179 -0
- package/src/ml/LogisticRegression.js +396 -0
- package/src/ml/MachineLearning.js +490 -0
- package/src/ml/NaiveBayes.js +296 -0
- package/src/ml/RandomForest.js +323 -0
- package/src/ml/SupportVectorMachine.js +299 -0
- package/src/ml/baseModel.js +106 -0
- package/src/multivariate/correlation.js +653 -0
- package/src/multivariate/regression.js +660 -0
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "datly",
-  "version": "0.0.1",
+  "version": "0.0.2",
   "description": "A JavaScript toolkit for data science, statistics, and machine learning in the browser or Node.js.",
   "keywords": [
     "data-science",
@@ -29,23 +29,36 @@
     "url": "https://github.com/vbfs/datly/issues"
   },
   "type": "module",
-  "main": "dist/datly.
+  "main": "dist/datly.cjs",
+  "module": "dist/datly.mjs",
   "unpkg": "dist/datly.umd.js",
+  "exports": {
+    ".": {
+      "import": "./dist/datly.mjs",
+      "require": "./dist/datly.cjs",
+      "default": "./dist/datly.umd.js"
+    }
+  },
   "scripts": {
-    "build": "microbundle
-    "dev": "microbundle watch --entry src/index.js --
+    "build": "microbundle --entry src/index.js --format modern,esm,cjs,umd --name Datly --compress --no-sourcemap --external none",
+    "dev": "microbundle watch --entry src/index.js --format modern,esm,cjs,umd --name Datly --external none",
     "prepublishOnly": "npm run build"
   },
   "devDependencies": {
-    "
-    "
+    "microbundle": "^0.15.1",
+    "d3-array": "^3.2.4",
+    "d3-axis": "^3.0.0",
+    "d3-scale": "^4.0.2",
+    "d3-scale-chromatic": "^3.1.0",
+    "d3-selection": "^3.0.0",
+    "d3-shape": "^3.2.0",
+    "d3-interpolate": "^3.0.1"
   },
+  "dependencies": {},
   "files": [
     "dist",
+    "src",
     "README.md",
     "LICENSE"
-  ]
-
-    "node": ">=14"
-  }
-}
+  ]
+}
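With the new "exports" map, Node resolves the "import" condition to the ESM bundle and the "require" condition to the CJS bundle, while unpkg keeps serving the UMD build. A minimal consumer sketch, assuming the package entry exposes a default export (the actual export shape is defined in src/index.js, not shown in this hunk):

// ESM consumers resolve through the "import" condition (./dist/datly.mjs)
import Datly from 'datly';

// CommonJS consumers resolve through the "require" condition (./dist/datly.cjs):
//   const Datly = require('datly');

// Browsers loading via unpkg keep receiving dist/datly.umd.js; the --name Datly
// flag in the build script sets the UMD global name.
console.log(typeof Datly);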
package/src/core/dataLoader.js
ADDED
@@ -0,0 +1,407 @@
+// Environment detection
+const isNode = typeof process !== 'undefined' &&
+  process.versions != null &&
+  process.versions.node != null;
+
+// Lazy-load the fs module
+let fs = null;
+let fsPromise = null;
+
+async function getFS() {
+  if (!isNode) return null;
+  if (fs) return fs;
+  if (fsPromise) return fsPromise;
+
+  fsPromise = import('fs').then(m => {
+    fs = m.default || m;
+    return fs;
+  }).catch(() => null);
+
+  return fsPromise;
+}
+
+class DataLoader {
+  async loadCSV(filePath, options = {}) {
+    const defaultOptions = {
+      delimiter: ',',
+      header: true,
+      skipEmptyLines: true,
+      encoding: 'utf8'
+    };
+
+    const config = { ...defaultOptions, ...options };
+
+    try {
+      let content;
+
+      // Node.js
+      if (isNode) {
+        const fsModule = await getFS();
+        if (fsModule) {
+          content = fsModule.readFileSync(filePath, { encoding: config.encoding });
+        }
+      }
+      // Browser with File System Access API
+      if (!content && typeof window !== 'undefined' && window.fs) {
+        content = window.fs.readFileSync
+          ? window.fs.readFileSync(filePath, { encoding: config.encoding })
+          : await window.fs.readFile(filePath, { encoding: config.encoding });
+      }
+      // Fetch API (browser)
+      if (!content && typeof fetch !== 'undefined') {
+        const response = await fetch(filePath);
+        if (!response.ok) {
+          throw new Error(`HTTP error! status: ${response.status}`);
+        }
+        content = await response.text();
+      }
+
+      if (!content) {
+        throw new Error('No file system available. Use fetch, fs module, or pass CSV text directly to parseCSV()');
+      }
+
+      return this.parseCSV(content, config);
+    } catch (error) {
+      throw new Error(`Failed to load CSV: ${error.message}`);
+    }
+  }
+
+  async loadJSON(jsonInput, options = {}) {
+    const defaultOptions = {
+      validateTypes: true,
+      autoInferHeaders: true
+    };
+
+    const config = { ...defaultOptions, ...options };
+
+    try {
+      let jsonData;
+
+      // If the input is a string that looks like a file path
+      if (typeof jsonInput === 'string' && !jsonInput.trim().startsWith('{') && !jsonInput.trim().startsWith('[')) {
+        let content;
+
+        // Node.js
+        if (isNode) {
+          const fsModule = await getFS();
+          if (fsModule) {
+            content = fsModule.readFileSync(jsonInput, { encoding: 'utf8' });
+          }
+        }
+        // Browser with File System Access API
+        if (!content && typeof window !== 'undefined' && window.fs) {
+          content = window.fs.readFileSync
+            ? window.fs.readFileSync(jsonInput, { encoding: 'utf8' })
+            : await window.fs.readFile(jsonInput, { encoding: 'utf8' });
+        }
+        // Fetch API (browser)
+        if (!content && typeof fetch !== 'undefined') {
+          const response = await fetch(jsonInput);
+          if (!response.ok) {
+            throw new Error(`HTTP error! status: ${response.status}`);
+          }
+          content = await response.text();
+        }
+
+        if (!content) {
+          throw new Error('No file system available');
+        }
+
+        jsonData = JSON.parse(content);
+      }
+      // JSON string
+      else if (typeof jsonInput === 'string') {
+        jsonData = JSON.parse(jsonInput);
+      }
+      // JavaScript object
+      else if (typeof jsonInput === 'object') {
+        jsonData = jsonInput;
+      }
+      else {
+        throw new Error('Invalid JSON input: must be string, file path, or object');
+      }
+
+      return this.parseJSON(jsonData, config);
+    } catch (error) {
+      throw new Error(`Failed to load JSON: ${error.message}`);
+    }
+  }
+
+  parseJSON(jsonData, config) {
+    if (!jsonData) {
+      throw new Error('JSON data is empty or null');
+    }
+
+    if (Array.isArray(jsonData)) {
+      return this.parseJSONArray(jsonData, config);
+    } else if (jsonData.headers && jsonData.data) {
+      return this.parseStructuredJSON(jsonData, config);
+    } else if (typeof jsonData === 'object') {
+      return this.parseJSONObject(jsonData, config);
+    } else {
+      throw new Error('Unsupported JSON format');
+    }
+  }
+
+  parseJSONArray(jsonArray, config) {
+    if (jsonArray.length === 0) {
+      throw new Error('JSON array is empty');
+    }
+
+    const firstRow = jsonArray[0];
+    if (typeof firstRow !== 'object' || firstRow === null) {
+      throw new Error('JSON array must contain objects');
+    }
+
+    let headers;
+    if (config.autoInferHeaders) {
+      const allKeys = new Set();
+      jsonArray.forEach(row => {
+        if (typeof row === 'object' && row !== null) {
+          Object.keys(row).forEach(key => allKeys.add(key));
+        }
+      });
+      headers = Array.from(allKeys);
+    } else {
+      headers = Object.keys(firstRow);
+    }
+
+    const data = jsonArray.map((row, index) => {
+      if (typeof row !== 'object' || row === null) {
+        console.warn(`Row ${index} is not an object, skipping`);
+        return null;
+      }
+
+      const processedRow = {};
+      headers.forEach(header => {
+        let value = row[header];
+        if (config.validateTypes) {
+          value = this.inferType(value);
+        }
+        processedRow[header] = value;
+      });
+      return processedRow;
+    }).filter(row => row !== null);
+
+    return {
+      headers,
+      data,
+      length: data.length,
+      columns: headers.length,
+      source: 'json_array'
+    };
+  }
+
+  parseStructuredJSON(jsonData, config) {
+    const { headers, data } = jsonData;
+
+    if (!Array.isArray(headers)) {
+      throw new Error('Headers must be an array');
+    }
+
+    if (!Array.isArray(data)) {
+      throw new Error('Data must be an array');
+    }
+
+    if (headers.length === 0) {
+      throw new Error('Headers array is empty');
+    }
+
+    const processedData = data.map((row, index) => {
+      if (Array.isArray(row)) {
+        const processedRow = {};
+        headers.forEach((header, i) => {
+          let value = i < row.length ? row[i] : null;
+          if (config.validateTypes) {
+            value = this.inferType(value);
+          }
+          processedRow[header] = value;
+        });
+        return processedRow;
+      } else if (typeof row === 'object' && row !== null) {
+        const processedRow = {};
+        headers.forEach(header => {
+          let value = row[header];
+          if (config.validateTypes) {
+            value = this.inferType(value);
+          }
+          processedRow[header] = value;
+        });
+        return processedRow;
+      } else {
+        console.warn(`Row ${index} has invalid format, skipping`);
+        return null;
+      }
+    }).filter(row => row !== null);
+
+    return {
+      headers,
+      data: processedData,
+      length: processedData.length,
+      columns: headers.length,
+      source: 'structured_json'
+    };
+  }
+
+  parseJSONObject(jsonObject, config) {
+    const entries = Object.entries(jsonObject);
+    if (entries.length === 0) {
+      throw new Error('JSON object is empty');
+    }
+
+    const headers = ['key', 'value'];
+    const data = entries.map(([key, value]) => ({
+      key: key,
+      value: config.validateTypes ? this.inferType(value) : value
+    }));
+
+    return {
+      headers,
+      data,
+      length: data.length,
+      columns: 2,
+      source: 'json_object'
+    };
+  }
+
+  parseCSV(content, options) {
+    const lines = content.split('\n').filter(line =>
+      options.skipEmptyLines ? line.trim() !== '' : true
+    );
+
+    if (lines.length === 0) {
+      throw new Error('CSV file is empty');
+    }
+
+    const headers = options.header
+      ? lines[0].split(options.delimiter).map(h => h.trim().replace(/['"]/g, ''))
+      : Array.from({ length: lines[0].split(options.delimiter).length }, (_, i) => `col_${i}`);
+
+    const startIndex = options.header ? 1 : 0;
+    const data = [];
+
+    for (let i = startIndex; i < lines.length; i++) {
+      const values = lines[i].split(options.delimiter);
+      if (values.length === headers.length) {
+        const row = {};
+        headers.forEach((header, index) => {
+          let value = values[index].trim().replace(/['"]/g, '');
+          row[header] = this.inferType(value);
+        });
+        data.push(row);
+      }
+    }
+
+    return {
+      headers,
+      data,
+      length: data.length,
+      columns: headers.length
+    };
+  }
+
+  inferType(value) {
+    if (value === '' || value === 'null' || value === 'NULL' || value === 'NaN') {
+      return null;
+    }
+
+    if (value === 'true' || value === 'TRUE') return true;
+    if (value === 'false' || value === 'FALSE') return false;
+
+    if (/^-?\d+$/.test(value)) {
+      return parseInt(value, 10);
+    }
+
+    if (/^-?\d*\.\d+$/.test(value)) {
+      return parseFloat(value);
+    }
+
+    return value;
+  }
+
+  cleanData(dataset) {
+    const cleaned = {
+      ...dataset,
+      data: dataset.data.filter(row => {
+        return Object.values(row).some(value => value !== null && value !== undefined);
+      })
+    };
+
+    cleaned.length = cleaned.data.length;
+    return cleaned;
+  }
+
+  getDataInfo(dataset) {
+    const info = {
+      rows: dataset.length,
+      columns: dataset.columns,
+      headers: dataset.headers,
+      types: {},
+      nullCounts: {},
+      uniqueCounts: {}
+    };
+
+    dataset.headers.forEach(header => {
+      const column = dataset.data.map(row => row[header]);
+      const nonNullValues = column.filter(val => val !== null && val !== undefined);
+      const types = [...new Set(nonNullValues.map(val => typeof val))];
+
+      info.types[header] = types.length === 1 ? types[0] : 'mixed';
+      info.nullCounts[header] = column.length - nonNullValues.length;
+      info.uniqueCounts[header] = new Set(nonNullValues).size;
+    });
+
+    return info;
+  }
+
+  getColumn(dataset, columnName) {
+    if (!dataset.headers.includes(columnName)) {
+      throw new Error(`Column '${columnName}' not found`);
+    }
+
+    return dataset.data
+      .map(row => row[columnName])
+      .filter(val => val !== null && val !== undefined && !isNaN(val));
+  }
+
+  getColumns(dataset, columnNames) {
+    const result = {};
+    columnNames.forEach(name => {
+      result[name] = this.getColumn(dataset, name);
+    });
+    return result;
+  }
+
+  filterRows(dataset, condition) {
+    return {
+      ...dataset,
+      data: dataset.data.filter(condition),
+      length: dataset.data.filter(condition).length
+    };
+  }
+
+  sortBy(dataset, columnName, order = 'asc') {
+    const sortedData = [...dataset.data].sort((a, b) => {
+      const aVal = a[columnName];
+      const bVal = b[columnName];
+
+      if (aVal === null || aVal === undefined) return 1;
+      if (bVal === null || bVal === undefined) return -1;
+
+      if (typeof aVal === 'string' && typeof bVal === 'string') {
+        return order === 'asc'
+          ? aVal.localeCompare(bVal)
+          : bVal.localeCompare(aVal);
+      }
+
+      return order === 'asc' ? aVal - bVal : bVal - aVal;
+    });
+
+    return {
+      ...dataset,
+      data: sortedData
+    };
+  }
+}
+
+export default DataLoader;
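For context, here is a short usage sketch of the DataLoader API added above. It is only a sketch under two assumptions: it imports the class by a hypothetical relative path (the "exports" map above exposes only the "." entry point, so a deep import like 'datly/src/core/dataLoader.js' would be blocked in Node), and it feeds parseCSV raw text so neither the fs nor the fetch fallback is exercised:

// Hypothetical relative path; adjust to wherever the package sources live.
import DataLoader from './src/core/dataLoader.js';

const loader = new DataLoader();

// parseCSV takes the CSV text directly; options are passed explicitly because
// parseCSV (unlike loadCSV) does not merge in the default options.
const csv = 'name,age\nAda,36\nLin,29\n';
const dataset = loader.parseCSV(csv, {
  delimiter: ',',
  header: true,
  skipEmptyLines: true
});

console.log(dataset.headers);                    // ['name', 'age']
console.log(loader.getColumn(dataset, 'age'));   // [36, 29] -- inferType() coerces digit strings to numbers
console.log(loader.getDataInfo(dataset).types);  // { name: 'string', age: 'number' }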