datly 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/datly.cjs +1 -0
- package/dist/datly.mjs +1 -0
- package/dist/datly.umd.js +1 -1
- package/dist/datly.umd.js.map +1 -0
- package/package.json +24 -11
- package/src/core/dataLoader.js +407 -0
- package/src/core/utils.js +306 -0
- package/src/core/validator.js +205 -0
- package/src/dataviz/index.js +1566 -0
- package/src/descriptive/centralTendency.js +208 -0
- package/src/descriptive/dispersion.js +273 -0
- package/src/descriptive/position.js +268 -0
- package/src/descriptive/shape.js +336 -0
- package/src/index.js +480 -0
- package/src/inferential/confidenceIntervals.js +561 -0
- package/src/inferential/hypothesisTesting.js +527 -0
- package/src/inferential/normalityTests.js +587 -0
- package/src/insights/autoAnalyser.js +685 -0
- package/src/insights/interpreter.js +543 -0
- package/src/insights/patternDetector.js +897 -0
- package/src/insights/reportGenerator.js +1072 -0
- package/src/ml/ClassificationMetrics.js +336 -0
- package/src/ml/DecisionTree.js +412 -0
- package/src/ml/KNearestNeighbors.js +317 -0
- package/src/ml/LinearRegression.js +179 -0
- package/src/ml/LogisticRegression.js +396 -0
- package/src/ml/MachineLearning.js +490 -0
- package/src/ml/NaiveBayes.js +296 -0
- package/src/ml/RandomForest.js +323 -0
- package/src/ml/SupportVectorMachine.js +299 -0
- package/src/ml/baseModel.js +106 -0
- package/src/multivariate/correlation.js +653 -0
- package/src/multivariate/regression.js +660 -0
package/src/index.js
ADDED
@@ -0,0 +1,480 @@
|
|
1
|
+
import DataLoader from "./core/dataLoader.js";
|
2
|
+
import Validator from "./core/validator.js";
|
3
|
+
import Utils from "./core/utils.js";
|
4
|
+
import CentralTendency from "./descriptive/centralTendency.js";
|
5
|
+
import Dispersion from "./descriptive/dispersion.js";
|
6
|
+
import Position from "./descriptive/position.js";
|
7
|
+
import Shape from "./descriptive/shape.js";
|
8
|
+
import HypothesisTesting from "./inferential/hypothesisTesting.js";
|
9
|
+
import ConfidenceIntervals from "./inferential/confidenceIntervals.js";
|
10
|
+
import NormalityTests from "./inferential/normalityTests.js";
|
11
|
+
import Correlation from "./multivariate/correlation.js";
|
12
|
+
import Regression from "./multivariate/regression.js";
|
13
|
+
import ReportGenerator from "./insights/reportGenerator.js";
|
14
|
+
import PatternDetector from "./insights/patternDetector.js";
|
15
|
+
import Interpreter from "./insights/interpreter.js";
|
16
|
+
import AutoAnalyzer from "./insights/autoAnalyser.js";
|
17
|
+
import MachineLearning from "./ml/machineLearning.js";
|
18
|
+
import DataViz from "./dataviz/index.js";
|
19
|
+
|
20
|
+
/**
 * Decides whether a raw cell value should be handed to a plotting routine.
 *
 * The global (coercing) isNaN is used on purpose so that numeric strings such
 * as "12" keep passing, exactly as before. On its own, however, it also
 * accepts null and blank strings because both coerce to 0, which would plot
 * missing values as real zeros; those are rejected explicitly here.
 *
 * @param {*} value - Raw cell value taken from a dataset row.
 * @returns {boolean} true when the value should be kept.
 */
function isPlottableValue(value) {
  if (value === null) {
    return false;
  }
  if (typeof value === "string" && value.trim() === "") {
    return false;
  }
  return !isNaN(value);
}

/**
 * Facade that instantiates every datly module once and exposes their
 * operations as flat methods on a single object.
 *
 * Almost every method is a thin wrapper: it forwards its arguments to one
 * collaborator created in the constructor and returns whatever that
 * collaborator returns. Argument validation and result shapes are therefore
 * defined by the collaborators, not by this class.
 */
class Datly {
  /**
   * Creates one instance of each collaborator. AutoAnalyzer is the only one
   * that receives a reference back to this facade.
   */
  constructor() {
    this.dataLoader = new DataLoader();
    this.validator = new Validator();
    this.utils = new Utils();
    this.centralTendency = new CentralTendency();
    this.dispersion = new Dispersion();
    this.position = new Position();
    this.shape = new Shape();
    this.hypothesisTesting = new HypothesisTesting();
    this.confidenceIntervals = new ConfidenceIntervals();
    this.normalityTests = new NormalityTests();
    this.correlation = new Correlation();
    this.regression = new Regression();
    this.reportGenerator = new ReportGenerator();
    this.patternDetector = new PatternDetector();
    this.interpreter = new Interpreter();
    this.autoAnalyzer = new AutoAnalyzer(this);
    this.ml = new MachineLearning();
    this.viz = new DataViz();
  }

  // ====== Loaders / Utils ======
  // Each method forwards to the same-named method on this.dataLoader,
  // except validateData, which forwards to this.validator.

  async loadCSV(filePath, options = {}) {
    return this.dataLoader.loadCSV(filePath, options);
  }

  async loadJSON(jsonInput, options = {}) {
    return this.dataLoader.loadJSON(jsonInput, options);
  }

  cleanData(dataset) {
    return this.dataLoader.cleanData(dataset);
  }

  getColumn(dataset, columnName) {
    return this.dataLoader.getColumn(dataset, columnName);
  }

  getColumns(dataset, columnNames) {
    return this.dataLoader.getColumns(dataset, columnNames);
  }

  validateData(dataset) {
    return this.validator.validateData(dataset);
  }

  getDataInfo(dataset) {
    return this.dataLoader.getDataInfo(dataset);
  }

  // ====== Descriptive: Central Tendency ======
  // Each method forwards to the same-named method on this.centralTendency.

  mean(column) {
    return this.centralTendency.mean(column);
  }

  median(column) {
    return this.centralTendency.median(column);
  }

  mode(column) {
    return this.centralTendency.mode(column);
  }

  geometricMean(column) {
    return this.centralTendency.geometricMean(column);
  }

  harmonicMean(column) {
    return this.centralTendency.harmonicMean(column);
  }

  trimmedMean(column, percentage) {
    return this.centralTendency.trimmedMean(column, percentage);
  }

  // ====== Descriptive: Dispersion ======
  // Each method forwards to the same-named method on this.dispersion.

  variance(column) {
    return this.dispersion.variance(column);
  }

  standardDeviation(column) {
    return this.dispersion.standardDeviation(column);
  }

  range(column) {
    return this.dispersion.range(column);
  }

  interquartileRange(column) {
    return this.dispersion.interquartileRange(column);
  }

  coefficientOfVariation(column) {
    return this.dispersion.coefficientOfVariation(column);
  }

  meanAbsoluteDeviation(column) {
    return this.dispersion.meanAbsoluteDeviation(column);
  }

  // ====== Descriptive: Position & Shape ======
  // Methods up to standardizedValues forward to the same-named method on
  // this.position; skewness, kurtosis and isNormalDistribution forward to
  // the same-named method on this.shape.

  quantile(column, q) {
    return this.position.quantile(column, q);
  }

  percentile(column, p) {
    return this.position.percentile(column, p);
  }

  quartiles(column) {
    return this.position.quartiles(column);
  }

  quintiles(column) {
    return this.position.quintiles(column);
  }

  deciles(column) {
    return this.position.deciles(column);
  }

  percentileRank(column, value) {
    return this.position.percentileRank(column, value);
  }

  zScore(column, value) {
    return this.position.zScore(column, value);
  }

  boxplotStats(column) {
    return this.position.boxplotStats(column);
  }

  fiveNumberSummary(column) {
    return this.position.fiveNumberSummary(column);
  }

  /** Forwards to this.position.rank; `method` defaults to "average". */
  rank(column, method = "average") {
    return this.position.rank(column, method);
  }

  normalizedRank(column) {
    return this.position.normalizedRank(column);
  }

  standardizedValues(column) {
    return this.position.standardizedValues(column);
  }

  skewness(column) {
    return this.shape.skewness(column);
  }

  kurtosis(column) {
    return this.shape.kurtosis(column);
  }

  isNormalDistribution(column) {
    return this.shape.isNormalDistribution(column);
  }

  // ====== Multivariate ======
  // All four forward to this.correlation; note the shorter target names.

  /** Forwards to this.correlation.pearson. */
  correlationPearson(col1, col2) {
    return this.correlation.pearson(col1, col2);
  }

  /** Forwards to this.correlation.spearman. */
  correlationSpearman(col1, col2) {
    return this.correlation.spearman(col1, col2);
  }

  /** Forwards to this.correlation.matrix. */
  correlationMatrix(dataset) {
    return this.correlation.matrix(dataset);
  }

  /** Forwards to this.correlation.covariance. */
  covariance(col1, col2) {
    return this.correlation.covariance(col1, col2);
  }

  // ====== Inferential ======

  /** Forwards to this.hypothesisTesting.tTest; `type` defaults to "two-sample". */
  tTest(sample1, sample2, type = "two-sample") {
    return this.hypothesisTesting.tTest(sample1, sample2, type);
  }

  /** Forwards to this.hypothesisTesting.zTest. */
  zTest(sample, populationMean, populationStd) {
    return this.hypothesisTesting.zTest(sample, populationMean, populationStd);
  }

  /** Forwards to this.hypothesisTesting.anovaTest. */
  anovaTest(groups) {
    return this.hypothesisTesting.anovaTest(groups);
  }

  /** Forwards to this.hypothesisTesting.chiSquareTest. */
  chiSquareTest(col1, col2) {
    return this.hypothesisTesting.chiSquareTest(col1, col2);
  }

  /** Forwards to this.confidenceIntervals.mean; `confidence` defaults to 0.95. */
  confidenceInterval(sample, confidence = 0.95) {
    return this.confidenceIntervals.mean(sample, confidence);
  }

  /** Forwards to this.normalityTests.shapiroWilk. */
  shapiroWilkTest(sample) {
    return this.normalityTests.shapiroWilk(sample);
  }

  // ====== Regression ======

  /** Forwards to this.regression.linear. */
  linearRegression(x, y) {
    return this.regression.linear(x, y);
  }

  // ====== Utils ======
  // Each method forwards to the same-named method on this.utils.

  /** `method` defaults to "iqr". */
  detectOutliers(column, method = "iqr") {
    return this.utils.detectOutliers(column, method);
  }

  frequencyTable(column) {
    return this.utils.frequencyTable(column);
  }

  groupBy(dataset, column, aggregation) {
    return this.utils.groupBy(dataset, column, aggregation);
  }

  /** `method` defaults to "random". */
  sample(dataset, size, method = "random") {
    return this.utils.sample(dataset, size, method);
  }

  // ====== Insights/Reports ======

  /** Forwards to this.reportGenerator.summary. */
  generateSummaryReport(dataset) {
    return this.reportGenerator.summary(dataset);
  }

  /** Forwards to this.patternDetector.analyze. */
  identifyPatterns(dataset) {
    return this.patternDetector.analyze(dataset);
  }

  /** Forwards to this.interpreter.interpret. */
  interpretResults(testResult) {
    return this.interpreter.interpret(testResult);
  }

  // ====== AutoAnalyzer ======

  /** Forwards to this.autoAnalyzer.autoAnalyze. */
  autoAnalyze(dataset, options = {}) {
    return this.autoAnalyzer.autoAnalyze(dataset, options);
  }

  /**
   * Loads a dataset from a path and runs autoAnalyze on it. The loader is
   * chosen from the file extension, compared case-insensitively.
   *
   * @param {string} filePath - Path ending in ".csv" or ".json".
   * @param {object} [loadOptions={}] - Passed to loadCSV / loadJSON.
   * @param {object} [analysisOptions={}] - Passed to autoAnalyze.
   * @returns {Promise<*>} Whatever autoAnalyze returns for the loaded dataset.
   * @throws {TypeError} If filePath is not a string.
   * @throws {Error} If the extension is neither .csv nor .json.
   */
  async autoAnalyzeFromFile(filePath, loadOptions = {}, analysisOptions = {}) {
    // Fail with an explicit message instead of the opaque
    // "filePath.toLowerCase is not a function".
    if (typeof filePath !== "string") {
      throw new TypeError("autoAnalyzeFromFile expects filePath to be a string.");
    }

    let dataset;
    const lower = filePath.toLowerCase();
    if (lower.endsWith(".csv")) {
      dataset = await this.loadCSV(filePath, loadOptions);
    } else if (lower.endsWith(".json")) {
      dataset = await this.loadJSON(filePath, loadOptions);
    } else {
      throw new Error("Formato de arquivo não suportado. Use CSV ou JSON.");
    }
    return this.autoAnalyze(dataset, analysisOptions);
  }

  /**
   * Runs autoAnalyzeFromFile with default load options and prints a report
   * to the console before returning the analysis result.
   *
   * Reads result.summary.totalInsights, .highPriorityInsights, .keyFindings
   * (items with `title` and `description`) and .recommendations.
   *
   * @param {string} filePath - Path handed to autoAnalyzeFromFile.
   * @param {object} [options={}] - Analysis options handed to autoAnalyzeFromFile.
   * @returns {Promise<*>} The analysis result, unchanged.
   */
  async quickAnalysis(filePath, options = {}) {
    const result = await this.autoAnalyzeFromFile(filePath, {}, options);

    console.log("\n" + "=".repeat(60));
    console.log("📊 AUTO REPORT");
    console.log("=".repeat(60));

    console.log(`\n📈 EXECUTIVE RESUME:`);
    console.log(`• Total insights: ${result.summary.totalInsights}`);
    console.log(`• Priority Insights: ${result.summary.highPriorityInsights}`);

    console.log(`\n🔍 MAIN INSIGHTS:`);
    result.summary.keyFindings.forEach((f, i) => {
      console.log(`${i + 1}. ${f.title}`);
      console.log(` ${f.description}`);
    });

    console.log(`\n💡 RECOMMENDATIONS:`);
    result.summary.recommendations.forEach((rec, i) => {
      console.log(`${i + 1}. ${rec}`);
    });

    console.log("\n" + "=".repeat(60));
    return result;
  }

  // ====== Machine Learning: Model factories ======
  // Every Machine Learning method below (factories, utilities, feature
  // engineering, metrics) forwards to the same-named method on this.ml.

  createLinearRegression(options) {
    return this.ml.createLinearRegression(options);
  }

  createLogisticRegression(options) {
    return this.ml.createLogisticRegression(options);
  }

  createKNN(options) {
    return this.ml.createKNN(options);
  }

  createDecisionTree(options) {
    return this.ml.createDecisionTree(options);
  }

  createRandomForest(options) {
    return this.ml.createRandomForest(options);
  }

  createNaiveBayes(options) {
    return this.ml.createNaiveBayes(options);
  }

  createSVM(options) {
    return this.ml.createSVM(options);
  }

  // ====== Machine Learning: Utilities ======

  /** `testSize` defaults to 0.2 and `shuffle` to true. */
  trainTestSplit(X, y, testSize = 0.2, shuffle = true) {
    return this.ml.trainTestSplit(X, y, testSize, shuffle);
  }

  /** `folds` defaults to 5 and `taskType` to "classification". */
  crossValidate(model, X, y, folds = 5, taskType = "classification") {
    return this.ml.crossValidate(model, X, y, folds, taskType);
  }

  /** `taskType` defaults to "classification". */
  compareModels(models, X, y, taskType = "classification") {
    return this.ml.compareModels(models, X, y, taskType);
  }

  quickTrain(modelType, X, y, options = {}) {
    return this.ml.quickTrain(modelType, X, y, options);
  }

  // ====== Machine Learning: Feature Engineering ======

  /** `degree` defaults to 2. */
  polynomialFeatures(X, degree = 2) {
    return this.ml.polynomialFeatures(X, degree);
  }

  standardScaler(X) {
    return this.ml.standardScaler(X);
  }

  /** `featureRange` defaults to [0, 1]. */
  minMaxScaler(X, featureRange = [0, 1]) {
    return this.ml.minMaxScaler(X, featureRange);
  }

  // ====== Machine Learning: Metrics ======

  rocCurve(yTrue, yProba) {
    return this.ml.rocCurve(yTrue, yProba);
  }

  precisionRecallCurve(yTrue, yProba) {
    return this.ml.precisionRecallCurve(yTrue, yProba);
  }

  // ====== Data Visualization: Basic Plots ======
  // Each plotX method forwards to the this.viz method named in its comment.

  /** Forwards to this.viz.histogram. */
  plotHistogram(data, options) {
    return this.viz.histogram(data, options);
  }

  /** Forwards to this.viz.boxplot. */
  plotBoxplot(data, options) {
    return this.viz.boxplot(data, options);
  }

  /** Forwards to this.viz.scatter. */
  plotScatter(xData, yData, options) {
    return this.viz.scatter(xData, yData, options);
  }

  /** Forwards to this.viz.line. */
  plotLine(xData, yData, options) {
    return this.viz.line(xData, yData, options);
  }

  /** Forwards to this.viz.bar. */
  plotBar(categories, values, options) {
    return this.viz.bar(categories, values, options);
  }

  /** Forwards to this.viz.pie. */
  plotPie(labels, values, options) {
    return this.viz.pie(labels, values, options);
  }

  // ====== Data Visualization: Advanced Plots ======

  /** Forwards to this.viz.heatmap. */
  plotHeatmap(matrix, options) {
    return this.viz.heatmap(matrix, options);
  }

  /** Forwards to this.viz.violin. */
  plotViolin(data, options) {
    return this.viz.violin(data, options);
  }

  /** Forwards to this.viz.density. */
  plotDensity(data, options) {
    return this.viz.density(data, options);
  }

  /** Forwards to this.viz.qqplot. */
  plotQQ(data, options) {
    return this.viz.qqplot(data, options);
  }

  /** Forwards to this.viz.parallel. */
  plotParallel(data, dimensions, options) {
    return this.viz.parallel(data, dimensions, options);
  }

  /** Forwards to this.viz.pairplot. */
  plotPairplot(data, variables, options) {
    return this.viz.pairplot(data, variables, options);
  }

  /** Forwards to this.viz.multiline. */
  plotMultiline(series, options) {
    return this.viz.multiline(series, options);
  }

  // ====== Data Visualization: Helpers ======

  /**
   * Builds a square matrix of correlationPearson results for every pair of
   * numeric columns and renders it through this.viz.heatmap.
   *
   * A column counts as numeric when its value in the FIRST row has
   * typeof "number"; later rows are not inspected when picking columns.
   *
   * @param {object[]} dataset - Non-empty array of row objects.
   * @param {object} [options={}] - Spread last into the heatmap options, so
   *   it can override the default `title` and `labels`.
   * @returns {*} Whatever this.viz.heatmap returns.
   * @throws {TypeError} If dataset is not a non-empty array.
   */
  plotCorrelationMatrix(dataset, options = {}) {
    // Without this guard an empty dataset fails inside Object.keys with
    // "Cannot convert undefined or null to object".
    if (!Array.isArray(dataset) || dataset.length === 0) {
      throw new TypeError(
        "plotCorrelationMatrix requires a non-empty array of row objects."
      );
    }

    const firstRow = dataset[0];
    const columns = Object.keys(firstRow).filter(
      (key) => typeof firstRow[key] === "number"
    );

    const n = columns.length;
    const matrix = Array(n)
      .fill(0)
      .map(() => Array(n).fill(0));

    for (let i = 0; i < n; i++) {
      for (let j = 0; j < n; j++) {
        // Fresh arrays per pair: the correlation routine is not visible from
        // here, so no array is shared between calls in case it mutates input.
        const col1 = dataset.map((row) => row[columns[i]]);
        const col2 = dataset.map((row) => row[columns[j]]);
        matrix[i][j] = this.correlationPearson(col1, col2);
      }
    }

    return this.viz.heatmap(matrix, {
      title: "Correlation Matrix",
      labels: columns,
      ...options,
    });
  }

  /**
   * Renders one column as a histogram through this.viz.histogram.
   *
   * Missing entries (undefined, null, blank strings) and values that do not
   * coerce to a number are dropped before plotting.
   *
   * @param {object[]} dataset - Array of row objects.
   * @param {string} columnName - Key to read from each row.
   * @param {object} [options={}] - Spread last, so it can override the
   *   default `title`, `xlabel` and `ylabel`.
   * @returns {*} Whatever this.viz.histogram returns.
   */
  plotDistribution(dataset, columnName, options = {}) {
    const data = dataset.map((row) => row[columnName]).filter(isPlottableValue);

    return this.viz.histogram(data, {
      title: `Distribution of ${columnName}`,
      xlabel: columnName,
      ylabel: "Frequency",
      ...options,
    });
  }

  /**
   * Renders several columns side by side through this.viz.boxplot.
   *
   * Each column is filtered independently with the same rule as
   * plotDistribution, so the resulting series may differ in length.
   *
   * @param {object[]} dataset - Array of row objects.
   * @param {string[]} columnNames - Keys to read from each row.
   * @param {object} [options={}] - Spread last, so it can override the
   *   default `title` and `labels`.
   * @returns {*} Whatever this.viz.boxplot returns.
   */
  plotMultipleDistributions(dataset, columnNames, options = {}) {
    const data = columnNames.map((col) =>
      dataset.map((row) => row[col]).filter(isPlottableValue)
    );

    return this.viz.boxplot(data, {
      title: "Distribution Comparison",
      labels: columnNames,
      ...options,
    });
  }
}
|
473
|
+
|
474
|
+
export default Datly;
|
475
|
+
|
476
|
+
// TODO:
|
477
|
+
// Add only the methods used in D3 [X]
|
478
|
+
// Add the id of the element where the views should be attached
|
479
|
+
// Ensure every method in the documentation can be called through datly
|
480
|
+
// How can the plots be displayed in Observable HQ?
|