datly 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.js CHANGED
@@ -1,480 +1,236 @@
1
- import DataLoader from "./core/dataLoader.js";
2
- import Validator from "./core/validator.js";
3
- import Utils from "./core/utils.js";
4
- import CentralTendency from "./descriptive/centralTendency.js";
5
- import Dispersion from "./descriptive/dispersion.js";
6
- import Position from "./descriptive/position.js";
7
- import Shape from "./descriptive/shape.js";
8
- import HypothesisTesting from "./inferential/hypothesisTesting.js";
9
- import ConfidenceIntervals from "./inferential/confidenceIntervals.js";
10
- import NormalityTests from "./inferential/normalityTests.js";
11
- import Correlation from "./multivariate/correlation.js";
12
- import Regression from "./multivariate/regression.js";
13
- import ReportGenerator from "./insights/reportGenerator.js";
14
- import PatternDetector from "./insights/patternDetector.js";
15
- import Interpreter from "./insights/interpreter.js";
16
- import AutoAnalyzer from "./insights/autoAnalyser.js";
17
- import MachineLearning from "./ml/machineLearning.js";
18
- import DataViz from "./dataviz/index.js";
19
-
20
- class Datly {
21
- constructor() {
22
- this.dataLoader = new DataLoader();
23
- this.validator = new Validator();
24
- this.utils = new Utils();
25
- this.centralTendency = new CentralTendency();
26
- this.dispersion = new Dispersion();
27
- this.position = new Position();
28
- this.shape = new Shape();
29
- this.hypothesisTesting = new HypothesisTesting();
30
- this.confidenceIntervals = new ConfidenceIntervals();
31
- this.normalityTests = new NormalityTests();
32
- this.correlation = new Correlation();
33
- this.regression = new Regression();
34
- this.reportGenerator = new ReportGenerator();
35
- this.patternDetector = new PatternDetector();
36
- this.interpreter = new Interpreter();
37
- this.autoAnalyzer = new AutoAnalyzer(this);
38
- this.ml = new MachineLearning();
39
- this.viz = new DataViz();
40
- }
41
-
42
- // ====== Loaders / Utils ======
43
- async loadCSV(filePath, options = {}) {
44
- return this.dataLoader.loadCSV(filePath, options);
45
- }
46
-
47
- async loadJSON(jsonInput, options = {}) {
48
- return this.dataLoader.loadJSON(jsonInput, options);
49
- }
50
-
51
- cleanData(dataset) {
52
- return this.dataLoader.cleanData(dataset);
53
- }
54
-
55
- getColumn(dataset, columnName) {
56
- return this.dataLoader.getColumn(dataset, columnName);
57
- }
58
-
59
- getColumns(dataset, columnNames) {
60
- return this.dataLoader.getColumns(dataset, columnNames);
61
- }
62
-
63
- validateData(dataset) {
64
- return this.validator.validateData(dataset);
65
- }
66
-
67
- getDataInfo(dataset) {
68
- return this.dataLoader.getDataInfo(dataset);
69
- }
70
-
71
- // ====== Descriptive: Central Tendency ======
72
- mean(column) {
73
- return this.centralTendency.mean(column);
74
- }
75
-
76
- median(column) {
77
- return this.centralTendency.median(column);
78
- }
79
-
80
- mode(column) {
81
- return this.centralTendency.mode(column);
82
- }
83
-
84
- geometricMean(column) {
85
- return this.centralTendency.geometricMean(column);
86
- }
87
-
88
- harmonicMean(column) {
89
- return this.centralTendency.harmonicMean(column);
90
- }
91
-
92
- trimmedMean(column, percentage) {
93
- return this.centralTendency.trimmedMean(column, percentage);
94
- }
95
-
96
- // ====== Descriptive: Dispersion ======
97
- variance(column) {
98
- return this.dispersion.variance(column);
99
- }
100
-
101
- standardDeviation(column) {
102
- return this.dispersion.standardDeviation(column);
103
- }
104
-
105
- range(column) {
106
- return this.dispersion.range(column);
107
- }
108
-
109
- interquartileRange(column) {
110
- return this.dispersion.interquartileRange(column);
111
- }
112
-
113
- coefficientOfVariation(column) {
114
- return this.dispersion.coefficientOfVariation(column);
115
- }
116
-
117
- meanAbsoluteDeviation(column) {
118
- return this.dispersion.meanAbsoluteDeviation(column);
119
- }
120
-
121
- // ====== Descriptive: Position & Shape ======
122
- quantile(column, q) {
123
- return this.position.quantile(column, q);
124
- }
125
-
126
- percentile(column, p) {
127
- return this.position.percentile(column, p);
128
- }
129
-
130
- quartiles(column) {
131
- return this.position.quartiles(column);
132
- }
133
-
134
- quintiles(column) {
135
- return this.position.quintiles(column);
136
- }
137
-
138
- deciles(column) {
139
- return this.position.deciles(column);
140
- }
141
-
142
- percentileRank(column, value) {
143
- return this.position.percentileRank(column, value);
144
- }
145
-
146
- zScore(column, value) {
147
- return this.position.zScore(column, value);
148
- }
149
-
150
- boxplotStats(column) {
151
- return this.position.boxplotStats(column);
152
- }
153
-
154
- fiveNumberSummary(column) {
155
- return this.position.fiveNumberSummary(column);
156
- }
157
-
158
- rank(column, method = "average") {
159
- return this.position.rank(column, method);
160
- }
161
-
162
- normalizedRank(column) {
163
- return this.position.normalizedRank(column);
164
- }
165
-
166
- standardizedValues(column) {
167
- return this.position.standardizedValues(column);
168
- }
169
-
170
- skewness(column) {
171
- return this.shape.skewness(column);
172
- }
173
-
174
- kurtosis(column) {
175
- return this.shape.kurtosis(column);
176
- }
177
-
178
- isNormalDistribution(column) {
179
- return this.shape.isNormalDistribution(column);
180
- }
181
-
182
- // ====== Multivariate ======
183
- correlationPearson(col1, col2) {
184
- return this.correlation.pearson(col1, col2);
185
- }
186
-
187
- correlationSpearman(col1, col2) {
188
- return this.correlation.spearman(col1, col2);
189
- }
190
-
191
- correlationMatrix(dataset) {
192
- return this.correlation.matrix(dataset);
193
- }
194
-
195
- covariance(col1, col2) {
196
- return this.correlation.covariance(col1, col2);
197
- }
198
-
199
- // ====== Inferential ======
200
- tTest(sample1, sample2, type = "two-sample") {
201
- return this.hypothesisTesting.tTest(sample1, sample2, type);
202
- }
203
-
204
- zTest(sample, populationMean, populationStd) {
205
- return this.hypothesisTesting.zTest(sample, populationMean, populationStd);
206
- }
207
-
208
- anovaTest(groups) {
209
- return this.hypothesisTesting.anovaTest(groups);
210
- }
211
-
212
- chiSquareTest(col1, col2) {
213
- return this.hypothesisTesting.chiSquareTest(col1, col2);
214
- }
215
-
216
- confidenceInterval(sample, confidence = 0.95) {
217
- return this.confidenceIntervals.mean(sample, confidence);
218
- }
219
-
220
- shapiroWilkTest(sample) {
221
- return this.normalityTests.shapiroWilk(sample);
222
- }
223
-
224
- // ====== Regression ======
225
- linearRegression(x, y) {
226
- return this.regression.linear(x, y);
227
- }
228
-
229
- // ====== Utils ======
230
- detectOutliers(column, method = "iqr") {
231
- return this.utils.detectOutliers(column, method);
232
- }
233
-
234
- frequencyTable(column) {
235
- return this.utils.frequencyTable(column);
236
- }
237
-
238
- groupBy(dataset, column, aggregation) {
239
- return this.utils.groupBy(dataset, column, aggregation);
240
- }
241
-
242
- sample(dataset, size, method = "random") {
243
- return this.utils.sample(dataset, size, method);
244
- }
245
-
246
- // ====== Insights/Reports ======
247
- generateSummaryReport(dataset) {
248
- return this.reportGenerator.summary(dataset);
249
- }
250
-
251
- identifyPatterns(dataset) {
252
- return this.patternDetector.analyze(dataset);
253
- }
254
-
255
- interpretResults(testResult) {
256
- return this.interpreter.interpret(testResult);
257
- }
258
-
259
- // ====== AutoAnalyzer ======
260
- autoAnalyze(dataset, options = {}) {
261
- return this.autoAnalyzer.autoAnalyze(dataset, options);
262
- }
263
-
264
- async autoAnalyzeFromFile(filePath, loadOptions = {}, analysisOptions = {}) {
265
- let dataset;
266
- const lower = filePath.toLowerCase();
267
- if (lower.endsWith(".csv")) {
268
- dataset = await this.loadCSV(filePath, loadOptions);
269
- } else if (lower.endsWith(".json")) {
270
- dataset = await this.loadJSON(filePath, loadOptions);
271
- } else {
272
- throw new Error("Formato de arquivo não suportado. Use CSV ou JSON.");
273
- }
274
- return this.autoAnalyze(dataset, analysisOptions);
275
- }
276
-
277
- async quickAnalysis(filePath, options = {}) {
278
- const result = await this.autoAnalyzeFromFile(filePath, {}, options);
279
-
280
- console.log("\n" + "=".repeat(60));
281
- console.log("📊 AUTO REPORT");
282
- console.log("=".repeat(60));
283
-
284
- console.log(`\n📈 EXECUTIVE RESUME:`);
285
- console.log(`• Total insights: ${result.summary.totalInsights}`);
286
- console.log(`• Priority Insights: ${result.summary.highPriorityInsights}`);
287
-
288
- console.log(`\n🔍 MAIN INSIGHTS:`);
289
- result.summary.keyFindings.forEach((f, i) => {
290
- console.log(`${i + 1}. ${f.title}`);
291
- console.log(` ${f.description}`);
292
- });
293
-
294
- console.log(`\n💡 RECOMMENDATIONS:`);
295
- result.summary.recommendations.forEach((rec, i) => {
296
- console.log(`${i + 1}. ${rec}`);
297
- });
298
-
299
- console.log("\n" + "=".repeat(60));
300
- return result;
301
- }
302
-
303
- createLinearRegression(options) {
304
- return this.ml.createLinearRegression(options);
305
- }
306
-
307
- createLogisticRegression(options) {
308
- return this.ml.createLogisticRegression(options);
309
- }
310
-
311
- createKNN(options) {
312
- return this.ml.createKNN(options);
313
- }
314
-
315
- createDecisionTree(options) {
316
- return this.ml.createDecisionTree(options);
317
- }
318
-
319
- createRandomForest(options) {
320
- return this.ml.createRandomForest(options);
321
- }
322
-
323
- createNaiveBayes(options) {
324
- return this.ml.createNaiveBayes(options);
325
- }
326
-
327
- createSVM(options) {
328
- return this.ml.createSVM(options);
329
- }
330
-
331
- // ====== Machine Learning: Utilities ======
332
- trainTestSplit(X, y, testSize = 0.2, shuffle = true) {
333
- return this.ml.trainTestSplit(X, y, testSize, shuffle);
334
- }
335
-
336
- crossValidate(model, X, y, folds = 5, taskType = "classification") {
337
- return this.ml.crossValidate(model, X, y, folds, taskType);
338
- }
339
-
340
- compareModels(models, X, y, taskType = "classification") {
341
- return this.ml.compareModels(models, X, y, taskType);
342
- }
343
-
344
- quickTrain(modelType, X, y, options = {}) {
345
- return this.ml.quickTrain(modelType, X, y, options);
346
- }
347
-
348
- // ====== Machine Learning: Feature Engineering ======
349
- polynomialFeatures(X, degree = 2) {
350
- return this.ml.polynomialFeatures(X, degree);
351
- }
352
-
353
- standardScaler(X) {
354
- return this.ml.standardScaler(X);
355
- }
356
-
357
- minMaxScaler(X, featureRange = [0, 1]) {
358
- return this.ml.minMaxScaler(X, featureRange);
359
- }
360
-
361
- // ====== Machine Learning: Metrics ======
362
- rocCurve(yTrue, yProba) {
363
- return this.ml.rocCurve(yTrue, yProba);
364
- }
365
-
366
- precisionRecallCurve(yTrue, yProba) {
367
- return this.ml.precisionRecallCurve(yTrue, yProba);
368
- }
369
-
370
- // ====== Data Visualization: Basic Plots ======
371
- plotHistogram(data, options) {
372
- return this.viz.histogram(data, options);
373
- }
374
-
375
- plotBoxplot(data, options) {
376
- return this.viz.boxplot(data, options);
377
- }
378
-
379
- plotScatter(xData, yData, options) {
380
- return this.viz.scatter(xData, yData, options);
381
- }
382
-
383
- plotLine(xData, yData, options) {
384
- return this.viz.line(xData, yData, options);
385
- }
386
-
387
- plotBar(categories, values, options) {
388
- return this.viz.bar(categories, values, options);
389
- }
390
-
391
- plotPie(labels, values, options) {
392
- return this.viz.pie(labels, values, options);
393
- }
394
-
395
- // ====== Data Visualization: Advanced Plots ======
396
- plotHeatmap(matrix, options) {
397
- return this.viz.heatmap(matrix, options);
398
- }
399
-
400
- plotViolin(data, options) {
401
- return this.viz.violin(data, options);
402
- }
403
-
404
- plotDensity(data, options) {
405
- return this.viz.density(data, options);
406
- }
407
-
408
- plotQQ(data, options) {
409
- return this.viz.qqplot(data, options);
410
- }
411
-
412
- plotParallel(data, dimensions, options) {
413
- return this.viz.parallel(data, dimensions, options);
414
- }
415
-
416
- plotPairplot(data, variables, options) {
417
- return this.viz.pairplot(data, variables, options);
418
- }
419
-
420
- plotMultiline(series, options) {
421
- return this.viz.multiline(series, options);
422
- }
423
-
424
- // ====== Data Visualization: Helpers ======
425
- plotCorrelationMatrix(dataset, options = {}) {
426
- const columns = Object.keys(dataset[0]).filter(
427
- (key) => typeof dataset[0][key] === "number"
428
- );
429
-
430
- const n = columns.length;
431
- const matrix = Array(n)
432
- .fill(0)
433
- .map(() => Array(n).fill(0));
434
-
435
- for (let i = 0; i < n; i++) {
436
- for (let j = 0; j < n; j++) {
437
- const col1 = dataset.map((row) => row[columns[i]]);
438
- const col2 = dataset.map((row) => row[columns[j]]);
439
- matrix[i][j] = this.correlationPearson(col1, col2);
440
- }
441
- }
442
-
443
- return this.viz.heatmap(matrix, {
444
- title: "Correlation Matrix",
445
- labels: columns,
446
- ...options,
447
- });
448
- }
449
-
450
- plotDistribution(dataset, columnName, options = {}) {
451
- const data = dataset.map((row) => row[columnName]).filter((v) => !isNaN(v));
452
-
453
- return this.viz.histogram(data, {
454
- title: `Distribution of ${columnName}`,
455
- xlabel: columnName,
456
- ylabel: "Frequency",
457
- ...options,
458
- });
459
- }
460
-
461
- plotMultipleDistributions(dataset, columnNames, options = {}) {
462
- const data = columnNames.map((col) =>
463
- dataset.map((row) => row[col]).filter((v) => !isNaN(v))
464
- );
465
-
466
- return this.viz.boxplot(data, {
467
- title: "Distribution Comparison",
468
- labels: columnNames,
469
- ...options,
470
- });
471
- }
472
- }
473
-
474
- export default Datly;
475
-
476
- // TODO:
477
- // Adicionar apenas os métodos usados no D3 [X]
478
- // Adicionar o id do elemento onde quero adicionar as views
479
- // Garantir que todos os métodos da documentação consigam ser chamados pelo datly
480
- // como exibir os plots no observable hq?
1
+ import {
2
+ plotHistogram,
3
+ plotBoxplot,
4
+ plotScatter,
5
+ plotLine,
6
+ plotBar,
7
+ plotPie,
8
+ plotHeatmap,
9
+ plotViolin,
10
+ plotDensity,
11
+ plotQQ,
12
+ plotParallel,
13
+ plotPairplot,
14
+ plotMultiline
15
+ } from './plot.js';
16
+
17
+ import {
18
+ // dataframe
19
+ dataframe_from_json,
20
+ df_describe,
21
+ df_missing_report,
22
+ df_corr,
23
+ eda_overview,
24
+ // stats
25
+ mean,
26
+ stddeviation,
27
+ variance,
28
+ median,
29
+ quantile,
30
+ minv,
31
+ maxv,
32
+ skewness,
33
+ kurtosis,
34
+ corr_pearson,
35
+ corr_spearman,
36
+ // distributions
37
+ normal_pdf,
38
+ normal_cdf,
39
+ normal_ppf,
40
+ binomial_pmf,
41
+ binomial_cdf,
42
+ poisson_pmf,
43
+ poisson_cdf,
44
+ // hypothesis tests
45
+ t_test_independent,
46
+ z_test_one_sample,
47
+ chi_square_independence,
48
+ anova_oneway,
49
+ // ml
50
+ train_test_split,
51
+ train_linear_regression,
52
+ train_logistic_regression,
53
+ predict_linear,
54
+ predict_logistic,
55
+ metrics_classification,
56
+ metrics_regression,
57
+ // additional statistical tests
58
+ t_test_paired,
59
+ t_test_one_sample,
60
+ shapiro_wilk,
61
+ jarque_bera,
62
+ levene_test,
63
+ kruskal_wallis,
64
+ mann_whitney,
65
+ wilcoxon_signed_rank,
66
+ chi_square_goodness,
67
+ // confidence intervals
68
+ confidence_interval_mean,
69
+ confidence_interval_proportion,
70
+ confidence_interval_variance,
71
+ confidence_interval_difference,
72
+ // additional correlations
73
+ corr_kendall,
74
+ corr_partial,
75
+ corr_matrix_all,
76
+ // knn
77
+ train_knn_classifier,
78
+ predict_knn_classifier,
79
+ train_knn_regressor,
80
+ predict_knn_regressor,
81
+ // decision trees
82
+ train_decision_tree_classifier,
83
+ train_decision_tree_regressor,
84
+ predict_decision_tree,
85
+ // random forest
86
+ train_random_forest_classifier,
87
+ train_random_forest_regressor,
88
+ predict_random_forest_classifier,
89
+ predict_random_forest_regressor,
90
+ // naive bayes
91
+ train_naive_bayes,
92
+ predict_naive_bayes,
93
+ // feature scaling
94
+ standard_scaler_fit,
95
+ standard_scaler_transform,
96
+ minmax_scaler_fit,
97
+ minmax_scaler_transform,
98
+ // dimensionality reduction
99
+ train_pca,
100
+ transform_pca,
101
+ // clustering
102
+ train_kmeans,
103
+ predict_kmeans,
104
+ // ensemble
105
+ ensemble_voting_classifier,
106
+ ensemble_voting_regressor,
107
+ // cross-validation
108
+ cross_validate,
109
+ // feature importance
110
+ feature_importance_tree,
111
+ // outlier detection
112
+ outliers_iqr,
113
+ outliers_zscore,
114
+ // time series
115
+ moving_average,
116
+ exponential_smoothing,
117
+ autocorrelation,
118
+ } from './code.js'
119
+
120
+ const daitly = {
121
+ dataframe_from_json,
122
+ df_describe,
123
+ df_missing_report,
124
+ df_corr,
125
+ eda_overview,
126
+ // stats
127
+ mean,
128
+ stddeviation,
129
+ variance,
130
+ median,
131
+ quantile,
132
+ minv,
133
+ maxv,
134
+ skewness,
135
+ kurtosis,
136
+ corr_pearson,
137
+ corr_spearman,
138
+ // distributions
139
+ normal_pdf,
140
+ normal_cdf,
141
+ normal_ppf,
142
+ binomial_pmf,
143
+ binomial_cdf,
144
+ poisson_pmf,
145
+ poisson_cdf,
146
+ // hypothesis tests
147
+ t_test_independent,
148
+ z_test_one_sample,
149
+ chi_square_independence,
150
+ anova_oneway,
151
+ // ml
152
+ train_test_split,
153
+ train_linear_regression,
154
+ train_logistic_regression,
155
+ predict_linear,
156
+ predict_logistic,
157
+ metrics_classification,
158
+ metrics_regression,
159
+ // additional statistical tests
160
+ t_test_paired,
161
+ t_test_one_sample,
162
+ shapiro_wilk,
163
+ jarque_bera,
164
+ levene_test,
165
+ kruskal_wallis,
166
+ mann_whitney,
167
+ wilcoxon_signed_rank,
168
+ chi_square_goodness,
169
+ // confidence intervals
170
+ confidence_interval_mean,
171
+ confidence_interval_proportion,
172
+ confidence_interval_variance,
173
+ confidence_interval_difference,
174
+ // additional correlations
175
+ corr_kendall,
176
+ corr_partial,
177
+ corr_matrix_all,
178
+ // knn
179
+ train_knn_classifier,
180
+ predict_knn_classifier,
181
+ train_knn_regressor,
182
+ predict_knn_regressor,
183
+ // decision trees
184
+ train_decision_tree_classifier,
185
+ train_decision_tree_regressor,
186
+ predict_decision_tree,
187
+ // random forest
188
+ train_random_forest_classifier,
189
+ train_random_forest_regressor,
190
+ predict_random_forest_classifier,
191
+ predict_random_forest_regressor,
192
+ // naive bayes
193
+ train_naive_bayes,
194
+ predict_naive_bayes,
195
+ // feature scaling
196
+ standard_scaler_fit,
197
+ standard_scaler_transform,
198
+ minmax_scaler_fit,
199
+ minmax_scaler_transform,
200
+ // dimensionality reduction
201
+ train_pca,
202
+ transform_pca,
203
+ // clustering
204
+ train_kmeans,
205
+ predict_kmeans,
206
+ // ensemble
207
+ ensemble_voting_classifier,
208
+ ensemble_voting_regressor,
209
+ // cross-validation
210
+ cross_validate,
211
+ // feature importance
212
+ feature_importance_tree,
213
+ // outlier detection
214
+ outliers_iqr,
215
+ outliers_zscore,
216
+ // time series
217
+ moving_average,
218
+ exponential_smoothing,
219
+ autocorrelation,
220
+ // plots
221
+ plotHistogram,
222
+ plotBoxplot,
223
+ plotScatter,
224
+ plotLine,
225
+ plotBar,
226
+ plotPie,
227
+ plotHeatmap,
228
+ plotViolin,
229
+ plotDensity,
230
+ plotQQ,
231
+ plotParallel,
232
+ plotPairplot,
233
+ plotMultiline
234
+ };
235
+
236
+ export default daitly;