datly 0.0.6 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/datly.cjs +1 -1
- package/dist/datly.mjs +1 -1
- package/dist/datly.umd.js +1 -1
- package/package.json +1 -1
- package/src/code.js +81 -68
package/src/code.js
CHANGED
|
@@ -1,9 +1,4 @@
|
|
|
1
1
|
// datly.js — functional, text-first data-science toolkit for JavaScript
|
|
2
|
-
// design goals:
|
|
3
|
-
// - functional api (only functions)
|
|
4
|
-
// - every public function returns lowercase, human-readable structured TEXT
|
|
5
|
-
// - dataframe is a plain object; models are serialized as text (json string) and consumed by other funcs
|
|
6
|
-
|
|
7
2
|
// =========================
|
|
8
3
|
// Helpers internos
|
|
9
4
|
// =========================
|
|
@@ -29,54 +24,7 @@ const _empty_df = () => _build_df([], []);
|
|
|
29
24
|
|
|
30
25
|
const _uniq = (arr) => [...new Set(arr)];
|
|
31
26
|
|
|
32
|
-
|
|
33
|
-
// HELPER: Formatação de texto
|
|
34
|
-
// =========================
|
|
35
|
-
const _text = (obj, maxItems = 5) => {
|
|
36
|
-
const lowerKeys = (o) =>
|
|
37
|
-
Array.isArray(o)
|
|
38
|
-
? o.map(lowerKeys)
|
|
39
|
-
: o && typeof o === "object"
|
|
40
|
-
? Object.fromEntries(
|
|
41
|
-
Object.entries(o).map(([k, v]) => [
|
|
42
|
-
String(k).toLowerCase(),
|
|
43
|
-
lowerKeys(v),
|
|
44
|
-
])
|
|
45
|
-
)
|
|
46
|
-
: typeof o === "number" && Number.isFinite(o)
|
|
47
|
-
? Number(Number(o).toPrecision(12))
|
|
48
|
-
: o;
|
|
49
|
-
|
|
50
|
-
const normalized = lowerKeys(obj);
|
|
51
|
-
const lines = [];
|
|
52
|
-
|
|
53
|
-
const walk = (o, indent = 0) => {
|
|
54
|
-
const pad = " ".repeat(indent);
|
|
55
|
-
if (Array.isArray(o)) {
|
|
56
|
-
lines.push(pad + `- list (${o.length} items):`);
|
|
57
|
-
const limited = o.slice(0, maxItems);
|
|
58
|
-
limited.forEach((v) => walk(v, indent + 2));
|
|
59
|
-
if (o.length > maxItems) {
|
|
60
|
-
lines.push(pad + ` ... ${o.length - maxItems} more items omitted`);
|
|
61
|
-
}
|
|
62
|
-
} else if (o && typeof o === "object") {
|
|
63
|
-
Object.keys(o).forEach((k) => {
|
|
64
|
-
const v = o[k];
|
|
65
|
-
if (v && typeof v === "object") {
|
|
66
|
-
lines.push(pad + k + ":");
|
|
67
|
-
walk(v, indent + 2);
|
|
68
|
-
} else {
|
|
69
|
-
lines.push(pad + k + ": " + String(v).toLowerCase());
|
|
70
|
-
}
|
|
71
|
-
});
|
|
72
|
-
} else {
|
|
73
|
-
lines.push(pad + String(o).toLowerCase());
|
|
74
|
-
}
|
|
75
|
-
};
|
|
76
|
-
|
|
77
|
-
walk(normalized);
|
|
78
|
-
return lines.join("\n");
|
|
79
|
-
};
|
|
27
|
+
const _text = (obj) => obj;
|
|
80
28
|
|
|
81
29
|
const _flatten = (obj, prefix = "", maxDepth = 5, currentDepth = 0) => {
|
|
82
30
|
const result = {};
|
|
@@ -528,6 +476,7 @@ const normal_pdf = (x, mu = 0, sigma = 1) =>
|
|
|
528
476
|
? x.map((v) => _phi((v - mu) / sigma) / sigma)
|
|
529
477
|
: _phi((x - mu) / sigma) / sigma,
|
|
530
478
|
});
|
|
479
|
+
|
|
531
480
|
const normal_cdf = (x, mu = 0, sigma = 1) =>
|
|
532
481
|
_ok("distribution", {
|
|
533
482
|
name: "normal_cdf",
|
|
@@ -536,6 +485,7 @@ const normal_cdf = (x, mu = 0, sigma = 1) =>
|
|
|
536
485
|
? x.map((v) => _Phi((v - mu) / sigma))
|
|
537
486
|
: _Phi((x - mu) / sigma),
|
|
538
487
|
});
|
|
488
|
+
|
|
539
489
|
const normal_ppf = (p, mu = 0, sigma = 1) =>
|
|
540
490
|
_ok("distribution", {
|
|
541
491
|
name: "normal_ppf",
|
|
@@ -1009,7 +959,7 @@ const train_test_split = (X, y, test_size = 0.2, seed = 42) => {
|
|
|
1009
959
|
type: "split",
|
|
1010
960
|
sizes: { train: y_train.length, test: y_test.length },
|
|
1011
961
|
indices: { train: train_idx, test: test_idx },
|
|
1012
|
-
preview: { x_train:
|
|
962
|
+
preview: { x_train: X_train.slice(0, 2), y_train: y_train.slice(0, 5) },
|
|
1013
963
|
});
|
|
1014
964
|
};
|
|
1015
965
|
|
|
@@ -1859,6 +1809,7 @@ const standard_scaler_transform = (scaler_text, X) => {
|
|
|
1859
1809
|
return _text({
|
|
1860
1810
|
type: "scaled_data",
|
|
1861
1811
|
method: "standard",
|
|
1812
|
+
data: X_scaled,
|
|
1862
1813
|
preview: X_scaled.slice(0, 5),
|
|
1863
1814
|
});
|
|
1864
1815
|
} catch {
|
|
@@ -2135,57 +2086,104 @@ const ensemble_voting_regressor = (models_text, X_test) => {
|
|
|
2135
2086
|
|
|
2136
2087
|
const cross_validate = (X, y, model_type, opts = {}) => {
|
|
2137
2088
|
const k_folds = opts.k_folds ?? 5;
|
|
2089
|
+
const normalize = opts.normalize ?? false;
|
|
2090
|
+
const shuffle = opts.shuffle ?? true; // ← Embaralhar antes de dividir
|
|
2091
|
+
const seed = opts.seed ?? 42;
|
|
2138
2092
|
const n = X.length;
|
|
2139
|
-
const fold_size = Math.floor(n / k_folds);
|
|
2140
2093
|
|
|
2094
|
+
// ✅ Embaralhar índices se solicitado
|
|
2095
|
+
let indices = Array.from({ length: n }, (_, i) => i);
|
|
2096
|
+
if (shuffle) {
|
|
2097
|
+
let s = seed;
|
|
2098
|
+
const rand = () => (s = (s * 9301 + 49297) % 233280) / 233280;
|
|
2099
|
+
indices.sort(() => rand() - 0.5);
|
|
2100
|
+
}
|
|
2101
|
+
|
|
2102
|
+
const fold_size = Math.floor(n / k_folds);
|
|
2141
2103
|
const scores = [];
|
|
2142
2104
|
|
|
2143
2105
|
for (let fold = 0; fold < k_folds; fold++) {
|
|
2144
2106
|
const test_start = fold * fold_size;
|
|
2145
2107
|
const test_end = fold === k_folds - 1 ? n : (fold + 1) * fold_size;
|
|
2146
2108
|
|
|
2147
|
-
|
|
2148
|
-
const
|
|
2149
|
-
const
|
|
2150
|
-
|
|
2109
|
+
// Usar índices embaralhados
|
|
2110
|
+
const train_idx = [...indices.slice(0, test_start), ...indices.slice(test_end)];
|
|
2111
|
+
const test_idx = indices.slice(test_start, test_end);
|
|
2112
|
+
|
|
2113
|
+
let X_train = train_idx.map(i => X[i]);
|
|
2114
|
+
let y_train = train_idx.map(i => y[i]);
|
|
2115
|
+
let X_test = test_idx.map(i => X[i]);
|
|
2116
|
+
const y_test = test_idx.map(i => y[i]);
|
|
2117
|
+
|
|
2118
|
+
// Normalização dentro do fold
|
|
2119
|
+
if (normalize) {
|
|
2120
|
+
const scaler = standard_scaler_fit(X_train);
|
|
2121
|
+
const train_scaled = standard_scaler_transform(scaler, X_train);
|
|
2122
|
+
const test_scaled = standard_scaler_transform(scaler, X_test);
|
|
2123
|
+
|
|
2124
|
+
X_train = _lowerJson(train_scaled).data || train_scaled.data;
|
|
2125
|
+
X_test = _lowerJson(test_scaled).data || test_scaled.data;
|
|
2126
|
+
}
|
|
2151
2127
|
|
|
2152
2128
|
let model_text;
|
|
2153
2129
|
let predictions;
|
|
2154
2130
|
|
|
2131
|
+
// Treinar e avaliar baseado no tipo de modelo
|
|
2155
2132
|
if (model_type === "linear_regression") {
|
|
2156
2133
|
model_text = train_linear_regression(X_train, y_train);
|
|
2157
2134
|
const pred_result = _lowerJson(predict_linear(model_text, X_test));
|
|
2158
2135
|
predictions = pred_result.predictions;
|
|
2159
2136
|
const metrics = _lowerJson(metrics_regression(y_test, predictions));
|
|
2160
2137
|
scores.push(metrics.r2);
|
|
2138
|
+
|
|
2161
2139
|
} else if (model_type === "logistic_regression") {
|
|
2162
2140
|
model_text = train_logistic_regression(X_train, y_train, opts);
|
|
2163
2141
|
const pred_result = _lowerJson(predict_logistic(model_text, X_test));
|
|
2164
2142
|
predictions = pred_result.classes;
|
|
2165
2143
|
const metrics = _lowerJson(metrics_classification(y_test, predictions));
|
|
2166
2144
|
scores.push(metrics.accuracy);
|
|
2145
|
+
|
|
2167
2146
|
} else if (model_type === "knn_classifier") {
|
|
2168
2147
|
model_text = train_knn_classifier(X_train, y_train, opts.k ?? 5);
|
|
2169
|
-
const pred_result = _lowerJson(
|
|
2170
|
-
predict_knn_classifier(model_text, X_test)
|
|
2171
|
-
);
|
|
2148
|
+
const pred_result = _lowerJson(predict_knn_classifier(model_text, X_test));
|
|
2172
2149
|
predictions = pred_result.predictions;
|
|
2173
2150
|
const metrics = _lowerJson(metrics_classification(y_test, predictions));
|
|
2174
2151
|
scores.push(metrics.accuracy);
|
|
2152
|
+
|
|
2175
2153
|
} else if (model_type === "decision_tree_classifier") {
|
|
2176
2154
|
model_text = train_decision_tree_classifier(X_train, y_train, opts);
|
|
2177
2155
|
const pred_result = _lowerJson(predict_decision_tree(model_text, X_test));
|
|
2178
2156
|
predictions = pred_result.predictions;
|
|
2179
2157
|
const metrics = _lowerJson(metrics_classification(y_test, predictions));
|
|
2180
2158
|
scores.push(metrics.accuracy);
|
|
2159
|
+
|
|
2181
2160
|
} else if (model_type === "random_forest_classifier") {
|
|
2182
2161
|
model_text = train_random_forest_classifier(X_train, y_train, opts);
|
|
2183
|
-
const pred_result = _lowerJson(
|
|
2184
|
-
predict_random_forest_classifier(model_text, X_test)
|
|
2185
|
-
);
|
|
2162
|
+
const pred_result = _lowerJson(predict_random_forest_classifier(model_text, X_test));
|
|
2186
2163
|
predictions = pred_result.predictions;
|
|
2187
2164
|
const metrics = _lowerJson(metrics_classification(y_test, predictions));
|
|
2188
2165
|
scores.push(metrics.accuracy);
|
|
2166
|
+
|
|
2167
|
+
} else if (model_type === "knn_regressor") {
|
|
2168
|
+
model_text = train_knn_regressor(X_train, y_train, opts.k ?? 5);
|
|
2169
|
+
const pred_result = _lowerJson(predict_knn_regressor(model_text, X_test));
|
|
2170
|
+
predictions = pred_result.predictions;
|
|
2171
|
+
const metrics = _lowerJson(metrics_regression(y_test, predictions));
|
|
2172
|
+
scores.push(metrics.r2);
|
|
2173
|
+
|
|
2174
|
+
} else if (model_type === "decision_tree_regressor") {
|
|
2175
|
+
model_text = train_decision_tree_regressor(X_train, y_train, opts);
|
|
2176
|
+
const pred_result = _lowerJson(predict_decision_tree(model_text, X_test));
|
|
2177
|
+
predictions = pred_result.predictions;
|
|
2178
|
+
const metrics = _lowerJson(metrics_regression(y_test, predictions));
|
|
2179
|
+
scores.push(metrics.r2);
|
|
2180
|
+
|
|
2181
|
+
} else if (model_type === "random_forest_regressor") {
|
|
2182
|
+
model_text = train_random_forest_regressor(X_train, y_train, opts);
|
|
2183
|
+
const pred_result = _lowerJson(predict_random_forest_regressor(model_text, X_test));
|
|
2184
|
+
predictions = pred_result.predictions;
|
|
2185
|
+
const metrics = _lowerJson(metrics_regression(y_test, predictions));
|
|
2186
|
+
scores.push(metrics.r2);
|
|
2189
2187
|
}
|
|
2190
2188
|
}
|
|
2191
2189
|
|
|
@@ -2196,6 +2194,10 @@ const cross_validate = (X, y, model_type, opts = {}) => {
|
|
|
2196
2194
|
scores,
|
|
2197
2195
|
mean_score: _mean(scores),
|
|
2198
2196
|
std_score: _std(scores, true),
|
|
2197
|
+
min_score: _min(scores),
|
|
2198
|
+
max_score: _max(scores),
|
|
2199
|
+
normalized: normalize,
|
|
2200
|
+
shuffled: shuffle,
|
|
2199
2201
|
});
|
|
2200
2202
|
};
|
|
2201
2203
|
|
|
@@ -2507,8 +2509,19 @@ const df_info = (df) => {
|
|
|
2507
2509
|
df.columns.forEach((c) => {
|
|
2508
2510
|
const colVals = df.data.map((r) => r[c]);
|
|
2509
2511
|
const nonNull = colVals.filter((v) => v != null);
|
|
2510
|
-
|
|
2511
|
-
|
|
2512
|
+
|
|
2513
|
+
// Coletar os tipos únicos
|
|
2514
|
+
const typeSet = new Set(nonNull.map((v) => typeof v));
|
|
2515
|
+
|
|
2516
|
+
// Converter Set para array e pegar o tipo apropriado
|
|
2517
|
+
if (typeSet.size === 0) {
|
|
2518
|
+
types[c] = "empty";
|
|
2519
|
+
} else if (typeSet.size === 1) {
|
|
2520
|
+
types[c] = Array.from(typeSet)[0]; // ou [...typeSet][0]
|
|
2521
|
+
} else {
|
|
2522
|
+
types[c] = "mixed";
|
|
2523
|
+
}
|
|
2524
|
+
|
|
2512
2525
|
nulls[c] = colVals.length - nonNull.length;
|
|
2513
2526
|
uniques[c] = new Set(nonNull).size;
|
|
2514
2527
|
});
|
|
@@ -2931,4 +2944,4 @@ export {
|
|
|
2931
2944
|
moving_average,
|
|
2932
2945
|
exponential_smoothing,
|
|
2933
2946
|
autocorrelation,
|
|
2934
|
-
};
|
|
2947
|
+
};
|