datly 0.0.6 → 0.0.8

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
package/src/code.js CHANGED
@@ -1,9 +1,4 @@
1
1
  // datly.js — functional, text-first data-science toolkit for JavaScript
2
- // design goals:
3
- // - functional api (only functions)
4
- // - every public function returns lowercase, human-readable structured TEXT
5
- // - dataframe is a plain object; models are serialized as text (json string) and consumed by other funcs
6
-
7
2
  // =========================
8
3
  // Helpers internos
9
4
  // =========================
@@ -29,54 +24,7 @@ const _empty_df = () => _build_df([], []);
29
24
 
30
25
  const _uniq = (arr) => [...new Set(arr)];
31
26
 
32
- // =========================
33
- // HELPER: Formatação de texto
34
- // =========================
35
- const _text = (obj, maxItems = 5) => {
36
- const lowerKeys = (o) =>
37
- Array.isArray(o)
38
- ? o.map(lowerKeys)
39
- : o && typeof o === "object"
40
- ? Object.fromEntries(
41
- Object.entries(o).map(([k, v]) => [
42
- String(k).toLowerCase(),
43
- lowerKeys(v),
44
- ])
45
- )
46
- : typeof o === "number" && Number.isFinite(o)
47
- ? Number(Number(o).toPrecision(12))
48
- : o;
49
-
50
- const normalized = lowerKeys(obj);
51
- const lines = [];
52
-
53
- const walk = (o, indent = 0) => {
54
- const pad = " ".repeat(indent);
55
- if (Array.isArray(o)) {
56
- lines.push(pad + `- list (${o.length} items):`);
57
- const limited = o.slice(0, maxItems);
58
- limited.forEach((v) => walk(v, indent + 2));
59
- if (o.length > maxItems) {
60
- lines.push(pad + ` ... ${o.length - maxItems} more items omitted`);
61
- }
62
- } else if (o && typeof o === "object") {
63
- Object.keys(o).forEach((k) => {
64
- const v = o[k];
65
- if (v && typeof v === "object") {
66
- lines.push(pad + k + ":");
67
- walk(v, indent + 2);
68
- } else {
69
- lines.push(pad + k + ": " + String(v).toLowerCase());
70
- }
71
- });
72
- } else {
73
- lines.push(pad + String(o).toLowerCase());
74
- }
75
- };
76
-
77
- walk(normalized);
78
- return lines.join("\n");
79
- };
27
+ const _text = (obj) => obj;
80
28
 
81
29
  const _flatten = (obj, prefix = "", maxDepth = 5, currentDepth = 0) => {
82
30
  const result = {};
@@ -528,6 +476,7 @@ const normal_pdf = (x, mu = 0, sigma = 1) =>
528
476
  ? x.map((v) => _phi((v - mu) / sigma) / sigma)
529
477
  : _phi((x - mu) / sigma) / sigma,
530
478
  });
479
+
531
480
  const normal_cdf = (x, mu = 0, sigma = 1) =>
532
481
  _ok("distribution", {
533
482
  name: "normal_cdf",
@@ -536,6 +485,7 @@ const normal_cdf = (x, mu = 0, sigma = 1) =>
536
485
  ? x.map((v) => _Phi((v - mu) / sigma))
537
486
  : _Phi((x - mu) / sigma),
538
487
  });
488
+
539
489
  const normal_ppf = (p, mu = 0, sigma = 1) =>
540
490
  _ok("distribution", {
541
491
  name: "normal_ppf",
@@ -1009,7 +959,7 @@ const train_test_split = (X, y, test_size = 0.2, seed = 42) => {
1009
959
  type: "split",
1010
960
  sizes: { train: y_train.length, test: y_test.length },
1011
961
  indices: { train: train_idx, test: test_idx },
1012
- preview: { x_train: X_train.slice(0, 2), y_train: y_train.slice(0, 5) },
962
+ preview: { x_train: X_train.slice(0, 2), y_train: y_train.slice(0, 5) },
1013
963
  });
1014
964
  };
1015
965
 
@@ -1859,6 +1809,7 @@ const standard_scaler_transform = (scaler_text, X) => {
1859
1809
  return _text({
1860
1810
  type: "scaled_data",
1861
1811
  method: "standard",
1812
+ data: X_scaled,
1862
1813
  preview: X_scaled.slice(0, 5),
1863
1814
  });
1864
1815
  } catch {
@@ -2135,57 +2086,104 @@ const ensemble_voting_regressor = (models_text, X_test) => {
2135
2086
 
2136
2087
  const cross_validate = (X, y, model_type, opts = {}) => {
2137
2088
  const k_folds = opts.k_folds ?? 5;
2089
+ const normalize = opts.normalize ?? false;
2090
+ const shuffle = opts.shuffle ?? true; // ← Embaralhar antes de dividir
2091
+ const seed = opts.seed ?? 42;
2138
2092
  const n = X.length;
2139
- const fold_size = Math.floor(n / k_folds);
2140
2093
 
2094
+ // ✅ Embaralhar índices se solicitado
2095
+ let indices = Array.from({ length: n }, (_, i) => i);
2096
+ if (shuffle) {
2097
+ let s = seed;
2098
+ const rand = () => (s = (s * 9301 + 49297) % 233280) / 233280;
2099
+ indices.sort(() => rand() - 0.5);
2100
+ }
2101
+
2102
+ const fold_size = Math.floor(n / k_folds);
2141
2103
  const scores = [];
2142
2104
 
2143
2105
  for (let fold = 0; fold < k_folds; fold++) {
2144
2106
  const test_start = fold * fold_size;
2145
2107
  const test_end = fold === k_folds - 1 ? n : (fold + 1) * fold_size;
2146
2108
 
2147
- const X_train = [...X.slice(0, test_start), ...X.slice(test_end)];
2148
- const y_train = [...y.slice(0, test_start), ...y.slice(test_end)];
2149
- const X_test = X.slice(test_start, test_end);
2150
- const y_test = y.slice(test_start, test_end);
2109
+ // Usar índices embaralhados
2110
+ const train_idx = [...indices.slice(0, test_start), ...indices.slice(test_end)];
2111
+ const test_idx = indices.slice(test_start, test_end);
2112
+
2113
+ let X_train = train_idx.map(i => X[i]);
2114
+ let y_train = train_idx.map(i => y[i]);
2115
+ let X_test = test_idx.map(i => X[i]);
2116
+ const y_test = test_idx.map(i => y[i]);
2117
+
2118
+ // Normalização dentro do fold
2119
+ if (normalize) {
2120
+ const scaler = standard_scaler_fit(X_train);
2121
+ const train_scaled = standard_scaler_transform(scaler, X_train);
2122
+ const test_scaled = standard_scaler_transform(scaler, X_test);
2123
+
2124
+ X_train = _lowerJson(train_scaled).data || train_scaled.data;
2125
+ X_test = _lowerJson(test_scaled).data || test_scaled.data;
2126
+ }
2151
2127
 
2152
2128
  let model_text;
2153
2129
  let predictions;
2154
2130
 
2131
+ // Treinar e avaliar baseado no tipo de modelo
2155
2132
  if (model_type === "linear_regression") {
2156
2133
  model_text = train_linear_regression(X_train, y_train);
2157
2134
  const pred_result = _lowerJson(predict_linear(model_text, X_test));
2158
2135
  predictions = pred_result.predictions;
2159
2136
  const metrics = _lowerJson(metrics_regression(y_test, predictions));
2160
2137
  scores.push(metrics.r2);
2138
+
2161
2139
  } else if (model_type === "logistic_regression") {
2162
2140
  model_text = train_logistic_regression(X_train, y_train, opts);
2163
2141
  const pred_result = _lowerJson(predict_logistic(model_text, X_test));
2164
2142
  predictions = pred_result.classes;
2165
2143
  const metrics = _lowerJson(metrics_classification(y_test, predictions));
2166
2144
  scores.push(metrics.accuracy);
2145
+
2167
2146
  } else if (model_type === "knn_classifier") {
2168
2147
  model_text = train_knn_classifier(X_train, y_train, opts.k ?? 5);
2169
- const pred_result = _lowerJson(
2170
- predict_knn_classifier(model_text, X_test)
2171
- );
2148
+ const pred_result = _lowerJson(predict_knn_classifier(model_text, X_test));
2172
2149
  predictions = pred_result.predictions;
2173
2150
  const metrics = _lowerJson(metrics_classification(y_test, predictions));
2174
2151
  scores.push(metrics.accuracy);
2152
+
2175
2153
  } else if (model_type === "decision_tree_classifier") {
2176
2154
  model_text = train_decision_tree_classifier(X_train, y_train, opts);
2177
2155
  const pred_result = _lowerJson(predict_decision_tree(model_text, X_test));
2178
2156
  predictions = pred_result.predictions;
2179
2157
  const metrics = _lowerJson(metrics_classification(y_test, predictions));
2180
2158
  scores.push(metrics.accuracy);
2159
+
2181
2160
  } else if (model_type === "random_forest_classifier") {
2182
2161
  model_text = train_random_forest_classifier(X_train, y_train, opts);
2183
- const pred_result = _lowerJson(
2184
- predict_random_forest_classifier(model_text, X_test)
2185
- );
2162
+ const pred_result = _lowerJson(predict_random_forest_classifier(model_text, X_test));
2186
2163
  predictions = pred_result.predictions;
2187
2164
  const metrics = _lowerJson(metrics_classification(y_test, predictions));
2188
2165
  scores.push(metrics.accuracy);
2166
+
2167
+ } else if (model_type === "knn_regressor") {
2168
+ model_text = train_knn_regressor(X_train, y_train, opts.k ?? 5);
2169
+ const pred_result = _lowerJson(predict_knn_regressor(model_text, X_test));
2170
+ predictions = pred_result.predictions;
2171
+ const metrics = _lowerJson(metrics_regression(y_test, predictions));
2172
+ scores.push(metrics.r2);
2173
+
2174
+ } else if (model_type === "decision_tree_regressor") {
2175
+ model_text = train_decision_tree_regressor(X_train, y_train, opts);
2176
+ const pred_result = _lowerJson(predict_decision_tree(model_text, X_test));
2177
+ predictions = pred_result.predictions;
2178
+ const metrics = _lowerJson(metrics_regression(y_test, predictions));
2179
+ scores.push(metrics.r2);
2180
+
2181
+ } else if (model_type === "random_forest_regressor") {
2182
+ model_text = train_random_forest_regressor(X_train, y_train, opts);
2183
+ const pred_result = _lowerJson(predict_random_forest_regressor(model_text, X_test));
2184
+ predictions = pred_result.predictions;
2185
+ const metrics = _lowerJson(metrics_regression(y_test, predictions));
2186
+ scores.push(metrics.r2);
2189
2187
  }
2190
2188
  }
2191
2189
 
@@ -2196,6 +2194,10 @@ const cross_validate = (X, y, model_type, opts = {}) => {
2196
2194
  scores,
2197
2195
  mean_score: _mean(scores),
2198
2196
  std_score: _std(scores, true),
2197
+ min_score: _min(scores),
2198
+ max_score: _max(scores),
2199
+ normalized: normalize,
2200
+ shuffled: shuffle,
2199
2201
  });
2200
2202
  };
2201
2203
 
@@ -2507,8 +2509,19 @@ const df_info = (df) => {
2507
2509
  df.columns.forEach((c) => {
2508
2510
  const colVals = df.data.map((r) => r[c]);
2509
2511
  const nonNull = colVals.filter((v) => v != null);
2510
- const tset = new Set(nonNull.map((v) => typeof v));
2511
- types[c] = tset.size === 1 ? [...tset][0] : "mixed";
2512
+
2513
+ // Coletar os tipos únicos
2514
+ const typeSet = new Set(nonNull.map((v) => typeof v));
2515
+
2516
+ // Converter Set para array e pegar o tipo apropriado
2517
+ if (typeSet.size === 0) {
2518
+ types[c] = "empty";
2519
+ } else if (typeSet.size === 1) {
2520
+ types[c] = Array.from(typeSet)[0]; // ou [...typeSet][0]
2521
+ } else {
2522
+ types[c] = "mixed";
2523
+ }
2524
+
2512
2525
  nulls[c] = colVals.length - nonNull.length;
2513
2526
  uniques[c] = new Set(nonNull).size;
2514
2527
  });
@@ -2931,4 +2944,4 @@ export {
2931
2944
  moving_average,
2932
2945
  exponential_smoothing,
2933
2946
  autocorrelation,
2934
- };
2947
+ };