datly 0.0.1 → 0.0.2
This diff shows the changes between publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
- package/dist/datly.cjs +1 -0
- package/dist/datly.mjs +1 -0
- package/dist/datly.umd.js +1 -1
- package/dist/datly.umd.js.map +1 -0
- package/package.json +24 -11
- package/src/core/dataLoader.js +407 -0
- package/src/core/utils.js +306 -0
- package/src/core/validator.js +205 -0
- package/src/dataviz/index.js +1566 -0
- package/src/descriptive/centralTendency.js +208 -0
- package/src/descriptive/dispersion.js +273 -0
- package/src/descriptive/position.js +268 -0
- package/src/descriptive/shape.js +336 -0
- package/src/index.js +480 -0
- package/src/inferential/confidenceIntervals.js +561 -0
- package/src/inferential/hypothesisTesting.js +527 -0
- package/src/inferential/normalityTests.js +587 -0
- package/src/insights/autoAnalyser.js +685 -0
- package/src/insights/interpreter.js +543 -0
- package/src/insights/patternDetector.js +897 -0
- package/src/insights/reportGenerator.js +1072 -0
- package/src/ml/ClassificationMetrics.js +336 -0
- package/src/ml/DecisionTree.js +412 -0
- package/src/ml/KNearestNeighbors.js +317 -0
- package/src/ml/LinearRegression.js +179 -0
- package/src/ml/LogisticRegression.js +396 -0
- package/src/ml/MachineLearning.js +490 -0
- package/src/ml/NaiveBayes.js +296 -0
- package/src/ml/RandomForest.js +323 -0
- package/src/ml/SupportVectorMachine.js +299 -0
- package/src/ml/baseModel.js +106 -0
- package/src/multivariate/correlation.js +653 -0
- package/src/multivariate/regression.js +660 -0
package/src/ml/LinearRegression.js
@@ -0,0 +1,179 @@
import BaseModel from './baseModel.js';

class LinearRegression extends BaseModel {
  constructor(learningRate = 0.01, iterations = 1000, regularization = null, lambda = 0.01) {
    super();
    this.learningRate = learningRate;
    this.iterations = iterations;
    this.regularization = regularization; // 'l1', 'l2', or null
    this.lambda = lambda;
    this.weights = null;
    this.bias = null;
    this.normParams = null;
  }

  // Trains with full-batch gradient descent on MSE (plus optional L1/L2 penalty).
  fit(X, y, normalize = true) {
    this.validateTrainingData(X, y);

    let X_train = X.map(row => Array.isArray(row) ? row : [row]);

    if (normalize) {
      const { normalized, means, stds } = this.normalizeFeatures(X_train);
      X_train = normalized;
      this.normParams = { means, stds };
    }

    const n = X_train.length;
    const m = X_train[0].length;

    this.weights = Array(m).fill(0);
    this.bias = 0;

    const losses = [];

    for (let iter = 0; iter < this.iterations; iter++) {
      const predictions = X_train.map(x => this.predictSingle(x));
      const errors = predictions.map((pred, i) => pred - y[i]);

      // Gradient descent
      const weightGradients = Array(m).fill(0);
      let biasGradient = 0;

      for (let i = 0; i < n; i++) {
        biasGradient += errors[i];
        for (let j = 0; j < m; j++) {
          weightGradients[j] += errors[i] * X_train[i][j];
        }
      }

      // Apply regularization
      for (let j = 0; j < m; j++) {
        if (this.regularization === 'l2') {
          weightGradients[j] += this.lambda * this.weights[j];
        } else if (this.regularization === 'l1') {
          weightGradients[j] += this.lambda * Math.sign(this.weights[j]);
        }
        this.weights[j] -= (this.learningRate / n) * weightGradients[j];
      }

      this.bias -= (this.learningRate / n) * biasGradient;

      // Calculate loss
      const loss = this.calculateLoss(predictions, y);
      losses.push(loss);
    }

    this.trained = true;
    this.trainingMetrics = {
      finalLoss: losses[losses.length - 1],
      losses: losses,
      weights: [...this.weights],
      bias: this.bias
    };

    return this;
  }

  predictSingle(x) {
    let sum = this.bias;
    for (let j = 0; j < this.weights.length; j++) {
      sum += this.weights[j] * x[j];
    }
    return sum;
  }

  predict(X) {
    this.validatePredictionData(X);

    let X_test = X.map(row => Array.isArray(row) ? row : [row]);

    if (this.normParams) {
      const { means, stds } = this.normParams;
      X_test = X_test.map(row =>
        row.map((val, j) => (val - means[j]) / stds[j])
      );
    }

    return X_test.map(x => this.predictSingle(x));
  }

  calculateLoss(predictions, y) {
    const mse = predictions.reduce((sum, pred, i) =>
      sum + Math.pow(pred - y[i], 2), 0) / predictions.length;

    if (this.regularization === 'l2') {
      const l2 = this.weights.reduce((sum, w) => sum + w * w, 0);
      return mse + this.lambda * l2;
    } else if (this.regularization === 'l1') {
      const l1 = this.weights.reduce((sum, w) => sum + Math.abs(w), 0);
      return mse + this.lambda * l1;
    }

    return mse;
  }

  // Returns R², MSE, RMSE, MAE, plus the raw predictions and residuals.
  score(X, y) {
    const predictions = this.predict(X);
    const yMean = y.reduce((sum, val) => sum + val, 0) / y.length;

    const ssRes = predictions.reduce((sum, pred, i) =>
      sum + Math.pow(y[i] - pred, 2), 0);
    const ssTot = y.reduce((sum, val) =>
      sum + Math.pow(val - yMean, 2), 0);

    const r2 = 1 - (ssRes / ssTot);
    const mse = ssRes / y.length;
    const rmse = Math.sqrt(mse);
    const mae = predictions.reduce((sum, pred, i) =>
      sum + Math.abs(y[i] - pred), 0) / y.length;

    return {
      r2Score: r2,
      mse: mse,
      rmse: rmse,
      mae: mae,
      predictions: predictions,
      residuals: predictions.map((pred, i) => y[i] - pred)
    };
  }

  getCoefficients() {
    if (!this.trained) {
      throw new Error('Model must be trained first');
    }
    return {
      weights: [...this.weights],
      bias: this.bias,
      equation: this.getEquation()
    };
  }

  getEquation() {
    let eq = `y = ${this.bias.toFixed(4)}`;
    this.weights.forEach((w, i) => {
      const sign = w >= 0 ? '+' : '-';
      eq += ` ${sign} ${Math.abs(w).toFixed(4)}*x${i + 1}`;
    });
    return eq;
  }

  summary() {
    if (!this.trained) {
      throw new Error('Model must be trained first');
    }

    return {
      modelType: 'Linear Regression',
      coefficients: this.getCoefficients(),
      trainingMetrics: this.trainingMetrics,
      hyperparameters: {
        learningRate: this.learningRate,
        iterations: this.iterations,
        regularization: this.regularization,
        lambda: this.lambda
      }
    };
  }
}

export default LinearRegression;
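For orientation, here is a minimal usage sketch of the new LinearRegression class. The import path and sample data are illustrative assumptions, not taken from the package; fit and predict also rely on validation helpers inherited from baseModel.js, which this hunk does not show.

// Minimal sketch, assuming the file can be imported directly from the
// package source tree (the actual public entry point may differ).
import LinearRegression from 'datly/src/ml/LinearRegression.js';

const X = [[1], [2], [3], [4], [5]];
const y = [2.1, 4.0, 6.2, 7.9, 10.1]; // roughly y = 2x

const model = new LinearRegression(0.05, 2000);
model.fit(X, y, false); // skip normalization so the equation is in raw units

console.log(model.getEquation());       // approximately "y = 0.0900 + 1.9900*x1"
console.log(model.score(X, y).r2Score); // close to 1 on this toy data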
package/src/ml/LogisticRegression.js
@@ -0,0 +1,396 @@
import BaseModel from './baseModel.js';

class LogisticRegression extends BaseModel {
  constructor({
    learningRate = 0.01,
    iterations = 1000,
    batchSize = null,
    regularization = null,
    lambda = 0.01,
    earlyStopping = false,
    tol = 1e-6,
    randomInit = true
  } = {}) {
    super();
    this.learningRate = learningRate;
    this.iterations = iterations;
    this.batchSize = batchSize;
    this.regularization = regularization;
    this.lambda = lambda;
    this.earlyStopping = earlyStopping;
    this.tol = tol;
    this.randomInit = randomInit;
    this.weights = null;
    this.bias = null;
    this.normParams = null;
    this.classes = null;
    this.multiclass = false;
    this.losses = [];
  }

  sigmoid(z) {
    return 1 / (1 + Math.exp(-z));
  }

  softmax(z) {
    const maxZ = Math.max(...z);
    const expZ = z.map(v => Math.exp(v - maxZ));
    const sum = expZ.reduce((a, b) => a + b, 0);
    return expZ.map(v => v / sum);
  }

  fit(X, y, normalize = true) {
    this.validateTrainingData(X, y);

    this.classes = [...new Set(y)].sort((a, b) =>
      typeof a === 'number' && typeof b === 'number'
        ? a - b
        : String(a).localeCompare(String(b))
    );
    this.multiclass = this.classes.length > 2;

    let X_train = X.map(r => (Array.isArray(r) ? r : [r]));
    if (normalize) {
      const { normalized, means, stds } = this.normalizeFeaturesSafe(X_train);
      X_train = normalized;
      this.normParams = { means, stds };
    }

    this.multiclass
      ? this.fitMulticlass(X_train, y)
      : this.fitBinary(X_train, y);

    this.trained = true;
    return this;
  }

  // Binary cross-entropy via gradient descent; classes[1] is the positive class.
  fitBinary(X, y) {
    const n = X.length;
    const m = X[0].length;
    const yBin = y.map(label => (label === this.classes[1] ? 1 : 0));

    this.weights = this.randomInit
      ? Array(m).fill(0).map(() => Math.random() * 0.01)
      : Array(m).fill(0);
    this.bias = 0;

    let prevLoss = Infinity;
    this.losses = [];

    for (let iter = 0; iter < this.iterations; iter++) {
      const { Xb, yb } = this.getBatch(X, yBin);
      const predictions = Xb.map(row => this.sigmoid(this.linear(row)));

      const { weightGradients, biasGradient } = this.gradientBinary(Xb, yb, predictions);

      for (let j = 0; j < m; j++) {
        this.weights[j] -= (this.learningRate / Xb.length) * weightGradients[j];
      }
      this.bias -= (this.learningRate / Xb.length) * biasGradient;

      const loss = this.calculateBinaryLoss(predictions, yb);
      this.losses.push(loss);

      if (this.earlyStopping && Math.abs(prevLoss - loss) < this.tol) break;
      prevLoss = loss;
    }

    this.trainingMetrics = {
      finalLoss: this.losses[this.losses.length - 1],
      losses: this.losses,
      weights: [...this.weights],
      bias: this.bias
    };
  }

  // Softmax regression: one weight vector and bias per class.
  fitMulticlass(X, y) {
    const n = X.length;
    const m = X[0].length;
    const k = this.classes.length;
    const yOneHot = this.oneHotEncode(y, k);

    this.weights = Array(k)
      .fill(0)
      .map(() =>
        this.randomInit
          ? Array(m).fill(0).map(() => Math.random() * 0.01)
          : Array(m).fill(0)
      );
    this.bias = Array(k).fill(0);

    let prevLoss = Infinity;
    this.losses = [];

    for (let iter = 0; iter < this.iterations; iter++) {
      const { Xb, yb } = this.getBatch(X, yOneHot);
      const predictions = Xb.map(row => this.forwardMulticlass(row));

      for (let c = 0; c < k; c++) {
        const grad = this.gradientMulticlass(Xb, yb, predictions, c);
        for (let j = 0; j < m; j++) {
          this.weights[c][j] -= (this.learningRate / Xb.length) * grad.weight[j];
        }
        this.bias[c] -= (this.learningRate / Xb.length) * grad.bias;
      }

      const loss = this.calculateMulticlassLoss(predictions, yb);
      this.losses.push(loss);

      if (this.earlyStopping && Math.abs(prevLoss - loss) < this.tol) break;
      prevLoss = loss;
    }

    this.trainingMetrics = {
      finalLoss: this.losses[this.losses.length - 1],
      losses: this.losses
    };
  }

  predict(X, returnProba = false) {
    this.validatePredictionData(X);
    let X_test = X.map(r => (Array.isArray(r) ? r : [r]));

    if (this.normParams) {
      const { means, stds } = this.normParams;
      X_test = X_test.map(r =>
        r.map((v, j) => (v - means[j]) / (stds[j] || 1))
      );
    }

    return this.multiclass
      ? this.predictMulticlass(X_test, returnProba)
      : this.predictBinary(X_test, returnProba);
  }

  predictBinary(X, returnProba) {
    return X.map(row => {
      const p = this.sigmoid(this.linear(row));
      if (returnProba) {
        return { [this.classes[0]]: 1 - p, [this.classes[1]]: p };
      }
      return p >= 0.5 ? this.classes[1] : this.classes[0];
    });
  }

  predictMulticlass(X, returnProba) {
    return X.map(row => {
      const probs = this.forwardMulticlass(row);
      if (returnProba) {
        const out = {};
        this.classes.forEach((cls, i) => (out[cls] = probs[i]));
        return out;
      }
      const maxIdx = probs.indexOf(Math.max(...probs));
      return this.classes[maxIdx];
    });
  }

  // ---------- Helpers ----------
  linear(x) {
    return this.bias + x.reduce((s, v, j) => s + v * this.weights[j], 0);
  }

  forwardMulticlass(x) {
    const z = this.bias.map((b, c) =>
      b + x.reduce((s, v, j) => s + v * this.weights[c][j], 0)
    );
    return this.softmax(z);
  }

  // Returns a random contiguous mini-batch, or the full set when batchSize is unset.
  getBatch(X, y) {
    if (!this.batchSize || this.batchSize >= X.length) {
      return { Xb: X, yb: y };
    }
    const idx = Math.floor(Math.random() * (X.length - this.batchSize));
    return {
      Xb: X.slice(idx, idx + this.batchSize),
      yb: y.slice(idx, idx + this.batchSize)
    };
  }

  gradientBinary(X, y, predictions) {
    const m = X[0].length;
    const weightGradients = Array(m).fill(0);
    let biasGradient = 0;

    for (let i = 0; i < X.length; i++) {
      const error = predictions[i] - y[i];
      biasGradient += error;
      for (let j = 0; j < m; j++) {
        weightGradients[j] += error * X[i][j];
      }
    }

    if (this.regularization) {
      for (let j = 0; j < m; j++) {
        if (this.regularization === 'l2') weightGradients[j] += this.lambda * this.weights[j];
        if (this.regularization === 'l1') weightGradients[j] += this.lambda * Math.sign(this.weights[j]);
      }
    }

    return { weightGradients, biasGradient };
  }

  gradientMulticlass(X, y, predictions, c) {
    const m = X[0].length;
    const weightGradients = Array(m).fill(0);
    let biasGradient = 0;

    for (let i = 0; i < X.length; i++) {
      const error = predictions[i][c] - y[i][c];
      biasGradient += error;
      for (let j = 0; j < m; j++) {
        weightGradients[j] += error * X[i][j];
      }
    }

    if (this.regularization === 'l2') {
      for (let j = 0; j < m; j++) {
        weightGradients[j] += this.lambda * this.weights[c][j];
      }
    }

    return { weight: weightGradients, bias: biasGradient };
  }

  normalizeFeaturesSafe(X) {
    const m = X[0].length;
    const means = Array(m).fill(0);
    const stds = Array(m).fill(0);

    for (let j = 0; j < m; j++) {
      const col = X.map(r => r[j]);
      const mean = col.reduce((a, b) => a + b, 0) / col.length;
      const std = Math.sqrt(col.reduce((a, b) => a + (b - mean) ** 2, 0) / col.length);
      means[j] = mean;
      stds[j] = std || 1; // guard against zero-variance columns
    }

    const normalized = X.map(r => r.map((v, j) => (v - means[j]) / stds[j]));
    return { normalized, means, stds };
  }

  oneHotEncode(y, k) {
    return y.map(label => {
      const arr = Array(k).fill(0);
      arr[this.classes.indexOf(label)] = 1;
      return arr;
    });
  }

  calculateBinaryLoss(predictions, y) {
    const eps = 1e-15;
    let loss = 0;
    for (let i = 0; i < predictions.length; i++) {
      const p = Math.min(Math.max(predictions[i], eps), 1 - eps);
      loss -= y[i] * Math.log(p) + (1 - y[i]) * Math.log(1 - p);
    }
    loss /= predictions.length;

    if (this.regularization === 'l2') {
      const reg = this.weights.reduce((s, w) => s + w * w, 0);
      loss += (this.lambda / 2) * reg;
    }

    return loss;
  }

  calculateMulticlassLoss(predictions, yOneHot) {
    const eps = 1e-15;
    let loss = 0;
    for (let i = 0; i < predictions.length; i++) {
      for (let c = 0; c < yOneHot[i].length; c++) {
        const p = Math.min(Math.max(predictions[i][c], eps), 1 - eps);
        loss -= yOneHot[i][c] * Math.log(p);
      }
    }
    return loss / predictions.length;
  }

  // ---------- 🆕 ROC & AUC ----------
  rocCurve(X, y) {
    if (this.multiclass) {
      console.warn('ROC curve is only available for binary problems');
      return null;
    }

    const proba = this.predict(X, true).map(p => p[this.classes[1]]);
    const thresholds = [...new Set(proba)].sort((a, b) => b - a);
    const points = [];

    for (const t of thresholds) {
      let tp = 0, fp = 0, tn = 0, fn = 0;
      for (let i = 0; i < y.length; i++) {
        const actual = y[i] === this.classes[1] ? 1 : 0;
        const pred = proba[i] >= t ? 1 : 0;
        if (actual === 1 && pred === 1) tp++;
        else if (actual === 0 && pred === 1) fp++;
        else if (actual === 0 && pred === 0) tn++;
        else if (actual === 1 && pred === 0) fn++;
      }
      const tpr = tp / (tp + fn);
      const fpr = fp / (fp + tn);
      points.push({ fpr, tpr });
    }

    // Sort by ascending FPR
    points.sort((a, b) => a.fpr - b.fpr);
    return points;
  }

  aucScore(X, y) {
    const curve = this.rocCurve(X, y);
    if (!curve) return null;

    let auc = 0;
    for (let i = 1; i < curve.length; i++) {
      const x1 = curve[i - 1].fpr;
      const x2 = curve[i].fpr;
      const y1 = curve[i - 1].tpr;
      const y2 = curve[i].tpr;
      auc += (x2 - x1) * (y1 + y2) / 2; // trapezoidal rule
    }
    return Math.abs(auc);
  }

  score(X, y) {
    const yPred = this.predict(X);
    const yProba = this.predict(X, true);
    const accuracy = yPred.filter((p, i) => p === y[i]).length / y.length;
    const cm = this.confusionMatrix(y, yPred);
    const metrics = this.calculateClassMetrics(cm);
    const auc = !this.multiclass ? this.aucScore(X, y) : null;
    const roc = !this.multiclass ? this.rocCurve(X, y) : null;

    return {
      accuracy,
      auc,
      roc,
      confusionMatrix: cm,
      classMetrics: metrics,
      predictions: yPred,
      probabilities: yProba
    };
  }

  summary() {
    if (!this.trained) throw new Error('Model must be trained first');

    return {
      modelType: 'Logistic Regression',
      classes: this.classes,
      multiclass: this.multiclass,
      trainingMetrics: this.trainingMetrics,
      hyperparameters: {
        learningRate: this.learningRate,
        iterations: this.iterations,
        regularization: this.regularization,
        lambda: this.lambda,
        batchSize: this.batchSize,
        earlyStopping: this.earlyStopping
      }
    };
  }
}

export default LogisticRegression;
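A corresponding sketch for LogisticRegression on a binary problem. The import path and data are again illustrative assumptions; only methods shown in this hunk are called (score() additionally depends on confusionMatrix and calculateClassMetrics from baseModel.js, which this diff does not display, so it is avoided here).

// Minimal sketch, assuming direct import from the package source tree.
import LogisticRegression from 'datly/src/ml/LogisticRegression.js';

const X = [[1], [2], [3], [6], [7], [8]];
const y = [0, 0, 0, 1, 1, 1]; // linearly separable toy labels

const clf = new LogisticRegression({ learningRate: 0.1, iterations: 2000 });
clf.fit(X, y);

console.log(clf.predict([[2], [7]]));    // expected: [0, 1]
console.log(clf.predict([[4.5]], true)); // class probabilities for a borderline point
console.log(clf.aucScore(X, y));         // 1 when the ranking is perfect on this toy set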