bun-scikit 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +73 -137
- package/docs/README.md +1 -0
- package/package.json +2 -3
- package/scripts/check-benchmark-health.ts +162 -1
- package/scripts/sync-benchmark-readme.ts +56 -0
- package/src/dummy/DummyClassifier.ts +190 -0
- package/src/dummy/DummyRegressor.ts +108 -0
- package/src/feature_selection/VarianceThreshold.ts +88 -0
- package/src/index.ts +23 -0
- package/src/metrics/classification.ts +30 -0
- package/src/metrics/regression.ts +40 -0
- package/src/model_selection/RandomizedSearchCV.ts +269 -0
- package/src/native/node-addon/bun_scikit_addon.cpp +149 -0
- package/src/native/zigKernels.ts +33 -4
- package/src/preprocessing/Binarizer.ts +46 -0
- package/src/preprocessing/LabelEncoder.ts +62 -0
- package/src/preprocessing/MaxAbsScaler.ts +77 -0
- package/src/preprocessing/Normalizer.ts +66 -0
- package/src/tree/DecisionTreeClassifier.ts +146 -3
- package/zig/kernels.zig +63 -40
- package/binding.gyp +0 -21
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
import type { Matrix, Vector } from "../types";
|
|
2
|
+
import { accuracyScore } from "../metrics/classification";
|
|
3
|
+
import {
|
|
4
|
+
assertConsistentRowSize,
|
|
5
|
+
assertFiniteMatrix,
|
|
6
|
+
assertFiniteVector,
|
|
7
|
+
assertNonEmptyMatrix,
|
|
8
|
+
assertVectorLength,
|
|
9
|
+
} from "../utils/validation";
|
|
10
|
+
|
|
11
|
+
/** Prediction strategies mirroring scikit-learn's DummyClassifier. */
export type DummyClassifierStrategy =
  | "most_frequent"
  | "prior"
  | "stratified"
  | "uniform"
  | "constant";

/** Configuration for DummyClassifier. */
export interface DummyClassifierOptions {
  /** Strategy used to generate predictions; the constructor defaults to "prior". */
  strategy?: DummyClassifierStrategy;
  /** Label returned by the "constant" strategy; fit() requires it to be finite for that strategy. */
  constant?: number;
  /** Seed for the deterministic PRNG behind "stratified"/"uniform"; the constructor defaults to 42. */
  randomState?: number;
}
|
|
23
|
+
|
|
24
|
+
class Mulberry32 {
|
|
25
|
+
private state: number;
|
|
26
|
+
|
|
27
|
+
constructor(seed: number) {
|
|
28
|
+
this.state = seed >>> 0;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
next(): number {
|
|
32
|
+
this.state = (this.state + 0x6d2b79f5) >>> 0;
|
|
33
|
+
let t = this.state ^ (this.state >>> 15);
|
|
34
|
+
t = Math.imul(t, this.state | 1);
|
|
35
|
+
t ^= t + Math.imul(t ^ (t >>> 7), t | 61);
|
|
36
|
+
return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
/**
 * Baseline classifier that ignores the feature values in X and predicts from
 * simple rules derived from the training labels alone (modelled on
 * scikit-learn's DummyClassifier). Useful as a sanity-check baseline.
 *
 * Fitted state (null until fit() succeeds):
 * - classes_:    distinct labels seen in y, ascending.
 * - classPrior_: empirical frequency of each class, aligned with classes_.
 * - constant_:   the configured constant (for "constant") or the majority class.
 */
export class DummyClassifier {
  classes_: number[] | null = null;
  classPrior_: number[] | null = null;
  constant_: number | null = null;

  private readonly strategy: DummyClassifierStrategy;
  private readonly configuredConstant?: number;
  private readonly randomState: number;
  // Most frequent label in y; ties break toward the smaller label because
  // classes are scanned in ascending order with a strict '>' comparison.
  private majorityClass: number | null = null;
  private nFeaturesIn_: number | null = null;

  constructor(options: DummyClassifierOptions = {}) {
    this.strategy = options.strategy ?? "prior";
    this.configuredConstant = options.constant;
    // Fixed default seed keeps "stratified"/"uniform" predictions reproducible.
    this.randomState = options.randomState ?? 42;
  }

  /**
   * Learn class frequencies from y. X is validated and only its column count
   * is recorded; feature values never influence predictions.
   *
   * Throws when X/y are empty, ragged, non-finite, or length-mismatched, or
   * when strategy is "constant" without a finite `constant` option.
   */
  fit(X: Matrix, y: Vector): this {
    assertNonEmptyMatrix(X);
    assertConsistentRowSize(X);
    assertFiniteMatrix(X);
    assertVectorLength(y, X.length);
    assertFiniteVector(y);
    this.nFeaturesIn_ = X[0].length;

    // Count occurrences of each label.
    const counts = new Map<number, number>();
    for (let i = 0; i < y.length; i += 1) {
      counts.set(y[i], (counts.get(y[i]) ?? 0) + 1);
    }

    // classes_ ascending; priors aligned index-for-index with classes.
    const classes = Array.from(counts.keys()).sort((a, b) => a - b);
    const priors = new Array<number>(classes.length);
    for (let i = 0; i < classes.length; i += 1) {
      priors[i] = (counts.get(classes[i]) ?? 0) / y.length;
    }

    // Linear scan for the most frequent label (smallest label wins ties).
    let majorityClass = classes[0];
    let majorityCount = counts.get(majorityClass) ?? 0;
    for (let i = 1; i < classes.length; i += 1) {
      const cls = classes[i];
      const clsCount = counts.get(cls) ?? 0;
      if (clsCount > majorityCount) {
        majorityClass = cls;
        majorityCount = clsCount;
      }
    }

    if (this.strategy === "constant") {
      if (!Number.isFinite(this.configuredConstant)) {
        throw new Error("constant strategy requires a finite constant value.");
      }
      this.constant_ = this.configuredConstant!;
    } else {
      // For non-constant strategies constant_ is informational: the majority class.
      this.constant_ = majorityClass;
    }

    this.classes_ = classes;
    this.classPrior_ = priors;
    this.majorityClass = majorityClass;
    return this;
  }

  // Guard used by predict/predictProba; throws before fit() has populated state.
  private ensureFitted(): void {
    if (!this.classes_ || !this.classPrior_ || this.nFeaturesIn_ === null || this.majorityClass === null) {
      throw new Error("DummyClassifier has not been fitted.");
    }
  }

  // Inverse-CDF sampling over classPrior_: subtract priors from a uniform
  // draw until it is exhausted. The final return guards against a tiny
  // positive floating-point remainder after the loop.
  private sampleByPrior(rng: Mulberry32): number {
    let r = rng.next();
    for (let i = 0; i < this.classPrior_!.length; i += 1) {
      r -= this.classPrior_![i];
      if (r <= 0) {
        return this.classes_![i];
      }
    }
    return this.classes_![this.classes_!.length - 1];
  }

  /**
   * Predict one label per row of X according to the configured strategy.
   * Note: only the first row's width is checked against nFeaturesIn_.
   * "uniform"/"stratified" reseed the PRNG per call, so repeated calls with
   * the same X return identical outputs.
   */
  predict(X: Matrix): Vector {
    this.ensureFitted();
    if (!Array.isArray(X) || X.length === 0) {
      throw new Error("X must be a non-empty 2D array.");
    }
    if (!Array.isArray(X[0]) || X[0].length !== this.nFeaturesIn_) {
      throw new Error(`Feature size mismatch. Expected ${this.nFeaturesIn_}, got ${X[0]?.length ?? 0}.`);
    }

    switch (this.strategy) {
      case "most_frequent":
      case "prior":
        // Both always predict the majority class.
        return new Array<number>(X.length).fill(this.majorityClass!);
      case "constant":
        return new Array<number>(X.length).fill(this.constant_!);
      case "uniform": {
        // Each class equally likely, independent of training frequencies.
        const rng = new Mulberry32(this.randomState);
        const out = new Array<number>(X.length);
        for (let i = 0; i < X.length; i += 1) {
          const idx = Math.floor(rng.next() * this.classes_!.length);
          out[i] = this.classes_![idx];
        }
        return out;
      }
      case "stratified": {
        // Sample classes proportionally to their training frequencies.
        const rng = new Mulberry32(this.randomState);
        const out = new Array<number>(X.length);
        for (let i = 0; i < X.length; i += 1) {
          out[i] = this.sampleByPrior(rng);
        }
        return out;
      }
      default: {
        // Exhaustiveness check: unreachable for valid strategies.
        const exhaustive: never = this.strategy;
        throw new Error(`Unsupported strategy: ${exhaustive}`);
      }
    }
  }

  /**
   * Per-row class-probability estimates, columns aligned with classes_:
   * - "uniform":                  1/k for each of the k classes;
   * - "most_frequent"/"constant": one-hot on the predicted label;
   * - "prior"/"stratified":       the empirical class priors.
   */
  predictProba(X: Matrix): Matrix {
    this.ensureFitted();
    if (!Array.isArray(X) || X.length === 0) {
      throw new Error("X must be a non-empty 2D array.");
    }
    if (!Array.isArray(X[0]) || X[0].length !== this.nFeaturesIn_) {
      throw new Error(`Feature size mismatch. Expected ${this.nFeaturesIn_}, got ${X[0]?.length ?? 0}.`);
    }

    if (this.strategy === "uniform") {
      const value = 1 / this.classes_!.length;
      return X.map(() => new Array(this.classes_!.length).fill(value));
    }

    if (this.strategy === "most_frequent" || this.strategy === "constant") {
      const oneHot = new Array<number>(this.classes_!.length).fill(0);
      const label = this.strategy === "constant" ? this.constant_! : this.majorityClass!;
      // indexOf can be -1 when a "constant" label was never seen in y;
      // in that case the row is all zeros.
      const classIndex = this.classes_!.indexOf(label);
      if (classIndex >= 0) {
        oneHot[classIndex] = 1;
      }
      return X.map(() => [...oneHot]);
    }

    // prior / stratified share prior probabilities.
    const prior = [...this.classPrior_!];
    return X.map(() => [...prior]);
  }

  /** Mean accuracy of predict(X) against y. */
  score(X: Matrix, y: Vector): number {
    return accuracyScore(y, this.predict(X));
  }
}
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
import type { Matrix, Vector } from "../types";
|
|
2
|
+
import { r2Score } from "../metrics/regression";
|
|
3
|
+
import { assertFiniteVector, validateRegressionInputs } from "../utils/validation";
|
|
4
|
+
|
|
5
|
+
/** Prediction strategies mirroring scikit-learn's DummyRegressor. */
export type DummyRegressorStrategy = "mean" | "median" | "quantile" | "constant";

/** Configuration for DummyRegressor. */
export interface DummyRegressorOptions {
  /** Statistic of y used as the fitted constant; the constructor defaults to "mean". */
  strategy?: DummyRegressorStrategy;
  /** Value predicted by the "constant" strategy; must be finite when that strategy is chosen. */
  constant?: number;
  /** Quantile in [0, 1] for the "quantile" strategy; defaults to 0.5 (the median). */
  quantile?: number;
}
|
|
12
|
+
|
|
13
|
+
function computeMedian(values: number[]): number {
|
|
14
|
+
const sorted = [...values].sort((a, b) => a - b);
|
|
15
|
+
const mid = Math.floor(sorted.length / 2);
|
|
16
|
+
if (sorted.length % 2 === 0) {
|
|
17
|
+
return 0.5 * (sorted[mid - 1] + sorted[mid]);
|
|
18
|
+
}
|
|
19
|
+
return sorted[mid];
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
function computeQuantile(values: number[], q: number): number {
|
|
23
|
+
const sorted = [...values].sort((a, b) => a - b);
|
|
24
|
+
const pos = q * (sorted.length - 1);
|
|
25
|
+
const lo = Math.floor(pos);
|
|
26
|
+
const hi = Math.ceil(pos);
|
|
27
|
+
if (lo === hi) {
|
|
28
|
+
return sorted[lo];
|
|
29
|
+
}
|
|
30
|
+
const weight = pos - lo;
|
|
31
|
+
return sorted[lo] * (1 - weight) + sorted[hi] * weight;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
export class DummyRegressor {
|
|
35
|
+
constant_: number | null = null;
|
|
36
|
+
|
|
37
|
+
private readonly strategy: DummyRegressorStrategy;
|
|
38
|
+
private readonly constant?: number;
|
|
39
|
+
private readonly quantile: number;
|
|
40
|
+
private nFeaturesIn_: number | null = null;
|
|
41
|
+
|
|
42
|
+
constructor(options: DummyRegressorOptions = {}) {
|
|
43
|
+
this.strategy = options.strategy ?? "mean";
|
|
44
|
+
this.constant = options.constant;
|
|
45
|
+
this.quantile = options.quantile ?? 0.5;
|
|
46
|
+
|
|
47
|
+
if (this.strategy === "constant") {
|
|
48
|
+
if (!Number.isFinite(this.constant)) {
|
|
49
|
+
throw new Error("constant strategy requires a finite constant value.");
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
if (this.strategy === "quantile") {
|
|
54
|
+
if (!Number.isFinite(this.quantile) || this.quantile < 0 || this.quantile > 1) {
|
|
55
|
+
throw new Error(`quantile must be in [0, 1]. Got ${this.quantile}.`);
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
fit(X: Matrix, y: Vector): this {
|
|
61
|
+
validateRegressionInputs(X, y);
|
|
62
|
+
this.nFeaturesIn_ = X[0].length;
|
|
63
|
+
|
|
64
|
+
switch (this.strategy) {
|
|
65
|
+
case "mean": {
|
|
66
|
+
let total = 0;
|
|
67
|
+
for (let i = 0; i < y.length; i += 1) {
|
|
68
|
+
total += y[i];
|
|
69
|
+
}
|
|
70
|
+
this.constant_ = total / y.length;
|
|
71
|
+
break;
|
|
72
|
+
}
|
|
73
|
+
case "median":
|
|
74
|
+
this.constant_ = computeMedian(y);
|
|
75
|
+
break;
|
|
76
|
+
case "quantile":
|
|
77
|
+
this.constant_ = computeQuantile(y, this.quantile);
|
|
78
|
+
break;
|
|
79
|
+
case "constant":
|
|
80
|
+
this.constant_ = this.constant!;
|
|
81
|
+
break;
|
|
82
|
+
default: {
|
|
83
|
+
const exhaustive: never = this.strategy;
|
|
84
|
+
throw new Error(`Unsupported strategy: ${exhaustive}`);
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
return this;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
predict(X: Matrix): Vector {
|
|
92
|
+
if (this.constant_ === null || this.nFeaturesIn_ === null) {
|
|
93
|
+
throw new Error("DummyRegressor has not been fitted.");
|
|
94
|
+
}
|
|
95
|
+
if (!Array.isArray(X) || X.length === 0) {
|
|
96
|
+
throw new Error("X must be a non-empty 2D array.");
|
|
97
|
+
}
|
|
98
|
+
if (!Array.isArray(X[0]) || X[0].length !== this.nFeaturesIn_) {
|
|
99
|
+
throw new Error(`Feature size mismatch. Expected ${this.nFeaturesIn_}, got ${X[0]?.length ?? 0}.`);
|
|
100
|
+
}
|
|
101
|
+
return new Array<number>(X.length).fill(this.constant_);
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
score(X: Matrix, y: Vector): number {
|
|
105
|
+
assertFiniteVector(y);
|
|
106
|
+
return r2Score(y, this.predict(X));
|
|
107
|
+
}
|
|
108
|
+
}
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import type { Matrix } from "../types";
|
|
2
|
+
import {
|
|
3
|
+
assertConsistentRowSize,
|
|
4
|
+
assertFiniteMatrix,
|
|
5
|
+
assertNonEmptyMatrix,
|
|
6
|
+
} from "../utils/validation";
|
|
7
|
+
|
|
8
|
+
/** Configuration for VarianceThreshold. */
export interface VarianceThresholdOptions {
  /** Variance a feature must strictly exceed to be kept; finite and >= 0; defaults to 0. */
  threshold?: number;
}
|
|
11
|
+
|
|
12
|
+
export class VarianceThreshold {
|
|
13
|
+
variances_: number[] | null = null;
|
|
14
|
+
nFeaturesIn_: number | null = null;
|
|
15
|
+
selectedFeatureIndices_: number[] | null = null;
|
|
16
|
+
|
|
17
|
+
private readonly threshold: number;
|
|
18
|
+
|
|
19
|
+
constructor(options: VarianceThresholdOptions = {}) {
|
|
20
|
+
this.threshold = options.threshold ?? 0;
|
|
21
|
+
if (!Number.isFinite(this.threshold) || this.threshold < 0) {
|
|
22
|
+
throw new Error(`threshold must be finite and >= 0. Got ${this.threshold}.`);
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
fit(X: Matrix): this {
|
|
27
|
+
assertNonEmptyMatrix(X);
|
|
28
|
+
assertConsistentRowSize(X);
|
|
29
|
+
assertFiniteMatrix(X);
|
|
30
|
+
|
|
31
|
+
const nSamples = X.length;
|
|
32
|
+
const nFeatures = X[0].length;
|
|
33
|
+
const means = new Array<number>(nFeatures).fill(0);
|
|
34
|
+
const variances = new Array<number>(nFeatures).fill(0);
|
|
35
|
+
|
|
36
|
+
for (let i = 0; i < nSamples; i += 1) {
|
|
37
|
+
for (let j = 0; j < nFeatures; j += 1) {
|
|
38
|
+
means[j] += X[i][j];
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
for (let j = 0; j < nFeatures; j += 1) {
|
|
42
|
+
means[j] /= nSamples;
|
|
43
|
+
}
|
|
44
|
+
for (let i = 0; i < nSamples; i += 1) {
|
|
45
|
+
for (let j = 0; j < nFeatures; j += 1) {
|
|
46
|
+
const diff = X[i][j] - means[j];
|
|
47
|
+
variances[j] += diff * diff;
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
for (let j = 0; j < nFeatures; j += 1) {
|
|
51
|
+
variances[j] /= nSamples;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
const selectedFeatureIndices: number[] = [];
|
|
55
|
+
for (let j = 0; j < nFeatures; j += 1) {
|
|
56
|
+
if (variances[j] > this.threshold) {
|
|
57
|
+
selectedFeatureIndices.push(j);
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
if (selectedFeatureIndices.length === 0) {
|
|
61
|
+
throw new Error("No feature in X meets the variance threshold.");
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
this.nFeaturesIn_ = nFeatures;
|
|
65
|
+
this.variances_ = variances;
|
|
66
|
+
this.selectedFeatureIndices_ = selectedFeatureIndices;
|
|
67
|
+
return this;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
transform(X: Matrix): Matrix {
|
|
71
|
+
if (!this.selectedFeatureIndices_ || this.nFeaturesIn_ === null) {
|
|
72
|
+
throw new Error("VarianceThreshold has not been fitted.");
|
|
73
|
+
}
|
|
74
|
+
assertNonEmptyMatrix(X);
|
|
75
|
+
assertConsistentRowSize(X);
|
|
76
|
+
assertFiniteMatrix(X);
|
|
77
|
+
|
|
78
|
+
if (X[0].length !== this.nFeaturesIn_) {
|
|
79
|
+
throw new Error(`Feature size mismatch. Expected ${this.nFeaturesIn_}, got ${X[0].length}.`);
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
return X.map((row) => this.selectedFeatureIndices_!.map((featureIdx) => row[featureIdx]));
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
fitTransform(X: Matrix): Matrix {
|
|
86
|
+
return this.fit(X).transform(X);
|
|
87
|
+
}
|
|
88
|
+
}
|
package/src/index.ts
CHANGED
|
@@ -1,15 +1,28 @@
|
|
|
1
1
|
export * from "./types";
|
|
2
2
|
|
|
3
|
+
// Baselines
|
|
4
|
+
export * from "./dummy/DummyClassifier";
|
|
5
|
+
export * from "./dummy/DummyRegressor";
|
|
6
|
+
|
|
7
|
+
// Preprocessing
|
|
3
8
|
export * from "./preprocessing/StandardScaler";
|
|
4
9
|
export * from "./preprocessing/MinMaxScaler";
|
|
5
10
|
export * from "./preprocessing/RobustScaler";
|
|
11
|
+
export * from "./preprocessing/MaxAbsScaler";
|
|
12
|
+
export * from "./preprocessing/Normalizer";
|
|
13
|
+
export * from "./preprocessing/Binarizer";
|
|
14
|
+
export * from "./preprocessing/LabelEncoder";
|
|
6
15
|
export * from "./preprocessing/PolynomialFeatures";
|
|
7
16
|
export * from "./preprocessing/SimpleImputer";
|
|
8
17
|
export * from "./preprocessing/OneHotEncoder";
|
|
18
|
+
|
|
19
|
+
// Linear models
|
|
9
20
|
export * from "./linear_model/LinearRegression";
|
|
10
21
|
export * from "./linear_model/LogisticRegression";
|
|
11
22
|
export * from "./linear_model/SGDClassifier";
|
|
12
23
|
export * from "./linear_model/SGDRegressor";
|
|
24
|
+
|
|
25
|
+
// Other estimators
|
|
13
26
|
export * from "./neighbors/KNeighborsClassifier";
|
|
14
27
|
export * from "./naive_bayes/GaussianNB";
|
|
15
28
|
export * from "./svm/LinearSVC";
|
|
@@ -17,6 +30,8 @@ export * from "./tree/DecisionTreeClassifier";
|
|
|
17
30
|
export * from "./tree/DecisionTreeRegressor";
|
|
18
31
|
export * from "./ensemble/RandomForestClassifier";
|
|
19
32
|
export * from "./ensemble/RandomForestRegressor";
|
|
33
|
+
|
|
34
|
+
// Model selection
|
|
20
35
|
export * from "./model_selection/trainTestSplit";
|
|
21
36
|
export * from "./model_selection/KFold";
|
|
22
37
|
export * from "./model_selection/StratifiedKFold";
|
|
@@ -25,8 +40,16 @@ export * from "./model_selection/RepeatedKFold";
|
|
|
25
40
|
export * from "./model_selection/RepeatedStratifiedKFold";
|
|
26
41
|
export * from "./model_selection/crossValScore";
|
|
27
42
|
export * from "./model_selection/GridSearchCV";
|
|
43
|
+
export * from "./model_selection/RandomizedSearchCV";
|
|
44
|
+
|
|
45
|
+
// Feature selection
|
|
46
|
+
export * from "./feature_selection/VarianceThreshold";
|
|
47
|
+
|
|
48
|
+
// Composition
|
|
28
49
|
export * from "./pipeline/Pipeline";
|
|
29
50
|
export * from "./pipeline/ColumnTransformer";
|
|
30
51
|
export * from "./pipeline/FeatureUnion";
|
|
52
|
+
|
|
53
|
+
// Metrics
|
|
31
54
|
export * from "./metrics/regression";
|
|
32
55
|
export * from "./metrics/classification";
|
|
@@ -292,3 +292,33 @@ export function classificationReport(
|
|
|
292
292
|
},
|
|
293
293
|
};
|
|
294
294
|
}
|
|
295
|
+
|
|
296
|
+
export function balancedAccuracyScore(yTrue: number[], yPred: number[]): number {
|
|
297
|
+
const report = classificationReport(yTrue, yPred);
|
|
298
|
+
return report.macroAvg.recall;
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
export function matthewsCorrcoef(
|
|
302
|
+
yTrue: number[],
|
|
303
|
+
yPred: number[],
|
|
304
|
+
positiveLabel = 1,
|
|
305
|
+
): number {
|
|
306
|
+
const { tp, fp, fn, tn } = confusionCounts(yTrue, yPred, positiveLabel);
|
|
307
|
+
const numerator = tp * tn - fp * fn;
|
|
308
|
+
const denominator = Math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn));
|
|
309
|
+
if (denominator === 0) {
|
|
310
|
+
return 0;
|
|
311
|
+
}
|
|
312
|
+
return numerator / denominator;
|
|
313
|
+
}
|
|
314
|
+
|
|
315
|
+
export function brierScoreLoss(yTrue: number[], yPredProb: number[]): number {
|
|
316
|
+
validateInputs(yTrue, yPredProb);
|
|
317
|
+
validateBinaryTargets(yTrue);
|
|
318
|
+
let total = 0;
|
|
319
|
+
for (let i = 0; i < yTrue.length; i += 1) {
|
|
320
|
+
const diff = yPredProb[i] - yTrue[i];
|
|
321
|
+
total += diff * diff;
|
|
322
|
+
}
|
|
323
|
+
return total / yTrue.length;
|
|
324
|
+
}
|
|
@@ -49,3 +49,43 @@ export function r2Score(yTrue: number[], yPred: number[]): number {
|
|
|
49
49
|
|
|
50
50
|
return 1 - ssRes / ssTot;
|
|
51
51
|
}
|
|
52
|
+
|
|
53
|
+
export function meanAbsolutePercentageError(yTrue: number[], yPred: number[]): number {
|
|
54
|
+
validateInputs(yTrue, yPred);
|
|
55
|
+
let total = 0;
|
|
56
|
+
for (let i = 0; i < yTrue.length; i += 1) {
|
|
57
|
+
const denom = Math.max(Math.abs(yTrue[i]), 1e-12);
|
|
58
|
+
total += Math.abs((yTrue[i] - yPred[i]) / denom);
|
|
59
|
+
}
|
|
60
|
+
return total / yTrue.length;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
export function explainedVarianceScore(yTrue: number[], yPred: number[]): number {
|
|
64
|
+
validateInputs(yTrue, yPred);
|
|
65
|
+
const n = yTrue.length;
|
|
66
|
+
const yTrueMean = mean(yTrue);
|
|
67
|
+
const residuals = new Array<number>(n);
|
|
68
|
+
let residualMean = 0;
|
|
69
|
+
for (let i = 0; i < n; i += 1) {
|
|
70
|
+
const r = yTrue[i] - yPred[i];
|
|
71
|
+
residuals[i] = r;
|
|
72
|
+
residualMean += r;
|
|
73
|
+
}
|
|
74
|
+
residualMean /= n;
|
|
75
|
+
|
|
76
|
+
let varTrue = 0;
|
|
77
|
+
let varResidual = 0;
|
|
78
|
+
for (let i = 0; i < n; i += 1) {
|
|
79
|
+
const centeredY = yTrue[i] - yTrueMean;
|
|
80
|
+
const centeredR = residuals[i] - residualMean;
|
|
81
|
+
varTrue += centeredY * centeredY;
|
|
82
|
+
varResidual += centeredR * centeredR;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
varTrue /= n;
|
|
86
|
+
varResidual /= n;
|
|
87
|
+
if (varTrue === 0) {
|
|
88
|
+
return varResidual === 0 ? 1 : 0;
|
|
89
|
+
}
|
|
90
|
+
return 1 - varResidual / varTrue;
|
|
91
|
+
}
|