bun-scikit 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +73 -137
- package/package.json +3 -2
- package/scripts/build-node-addon.ts +17 -1
- package/scripts/check-benchmark-health.ts +112 -6
- package/scripts/sync-benchmark-readme.ts +56 -0
- package/src/dummy/DummyClassifier.ts +190 -0
- package/src/dummy/DummyRegressor.ts +108 -0
- package/src/ensemble/RandomForestClassifier.ts +154 -8
- package/src/ensemble/RandomForestRegressor.ts +12 -8
- package/src/feature_selection/VarianceThreshold.ts +88 -0
- package/src/index.ts +23 -0
- package/src/metrics/classification.ts +30 -0
- package/src/metrics/regression.ts +40 -0
- package/src/model_selection/RandomizedSearchCV.ts +269 -0
- package/src/native/node-addon/bun_scikit_addon.cpp +307 -0
- package/src/native/zigKernels.ts +122 -4
- package/src/preprocessing/Binarizer.ts +46 -0
- package/src/preprocessing/LabelEncoder.ts +62 -0
- package/src/preprocessing/MaxAbsScaler.ts +77 -0
- package/src/preprocessing/Normalizer.ts +66 -0
- package/src/tree/DecisionTreeClassifier.ts +159 -4
- package/zig/kernels.zig +333 -89
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
import type { Matrix, Vector } from "../types";
|
|
2
|
+
import { accuracyScore } from "../metrics/classification";
|
|
3
|
+
import {
|
|
4
|
+
assertConsistentRowSize,
|
|
5
|
+
assertFiniteMatrix,
|
|
6
|
+
assertFiniteVector,
|
|
7
|
+
assertNonEmptyMatrix,
|
|
8
|
+
assertVectorLength,
|
|
9
|
+
} from "../utils/validation";
|
|
10
|
+
|
|
11
|
+
/** Supported prediction strategies for DummyClassifier. */
export type DummyClassifierStrategy =
  | "most_frequent"
  | "prior"
  | "stratified"
  | "uniform"
  | "constant";

/** Configuration for DummyClassifier. */
export interface DummyClassifierOptions {
  /** Prediction strategy; defaults to "prior". */
  strategy?: DummyClassifierStrategy;
  /** Label always predicted by the "constant" strategy. */
  constant?: number;
  /** Seed for the "stratified" / "uniform" strategies; defaults to 42. */
  randomState?: number;
}
|
|
23
|
+
|
|
24
|
+
class Mulberry32 {
|
|
25
|
+
private state: number;
|
|
26
|
+
|
|
27
|
+
constructor(seed: number) {
|
|
28
|
+
this.state = seed >>> 0;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
next(): number {
|
|
32
|
+
this.state = (this.state + 0x6d2b79f5) >>> 0;
|
|
33
|
+
let t = this.state ^ (this.state >>> 15);
|
|
34
|
+
t = Math.imul(t, this.state | 1);
|
|
35
|
+
t ^= t + Math.imul(t ^ (t >>> 7), t | 61);
|
|
36
|
+
return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
export class DummyClassifier {
|
|
41
|
+
classes_: number[] | null = null;
|
|
42
|
+
classPrior_: number[] | null = null;
|
|
43
|
+
constant_: number | null = null;
|
|
44
|
+
|
|
45
|
+
private readonly strategy: DummyClassifierStrategy;
|
|
46
|
+
private readonly configuredConstant?: number;
|
|
47
|
+
private readonly randomState: number;
|
|
48
|
+
private majorityClass: number | null = null;
|
|
49
|
+
private nFeaturesIn_: number | null = null;
|
|
50
|
+
|
|
51
|
+
constructor(options: DummyClassifierOptions = {}) {
|
|
52
|
+
this.strategy = options.strategy ?? "prior";
|
|
53
|
+
this.configuredConstant = options.constant;
|
|
54
|
+
this.randomState = options.randomState ?? 42;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
fit(X: Matrix, y: Vector): this {
|
|
58
|
+
assertNonEmptyMatrix(X);
|
|
59
|
+
assertConsistentRowSize(X);
|
|
60
|
+
assertFiniteMatrix(X);
|
|
61
|
+
assertVectorLength(y, X.length);
|
|
62
|
+
assertFiniteVector(y);
|
|
63
|
+
this.nFeaturesIn_ = X[0].length;
|
|
64
|
+
|
|
65
|
+
const counts = new Map<number, number>();
|
|
66
|
+
for (let i = 0; i < y.length; i += 1) {
|
|
67
|
+
counts.set(y[i], (counts.get(y[i]) ?? 0) + 1);
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
const classes = Array.from(counts.keys()).sort((a, b) => a - b);
|
|
71
|
+
const priors = new Array<number>(classes.length);
|
|
72
|
+
for (let i = 0; i < classes.length; i += 1) {
|
|
73
|
+
priors[i] = (counts.get(classes[i]) ?? 0) / y.length;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
let majorityClass = classes[0];
|
|
77
|
+
let majorityCount = counts.get(majorityClass) ?? 0;
|
|
78
|
+
for (let i = 1; i < classes.length; i += 1) {
|
|
79
|
+
const cls = classes[i];
|
|
80
|
+
const clsCount = counts.get(cls) ?? 0;
|
|
81
|
+
if (clsCount > majorityCount) {
|
|
82
|
+
majorityClass = cls;
|
|
83
|
+
majorityCount = clsCount;
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
if (this.strategy === "constant") {
|
|
88
|
+
if (!Number.isFinite(this.configuredConstant)) {
|
|
89
|
+
throw new Error("constant strategy requires a finite constant value.");
|
|
90
|
+
}
|
|
91
|
+
this.constant_ = this.configuredConstant!;
|
|
92
|
+
} else {
|
|
93
|
+
this.constant_ = majorityClass;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
this.classes_ = classes;
|
|
97
|
+
this.classPrior_ = priors;
|
|
98
|
+
this.majorityClass = majorityClass;
|
|
99
|
+
return this;
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
private ensureFitted(): void {
|
|
103
|
+
if (!this.classes_ || !this.classPrior_ || this.nFeaturesIn_ === null || this.majorityClass === null) {
|
|
104
|
+
throw new Error("DummyClassifier has not been fitted.");
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
private sampleByPrior(rng: Mulberry32): number {
|
|
109
|
+
let r = rng.next();
|
|
110
|
+
for (let i = 0; i < this.classPrior_!.length; i += 1) {
|
|
111
|
+
r -= this.classPrior_![i];
|
|
112
|
+
if (r <= 0) {
|
|
113
|
+
return this.classes_![i];
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
return this.classes_![this.classes_!.length - 1];
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
predict(X: Matrix): Vector {
|
|
120
|
+
this.ensureFitted();
|
|
121
|
+
if (!Array.isArray(X) || X.length === 0) {
|
|
122
|
+
throw new Error("X must be a non-empty 2D array.");
|
|
123
|
+
}
|
|
124
|
+
if (!Array.isArray(X[0]) || X[0].length !== this.nFeaturesIn_) {
|
|
125
|
+
throw new Error(`Feature size mismatch. Expected ${this.nFeaturesIn_}, got ${X[0]?.length ?? 0}.`);
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
switch (this.strategy) {
|
|
129
|
+
case "most_frequent":
|
|
130
|
+
case "prior":
|
|
131
|
+
return new Array<number>(X.length).fill(this.majorityClass!);
|
|
132
|
+
case "constant":
|
|
133
|
+
return new Array<number>(X.length).fill(this.constant_!);
|
|
134
|
+
case "uniform": {
|
|
135
|
+
const rng = new Mulberry32(this.randomState);
|
|
136
|
+
const out = new Array<number>(X.length);
|
|
137
|
+
for (let i = 0; i < X.length; i += 1) {
|
|
138
|
+
const idx = Math.floor(rng.next() * this.classes_!.length);
|
|
139
|
+
out[i] = this.classes_![idx];
|
|
140
|
+
}
|
|
141
|
+
return out;
|
|
142
|
+
}
|
|
143
|
+
case "stratified": {
|
|
144
|
+
const rng = new Mulberry32(this.randomState);
|
|
145
|
+
const out = new Array<number>(X.length);
|
|
146
|
+
for (let i = 0; i < X.length; i += 1) {
|
|
147
|
+
out[i] = this.sampleByPrior(rng);
|
|
148
|
+
}
|
|
149
|
+
return out;
|
|
150
|
+
}
|
|
151
|
+
default: {
|
|
152
|
+
const exhaustive: never = this.strategy;
|
|
153
|
+
throw new Error(`Unsupported strategy: ${exhaustive}`);
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
predictProba(X: Matrix): Matrix {
|
|
159
|
+
this.ensureFitted();
|
|
160
|
+
if (!Array.isArray(X) || X.length === 0) {
|
|
161
|
+
throw new Error("X must be a non-empty 2D array.");
|
|
162
|
+
}
|
|
163
|
+
if (!Array.isArray(X[0]) || X[0].length !== this.nFeaturesIn_) {
|
|
164
|
+
throw new Error(`Feature size mismatch. Expected ${this.nFeaturesIn_}, got ${X[0]?.length ?? 0}.`);
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
if (this.strategy === "uniform") {
|
|
168
|
+
const value = 1 / this.classes_!.length;
|
|
169
|
+
return X.map(() => new Array(this.classes_!.length).fill(value));
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
if (this.strategy === "most_frequent" || this.strategy === "constant") {
|
|
173
|
+
const oneHot = new Array<number>(this.classes_!.length).fill(0);
|
|
174
|
+
const label = this.strategy === "constant" ? this.constant_! : this.majorityClass!;
|
|
175
|
+
const classIndex = this.classes_!.indexOf(label);
|
|
176
|
+
if (classIndex >= 0) {
|
|
177
|
+
oneHot[classIndex] = 1;
|
|
178
|
+
}
|
|
179
|
+
return X.map(() => [...oneHot]);
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
// prior / stratified share prior probabilities.
|
|
183
|
+
const prior = [...this.classPrior_!];
|
|
184
|
+
return X.map(() => [...prior]);
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
score(X: Matrix, y: Vector): number {
|
|
188
|
+
return accuracyScore(y, this.predict(X));
|
|
189
|
+
}
|
|
190
|
+
}
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
import type { Matrix, Vector } from "../types";
|
|
2
|
+
import { r2Score } from "../metrics/regression";
|
|
3
|
+
import { assertFiniteVector, validateRegressionInputs } from "../utils/validation";
|
|
4
|
+
|
|
5
|
+
/** Supported prediction strategies for DummyRegressor. */
export type DummyRegressorStrategy = "mean" | "median" | "quantile" | "constant";

/** Configuration for DummyRegressor. */
export interface DummyRegressorOptions {
  /** Prediction strategy; defaults to "mean". */
  strategy?: DummyRegressorStrategy;
  /** Value predicted by the "constant" strategy (required for it). */
  constant?: number;
  /** Quantile in [0, 1] used by the "quantile" strategy; defaults to 0.5. */
  quantile?: number;
}
|
|
12
|
+
|
|
13
|
+
function computeMedian(values: number[]): number {
|
|
14
|
+
const sorted = [...values].sort((a, b) => a - b);
|
|
15
|
+
const mid = Math.floor(sorted.length / 2);
|
|
16
|
+
if (sorted.length % 2 === 0) {
|
|
17
|
+
return 0.5 * (sorted[mid - 1] + sorted[mid]);
|
|
18
|
+
}
|
|
19
|
+
return sorted[mid];
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
function computeQuantile(values: number[], q: number): number {
|
|
23
|
+
const sorted = [...values].sort((a, b) => a - b);
|
|
24
|
+
const pos = q * (sorted.length - 1);
|
|
25
|
+
const lo = Math.floor(pos);
|
|
26
|
+
const hi = Math.ceil(pos);
|
|
27
|
+
if (lo === hi) {
|
|
28
|
+
return sorted[lo];
|
|
29
|
+
}
|
|
30
|
+
const weight = pos - lo;
|
|
31
|
+
return sorted[lo] * (1 - weight) + sorted[hi] * weight;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
export class DummyRegressor {
|
|
35
|
+
constant_: number | null = null;
|
|
36
|
+
|
|
37
|
+
private readonly strategy: DummyRegressorStrategy;
|
|
38
|
+
private readonly constant?: number;
|
|
39
|
+
private readonly quantile: number;
|
|
40
|
+
private nFeaturesIn_: number | null = null;
|
|
41
|
+
|
|
42
|
+
constructor(options: DummyRegressorOptions = {}) {
|
|
43
|
+
this.strategy = options.strategy ?? "mean";
|
|
44
|
+
this.constant = options.constant;
|
|
45
|
+
this.quantile = options.quantile ?? 0.5;
|
|
46
|
+
|
|
47
|
+
if (this.strategy === "constant") {
|
|
48
|
+
if (!Number.isFinite(this.constant)) {
|
|
49
|
+
throw new Error("constant strategy requires a finite constant value.");
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
if (this.strategy === "quantile") {
|
|
54
|
+
if (!Number.isFinite(this.quantile) || this.quantile < 0 || this.quantile > 1) {
|
|
55
|
+
throw new Error(`quantile must be in [0, 1]. Got ${this.quantile}.`);
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
fit(X: Matrix, y: Vector): this {
|
|
61
|
+
validateRegressionInputs(X, y);
|
|
62
|
+
this.nFeaturesIn_ = X[0].length;
|
|
63
|
+
|
|
64
|
+
switch (this.strategy) {
|
|
65
|
+
case "mean": {
|
|
66
|
+
let total = 0;
|
|
67
|
+
for (let i = 0; i < y.length; i += 1) {
|
|
68
|
+
total += y[i];
|
|
69
|
+
}
|
|
70
|
+
this.constant_ = total / y.length;
|
|
71
|
+
break;
|
|
72
|
+
}
|
|
73
|
+
case "median":
|
|
74
|
+
this.constant_ = computeMedian(y);
|
|
75
|
+
break;
|
|
76
|
+
case "quantile":
|
|
77
|
+
this.constant_ = computeQuantile(y, this.quantile);
|
|
78
|
+
break;
|
|
79
|
+
case "constant":
|
|
80
|
+
this.constant_ = this.constant!;
|
|
81
|
+
break;
|
|
82
|
+
default: {
|
|
83
|
+
const exhaustive: never = this.strategy;
|
|
84
|
+
throw new Error(`Unsupported strategy: ${exhaustive}`);
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
return this;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
predict(X: Matrix): Vector {
|
|
92
|
+
if (this.constant_ === null || this.nFeaturesIn_ === null) {
|
|
93
|
+
throw new Error("DummyRegressor has not been fitted.");
|
|
94
|
+
}
|
|
95
|
+
if (!Array.isArray(X) || X.length === 0) {
|
|
96
|
+
throw new Error("X must be a non-empty 2D array.");
|
|
97
|
+
}
|
|
98
|
+
if (!Array.isArray(X[0]) || X[0].length !== this.nFeaturesIn_) {
|
|
99
|
+
throw new Error(`Feature size mismatch. Expected ${this.nFeaturesIn_}, got ${X[0]?.length ?? 0}.`);
|
|
100
|
+
}
|
|
101
|
+
return new Array<number>(X.length).fill(this.constant_);
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
score(X: Matrix, y: Vector): number {
|
|
105
|
+
assertFiniteVector(y);
|
|
106
|
+
return r2Score(y, this.predict(X));
|
|
107
|
+
}
|
|
108
|
+
}
|
|
@@ -2,6 +2,7 @@ import type { ClassificationModel, Matrix, Vector } from "../types";
|
|
|
2
2
|
import { accuracyScore } from "../metrics/classification";
|
|
3
3
|
import { DecisionTreeClassifier, type MaxFeaturesOption } from "../tree/DecisionTreeClassifier";
|
|
4
4
|
import { assertFiniteVector, validateClassificationInputs } from "../utils/validation";
|
|
5
|
+
import { getZigKernels } from "../native/zigKernels";
|
|
5
6
|
|
|
6
7
|
export interface RandomForestClassifierOptions {
|
|
7
8
|
nEstimators?: number;
|
|
@@ -23,8 +24,18 @@ function mulberry32(seed: number): () => number {
|
|
|
23
24
|
};
|
|
24
25
|
}
|
|
25
26
|
|
|
27
|
+
function isTruthy(value: string | undefined): boolean {
|
|
28
|
+
if (!value) {
|
|
29
|
+
return false;
|
|
30
|
+
}
|
|
31
|
+
const normalized = value.trim().toLowerCase();
|
|
32
|
+
return !(normalized === "0" || normalized === "false" || normalized === "off");
|
|
33
|
+
}
|
|
34
|
+
|
|
26
35
|
export class RandomForestClassifier implements ClassificationModel {
|
|
27
36
|
classes_: Vector = [0, 1];
|
|
37
|
+
fitBackend_: "zig" | "js" = "js";
|
|
38
|
+
fitBackendLibrary_: string | null = null;
|
|
28
39
|
private readonly nEstimators: number;
|
|
29
40
|
private readonly maxDepth?: number;
|
|
30
41
|
private readonly minSamplesSplit?: number;
|
|
@@ -32,6 +43,7 @@ export class RandomForestClassifier implements ClassificationModel {
|
|
|
32
43
|
private readonly maxFeatures: MaxFeaturesOption;
|
|
33
44
|
private readonly bootstrap: boolean;
|
|
34
45
|
private readonly randomState?: number;
|
|
46
|
+
private nativeModelHandle: bigint | null = null;
|
|
35
47
|
private trees: DecisionTreeClassifier[] = [];
|
|
36
48
|
|
|
37
49
|
constructor(options: RandomForestClassifierOptions = {}) {
|
|
@@ -49,6 +61,7 @@ export class RandomForestClassifier implements ClassificationModel {
|
|
|
49
61
|
}
|
|
50
62
|
|
|
51
63
|
fit(X: Matrix, y: Vector): this {
|
|
64
|
+
this.disposeNativeModel();
|
|
52
65
|
validateClassificationInputs(X, y);
|
|
53
66
|
|
|
54
67
|
const sampleCount = X.length;
|
|
@@ -56,10 +69,17 @@ export class RandomForestClassifier implements ClassificationModel {
|
|
|
56
69
|
const random = this.randomState === undefined ? Math.random : mulberry32(this.randomState);
|
|
57
70
|
const flattenedX = this.flattenTrainingMatrix(X, sampleCount, featureCount);
|
|
58
71
|
const yBinary = this.buildBinaryTargets(y);
|
|
72
|
+
const sampleIndices = new Uint32Array(sampleCount);
|
|
73
|
+
this.trees = [];
|
|
74
|
+
if (this.tryFitNativeForest(flattenedX, yBinary, sampleCount, featureCount)) {
|
|
75
|
+
this.fitBackend_ = "zig";
|
|
76
|
+
return this;
|
|
77
|
+
}
|
|
78
|
+
this.fitBackend_ = "js";
|
|
79
|
+
this.fitBackendLibrary_ = null;
|
|
59
80
|
this.trees = new Array(this.nEstimators);
|
|
60
81
|
|
|
61
82
|
for (let estimatorIndex = 0; estimatorIndex < this.nEstimators; estimatorIndex += 1) {
|
|
62
|
-
const sampleIndices = new Uint32Array(sampleCount);
|
|
63
83
|
if (this.bootstrap) {
|
|
64
84
|
for (let i = 0; i < sampleCount; i += 1) {
|
|
65
85
|
sampleIndices[i] = Math.floor(random() * sampleCount);
|
|
@@ -86,20 +106,47 @@ export class RandomForestClassifier implements ClassificationModel {
|
|
|
86
106
|
}
|
|
87
107
|
|
|
88
108
|
predict(X: Matrix): Vector {
|
|
109
|
+
if (this.nativeModelHandle !== null) {
|
|
110
|
+
const kernels = getZigKernels();
|
|
111
|
+
const predict = kernels?.randomForestClassifierModelPredict;
|
|
112
|
+
if (predict) {
|
|
113
|
+
const sampleCount = X.length;
|
|
114
|
+
const featureCount = X[0]?.length ?? 0;
|
|
115
|
+
const flattened = this.flattenTrainingMatrix(X, sampleCount, featureCount);
|
|
116
|
+
const out = new Uint8Array(sampleCount);
|
|
117
|
+
const status = predict(
|
|
118
|
+
this.nativeModelHandle,
|
|
119
|
+
flattened,
|
|
120
|
+
sampleCount,
|
|
121
|
+
featureCount,
|
|
122
|
+
out,
|
|
123
|
+
);
|
|
124
|
+
if (status === 1) {
|
|
125
|
+
return Array.from(out);
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
|
|
89
130
|
if (this.trees.length === 0) {
|
|
90
131
|
throw new Error("RandomForestClassifier has not been fitted.");
|
|
91
132
|
}
|
|
92
133
|
|
|
93
|
-
const treePredictions = this.trees.map((tree) => tree.predict(X));
|
|
94
134
|
const sampleCount = X.length;
|
|
95
|
-
const
|
|
135
|
+
const voteCounts = new Uint16Array(sampleCount);
|
|
96
136
|
|
|
97
|
-
for (let
|
|
98
|
-
|
|
99
|
-
for (let
|
|
100
|
-
|
|
137
|
+
for (let treeIndex = 0; treeIndex < this.trees.length; treeIndex += 1) {
|
|
138
|
+
const treePrediction = this.trees[treeIndex].predict(X);
|
|
139
|
+
for (let sampleIndex = 0; sampleIndex < sampleCount; sampleIndex += 1) {
|
|
140
|
+
if (treePrediction[sampleIndex] === 1) {
|
|
141
|
+
voteCounts[sampleIndex] += 1;
|
|
142
|
+
}
|
|
101
143
|
}
|
|
102
|
-
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
const predictions = new Array<number>(sampleCount);
|
|
147
|
+
const voteThreshold = this.trees.length;
|
|
148
|
+
for (let sampleIndex = 0; sampleIndex < sampleCount; sampleIndex += 1) {
|
|
149
|
+
predictions[sampleIndex] = voteCounts[sampleIndex] * 2 >= voteThreshold ? 1 : 0;
|
|
103
150
|
}
|
|
104
151
|
|
|
105
152
|
return predictions;
|
|
@@ -110,6 +157,105 @@ export class RandomForestClassifier implements ClassificationModel {
|
|
|
110
157
|
return accuracyScore(y, this.predict(X));
|
|
111
158
|
}
|
|
112
159
|
|
|
160
|
+
  /**
   * Releases all native resources held by this forest: the Zig-side model
   * handle (if one was created during fit) and each per-tree handle, then
   * drops the JS trees. Safe to call more than once.
   */
  dispose(): void {
    this.disposeNativeModel();
    for (let i = 0; i < this.trees.length; i += 1) {
      this.trees[i].dispose();
    }
    this.trees = [];
  }
|
|
167
|
+
|
|
168
|
+
  /**
   * Maps the configured maxFeatures option onto the (mode, value) pair the
   * native kernel expects: 0 = all features, 1 = "sqrt", 2 = "log2",
   * 3 = explicit count carried in `value` (clamped to [1, featureCount]).
   * NOTE(review): mode meanings inferred from this mapping — confirm against
   * the Zig kernel's contract.
   */
  private resolveNativeMaxFeatures(featureCount: number): {
    mode: 0 | 1 | 2 | 3;
    value: number;
  } {
    if (this.maxFeatures === null || this.maxFeatures === undefined) {
      return { mode: 0, value: 0 };
    }
    if (this.maxFeatures === "sqrt") {
      return { mode: 1, value: 0 };
    }
    if (this.maxFeatures === "log2") {
      return { mode: 2, value: 0 };
    }
    // Numeric maxFeatures is clamped to a valid feature count; non-finite
    // values fall back to using all features.
    const value = Number.isFinite(this.maxFeatures)
      ? Math.max(1, Math.min(featureCount, Math.floor(this.maxFeatures)))
      : featureCount;
    return { mode: 3, value };
  }
|
|
186
|
+
|
|
187
|
+
  /**
   * Attempts to train the forest with the native Zig kernel. Returns true
   * (and stores the native model handle) on success; returns false on any
   * failure so the caller can fall back to the JS implementation.
   *
   * Gated behind two env vars: BUN_SCIKIT_EXPERIMENTAL_NATIVE_FOREST must
   * be truthy AND BUN_SCIKIT_TREE_BACKEND must be "zig".
   */
  private tryFitNativeForest(
    flattenedX: Float64Array,
    yBinary: Uint8Array,
    sampleCount: number,
    featureCount: number,
  ): boolean {
    if (!isTruthy(process.env.BUN_SCIKIT_EXPERIMENTAL_NATIVE_FOREST)) {
      return false;
    }
    if (process.env.BUN_SCIKIT_TREE_BACKEND?.trim().toLowerCase() !== "zig") {
      return false;
    }
    // All three native entry points must be present to proceed.
    const kernels = getZigKernels();
    const create = kernels?.randomForestClassifierModelCreate;
    const fit = kernels?.randomForestClassifierModelFit;
    const destroy = kernels?.randomForestClassifierModelDestroy;
    if (!create || !fit || !destroy) {
      return false;
    }

    const { mode, value } = this.resolveNativeMaxFeatures(featureCount);
    const useRandomState = this.randomState === undefined ? 0 : 1;
    const randomState = this.randomState ?? 0;
    // NOTE(review): the 12 / 2 / 1 fallbacks presumably mirror the JS tree
    // defaults — confirm against DecisionTreeClassifier.
    const handle = create(
      this.nEstimators,
      this.maxDepth ?? 12,
      this.minSamplesSplit ?? 2,
      this.minSamplesLeaf ?? 1,
      mode,
      value,
      this.bootstrap ? 1 : 0,
      randomState >>> 0,
      useRandomState,
      featureCount,
    );
    // A 0n handle signals creation failure in the native layer.
    if (handle === 0n) {
      return false;
    }

    // Ensure the handle is destroyed on every failure path so it can't leak.
    let shouldDestroy = true;
    try {
      const status = fit(handle, flattenedX, yBinary, sampleCount, featureCount);
      if (status !== 1) {
        return false;
      }
      this.nativeModelHandle = handle;
      this.fitBackendLibrary_ = kernels.libraryPath;
      shouldDestroy = false;
      return true;
    } finally {
      if (shouldDestroy) {
        destroy(handle);
      }
    }
  }
|
|
242
|
+
|
|
243
|
+
  /**
   * Frees the native Zig model handle, if one is held. Destruction errors
   * are swallowed (best-effort cleanup), and the handle is always cleared
   * so a later dispose/fit never double-frees.
   */
  private disposeNativeModel(): void {
    if (this.nativeModelHandle === null) {
      return;
    }
    const kernels = getZigKernels();
    const destroy = kernels?.randomForestClassifierModelDestroy;
    if (destroy) {
      try {
        destroy(this.nativeModelHandle);
      } catch {
        // best effort cleanup
      }
    }
    this.nativeModelHandle = null;
  }
|
|
258
|
+
|
|
113
259
|
private flattenTrainingMatrix(
|
|
114
260
|
X: Matrix,
|
|
115
261
|
sampleCount: number,
|
|
@@ -56,10 +56,10 @@ export class RandomForestRegressor implements RegressionModel {
|
|
|
56
56
|
const random = this.randomState === undefined ? Math.random : mulberry32(this.randomState);
|
|
57
57
|
const flattenedX = this.flattenTrainingMatrix(X, sampleCount, featureCount);
|
|
58
58
|
const yValues = this.toFloat64Vector(y);
|
|
59
|
+
const sampleIndices = new Uint32Array(sampleCount);
|
|
59
60
|
this.trees = new Array(this.nEstimators);
|
|
60
61
|
|
|
61
62
|
for (let estimatorIndex = 0; estimatorIndex < this.nEstimators; estimatorIndex += 1) {
|
|
62
|
-
const sampleIndices = new Uint32Array(sampleCount);
|
|
63
63
|
if (this.bootstrap) {
|
|
64
64
|
for (let i = 0; i < sampleCount; i += 1) {
|
|
65
65
|
sampleIndices[i] = Math.floor(random() * sampleCount);
|
|
@@ -90,16 +90,20 @@ export class RandomForestRegressor implements RegressionModel {
|
|
|
90
90
|
throw new Error("RandomForestRegressor has not been fitted.");
|
|
91
91
|
}
|
|
92
92
|
|
|
93
|
-
const treePredictions = this.trees.map((tree) => tree.predict(X));
|
|
94
93
|
const sampleCount = X.length;
|
|
95
|
-
const
|
|
94
|
+
const sums = new Float64Array(sampleCount);
|
|
96
95
|
|
|
97
|
-
for (let
|
|
98
|
-
|
|
99
|
-
for (let
|
|
100
|
-
|
|
96
|
+
for (let treeIndex = 0; treeIndex < this.trees.length; treeIndex += 1) {
|
|
97
|
+
const treePrediction = this.trees[treeIndex].predict(X);
|
|
98
|
+
for (let sampleIndex = 0; sampleIndex < sampleCount; sampleIndex += 1) {
|
|
99
|
+
sums[sampleIndex] += treePrediction[sampleIndex];
|
|
101
100
|
}
|
|
102
|
-
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
const predictions = new Array<number>(sampleCount);
|
|
104
|
+
const denominator = this.trees.length;
|
|
105
|
+
for (let sampleIndex = 0; sampleIndex < sampleCount; sampleIndex += 1) {
|
|
106
|
+
predictions[sampleIndex] = sums[sampleIndex] / denominator;
|
|
103
107
|
}
|
|
104
108
|
|
|
105
109
|
return predictions;
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import type { Matrix } from "../types";
|
|
2
|
+
import {
|
|
3
|
+
assertConsistentRowSize,
|
|
4
|
+
assertFiniteMatrix,
|
|
5
|
+
assertNonEmptyMatrix,
|
|
6
|
+
} from "../utils/validation";
|
|
7
|
+
|
|
8
|
+
/** Configuration for VarianceThreshold. */
export interface VarianceThresholdOptions {
  /** Variance a feature must strictly exceed to be kept; defaults to 0. */
  threshold?: number;
}
|
|
11
|
+
|
|
12
|
+
export class VarianceThreshold {
|
|
13
|
+
variances_: number[] | null = null;
|
|
14
|
+
nFeaturesIn_: number | null = null;
|
|
15
|
+
selectedFeatureIndices_: number[] | null = null;
|
|
16
|
+
|
|
17
|
+
private readonly threshold: number;
|
|
18
|
+
|
|
19
|
+
constructor(options: VarianceThresholdOptions = {}) {
|
|
20
|
+
this.threshold = options.threshold ?? 0;
|
|
21
|
+
if (!Number.isFinite(this.threshold) || this.threshold < 0) {
|
|
22
|
+
throw new Error(`threshold must be finite and >= 0. Got ${this.threshold}.`);
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
fit(X: Matrix): this {
|
|
27
|
+
assertNonEmptyMatrix(X);
|
|
28
|
+
assertConsistentRowSize(X);
|
|
29
|
+
assertFiniteMatrix(X);
|
|
30
|
+
|
|
31
|
+
const nSamples = X.length;
|
|
32
|
+
const nFeatures = X[0].length;
|
|
33
|
+
const means = new Array<number>(nFeatures).fill(0);
|
|
34
|
+
const variances = new Array<number>(nFeatures).fill(0);
|
|
35
|
+
|
|
36
|
+
for (let i = 0; i < nSamples; i += 1) {
|
|
37
|
+
for (let j = 0; j < nFeatures; j += 1) {
|
|
38
|
+
means[j] += X[i][j];
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
for (let j = 0; j < nFeatures; j += 1) {
|
|
42
|
+
means[j] /= nSamples;
|
|
43
|
+
}
|
|
44
|
+
for (let i = 0; i < nSamples; i += 1) {
|
|
45
|
+
for (let j = 0; j < nFeatures; j += 1) {
|
|
46
|
+
const diff = X[i][j] - means[j];
|
|
47
|
+
variances[j] += diff * diff;
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
for (let j = 0; j < nFeatures; j += 1) {
|
|
51
|
+
variances[j] /= nSamples;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
const selectedFeatureIndices: number[] = [];
|
|
55
|
+
for (let j = 0; j < nFeatures; j += 1) {
|
|
56
|
+
if (variances[j] > this.threshold) {
|
|
57
|
+
selectedFeatureIndices.push(j);
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
if (selectedFeatureIndices.length === 0) {
|
|
61
|
+
throw new Error("No feature in X meets the variance threshold.");
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
this.nFeaturesIn_ = nFeatures;
|
|
65
|
+
this.variances_ = variances;
|
|
66
|
+
this.selectedFeatureIndices_ = selectedFeatureIndices;
|
|
67
|
+
return this;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
transform(X: Matrix): Matrix {
|
|
71
|
+
if (!this.selectedFeatureIndices_ || this.nFeaturesIn_ === null) {
|
|
72
|
+
throw new Error("VarianceThreshold has not been fitted.");
|
|
73
|
+
}
|
|
74
|
+
assertNonEmptyMatrix(X);
|
|
75
|
+
assertConsistentRowSize(X);
|
|
76
|
+
assertFiniteMatrix(X);
|
|
77
|
+
|
|
78
|
+
if (X[0].length !== this.nFeaturesIn_) {
|
|
79
|
+
throw new Error(`Feature size mismatch. Expected ${this.nFeaturesIn_}, got ${X[0].length}.`);
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
return X.map((row) => this.selectedFeatureIndices_!.map((featureIdx) => row[featureIdx]));
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
fitTransform(X: Matrix): Matrix {
|
|
86
|
+
return this.fit(X).transform(X);
|
|
87
|
+
}
|
|
88
|
+
}
|
package/src/index.ts
CHANGED
|
@@ -1,15 +1,28 @@
|
|
|
1
1
|
export * from "./types";
|
|
2
2
|
|
|
3
|
+
// Baselines
|
|
4
|
+
export * from "./dummy/DummyClassifier";
|
|
5
|
+
export * from "./dummy/DummyRegressor";
|
|
6
|
+
|
|
7
|
+
// Preprocessing
|
|
3
8
|
export * from "./preprocessing/StandardScaler";
|
|
4
9
|
export * from "./preprocessing/MinMaxScaler";
|
|
5
10
|
export * from "./preprocessing/RobustScaler";
|
|
11
|
+
export * from "./preprocessing/MaxAbsScaler";
|
|
12
|
+
export * from "./preprocessing/Normalizer";
|
|
13
|
+
export * from "./preprocessing/Binarizer";
|
|
14
|
+
export * from "./preprocessing/LabelEncoder";
|
|
6
15
|
export * from "./preprocessing/PolynomialFeatures";
|
|
7
16
|
export * from "./preprocessing/SimpleImputer";
|
|
8
17
|
export * from "./preprocessing/OneHotEncoder";
|
|
18
|
+
|
|
19
|
+
// Linear models
|
|
9
20
|
export * from "./linear_model/LinearRegression";
|
|
10
21
|
export * from "./linear_model/LogisticRegression";
|
|
11
22
|
export * from "./linear_model/SGDClassifier";
|
|
12
23
|
export * from "./linear_model/SGDRegressor";
|
|
24
|
+
|
|
25
|
+
// Other estimators
|
|
13
26
|
export * from "./neighbors/KNeighborsClassifier";
|
|
14
27
|
export * from "./naive_bayes/GaussianNB";
|
|
15
28
|
export * from "./svm/LinearSVC";
|
|
@@ -17,6 +30,8 @@ export * from "./tree/DecisionTreeClassifier";
|
|
|
17
30
|
export * from "./tree/DecisionTreeRegressor";
|
|
18
31
|
export * from "./ensemble/RandomForestClassifier";
|
|
19
32
|
export * from "./ensemble/RandomForestRegressor";
|
|
33
|
+
|
|
34
|
+
// Model selection
|
|
20
35
|
export * from "./model_selection/trainTestSplit";
|
|
21
36
|
export * from "./model_selection/KFold";
|
|
22
37
|
export * from "./model_selection/StratifiedKFold";
|
|
@@ -25,8 +40,16 @@ export * from "./model_selection/RepeatedKFold";
|
|
|
25
40
|
export * from "./model_selection/RepeatedStratifiedKFold";
|
|
26
41
|
export * from "./model_selection/crossValScore";
|
|
27
42
|
export * from "./model_selection/GridSearchCV";
|
|
43
|
+
export * from "./model_selection/RandomizedSearchCV";
|
|
44
|
+
|
|
45
|
+
// Feature selection
|
|
46
|
+
export * from "./feature_selection/VarianceThreshold";
|
|
47
|
+
|
|
48
|
+
// Composition
|
|
28
49
|
export * from "./pipeline/Pipeline";
|
|
29
50
|
export * from "./pipeline/ColumnTransformer";
|
|
30
51
|
export * from "./pipeline/FeatureUnion";
|
|
52
|
+
|
|
53
|
+
// Metrics
|
|
31
54
|
export * from "./metrics/regression";
|
|
32
55
|
export * from "./metrics/classification";
|