npm - bun-scikit - Versions diffs - 0.1.4 → 0.1.5 - Mend

bun-scikit 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/package.json +2 -1
package/scripts/build-node-addon.ts +17 -1
package/scripts/check-benchmark-health.ts +50 -5
package/src/ensemble/RandomForestClassifier.ts +154 -8
package/src/ensemble/RandomForestRegressor.ts +12 -8
package/src/native/node-addon/bun_scikit_addon.cpp +158 -0
package/src/native/zigKernels.ts +89 -0
package/src/tree/DecisionTreeClassifier.ts +13 -1
package/zig/kernels.zig +278 -57

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "bun-scikit",
-  "version": "0.1.4",
+  "version": "0.1.5",
   "description": "A scikit-learn-inspired machine learning library for Bun/TypeScript.",
   "license": "MIT",
   "module": "index.ts",
@@ -69,6 +69,7 @@
   "devDependencies": {
     "@types/bun": "latest",
     "node-addon-api": "^8.3.1",
+    "node-gyp": "^12.2.0",
     "typedoc": "^0.28.14",
     "typescript": "^5.9.2"
   }

package/scripts/build-node-addon.ts CHANGED Viewed

@@ -1,8 +1,24 @@
 import { cp, mkdir } from "node:fs/promises";
+import { createRequire } from "node:module";
 import { resolve } from "node:path";
+function resolveNodeGypCommand(): string[] {
+  const npmNodeGyp = process.env.npm_config_node_gyp?.trim();
+  if (npmNodeGyp) {
+    return ["node", npmNodeGyp, "rebuild"];
+  }
+  try {
+    const require = createRequire(import.meta.url);
+    const nodeGypScript = require.resolve("node-gyp/bin/node-gyp.js");
+    return ["node", nodeGypScript, "rebuild"];
+  } catch {
+    return ["node-gyp", "rebuild"];
+  }
+}
 async function main(): Promise<void> {
-  const child = Bun.spawn(["bunx", "node-gyp", "rebuild"], {
+  const child = Bun.spawn(resolveNodeGypCommand(), {
     stdout: "inherit",
     stderr: "inherit",
   });

package/scripts/check-benchmark-health.ts CHANGED Viewed

@@ -80,6 +80,14 @@ interface BenchmarkSnapshot {
   };
 }
+function parseArgValue(flag: string): string | null {
+  const index = Bun.argv.indexOf(flag);
+  if (index === -1 || index + 1 >= Bun.argv.length) {
+    return null;
+  }
+  return Bun.argv[index + 1];
+}
 function speedupThreshold(
   envName: string,
   defaultValue: number,
@@ -95,13 +103,18 @@ function speedupThreshold(
   return parsed;
 }
-const pathArgIndex = Bun.argv.indexOf("--input");
-const inputPath =
-  pathArgIndex !== -1 && pathArgIndex + 1 < Bun.argv.length
-    ? resolve(Bun.argv[pathArgIndex + 1])
-    : resolve("bench/results/heart-ci-current.json");
+const inputPath = resolve(parseArgValue("--input") ?? "bench/results/heart-ci-current.json");
+const baselinePath = resolve(
+  parseArgValue("--baseline") ?? process.env.BENCH_BASELINE_INPUT ?? "bench/results/heart-ci-latest.json",
+);
+const baselineInputEnabled = inputPath !== baselinePath;
 const snapshot = JSON.parse(await readFile(inputPath, "utf-8")) as BenchmarkSnapshot;
+const baselineSnapshot = baselineInputEnabled
+  ? ((await readFile(baselinePath, "utf-8").then((raw) => JSON.parse(raw) as BenchmarkSnapshot).catch(
+      () => null,
+    )) as BenchmarkSnapshot | null)
+  : null;
 const [bunRegression, sklearnRegression] = snapshot.suites.regression.results;
 const [bunClassification, sklearnClassification] = snapshot.suites.classification.results;
@@ -136,6 +149,14 @@ const maxZigForestPredictSlowdownVsJs = speedupThreshold(
   "BENCH_MAX_ZIG_FOREST_PREDICT_SLOWDOWN_VS_JS",
   20,
 );
+const minZigTreeFitRetentionVsBaseline = speedupThreshold(
+  "BENCH_MIN_ZIG_TREE_FIT_RETENTION_VS_BASELINE",
+  0.9,
+);
+const minZigForestFitRetentionVsBaseline = speedupThreshold(
+  "BENCH_MIN_ZIG_FOREST_FIT_RETENTION_VS_BASELINE",
+  0.9,
+);
 for (const result of [
   bunRegression,
@@ -296,6 +317,30 @@ if (snapshot.suites.treeBackendModes.enabled) {
       `RandomForest zig predict slowdown too large vs js-fast: ${randomForestPredictSlowdown} > ${maxZigForestPredictSlowdownVsJs}.`,
     );
   }
+  if (baselineSnapshot?.suites?.treeBackendModes?.enabled) {
+    const [baselineDecisionTreeModes, baselineRandomForestModes] =
+      baselineSnapshot.suites.treeBackendModes.models;
+    if (baselineDecisionTreeModes && baselineRandomForestModes) {
+      const decisionTreeFitRetention =
+        decisionTreeModes.comparison.zigFitSpeedupVsJs /
+        baselineDecisionTreeModes.comparison.zigFitSpeedupVsJs;
+      const randomForestFitRetention =
+        randomForestModes.comparison.zigFitSpeedupVsJs /
+        baselineRandomForestModes.comparison.zigFitSpeedupVsJs;
+      if (decisionTreeFitRetention < minZigTreeFitRetentionVsBaseline) {
+        throw new Error(
+          `DecisionTree zig/js fit retention too low vs baseline: ${decisionTreeFitRetention} < ${minZigTreeFitRetentionVsBaseline}.`,
+        );
+      }
+      if (randomForestFitRetention < minZigForestFitRetentionVsBaseline) {
+        throw new Error(
+          `RandomForest zig/js fit retention too low vs baseline: ${randomForestFitRetention} < ${minZigForestFitRetentionVsBaseline}.`,
+        );
+      }
+    }
+  }
 }
 console.log("Benchmark comparison health checks passed.");

package/src/ensemble/RandomForestClassifier.ts CHANGED Viewed

@@ -2,6 +2,7 @@ import type { ClassificationModel, Matrix, Vector } from "../types";
 import { accuracyScore } from "../metrics/classification";
 import { DecisionTreeClassifier, type MaxFeaturesOption } from "../tree/DecisionTreeClassifier";
 import { assertFiniteVector, validateClassificationInputs } from "../utils/validation";
+import { getZigKernels } from "../native/zigKernels";
 export interface RandomForestClassifierOptions {
   nEstimators?: number;
@@ -23,8 +24,18 @@ function mulberry32(seed: number): () => number {
   };
 }
+function isTruthy(value: string | undefined): boolean {
+  if (!value) {
+    return false;
+  }
+  const normalized = value.trim().toLowerCase();
+  return !(normalized === "0" || normalized === "false" || normalized === "off");
+}
 export class RandomForestClassifier implements ClassificationModel {
   classes_: Vector = [0, 1];
+  fitBackend_: "zig" | "js" = "js";
+  fitBackendLibrary_: string | null = null;
   private readonly nEstimators: number;
   private readonly maxDepth?: number;
   private readonly minSamplesSplit?: number;
@@ -32,6 +43,7 @@ export class RandomForestClassifier implements ClassificationModel {
   private readonly maxFeatures: MaxFeaturesOption;
   private readonly bootstrap: boolean;
   private readonly randomState?: number;
+  private nativeModelHandle: bigint | null = null;
   private trees: DecisionTreeClassifier[] = [];
   constructor(options: RandomForestClassifierOptions = {}) {
@@ -49,6 +61,7 @@ export class RandomForestClassifier implements ClassificationModel {
   }
   fit(X: Matrix, y: Vector): this {
+    this.disposeNativeModel();
     validateClassificationInputs(X, y);
     const sampleCount = X.length;
@@ -56,10 +69,17 @@ export class RandomForestClassifier implements ClassificationModel {
     const random = this.randomState === undefined ? Math.random : mulberry32(this.randomState);
     const flattenedX = this.flattenTrainingMatrix(X, sampleCount, featureCount);
     const yBinary = this.buildBinaryTargets(y);
+    const sampleIndices = new Uint32Array(sampleCount);
+    this.trees = [];
+    if (this.tryFitNativeForest(flattenedX, yBinary, sampleCount, featureCount)) {
+      this.fitBackend_ = "zig";
+      return this;
+    }
+    this.fitBackend_ = "js";
+    this.fitBackendLibrary_ = null;
     this.trees = new Array(this.nEstimators);
     for (let estimatorIndex = 0; estimatorIndex < this.nEstimators; estimatorIndex += 1) {
-      const sampleIndices = new Uint32Array(sampleCount);
       if (this.bootstrap) {
         for (let i = 0; i < sampleCount; i += 1) {
           sampleIndices[i] = Math.floor(random() * sampleCount);
@@ -86,20 +106,47 @@ export class RandomForestClassifier implements ClassificationModel {
   }
   predict(X: Matrix): Vector {
+    if (this.nativeModelHandle !== null) {
+      const kernels = getZigKernels();
+      const predict = kernels?.randomForestClassifierModelPredict;
+      if (predict) {
+        const sampleCount = X.length;
+        const featureCount = X[0]?.length ?? 0;
+        const flattened = this.flattenTrainingMatrix(X, sampleCount, featureCount);
+        const out = new Uint8Array(sampleCount);
+        const status = predict(
+          this.nativeModelHandle,
+          flattened,
+          sampleCount,
+          featureCount,
+          out,
+        );
+        if (status === 1) {
+          return Array.from(out);
+        }
+      }
+    }
     if (this.trees.length === 0) {
       throw new Error("RandomForestClassifier has not been fitted.");
     }
-    const treePredictions = this.trees.map((tree) => tree.predict(X));
     const sampleCount = X.length;
-    const predictions = new Array(sampleCount).fill(0);
+    const voteCounts = new Uint16Array(sampleCount);
-    for (let sampleIndex = 0; sampleIndex < sampleCount; sampleIndex += 1) {
-      let positiveVotes = 0;
-      for (let treeIndex = 0; treeIndex < treePredictions.length; treeIndex += 1) {
-        positiveVotes += treePredictions[treeIndex][sampleIndex] === 1 ? 1 : 0;
+    for (let treeIndex = 0; treeIndex < this.trees.length; treeIndex += 1) {
+      const treePrediction = this.trees[treeIndex].predict(X);
+      for (let sampleIndex = 0; sampleIndex < sampleCount; sampleIndex += 1) {
+        if (treePrediction[sampleIndex] === 1) {
+          voteCounts[sampleIndex] += 1;
+        }
       }
-      predictions[sampleIndex] = positiveVotes * 2 >= this.trees.length ? 1 : 0;
+    }
+    const predictions = new Array<number>(sampleCount);
+    const voteThreshold = this.trees.length;
+    for (let sampleIndex = 0; sampleIndex < sampleCount; sampleIndex += 1) {
+      predictions[sampleIndex] = voteCounts[sampleIndex] * 2 >= voteThreshold ? 1 : 0;
     }
     return predictions;
@@ -110,6 +157,105 @@ export class RandomForestClassifier implements ClassificationModel {
     return accuracyScore(y, this.predict(X));
   }
+  dispose(): void {
+    this.disposeNativeModel();
+    for (let i = 0; i < this.trees.length; i += 1) {
+      this.trees[i].dispose();
+    }
+    this.trees = [];
+  }
+  private resolveNativeMaxFeatures(featureCount: number): {
+    mode: 0 | 1 | 2 | 3;
+    value: number;
+  } {
+    if (this.maxFeatures === null || this.maxFeatures === undefined) {
+      return { mode: 0, value: 0 };
+    }
+    if (this.maxFeatures === "sqrt") {
+      return { mode: 1, value: 0 };
+    }
+    if (this.maxFeatures === "log2") {
+      return { mode: 2, value: 0 };
+    }
+    const value = Number.isFinite(this.maxFeatures)
+      ? Math.max(1, Math.min(featureCount, Math.floor(this.maxFeatures)))
+      : featureCount;
+    return { mode: 3, value };
+  }
+  private tryFitNativeForest(
+    flattenedX: Float64Array,
+    yBinary: Uint8Array,
+    sampleCount: number,
+    featureCount: number,
+  ): boolean {
+    if (!isTruthy(process.env.BUN_SCIKIT_EXPERIMENTAL_NATIVE_FOREST)) {
+      return false;
+    }
+    if (process.env.BUN_SCIKIT_TREE_BACKEND?.trim().toLowerCase() !== "zig") {
+      return false;
+    }
+    const kernels = getZigKernels();
+    const create = kernels?.randomForestClassifierModelCreate;
+    const fit = kernels?.randomForestClassifierModelFit;
+    const destroy = kernels?.randomForestClassifierModelDestroy;
+    if (!create || !fit || !destroy) {
+      return false;
+    }
+    const { mode, value } = this.resolveNativeMaxFeatures(featureCount);
+    const useRandomState = this.randomState === undefined ? 0 : 1;
+    const randomState = this.randomState ?? 0;
+    const handle = create(
+      this.nEstimators,
+      this.maxDepth ?? 12,
+      this.minSamplesSplit ?? 2,
+      this.minSamplesLeaf ?? 1,
+      mode,
+      value,
+      this.bootstrap ? 1 : 0,
+      randomState >>> 0,
+      useRandomState,
+      featureCount,
+    );
+    if (handle === 0n) {
+      return false;
+    }
+    let shouldDestroy = true;
+    try {
+      const status = fit(handle, flattenedX, yBinary, sampleCount, featureCount);
+      if (status !== 1) {
+        return false;
+      }
+      this.nativeModelHandle = handle;
+      this.fitBackendLibrary_ = kernels.libraryPath;
+      shouldDestroy = false;
+      return true;
+    } finally {
+      if (shouldDestroy) {
+        destroy(handle);
+      }
+    }
+  }
+  private disposeNativeModel(): void {
+    if (this.nativeModelHandle === null) {
+      return;
+    }
+    const kernels = getZigKernels();
+    const destroy = kernels?.randomForestClassifierModelDestroy;
+    if (destroy) {
+      try {
+        destroy(this.nativeModelHandle);
+      } catch {
+        // best effort cleanup
+      }
+    }
+    this.nativeModelHandle = null;
+  }
   private flattenTrainingMatrix(
     X: Matrix,
     sampleCount: number,

package/src/ensemble/RandomForestRegressor.ts CHANGED Viewed

@@ -56,10 +56,10 @@ export class RandomForestRegressor implements RegressionModel {
     const random = this.randomState === undefined ? Math.random : mulberry32(this.randomState);
     const flattenedX = this.flattenTrainingMatrix(X, sampleCount, featureCount);
     const yValues = this.toFloat64Vector(y);
+    const sampleIndices = new Uint32Array(sampleCount);
     this.trees = new Array(this.nEstimators);
     for (let estimatorIndex = 0; estimatorIndex < this.nEstimators; estimatorIndex += 1) {
-      const sampleIndices = new Uint32Array(sampleCount);
       if (this.bootstrap) {
         for (let i = 0; i < sampleCount; i += 1) {
           sampleIndices[i] = Math.floor(random() * sampleCount);
@@ -90,16 +90,20 @@ export class RandomForestRegressor implements RegressionModel {
       throw new Error("RandomForestRegressor has not been fitted.");
     }
-    const treePredictions = this.trees.map((tree) => tree.predict(X));
     const sampleCount = X.length;
-    const predictions = new Array<number>(sampleCount).fill(0);
+    const sums = new Float64Array(sampleCount);
-    for (let sampleIndex = 0; sampleIndex < sampleCount; sampleIndex += 1) {
-      let sum = 0;
-      for (let treeIndex = 0; treeIndex < treePredictions.length; treeIndex += 1) {
-        sum += treePredictions[treeIndex][sampleIndex];
+    for (let treeIndex = 0; treeIndex < this.trees.length; treeIndex += 1) {
+      const treePrediction = this.trees[treeIndex].predict(X);
+      for (let sampleIndex = 0; sampleIndex < sampleCount; sampleIndex += 1) {
+        sums[sampleIndex] += treePrediction[sampleIndex];
       }
-      predictions[sampleIndex] = sum / this.trees.length;
+    }
+    const predictions = new Array<number>(sampleCount);
+    const denominator = this.trees.length;
+    for (let sampleIndex = 0; sampleIndex < sampleCount; sampleIndex += 1) {
+      predictions[sampleIndex] = sums[sampleIndex] / denominator;
     }
     return predictions;

package/src/native/node-addon/bun_scikit_addon.cpp CHANGED Viewed

@@ -31,6 +31,20 @@ using DecisionTreeModelCreateFn = NativeHandle (*)(std::size_t, std::size_t, std
 using DecisionTreeModelDestroyFn = void (*)(NativeHandle);
 using DecisionTreeModelFitFn = std::uint8_t (*)(NativeHandle, const double*, const std::uint8_t*, std::size_t, std::size_t, const std::uint32_t*, std::size_t);
 using DecisionTreeModelPredictFn = std::uint8_t (*)(NativeHandle, const double*, std::size_t, std::size_t, std::uint8_t*);
+using RandomForestClassifierModelCreateFn = NativeHandle (*)(
+    std::size_t,
+    std::size_t,
+    std::size_t,
+    std::size_t,
+    std::uint8_t,
+    std::size_t,
+    std::uint8_t,
+    std::uint32_t,
+    std::uint8_t,
+    std::size_t);
+using RandomForestClassifierModelDestroyFn = void (*)(NativeHandle);
+using RandomForestClassifierModelFitFn = std::uint8_t (*)(NativeHandle, const double*, const std::uint8_t*, std::size_t, std::size_t);
+using RandomForestClassifierModelPredictFn = std::uint8_t (*)(NativeHandle, const double*, std::size_t, std::size_t, std::uint8_t*);
 struct KernelLibrary {
 #if defined(_WIN32)
@@ -55,6 +69,10 @@ struct KernelLibrary {
   DecisionTreeModelDestroyFn decision_tree_model_destroy{nullptr};
   DecisionTreeModelFitFn decision_tree_model_fit{nullptr};
   DecisionTreeModelPredictFn decision_tree_model_predict{nullptr};
+  RandomForestClassifierModelCreateFn random_forest_classifier_model_create{nullptr};
+  RandomForestClassifierModelDestroyFn random_forest_classifier_model_destroy{nullptr};
+  RandomForestClassifierModelFitFn random_forest_classifier_model_fit{nullptr};
+  RandomForestClassifierModelPredictFn random_forest_classifier_model_predict{nullptr};
 };
 KernelLibrary g_library{};
@@ -154,6 +172,14 @@ Napi::Value LoadNativeLibrary(const Napi::CallbackInfo& info) {
       loadSymbol<DecisionTreeModelFitFn>("decision_tree_model_fit");
   g_library.decision_tree_model_predict =
       loadSymbol<DecisionTreeModelPredictFn>("decision_tree_model_predict");
+  g_library.random_forest_classifier_model_create =
+      loadSymbol<RandomForestClassifierModelCreateFn>("random_forest_classifier_model_create");
+  g_library.random_forest_classifier_model_destroy =
+      loadSymbol<RandomForestClassifierModelDestroyFn>("random_forest_classifier_model_destroy");
+  g_library.random_forest_classifier_model_fit =
+      loadSymbol<RandomForestClassifierModelFitFn>("random_forest_classifier_model_fit");
+  g_library.random_forest_classifier_model_predict =
+      loadSymbol<RandomForestClassifierModelPredictFn>("random_forest_classifier_model_predict");
   return Napi::Boolean::New(env, true);
 }
@@ -567,6 +593,134 @@ Napi::Value DecisionTreeModelPredict(const Napi::CallbackInfo& info) {
   return Napi::Number::New(env, status);
 }
+Napi::Value RandomForestClassifierModelCreate(const Napi::CallbackInfo& info) {
+  const Napi::Env env = info.Env();
+  if (!isLibraryLoaded(env)) {
+    return env.Null();
+  }
+  if (!g_library.random_forest_classifier_model_create) {
+    throwError(env, "Symbol random_forest_classifier_model_create is unavailable.");
+    return env.Null();
+  }
+  if (info.Length() != 10 || !info[0].IsNumber() || !info[1].IsNumber() || !info[2].IsNumber() ||
+      !info[3].IsNumber() || !info[4].IsNumber() || !info[5].IsNumber() || !info[6].IsNumber() ||
+      !info[7].IsNumber() || !info[8].IsNumber() || !info[9].IsNumber()) {
+    throwTypeError(env, "randomForestClassifierModelCreate(nEstimators, maxDepth, minSamplesSplit, minSamplesLeaf, maxFeaturesMode, maxFeaturesValue, bootstrap, randomState, useRandomState, nFeatures) expects ten numbers.");
+    return env.Null();
+  }
+  const std::size_t n_estimators = static_cast<std::size_t>(info[0].As<Napi::Number>().Uint32Value());
+  const std::size_t max_depth = static_cast<std::size_t>(info[1].As<Napi::Number>().Uint32Value());
+  const std::size_t min_samples_split = static_cast<std::size_t>(info[2].As<Napi::Number>().Uint32Value());
+  const std::size_t min_samples_leaf = static_cast<std::size_t>(info[3].As<Napi::Number>().Uint32Value());
+  const std::uint8_t max_features_mode = static_cast<std::uint8_t>(info[4].As<Napi::Number>().Uint32Value());
+  const std::size_t max_features_value = static_cast<std::size_t>(info[5].As<Napi::Number>().Uint32Value());
+  const std::uint8_t bootstrap = static_cast<std::uint8_t>(info[6].As<Napi::Number>().Uint32Value());
+  const std::uint32_t random_state = static_cast<std::uint32_t>(info[7].As<Napi::Number>().Uint32Value());
+  const std::uint8_t use_random_state = static_cast<std::uint8_t>(info[8].As<Napi::Number>().Uint32Value());
+  const std::size_t n_features = static_cast<std::size_t>(info[9].As<Napi::Number>().Uint32Value());
+  const NativeHandle handle = g_library.random_forest_classifier_model_create(
+      n_estimators,
+      max_depth,
+      min_samples_split,
+      min_samples_leaf,
+      max_features_mode,
+      max_features_value,
+      bootstrap,
+      random_state,
+      use_random_state,
+      n_features);
+  return Napi::BigInt::New(env, static_cast<std::uint64_t>(handle));
+}
+Napi::Value RandomForestClassifierModelDestroy(const Napi::CallbackInfo& info) {
+  const Napi::Env env = info.Env();
+  if (!isLibraryLoaded(env)) {
+    return env.Null();
+  }
+  if (!g_library.random_forest_classifier_model_destroy) {
+    throwError(env, "Symbol random_forest_classifier_model_destroy is unavailable.");
+    return env.Null();
+  }
+  if (info.Length() != 1) {
+    throwTypeError(env, "randomForestClassifierModelDestroy(handle) expects one BigInt.");
+    return env.Null();
+  }
+  const NativeHandle handle = handleFromBigInt(info[0], env);
+  if (env.IsExceptionPending()) {
+    return env.Null();
+  }
+  g_library.random_forest_classifier_model_destroy(handle);
+  return env.Undefined();
+}
+Napi::Value RandomForestClassifierModelFit(const Napi::CallbackInfo& info) {
+  const Napi::Env env = info.Env();
+  if (!isLibraryLoaded(env)) {
+    return env.Null();
+  }
+  if (!g_library.random_forest_classifier_model_fit) {
+    throwError(env, "Symbol random_forest_classifier_model_fit is unavailable.");
+    return env.Null();
+  }
+  if (info.Length() != 5 || !info[1].IsTypedArray() || !info[2].IsTypedArray() ||
+      !info[3].IsNumber() || !info[4].IsNumber()) {
+    throwTypeError(env, "randomForestClassifierModelFit(handle, x, y, nSamples, nFeatures) has invalid arguments.");
+    return env.Null();
+  }
+  const NativeHandle handle = handleFromBigInt(info[0], env);
+  if (env.IsExceptionPending()) {
+    return env.Null();
+  }
+  auto x = info[1].As<Napi::Float64Array>();
+  auto y = info[2].As<Napi::Uint8Array>();
+  const std::size_t n_samples = static_cast<std::size_t>(info[3].As<Napi::Number>().Uint32Value());
+  const std::size_t n_features = static_cast<std::size_t>(info[4].As<Napi::Number>().Uint32Value());
+  const std::uint8_t status = g_library.random_forest_classifier_model_fit(
+      handle,
+      x.Data(),
+      y.Data(),
+      n_samples,
+      n_features);
+  return Napi::Number::New(env, status);
+}
+Napi::Value RandomForestClassifierModelPredict(const Napi::CallbackInfo& info) {
+  const Napi::Env env = info.Env();
+  if (!isLibraryLoaded(env)) {
+    return env.Null();
+  }
+  if (!g_library.random_forest_classifier_model_predict) {
+    throwError(env, "Symbol random_forest_classifier_model_predict is unavailable.");
+    return env.Null();
+  }
+  if (info.Length() != 5 || !info[1].IsTypedArray() || !info[2].IsNumber() || !info[3].IsNumber() ||
+      !info[4].IsTypedArray()) {
+    throwTypeError(env, "randomForestClassifierModelPredict(handle, x, nSamples, nFeatures, outLabels) has invalid arguments.");
+    return env.Null();
+  }
+  const NativeHandle handle = handleFromBigInt(info[0], env);
+  if (env.IsExceptionPending()) {
+    return env.Null();
+  }
+  auto x = info[1].As<Napi::Float64Array>();
+  const std::size_t n_samples = static_cast<std::size_t>(info[2].As<Napi::Number>().Uint32Value());
+  const std::size_t n_features = static_cast<std::size_t>(info[3].As<Napi::Number>().Uint32Value());
+  auto out_labels = info[4].As<Napi::Uint8Array>();
+  const std::uint8_t status = g_library.random_forest_classifier_model_predict(
+      handle,
+      x.Data(),
+      n_samples,
+      n_features,
+      out_labels.Data());
+  return Napi::Number::New(env, status);
+}
 Napi::Object Init(Napi::Env env, Napi::Object exports) {
   exports.Set("loadLibrary", Napi::Function::New(env, LoadNativeLibrary));
   exports.Set("unloadLibrary", Napi::Function::New(env, UnloadLibrary));
@@ -590,6 +744,10 @@ Napi::Object Init(Napi::Env env, Napi::Object exports) {
   exports.Set("decisionTreeModelDestroy", Napi::Function::New(env, DecisionTreeModelDestroy));
   exports.Set("decisionTreeModelFit", Napi::Function::New(env, DecisionTreeModelFit));
   exports.Set("decisionTreeModelPredict", Napi::Function::New(env, DecisionTreeModelPredict));
+  exports.Set("randomForestClassifierModelCreate", Napi::Function::New(env, RandomForestClassifierModelCreate));
+  exports.Set("randomForestClassifierModelDestroy", Napi::Function::New(env, RandomForestClassifierModelDestroy));
+  exports.Set("randomForestClassifierModelFit", Napi::Function::New(env, RandomForestClassifierModelFit));
+  exports.Set("randomForestClassifierModelPredict", Napi::Function::New(env, RandomForestClassifierModelPredict));
   return exports;
 }

package/src/native/zigKernels.ts CHANGED Viewed

@@ -88,6 +88,33 @@ type DecisionTreeModelPredictFn = (
   nFeatures: number,
   outLabels: Uint8Array,
 ) => number;
+type RandomForestClassifierModelCreateFn = (
+  nEstimators: number,
+  maxDepth: number,
+  minSamplesSplit: number,
+  minSamplesLeaf: number,
+  maxFeaturesMode: number,
+  maxFeaturesValue: number,
+  bootstrap: number,
+  randomState: number,
+  useRandomState: number,
+  nFeatures: number,
+) => NativeHandle;
+type RandomForestClassifierModelDestroyFn = (handle: NativeHandle) => void;
+type RandomForestClassifierModelFitFn = (
+  handle: NativeHandle,
+  x: Float64Array,
+  y: Uint8Array,
+  nSamples: number,
+  nFeatures: number,
+) => number;
+type RandomForestClassifierModelPredictFn = (
+  handle: NativeHandle,
+  x: Float64Array,
+  nSamples: number,
+  nFeatures: number,
+  outLabels: Uint8Array,
+) => number;
 type LogisticTrainEpochFn = (
   x: Float64Array,
@@ -138,6 +165,10 @@ interface ZigKernelLibrary {
     decision_tree_model_destroy?: DecisionTreeModelDestroyFn;
     decision_tree_model_fit?: DecisionTreeModelFitFn;
     decision_tree_model_predict?: DecisionTreeModelPredictFn;
+    random_forest_classifier_model_create?: RandomForestClassifierModelCreateFn;
+    random_forest_classifier_model_destroy?: RandomForestClassifierModelDestroyFn;
+    random_forest_classifier_model_fit?: RandomForestClassifierModelFitFn;
+    random_forest_classifier_model_predict?: RandomForestClassifierModelPredictFn;
     logistic_train_epoch?: LogisticTrainEpochFn;
     logistic_train_epochs?: LogisticTrainEpochsFn;
   };
@@ -162,6 +193,10 @@ export interface ZigKernels {
   decisionTreeModelDestroy: DecisionTreeModelDestroyFn | null;
   decisionTreeModelFit: DecisionTreeModelFitFn | null;
   decisionTreeModelPredict: DecisionTreeModelPredictFn | null;
+  randomForestClassifierModelCreate: RandomForestClassifierModelCreateFn | null;
+  randomForestClassifierModelDestroy: RandomForestClassifierModelDestroyFn | null;
+  randomForestClassifierModelFit: RandomForestClassifierModelFitFn | null;
+  randomForestClassifierModelPredict: RandomForestClassifierModelPredictFn | null;
   logisticTrainEpoch: LogisticTrainEpochFn | null;
   logisticTrainEpochs: LogisticTrainEpochsFn | null;
   abiVersion: number | null;
@@ -247,6 +282,10 @@ interface NodeApiAddon {
   decisionTreeModelDestroy?: DecisionTreeModelDestroyFn;
   decisionTreeModelFit?: DecisionTreeModelFitFn;
   decisionTreeModelPredict?: DecisionTreeModelPredictFn;
+  randomForestClassifierModelCreate?: RandomForestClassifierModelCreateFn;
+  randomForestClassifierModelDestroy?: RandomForestClassifierModelDestroyFn;
+  randomForestClassifierModelFit?: RandomForestClassifierModelFitFn;
+  randomForestClassifierModelPredict?: RandomForestClassifierModelPredictFn;
 }
 function tryLoadNodeApiKernels(): ZigKernels | null {
@@ -289,6 +328,13 @@ function tryLoadNodeApiKernels(): ZigKernels | null {
           decisionTreeModelDestroy: addon.decisionTreeModelDestroy ?? null,
           decisionTreeModelFit: addon.decisionTreeModelFit ?? null,
           decisionTreeModelPredict: addon.decisionTreeModelPredict ?? null,
+          randomForestClassifierModelCreate:
+            addon.randomForestClassifierModelCreate ?? null,
+          randomForestClassifierModelDestroy:
+            addon.randomForestClassifierModelDestroy ?? null,
+          randomForestClassifierModelFit: addon.randomForestClassifierModelFit ?? null,
+          randomForestClassifierModelPredict:
+            addon.randomForestClassifierModelPredict ?? null,
           logisticTrainEpoch: null,
           logisticTrainEpochs: null,
           abiVersion,
@@ -432,6 +478,33 @@ export function getZigKernels(): ZigKernels | null {
             args: ["usize", FFIType.ptr, "usize", "usize", FFIType.ptr],
             returns: FFIType.u8,
           },
+          random_forest_classifier_model_create: {
+            args: [
+              "usize",
+              "usize",
+              "usize",
+              "usize",
+              FFIType.u8,
+              "usize",
+              FFIType.u8,
+              FFIType.u32,
+              FFIType.u8,
+              "usize",
+            ],
+            returns: "usize",
+          },
+          random_forest_classifier_model_destroy: {
+            args: ["usize"],
+            returns: FFIType.void,
+          },
+          random_forest_classifier_model_fit: {
+            args: ["usize", FFIType.ptr, FFIType.ptr, "usize", "usize"],
+            returns: FFIType.u8,
+          },
+          random_forest_classifier_model_predict: {
+            args: ["usize", FFIType.ptr, "usize", "usize", FFIType.ptr],
+            returns: FFIType.u8,
+          },
           logistic_train_epoch: {
             args: [
               FFIType.ptr,
@@ -492,6 +565,14 @@ export function getZigKernels(): ZigKernels | null {
           decisionTreeModelDestroy: library.symbols.decision_tree_model_destroy ?? null,
           decisionTreeModelFit: library.symbols.decision_tree_model_fit ?? null,
           decisionTreeModelPredict: library.symbols.decision_tree_model_predict ?? null,
+          randomForestClassifierModelCreate:
+            library.symbols.random_forest_classifier_model_create ?? null,
+          randomForestClassifierModelDestroy:
+            library.symbols.random_forest_classifier_model_destroy ?? null,
+          randomForestClassifierModelFit:
+            library.symbols.random_forest_classifier_model_fit ?? null,
+          randomForestClassifierModelPredict:
+            library.symbols.random_forest_classifier_model_predict ?? null,
           logisticTrainEpoch: library.symbols.logistic_train_epoch ?? null,
           logisticTrainEpochs: library.symbols.logistic_train_epochs ?? null,
           abiVersion,
@@ -555,6 +636,10 @@ export function getZigKernels(): ZigKernels | null {
             decisionTreeModelDestroy: null,
             decisionTreeModelFit: null,
             decisionTreeModelPredict: null,
+            randomForestClassifierModelCreate: null,
+            randomForestClassifierModelDestroy: null,
+            randomForestClassifierModelFit: null,
+            randomForestClassifierModelPredict: null,
             logisticTrainEpoch: library.symbols.logistic_train_epoch ?? null,
             logisticTrainEpochs: library.symbols.logistic_train_epochs ?? null,
             abiVersion: null,
@@ -600,6 +685,10 @@ export function getZigKernels(): ZigKernels | null {
             decisionTreeModelDestroy: null,
             decisionTreeModelFit: null,
             decisionTreeModelPredict: null,
+            randomForestClassifierModelCreate: null,
+            randomForestClassifierModelDestroy: null,
+            randomForestClassifierModelFit: null,
+            randomForestClassifierModelPredict: null,
             logisticTrainEpoch: library.symbols.logistic_train_epoch ?? null,
             logisticTrainEpochs: null,
             abiVersion: null,

package/src/tree/DecisionTreeClassifier.ts CHANGED Viewed

@@ -185,7 +185,12 @@ export class DecisionTreeClassifier implements ClassificationModel {
       }
     }
-    return X.map((sample) => this.predictOne(sample, this.root!));
+    const predictions = new Array<number>(X.length);
+    const root = this.root!;
+    for (let i = 0; i < X.length; i += 1) {
+      predictions[i] = this.predictOne(X[i], root);
+    }
+    return predictions;
   }
   score(X: Matrix, y: Vector): number {
@@ -193,6 +198,13 @@ export class DecisionTreeClassifier implements ClassificationModel {
     return accuracyScore(y, this.predict(X));
   }
+  dispose(): void { // Tears down this classifier's fitted state; presumably safe to call more than once — confirm destroyZigModel() tolerates that.
+    this.destroyZigModel(); // runs first, before the fields below are cleared
+    this.root = null; // predict() walks the tree from this.root, so the tree is dropped here
+    this.flattenedXTrain = null; // NOTE(review): looks like cached training inputs — confirm against fit()
+    this.yBinaryTrain = null; // NOTE(review): looks like cached training labels — confirm against fit()
+  }
   private predictOne(sample: Vector, node: TreeNode): 0 | 1 {
     let current: TreeNode = node;
     while (

package/zig/kernels.zig CHANGED Viewed

@@ -74,12 +74,31 @@ const DecisionTreeModel = struct {
     use_random_state: bool,
     root_index: usize,
     has_root: bool,
+    feature_scratch: []usize,
     nodes: std.ArrayListUnmanaged(TreeNode),
 };
-const SplitResult = struct {
+const RandomForestClassifierModel = struct { // Forest state behind the integer handle used by the random_forest_classifier_model_* exports.
+    n_features: usize, // fixed at create; fit and predict return 0 when their n_features differs
+    n_estimators: usize, // number of trees fit builds; also the length of tree_handles
+    max_depth: usize, // forwarded to decision_tree_model_create for every tree
+    min_samples_split: usize, // raised to at least 2 at create
+    min_samples_leaf: usize, // raised to at least 1 at create
+    max_features_mode: u8, // forwarded unchanged to every tree
+    max_features_value: usize, // forwarded unchanged to every tree
+    bootstrap: bool, // true: each tree gets n_samples indices drawn via rng.nextIndex; false: indices 0..n_samples in order
+    random_state: u32, // seed used when use_random_state is set
+    use_random_state: bool, // false: fit seeds its RNG from std.time.microTimestamp()
+    tree_handles: []usize, // decision-tree handles; 0 marks an empty slot
+    fitted_estimators: usize, // count of leading tree_handles slots that hold fitted trees
+};
+const SplitEvaluation = struct { // Best threshold found for one feature and its impurity, as returned by findBestSplitForFeature; carries no index buffers.
     threshold: f64,
     impurity: f64,
+};
+const SplitPartition = struct { // Sample indices on each side of a threshold, as built by partitionIndicesForThreshold (values <= threshold go left).
     left_indices: []usize,
     right_indices: []usize,
 };
@@ -167,41 +186,33 @@ fn resolveMaxFeatures(model: *const DecisionTreeModel) usize {
     }
 }
-fn freeSplit(split: SplitResult) void {
-    allocator.free(split.left_indices);
-    allocator.free(split.right_indices);
+/// Reinterprets an integer handle as a pointer to a random-forest model.
+/// A zero handle maps to null; any other value is converted to a pointer
+/// without further validation.
+inline fn asRandomForestClassifierModel(handle: usize) ?*RandomForestClassifierModel {
+    return if (handle == 0)
+        null
+    else
+        @as(*RandomForestClassifierModel, @ptrFromInt(handle));
 }
-fn selectCandidateFeatures(model: *const DecisionTreeModel, rng: *Mulberry32) ![]usize {
-    const k = resolveMaxFeatures(model);
-    if (k >= model.n_features) {
-        const all_features = try allocator.alloc(usize, model.n_features);
-        errdefer allocator.free(all_features);
-        for (all_features, 0..) |*entry, idx| {
-            entry.* = idx;
-        }
-        return all_features;
+fn selectCandidateFeatures(model: *DecisionTreeModel, rng: *Mulberry32) []const usize { // Chooses the feature indices to evaluate at one node. The result is a view into model.feature_scratch: the next call overwrites it and the caller must not free it.
+    for (model.feature_scratch, 0..) |*entry, idx| { // reset the scratch buffer to 0, 1, 2, ...
+        entry.* = idx;
     }
-    const shuffled = try allocator.alloc(usize, model.n_features);
-    errdefer allocator.free(shuffled);
-    for (shuffled, 0..) |*entry, idx| {
-        entry.* = idx;
+    const k = resolveMaxFeatures(model);
+    if (k >= model.n_features) { // every feature is a candidate, so no shuffling is needed
+        return model.feature_scratch[0..model.n_features];
     }
-    var i = model.n_features;
-    while (i > 1) {
-        i -= 1;
-        const j = rng.nextIndex(i + 1);
-        const tmp = shuffled[i];
-        shuffled[i] = shuffled[j];
-        shuffled[j] = tmp;
+    var i: usize = 0;
+    while (i < k) : (i += 1) { // partial shuffle: only the first k slots are settled
+        const remaining = model.n_features - i;
+        const j = i + rng.nextIndex(remaining); // assumes nextIndex(n) yields a value in [0, n), so j lies in [i, n_features) — TODO confirm
+        const tmp = model.feature_scratch[i];
+        model.feature_scratch[i] = model.feature_scratch[j];
+        model.feature_scratch[j] = tmp;
     }
-    const selected = try allocator.alloc(usize, k);
-    @memcpy(selected, shuffled[0..k]);
-    allocator.free(shuffled);
-    return selected;
+    return model.feature_scratch[0..k]; // the k selected features, with no allocation
 }
 fn findBestSplitForFeature(
@@ -210,7 +221,7 @@ fn findBestSplitForFeature(
     y_ptr: [*]const u8,
     indices: []const usize,
     feature_index: usize,
-) !?SplitResult {
+) ?SplitEvaluation {
     const sample_count = indices.len;
     if (sample_count < 2) {
         return null;
@@ -282,29 +293,41 @@ fn findBestSplitForFeature(
         return null;
     }
-    var left_partition_count: usize = 0;
+    return SplitEvaluation{
+        .threshold = best_threshold,
+        .impurity = best_impurity,
+    };
+}
+fn partitionIndicesForThreshold(
+    model: *const DecisionTreeModel,
+    workspace: std.mem.Allocator,
+    x_ptr: [*]const f64,
+    indices: []const usize,
+    feature_index: usize,
+    threshold: f64,
+) !?SplitPartition {
+    var left_count: usize = 0;
     for (indices) |sample_index| {
         const value = x_ptr[sample_index * model.n_features + feature_index];
-        if (value <= best_threshold) {
-            left_partition_count += 1;
+        if (value <= threshold) {
+            left_count += 1;
         }
     }
-    const right_partition_count = sample_count - left_partition_count;
-    if (left_partition_count < model.min_samples_leaf or right_partition_count < model.min_samples_leaf) {
+    const right_count = indices.len - left_count;
+    if (left_count < model.min_samples_leaf or right_count < model.min_samples_leaf) {
         return null;
     }
-    const left_indices = try allocator.alloc(usize, left_partition_count);
-    errdefer allocator.free(left_indices);
-    const right_indices = try allocator.alloc(usize, right_partition_count);
-    errdefer allocator.free(right_indices);
+    const left_indices = try workspace.alloc(usize, left_count);
+    const right_indices = try workspace.alloc(usize, right_count);
     var left_write: usize = 0;
     var right_write: usize = 0;
     for (indices) |sample_index| {
         const value = x_ptr[sample_index * model.n_features + feature_index];
-        if (value <= best_threshold) {
+        if (value <= threshold) {
             left_indices[left_write] = sample_index;
             left_write += 1;
         } else {
@@ -313,9 +336,7 @@ fn findBestSplitForFeature(
         }
     }
-    return SplitResult{
-        .threshold = best_threshold,
-        .impurity = best_impurity,
+    return SplitPartition{
         .left_indices = left_indices,
         .right_indices = right_indices,
     };
@@ -323,6 +344,7 @@ fn findBestSplitForFeature(
 fn buildDecisionTreeNode(
     model: *DecisionTreeModel,
+    workspace: std.mem.Allocator,
     x_ptr: [*]const f64,
     y_ptr: [*]const u8,
     indices: []const usize,
@@ -353,25 +375,19 @@ fn buildDecisionTreeNode(
     }
     const parent_impurity = giniImpurity(positive_count, sample_count);
-    const candidate_features = try selectCandidateFeatures(model, rng);
-    defer allocator.free(candidate_features);
+    const candidate_features = selectCandidateFeatures(model, rng);
     var best_feature: usize = 0;
-    var best_split: ?SplitResult = null;
+    var best_split: ?SplitEvaluation = null;
     var best_found = false;
     for (candidate_features) |feature_index| {
-        const split_opt = try findBestSplitForFeature(model, x_ptr, y_ptr, indices, feature_index);
+        const split_opt = findBestSplitForFeature(model, x_ptr, y_ptr, indices, feature_index);
         if (split_opt) |split| {
             if (!best_found or split.impurity < best_split.?.impurity) {
-                if (best_split) |previous| {
-                    freeSplit(previous);
-                }
                 best_split = split;
                 best_feature = feature_index;
                 best_found = true;
-            } else {
-                freeSplit(split);
             }
         }
     }
@@ -390,7 +406,6 @@ fn buildDecisionTreeNode(
     }
     const split = best_split.?;
-    defer freeSplit(split);
     if (split.impurity >= parent_impurity - 1e-12) {
         const node_index = model.nodes.items.len;
         try model.nodes.append(allocator, TreeNode{
@@ -404,6 +419,25 @@ fn buildDecisionTreeNode(
         return node_index;
     }
+    const partition = (try partitionIndicesForThreshold(
+        model,
+        workspace,
+        x_ptr,
+        indices,
+        best_feature,
+        split.threshold,
+    )) orelse {
+        const node_index = model.nodes.items.len;
+        try model.nodes.append(allocator, TreeNode{
+            .prediction = prediction,
+            .feature_index = 0,
+            .threshold = 0.0,
+            .left_index = 0,
+            .right_index = 0,
+            .is_leaf = true,
+        });
+        return node_index;
+    };
     const node_index = model.nodes.items.len;
     try model.nodes.append(allocator, TreeNode{
         .prediction = prediction,
@@ -416,17 +450,19 @@ fn buildDecisionTreeNode(
     const left_index = try buildDecisionTreeNode(
         model,
+        workspace,
         x_ptr,
         y_ptr,
-        split.left_indices,
+        partition.left_indices,
         depth + 1,
         rng,
     );
     const right_index = try buildDecisionTreeNode(
         model,
+        workspace,
         x_ptr,
         y_ptr,
-        split.right_indices,
+        partition.right_indices,
         depth + 1,
         rng,
     );
@@ -1136,6 +1172,11 @@ pub export fn decision_tree_model_create(
     const model = allocator.create(DecisionTreeModel) catch return 0;
     errdefer allocator.destroy(model);
+    const feature_scratch = allocator.alloc(usize, n_features) catch return 0;
+    errdefer allocator.free(feature_scratch);
+    for (feature_scratch, 0..) |*entry, idx| {
+        entry.* = idx;
+    }
     model.* = .{
         .n_features = n_features,
         .max_depth = max_depth,
@@ -1147,6 +1188,7 @@ pub export fn decision_tree_model_create(
         .use_random_state = use_random_state != 0,
         .root_index = 0,
         .has_root = false,
+        .feature_scratch = feature_scratch,
         .nodes = .empty,
     };
     return @intFromPtr(model);
@@ -1154,6 +1196,7 @@ pub export fn decision_tree_model_create(
 pub export fn decision_tree_model_destroy(handle: usize) void {
     const model = asDecisionTreeModel(handle) orelse return;
+    allocator.free(model.feature_scratch); // releases the scratch buffer allocated in decision_tree_model_create
     model.nodes.deinit(allocator);
     allocator.destroy(model);
 }
@@ -1180,8 +1223,11 @@ pub export fn decision_tree_model_fit(
         return 0;
     }
-    const root_indices = allocator.alloc(usize, root_size) catch return 0;
-    defer allocator.free(root_indices);
+    var arena = std.heap.ArenaAllocator.init(allocator);
+    defer arena.deinit();
+    const workspace = arena.allocator();
+    const root_indices = workspace.alloc(usize, root_size) catch return 0;
     if (sample_count == 0) {
         for (root_indices, 0..) |*entry, idx| {
@@ -1202,7 +1248,7 @@ pub export fn decision_tree_model_fit(
     else
         @as(u32, @truncate(@as(u64, @bitCast(std.time.microTimestamp()))));
     var rng = Mulberry32.init(rng_seed);
-    const root_index = buildDecisionTreeNode(model, x_ptr, y_ptr, root_indices, 0, &rng) catch {
+    const root_index = buildDecisionTreeNode(model, workspace, x_ptr, y_ptr, root_indices, 0, &rng) catch {
         model.nodes.clearRetainingCapacity();
         model.has_root = false;
         return 0;
@@ -1243,6 +1289,181 @@ pub export fn decision_tree_model_predict(
     return 1;
 }
+/// Destroys every tree the forest currently records and marks it as unfitted.
+/// Only the first `fitted_estimators` slots are visited; slots holding 0 are
+/// skipped, and each destroyed slot is zeroed so it cannot be freed twice.
+fn resetRandomForestClassifierModel(model: *RandomForestClassifierModel) void {
+    for (model.tree_handles[0..model.fitted_estimators]) |*slot| {
+        if (slot.* == 0) continue;
+        decision_tree_model_destroy(slot.*);
+        slot.* = 0;
+    }
+    model.fitted_estimators = 0;
+}
+/// Allocates a random-forest classifier model and returns it as an integer handle.
+///
+/// Returns 0 when `n_features`, `max_depth` or `n_estimators` is 0, or when an
+/// allocation fails. `min_samples_split` is raised to at least 2 and
+/// `min_samples_leaf` to at least 1; `bootstrap` and `use_random_state` are
+/// treated as booleans (non-zero means true). No trees are built here: every
+/// slot of `tree_handles` starts at 0 and `fitted_estimators` starts at 0.
+/// The handle must be released with random_forest_classifier_model_destroy.
+pub export fn random_forest_classifier_model_create(
+    n_estimators: usize,
+    max_depth: usize,
+    min_samples_split: usize,
+    min_samples_leaf: usize,
+    max_features_mode: u8,
+    max_features_value: usize,
+    bootstrap: u8,
+    random_state: u32,
+    use_random_state: u8,
+    n_features: usize,
+) usize {
+    if (n_features == 0 or max_depth == 0 or n_estimators == 0) {
+        return 0;
+    }
+    const model = allocator.create(RandomForestClassifierModel) catch return 0;
+    // This function returns `usize`, not an error union, so `errdefer` would never
+    // run on `return 0`. Free the model explicitly if the second allocation fails.
+    const tree_handles = allocator.alloc(usize, n_estimators) catch {
+        allocator.destroy(model);
+        return 0;
+    };
+    @memset(tree_handles, 0);
+    model.* = .{
+        .n_features = n_features,
+        .n_estimators = n_estimators,
+        .max_depth = max_depth,
+        .min_samples_split = if (min_samples_split < 2) 2 else min_samples_split,
+        .min_samples_leaf = if (min_samples_leaf < 1) 1 else min_samples_leaf,
+        .max_features_mode = max_features_mode,
+        .max_features_value = max_features_value,
+        .bootstrap = bootstrap != 0,
+        .random_state = random_state,
+        .use_random_state = use_random_state != 0,
+        .tree_handles = tree_handles,
+        .fitted_estimators = 0,
+    };
+    return @intFromPtr(model);
+}
+/// Releases a forest: destroys its fitted trees, frees the handle table, then
+/// frees the model itself. A zero handle is ignored.
+pub export fn random_forest_classifier_model_destroy(handle: usize) void {
+    if (asRandomForestClassifierModel(handle)) |forest| {
+        resetRandomForestClassifierModel(forest);
+        allocator.free(forest.tree_handles);
+        allocator.destroy(forest);
+    }
+}
+/// Fits `n_estimators` decision trees on the given data and stores their handles.
+///
+/// `x_ptr` is read as a row-major `n_samples * n_features` matrix and `y_ptr`
+/// as `n_samples` labels. Any previously fitted trees are destroyed first.
+/// With `bootstrap` set, each tree trains on `n_samples` indices drawn with
+/// replacement from the forest RNG; otherwise on indices 0..n_samples in order.
+///
+/// Returns 1 on success. Returns 0 for a zero handle, empty input, a feature
+/// count that differs from the one given at create, a sample count that does
+/// not fit in `u32`, or any allocation or tree-fit failure. In the failure
+/// cases reached after validation the forest is left with no fitted trees.
+pub export fn random_forest_classifier_model_fit(
+    handle: usize,
+    x_ptr: [*]const f64,
+    y_ptr: [*]const u8,
+    n_samples: usize,
+    n_features: usize,
+) u8 {
+    const model = asRandomForestClassifierModel(handle) orelse return 0;
+    if (n_samples == 0 or n_features == 0 or n_features != model.n_features) {
+        return 0;
+    }
+    // Sample indices are stored as u32; reject inputs whose indices would be
+    // silently truncated below.
+    if (n_samples > std.math.maxInt(u32)) {
+        return 0;
+    }
+    resetRandomForestClassifierModel(model);
+    const sample_indices = allocator.alloc(u32, n_samples) catch return 0;
+    defer allocator.free(sample_indices);
+    const rng_seed: u32 = if (model.use_random_state)
+        model.random_state
+    else
+        @as(u32, @truncate(@as(u64, @bitCast(std.time.microTimestamp()))));
+    var rng = Mulberry32.init(rng_seed);
+    var estimator_index: usize = 0;
+    while (estimator_index < model.n_estimators) : (estimator_index += 1) {
+        // Each tree gets its own seed: the configured seed (or the current
+        // forest RNG state when unseeded) offset by the 1-based tree number.
+        const base_seed: u32 = if (model.use_random_state) model.random_state else rng.state;
+        const tree_seed: u32 = base_seed +% @as(u32, @truncate(estimator_index + 1));
+        const tree_handle = decision_tree_model_create(
+            model.max_depth,
+            model.min_samples_split,
+            model.min_samples_leaf,
+            model.max_features_mode,
+            model.max_features_value,
+            tree_seed,
+            // Always ask the tree to honor tree_seed. Passing 0 for an unseeded
+            // forest would discard the derived seed and leave the tree to seed
+            // itself, so trees fitted close together could share a seed.
+            1,
+            model.n_features,
+        );
+        if (tree_handle == 0) {
+            resetRandomForestClassifierModel(model);
+            return 0;
+        }
+        if (model.bootstrap) {
+            var i: usize = 0;
+            while (i < n_samples) : (i += 1) {
+                sample_indices[i] = @as(u32, @truncate(rng.nextIndex(n_samples)));
+            }
+        } else {
+            for (sample_indices, 0..) |*entry, idx| {
+                entry.* = @as(u32, @truncate(idx));
+            }
+        }
+        const fit_status = decision_tree_model_fit(
+            tree_handle,
+            x_ptr,
+            y_ptr,
+            n_samples,
+            n_features,
+            sample_indices.ptr,
+            n_samples,
+        );
+        if (fit_status != 1) {
+            // The failed tree is not yet recorded in tree_handles, so destroy
+            // it here before discarding the trees that were recorded.
+            decision_tree_model_destroy(tree_handle);
+            resetRandomForestClassifierModel(model);
+            return 0;
+        }
+        model.tree_handles[estimator_index] = tree_handle;
+        model.fitted_estimators = estimator_index + 1;
+    }
+    return 1;
+}
+/// Predicts a 0/1 label for every row of `x_ptr` by majority vote of the fitted trees.
+///
+/// `x_ptr` is read as a row-major `n_samples * n_features` matrix and one label
+/// per row is written to `out_labels_ptr`. A row is labelled 1 when at least
+/// half of `fitted_estimators` voted 1, so exact ties resolve to 1. Trees with
+/// a zero handle or without a root cast no vote but still count toward the total.
+///
+/// Returns 1 on success; 0 for a zero handle, an unfitted forest, `n_samples == 0`,
+/// or a feature count that differs from the one given at create.
+pub export fn random_forest_classifier_model_predict(
+    handle: usize,
+    x_ptr: [*]const f64,
+    n_samples: usize,
+    n_features: usize,
+    out_labels_ptr: [*]u8,
+) u8 {
+    const forest = asRandomForestClassifierModel(handle) orelse return 0;
+    const tree_count = forest.fitted_estimators;
+    if (tree_count == 0 or n_samples == 0 or n_features != forest.n_features) {
+        return 0;
+    }
+    const fitted_handles = forest.tree_handles[0..tree_count];
+    for (0..n_samples) |sample| {
+        const base = sample * forest.n_features;
+        var votes_for_one: usize = 0;
+        for (fitted_handles) |tree_handle| {
+            const tree = asDecisionTreeModel(tree_handle) orelse continue;
+            if (!tree.has_root) continue;
+            // Walk from the root until a leaf is reached.
+            var node = tree.nodes.items[tree.root_index];
+            while (!node.is_leaf) {
+                const go_left = x_ptr[base + node.feature_index] <= node.threshold;
+                node = tree.nodes.items[if (go_left) node.left_index else node.right_index];
+            }
+            if (node.prediction == 1) votes_for_one += 1;
+        }
+        out_labels_ptr[sample] = @intFromBool(votes_for_one * 2 >= tree_count);
+    }
+    return 1;
+}
 pub export fn logistic_train_epoch(
     x_ptr: [*]const f64,
     y_ptr: [*]const f64,