npm - @wlearn/ensemble - Versions diffs - 0.1.0 - Mend

@wlearn/ensemble 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/src/stacking.js ADDED Viewed

@@ -0,0 +1,372 @@
+import {
+  encodeBundle, decodeBundle, register, load as registryLoad,
+  normalizeX, normalizeY, accuracy, r2Score,
+  stratifiedKFold, kFold,
+  ValidationError, NotFittedError, DisposedError,
+  lift
+} from '@wlearn/core'
+// Registry type ids: save() writes one of them as the manifest typeId
+// (chosen by task) and _register() binds both to the same bundle loader.
+const TYPE_ID_CLS = 'wlearn.ensemble.stacking.classifier@1'
+const TYPE_ID_REG = 'wlearn.ensemble.stacking.regressor@1'
+// Module-level guard so StackingEnsemble._register() calls register() only once.
+let _registered = false
+export class StackingEnsemble {
+  #baseSpecs      // [name, Class, params][]
+  #metaSpec       // [name, Class, params]
+  #baseModels     // fitted base model instances (on full data)
+  #metaModel      // fitted meta-model instance
+  #cv
+  #task
+  #passthrough
+  #seed
+  #classes
+  #nClasses
+  #nMetaCols
+  #fitted = false
+  #disposed = false
+  constructor(params) {
+    this.#baseSpecs = params.estimators || []
+    this.#metaSpec = params.finalEstimator || null
+    this.#cv = params.cv || 5
+    this.#task = params.task || 'classification'
+    this.#passthrough = params.passthrough || false
+    this.#seed = params.seed ?? 42
+    this.#baseModels = null
+    this.#metaModel = null
+    this.#classes = null
+    this.#nClasses = 0
+    this.#nMetaCols = 0
+    StackingEnsemble._register()
+  }
+  static async create(params = {}) {
+    return new StackingEnsemble(params)
+  }
+  #ensureAlive() {
+    if (this.#disposed) throw new DisposedError('StackingEnsemble has been disposed.')
+  }
+  #ensureFitted() {
+    this.#ensureAlive()
+    if (!this.#fitted) throw new NotFittedError('StackingEnsemble is not fitted. Call fit() first.')
+  }
+  async fit(X, y) {
+    this.#ensureAlive()
+    if (!this.#metaSpec) {
+      throw new ValidationError('StackingEnsemble requires a finalEstimator')
+    }
+    const Xn = normalizeX(X)
+    const yn = normalizeY(y)
+    const n = Xn.rows
+    // Discover classes
+    if (this.#task === 'classification') {
+      const labelSet = new Set()
+      for (let i = 0; i < yn.length; i++) labelSet.add(yn[i])
+      this.#classes = new Int32Array([...labelSet].sort((a, b) => a - b))
+      this.#nClasses = this.#classes.length
+    }
+    // Generate folds
+    const folds = this.#task === 'classification'
+      ? stratifiedKFold(yn, this.#cv, { shuffle: true, seed: this.#seed })
+      : kFold(n, this.#cv, { shuffle: true, seed: this.#seed })
+    // Step 1: Generate OOF predictions for each base model
+    const nBase = this.#baseSpecs.length
+    const colsPerModel = this.#task === 'classification' ? this.#nClasses : 1
+    const oofCols = nBase * colsPerModel
+    const oofData = new Float64Array(n * oofCols)
+    for (let b = 0; b < nBase; b++) {
+      const [, EstClass, params] = this.#baseSpecs[b]
+      for (const { train, test } of folds) {
+        const Xtrain = _subsetX(Xn, train)
+        const ytrain = _subsetY(yn, train)
+        const Xtest = _subsetX(Xn, test)
+        const model = await EstClass.create(params || {})
+        try {
+          model.fit(Xtrain, ytrain)
+          if (this.#task === 'classification') {
+            const proba = await model.predictProba(Xtest)
+            for (let i = 0; i < test.length; i++) {
+              const row = test[i]
+              for (let c = 0; c < this.#nClasses; c++) {
+                oofData[row * oofCols + b * colsPerModel + c] = proba[i * this.#nClasses + c]
+              }
+            }
+          } else {
+            const preds = await model.predict(Xtest)
+            for (let i = 0; i < test.length; i++) {
+              oofData[test[i] * oofCols + b] = preds[i]
+            }
+          }
+        } finally {
+          model.dispose()
+        }
+      }
+    }
+    // Step 2: Build meta-feature matrix
+    let metaX
+    if (this.#passthrough) {
+      this.#nMetaCols = oofCols + Xn.cols
+      const metaData = new Float64Array(n * this.#nMetaCols)
+      for (let i = 0; i < n; i++) {
+        // OOF predictions
+        metaData.set(
+          oofData.subarray(i * oofCols, (i + 1) * oofCols),
+          i * this.#nMetaCols
+        )
+        // Original features
+        metaData.set(
+          Xn.data.subarray(i * Xn.cols, (i + 1) * Xn.cols),
+          i * this.#nMetaCols + oofCols
+        )
+      }
+      metaX = { data: metaData, rows: n, cols: this.#nMetaCols }
+    } else {
+      this.#nMetaCols = oofCols
+      metaX = { data: oofData, rows: n, cols: oofCols }
+    }
+    // Step 3: Train base models on full data
+    this.#baseModels = []
+    for (const [, EstClass, params] of this.#baseSpecs) {
+      const model = await EstClass.create(params || {})
+      model.fit(Xn, yn)
+      this.#baseModels.push(model)
+    }
+    // Step 4: Train meta-model on OOF features
+    const [, MetaClass, metaParams] = this.#metaSpec
+    this.#metaModel = await MetaClass.create(metaParams || {})
+    this.#metaModel.fit(metaX, yn)
+    this.#fitted = true
+    return this
+  }
+  predict(X) {
+    this.#ensureFitted()
+    const metaX = this.#buildMetaFeatures(X)
+    return lift(metaX, mx => this.#metaModel.predict(mx))
+  }
+  predictProba(X) {
+    this.#ensureFitted()
+    if (this.#task !== 'classification') {
+      throw new ValidationError('predictProba is only available for classification')
+    }
+    if (typeof this.#metaModel.predictProba !== 'function') {
+      throw new ValidationError('Meta-model does not support predictProba')
+    }
+    const metaX = this.#buildMetaFeatures(X)
+    return lift(metaX, mx => this.#metaModel.predictProba(mx))
+  }
+  score(X, y) {
+    this.#ensureFitted()
+    const preds = this.predict(X)
+    const yn = normalizeY(y)
+    const scorer = this.#task === 'classification' ? accuracy : r2Score
+    return lift(preds, p => scorer(yn, p))
+  }
+  save() {
+    this.#ensureFitted()
+    const typeId = this.#task === 'classification' ? TYPE_ID_CLS : TYPE_ID_REG
+    const manifest = {
+      typeId,
+      params: {
+        task: this.#task,
+        cv: this.#cv,
+        passthrough: this.#passthrough,
+        seed: this.#seed,
+        estimatorNames: this.#baseSpecs.map(s => s[0]),
+        metaName: this.#metaSpec[0],
+        classes: this.#classes ? [...this.#classes] : null,
+        nMetaCols: this.#nMetaCols,
+      },
+    }
+    const artifacts = this.#baseModels.map((model, i) => ({
+      id: this.#baseSpecs[i][0],
+      data: model.save(),
+      mediaType: 'application/x-wlearn-bundle',
+    }))
+    artifacts.push({
+      id: this.#metaSpec[0],
+      data: this.#metaModel.save(),
+      mediaType: 'application/x-wlearn-bundle',
+    })
+    return encodeBundle(manifest, artifacts)
+  }
+  static async load(bytes) {
+    const { manifest, toc, blobs } = decodeBundle(bytes)
+    return StackingEnsemble._loadFromParts(manifest, toc, blobs)
+  }
+  dispose() {
+    if (this.#disposed) return
+    this.#disposed = true
+    if (this.#baseModels) {
+      for (const m of this.#baseModels) m.dispose()
+    }
+    if (this.#metaModel) this.#metaModel.dispose()
+  }
+  getParams() {
+    return {
+      task: this.#task,
+      cv: this.#cv,
+      passthrough: this.#passthrough,
+      seed: this.#seed,
+      estimatorNames: this.#baseSpecs.map(s => s[0]),
+      metaName: this.#metaSpec ? this.#metaSpec[0] : null,
+    }
+  }
+  setParams(p) {
+    this.#ensureAlive()
+    if (p.cv !== undefined) this.#cv = p.cv
+    if (p.passthrough !== undefined) this.#passthrough = p.passthrough
+    if (p.seed !== undefined) this.#seed = p.seed
+    return this
+  }
+  get capabilities() {
+    return {
+      classifier: this.#task === 'classification',
+      regressor: this.#task === 'regression',
+      predictProba: this.#task === 'classification',
+      decisionFunction: false,
+      sampleWeight: false,
+      csr: false,
+      earlyStopping: false,
+    }
+  }
+  get isFitted() { return this.#fitted }
+  get classes() { return this.#classes }
+  // --- Private helpers ---
+  #buildMetaFeatures(X) {
+    const Xn = normalizeX(X)
+    const n = Xn.rows
+    const nBase = this.#baseModels.length
+    const colsPerModel = this.#task === 'classification' ? this.#nClasses : 1
+    const oofCols = nBase * colsPerModel
+    // Collect predictions from all base models
+    const rawOutputs = []
+    let hasPromise = false
+    for (let b = 0; b < nBase; b++) {
+      const out = this.#task === 'classification'
+        ? this.#baseModels[b].predictProba(Xn)
+        : this.#baseModels[b].predict(Xn)
+      if (out != null && typeof out.then === 'function') hasPromise = true
+      rawOutputs.push(out)
+    }
+    const assemble = (outputs) => {
+      const metaData = new Float64Array(n * this.#nMetaCols)
+      for (let b = 0; b < nBase; b++) {
+        if (this.#task === 'classification') {
+          const proba = outputs[b]
+          for (let i = 0; i < n; i++) {
+            for (let c = 0; c < this.#nClasses; c++) {
+              metaData[i * this.#nMetaCols + b * colsPerModel + c] = proba[i * this.#nClasses + c]
+            }
+          }
+        } else {
+          const preds = outputs[b]
+          for (let i = 0; i < n; i++) {
+            metaData[i * this.#nMetaCols + b] = preds[i]
+          }
+        }
+      }
+      if (this.#passthrough) {
+        for (let i = 0; i < n; i++) {
+          metaData.set(
+            Xn.data.subarray(i * Xn.cols, (i + 1) * Xn.cols),
+            i * this.#nMetaCols + oofCols
+          )
+        }
+      }
+      return { data: metaData, rows: n, cols: this.#nMetaCols }
+    }
+    return hasPromise ? Promise.all(rawOutputs).then(assemble) : assemble(rawOutputs)
+  }
+  static _register() {
+    if (_registered) return
+    _registered = true
+    const loader = (manifest, toc, blobs) => {
+      return StackingEnsemble._loadFromParts(manifest, toc, blobs)
+    }
+    register(TYPE_ID_CLS, loader)
+    register(TYPE_ID_REG, loader)
+  }
+  static async _loadFromParts(manifest, toc, blobs) {
+    const p = manifest.params
+    const ens = new StackingEnsemble({
+      task: p.task,
+      cv: p.cv,
+      passthrough: p.passthrough,
+      seed: p.seed,
+    })
+    ens.#classes = p.classes ? new Int32Array(p.classes) : null
+    ens.#nClasses = ens.#classes ? ens.#classes.length : 0
+    ens.#nMetaCols = p.nMetaCols
+    ens.#baseSpecs = p.estimatorNames.map(name => [name, null, null])
+    ens.#metaSpec = [p.metaName, null, null]
+    // Load base models
+    ens.#baseModels = []
+    for (const name of p.estimatorNames) {
+      const entry = toc.find(t => t.id === name)
+      if (!entry) throw new ValidationError(`No artifact for base estimator "${name}"`)
+      const blob = blobs.subarray(entry.offset, entry.offset + entry.length)
+      ens.#baseModels.push(await registryLoad(blob))
+    }
+    // Load meta-model
+    const metaEntry = toc.find(t => t.id === p.metaName)
+    if (!metaEntry) throw new ValidationError(`No artifact for meta estimator "${p.metaName}"`)
+    const metaBlob = blobs.subarray(metaEntry.offset, metaEntry.offset + metaEntry.length)
+    ens.#metaModel = await registryLoad(metaBlob)
+    ens.#fitted = true
+    return ens
+  }
+}
+// --- Subset helpers ---
+function _subsetX(X, indices) {
+  const { data, cols } = X
+  const rows = indices.length
+  const out = new Float64Array(rows * cols)
+  for (let i = 0; i < rows; i++) {
+    const srcOff = indices[i] * cols
+    out.set(data.subarray(srcOff, srcOff + cols), i * cols)
+  }
+  return { data: out, rows, cols }
+}
+function _subsetY(y, indices) {
+  const out = new (y.constructor)(indices.length)
+  for (let i = 0; i < indices.length; i++) {
+    out[i] = y[indices[i]]
+  }
+  return out
+}

package/src/voting.js ADDED Viewed

@@ -0,0 +1,311 @@
+import {
+  encodeBundle, decodeBundle, register, load as registryLoad,
+  normalizeX, normalizeY, accuracy, r2Score,
+  ValidationError, NotFittedError, DisposedError,
+  lift
+} from '@wlearn/core'
+// Registry type ids: save() writes one of them as the manifest typeId
+// (chosen by task) and _register() binds both to the same bundle loader.
+const TYPE_ID_CLS = 'wlearn.ensemble.voting.classifier@1'
+const TYPE_ID_REG = 'wlearn.ensemble.voting.regressor@1'
+// Module-level guard so VotingEnsemble._register() calls register() only once.
+let _registered = false
+export class VotingEnsemble {
+  #specs       // [name, Class, params][]
+  #models      // fitted instances
+  #weights
+  #voting      // 'soft' | 'hard'
+  #task        // 'classification' | 'regression'
+  #classes
+  #fitted = false
+  #disposed = false
+  constructor(params) {
+    this.#specs = params.estimators || []
+    this.#weights = params.weights || null
+    this.#voting = params.voting || 'soft'
+    this.#task = params.task || 'classification'
+    this.#models = null
+    this.#classes = null
+    VotingEnsemble._register()
+  }
+  static async create(params = {}) {
+    return new VotingEnsemble(params)
+  }
+  #ensureAlive() {
+    if (this.#disposed) throw new DisposedError('VotingEnsemble has been disposed.')
+  }
+  #ensureFitted() {
+    this.#ensureAlive()
+    if (!this.#fitted) throw new NotFittedError('VotingEnsemble is not fitted. Call fit() first.')
+  }
+  async fit(X, y) {
+    this.#ensureAlive()
+    const Xn = normalizeX(X)
+    const yn = normalizeY(y)
+    if (this.#task === 'classification') {
+      const labelSet = new Set()
+      for (let i = 0; i < yn.length; i++) labelSet.add(yn[i])
+      this.#classes = new Int32Array([...labelSet].sort((a, b) => a - b))
+    }
+    // Default equal weights
+    if (!this.#weights) {
+      this.#weights = new Float64Array(this.#specs.length).fill(1 / this.#specs.length)
+    }
+    // Instantiate and fit all models
+    this.#models = []
+    for (const [name, EstClass, params] of this.#specs) {
+      const model = await EstClass.create(params || {})
+      model.fit(Xn, yn)
+      this.#models.push(model)
+    }
+    this.#fitted = true
+    return this
+  }
+  predict(X) {
+    this.#ensureFitted()
+    const Xn = normalizeX(X)
+    const n = Xn.rows
+    if (this.#task === 'regression') {
+      return this.#weightedAverage(Xn, n)
+    }
+    if (this.#voting === 'soft') {
+      const proba = this.predictProba(Xn)
+      return lift(proba, p => {
+        const nc = this.#classes.length
+        const out = new Float64Array(n)
+        for (let i = 0; i < n; i++) {
+          let bestC = 0, bestV = -Infinity
+          for (let c = 0; c < nc; c++) {
+            if (p[i * nc + c] > bestV) { bestV = p[i * nc + c]; bestC = c }
+          }
+          out[i] = this.#classes[bestC]
+        }
+        return out
+      })
+    }
+    // Hard voting: majority vote
+    return this.#majorityVote(Xn, n)
+  }
+  predictProba(X) {
+    this.#ensureFitted()
+    if (this.#task !== 'classification') {
+      throw new ValidationError('predictProba is only available for classification')
+    }
+    if (this.#voting === 'hard') {
+      throw new ValidationError('predictProba requires voting="soft"')
+    }
+    const Xn = normalizeX(X)
+    const n = Xn.rows
+    const nc = this.#classes.length
+    // Collect predictions from all models
+    const rawOutputs = []
+    let hasPromise = false
+    for (let m = 0; m < this.#models.length; m++) {
+      const out = this.#models[m].predictProba(Xn)
+      if (out != null && typeof out.then === 'function') hasPromise = true
+      rawOutputs.push(out)
+    }
+    const assemble = (outputs) => {
+      const result = new Float64Array(n * nc)
+      for (let m = 0; m < outputs.length; m++) {
+        const proba = outputs[m]
+        const w = this.#weights[m]
+        for (let i = 0; i < n * nc; i++) {
+          result[i] += w * proba[i]
+        }
+      }
+      return result
+    }
+    return hasPromise ? Promise.all(rawOutputs).then(assemble) : assemble(rawOutputs)
+  }
+  score(X, y) {
+    this.#ensureFitted()
+    const preds = this.predict(X)
+    const yn = normalizeY(y)
+    const scorer = this.#task === 'classification' ? accuracy : r2Score
+    return lift(preds, p => scorer(yn, p))
+  }
+  save() {
+    this.#ensureFitted()
+    const typeId = this.#task === 'classification' ? TYPE_ID_CLS : TYPE_ID_REG
+    const manifest = {
+      typeId,
+      params: {
+        task: this.#task,
+        voting: this.#voting,
+        weights: [...this.#weights],
+        estimatorNames: this.#specs.map(s => s[0]),
+        classes: this.#classes ? [...this.#classes] : null,
+      },
+    }
+    const artifacts = this.#models.map((model, i) => ({
+      id: this.#specs[i][0],
+      data: model.save(),
+      mediaType: 'application/x-wlearn-bundle',
+    }))
+    return encodeBundle(manifest, artifacts)
+  }
+  static async load(bytes) {
+    const { manifest, toc, blobs } = decodeBundle(bytes)
+    const p = manifest.params
+    const ens = new VotingEnsemble({
+      task: p.task,
+      voting: p.voting,
+      weights: new Float64Array(p.weights),
+    })
+    ens.#classes = p.classes ? new Int32Array(p.classes) : null
+    ens.#specs = p.estimatorNames.map(name => [name, null, null])
+    // Load submodels via registry
+    ens.#models = []
+    for (const name of p.estimatorNames) {
+      const entry = toc.find(t => t.id === name)
+      if (!entry) throw new ValidationError(`No artifact for estimator "${name}"`)
+      const blob = blobs.subarray(entry.offset, entry.offset + entry.length)
+      const model = await registryLoad(blob)
+      ens.#models.push(model)
+    }
+    ens.#fitted = true
+    return ens
+  }
+  dispose() {
+    if (this.#disposed) return
+    this.#disposed = true
+    if (this.#models) {
+      for (const m of this.#models) m.dispose()
+    }
+  }
+  getParams() {
+    return {
+      task: this.#task,
+      voting: this.#voting,
+      weights: this.#weights ? [...this.#weights] : null,
+      estimatorNames: this.#specs.map(s => s[0]),
+    }
+  }
+  setParams(p) {
+    this.#ensureAlive()
+    if (p.voting !== undefined) this.#voting = p.voting
+    if (p.weights !== undefined) this.#weights = new Float64Array(p.weights)
+    return this
+  }
+  get capabilities() {
+    return {
+      classifier: this.#task === 'classification',
+      regressor: this.#task === 'regression',
+      predictProba: this.#task === 'classification' && this.#voting === 'soft',
+      decisionFunction: false,
+      sampleWeight: false,
+      csr: false,
+      earlyStopping: false,
+    }
+  }
+  get isFitted() { return this.#fitted }
+  get classes() { return this.#classes }
+  // --- Private helpers ---
+  #weightedAverage(Xn, n) {
+    const rawOutputs = []
+    let hasPromise = false
+    for (let m = 0; m < this.#models.length; m++) {
+      const out = this.#models[m].predict(Xn)
+      if (out != null && typeof out.then === 'function') hasPromise = true
+      rawOutputs.push(out)
+    }
+    const assemble = (outputs) => {
+      const result = new Float64Array(n)
+      for (let m = 0; m < outputs.length; m++) {
+        const w = this.#weights[m]
+        for (let i = 0; i < n; i++) result[i] += w * outputs[m][i]
+      }
+      return result
+    }
+    return hasPromise ? Promise.all(rawOutputs).then(assemble) : assemble(rawOutputs)
+  }
+  #majorityVote(Xn, n) {
+    const rawOutputs = []
+    let hasPromise = false
+    for (let m = 0; m < this.#models.length; m++) {
+      const out = this.#models[m].predict(Xn)
+      if (out != null && typeof out.then === 'function') hasPromise = true
+      rawOutputs.push(out)
+    }
+    const assemble = (outputs) => {
+      const nc = this.#classes.length
+      const result = new Float64Array(n)
+      for (let i = 0; i < n; i++) {
+        const votes = new Float64Array(nc)
+        for (let m = 0; m < outputs.length; m++) {
+          const pred = outputs[m][i]
+          const classIdx = this.#classes.indexOf(pred)
+          if (classIdx >= 0) votes[classIdx] += this.#weights[m]
+        }
+        let bestC = 0, bestV = -Infinity
+        for (let c = 0; c < nc; c++) {
+          if (votes[c] > bestV) { bestV = votes[c]; bestC = c }
+        }
+        result[i] = this.#classes[bestC]
+      }
+      return result
+    }
+    return hasPromise ? Promise.all(rawOutputs).then(assemble) : assemble(rawOutputs)
+  }
+  static _register() {
+    if (_registered) return
+    _registered = true
+    const loader = (manifest, toc, blobs) => {
+      return VotingEnsemble._loadFromParts(manifest, toc, blobs)
+    }
+    register(TYPE_ID_CLS, loader)
+    register(TYPE_ID_REG, loader)
+  }
+  static async _loadFromParts(manifest, toc, blobs) {
+    const p = manifest.params
+    const ens = new VotingEnsemble({
+      task: p.task,
+      voting: p.voting,
+      weights: new Float64Array(p.weights),
+    })
+    ens.#classes = p.classes ? new Int32Array(p.classes) : null
+    ens.#specs = p.estimatorNames.map(name => [name, null, null])
+    ens.#models = []
+    for (const name of p.estimatorNames) {
+      const entry = toc.find(t => t.id === name)
+      if (!entry) throw new ValidationError(`No artifact for estimator "${name}"`)
+      const blob = blobs.subarray(entry.offset, entry.offset + entry.length)
+      const model = await registryLoad(blob)
+      ens.#models.push(model)
+    }
+    ens.#fitted = true
+    return ens
+  }
+}