npm - @wlearn/automl - Versions diffs - 0.1.0 - Mend

@wlearn/automl 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/package.json +27 -0
package/src/auto-fit.js +261 -0
package/src/common.js +108 -0
package/src/executor.js +209 -0
package/src/halving.js +95 -0
package/src/index.js +12 -0
package/src/leaderboard.js +106 -0
package/src/portfolio.js +332 -0
package/src/progressive.js +156 -0
package/src/sampler.js +162 -0
package/src/search.js +93 -0
package/src/strategy-halving.js +157 -0
package/src/strategy-progressive.js +126 -0
package/src/strategy-random.js +67 -0

package/src/sampler.js ADDED Viewed

@@ -0,0 +1,162 @@
+import { makeLCG } from '@wlearn/core'
+const { floor, round, log, exp, min, max } = Math
+/**
+ * Sample a single value from a SearchParam definition.
+ */
+export function sampleParam(param, rng) {
+  const { type } = param
+  switch (type) {
+    case 'categorical':
+      return param.values[floor(rng() * param.values.length)]
+    case 'uniform':
+      return param.low + rng() * (param.high - param.low)
+    case 'log_uniform':
+      return exp(log(param.low) + rng() * (log(param.high) - log(param.low)))
+    case 'int_uniform':
+      return param.low + floor(rng() * (param.high - param.low + 1))
+    case 'int_log_uniform':
+      return round(exp(log(param.low) + rng() * (log(param.high) - log(param.low))))
+    default:
+      throw new Error(`Unknown SearchParam type: "${type}"`)
+  }
+}
+/**
+ * Sample a complete config from a SearchSpace, respecting conditions.
+ */
+export function sampleConfig(space, rng) {
+  const config = {}
+  const keys = Object.keys(space)
+  // First pass: non-conditional params
+  for (const key of keys) {
+    if (!space[key].condition) {
+      config[key] = sampleParam(space[key], rng)
+    }
+  }
+  // Second pass: conditional params
+  for (const key of keys) {
+    const { condition } = space[key]
+    if (!condition) continue
+    let satisfied = true
+    for (const [ck, cv] of Object.entries(condition)) {
+      if (config[ck] !== cv) { satisfied = false; break }
+    }
+    if (satisfied) {
+      config[key] = sampleParam(space[key], rng)
+    }
+  }
+  return config
+}
+/**
+ * Generate n random configs from a SearchSpace.
+ */
+export function randomConfigs(space, n, { seed = 42 } = {}) {
+  const rng = makeLCG(seed)
+  const configs = []
+  for (let i = 0; i < n; i++) {
+    configs.push(sampleConfig(space, rng))
+  }
+  return configs
+}
+/**
+ * Enumerate grid points from a SearchSpace.
+ * Continuous params discretized to `steps` values.
+ */
+export function gridConfigs(space, { steps = 5 } = {}) {
+  const keys = Object.keys(space)
+  if (keys.length === 0) return [{}]
+  // Build value arrays for non-conditional params
+  const nonCond = keys.filter(k => !space[k].condition)
+  const condKeys = keys.filter(k => space[k].condition)
+  const valueArrays = nonCond.map(k => _discretize(space[k], steps))
+  // Cartesian product of non-conditional params
+  let combos = [{}]
+  for (let i = 0; i < nonCond.length; i++) {
+    const key = nonCond[i]
+    const vals = valueArrays[i]
+    const next = []
+    for (const combo of combos) {
+      for (const v of vals) {
+        next.push({ ...combo, [key]: v })
+      }
+    }
+    combos = next
+  }
+  // Add conditional params where conditions are met
+  for (const combo of combos) {
+    for (const key of condKeys) {
+      const { condition } = space[key]
+      let satisfied = true
+      for (const [ck, cv] of Object.entries(condition)) {
+        if (combo[ck] !== cv) { satisfied = false; break }
+      }
+      if (satisfied) {
+        // For grid, take all discrete values and expand
+        // But that would multiply combos -- for simplicity, take midpoint
+        const vals = _discretize(space[key], steps)
+        combo[key] = vals[floor(vals.length / 2)]
+      }
+    }
+  }
+  return combos
+}
+function _discretize(param, steps) {
+  const { type } = param
+  switch (type) {
+    case 'categorical':
+      return [...param.values]
+    case 'uniform': {
+      const arr = []
+      for (let i = 0; i < steps; i++) {
+        arr.push(param.low + (param.high - param.low) * i / max(1, steps - 1))
+      }
+      return arr
+    }
+    case 'log_uniform': {
+      const logLow = log(param.low)
+      const logHigh = log(param.high)
+      const arr = []
+      for (let i = 0; i < steps; i++) {
+        arr.push(exp(logLow + (logHigh - logLow) * i / max(1, steps - 1)))
+      }
+      return arr
+    }
+    case 'int_uniform': {
+      const range = param.high - param.low + 1
+      if (range <= steps) {
+        const arr = []
+        for (let v = param.low; v <= param.high; v++) arr.push(v)
+        return arr
+      }
+      const arr = []
+      for (let i = 0; i < steps; i++) {
+        arr.push(param.low + round((param.high - param.low) * i / max(1, steps - 1)))
+      }
+      return [...new Set(arr)].sort((a, b) => a - b)
+    }
+    case 'int_log_uniform': {
+      const logLow = log(param.low)
+      const logHigh = log(param.high)
+      const arr = []
+      for (let i = 0; i < steps; i++) {
+        arr.push(round(exp(logLow + (logHigh - logLow) * i / max(1, steps - 1))))
+      }
+      return [...new Set(arr)].sort((a, b) => a - b)
+    }
+    default:
+      throw new Error(`Unknown SearchParam type: "${type}"`)
+  }
+}

package/src/search.js ADDED Viewed

@@ -0,0 +1,93 @@
+import { stratifiedKFold, kFold, normalizeX, normalizeY,
+  ValidationError } from '@wlearn/core'
+import { Executor } from './executor.js'
+import { RandomStrategy } from './strategy-random.js'
+import { detectTask, scorerGreaterIsBetter } from './common.js'
+/**
+ * Random hyperparameter search with cross-validation.
+ */
+export class RandomSearch {
+  #models
+  #opts
+  #leaderboard = null
+  #bestResult = null
+  /**
+   * @param {Array<{ name: string, cls: object, searchSpace?: object, params?: object }>} models
+   * @param {object} opts
+   */
+  constructor(models, opts = {}) {
+    if (!models || models.length === 0) {
+      throw new ValidationError('RandomSearch: at least one model is required')
+    }
+    this.#models = models
+    this.#opts = {
+      scoring: null, // auto-detect
+      cv: 5,
+      seed: 42,
+      task: null, // auto-detect
+      nIter: 20,
+      maxTimeMs: 0,
+      onProgress: null,
+      ...opts,
+    }
+  }
+  /**
+   * Run the search.
+   */
+  async fit(X, y) {
+    const Xn = normalizeX(X)
+    const yn = normalizeY(y)
+    const task = this.#opts.task || detectTask(yn)
+    const scoring = this.#opts.scoring || (task === 'classification' ? 'accuracy' : 'r2')
+    const { cv, seed, nIter, maxTimeMs, onProgress } = this.#opts
+    // Generate folds once, shared across all candidates
+    const folds = task === 'classification'
+      ? stratifiedKFold(yn, cv, { shuffle: true, seed })
+      : kFold(yn.length, cv, { shuffle: true, seed })
+    const executor = new Executor({
+      folds,
+      scoring,
+      X: Xn,
+      y: yn,
+      timeLimitMs: maxTimeMs,
+      seed,
+      onProgress,
+    })
+    const strategy = new RandomStrategy(this.#models, { nIter, seed })
+    const { leaderboard } = await executor.runStrategy(strategy)
+    if (leaderboard.length === 0) {
+      throw new ValidationError('RandomSearch: no candidates were evaluated')
+    }
+    this.#leaderboard = leaderboard
+    this.#bestResult = leaderboard.best()
+    return { leaderboard, bestResult: this.#bestResult }
+  }
+  /**
+   * Refit the best candidate on full data.
+   */
+  async refitBest(X, y) {
+    if (!this.#bestResult) {
+      throw new ValidationError('RandomSearch: must call fit() first')
+    }
+    const best = this.#bestResult
+    const model = this.#models.find(m => m.name === best.modelName)
+    const instance = await model.cls.create(best.params)
+    const Xn = normalizeX(X)
+    const yn = normalizeY(y)
+    instance.fit(Xn, yn)
+    return instance
+  }
+  get leaderboard() { return this.#leaderboard }
+  get bestResult() { return this.#bestResult }
+}

package/src/strategy-halving.js ADDED Viewed

@@ -0,0 +1,157 @@
+import { makeLCG } from '@wlearn/core'
+import { sampleConfig } from './sampler.js'
+import { makeCandidateId } from './common.js'
+const { ceil, log, max, min, floor } = Math
+/**
+ * Successive halving strategy: multi-round elimination tournament.
+ *
+ * Evaluates many candidates on small subsamples, progressively
+ * eliminates the worst and increases resource allocation.
+ *
+ * Round transitions happen inside report() when all candidates
+ * in the current round have been evaluated. next() returns null
+ * only when fully done.
+ */
+export class HalvingStrategy {
+  #candidates       // all candidates for current round
+  #roundIndex = 0   // index within current round's candidates
+  #round = 0        // current round number
+  #nRounds
+  #factor
+  #nSamples         // total sample count (for computing fractions)
+  #minResources
+  #greaterIsBetter
+  #roundResults = [] // results collected for current round
+  #rounds = []       // completed round stats
+  #done = false
+  #finalRound = false
+  /**
+   * @param {Array<{ name: string, cls: object, searchSpace?: object, params?: object }>} models
+   * @param {object} opts
+   * @param {number} opts.nIter - candidates per model
+   * @param {number} opts.seed
+   * @param {number} opts.factor - elimination factor (keep top 1/factor)
+   * @param {number} opts.nSamples - total sample count for fraction computation
+   * @param {boolean} opts.greaterIsBetter - sort direction for elimination
+   * @param {number} opts.cv - fold count (for minResources default)
+   */
+  constructor(models, { nIter = 20, seed = 42, factor = 3, nSamples, greaterIsBetter = true, cv = 5 } = {}) {
+    this.#factor = factor
+    this.#nSamples = nSamples || 0
+    this.#greaterIsBetter = greaterIsBetter
+    // Generate all candidate configs
+    const rng = makeLCG(seed)
+    const allCandidates = []
+    for (const model of models) {
+      const space = model.searchSpace || model.cls.defaultSearchSpace?.() || {}
+      const effectiveSpace = { ...space }
+      if (model.params) {
+        for (const key of Object.keys(model.params)) {
+          delete effectiveSpace[key]
+        }
+      }
+      const configRng = makeLCG((rng() * 0x7fffffff) | 0)
+      for (let i = 0; i < nIter; i++) {
+        const config = sampleConfig(effectiveSpace, configRng)
+        const params = { ...config, ...(model.params || {}) }
+        const candidateId = makeCandidateId(model.name, params)
+        allCandidates.push({
+          candidateId,
+          cls: model.cls,
+          params,
+        })
+      }
+    }
+    this.#candidates = allCandidates
+    this.#nRounds = max(1, ceil(log(allCandidates.length) / log(factor)))
+    this.#minResources = max(cv * 2, floor(this.#nSamples / (factor ** this.#nRounds)))
+  }
+  /**
+   * Return next candidate for current round (with subsample budget),
+   * or null when fully done.
+   */
+  next() {
+    if (this.#done) return null
+    if (this.#roundIndex >= this.#candidates.length) return null
+    const cand = this.#candidates[this.#roundIndex++]
+    // For non-final rounds, attach subsample budget
+    if (!this.#finalRound) {
+      const nResources = min(this.#nSamples, floor(this.#minResources * (this.#factor ** this.#round)))
+      const fraction = min(1, nResources / this.#nSamples)
+      if (fraction < 1) {
+        return { ...cand, budget: { type: 'subsample', value: fraction } }
+      }
+    }
+    return cand
+  }
+  /**
+   * Report a candidate result.
+   * When all candidates in the current round have reported,
+   * performs elimination and advances to the next round.
+   */
+  report(result) {
+    this.#roundResults.push(result)
+    // Check if all candidates in current round have been evaluated
+    if (this.#roundResults.length < this.#candidates.length) return
+    // Round complete: sort and eliminate
+    if (this.#finalRound) {
+      // Final round done
+      this.#done = true
+      return
+    }
+    const nResources = min(this.#nSamples, floor(this.#minResources * (this.#factor ** this.#round)))
+    const fraction = min(1, nResources / this.#nSamples)
+    // Sort results
+    const sorted = [...this.#roundResults]
+    if (this.#greaterIsBetter) {
+      sorted.sort((a, b) => b.meanScore - a.meanScore)
+    } else {
+      sorted.sort((a, b) => a.meanScore - b.meanScore)
+    }
+    const nSurvivors = max(1, ceil(sorted.length / this.#factor))
+    this.#rounds.push({
+      round: this.#round,
+      nResources,
+      fraction,
+      nCandidates: this.#roundResults.length,
+      nSurvivors,
+    })
+    // Build survivor candidate list
+    const survivorIds = new Set(sorted.slice(0, nSurvivors).map(r => r.candidateId))
+    this.#candidates = this.#candidates.filter(c => survivorIds.has(c.candidateId))
+    this.#round++
+    this.#roundIndex = 0
+    this.#roundResults = []
+    // Check if we should enter final round (1 or fewer survivors, or max rounds)
+    if (this.#candidates.length <= 1 || this.#round >= this.#nRounds) {
+      this.#finalRound = true
+    }
+  }
+  isDone() {
+    return this.#done
+  }
+  get rounds() {
+    return this.#rounds
+  }
+}

package/src/strategy-progressive.js ADDED Viewed

@@ -0,0 +1,126 @@
+import { makeLCG } from '@wlearn/core'
+import { sampleConfig } from './sampler.js'
+import { makeCandidateId } from './common.js'
+const { max, ceil } = Math
+/**
+ * Progressive evaluation strategy: probe all candidates cheaply (1 fold),
+ * then promote top N to full evaluation.
+ *
+ * Phase 1 (probe): yield all candidates for cheap 1-fold evaluation
+ * Phase 2 (promote): yield top N candidates for full K-fold evaluation
+ *
+ * This pairs with ProgressiveSearch which manages two Executors
+ * (probe executor with 1 fold, full executor with K folds).
+ */
+export class ProgressiveStrategy {
+  #allCandidates = []
+  #promotedCandidates = []
+  #phase = 'probe'
+  #probeIndex = 0
+  #promoteIndex = 0
+  #probeResults = []
+  #promoteCount
+  #greaterIsBetter
+  #done = false
+  #probeFraction
+  /**
+   * @param {Array<{ name: string, cls: object, searchSpace?: object, params?: object }>} models
+   * @param {object} opts
+   * @param {number} opts.nIter - candidates per model
+   * @param {number} opts.seed
+   * @param {number} opts.promoteCount - how many to promote from probe to full eval
+   * @param {boolean} opts.greaterIsBetter - sort direction
+   * @param {number} opts.probeFraction - subsample fraction for probe phase (0-1)
+   */
+  constructor(models, { nIter = 20, seed = 42, promoteCount = 10,
+    greaterIsBetter = true, probeFraction = 0.5 } = {}) {
+    this.#promoteCount = promoteCount
+    this.#greaterIsBetter = greaterIsBetter
+    this.#probeFraction = probeFraction
+    const rng = makeLCG(seed)
+    for (const model of models) {
+      const space = model.searchSpace || model.cls.defaultSearchSpace?.() || {}
+      const effectiveSpace = { ...space }
+      if (model.params) {
+        for (const key of Object.keys(model.params)) {
+          delete effectiveSpace[key]
+        }
+      }
+      const configRng = makeLCG((rng() * 0x7fffffff) | 0)
+      for (let i = 0; i < nIter; i++) {
+        const config = sampleConfig(effectiveSpace, configRng)
+        const params = { ...config, ...(model.params || {}) }
+        const candidateId = makeCandidateId(model.name, params)
+        this.#allCandidates.push({ candidateId, cls: model.cls, params })
+      }
+    }
+  }
+  get phase() { return this.#phase }
+  next() {
+    if (this.#done) return null
+    if (this.#phase === 'probe') {
+      if (this.#probeIndex >= this.#allCandidates.length) return null
+      const cand = this.#allCandidates[this.#probeIndex++]
+      // Attach subsample budget for cheaper probe
+      if (this.#probeFraction < 1) {
+        return { ...cand, budget: { type: 'subsample', value: this.#probeFraction } }
+      }
+      return cand
+    }
+    // Promote phase
+    if (this.#promoteIndex >= this.#promotedCandidates.length) return null
+    return this.#promotedCandidates[this.#promoteIndex++]
+  }
+  report(result) {
+    if (this.#phase === 'probe') {
+      this.#probeResults.push(result)
+      if (this.#probeResults.length >= this.#allCandidates.length) {
+        this.#transitionToPromote()
+      }
+      return
+    }
+    // Promote phase: nothing to do per-result
+  }
+  #transitionToPromote() {
+    // Sort probe results
+    const sorted = [...this.#probeResults]
+    if (this.#greaterIsBetter) {
+      sorted.sort((a, b) => b.meanScore - a.meanScore)
+    } else {
+      sorted.sort((a, b) => a.meanScore - b.meanScore)
+    }
+    // Select top N
+    const topN = sorted.slice(0, max(1, this.#promoteCount))
+    const topIds = new Set(topN.map(r => r.candidateId))
+    // Build promoted list from original candidates (to preserve cls reference)
+    this.#promotedCandidates = this.#allCandidates.filter(
+      c => topIds.has(c.candidateId)
+    )
+    this.#phase = 'promote'
+    this.#promoteIndex = 0
+  }
+  isDone() {
+    if (this.#done) return true
+    if (this.#phase === 'promote' &&
+        this.#promoteIndex >= this.#promotedCandidates.length &&
+        this.#promotedCandidates.length > 0) {
+      this.#done = true
+      return true
+    }
+    return false
+  }
+}

package/src/strategy-random.js ADDED Viewed

@@ -0,0 +1,67 @@
+import { makeLCG } from '@wlearn/core'
+import { sampleConfig } from './sampler.js'
+import { makeCandidateId } from './common.js'
+/**
+ * Random search strategy: generates nIter random configs per model,
+ * yields them one at a time. No adaptive behavior.
+ */
+export class RandomStrategy {
+  #queue = []
+  #index = 0
+  #total = 0
+  /**
+   * @param {Array<{ name: string, cls: object, searchSpace?: object, params?: object }>} models
+   * @param {object} opts
+   * @param {number} opts.nIter - candidates per model
+   * @param {number} opts.seed
+   */
+  constructor(models, { nIter = 20, seed = 42 } = {}) {
+    const rng = makeLCG(seed)
+    for (const model of models) {
+      const space = model.searchSpace || model.cls.defaultSearchSpace?.() || {}
+      // Remove fixed params from search space
+      const effectiveSpace = { ...space }
+      if (model.params) {
+        for (const key of Object.keys(model.params)) {
+          delete effectiveSpace[key]
+        }
+      }
+      const configRng = makeLCG((rng() * 0x7fffffff) | 0)
+      for (let i = 0; i < nIter; i++) {
+        const config = sampleConfig(effectiveSpace, configRng)
+        const params = { ...config, ...(model.params || {}) }
+        const candidateId = makeCandidateId(model.name, params)
+        this.#queue.push({
+          candidateId,
+          cls: model.cls,
+          params,
+        })
+      }
+    }
+    this.#total = this.#queue.length
+  }
+  /**
+   * Return next candidate to evaluate, or null when exhausted.
+   */
+  next() {
+    if (this.#index >= this.#total) return null
+    return this.#queue[this.#index++]
+  }
+  /**
+   * Report result. No-op for random search.
+   */
+  report(_result) {}
+  /**
+   * True when all candidates have been yielded.
+   */
+  isDone() {
+    return this.#index >= this.#total
+  }
+}