@wlearn/automl 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/sampler.js ADDED
@@ -0,0 +1,162 @@
1
+ import { makeLCG } from '@wlearn/core'
2
+
3
+ const { floor, round, log, exp, min, max } = Math
4
+
5
/**
 * Draw one random value for a single SearchParam definition.
 *
 * Handled `param.type` values:
 *   - 'categorical'     : one entry of `param.values`
 *   - 'uniform'         : `low + rng() * (high - low)`
 *   - 'log_uniform'     : same interpolation done between log(low) and
 *                         log(high), mapped back with exp
 *   - 'int_uniform'     : `low` plus a floored offset over (high - low + 1)
 *   - 'int_log_uniform' : the log-uniform draw rounded to nearest integer
 *
 * @param {object} param - SearchParam definition (`type` plus `values` or
 *   `low`/`high`)
 * @param {function(): number} rng - random source, called exactly once for
 *   a known type; presumably yields values in [0, 1) -- not checked here
 * @returns {*} the sampled value
 * @throws {Error} when `param.type` is none of the types listed above
 */
export function sampleParam(param, rng) {
  const kind = param.type

  // Shared by both log-scaled types: interpolate in log space, then exp.
  const drawLogScale = () => {
    const logLow = Math.log(param.low)
    const logHigh = Math.log(param.high)
    return Math.exp(logLow + rng() * (logHigh - logLow))
  }

  if (kind === 'categorical') {
    const choices = param.values
    return choices[Math.floor(rng() * choices.length)]
  }
  if (kind === 'uniform') {
    const width = param.high - param.low
    return param.low + rng() * width
  }
  if (kind === 'log_uniform') {
    return drawLogScale()
  }
  if (kind === 'int_uniform') {
    // +1 so that `high` itself is a possible outcome
    const nValues = param.high - param.low + 1
    return param.low + Math.floor(rng() * nValues)
  }
  if (kind === 'int_log_uniform') {
    return Math.round(drawLogScale())
  }

  throw new Error(`Unknown SearchParam type: "${kind}"`)
}
25
+
26
/**
 * Sample one complete config from a SearchSpace, honouring conditions.
 *
 * Parameters without a `condition` are sampled first, in key order. Then
 * each conditional parameter is sampled only if every `condition` entry
 * strictly equals the value already present in the config; otherwise the
 * key is left out. A condition can therefore refer to unconditional
 * parameters and to conditional ones sampled earlier in key order.
 *
 * @param {object} space - map of parameter name to SearchParam definition
 * @param {function(): number} rng - random source handed to sampleParam
 * @returns {object} sampled config keyed by parameter name
 */
export function sampleConfig(space, rng) {
  const names = Object.keys(space)
  const unconditional = names.filter(name => !space[name].condition)
  const conditional = names.filter(name => space[name].condition)
  const config = {}

  unconditional.forEach((name) => {
    config[name] = sampleParam(space[name], rng)
  })

  conditional.forEach((name) => {
    const requirements = Object.entries(space[name].condition)
    const holds = requirements.every(([dep, expected]) => config[dep] === expected)
    if (holds) {
      config[name] = sampleParam(space[name], rng)
    }
  })

  return config
}
55
+
56
/**
 * Produce `n` random configs from a SearchSpace.
 *
 * A single generator is created from `seed` and shared by all draws, so
 * the configs are consumed from one random stream in order.
 *
 * @param {object} space - map of parameter name to SearchParam definition
 * @param {number} n - how many configs to generate
 * @param {object} [opts]
 * @param {number} [opts.seed=42] - seed passed to makeLCG
 * @returns {object[]} the sampled configs
 */
export function randomConfigs(space, n, { seed = 42 } = {}) {
  const rng = makeLCG(seed)
  const drawn = []
  while (drawn.length < n) {
    drawn.push(sampleConfig(space, rng))
  }
  return drawn
}
67
+
68
/**
 * Enumerate grid points from a SearchSpace.
 *
 * Unconditional parameters are discretized (continuous ones to `steps`
 * values) and combined as a full Cartesian product. Conditional parameters
 * are NOT expanded: where the condition holds for a grid point, the point
 * receives the middle entry of that parameter's discretized values, so the
 * number of grid points does not grow.
 *
 * @param {object} space - map of parameter name to SearchParam definition
 * @param {object} [opts]
 * @param {number} [opts.steps=5] - grid resolution handed to _discretize
 * @returns {object[]} grid configs; `[{}]` for an empty space
 */
export function gridConfigs(space, { steps = 5 } = {}) {
  const names = Object.keys(space)
  if (names.length === 0) return [{}]

  // Split parameter names by whether they carry a condition.
  const free = []
  const gated = []
  for (const name of names) {
    (space[name].condition ? gated : free).push(name)
  }

  // Cartesian product over the unconditional parameters, built one
  // parameter at a time.
  const combos = free.reduce((partial, name) => {
    const choices = _discretize(space[name], steps)
    return partial.flatMap(base => choices.map(value => ({ ...base, [name]: value })))
  }, [{}])

  // Fill in conditional parameters whose conditions hold for the combo.
  for (const combo of combos) {
    for (const name of gated) {
      const requirements = Object.entries(space[name].condition)
      if (!requirements.every(([dep, expected]) => combo[dep] === expected)) continue
      // Midpoint only -- expanding here would multiply the grid.
      const choices = _discretize(space[name], steps)
      combo[name] = choices[Math.floor(choices.length / 2)]
    }
  }

  return combos
}
115
+
116
/**
 * Turn one SearchParam into the list of grid values used by gridConfigs.
 *
 *   - 'categorical'     : a copy of `param.values`
 *   - 'uniform'         : `steps` evenly spaced values from low to high
 *   - 'log_uniform'     : `steps` values evenly spaced in log space
 *   - 'int_uniform'     : every integer when the range has at most `steps`
 *                         values, otherwise `steps` rounded, de-duplicated,
 *                         ascending values
 *   - 'int_log_uniform' : `steps` log-spaced values rounded to integers,
 *                         de-duplicated and sorted ascending
 *
 * `steps` is floored and raised to at least 1 (non-finite values count as
 * 1), so a degenerate step count yields the single point `low` rather than
 * an empty grid. Continuous values are clamped to the [low, high] interval
 * because `exp(log(x))` is not exact in floating point and the raw end
 * point can otherwise land just outside the declared bounds.
 *
 * @param {object} param - SearchParam definition
 * @param {number} steps - requested number of grid values
 * @returns {Array} grid values for the parameter
 * @throws {Error} when `param.type` is not a known SearchParam type
 */
function _discretize(param, steps) {
  const { type } = param
  const count = Number.isFinite(steps) ? Math.max(1, Math.floor(steps)) : 1
  // With a single step the only grid position is 0; avoid dividing by 0.
  const denom = Math.max(1, count - 1)

  // Keep a continuous value inside the declared interval. min/max of the
  // two bounds are used so an inverted (low > high) range is not collapsed.
  const clamp = (value) => {
    const lower = Math.min(param.low, param.high)
    const upper = Math.max(param.low, param.high)
    return Math.min(upper, Math.max(lower, value))
  }

  // Evaluate `valueAt(i)` for every grid position i = 0 .. count - 1.
  const gridOf = valueAt => Array.from({ length: count }, (_, i) => valueAt(i))

  const sortedUnique = values => [...new Set(values)].sort((a, b) => a - b)

  switch (type) {
    case 'categorical':
      return [...param.values]
    case 'uniform': {
      const span = param.high - param.low
      return gridOf(i => clamp(param.low + span * i / denom))
    }
    case 'log_uniform': {
      const logLow = Math.log(param.low)
      const logHigh = Math.log(param.high)
      return gridOf(i => clamp(Math.exp(logLow + (logHigh - logLow) * i / denom)))
    }
    case 'int_uniform': {
      const range = param.high - param.low + 1
      if (range <= count) {
        // Small range: list every integer instead of sampling positions.
        const all = []
        for (let v = param.low; v <= param.high; v++) all.push(v)
        return all
      }
      const span = param.high - param.low
      return sortedUnique(gridOf(i => param.low + Math.round(span * i / denom)))
    }
    case 'int_log_uniform': {
      const logLow = Math.log(param.low)
      const logHigh = Math.log(param.high)
      // Rounding can map neighbouring positions to the same integer.
      return sortedUnique(
        gridOf(i => Math.round(Math.exp(logLow + (logHigh - logLow) * i / denom))),
      )
    }
    default:
      throw new Error(`Unknown SearchParam type: "${type}"`)
  }
}
package/src/search.js ADDED
@@ -0,0 +1,93 @@
1
+ import { stratifiedKFold, kFold, normalizeX, normalizeY,
2
+ ValidationError } from '@wlearn/core'
3
+ import { Executor } from './executor.js'
4
+ import { RandomStrategy } from './strategy-random.js'
5
+ import { detectTask, scorerGreaterIsBetter } from './common.js'
6
+
7
/**
 * Random hyperparameter search with cross-validation.
 *
 * fit() builds one set of folds, hands a RandomStrategy to an Executor and
 * keeps the resulting leaderboard; refitBest() then trains a fresh instance
 * of the winning model on the data it is given.
 */
export class RandomSearch {
  #models
  #opts
  #leaderboard = null
  #bestResult = null

  /**
   * @param {Array<{ name: string, cls: object, searchSpace?: object, params?: object }>} models
   * @param {object} opts - overrides for the defaults listed in the body
   * @throws {ValidationError} when `models` is missing or empty
   */
  constructor(models, opts = {}) {
    if (!models || models.length === 0) {
      throw new ValidationError('RandomSearch: at least one model is required')
    }
    this.#models = models
    this.#opts = {
      scoring: null, // auto-detect
      cv: 5,
      seed: 42,
      task: null, // auto-detect
      nIter: 20,
      maxTimeMs: 0,
      onProgress: null,
      ...opts,
    }
  }

  /**
   * Run the search.
   *
   * The task falls back to detectTask(y) and the scoring to 'accuracy'
   * (classification) or 'r2' (anything else) when not given in the options.
   *
   * @param {*} X - features, passed through normalizeX
   * @param {*} y - targets, passed through normalizeY
   * @returns {Promise<{ leaderboard: object, bestResult: object }>}
   * @throws {ValidationError} when the leaderboard comes back empty
   */
  async fit(X, y) {
    const Xn = normalizeX(X)
    const yn = normalizeY(y)
    const task = this.#opts.task || detectTask(yn)
    const scoring = this.#opts.scoring || (task === 'classification' ? 'accuracy' : 'r2')
    const { cv, seed, nIter, maxTimeMs, onProgress } = this.#opts

    // Generate folds once, shared across all candidates
    const folds = task === 'classification'
      ? stratifiedKFold(yn, cv, { shuffle: true, seed })
      : kFold(yn.length, cv, { shuffle: true, seed })

    const executor = new Executor({
      folds,
      scoring,
      X: Xn,
      y: yn,
      timeLimitMs: maxTimeMs,
      seed,
      onProgress,
    })

    const strategy = new RandomStrategy(this.#models, { nIter, seed })

    const { leaderboard } = await executor.runStrategy(strategy)

    if (leaderboard.length === 0) {
      throw new ValidationError('RandomSearch: no candidates were evaluated')
    }

    this.#leaderboard = leaderboard
    this.#bestResult = leaderboard.best()
    return { leaderboard, bestResult: this.#bestResult }
  }

  /**
   * Refit the best candidate on full data.
   *
   * @param {*} X - features, passed through normalizeX
   * @param {*} y - targets, passed through normalizeY
   * @returns {Promise<object>} the instance created by the winning model's
   *   `cls.create` after its `fit` has completed
   * @throws {ValidationError} when fit() has not produced a best result yet,
   *   or when no configured model carries the best result's `modelName`
   */
  async refitBest(X, y) {
    if (!this.#bestResult) {
      throw new ValidationError('RandomSearch: must call fit() first')
    }
    const best = this.#bestResult
    const model = this.#models.find(m => m.name === best.modelName)
    if (!model) {
      // Fail with a readable message instead of a TypeError on `model.cls`.
      throw new ValidationError(
        `RandomSearch: no model named "${best.modelName}" is available to refit`,
      )
    }
    const instance = await model.cls.create(best.params)
    const Xn = normalizeX(X)
    const yn = normalizeY(y)
    // Awaiting is a no-op for a synchronous fit, and makes sure an
    // asynchronous one has finished (and its errors surface here) before
    // the instance is returned.
    await instance.fit(Xn, yn)
    return instance
  }

  /** Leaderboard of the last fit(), or null before the first one. */
  get leaderboard() { return this.#leaderboard }

  /** Best result of the last fit(), or null before the first one. */
  get bestResult() { return this.#bestResult }
}
@@ -0,0 +1,157 @@
1
+ import { makeLCG } from '@wlearn/core'
2
+ import { sampleConfig } from './sampler.js'
3
+ import { makeCandidateId } from './common.js'
4
+
5
+ const { ceil, log, max, min, floor } = Math
6
+
7
/**
 * Successive halving strategy: multi-round elimination tournament.
 *
 * Each round evaluates the surviving candidates on a subsample budget,
 * keeps roughly the top 1/factor of them and multiplies the budget by
 * `factor`. A final round hands out the survivors without a subsample
 * budget.
 *
 * Round transitions happen inside report() once as many results as there
 * are candidates in the round have been reported. next() returns null both
 * while a round is waiting for outstanding reports and once the strategy is
 * done; use isDone() to tell the two apart.
 */
export class HalvingStrategy {
  #candidates           // candidates still alive in the current round
  #roundIndex = 0       // next position to hand out within #candidates
  #round = 0            // current round number
  #nRounds
  #factor
  #nSamples             // total sample count (for computing fractions)
  #minResources
  #greaterIsBetter
  #roundResults = []    // results collected for the current round
  #rounds = []          // stats of completed elimination rounds
  #done = false
  #finalRound = false

  /**
   * @param {Array<{ name: string, cls: object, searchSpace?: object, params?: object }>} models
   * @param {object} opts
   * @param {number} opts.nIter - candidates per model
   * @param {number} opts.seed
   * @param {number} opts.factor - elimination factor (keep top 1/factor); must be > 1
   * @param {number} opts.nSamples - total sample count for fraction computation
   * @param {boolean} opts.greaterIsBetter - sort direction for elimination
   * @param {number} opts.cv - fold count (for minResources default)
   * @throws {RangeError} when `factor` is not greater than 1
   */
  constructor(models, { nIter = 20, seed = 42, factor = 3, nSamples, greaterIsBetter = true, cv = 5 } = {}) {
    // factor <= 1 would keep every candidate each round and never reach the
    // final round, so the tournament could not terminate.
    if (!(factor > 1)) {
      throw new RangeError(`HalvingStrategy: factor must be greater than 1, got ${factor}`)
    }
    this.#factor = factor
    this.#nSamples = nSamples || 0
    this.#greaterIsBetter = greaterIsBetter

    // Generate all candidate configs
    const rng = makeLCG(seed)
    const allCandidates = []
    for (const model of models) {
      const space = model.searchSpace || model.cls.defaultSearchSpace?.() || {}
      // Fixed params are not searched over; they are merged back in below.
      const effectiveSpace = { ...space }
      if (model.params) {
        for (const key of Object.keys(model.params)) {
          delete effectiveSpace[key]
        }
      }
      // Each model gets its own generator, seeded from the shared one.
      const configRng = makeLCG((rng() * 0x7fffffff) | 0)
      for (let i = 0; i < nIter; i++) {
        const config = sampleConfig(effectiveSpace, configRng)
        const params = { ...config, ...(model.params || {}) }
        const candidateId = makeCandidateId(model.name, params)
        allCandidates.push({
          candidateId,
          cls: model.cls,
          params,
        })
      }
    }

    this.#candidates = allCandidates
    this.#nRounds = Math.max(1, Math.ceil(Math.log(allCandidates.length) / Math.log(factor)))
    this.#minResources = Math.max(cv * 2, Math.floor(this.#nSamples / (factor ** this.#nRounds)))

    // With nothing to evaluate report() is never called, so finish now.
    if (allCandidates.length === 0) {
      this.#done = true
    }
  }

  /**
   * Return next candidate for current round (with subsample budget),
   * or null when the round has been fully handed out or the strategy is
   * done.
   */
  next() {
    if (this.#done) return null
    if (this.#roundIndex >= this.#candidates.length) return null

    const cand = this.#candidates[this.#roundIndex++]

    // For non-final rounds, attach subsample budget
    if (!this.#finalRound) {
      const { fraction } = this.#currentBudget()
      // Also false when fraction is NaN (nSamples of 0): no budget attached.
      if (fraction < 1) {
        return { ...cand, budget: { type: 'subsample', value: fraction } }
      }
    }

    return cand
  }

  /**
   * Report a candidate result.
   * When all candidates in the current round have reported,
   * performs elimination and advances to the next round.
   *
   * @param {{ candidateId: string, meanScore: number }} result
   */
  report(result) {
    this.#roundResults.push(result)

    // Check if all candidates in current round have been evaluated
    if (this.#roundResults.length < this.#candidates.length) return

    if (this.#finalRound) {
      // Final round done
      this.#done = true
      return
    }

    const { nResources, fraction } = this.#currentBudget()
    const sorted = this.#ranked(this.#roundResults)
    const nSurvivors = Math.max(1, Math.ceil(sorted.length / this.#factor))

    this.#rounds.push({
      round: this.#round,
      nResources,
      fraction,
      nCandidates: this.#roundResults.length,
      nSurvivors,
    })

    // Count how many survivors each candidateId is entitled to. Several
    // candidates can share an id (identical sampled params); a plain id set
    // would let every one of them through and defeat the elimination.
    const quota = new Map()
    for (const { candidateId } of sorted.slice(0, nSurvivors)) {
      quota.set(candidateId, (quota.get(candidateId) ?? 0) + 1)
    }
    this.#candidates = this.#candidates.filter((cand) => {
      const remaining = quota.get(cand.candidateId) ?? 0
      if (remaining === 0) return false
      quota.set(cand.candidateId, remaining - 1)
      return true
    })

    this.#round++
    this.#roundIndex = 0
    this.#roundResults = []

    if (this.#candidates.length === 0) {
      // No reported id matched a candidate: nothing is left to run.
      this.#done = true
      return
    }

    // Check if we should enter final round (1 or fewer survivors, or max rounds)
    if (this.#candidates.length <= 1 || this.#round >= this.#nRounds) {
      this.#finalRound = true
    }
  }

  /** True once the final round has been fully reported (or nothing is left to run). */
  isDone() {
    return this.#done
  }

  /** Stats of the completed elimination rounds, in order. */
  get rounds() {
    return this.#rounds
  }

  // Sample budget of the current round: absolute count and share of nSamples.
  #currentBudget() {
    const nResources = Math.min(
      this.#nSamples,
      Math.floor(this.#minResources * (this.#factor ** this.#round)),
    )
    const fraction = Math.min(1, nResources / this.#nSamples)
    return { nResources, fraction }
  }

  // Copy of `results` ordered best-first. Scores that are not numbers (or
  // are NaN) go last: subtracting them would give the sort an inconsistent
  // comparator and could rank such a result above valid ones.
  #ranked(results) {
    const unusable = score => typeof score !== 'number' || Number.isNaN(score)
    const direction = this.#greaterIsBetter ? -1 : 1
    return [...results].sort((a, b) => {
      const aBad = unusable(a.meanScore)
      const bBad = unusable(b.meanScore)
      if (aBad || bBad) return Number(aBad) - Number(bBad)
      return direction * (a.meanScore - b.meanScore)
    })
  }
}
@@ -0,0 +1,126 @@
1
+ import { makeLCG } from '@wlearn/core'
2
+ import { sampleConfig } from './sampler.js'
3
+ import { makeCandidateId } from './common.js'
4
+
5
+ const { max, ceil } = Math
6
+
7
/**
 * Progressive evaluation strategy: probe all candidates cheaply, then
 * promote the top N to a second evaluation.
 *
 * Phase 1 (probe): yield every candidate, with a subsample budget of
 *   `probeFraction` attached whenever that fraction is below 1.
 * Phase 2 (promote): yield the best `promoteCount` candidates again, this
 *   time without a budget.
 *
 * NOTE(review): the original description pairs this with a
 * ProgressiveSearch that runs two Executors (1 fold for probing, K folds
 * for the promoted set); that caller is not visible from here -- confirm.
 */
export class ProgressiveStrategy {
  #allCandidates = []
  #promotedCandidates = []
  #phase = 'probe'
  #probeIndex = 0
  #promoteIndex = 0
  #probeResults = []
  #promoteCount
  #greaterIsBetter
  #done = false
  #probeFraction

  /**
   * @param {Array<{ name: string, cls: object, searchSpace?: object, params?: object }>} models
   * @param {object} opts
   * @param {number} opts.nIter - candidates per model
   * @param {number} opts.seed
   * @param {number} opts.promoteCount - how many to promote from probe to full eval
   * @param {boolean} opts.greaterIsBetter - sort direction
   * @param {number} opts.probeFraction - subsample fraction for probe phase (0-1)
   */
  constructor(models, { nIter = 20, seed = 42, promoteCount = 10,
    greaterIsBetter = true, probeFraction = 0.5 } = {}) {
    this.#promoteCount = promoteCount
    this.#greaterIsBetter = greaterIsBetter
    this.#probeFraction = probeFraction

    const rng = makeLCG(seed)
    for (const model of models) {
      const space = model.searchSpace || model.cls.defaultSearchSpace?.() || {}
      // Fixed params are not searched over; they are merged back in below.
      const effectiveSpace = { ...space }
      if (model.params) {
        for (const key of Object.keys(model.params)) {
          delete effectiveSpace[key]
        }
      }
      // Each model gets its own generator, seeded from the shared one.
      const configRng = makeLCG((rng() * 0x7fffffff) | 0)
      for (let i = 0; i < nIter; i++) {
        const config = sampleConfig(effectiveSpace, configRng)
        const params = { ...config, ...(model.params || {}) }
        const candidateId = makeCandidateId(model.name, params)
        this.#allCandidates.push({ candidateId, cls: model.cls, params })
      }
    }

    // With nothing to probe report() is never called, so finish now.
    if (this.#allCandidates.length === 0) {
      this.#done = true
    }
  }

  /** Current phase: 'probe' or 'promote'. */
  get phase() { return this.#phase }

  /**
   * Return the next candidate of the current phase, or null when the phase
   * has been fully handed out or the strategy is done.
   */
  next() {
    if (this.#done) return null

    if (this.#phase === 'probe') {
      if (this.#probeIndex >= this.#allCandidates.length) return null
      const cand = this.#allCandidates[this.#probeIndex++]
      // Attach subsample budget for cheaper probe
      if (this.#probeFraction < 1) {
        return { ...cand, budget: { type: 'subsample', value: this.#probeFraction } }
      }
      return cand
    }

    // Promote phase
    if (this.#promoteIndex >= this.#promotedCandidates.length) return null
    return this.#promotedCandidates[this.#promoteIndex++]
  }

  /**
   * Record a result. During the probe phase the results are collected and,
   * once there are as many as candidates, the promoted set is chosen.
   * Results reported during the promote phase are ignored.
   *
   * @param {{ candidateId: string, meanScore: number }} result
   */
  report(result) {
    if (this.#phase === 'probe') {
      this.#probeResults.push(result)
      if (this.#probeResults.length >= this.#allCandidates.length) {
        this.#transitionToPromote()
      }
    }
    // Promote phase: nothing to do per-result
  }

  // Rank the probe results, pick the promoted candidates and switch phase.
  #transitionToPromote() {
    // Best-first ordering. Scores that are not numbers (or are NaN) go
    // last: subtracting them would give the sort an inconsistent comparator
    // and could rank such a result above valid ones.
    const unusable = score => typeof score !== 'number' || Number.isNaN(score)
    const direction = this.#greaterIsBetter ? -1 : 1
    const sorted = [...this.#probeResults].sort((a, b) => {
      const aBad = unusable(a.meanScore)
      const bBad = unusable(b.meanScore)
      if (aBad || bBad) return Number(aBad) - Number(bBad)
      return direction * (a.meanScore - b.meanScore)
    })

    // Select top N (at least one)
    const topN = sorted.slice(0, Math.max(1, this.#promoteCount))

    // Count how many promotions each candidateId is entitled to. Several
    // candidates can share an id (identical sampled params); a plain id set
    // would promote all of them and exceed promoteCount.
    const quota = new Map()
    for (const { candidateId } of topN) {
      quota.set(candidateId, (quota.get(candidateId) ?? 0) + 1)
    }

    // Build promoted list from original candidates (to preserve cls reference)
    this.#promotedCandidates = this.#allCandidates.filter((cand) => {
      const remaining = quota.get(cand.candidateId) ?? 0
      if (remaining === 0) return false
      quota.set(cand.candidateId, remaining - 1)
      return true
    })

    this.#phase = 'promote'
    this.#promoteIndex = 0

    if (this.#promotedCandidates.length === 0) {
      // No reported id matched a candidate: nothing is left to run.
      this.#done = true
    }
  }

  /**
   * True once every promoted candidate has been handed out by next(), or
   * when there was nothing to probe or promote.
   */
  isDone() {
    if (this.#done) return true
    if (this.#phase === 'promote' &&
        this.#promoteIndex >= this.#promotedCandidates.length &&
        this.#promotedCandidates.length > 0) {
      this.#done = true
      return true
    }
    return false
  }
}
@@ -0,0 +1,67 @@
1
+ import { makeLCG } from '@wlearn/core'
2
+ import { sampleConfig } from './sampler.js'
3
+ import { makeCandidateId } from './common.js'
4
+
5
/**
 * Random search strategy: every candidate is drawn up front (nIter random
 * configs per model) and then handed out one by one. Reported results do
 * not influence what is yielded.
 */
export class RandomStrategy {
  #pending = []
  #cursor = 0

  /**
   * @param {Array<{ name: string, cls: object, searchSpace?: object, params?: object }>} models
   * @param {object} opts
   * @param {number} opts.nIter - candidates per model
   * @param {number} opts.seed
   */
  constructor(models, { nIter = 20, seed = 42 } = {}) {
    const seedSource = makeLCG(seed)

    for (const model of models) {
      // Search space minus the params the caller pinned for this model.
      const searchable = {
        ...(model.searchSpace || model.cls.defaultSearchSpace?.() || {}),
      }
      if (model.params) {
        Object.keys(model.params).forEach((fixedKey) => {
          delete searchable[fixedKey]
        })
      }

      // One generator per model, seeded from the shared stream.
      const modelRng = makeLCG((seedSource() * 0x7fffffff) | 0)
      let drawn = 0
      while (drawn < nIter) {
        // Pinned params are merged last so they always win.
        const params = {
          ...sampleConfig(searchable, modelRng),
          ...(model.params || {}),
        }
        this.#pending.push({
          candidateId: makeCandidateId(model.name, params),
          cls: model.cls,
          params,
        })
        drawn += 1
      }
    }
  }

  /**
   * Hand out the next candidate, or null once all have been handed out.
   */
  next() {
    if (this.isDone()) return null
    const candidate = this.#pending[this.#cursor]
    this.#cursor += 1
    return candidate
  }

  /**
   * Accept a result. Intentionally does nothing for random search.
   */
  report(_result) {}

  /**
   * True once every candidate has been yielded.
   */
  isDone() {
    return this.#cursor >= this.#pending.length
  }
}