@wlearn/automl 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +27 -0
- package/src/auto-fit.js +261 -0
- package/src/common.js +108 -0
- package/src/executor.js +209 -0
- package/src/halving.js +95 -0
- package/src/index.js +12 -0
- package/src/leaderboard.js +106 -0
- package/src/portfolio.js +332 -0
- package/src/progressive.js +156 -0
- package/src/sampler.js +162 -0
- package/src/search.js +93 -0
- package/src/strategy-halving.js +157 -0
- package/src/strategy-progressive.js +126 -0
- package/src/strategy-random.js +67 -0
package/src/index.js
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
export { sampleParam, sampleConfig, randomConfigs, gridConfigs } from './sampler.js'
|
|
2
|
+
export { RandomSearch } from './search.js'
|
|
3
|
+
export { SuccessiveHalvingSearch } from './halving.js'
|
|
4
|
+
export { PortfolioSearch, PortfolioStrategy, getPortfolio, PORTFOLIO } from './portfolio.js'
|
|
5
|
+
export { Leaderboard } from './leaderboard.js'
|
|
6
|
+
export { autoFit } from './auto-fit.js'
|
|
7
|
+
export { Executor } from './executor.js'
|
|
8
|
+
export { RandomStrategy } from './strategy-random.js'
|
|
9
|
+
export { HalvingStrategy } from './strategy-halving.js'
|
|
10
|
+
export { ProgressiveStrategy } from './strategy-progressive.js'
|
|
11
|
+
export { ProgressiveSearch } from './progressive.js'
|
|
12
|
+
export { detectTask, makeCandidateId, seedFor, partialShuffle, scorerGreaterIsBetter } from './common.js'
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
/**
 * Tracks and ranks candidate evaluation results.
 *
 * Entries are ranked by `meanScore`, highest first. An entry whose mean is
 * NaN (an empty `scores` array, or any NaN fold score) or not a number at all
 * (e.g. `null` after a JSON round trip) is always ranked after every entry
 * with a real score, so it can never be reported as the best candidate.
 */
export class Leaderboard {
  #entries = []
  #nextId = 0
  #dirty = true

  /**
   * Add a candidate result.
   * @param {{ modelName: string, params: Record<string, unknown>, scores: Float64Array, fitTimeMs: number }} entry
   * @returns {object} the stored entry with `id`, `meanScore` and `stdScore`
   *   filled in; `rank` is 0 until ranked() is called
   */
  add({ modelName, params, scores, fitTimeMs }) {
    const count = scores.length

    let sum = 0
    for (let i = 0; i < count; i++) sum += scores[i]
    // 0 / 0 -> NaN for an empty scores array; ranked() sorts such entries last.
    const meanScore = sum / count

    // Population standard deviation (divides by count, not count - 1).
    let sumSq = 0
    for (let i = 0; i < count; i++) {
      const d = scores[i] - meanScore
      sumSq += d * d
    }
    const stdScore = Math.sqrt(sumSq / count)

    const entry = {
      id: this.#nextId++,
      modelName,
      params,
      scores,
      meanScore,
      stdScore,
      fitTimeMs,
      rank: 0,
    }
    this.#entries.push(entry)
    this.#dirty = true
    return entry
  }

  /**
   * Return all entries sorted by meanScore descending with ranks assigned.
   * The returned array is a shallow copy; the entry objects are shared.
   * @returns {object[]}
   */
  ranked() {
    if (this.#dirty) {
      const isMissing = (score) => typeof score !== 'number' || Number.isNaN(score)

      // `b - a` is not a valid comparator here: it evaluates to NaN whenever a
      // mean is NaN, which sort() treats as "equal" and which can leave a NaN
      // entry at rank 1. Compare explicitly and push missing scores to the end.
      this.#entries.sort((a, b) => {
        const aMissing = isMissing(a.meanScore)
        const bMissing = isMissing(b.meanScore)
        if (aMissing || bMissing) return Number(aMissing) - Number(bMissing)
        if (a.meanScore === b.meanScore) return 0
        return a.meanScore > b.meanScore ? -1 : 1
      })

      for (let i = 0; i < this.#entries.length; i++) {
        this.#entries[i].rank = i + 1
      }
      this.#dirty = false
    }
    return this.#entries.slice()
  }

  /**
   * Return the best entry (highest meanScore) or null when empty.
   * @returns {object | null}
   */
  best() {
    if (this.#entries.length === 0) return null
    this.ranked() // ensure sorted
    return this.#entries[0]
  }

  /**
   * Return top k entries.
   * @param {number} k
   * @returns {object[]}
   */
  top(k) {
    return this.ranked().slice(0, k)
  }

  /**
   * Serialize to JSON-friendly array (typed-array scores become plain arrays).
   * @returns {object[]}
   */
  toJSON() {
    return this.ranked().map(e => ({
      id: e.id,
      modelName: e.modelName,
      params: e.params,
      scores: [...e.scores],
      meanScore: e.meanScore,
      stdScore: e.stdScore,
      fitTimeMs: e.fitTimeMs,
      rank: e.rank,
    }))
  }

  /**
   * Deserialize from JSON array. Ids are preserved and the id counter resumes
   * after the largest id seen, so later add() calls do not collide.
   * @param {object[]} arr
   * @returns {Leaderboard}
   */
  static fromJSON(arr) {
    const lb = new Leaderboard()
    for (const e of arr) {
      lb.#entries.push({
        ...e,
        scores: new Float64Array(e.scores),
      })
      if (e.id >= lb.#nextId) lb.#nextId = e.id + 1
    }
    lb.#dirty = true
    return lb
  }

  /** Number of entries currently stored. */
  get length() {
    return this.#entries.length
  }
}
|
package/src/portfolio.js
ADDED
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Zeroshot portfolio: pre-tuned hyperparameter configs per model family.
|
|
3
|
+
*
|
|
4
|
+
* Instead of random search, the portfolio provides a curated set of configs
|
|
5
|
+
* known to work well across diverse datasets. Inspired by AutoGluon's
|
|
6
|
+
* zeroshot portfolio approach (TabRepo).
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { stratifiedKFold, kFold, normalizeX, normalizeY,
|
|
10
|
+
ValidationError } from '@wlearn/core'
|
|
11
|
+
import { Executor } from './executor.js'
|
|
12
|
+
import { detectTask } from './common.js'
|
|
13
|
+
import { makeCandidateId } from './common.js'
|
|
14
|
+
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
// Portfolio configs: task -> model_name -> list of param dicts
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
|
|
19
|
+
// The tuned values are the same for both tasks; only a few task-specific keys
// differ. Each list below is a template, and buildTaskPortfolio() merges the
// task-specific keys in, creating a fresh object for every config so the two
// task portfolios never share config objects.

const XGB_BOOSTED_TEMPLATES = [
  { eta: 0.05, max_depth: 6, numRound: 200, subsample: 0.8,
    colsample_bytree: 0.8, min_child_weight: 1.0, lambda: 1.0, alpha: 0.0 },
  { eta: 0.01, max_depth: 10, numRound: 500, subsample: 0.7,
    colsample_bytree: 0.65, min_child_weight: 0.6, lambda: 0.1, alpha: 0.0 },
  { eta: 0.1, max_depth: 3, numRound: 100, subsample: 0.9,
    colsample_bytree: 0.9, min_child_weight: 1.0, lambda: 1.0, alpha: 0.0 },
  { eta: 0.03, max_depth: 7, numRound: 300, subsample: 0.8,
    colsample_bytree: 0.7, min_child_weight: 1.0, lambda: 5.0, alpha: 1.0 },
  { eta: 0.02, max_depth: 8, numRound: 400, subsample: 0.8,
    colsample_bytree: 0.8, min_child_weight: 0.8, lambda: 0.1, alpha: 0.0 },
  { eta: 0.08, max_depth: 4, numRound: 150, subsample: 0.85,
    colsample_bytree: 0.55, min_child_weight: 1.0, lambda: 1.0, alpha: 0.1 },
  { eta: 0.015, max_depth: 9, numRound: 350, subsample: 0.75,
    colsample_bytree: 0.55, min_child_weight: 0.9, lambda: 0.5, alpha: 0.0 },
  { eta: 0.3, max_depth: 3, numRound: 50, subsample: 0.9,
    colsample_bytree: 0.9, min_child_weight: 1.0, lambda: 1.0, alpha: 0.0 },
]

// RF-mode configs (low correlation with boosting for ensemble diversity)
const XGB_FOREST_TEMPLATES = [
  { num_parallel_tree: 100, numRound: 1, subsample: 0.8,
    colsample_bynode: 0.8, learning_rate: 1.0 },
  { num_parallel_tree: 200, numRound: 1, subsample: 0.7,
    colsample_bynode: 0.6, learning_rate: 1.0 },
]

const LGB_TEMPLATES = [
  { learning_rate: 0.05, max_depth: 6, numRound: 200, num_leaves: 63,
    subsample: 0.8, colsample_bytree: 0.8, min_child_weight: 1.0,
    reg_lambda: 1.0, reg_alpha: 0.0, verbosity: -1 },
  { learning_rate: 0.01, max_depth: -1, numRound: 500, num_leaves: 127,
    subsample: 0.7, colsample_bytree: 0.65,
    reg_lambda: 0.1, reg_alpha: 0.0, verbosity: -1 },
  { learning_rate: 0.1, max_depth: 4, numRound: 100, num_leaves: 15,
    subsample: 0.9, colsample_bytree: 0.9,
    reg_lambda: 1.0, reg_alpha: 0.0, verbosity: -1 },
  { learning_rate: 0.05, numRound: 200, num_leaves: 63,
    subsample: 0.8, colsample_bytree: 0.8, extra_trees: true,
    reg_lambda: 1.0, reg_alpha: 0.0, verbosity: -1 },
  { learning_rate: 0.03, max_depth: 7, numRound: 300, num_leaves: 63,
    subsample: 0.8, colsample_bytree: 0.7,
    reg_lambda: 5.0, reg_alpha: 1.0, verbosity: -1 },
  { learning_rate: 0.01, max_depth: 8, numRound: 500, num_leaves: 95,
    subsample: 0.75, colsample_bytree: 0.55,
    reg_lambda: 0.5, reg_alpha: 0.0, verbosity: -1 },
]

const EBM_TEMPLATES = [
  { learningRate: 0.01, maxRounds: 500, maxLeaves: 3, maxBins: 256 },
  { learningRate: 0.01, maxRounds: 500, maxLeaves: 4, maxInteractions: 15, maxBins: 256 },
  { learningRate: 0.05, maxRounds: 300, maxLeaves: 3, maxBins: 128 },
  { learningRate: 0.005, maxRounds: 800, maxLeaves: 5, maxBins: 512 },
]

// C values for the linear configs, paired by position with each task's solver list.
const LINEAR_C_VALUES = [1.0, 10.0, 1.0, 0.1]

const SVM_TEMPLATES = [
  { kernel: 2, C: 1.0, gamma: 0 },
  { kernel: 2, C: 10.0, gamma: 0.01 },
  { kernel: 1, C: 1.0, degree: 3, gamma: 0 },
  { kernel: 0, C: 1.0 },
]

const KNN_TEMPLATES = [
  { k: 5, metric: 'l2' },
  { k: 15, metric: 'l2' },
  { k: 3, metric: 'l1' },
]

const TSETLIN_TEMPLATES = [
  { nClauses: 100, threshold: 50, s: 3.0, nEpochs: 100 },
  { nClauses: 500, threshold: 100, s: 5.0, nEpochs: 100 },
  { nClauses: 50, threshold: 25, s: 2.0, nEpochs: 60 },
]

// Everything that differs between the two task portfolios.
const TASK_SPECIFICS = {
  classification: {
    xgbObjective: 'multi:softprob',
    lgbObjective: 'multiclass',
    linearSolvers: [0, 0, 7, 6],
    svmType: 0,
    svmTrailing: { probability: 1 },
  },
  regression: {
    xgbObjective: 'reg:squarederror',
    lgbObjective: 'regression',
    linearSolvers: [11, 11, 12, 13],
    svmType: 3,
    svmTrailing: {},
  },
}

// Expand the shared templates into the model_name -> config list map for one task.
function buildTaskPortfolio(task) {
  const { xgbObjective, lgbObjective, linearSolvers, svmType, svmTrailing } = TASK_SPECIFICS[task]
  // `objective` is listed first so it stays the leading key of every config.
  const withObjective = (objective, templates) => templates.map(t => ({ objective, ...t }))

  return {
    xgb: withObjective(xgbObjective, [...XGB_BOOSTED_TEMPLATES, ...XGB_FOREST_TEMPLATES]),
    lgb: withObjective(lgbObjective, LGB_TEMPLATES),
    ebm: withObjective(task, EBM_TEMPLATES),
    linear: linearSolvers.map((solver, i) => ({ solver, C: LINEAR_C_VALUES[i] })),
    svm: SVM_TEMPLATES.map(t => ({ svmType, ...t, ...svmTrailing })),
    knn: KNN_TEMPLATES.map(t => ({ ...t, task })),
    tsetlin: TSETLIN_TEMPLATES.map(t => ({ task, ...t })),
  }
}

export const PORTFOLIO = {
  classification: buildTaskPortfolio('classification'),
  regression: buildTaskPortfolio('regression'),
}
|
|
195
|
+
|
|
196
|
+
/**
 * Return portfolio configs for the given task.
 *
 * Any task name that is not one of PORTFOLIO's own keys falls back to the
 * classification portfolio. Only own keys are considered, so a name that
 * merely exists on Object.prototype (e.g. 'constructor') also falls back
 * instead of returning an inherited member.
 *
 * @param {string} task - 'classification' or 'regression'
 * @returns {Object} model name -> config list
 */
export function getPortfolio(task = 'classification') {
  const known = Object.prototype.hasOwnProperty.call(PORTFOLIO, task)
  return (known && PORTFOLIO[task]) || PORTFOLIO.classification
}
|
|
204
|
+
|
|
205
|
+
// ---------------------------------------------------------------------------
|
|
206
|
+
// PortfolioStrategy
|
|
207
|
+
// ---------------------------------------------------------------------------
|
|
208
|
+
|
|
209
|
+
/**
 * Strategy that hands out the zeroshot portfolio's pre-tuned configs one by
 * one, in model order and then portfolio order.
 * Exposes the same next() / report() / isDone() surface as RandomStrategy and
 * HalvingStrategy.
 */
export class PortfolioStrategy {
  #queue = []
  #index = 0
  #total = 0

  /**
   * @param {Array<{ name: string, cls: object, params?: object }>} models
   * @param {object} opts
   * @param {string} [opts.task] - task whose portfolio is used (default 'classification')
   * @param {number} [opts.seed] - accepted for interface parity; not used here
   */
  constructor(models, { task = 'classification', seed = 42 } = {}) {
    const portfolio = getPortfolio(task)

    for (const { name, cls, params: overrides } of models) {
      // A model with no portfolio entry still gets one candidate: its fixed params alone.
      const tuned = portfolio[name] || [{}]
      const fixed = overrides || {}

      const candidates = tuned.map((config) => {
        // Fixed params are spread last, so they win over the portfolio config.
        const params = { ...config, ...fixed }
        return { candidateId: makeCandidateId(name, params), cls, params }
      })
      this.#queue.push(...candidates)
    }

    this.#total = this.#queue.length
  }

  /** Return the next queued candidate, or null once the queue is exhausted. */
  next() {
    if (this.#index < this.#total) {
      const candidate = this.#queue[this.#index]
      this.#index += 1
      return candidate
    }
    return null
  }

  /** Results are ignored: the portfolio is fixed up front. */
  report(_result) {}

  /** True once every queued candidate has been handed out. */
  isDone() {
    return !(this.#index < this.#total)
  }
}
|
|
253
|
+
|
|
254
|
+
// ---------------------------------------------------------------------------
|
|
255
|
+
// PortfolioSearch
|
|
256
|
+
// ---------------------------------------------------------------------------
|
|
257
|
+
|
|
258
|
+
/**
 * Evaluate pre-tuned portfolio configs with cross-validation.
 *
 * fit() runs every portfolio candidate through an Executor and records the
 * resulting leaderboard; refitBest() then trains a fresh instance of the
 * winning config on the data it is given.
 */
export class PortfolioSearch {
  #models
  #opts
  #leaderboard = null
  #bestResult = null

  /**
   * @param {Array<{ name: string, cls: object, params?: object }>} models
   * @param {object} [opts]
   * @param {string|null} [opts.scoring] - scorer name; defaults to 'accuracy'
   *   for classification and 'r2' otherwise
   * @param {number} [opts.cv] - number of folds (default 5)
   * @param {number} [opts.seed] - seed for fold shuffling and the executor (default 42)
   * @param {string|null} [opts.task] - task name; detected from y when not set
   * @param {number} [opts.maxTimeMs] - forwarded to the executor as timeLimitMs (default 0)
   * @param {Function|null} [opts.onProgress] - forwarded to the executor
   * @throws {ValidationError} when no models are given
   */
  constructor(models, opts = {}) {
    if (!models || models.length === 0) {
      throw new ValidationError('PortfolioSearch: at least one model is required')
    }
    this.#models = models
    this.#opts = {
      scoring: null,
      cv: 5,
      seed: 42,
      task: null,
      maxTimeMs: 0,
      onProgress: null,
      ...opts,
    }
  }

  /**
   * Cross-validate every portfolio candidate.
   * @returns {Promise<{ leaderboard: object, bestResult: object }>}
   * @throws {ValidationError} when the executor evaluated no candidate
   */
  async fit(X, y) {
    const Xn = normalizeX(X)
    const yn = normalizeY(y)
    const task = this.#opts.task || detectTask(yn)
    const scoring = this.#opts.scoring || (task === 'classification' ? 'accuracy' : 'r2')
    const { cv, seed, maxTimeMs, onProgress } = this.#opts

    // Stratify on the labels for classification; plain k-fold otherwise.
    const folds = task === 'classification'
      ? stratifiedKFold(yn, cv, { shuffle: true, seed })
      : kFold(yn.length, cv, { shuffle: true, seed })

    const executor = new Executor({
      folds,
      scoring,
      X: Xn,
      y: yn,
      timeLimitMs: maxTimeMs,
      seed,
      onProgress,
    })

    const strategy = new PortfolioStrategy(this.#models, { task, seed })

    const { leaderboard } = await executor.runStrategy(strategy)

    if (leaderboard.length === 0) {
      throw new ValidationError('PortfolioSearch: no candidates were evaluated')
    }

    this.#leaderboard = leaderboard
    this.#bestResult = leaderboard.best()
    return { leaderboard, bestResult: this.#bestResult }
  }

  /**
   * Train a new instance of the best config found by fit().
   * @returns {Promise<object>} the fitted model instance
   * @throws {ValidationError} when fit() has not produced a best result, or
   *   when the best result's modelName matches none of the supplied models
   */
  async refitBest(X, y) {
    if (!this.#bestResult) {
      throw new ValidationError('PortfolioSearch: must call fit() first')
    }
    const best = this.#bestResult
    const model = this.#models.find(m => m.name === best.modelName)
    if (!model) {
      // Without this guard the lookup below fails with an opaque TypeError.
      throw new ValidationError(
        `PortfolioSearch: best result refers to unknown model "${best.modelName}"`
      )
    }
    const instance = await model.cls.create(best.params)
    const Xn = normalizeX(X)
    const yn = normalizeY(y)
    // Awaiting is a no-op for a synchronous fit(); if fit() returns a promise
    // it guarantees training has finished before the instance is handed back.
    // NOTE(review): confirm whether wlearn model fit() can be async.
    await instance.fit(Xn, yn)
    return instance
  }

  /** Leaderboard from the last fit(), or null before fit() has succeeded. */
  get leaderboard() { return this.#leaderboard }

  /** Best entry from the last fit(), or null before fit() has succeeded. */
  get bestResult() { return this.#bestResult }
}
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
import { stratifiedKFold, kFold, normalizeX, normalizeY,
|
|
2
|
+
ValidationError } from '@wlearn/core'
|
|
3
|
+
import { Executor } from './executor.js'
|
|
4
|
+
import { ProgressiveStrategy } from './strategy-progressive.js'
|
|
5
|
+
import { detectTask, scorerGreaterIsBetter } from './common.js'
|
|
6
|
+
|
|
7
|
+
/**
 * Progressive search: probe all candidates cheaply (1 fold + subsample),
 * then promote top N to full K-fold evaluation.
 *
 * Faster than full random search when many candidates are weak.
 * The probe phase filters out bad configs quickly, saving time
 * for thorough evaluation of promising candidates.
 *
 * Candidate generation, subsampling and promotion are delegated to
 * ProgressiveStrategy; this class only wires up the folds, the two
 * executors and the time budget.
 */
export class ProgressiveSearch {
  #models
  #opts
  #leaderboard = null
  #bestResult = null

  /**
   * @param {Array<{ name: string, cls: object, params?: object }>} models
   * @param {object} [opts]
   * @param {string|null} [opts.scoring] - scorer name; defaults to 'accuracy'
   *   for classification and 'r2' otherwise
   * @param {number} [opts.cv] - folds used for the full evaluation (default 5)
   * @param {number} [opts.seed] - base seed (default 42)
   * @param {string|null} [opts.task] - task name; detected from y when not set
   * @param {number} [opts.nIter] - forwarded to ProgressiveStrategy (default 20)
   * @param {number} [opts.maxTimeMs] - total budget; 30% goes to the probe
   *   executor and 70% to the full executor (default 0)
   * @param {number} [opts.promoteCount] - forwarded to ProgressiveStrategy (default 10)
   * @param {number} [opts.probeFraction] - forwarded to ProgressiveStrategy (default 0.5)
   * @param {Function|null} [opts.onProgress] - forwarded to both executors
   * @throws {ValidationError} when no models are given
   */
  constructor(models, opts = {}) {
    if (!models || models.length === 0) {
      throw new ValidationError('ProgressiveSearch: at least one model is required')
    }
    this.#models = models
    this.#opts = {
      scoring: null,
      cv: 5,
      seed: 42,
      task: null,
      nIter: 20,
      maxTimeMs: 0,
      promoteCount: 10,
      probeFraction: 0.5,
      onProgress: null,
      ...opts,
    }
  }

  /**
   * Run the probe phase, then the full evaluation of whatever the strategy
   * yields afterwards.
   * @returns {Promise<{ leaderboard: object, bestResult: object }>} the full
   *   leaderboard, or the probe leaderboard when no full evaluation completed
   * @throws {ValidationError} when neither phase evaluated a candidate
   */
  async fit(X, y) {
    const Xn = normalizeX(X)
    const yn = normalizeY(y)
    const task = this.#opts.task || detectTask(yn)
    const scoring = this.#opts.scoring || (task === 'classification' ? 'accuracy' : 'r2')
    const { cv, seed, nIter, maxTimeMs, promoteCount, probeFraction, onProgress } = this.#opts
    const greaterIsBetter = scorerGreaterIsBetter(scoring)

    // A probe that throws must look like the worst possible result. For a
    // lower-is-better scorer that is +Infinity: reporting -Infinity there
    // would make the failed candidate look like the best one.
    const worstScore = greaterIsBetter === false ? Infinity : -Infinity

    // Probe folds: use only 1 fold for cheap screening
    const probeFolds = task === 'classification'
      ? stratifiedKFold(yn, 2, { shuffle: true, seed })
      : kFold(yn.length, 2, { shuffle: true, seed })
    // Use only the first fold for probing
    const singleFold = [probeFolds[0]]

    // Full folds for promoted candidates (seed + 1 gives a split that differs
    // from the probe split)
    const fullFolds = task === 'classification'
      ? stratifiedKFold(yn, cv, { shuffle: true, seed: seed + 1 })
      : kFold(yn.length, cv, { shuffle: true, seed: seed + 1 })

    // Create strategy
    const strategy = new ProgressiveStrategy(this.#models, {
      nIter, seed, promoteCount, greaterIsBetter, probeFraction,
    })

    // Phase 1: probe with 1-fold executor
    const probeExecutor = new Executor({
      folds: singleFold,
      scoring,
      X: Xn,
      y: yn,
      timeLimitMs: maxTimeMs > 0 ? Math.floor(maxTimeMs * 0.3) : 0,
      seed,
      onProgress,
    })

    while (strategy.phase === 'probe' && !strategy.isDone()) {
      if (probeExecutor.isTimedOut) break
      const cand = strategy.next()
      if (cand === null) break
      try {
        const result = await probeExecutor.evaluateCandidate(cand)
        strategy.report(result)
      } catch {
        // Report a failing result so the strategy can count it
        strategy.report({
          candidateId: cand.candidateId,
          meanScore: worstScore,
          foldScores: new Float64Array(1),
          stdScore: 0,
          fitTimeMs: 0,
          nTrainUsed: 0,
          nTest: 0,
        })
      }
    }

    // NOTE(review): if the probe loop stopped early (time-out), the strategy
    // may still be in its 'probe' phase. Nothing forces a transition here, so
    // the loop below evaluates whatever strategy.next() yields with the full
    // folds. Confirm that ProgressiveStrategy handles this as intended.

    // Phase 2: full evaluation of promoted candidates
    const fullExecutor = new Executor({
      folds: fullFolds,
      scoring,
      X: Xn,
      y: yn,
      timeLimitMs: maxTimeMs > 0 ? Math.floor(maxTimeMs * 0.7) : 0,
      seed,
      onProgress,
    })

    while (!strategy.isDone()) {
      if (fullExecutor.isTimedOut) break
      const cand = strategy.next()
      if (cand === null) break
      try {
        await fullExecutor.evaluateCandidate(cand)
      } catch {
        // Skip failed candidates
      }
    }

    const leaderboard = fullExecutor.leaderboard
    if (leaderboard.length === 0) {
      // Fall back to probe results if no full evals completed
      const probeLeaderboard = probeExecutor.leaderboard
      if (probeLeaderboard.length === 0) {
        throw new ValidationError('ProgressiveSearch: no candidates were evaluated')
      }
      this.#leaderboard = probeLeaderboard
    } else {
      this.#leaderboard = leaderboard
    }

    this.#bestResult = this.#leaderboard.best()
    return { leaderboard: this.#leaderboard, bestResult: this.#bestResult }
  }

  /**
   * Train a new instance of the best config found by fit().
   * @returns {Promise<object>} the fitted model instance
   * @throws {ValidationError} when fit() has not produced a best result, or
   *   when the best result's modelName matches none of the supplied models
   */
  async refitBest(X, y) {
    if (!this.#bestResult) {
      throw new ValidationError('ProgressiveSearch: must call fit() first')
    }
    const best = this.#bestResult
    const model = this.#models.find(m => m.name === best.modelName)
    if (!model) {
      // Without this guard the lookup below fails with an opaque TypeError.
      throw new ValidationError(
        `ProgressiveSearch: best result refers to unknown model "${best.modelName}"`
      )
    }
    const instance = await model.cls.create(best.params)
    const Xn = normalizeX(X)
    const yn = normalizeY(y)
    // Awaiting is a no-op for a synchronous fit(); if fit() returns a promise
    // it guarantees training has finished before the instance is handed back.
    // NOTE(review): confirm whether wlearn model fit() can be async.
    await instance.fit(Xn, yn)
    return instance
  }

  /** Leaderboard from the last fit(), or null before fit() has succeeded. */
  get leaderboard() { return this.#leaderboard }

  /** Best entry from the last fit(), or null before fit() has succeeded. */
  get bestResult() { return this.#bestResult }
}
|