@wlearn/automl 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -0
- package/LICENSE +177 -0
- package/NOTICE +5 -0
- package/README.md +108 -0
- package/dist/automl.js +1 -0
- package/dist/automl.mjs +2 -0
- package/package.json +45 -9
- package/src/auto-fit.js +20 -8
- package/src/common.js +9 -7
- package/src/executor.js +6 -4
- package/src/halving.js +8 -6
- package/src/index.js +19 -12
- package/src/leaderboard.js +3 -1
- package/src/portfolio.js +130 -9
- package/src/progressive.js +8 -6
- package/src/sampler.js +7 -5
- package/src/search.js +8 -6
- package/src/strategy-halving.js +6 -4
- package/src/strategy-progressive.js +6 -4
- package/src/strategy-random.js +6 -4
package/src/portfolio.js
CHANGED
|
@@ -6,17 +6,16 @@
|
|
|
6
6
|
* zeroshot portfolio approach (TabRepo).
|
|
7
7
|
*/
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
ValidationError }
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
import { makeCandidateId } from './common.js'
|
|
9
|
+
const { stratifiedKFold, kFold, normalizeX, normalizeY,
|
|
10
|
+
ValidationError } = require('@wlearn/core')
|
|
11
|
+
const { Executor } = require('./executor.js')
|
|
12
|
+
const { detectTask, makeCandidateId } = require('./common.js')
|
|
14
13
|
|
|
15
14
|
// ---------------------------------------------------------------------------
|
|
16
15
|
// Portfolio configs: task -> model_name -> list of param dicts
|
|
17
16
|
// ---------------------------------------------------------------------------
|
|
18
17
|
|
|
19
|
-
|
|
18
|
+
const PORTFOLIO = {
|
|
20
19
|
classification: {
|
|
21
20
|
xgb: [
|
|
22
21
|
{ objective: 'multi:softprob', eta: 0.05, max_depth: 6, numRound: 200,
|
|
@@ -103,6 +102,66 @@ export const PORTFOLIO = {
|
|
|
103
102
|
{ task: 'classification', nClauses: 500, threshold: 100, s: 5.0, nEpochs: 100 },
|
|
104
103
|
{ task: 'classification', nClauses: 50, threshold: 25, s: 2.0, nEpochs: 60 },
|
|
105
104
|
],
|
|
105
|
+
rf: [
|
|
106
|
+
{ nEstimators: 100, maxDepth: 10, maxFeatures: 'sqrt', criterion: 'gini',
|
|
107
|
+
minSamplesSplit: 2, minSamplesLeaf: 1 },
|
|
108
|
+
{ nEstimators: 300, maxDepth: 0, maxFeatures: 'log2', criterion: 'gini',
|
|
109
|
+
minSamplesSplit: 5, minSamplesLeaf: 2 },
|
|
110
|
+
{ nEstimators: 200, maxDepth: 15, maxFeatures: 'sqrt', criterion: 'entropy',
|
|
111
|
+
extraTrees: 1, minSamplesSplit: 2 },
|
|
112
|
+
{ nEstimators: 200, maxDepth: 12, maxFeatures: 'sqrt', criterion: 'hellinger',
|
|
113
|
+
minSamplesSplit: 2, minSamplesLeaf: 1 },
|
|
114
|
+
{ nEstimators: 150, maxDepth: 10, maxFeatures: 'sqrt', criterion: 'gini',
|
|
115
|
+
heterogeneous: 1, oobWeighting: 1, minSamplesLeaf: 2 },
|
|
116
|
+
{ nEstimators: 200, maxDepth: 10, maxFeatures: 'sqrt', criterion: 'gini',
|
|
117
|
+
histogramBinning: 1, minSamplesSplit: 2, minSamplesLeaf: 1 },
|
|
118
|
+
{ nEstimators: 150, maxDepth: 12, maxFeatures: 'sqrt', criterion: 'gini',
|
|
119
|
+
jarf: 1, minSamplesSplit: 2, minSamplesLeaf: 1 },
|
|
120
|
+
],
|
|
121
|
+
mlp: [
|
|
122
|
+
{ hidden_sizes: [64], activation: 'relu', lr: 0.01, epochs: 50,
|
|
123
|
+
optimizer: 'adam', batch_size: 32 },
|
|
124
|
+
{ hidden_sizes: [128, 64], activation: 'gelu', lr: 0.001, epochs: 100,
|
|
125
|
+
optimizer: 'adam', batch_size: 32 },
|
|
126
|
+
{ hidden_sizes: [256, 128], activation: 'silu', lr: 0.001, epochs: 150,
|
|
127
|
+
optimizer: 'adam', batch_size: 16 },
|
|
128
|
+
],
|
|
129
|
+
tabm: [
|
|
130
|
+
{ hidden_sizes: [128], activation: 'relu', n_ensemble: 32, lr: 0.005,
|
|
131
|
+
epochs: 100, optimizer: 'adam' },
|
|
132
|
+
{ hidden_sizes: [64], activation: 'gelu', n_ensemble: 8, lr: 0.01,
|
|
133
|
+
epochs: 50, optimizer: 'adam' },
|
|
134
|
+
{ hidden_sizes: [128, 64], activation: 'silu', n_ensemble: 16, lr: 0.001,
|
|
135
|
+
epochs: 150, optimizer: 'adam' },
|
|
136
|
+
],
|
|
137
|
+
nam: [
|
|
138
|
+
{ hidden_sizes: [64], activation: 'exu', lr: 0.01, epochs: 100,
|
|
139
|
+
optimizer: 'adam' },
|
|
140
|
+
{ hidden_sizes: [128], activation: 'relu', lr: 0.001, epochs: 100,
|
|
141
|
+
optimizer: 'adam' },
|
|
142
|
+
{ hidden_sizes: [64, 32], activation: 'exu', lr: 0.005, epochs: 150,
|
|
143
|
+
optimizer: 'adam' },
|
|
144
|
+
],
|
|
145
|
+
gam: [
|
|
146
|
+
{ family: 'binomial', penalty: 'elasticnet', alpha: 0.5, nLambda: 100, nFolds: 5 },
|
|
147
|
+
{ family: 'binomial', penalty: 'lasso', alpha: 1.0, nLambda: 100, nFolds: 5 },
|
|
148
|
+
{ family: 'binomial', penalty: 'ridge', alpha: 0.0, nLambda: 100, nFolds: 5 },
|
|
149
|
+
{ family: 'binomial', penalty: 'scad', nLambda: 100, nFolds: 5 },
|
|
150
|
+
],
|
|
151
|
+
bart: [
|
|
152
|
+
{ numTrees: 200, numBurnin: 200, numSamples: 100, alpha: 0.95, beta: 2.0 },
|
|
153
|
+
{ numTrees: 500, numBurnin: 300, numSamples: 100, alpha: 0.95, beta: 2.0 },
|
|
154
|
+
{ numTrees: 100, maxDepth: 5, numBurnin: 100, numSamples: 100,
|
|
155
|
+
alpha: 0.8, beta: 1.0 },
|
|
156
|
+
],
|
|
157
|
+
fm: [
|
|
158
|
+
{ k: 4, lr: 0.01, epoch: 10, opt: 'adagrad' },
|
|
159
|
+
{ k: 8, lr: 0.005, epoch: 20, opt: 'adagrad', lambda: 0.001 },
|
|
160
|
+
],
|
|
161
|
+
xlr: [
|
|
162
|
+
{ lr: 0.01, opt: 'ftrl', epoch: 10 },
|
|
163
|
+
{ lr: 0.01, lambda: 0.1, opt: 'adagrad', epoch: 20 },
|
|
164
|
+
],
|
|
106
165
|
},
|
|
107
166
|
regression: {
|
|
108
167
|
xgb: [
|
|
@@ -190,6 +249,66 @@ export const PORTFOLIO = {
|
|
|
190
249
|
{ task: 'regression', nClauses: 500, threshold: 100, s: 5.0, nEpochs: 100 },
|
|
191
250
|
{ task: 'regression', nClauses: 50, threshold: 25, s: 2.0, nEpochs: 60 },
|
|
192
251
|
],
|
|
252
|
+
rf: [
|
|
253
|
+
{ nEstimators: 100, maxDepth: 10, maxFeatures: 'sqrt', criterion: 'mse',
|
|
254
|
+
minSamplesSplit: 2, minSamplesLeaf: 1 },
|
|
255
|
+
{ nEstimators: 300, maxDepth: 0, maxFeatures: 'log2', criterion: 'mse',
|
|
256
|
+
minSamplesSplit: 5, minSamplesLeaf: 2 },
|
|
257
|
+
{ nEstimators: 200, maxDepth: 15, maxFeatures: 'sqrt', criterion: 'mae',
|
|
258
|
+
extraTrees: 1, minSamplesSplit: 2 },
|
|
259
|
+
{ nEstimators: 100, maxDepth: 10, maxFeatures: 'sqrt', criterion: 'mse',
|
|
260
|
+
leafModel: 1, minSamplesLeaf: 5 },
|
|
261
|
+
{ nEstimators: 150, maxDepth: 10, maxFeatures: 'sqrt', criterion: 'mse',
|
|
262
|
+
heterogeneous: 1, oobWeighting: 1, minSamplesLeaf: 2 },
|
|
263
|
+
{ nEstimators: 200, maxDepth: 10, maxFeatures: 'sqrt', criterion: 'mse',
|
|
264
|
+
histogramBinning: 1, minSamplesSplit: 2, minSamplesLeaf: 1 },
|
|
265
|
+
{ nEstimators: 150, maxDepth: 12, maxFeatures: 'sqrt', criterion: 'mse',
|
|
266
|
+
jarf: 1, minSamplesSplit: 2, minSamplesLeaf: 1 },
|
|
267
|
+
],
|
|
268
|
+
mlp: [
|
|
269
|
+
{ hidden_sizes: [64], activation: 'relu', lr: 0.01, epochs: 50,
|
|
270
|
+
optimizer: 'adam', batch_size: 32 },
|
|
271
|
+
{ hidden_sizes: [128, 64], activation: 'gelu', lr: 0.001, epochs: 100,
|
|
272
|
+
optimizer: 'adam', batch_size: 32 },
|
|
273
|
+
{ hidden_sizes: [256, 128], activation: 'silu', lr: 0.001, epochs: 150,
|
|
274
|
+
optimizer: 'adam', batch_size: 16 },
|
|
275
|
+
],
|
|
276
|
+
tabm: [
|
|
277
|
+
{ hidden_sizes: [128], activation: 'relu', n_ensemble: 32, lr: 0.005,
|
|
278
|
+
epochs: 100, optimizer: 'adam' },
|
|
279
|
+
{ hidden_sizes: [64], activation: 'gelu', n_ensemble: 8, lr: 0.01,
|
|
280
|
+
epochs: 50, optimizer: 'adam' },
|
|
281
|
+
{ hidden_sizes: [128, 64], activation: 'silu', n_ensemble: 16, lr: 0.001,
|
|
282
|
+
epochs: 150, optimizer: 'adam' },
|
|
283
|
+
],
|
|
284
|
+
nam: [
|
|
285
|
+
{ hidden_sizes: [64], activation: 'exu', lr: 0.01, epochs: 100,
|
|
286
|
+
optimizer: 'adam' },
|
|
287
|
+
{ hidden_sizes: [128], activation: 'relu', lr: 0.001, epochs: 100,
|
|
288
|
+
optimizer: 'adam' },
|
|
289
|
+
{ hidden_sizes: [64, 32], activation: 'exu', lr: 0.005, epochs: 150,
|
|
290
|
+
optimizer: 'adam' },
|
|
291
|
+
],
|
|
292
|
+
gam: [
|
|
293
|
+
{ family: 'gaussian', penalty: 'elasticnet', alpha: 0.5, nLambda: 100, nFolds: 5 },
|
|
294
|
+
{ family: 'gaussian', penalty: 'lasso', alpha: 1.0, nLambda: 100, nFolds: 5 },
|
|
295
|
+
{ family: 'gaussian', penalty: 'ridge', alpha: 0.0, nLambda: 100, nFolds: 5 },
|
|
296
|
+
{ family: 'gaussian', penalty: 'scad', nLambda: 100, nFolds: 5 },
|
|
297
|
+
],
|
|
298
|
+
bart: [
|
|
299
|
+
{ numTrees: 200, numBurnin: 200, numSamples: 100, alpha: 0.95, beta: 2.0 },
|
|
300
|
+
{ numTrees: 500, numBurnin: 300, numSamples: 100, alpha: 0.95, beta: 2.0 },
|
|
301
|
+
{ numTrees: 100, maxDepth: 5, numBurnin: 100, numSamples: 100,
|
|
302
|
+
alpha: 0.8, beta: 1.0 },
|
|
303
|
+
],
|
|
304
|
+
fm: [
|
|
305
|
+
{ k: 4, lr: 0.01, epoch: 10, opt: 'adagrad' },
|
|
306
|
+
{ k: 8, lr: 0.005, epoch: 20, opt: 'adagrad', lambda: 0.001 },
|
|
307
|
+
],
|
|
308
|
+
xlr: [
|
|
309
|
+
{ lr: 0.01, opt: 'ftrl', epoch: 10 },
|
|
310
|
+
{ lr: 0.01, lambda: 0.1, opt: 'adagrad', epoch: 20 },
|
|
311
|
+
],
|
|
193
312
|
},
|
|
194
313
|
}
|
|
195
314
|
|
|
@@ -198,7 +317,7 @@ export const PORTFOLIO = {
|
|
|
198
317
|
* @param {string} task - 'classification' or 'regression'
|
|
199
318
|
* @returns {Object} model name -> config list
|
|
200
319
|
*/
|
|
201
|
-
|
|
320
|
+
function getPortfolio(task = 'classification') {
|
|
202
321
|
return PORTFOLIO[task] || PORTFOLIO.classification
|
|
203
322
|
}
|
|
204
323
|
|
|
@@ -210,7 +329,7 @@ export function getPortfolio(task = 'classification') {
|
|
|
210
329
|
* Yields pre-tuned configs from the zeroshot portfolio.
|
|
211
330
|
* Same interface as RandomStrategy / HalvingStrategy.
|
|
212
331
|
*/
|
|
213
|
-
|
|
332
|
+
class PortfolioStrategy {
|
|
214
333
|
#queue = []
|
|
215
334
|
#index = 0
|
|
216
335
|
#total = 0
|
|
@@ -258,7 +377,7 @@ export class PortfolioStrategy {
|
|
|
258
377
|
/**
|
|
259
378
|
* Evaluate pre-tuned portfolio configs with cross-validation.
|
|
260
379
|
*/
|
|
261
|
-
|
|
380
|
+
class PortfolioSearch {
|
|
262
381
|
#models
|
|
263
382
|
#opts
|
|
264
383
|
#leaderboard = null
|
|
@@ -330,3 +449,5 @@ export class PortfolioSearch {
|
|
|
330
449
|
get leaderboard() { return this.#leaderboard }
|
|
331
450
|
get bestResult() { return this.#bestResult }
|
|
332
451
|
}
|
|
452
|
+
|
|
453
|
+
module.exports = { PORTFOLIO, getPortfolio, PortfolioStrategy, PortfolioSearch }
|
package/src/progressive.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
|
|
2
|
-
ValidationError }
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
1
|
+
const { stratifiedKFold, kFold, normalizeX, normalizeY,
|
|
2
|
+
ValidationError } = require('@wlearn/core')
|
|
3
|
+
const { Executor } = require('./executor.js')
|
|
4
|
+
const { ProgressiveStrategy } = require('./strategy-progressive.js')
|
|
5
|
+
const { detectTask, scorerGreaterIsBetter } = require('./common.js')
|
|
6
6
|
|
|
7
7
|
/**
|
|
8
8
|
* Progressive search: probe all candidates cheaply (1 fold + subsample),
|
|
@@ -12,7 +12,7 @@ import { detectTask, scorerGreaterIsBetter } from './common.js'
|
|
|
12
12
|
* The probe phase filters out bad configs quickly, saving time
|
|
13
13
|
* for thorough evaluation of promising candidates.
|
|
14
14
|
*/
|
|
15
|
-
|
|
15
|
+
class ProgressiveSearch {
|
|
16
16
|
#models
|
|
17
17
|
#opts
|
|
18
18
|
#leaderboard = null
|
|
@@ -154,3 +154,5 @@ export class ProgressiveSearch {
|
|
|
154
154
|
get leaderboard() { return this.#leaderboard }
|
|
155
155
|
get bestResult() { return this.#bestResult }
|
|
156
156
|
}
|
|
157
|
+
|
|
158
|
+
module.exports = { ProgressiveSearch }
|
package/src/sampler.js
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
|
|
1
|
+
const { makeLCG } = require('@wlearn/core')
|
|
2
2
|
|
|
3
3
|
const { floor, round, log, exp, min, max } = Math
|
|
4
4
|
|
|
5
5
|
/**
|
|
6
6
|
* Sample a single value from a SearchParam definition.
|
|
7
7
|
*/
|
|
8
|
-
|
|
8
|
+
function sampleParam(param, rng) {
|
|
9
9
|
const { type } = param
|
|
10
10
|
switch (type) {
|
|
11
11
|
case 'categorical':
|
|
@@ -26,7 +26,7 @@ export function sampleParam(param, rng) {
|
|
|
26
26
|
/**
|
|
27
27
|
* Sample a complete config from a SearchSpace, respecting conditions.
|
|
28
28
|
*/
|
|
29
|
-
|
|
29
|
+
function sampleConfig(space, rng) {
|
|
30
30
|
const config = {}
|
|
31
31
|
const keys = Object.keys(space)
|
|
32
32
|
|
|
@@ -56,7 +56,7 @@ export function sampleConfig(space, rng) {
|
|
|
56
56
|
/**
|
|
57
57
|
* Generate n random configs from a SearchSpace.
|
|
58
58
|
*/
|
|
59
|
-
|
|
59
|
+
function randomConfigs(space, n, { seed = 42 } = {}) {
|
|
60
60
|
const rng = makeLCG(seed)
|
|
61
61
|
const configs = []
|
|
62
62
|
for (let i = 0; i < n; i++) {
|
|
@@ -69,7 +69,7 @@ export function randomConfigs(space, n, { seed = 42 } = {}) {
|
|
|
69
69
|
* Enumerate grid points from a SearchSpace.
|
|
70
70
|
* Continuous params discretized to `steps` values.
|
|
71
71
|
*/
|
|
72
|
-
|
|
72
|
+
function gridConfigs(space, { steps = 5 } = {}) {
|
|
73
73
|
const keys = Object.keys(space)
|
|
74
74
|
if (keys.length === 0) return [{}]
|
|
75
75
|
|
|
@@ -160,3 +160,5 @@ function _discretize(param, steps) {
|
|
|
160
160
|
throw new Error(`Unknown SearchParam type: "${type}"`)
|
|
161
161
|
}
|
|
162
162
|
}
|
|
163
|
+
|
|
164
|
+
module.exports = { sampleParam, sampleConfig, randomConfigs, gridConfigs }
|
package/src/search.js
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
|
-
|
|
2
|
-
ValidationError }
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
1
|
+
const { stratifiedKFold, kFold, normalizeX, normalizeY,
|
|
2
|
+
ValidationError } = require('@wlearn/core')
|
|
3
|
+
const { Executor } = require('./executor.js')
|
|
4
|
+
const { RandomStrategy } = require('./strategy-random.js')
|
|
5
|
+
const { detectTask, scorerGreaterIsBetter } = require('./common.js')
|
|
6
6
|
|
|
7
7
|
/**
|
|
8
8
|
* Random hyperparameter search with cross-validation.
|
|
9
9
|
*/
|
|
10
|
-
|
|
10
|
+
class RandomSearch {
|
|
11
11
|
#models
|
|
12
12
|
#opts
|
|
13
13
|
#leaderboard = null
|
|
@@ -91,3 +91,5 @@ export class RandomSearch {
|
|
|
91
91
|
get leaderboard() { return this.#leaderboard }
|
|
92
92
|
get bestResult() { return this.#bestResult }
|
|
93
93
|
}
|
|
94
|
+
|
|
95
|
+
module.exports = { RandomSearch }
|
package/src/strategy-halving.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
const { makeLCG } = require('@wlearn/core')
|
|
2
|
+
const { sampleConfig } = require('./sampler.js')
|
|
3
|
+
const { makeCandidateId } = require('./common.js')
|
|
4
4
|
|
|
5
5
|
const { ceil, log, max, min, floor } = Math
|
|
6
6
|
|
|
@@ -14,7 +14,7 @@ const { ceil, log, max, min, floor } = Math
|
|
|
14
14
|
* in the current round have been evaluated. next() returns null
|
|
15
15
|
* only when fully done.
|
|
16
16
|
*/
|
|
17
|
-
|
|
17
|
+
class HalvingStrategy {
|
|
18
18
|
#candidates // all candidates for current round
|
|
19
19
|
#roundIndex = 0 // index within current round's candidates
|
|
20
20
|
#round = 0 // current round number
|
|
@@ -155,3 +155,5 @@ export class HalvingStrategy {
|
|
|
155
155
|
return this.#rounds
|
|
156
156
|
}
|
|
157
157
|
}
|
|
158
|
+
|
|
159
|
+
module.exports = { HalvingStrategy }
|
package/src/strategy-progressive.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
const { makeLCG } = require('@wlearn/core')
|
|
2
|
+
const { sampleConfig } = require('./sampler.js')
|
|
3
|
+
const { makeCandidateId } = require('./common.js')
|
|
4
4
|
|
|
5
5
|
const { max, ceil } = Math
|
|
6
6
|
|
|
@@ -14,7 +14,7 @@ const { max, ceil } = Math
|
|
|
14
14
|
* This pairs with ProgressiveSearch which manages two Executors
|
|
15
15
|
* (probe executor with 1 fold, full executor with K folds).
|
|
16
16
|
*/
|
|
17
|
-
|
|
17
|
+
class ProgressiveStrategy {
|
|
18
18
|
#allCandidates = []
|
|
19
19
|
#promotedCandidates = []
|
|
20
20
|
#phase = 'probe'
|
|
@@ -124,3 +124,5 @@ export class ProgressiveStrategy {
|
|
|
124
124
|
return false
|
|
125
125
|
}
|
|
126
126
|
}
|
|
127
|
+
|
|
128
|
+
module.exports = { ProgressiveStrategy }
|
package/src/strategy-random.js
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
const { makeLCG } = require('@wlearn/core')
|
|
2
|
+
const { sampleConfig } = require('./sampler.js')
|
|
3
|
+
const { makeCandidateId } = require('./common.js')
|
|
4
4
|
|
|
5
5
|
/**
|
|
6
6
|
* Random search strategy: generates nIter random configs per model,
|
|
7
7
|
* yields them one at a time. No adaptive behavior.
|
|
8
8
|
*/
|
|
9
|
-
|
|
9
|
+
class RandomStrategy {
|
|
10
10
|
#queue = []
|
|
11
11
|
#index = 0
|
|
12
12
|
#total = 0
|
|
@@ -65,3 +65,5 @@ export class RandomStrategy {
|
|
|
65
65
|
return this.#index >= this.#total
|
|
66
66
|
}
|
|
67
67
|
}
|
|
68
|
+
|
|
69
|
+
module.exports = { RandomStrategy }
|