dcp-worker 4.1.1 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/dcp-worker CHANGED
@@ -1,5 +1,4 @@
1
1
  #! /usr/bin/env node
2
-
3
2
  /**
4
3
  * @file dcp-worker.js
5
4
  * Standalone NodeJS DCP Worker
@@ -9,571 +8,725 @@
9
8
  * Wes Garland, wes@distributive.network
10
9
  *
11
10
  * @date April 2020
12
- * April-May 2023
13
- * May-June 2023
11
+ * April-May 2023, May-June 2023
12
+ * June 2024, June 2025
14
13
  */
15
14
  'use strict';
16
-
17
15
  var worker;
18
16
 
19
17
  const process = require('process');
20
18
  const fs = require('fs');
21
19
  const path = require('path');
22
- const telnetd = require('../lib/remote-console');
23
-
24
- const { slicesFetched, debugging, displayMaxDiagInfo } = require('../lib/utils');
25
- const { a$sleep, leafMergeInto } = require('dcp/utils');
20
+ const debug = require('debug');
21
+ const telnetd = require('../lib/telnetd');
22
+ const httpd = require('../lib/web-console');
23
+ const pidfile = require('../lib/pidfile');
24
+ const reports = require('../lib/reports');
25
+ const utils = require('../lib/utils');
26
+ const loggers = require('../lib/loggers');
27
+ var wallet, identity, a$sleep, DistributiveWorker; /* populate in main() */
28
+
29
+ const { exitCodes,
30
+ logLevels } = require('../lib/consts');
26
31
 
27
- const EXIT_UNHANDLED = 5;
28
32
  const systemStateInfo = globalThis.systemStateInfo = {};
33
+ const usingDebugger = require('module')._cache.niim instanceof require('module').Module;
34
+
35
+ process.on('SIGESC', handleSigDeath); /* synthetic; for dashboard <esc> keystrokes */
36
+ process.on('SIGINT', handleSigDeath);
37
+ process.on('SIGTERM', handleSigDeath);
38
+ process.on('SIGQUIT', handleSigDeath);
39
+ process.on('unhandledRejection', handleUnhandled);
40
+ process.on('uncaughtException', handleUnhandled);
29
41
 
30
42
  /* Setup the telnet REPL up early to ensure early-failure log messages are captured */
31
43
  const replHelpers = {
32
44
  help: {
33
- report: 'Print a worker status & slice report',
34
- kill: 'Try to kill the worker',
35
- die: 'Kill the worker',
45
+ report: 'Print slice report',
46
+ kill: 'Exit the process immediately',
36
47
  debug: 'Set DCP_DEBUG value and re-initialize debugging scopes',
37
48
  },
38
49
  commands: {
39
- report: printReport,
40
- kill: processExit,
41
- die: () => worker && worker.stop(),
50
+ report: () => reports.printSliceReport(worker),
51
+ kill: (exitCode) => process.exit(Number(exitCode)),
42
52
  debug: (value) => require('dcp/internal/debugging').reinit(value),
43
53
  },
44
54
  };
45
55
  telnetd.init(replHelpers);
46
56
  require('dcp-client').init().then(main).catch(handleUnhandled);
47
57
 
48
- /**
49
- * Output startup banner message. Currently a wrapper for console.log, future plans are to make it
50
- * possible to suppress these messages, and also to send them multiple places (eg tty and syslog)
51
- */
52
- function bannerLog()
58
+ function usage()
53
59
  {
54
- console.log.apply(console, arguments);
55
- bannerLog.cumulative.push(arguments);
60
+ console.log(`
61
+ DCP Worker: Join the DCP network as a compute supplier.
62
+ Copyright (c) 2018-2025, Distributive Corp. Released under the terms of the MIT License.
63
+
64
+ Usage: ${path.basename(__filename)} [options...]
65
+ Options:
66
+ -h, --help Display this help and exit
67
+ --dcp-identity=string Identity key, in hexadecimal
68
+ --dcp-identity=json Identity keystore, in json format
69
+ --dcp-identity=filename Name of file containing identity keystore
70
+ --earnings-account=string Bank account into which to deposit earnings
71
+ --earnings-account=keystore Keystore, in json format, for bank account
72
+ --earnings-account=filename Name of file containing bank account keystore
73
+ -c, --cores=[#cpu][,#gpu] Override detected number of CPU and GPU
74
+ cores available to the evaluator.
75
+ -u, --utilization=[cpu][,gpu] default proportion of CPU,GPU to utilize: eg use
76
+ -u 0.8,0.75 for 80% CPU and 75% GPU utilization
77
+ -m, --max-sandboxes=integer Override the default Maximum number of sandboxes
78
+ -H, --hostname Override evaluator hostname
79
+ -p, --port Override evaluator port number
80
+ -o, --output=mode Set the console mode: text, dashboard, or none
81
+ -l, --logger=type Add an additional logger: syslog, logfile (all
82
+ platforms), event-log (win32)
83
+ --logfile=filename Change filename, implies --logger=logfile
84
+ --overwrite-logfile Overwrite previous log files instead of appending
85
+ --syslog=url Change syslog url, implies --logger=syslog. Syslog
86
+ facility (eg. local7) is the URL pathname.
87
+ --syslog=facility Change syslog facility, implies --logger=syslog.
88
+ --min-level=[logger,]level Set the lowest level of log sent to a given logger
89
+ --min-level=[console,]level Set the lowest level of log sent to the console
90
+ -j, --job Restrict worker to a specific set of jobs. Use more
91
+ than once for multiple jobs.
92
+ -g, --join= Join a compute group; the format is
93
+ "joinKey,joinSecret" or "joinKey,eh1-joinHash".
94
+ Use more than once to join multiple groups.
95
+ -a, --allow-origin=origin Grant all jobs network access to specified origin
96
+ Use more than once for multiple origins.
97
+ --zero-trust Do not load configuration data from scheduler
98
+ --no-global Do not fetch work from the Global Compute Group
99
+ --show= Show configuration data and exit. Valid arguments:
100
+ - worker-id: the worker id
101
+ - identity: the address of the active DCP identity
102
+ - earnings-account: where funds will be deposited
103
+ - owner: the address of the registered owner
104
+ - allowed-origins: origins in origin manager
105
+ - compute-groups: credentials for task requests
106
+ - config: entire worker.config
107
+ - <dot-path>: a property of worker.config
108
+ --kvin | --json Use KVIN or JSON for show output
109
+ --startup-delay=num Delay for num seconds during startup
110
+ --startup-delay-rnd=num Delay for a random amount of time up to num seconds
111
+ --disable-httpd Disable web interface (${httpd.getConfig().listen})
112
+ --register=key Use the given key to register this worker with your
113
+ Distributive portal account and exit
114
+ --unmanaged Do not report statistics to or read configuration
115
+ from scheduler; implies --no-claim-earnings
116
+ --no-claim-earnings Do not transfer unclaimed earnings to the earnings
117
+ account when the worker starts
118
+ --claim-earnings= Transfer unclaimed earnings to the specified
119
+ account and exit
120
+ -f, --pidfile=filename Specify the location of the pid file
121
+ --no-pidfile Do not write the pid file
122
+ -v, --verbose Enable verbose output. Use more than once to
123
+ increase verbosity.
124
+ `);
56
125
  }
57
- bannerLog.cumulative = [];
58
126
 
59
- function parseCliArgs()
127
+ /**
128
+ * Generator which yields { option, optarg, argvSegment } objects representing the command-line options in the supplied argv.
129
+ *
130
+ * This program has long options that don't have a short option equivalent but posix-getopt doesn't
131
+ * support this, so we do a little bit of extra processing here. Each * in the optionsDefn becomes a
132
+ * unique character above \u1000 for the library to use, and then we disguise this fact from the user
133
+ * and any programmer not maintaining this function.
134
+ */
135
+ function* parseCliOptions(argv)
60
136
  {
61
- var defaultPidFileName;
62
-
63
- defaultPidFileName = require('../lib/pidfile').getDefaultPidFileName(dcpConfig.worker.pidfile);
64
-
65
- const cliArgs = require('dcp/cli')
66
- .base('DCP Worker: Request work from the DCP Scheduler and send it to DCP Evaluators')
67
- .options({
68
- paymentAddress: {
69
- describe: 'The address to deposit funds into, will use the default bank keystore if not provided.',
70
- type: 'string',
71
- },
72
- cores: {
73
- alias: 'c',
74
- describe: 'Number of CPU and GPU cores to work with: the format is 7,1 (or just 7) for 7 CPU cores and 1 GPU',
75
- type: 'string',
76
- },
77
- utilization: {
78
- alias: 'u',
79
- describe: 'default proportion of CPU,GPU to utilize: the format is -u 0.8,0.75 for 80% CPU and 75% GPU utilization',
80
- type: 'string',
81
- },
82
- maxSandboxes: {
83
- alias: 'm',
84
- describe: 'Maximum number of sandboxes',
85
- type: 'number',
86
- default: undefined,
87
- },
88
- verbose: {
89
- alias: 'v',
90
- describe: 'Enable verbose output',
91
- type: 'count',
92
- default: false,
93
- group: 'Output options',
94
- },
95
- outputMode: {
96
- alias: ['o', 'output'],
97
- describe: 'Set the output mode',
98
- type: 'string',
99
- default: 'detect',
100
- choices: ['detect', 'console', 'dashboard', 'event-log', 'syslog', 'logfile'],
101
- group: 'Output options',
102
- },
103
- hostname: {
104
- alias: 'H',
105
- describe: 'Evaluator hostname',
106
- type: 'string',
107
- default: dcpConfig.evaluator.location.hostname,
108
- },
109
- port: {
110
- alias: 'p',
111
- describe: 'Evaluator port',
112
- type: 'number',
113
- default: Number(dcpConfig.evaluator.location.port),
114
- },
115
- 'job-id': {
116
- alias: 'j',
117
- hidden: true,
118
- describe: 'Restrict worker to a specific job (use N times for N jobs)',
119
- type: 'array',
120
- },
121
- join: {
122
- alias: 'g',
123
- hidden: true,
124
- describe: 'Join compute group; the format is "joinKey,joinSecret" or "joinKey,eh1-joinHash"',
125
- type: 'array'
126
- },
127
- joinKeystore: {
128
- hidden: true,
129
- /* future */
130
- },
131
-
132
- leavePublicGroup: {
133
- type: 'boolean',
134
- hidden: true,
135
- describe: 'Do not fetch slices from global compute group.',
136
- default: undefined,
137
- },
138
-
139
- publicGroupFallback: {
140
- hidden: true,
141
- describe: 'If set, worker will prefer private groups but fall back on the global group if no preferred work is available',
142
- type: 'boolean',
143
- default: undefined,
144
- defaultDescription: undefined,
145
- },
146
-
147
- identityKey: {
148
- hidden: true,
149
- describe: 'Identity key, in hex format',
150
- type: 'string',
151
- group: 'Identity options',
152
- },
153
- identityKeystore: {
154
- hidden: true,
155
- describe: 'Identity keystore, in json format',
156
- type: 'string',
157
- group: 'Identity options',
158
- },
159
-
160
- reportInterval: {
161
- describe: 'If set, output a status summary every [interval] seconds in console output mode',
162
- type: 'number',
163
- group: 'Output options',
164
- },
165
-
166
- showConfig: {
167
- hide: false,
168
- describe: 'Show worker config',
169
- type: 'boolean',
170
- },
171
-
172
- logfile: {
173
- describe: 'Path to log file',
174
- type: 'string',
175
- group: 'Log File output options',
176
- default: path.resolve('../log/dcp-worker.log'),
177
- },
178
- syslogAddress: {
179
- describe: 'Address of syslog server',
180
- type: 'string',
181
- group: 'Syslog output options',
182
- default: 'loghost', // Unix standard for syslog
183
- },
184
- syslogFacility: {
185
- describe: 'Name of syslog facility',
186
- type: 'string',
187
- group: 'Syslog output options',
188
- default: 'local7',
189
- },
190
- syslogTransport: {
191
- describe: 'Transport to connect to use for syslog',
192
- type: 'string',
193
- choices: ['udp','tcp','unix','tls'],
194
- group: 'Syslog output options',
195
- default: 'udp',
196
- },
197
- syslogPort: {
198
- describe: 'UDP/TCP port to use for syslog',
199
- type: 'number',
200
- group: 'Syslog output options',
201
- default: 514,
202
- },
203
-
204
- allowedOrigins: {
205
- alias: 'a',
206
- describe: 'modify the \'any\' allow origins of dcpConfig',
207
- type: 'array'
208
- },
209
-
210
- watchdogInterval: {
211
- alias: 'W',
212
- describe: 'Number of milliseconds between watchdog cycles',
213
- type: 'number',
214
- hidden: true,
215
- },
216
- dumpConfig: {
217
- describe: 'If set, dump the configuration and exit',
218
- type: 'boolean',
219
- hidden: true,
220
- },
221
- pidFile: {
222
- alias: 'f',
223
- describe: `create a .pid file for the worker; value overrides default location (${defaultPidFileName})`,
224
- normalize: true
225
- },
226
- })
227
- .strict()
228
- .wrap(process.stdout.columns || 80)
229
- .argv;
230
-
231
- return cliArgs;
232
- }
137
+ const { BasicParser } = require('posix-getopt');
138
+ var parser;
139
+ var longOptCount = 0;
140
+ var optionsDefn = ''
141
+ + 'h(help)*:(earnings-account)*:(identity-keystore)'
142
+ + 'c:(cores)u:(utilization)m:(max-sandboxes)H:(hostname)p:(port)'
143
+ + 'o:(output)l:(logger)*:(logfile)*(overwrite-logfile)*:(syslog)*:(min-level)j:(job)g:(join)'
144
+ + 'a:(allow-origin)*(zero-trust)*(no-global)*(kvin)*(json)*:(show)*(unmanaged)'
145
+ + '*(no-claim-earnings)*:(register)*:(claim-earnings)*(reset)f:(pidfile)*(no-pidfile)v(verbose)';
146
+
147
+ optionsDefn = optionsDefn /* patchup long opt * to > \u1000 chars */
148
+ .match(/[A-Za-z*]:?(\([^)]*\))?/g)
149
+ .map(s => s[0] === '*' ? String.fromCharCode(++longOptCount + 0x1000) + s.slice(1) : s).join('');
150
+ const optionChars = optionsDefn.match(/[A-Za-z\u1000-\u1100/](:?\([^)]*\))?/g).map(s=>s[0]);
151
+ if (optionChars.length !== utils.uniq(optionChars).length)
152
+ throw new Error('getopt - option character used more than once');
153
+
154
+ const psw = process.stderr.write;
155
+ process.stderr.write = function patchupGetoptErrorOutput(s, ...args) {
156
+ /* posix-getopt library has useless chr for long opts when stderr.write('option requires an argument -- ' + chr + '\n'); */
157
+ if (s.startsWith('option requires an argument -- '))
158
+ s = `option ${argv[parser.optind() - 1]} requires an argument\n`;
159
+ psw.call(process.stderr, s, ...args);
160
+ };
161
+ parser = new BasicParser(optionsDefn, argv);
233
162
 
234
- // Imperfect, but handles CG { joinKey, joinHash }.
235
- function isHash(b) {
236
- return b && b.length === 68 && b.startsWith('eh1-');
237
- }
163
+ for (let opthnd, lastOpt=2; (opthnd = parser.getopt()); lastOpt = parser.optind())
164
+ {
165
+ let option;
238
166
 
239
- /**
240
- * Replacement for process.exit() that tries to increase the probability
241
- * that remote log messages will make it out over the network.
242
- */
243
- function processExit()
244
- {
245
- logClosing('debug', 'Exit Code:', process.exitCode || 0);
246
- if (console.close)
247
- console.close();
248
- setImmediate(() => {
249
- process.exit.apply(null, arguments);
250
- });
167
+ /* To fake long-only-opts, we use short opts (>= \u1000), and present only the long opt below */
168
+ if (opthnd.option < '\u1000')
169
+ option = opthnd.option;
170
+ else
171
+ option = (Object.entries(parser.gop_aliases).filter(longShort => longShort[1] === opthnd.option)[0] || ['?'])[0];
172
+
173
+ yield { option, optarg: opthnd.optarg, argvSegment: argv.slice(lastOpt, parser.optind()) };
174
+ if (opthnd.error)
175
+ break;
176
+ }
177
+
178
+ process.stderr.write = psw; // eslint-disable-line require-atomic-updates
251
179
  }
252
180
 
253
181
  /**
254
- * Main program entry point. Assumes DCP client is already initialized and console logging is ready.
182
+ * Phase 1 options are options which MUST be parsed before the worker is instantiated or which affect
183
+ * the phase 2 options' operation. Unrecognized options are assumed to be phase 2 options.
184
+ * - evaluator location
185
+ * - logging details
186
+ * - console type
187
+ * - configuration data
188
+ * - dcpConfig.worker
189
+ * - program.argv
190
+ *
191
+ * Important - need to pay close attention to error/exit handling in phase1 because the log redirection
192
+ * and console capturing doesn't happen until afterwards.
193
+ *
194
+ * @param {Array} argv argument vector to process
195
+ * @param {object} operatingMode major settings for this application, can be mutated by this function
196
+ * @returns {object} with properties:
197
+ * - evaluatorOptions - options for setting up the evaluator
198
+ * - loggingOptions - options for setting up the logger(s)
199
+ * - remainArgv - arguments which were not processed (handled in phase 2)
255
200
  */
256
- async function main()
201
+ function processCliOptsPhase1(argv, operatingMode)
257
202
  {
258
- const wallet = require('dcp/wallet');
259
- const identity = require('dcp/identity');
260
- const { DistributiveWorker } = require('dcp/worker');
261
- var workerConfig = {};
262
- const cliArgs = parseCliArgs();
263
- telnetd.setMainEval(function mainEval() { return eval(arguments[0]) }); // eslint-disable-line no-eval
264
- require('../lib/startWorkerLogger').init(cliArgs); /* Start remote logger as early as possible */
265
-
266
- /* Process any identity overrides and then establish our identity. */
267
- identity.interactive = false;
268
- if (cliArgs.identityKey || cliArgs.identityKeystore)
269
- await identity.set(cliArgs.identityKey || cliArgs.identityKeystore);
270
- await identity.login('$login');
271
-
272
- /* Eagerly create a worker object and mutate its config property to reflect argv before starting it. */
273
- const sawOptions = {
274
- hostname: cliArgs.hostname,
275
- port: cliArgs.port
203
+ const loggingOptions = {
204
+ minLevel: { all: undefined }, /* minimum logging levels for console and per logger */
205
+ loggers: [], /* names of loggers (places logs go besides console) */
206
+ syslogUrl: dcpConfig.worker.logging?.syslog?.url || new URL('udp://localhost:514/local7'),
207
+ logfile: path.resolve(path.dirname(require.main.filename), dcpConfig.worker.logging?.logfile || '../log/dcp-worker.log'),
276
208
  };
277
- if (typeof dcpConfig.worker.cleanupTimeout !== 'number')
278
- dcpConfig.worker.cleanupTimeout = 60;
279
- const SandboxConstructor = require('dcp-client/lib/standaloneWorker').workerFactory(sawOptions);
280
-
281
- process.on('SIGINT', handleSigDeath);
282
- process.on('SIGTERM', handleSigDeath);
283
- process.on('SIGQUIT', handleSigDeath);
284
- process.on('unhandledRejection', handleUnhandled);
285
- process.on('uncaughtException', handleUnhandled);
286
-
287
- if (cliArgs.paymentAddress)
288
- workerConfig.paymentAddress = new wallet.Address(cliArgs.paymentAddress);
289
- else if (typeof dcpConfig.worker.paymentAddress === 'string')
290
- workerConfig.paymentAddress = new wallet.Address(workerConfig.paymentAddress);
291
- else if (!workerConfig.paymentAddress)
292
- workerConfig.paymentAddress = (await wallet.get({ oAuth: false })).address;
293
-
294
- if (cliArgs.pidFile)
295
- require('../lib/pidfile').write(cliArgs.pidFile);
296
-
297
- if (cliArgs.leavePublicGroup !== undefined)
298
- workerConfig.leavePublicGroup = mkBool(cliArgs.leavePublicGroup);
299
- if (cliArgs.publicGroupFallback !== undefined)
300
- workerConfig.publicGroupFallback = mkBool(cliArgs.publicGroupFallback);
301
-
302
- /* Support magic value used by Windows screensaver configuration /wg June 2023 */
303
- if (workerConfig.leavePublicGroup === 'fallback')
304
- {
305
- workerConfig.publicGroupFallback = true;
306
- workerConfig.leavePublicGroup = undefined;
307
- }
308
-
309
- /* cliArgs.join is the list of compute groups to join */
310
- if (cliArgs.join && cliArgs.join.length)
311
- {
312
- const cliComputeGroups = cliArgs.join
313
- .map((el) => {
314
- /* Map cliArgs.join to give us [{ joinKey, joinSecret/joinHash }...] */
315
- const [a, b] = el.split(',');
316
- return isHash(b) ? { joinKey: a, joinHash: b } : { joinKey: a, joinSecret: b };
317
- })
318
- .filter((el) => el.joinKey); /* Filter out entries with no joinKey */
319
- workerConfig.computeGroups = cliComputeGroups;
320
- }
209
+ const evaluatorOptions = {
210
+ hostname: dcpConfig.evaluator?.location?.hostname,
211
+ port: dcpConfig.evaluator?.location?.hostname,
212
+ };
213
+ const remainArgv = argv.slice(0, 2);
321
214
 
322
- if (cliArgs.jobId)
215
+ for (let { option, optarg, argvSegment } of parseCliOptions(argv))
323
216
  {
324
- workerConfig.jobIds = [];
325
- workerConfig.jobIds.push(...cliArgs.jobId);
217
+ switch(option)
218
+ {
219
+ case 'h':
220
+ usage(); /* needs primoridial console */
221
+ process.exit(exitCodes.normal);
222
+ break;
223
+ case 'zero-trust':
224
+ operatingMode.zeroTrust = true;
225
+ operatingMode.unmanaged = true;
226
+ break;
227
+ case 'unmanaged':
228
+ operatingMode.unmanaged = true;
229
+ break;
230
+ case 'H':
231
+ evaluatorOptions.hostname = optarg;
232
+ break;
233
+ case 'p':
234
+ evaluatorOptions.port = Number(optarg);
235
+ break;
236
+ case 'o':
237
+ if (optarg === 'console')
238
+ optarg = 'stdio'; /* undocumented back-compat synonym */
239
+ operatingMode.consoleType = optarg;
240
+ break;
241
+ case 'l':
242
+ loggingOptions.loggers.push(optarg);
243
+ break;
244
+ case 'syslog':
245
+ loggingOptions.loggers.push('syslog');
246
+ loggingOptions.syslogUrl = utils.makeSyslogUrl(loggingOptions.syslogUrl, optarg);
247
+ break;
248
+ case 'logfile':
249
+ loggingOptions.logfile = optarg;
250
+ loggingOptions.loggers.push('logfile');
251
+ break;
252
+ case 'overwrite-logfile':
253
+ loggingOptions.overwriteLogfile = true;
254
+ break;
255
+ case 'min-level':
256
+ {
257
+ if (optarg.indexOf(',') === -1)
258
+ optarg=`all,${optarg}`;
259
+ const [ which, level ] = optarg.split(',');
260
+ if (which !== 'all' && which !== 'console' && !loggers.exists(which))
261
+ reportUserError(`--min-level: ${which} is neither 'console' nor the name of a logger`);
262
+ if (!logLevels.lookup(level))
263
+ reportUserError(`--min-level: ${level} is not a valid log level (${Object.keys(logLevels).join(', ')})`);
264
+ loggingOptions.minLevel[which] = level;
265
+ break;
266
+ }
267
+ case 'kvin':
268
+ operatingMode.showSerializer = new (require('dcp/internal/kvin')).KVIN();
269
+ break;
270
+ case 'json':
271
+ operatingMode.showSerializer = JSON;
272
+ break;
273
+ case 'v':
274
+ loggingOptions.verbose++;
275
+ break;
276
+ case '?':
277
+ process.exit(exitCodes.error);
278
+ break;
279
+ case 'claim-earnings':
280
+ case 'register':
281
+ operatingMode.zeroTrust = true;
282
+ operatingMode.consoleType = 'stdio';
283
+ /* fallthrough */
284
+ default:
285
+ remainArgv.push.apply(remainArgv, argvSegment);
286
+ break;
287
+ }
326
288
  }
327
289
 
328
- if (cliArgs.allowedOrigins)
329
- {
330
- workerConfig.allowOrigins = { any: [] };
331
- workerConfig.allowOrigins.any.push(...cliArgs.allowedOrigins);
332
- }
290
+ return { evaluatorOptions, loggingOptions, remainArgv };
291
+ }
333
292
 
334
- if (cliArgs.watchdogInterval)
335
- workerConfig.watchdogInterval = cliArgs.watchdogInterval;
293
+ async function processCliOptsPhase2(argv, operatingMode, loggingOptions)
294
+ {
295
+ var pidfileName = dcpConfig.worker.pidfile || pidfile.getDefaultPidFileName();
336
296
 
337
- if (cliArgs.dumpConfig)
297
+ for (let { option, optarg } of parseCliOptions(argv))
338
298
  {
339
- console.log(JSON.stringify(require('dcp/dcp-config'), null, 2));
340
- process.exit();
299
+ switch(option)
300
+ {
301
+ case 'earnings-account':
302
+ try
303
+ {
304
+ if (wallet.isAddress(optarg))
305
+ worker.config.paymentAddress = new wallet.Address(optarg);
306
+ else if (optarg[0] === '{')
307
+ worker.config.paymentAddress = (await new wallet.BankAccountKeystore(optarg)).address;
308
+ else
309
+ {
310
+ const { expandPath } = require('dcp/utils');
311
+ const json = fs.readFileSync(expandPath(optarg), 'utf8').trim();
312
+ worker.config.paymentAddress = (await new wallet.BankAccountKeystore(json)).address;
313
+ }
314
+ }
315
+ catch (error)
316
+ {
317
+ console.error(`invalid earnings account: '${optarg}'`);
318
+ await cleanupThenExit(exitCodes.error);
319
+ }
320
+ break;
321
+ case 'c':
322
+ Object.assign(worker.config.cores, parseResourceOption(optarg));
323
+ break;
324
+ case 'u':
325
+ Object.assign(worker.config.utilization, parseResourceOption(optarg));
326
+ break;
327
+ case 'm':
328
+ worker.config.maxSandboxes = Number(optarg);
329
+ break;
330
+ case 'j':
331
+ if (!worker.config.jobIds)
332
+ worker.config.jobIds = [];
333
+ worker.config.jobIds.push(optarg);
334
+ break;
335
+ case 'g':
336
+ {
337
+ const [joinKey, cred] = optarg.split(',');
338
+ const cg = { joinKey };
339
+ cg[utils.isHash(cred) ? 'joinHash' : 'joinSecret'] = cred;
340
+ worker.config.computeGroups[joinKey] = cg;
341
+ break;
342
+ }
343
+ case 'a':
344
+ worker.originManager.add(optarg, null, null);
345
+ break;
346
+ case 'no-global':
347
+ worker.config.leaveGlobalGroup = true;
348
+ break;
349
+ case 'show':
350
+ operatingMode.show = optarg;
351
+ break;
352
+ case 'startup-delay':
353
+ await a$sleep(Number(optarg));
354
+ break;
355
+ case 'startup-delay-rnd':
356
+ await a$sleep(Number(optarg) * Math.random());
357
+ break;
358
+ case 'f':
359
+ pidfileName = optarg;
360
+ break;
361
+ case 'no-pidfile':
362
+ pidfileName = false;
363
+ break;
364
+ case 'unmanaged':
365
+ worker.config.unmanaged = true;
366
+ break;
367
+ case 'no-claim-earnings':
368
+ worker.config.claimEarnings = false;
369
+ break;
370
+ case 'register':
371
+ {
372
+ if (!wallet.isPrivateKey(optarg))
373
+ await reportUserError('invalid registration key');
374
+ try
375
+ {
376
+ const reg = await worker.register(optarg);
377
+ if (reg instanceof Error)
378
+ console.error(reg);
379
+ else
380
+ {
381
+ if (reg.status !== 'registered')
382
+ console.error(`Registration failure: ${reg.status}`);
383
+ else
384
+ {
385
+ console.log(`Registration complete; worker ${worker.id} now managed by ${reg.owner}`);
386
+ if (pidfileName)
387
+ await require('../lib/pidfile').signal(pidfileName, 'SIGQUIT', 30, 'stopping worker on pid %i');
388
+ }
389
+ }
390
+ }
391
+ catch(error)
392
+ {
393
+ console.log('Registration failure:', error.message);
394
+ debug('dcp-worker:registration')(error);
395
+ }
396
+ await cleanupThenExit(exitCodes.normal);
397
+ break;
398
+ }
399
+ case 'claim-earnings':
400
+ {
401
+ if (!wallet.isAddress(optarg))
402
+ await reportUserError('invalid bank account');
403
+ const earningsAccount = new wallet.Address(optarg);
404
+ const res = await DistributiveWorker.transferUnclaimedEarnings(earningsAccount);
405
+ if (res.payload)
406
+ console.log(`Claimed ${res.payload} ⊇`);
407
+ else
408
+ {
409
+ debug('dcp-worker:transfer')(res.payload);
410
+ if (res.payload instanceof require('dcp/protocol').ErrorPayload)
411
+ console.error(res.payload.message);
412
+ else
413
+ {
414
+ const error = new Error(`error transferring unclaimed earnings to ${earningsAccount}`);
415
+ if (res.payload.code)
416
+ error.message += ` (${res.payload.code})`;
417
+ if (res.payload.stack)
418
+ {
419
+ error.stack = error.stack
420
+ + `\n -------------------- [${dcpConfig.bank.services.bankTeller.location}]\n`
421
+ + res.payload.stack;
422
+ }
423
+ throw error;
424
+ }
425
+ }
426
+ await cleanupThenExit(exitCodes.normal);
427
+ break;
428
+ }
429
+ case 'disable-httpd':
430
+ operatingMode.startHttpd = false;
431
+ break;
432
+ default:
433
+ throw new Error(`unhandled option '${option}'`);
434
+ /* hidden / undocumented options below */
435
+ case 'slice-report-interval':
436
+ loggingOptions.sliceReportInterval = Number(optarg);
437
+ break;
438
+ case 'reset': /* can lose unclaimed earnings */
439
+ DistributiveWorker.resetLocalStorage();
440
+ console.info('reset complete');
441
+ await cleanupThenExit(exitCodes.normal);
442
+ break;
443
+ }
341
444
  }
342
445
 
343
- worker = new DistributiveWorker(workerConfig, SandboxConstructor);
344
- workerConfig = null; /* bug-finder */
345
- processCoresAndMaxSandboxes(worker.config, cliArgs);
446
+ return { pidfileName };
447
+ }
448
+
449
+ /**
450
+ * Merge the configuration in the database for this program into the cli-derived options.
451
+ */
452
+ async function mergeDbAppConfig(remainArgv, operatingMode, loggingOptions)
453
+ {
454
+ const workerOpaqueId = DistributiveWorker.obtainWorkerId();
455
+ const db = await DistributiveWorker.jnuConnect();
346
456
 
347
- if (cliArgs.showConfig)
457
+ const cursor = await db.select({ table: 'w$dcpWorkerAppConfig', prototype: { workerOpaqueId } });
458
+ const appConfig = await cursor.getFirst();
459
+ if (!appConfig)
348
460
  {
349
- console.log(JSON.stringify(worker.config, null, 2));
350
- process.exit();
461
+ if (loggingOptions.verbose)
462
+ console.log(' . no remote application config at', db.config.location.origin);
463
+ return;
351
464
  }
352
-
353
- bannerLog(` * Starting Distributive Worker ${worker.id}`);
354
- bannerLog(` . Using evaluator at ${dcpConfig.evaluator.location}`);
355
- bannerLog(` . Configured for scheduler ${dcpConfig.scheduler.location}`);
356
- bannerLog(` . Bank is ${dcpConfig.bank.location}`);
357
- bannerLog(` . Earned funds will be deposited in account ${worker.config.paymentAddress}`);
358
- bannerLog(` . Identity is ${identity.get()?.address}`);
359
-
360
- worker.on('warning', (...payload) => console.warn (...payload));
361
- worker.on('stop', () => { console.log('Worker is stopping') });
362
- worker.on('end', () => { logClosing('log', 'Worker has stopped') });
363
- worker.on('job', job => console.log(` . Job: ${job.name} ${job.id.slice(0,8)} ${job.description || ''} ${job.link || ''}`));
364
-
365
- // Display clean diagnostic when not debugging and env var
366
- // DCP_SUPERVISOR_DEBUG_DISPLAY_MAX_INFO isn't set.
367
- worker.on('error', (error) => {
368
- if (displayMaxDiagInfo())
369
- console.error(error);
465
+ if (loggingOptions.verbose)
466
+ console.log(' . loaded remote application config from', db.config.location.origin);
467
+
468
+ if (appConfig.syslogUrl)
469
+ remainArgv.unshift(`--syslog=${appConfig.syslogUrl}`);
470
+ if (appConfig.useSyslog)
471
+ remainArgv.unshift('--logger=syslog');
472
+ if (appConfig.useEventLog)
473
+ remainArgv.unshift('--logger=event-log');
474
+ if (appConfig.disableHttpd)
475
+ remainArgv.unshift('--disable-httpd');
476
+ if (appConfig.startupDelay)
477
+ {
478
+ if (appConfig.startupDelayType === 'rnd')
479
+ remainArgv.unshift(`--startup-delay-rnd=${appConfig.startupDelay}`);
370
480
  else
371
- {
372
- const errorCode = error.code ?? error.errorCode;
373
- const location = error.stack.split('\n')[1].replace(/^\s*/,'');
374
- let message = error.message;
481
+ remainArgv.unshift(`--startup-delay=${appConfig.startupDelay}`);
482
+ }
483
+ }
375
484
 
376
- if (errorCode)
377
- message += ` (${error.code})`;
378
- console.error(`Error: ${error.message} ${location}`);
379
- }
380
- });
381
- require('../lib/default-ui-events').hook(worker, cliArgs);
485
+ /**
486
+ * Main program entry point. Assumes DCP client is already initialized and console logging is ready.
487
+ */
488
+ async function main()
489
+ {
490
+ /* handle file-wide dcp-client imports */
491
+ identity = require('dcp/identity');
492
+ wallet = require('dcp/wallet');
493
+ ({ a$sleep } = require('dcp/utils'));
494
+ ({ DistributiveWorker } = require('dcp/worker'));
495
+
496
+ const operatingMode = {
497
+ consoleType: (!usingDebugger && process.stdout.isTTY) ? 'dashboard' : 'stdio', /* what kind of console? stdio, dashboard, none */
498
+ zeroTrust: false, /* truey => do not load config data etc from scheduler */
499
+ showSerializer: false,
500
+ startHttpd: true
501
+ };
382
502
 
383
- if (cliArgs.outputMode === 'dashboard')
384
- require('../lib/dashboard-tui').init(worker, cliArgs, bannerLog.cumulative);
503
+ identity.interactive = false;
504
+ process.exitCode = exitCodes.lostRef;
385
505
 
386
- /* Let incorrect event-loop references keep us alive when linked with a debug library, but
387
- * exit quickly/accurately for production code even when the library isn't perfect.
506
+ /* Telnetd is started ASAP to make it possible to troubleshoot very early problems. Then set our
507
+ * console type, initialize the loggers, and finally create a worker object and mutate its config to
508
+ * reflect argv before starting it.
388
509
  */
389
- if (require('dcp/build').config.build !== 'debug')
390
- worker.on('end', processExit);
391
- else
392
- worker.on('end', () => setTimeout(processExit, worker.config.cleanupTimeout * 1e3).unref());
510
+ telnetd.setMainEval(function mainEval() { return eval(arguments[0]) }); // eslint-disable-line no-eval
511
+ const {
512
+ remainArgv, /* options not processed in phase1 - pick up in phase2 */
513
+ evaluatorOptions, /* options for sandbox constructor factory */
514
+ loggingOptions, /* options to tell us where logs go besides the console */
515
+ } = processCliOptsPhase1(process.argv, operatingMode);
516
+ await identity.login('$login');
393
517
 
394
- if (worker.config.publicGroupFallback)
518
+ if (dcpConfig.worker.logging?.syslog)
519
+ loggingOptions.loggers.push('syslog');
520
+ if (dcpConfig.worker.logging?.logfile)
521
+ loggingOptions.loggers.push('logfile');
522
+ if (dcpConfig.worker.eventLog)
523
+ loggingOptions.loggers.push('event-log');
524
+ if (!operatingMode.unmanaged)
525
+ await mergeDbAppConfig(remainArgv, operatingMode, loggingOptions);
526
+ loggingOptions.loggers = utils.uniq(loggingOptions.loggers);
527
+ const newConsole = require('../lib/worker-consoles').setConsoleType(operatingMode.consoleType);
528
+ loggers.hook(newConsole, loggingOptions);
529
+
530
+ const SandboxConstructor = require('dcp-client/lib/standaloneWorker').workerFactory(evaluatorOptions);
531
+ worker = new DistributiveWorker(Object.assign({}, dcpConfig.worker, {
532
+ trustScheduler: operatingMode.zeroTrust ? false : dcpConfig.worker.trustScheduler,
533
+ unmanaged: operatingMode.unmanaged ? true : dcpConfig.worker.unmanaged,
534
+ }), SandboxConstructor);
535
+ const { pidfileName } = await processCliOptsPhase2(remainArgv, operatingMode, loggingOptions);
536
+ worker.runInfo.type = 'dcp-worker';
537
+
538
+ if (operatingMode.show)
395
539
  {
396
- if (worker.config.leavePublicGroup)
397
- console.warn(' ! Global Group fallback has been requested, but the global group is blocked by local configuration');
540
+ const output = await require('../lib/show').show(worker, operatingMode.show);
541
+ if (operatingMode.showSerializer)
542
+ console.log(operatingMode.showSerializer.stringify(output, null, 2));
398
543
  else
399
- {
400
- /* Enable global group fallback - this currently works by enabling or disabling the global group
401
- * on the next fetch based on whether or not the most recent fetch was an empty task or not.
402
- */
403
- worker.on('fetch', fetchEventHandler);
404
-
405
- function fetchEventHandler(ev)
406
- {
407
- if (ev instanceof Error)
408
- console.error('Error fetching task:', ev);
409
- else
410
- worker.config.leavePublicGroup = Boolean(slicesFetched(ev) > 0);
411
- }
412
- }
544
+ console.log(output);
545
+ await cleanupThenExit(exitCodes.normal);
413
546
  }
414
547
 
415
- /** @todo i18n */
416
- function qty(amount, singular, plural)
417
- {
418
- if (Array.isArray(amount))
419
- amount = amount.length;
420
- if (!plural)
421
- plural = singular + 's';
422
- if (!amount)
423
- return plural;
424
- if (Number(amount) === 1)
425
- return singular;
426
- return plural;
427
- }
548
+ const sandboxConfig = (new SandboxConstructor()).config;
549
+ console.info(` * Starting Distributive Worker ${worker.id}`);
550
+ console.info(` . Using evaluator at ${utils.shortLoc(sandboxConfig.location)}`);
551
+ console.info(` . Configured for scheduler ${dcpConfig.scheduler.location}`);
552
+ console.info(` . Bank is ${dcpConfig.bank.location}`);
553
+ if (worker.config.paymentAddress)
554
+ console.info(` . Earned funds will be deposited in account ${worker.config.paymentAddress}`);
555
+ else
556
+ console.info(' . Earned funds will be deposited into an unclaimed earnings account - register worker to claim');
557
+ console.info(` . Identity is ${identity.get()?.address}`);
558
+ if (operatingMode.zeroTrust)
559
+ console.info(' ! Zero Trust mode enabled');
560
+ else if (!worker.config.trustScheduler)
561
+ console.info(' ! Scheduler Trust disabled');
562
+
563
+ worker.on('warning', (...payload) => console.warn (...payload));
564
+ worker.on('stop', () => { console.log(' - Worker is stopping') });
565
+ worker.on('end', () => { console.log(' * Worker has stopped') });
566
+ worker.on('job', job => console.log(` . Job: ${job.name} ${job.id.slice(0,8)} ${job.description || ''} ${job.link || ''}`));
567
+ worker.on('claim', amount => console.log(` . transfered ${amount} ⊇ to ${worker.config.paymentAddress}`));
568
+
569
+ require('../lib/default-ui-events').hook(worker, loggingOptions);
570
+ console.setWorker(worker);
428
571
 
429
572
  if (worker.config.jobIds?.length > 0)
430
- bannerLog(` * Processing only ${qty(worker.config.jobIds, 'job')} ` + worker.config.jobIds.join(', '));
573
+ console.info(` * Processing only ${utils.qty(worker.config.jobIds, 'job')} ` + worker.config.jobIds.join(', '));
431
574
  if (worker.config.computeGroups && Object.keys(worker.config.computeGroups).length > 0)
432
- bannerLog(` . Joining compute ${qty(worker.config.computeGroups, 'group')} ` + worker.config.computeGroups.map(el => el.joinKey).join(', '));
433
- if (worker.config.publicGroupFallback)
434
- bannerLog(' . Falling back on global group when preferred groups have no work');
435
- if (worker.config.leavePublicGroup)
436
- bannerLog(' . Leaving the global compute group');
437
-
438
- bannerLog(` . Configured Cores: { cpu: ${worker.config.cores.cpu}, gpu: ${worker.config.cores.gpu} }`);
439
- bannerLog(` . Utilization: { cpu: ${worker.config.utilization.cpu}, gpu: ${worker.config.utilization.gpu} }`);
440
- bannerLog(` . Effective Cores: { cpu: ${worker.config.utilization.cpu * worker.config.cores.cpu}, gpu: ${worker.config.utilization.gpu * worker.config.cores.gpu} }`);
441
- bannerLog(` . Maximum Sandboxes: ${worker.config.maxSandboxes}`);
442
- if (cliArgs.verbose)
443
- bannerLog(` + Verbosity level: ${cliArgs.verbose}`);
444
- if (telnetd.hasOwnProperty('port'))
445
- bannerLog(` ! telnetd listening on port ${telnetd.port}`);
446
-
447
- let workerInfo;
448
- console.throb('log', ` ! Waiting for dcp-evaluator on ${sawOptions.hostname}:${sawOptions.port} to start...`);
449
- for (let i=0; !workerInfo; i++)
575
+ console.info(` . Joining compute ${utils.qty(worker.config.computeGroups, 'group')} ` + Object.values(worker.inspect.computeGroups).map(el => el.joinKey).join(', '));
576
+ if (worker.config.leaveGlobalGroup)
577
+ console.info(' ! Leaving the global compute group');
578
+
579
+ if (loggingOptions.verbose)
580
+ console.info(` + Verbosity level: ${loggingOptions.verbose}`);
581
+
582
+ if (operatingMode.startHttpd)
583
+ await httpd.a$init().catch(e => e);
584
+
585
+ /* Poll the evaluator forever until it accepts a connection */
586
+ let workerInfo = false;
587
+ console.throb(` . Waiting for dcp-evaluator on ${utils.shortLoc(sandboxConfig.location)} to start...`);
588
+ for (let i=0; workerInfo === false; i++)
450
589
  {
451
- workerInfo = await require('../lib/worker-info').getEvaluatorInformation(sawOptions);
452
- if (workerInfo)
590
+ const ts = performance.now();
591
+ if ((workerInfo = await require('../lib/worker-info').getEvaluatorInformation(sandboxConfig)))
453
592
  break;
454
- console.throb();
593
+ if (performance.now() - ts > 100)
594
+ console.throb();
455
595
  await a$sleep(Math.max(i + 1, 10) / 4);
456
596
  console.throb();
457
597
  }
458
598
 
459
599
  if (Object.keys(workerInfo.worktimes).length > 0)
460
600
  {
461
- bannerLog(' . Worktimes Available:');
601
+ console.info(' . Worktimes Available:');
462
602
  for (const wt of workerInfo.worktimes)
463
- bannerLog(` -\t${wt.name}@${wt.versions.join(';')}`);
603
+ console.info(` - ${wt.name}@${wt.versions.join(';')}`);
464
604
  }
465
605
 
466
606
  if (!workerInfo.webgpu.enabled)
467
607
  {
468
- bannerLog(' . WebGPU not enabled');
608
+ console.info(' . WebGPU not enabled');
469
609
  systemStateInfo.gpu = false;
470
610
  }
471
611
  else
472
612
  {
473
- bannerLog(' . WebGPU available. GPU info:');
613
+ console.info(' . WebGPU available. GPU info:');
474
614
  for (let descriptor in workerInfo.webgpu.info)
475
- bannerLog(` -\t${descriptor}: ${workerInfo.webgpu.info[descriptor]}`);
615
+ console.info(` - ${descriptor}: ${workerInfo.webgpu.info[descriptor]}`);
476
616
  systemStateInfo.gpu = Object.assign({}, workerInfo.webgpu.info);
477
- for (let descriptor in worker.config.bannedGPUs)
617
+
618
+ /** @todo move bannedGPU support into DistributiveWorker class /wg jun 2025 */
619
+ const bannedGPUs = Array.isArray(worker.config.bannedGPUs) ? worker.config.bannedGPUs : [ worker.config.bannedGPUs ];
620
+ checkBans: for (let banEntry of bannedGPUs)
478
621
  {
479
- if (worker.config.bannedGPUs[descriptor].test(workerInfo.webgpu.info[descriptor]))
622
+ for (let descriptor in banEntry)
480
623
  {
481
- console.error(' * This GPU is not supported; disabling');
482
- worker.config.cores = { cpu: worker.config.cores.cpu, gpu: 0 };
483
- systemStateInfo.gpu.disabled = true;
484
- break;
624
+ if (banEntry[descriptor].test(workerInfo.webgpu.info[descriptor]))
625
+ {
626
+ console.error(' * This GPU is not supported; disabling');
627
+ worker.config.cores = { cpu: worker.config.cores.cpu, gpu: 0 };
628
+ systemStateInfo.gpu.disabled = true;
629
+ break checkBans;
630
+ }
485
631
  }
486
632
  }
633
+
634
+ /** @todo move GPU type reporting into DistributiveWorker class /wg jun 2025 */
635
+ worker.runInfo.gpuType = workerInfo.webgpu.info.device;
636
+ if (systemStateInfo.gpu.disabled)
637
+ worker.runInfo.gpuType += ' (disabled)';
487
638
  }
488
639
 
489
- bannerLog(' . Supervisor version: ' + worker.supervisorVersion);
490
- bannerLog(' . Output mode: ' + cliArgs.outputMode);
640
+ console.info(` . Utilization Target: cpu: ${String(Math.round(worker.config.utilization.cpu * 100)).padStart(3, ' ')}% `
641
+ + `gpu: ${Math.round(worker.config.utilization.gpu * 100)}%`);
642
+ console.info(` . Configured Cores: cpu: ${String(worker.config.cores.cpu).padEnd(3, ' ')} `
643
+ + ` gpu: ${worker.config.cores.gpu}`);
644
+ console.info(` . Maximum Sandboxes: ${worker.config.maxSandboxes}`);
645
+ console.info(' . Supervisor version: ' + worker.supervisorVersion);
646
+ console.info(' . Console type: ' + operatingMode.consoleType);
647
+ console.info(' . Logging: ' + (loggingOptions.loggers.join(', ') || 'none'));
491
648
 
492
- if (parseFloat(cliArgs.reportInterval))
493
- {
494
- if (cliArgs.outputMode !== 'dashboard')
495
- setInterval(printReport, parseFloat(cliArgs.reportInterval) * 1000).unref();
496
- else
497
- console.warn('Ignoring --reportInterval in dashboard output mode');
498
- }
649
+ if (loggingOptions.sliceReportInterval)
650
+ setInterval(() => reports.printSliceReport(worker), parseFloat(loggingOptions.sliceReportInterval) * 1000).unref();
499
651
 
500
652
  /* Start the worker. Normal process exit happens by virtue of the worker<end> event. Move forward one
501
- * tick in case processExit was called during initialization.
653
+ * tick in case process.exit() was called during initialization. The dance with process.exitCode is so
654
+ * that we can detect when the process exited unexpectedly due to the loss of event loop references.
502
655
  */
503
- setImmediate(async function workerStart() {
504
- bannerLog(' * Ready.\n');
505
- require('../lib/check-scheduler-version').check();
506
- await worker.start();
656
+ if (handleSigDeath.count)
657
+ return;
658
+ require('../lib/check-scheduler-version').check();
659
+
660
+ worker.on('end', () => {
661
+ if (process.exitCode === exitCodes.lostRef)
662
+ process.exitCode = exitCodes.normal;
663
+ cleanupThenExit(exitCodes.normal);
507
664
  });
508
- }
509
-
510
- /**
511
- * Process the cores, utilization and maxSandboxes cli arguments.
512
- *
513
- * cliArgs.cores is the core count of the hardware to use.
514
- * It can be specified with only the cpu or gpu component, or both.
515
- *
516
- * E.g. -c 2,1 => cores = { cpu: 2, gpu: 1 }
517
- * -c 10 => cores = { cpu: 10, gpu: <default> }
518
- * -c ,10 => cores = { cpu: <default>, gpu: 10 }
519
- * -u 0.80,0.75 => utilization = { cpu: 0.80, gpu: 0.75 }
520
- * -m 5 => maxSandboxes = 5
521
- */
522
- function processCoresAndMaxSandboxes (workerConfig, cliArgs)
523
- {
524
- if (typeof cliArgs['maxSandboxes'] !== 'undefined')
525
- workerConfig.maxSandboxes = Number(cliArgs['maxSandboxes']);
526
665
 
527
- const parseArg = (which) => {
528
- if (cliArgs[which])
666
+ worker.on('error', function handleWorkerError(error) {
667
+ if (worker.isWorkerError && worker.isWorkerError(error))
529
668
  {
530
- workerConfig[which] = {};
531
- const [cpu, gpu] = cliArgs[which].split(',');
532
- if (cpu?.length > 0)
533
- workerConfig[which].cpu = Number(cpu);
534
- if (gpu?.length > 0)
535
- workerConfig[which].gpu = Number(gpu);
669
+ // A WorkerError is an error that has been vetted as safe for worker apps.
670
+ if (loggingOptions.verbose > 1)
671
+ console.error('Recoverable', error.code ? `${error.name} (${error.code}):` : `${error.name}:`, error.message);
672
+ else if (process.env.DCP_SUPERVISOR_DEBUG_DISPLAY_MAX_INFO)
673
+ console.error(error.code ? `${error.message};code=${error.code}` : `${error.message}`);
674
+ debug('dcp-worker:error')('Trapped DistributiveWorker<error>:', error);
675
+ return;
536
676
  }
537
- };
677
+ /** @todo Errors from sandbox code need improvement /wg jun 2025 */
678
+ if (/^Received unhandled request from sandbox ring [0-9]. Data:/.exec(error.message))
679
+ {
680
+ const { ServerSideError } = require('dcp/utils');
681
+ const embeddedError = JSON.parse(error.message.slice(error.message.indexOf(' Data:') + 7)).error;
682
+ const ssError = new ServerSideError(embeddedError, dcpConfig.scheduler.location.href, 'dcp-worker');
683
+ error = ssError;
684
+ }
685
+ if (!handleWorkerError.count)
686
+ {
687
+ console.error('Unrecoverable', error.code ? `${error.name} (${error.code})` : error.name,
688
+ '- shutting down the worker:', error.message);
689
+ }
690
+ worker.stop(Boolean(handleWorkerError.count));
691
+ handleWorkerError.count = 1 + (handleWorkerError.count || 0);
692
+ });
538
693
 
539
- parseArg('utilization');
540
- parseArg('cores');
694
+ if (pidfileName && require('../lib/pidfile').write(pidfileName) !== true)
695
+ await cleanupThenExit(exitCodes.pidConflict);
541
696
 
542
- if (debugging())
697
+ if (worker.config.claimEarnings === false && !worker.config.unmanaged)
543
698
  {
544
- console.debug(`dcp-worker: cores = { cpu: ${workerConfig.cores.cpu}, gpu: ${workerConfig.cores.gpu} }`);
545
- console.debug('dcp-worker: utilization =', workerConfig.utilization);
546
- console.debug('dcp-worker: maxSandboxes =', workerConfig.maxSandboxes);
699
+ const amount = await DistributiveWorker.checkUnclaimedEarnings();
700
+ if (amount.gt(0))
701
+ console.note(` ! not claming earnings in the amount of ${amount} ⊇ (can claim later)`);
547
702
  }
548
- }
703
+
704
+ console.info(' * Ready.\n');
705
+ await worker.start();
706
+ httpd.setWorker(worker);
707
+ } /* main() */
549
708
 
550
709
  /**
551
- * Log a closing message (or messages). Since the dashboard clears the screen on exit, we use the
552
- * memoized console property to log the message after we destroy the instance of screen.
710
+ * Parse a cpu,gpu resource option, eg for core count or utilization.
711
+ *
712
+ * @param {string} option argument to command-line option given by user. Spaces are ignored.
713
+ * @returns {object} with optional properties cpu and gpu
714
+ * @example
715
+ * '2,1' => { cpu: 2, gpu: 1 }
716
+ * ',1' => { gpu: 1 }
717
+ * '2' => { cpu: 2 }
718
+ * '2,0' => { cpu: 2, gpu: 0 }
719
+ * '' => {}
553
720
  */
554
- function logClosing(facility, ...message)
721
+ function parseResourceOption(option)
555
722
  {
556
- var screen = require('../lib/worker-loggers/dashboard').screen;
557
-
558
- if (screen)
559
- {
560
- /* Turn off fullscreen TUI and resume "normal" console logging.
561
- * FUTURE: dashboard API should know how to unregister its hook so that we don't have to clobber
562
- * it here.
563
- */
564
-
565
- try
566
- {
567
- screen.log(...message);
568
- screen.destroy();
569
- screen = false;
570
- console = new (require('console').Console)(process); // eslint-disable-line no-global-assign
571
- telnetd.reintercept();
572
- }
573
- catch(error) {} // eslint-disable-line no-empty
574
- }
575
-
576
- console[facility](...message);
723
+ const [cpu, gpu] = option.replace(/ /g, '').split(',');
724
+ const res = {};
725
+ if (cpu?.length > 0)
726
+ res.cpu = Number(cpu);
727
+ if (gpu?.length > 0)
728
+ res.gpu = Number(gpu);
729
+ return res;
577
730
  }
578
731
 
579
732
  /**
@@ -584,110 +737,81 @@ function logClosing(facility, ...message)
584
737
  */
585
738
  function handleUnhandled(error)
586
739
  {
587
- var _worker = worker;
588
- worker = false;
589
-
590
- process.exitCode = process.exitCode || EXIT_UNHANDLED;
591
- logClosing('error', 'trapped unhandled', error);
592
-
593
- if (_worker)
740
+ if (!error.request || !error.code?.startsWith('EHTTP_'))
741
+ console.error(' *** trapped unhandled', error);
742
+ else
594
743
  {
595
- setTimeout(() => {
596
- logClosing('error', 'handleFatalError timeout - exiting now');
597
- processExit();
598
- }, _worker.config.cleanupTimeout * 1e3).unref();
599
- _worker.on('end', processExit);
600
- _worker.stop();
744
+ console.error(' *** trapped unhandled XHR error', error.message);
745
+ /* appears to be from XHR via justFetch etc */
746
+ console.log(require('dcp/utils').justFetchPrettyError(error));
747
+ delete error.request;
748
+ delete error.response;
601
749
  }
602
750
 
751
+ process.off('unhandledRejection', handleUnhandled);
752
+ process.off('uncaughtException', handleUnhandled);
753
+
603
754
  try
604
755
  {
605
- let log = process.env.DCP_WORKER_UNHANDLED_REJECTION_LOG || worker?.config?.unhandledRejectionLog;
756
+ let log = process.env.DCP_WORKER_UNHANDLED_ERROR_LOG || worker?.config?.unhandledErrorLog;
606
757
  if (log)
607
- fs.appendFileSync(process.env.DCP_WORKER_UNHANDLED_REJECTION_LOG,
758
+ fs.appendFileSync(process.env.DCP_WORKER_UNHANDLED_ERROR_LOG,
608
759
  `${Date.now()}: ${error.message}\n${error.stack}\n\n`);
609
- } catch(e) {} // eslint-disable-line no-empty
610
- }
760
+ } catch(e) {} // eslint-disable-line @distributive/brace-style,no-empty
611
761
 
612
- /** print the slice report via console.log */
613
- function printReport()
614
- {
615
- console.log(sliceReport());
762
+ cleanupThenExit(exitCodes.unhandled);
616
763
  }
617
764
 
618
765
  /**
619
- * Convert a timespan in ms to a human-readable interval in minutes and seconds
766
+ * Cause the process to exit ASAP, but take the time to gracefully unhook loggers so that log messages
767
+ * make it out. Gives up and exits forcefully after cleanupTimeout seconds.
620
768
  *
621
- * @param {number} el Milliseconds to convert
622
- * @return {string} Timespan formatted as `m:ss`
769
+ * @param {number} exitCode - the desired exit code of the dcp-worker process
770
+ * @returns {Promise} - a promise which never resolves, can be used to "block" execution of the current
771
+ * "thread" while the console and loggers are being unhooked.
623
772
  */
624
- function toInterval(el)
773
+ function cleanupThenExit(exitCode)
625
774
  {
626
- const m = Math.floor((el / 1000) / 60).toString(10);
627
- const s = Math.floor((el / 1000) % 60).toString(10).padStart(2, '0');
628
- return `${m}:${s}`;
629
- }
630
-
631
- /** retrieve a slice report screen */
632
- function sliceReport()
633
- {
634
- let report = '';
635
-
636
- report += ('='.repeat(78)) + '\n';
637
-
638
- const sbStates = {
639
- WORKING: 0,
640
- ASSIGNED: 0,
641
- READY: 0,
642
- TERMINATED: 0,
643
- };
644
- const stateNames = {
645
- WORKING: 'Working',
646
- ASSIGNED: 'Assigned',
647
- READY: 'Ready',
648
- TERMINATED: 'Terminated',
649
- };
650
- worker.sandboxes.forEach(sb => {
651
- const { state } = sb;
652
- if (!sbStates[state])
653
- sbStates[state] = 0;
654
- sbStates[state]++;
655
- });
775
+ debug('dcp-worker:exit')('cleanup and exit, code', exitCode);
776
+ if (cleanupThenExit.busy)
777
+ {
778
+ if (!process.exitCode)
779
+ process.exitCode = exitCode;
780
+ debug('dcp-worker:exit')('exit race, bailing on cleanup for exit code', exitCode);
781
+ return;
782
+ }
656
783
 
657
- report += (Date()) + '\n';
658
- report += ('Sandboxes:') + '\n';
659
- Object.keys(sbStates).forEach(state => {
660
- const stateName = stateNames[state] || state;
661
- report += (` ${(stateName + ':').padEnd(12)} ${sbStates[state]}`) + '\n';
662
- })
663
- report += (` * ALL: ${worker.sandboxes.length}`) + '\n';
664
-
665
- report += ('Progress:') + '\n';
666
- worker.workingSandboxes.forEach(sb => {
667
- const jobName = sb.job?.public?.name || `idek (${sb.jobId})`;
668
- let el = Date.now() - sb.sliceStartTime;
669
- const t = el < 1000000
670
- ? toInterval(el)
671
- : 'new';
672
-
673
- el = sb.progressReports && sb.progressReports.last
674
- ? Date.now() - (sb.sliceStartTime + (sb.progressReports.last?.timestamp ?? 0))
675
- : 0;
676
- const pct = (typeof sb.progress) === 'number'
677
- ? `${Number(sb.progress).toFixed(0).padStart(2)}%`
678
- : 'ind';
679
- const stale = (el < 2000) ? '' : `(stale: ${toInterval(el)})`;
680
-
681
- report += (` ${String(sb.id).padStart(4)}: ${sb.jobId} ${jobName.padEnd(34)} `+ `${t} ${pct} ${stale}`.padStart(13)) + '\n';
682
- });
784
+ worker = false;
785
+ cleanupThenExit.busy = true;
786
+ process.exitCode = exitCode;
683
787
 
684
- report += ('Slices:') + '\n';
685
- report += (` working: ${worker.workingSlices.length}`) + '\n';
686
- report += (` queued: ${worker.queuedSlices.length}`) + '\n';
788
+ /* Final event loop reference; keeps the process alive while the loggers unhook */
789
+ setTimeout(() => {
790
+ console.warn(' *** logger cleanup timeout expired ***');
791
+ process.exit();
792
+ }, 10 * 1e3).ref();
687
793
 
688
- report += ('='.repeat(78)) + '\n';
794
+ try
795
+ {
796
+ process.off('SIGESC', handleSigDeath);
797
+ process.off('SIGINT', handleSigDeath);
798
+ process.off('SIGTERM', handleSigDeath);
799
+ process.off('SIGQUIT', handleSigDeath);
800
+ process.off('unhandledRejection', handleUnhandled);
801
+ process.off('uncaughtException', handleUnhandled);
802
+ if (console.close)
803
+ console.close();
804
+ loggers.unhook()
805
+ .catch(error => { console.error('Error unhooking loggers:', error) })
806
+ .finally(() => process.exit());
807
+ }
808
+ catch(error)
809
+ {
810
+ console.error('Cleanup error:', error);
811
+ process.exit(exitCodes.unhandled);
812
+ }
689
813
 
690
- return report;
814
+ return new Promise(()=>1);
691
815
  }
692
816
 
693
817
  /**
@@ -696,53 +820,49 @@ function sliceReport()
696
820
  * - unregistering the signal handler and replacing it with a call to process.exit() after
697
821
  * two tries.
698
822
  * -> this allows a third signal to forcibly terminate the process
699
- * - set a long timeout (dcpConfig.worker.cleanupTimeout seconds), after which the process
700
- * exits forcibly with a non-zero exit code (unix standard for various signals)
823
+ * - set a long timeout (cleanupTimeout seconds), after which the process exits forcibly with a
824
+ * non-zero exit code (unix standard for various signals)
701
825
  * - the worker is soft-stopped after the first signal if it is not SIGQUIT but hard stopped
702
826
  * all other times.
703
827
  */
704
- function handleSigDeath(signalName, signal)
828
+ async function handleSigDeath(signalName, signal)
705
829
  {
830
+ const exitCode = signal ? signal - 128 /* unix standard */ : exitCodes.normal;
706
831
  handleSigDeath.count = Number(handleSigDeath.count || 0) + 1;
707
-
708
832
  if (handleSigDeath.count > 2)
709
833
  {
710
834
  process.on(signalName, () => {
711
835
  console.error(signalName);
712
- process.exit(signal - 128)
836
+ cleanupThenExit(exitCode);
713
837
  });
714
838
  }
715
839
 
716
- if (!worker)
840
+ const warning = signal
841
+ ? `\ntrapped ${signalName}, signal ${signal}`
842
+ : `\ntrapped ${signalName === 'SIGESC' ? '<esc>' : signalName}`;
843
+ if (!worker || !worker.workingSandboxes.length)
717
844
  {
718
- console.warn(`trapped ${signalName}, signal ${signal}`);
719
- die();
845
+ console.warn(warning);
846
+ await cleanupThenExit(exitCode);
847
+ return;
720
848
  }
721
849
  else
722
850
  {
723
851
  const immediate = signalName === 'SIGQUIT' || handleSigDeath.count > 1;
724
- console.warn(`trapped ${signalName}, signal ${signal} -- stopping worker`,
725
- immediate ? 'immediately' : `after ${handleSigDeath.count} slices have finished`);
726
- worker.stop(immediate);
727
- setTimeout(die, worker.config.cleanupTimeout * 1e3).unref();
728
- }
729
- process.emit('dcpBeforeExit');
730
-
731
- function die()
732
- {
733
- processExit(signal - 128);
852
+ console.warn(`${warning} -- stopping worker`,
853
+ immediate
854
+ ? 'immediately'
855
+ : `after ${worker.workingSandboxes.length} slices have finished`);
856
+ worker.stop(immediate); /* DistributiveWorker<end> triggers cleanupThenExit */
857
+ worker.removeAllListeners('warning');
734
858
  }
735
859
 
736
860
  if (handleSigDeath.count === 3)
737
- die();
861
+ await cleanupThenExit(exitCode);
738
862
  }
739
- globalThis.die = () => handleSigDeath('QUIT', 15);
740
863
 
741
- /**
742
- * Cast b to boolean such that 'false' becomes false, falsey things become false, and everything else
743
- * becomes true.
744
- */
745
- function mkBool(b)
864
+ function reportUserError(message)
746
865
  {
747
- return Boolean(b) && (b !== 'false');
866
+ console.error(message);
867
+ return cleanupThenExit(exitCodes.userError);
748
868
  }