dcp-worker 4.3.8 → 4.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/dcp-evaluator-manager +2 -0
- package/bin/dcp-worker +197 -101
- package/lib/default-ui-events.js +3 -3
- package/lib/pidfile.js +7 -3
- package/lib/show.js +7 -16
- package/lib/telnetd.js +1 -1
- package/lib/utils.js +212 -9
- package/lib/web-iface.js +495 -91
- package/lib/worker-consoles/dashboard-console.js +8 -1
- package/lib/worker-consoles/stdio-console.js +9 -2
- package/package.json +7 -8
- package/www/admin/index.html +22 -0
- package/www/admin/manage-worker.html +19 -0
- package/www/admin/register-worker.html +334 -0
- package/www/hud/dark.css +10 -0
- package/www/hud/hud-common.css +19 -0
- package/www/hud/hud-common.mjs +69 -0
- package/www/hud/index.html +46 -0
- package/www/hud/small-dark.html +348 -0
- package/lib/web-console.js +0 -178
|
@@ -147,9 +147,11 @@ async function listenForConnections(config)
|
|
|
147
147
|
{
|
|
148
148
|
console.log('Using dynamic port', server.address().port);
|
|
149
149
|
// Send info by IPC, if the spawn is setup for that. Probably don't need to try/catch.
|
|
150
|
+
/* WHAT IS THIS FOR? /wg feb 2026 */
|
|
150
151
|
if (process.send)
|
|
151
152
|
try { process.send(server.address().port); } catch (e) {}
|
|
152
153
|
}
|
|
154
|
+
|
|
153
155
|
console.log('Evaluator command is', config.proc, config.argv);
|
|
154
156
|
});
|
|
155
157
|
}
|
package/bin/dcp-worker
CHANGED
|
@@ -18,13 +18,14 @@ const process = require('process');
|
|
|
18
18
|
const fs = require('fs');
|
|
19
19
|
const path = require('path');
|
|
20
20
|
const debug = require('debug');
|
|
21
|
+
const assert = require('assert');
|
|
21
22
|
const telnetd = require('../lib/telnetd');
|
|
22
|
-
const httpd = require('../lib/web-
|
|
23
|
+
const httpd = require('../lib/web-iface');
|
|
23
24
|
const pidfile = require('../lib/pidfile');
|
|
24
25
|
const reports = require('../lib/reports');
|
|
25
26
|
const utils = require('../lib/utils');
|
|
26
27
|
const loggers = require('../lib/loggers');
|
|
27
|
-
var wallet,
|
|
28
|
+
var wallet, a$sleep, DistributiveWorker; /* populate in main() */
|
|
28
29
|
|
|
29
30
|
const { exitCodes,
|
|
30
31
|
logLevels } = require('../lib/consts');
|
|
@@ -53,7 +54,7 @@ const replHelpers = {
|
|
|
53
54
|
},
|
|
54
55
|
};
|
|
55
56
|
telnetd.init(replHelpers);
|
|
56
|
-
const dcpClientOptions = { dcpConfig: { worker: { logging: {
|
|
57
|
+
const dcpClientOptions = { dcpConfig: { worker: { logging: { } } } };
|
|
57
58
|
require('dcp-client').init(dcpClientOptions).then(main).catch(handleUnhandled);
|
|
58
59
|
|
|
59
60
|
function usage()
|
|
@@ -70,59 +71,63 @@ Options:
|
|
|
70
71
|
--dcp-identity=filename Name of file containing identity keystore
|
|
71
72
|
--earnings-account=string Bank account into which to deposit earnings
|
|
72
73
|
--earnings-account=keystore Keystore, in json format, for bank account
|
|
73
|
-
--earnings-account=filename
|
|
74
|
+
--earnings-account=filename Keystore filename for bank account
|
|
74
75
|
-c, --cores=[#cpu][,#gpu] Override detected number of CPU and GPU
|
|
75
76
|
cores available to the evaluator.
|
|
76
|
-
-u, --utilization=[cpu][,gpu]
|
|
77
|
-
|
|
78
|
-
-
|
|
79
|
-
-
|
|
80
|
-
-
|
|
81
|
-
-
|
|
82
|
-
|
|
83
|
-
platforms), event-log (win32)
|
|
77
|
+
-u, --utilization=[cpu][,gpu] proportion of CPU,GPU to use; 0..1
|
|
78
|
+
-m, --max-sandboxes=integer Override the maximum number of sandboxes
|
|
79
|
+
-H, --hostname= Override evaluator hostname
|
|
80
|
+
-p, --port= Override evaluator port number
|
|
81
|
+
-o, --output=mode Set the console mode: text|dashboard|none
|
|
82
|
+
-l, --logger=type Add an additional logger: syslog, logfile
|
|
83
|
+
(all platforms), event-log (win32)
|
|
84
84
|
--logfile=filename Change filename, implies --logger=logfile
|
|
85
|
-
--overwrite-logfile Overwrite
|
|
86
|
-
--syslog=url
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
--min-level=[logger,]level Set
|
|
91
|
-
--min-level=[console,]level Set
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
-
|
|
95
|
-
|
|
85
|
+
--overwrite-logfile Overwrite log files instead of appending
|
|
86
|
+
--syslog=url|facility Change syslog target config, implies
|
|
87
|
+
--logger=syslog. Facility (eg. local7) can
|
|
88
|
+
be specified alone or as the pathname
|
|
89
|
+
component of the URL.
|
|
90
|
+
--min-level=[logger,]level Set lowest log level sent to a given logger
|
|
91
|
+
--min-level=[console,]level Set lowest log level sent to the console
|
|
92
|
+
Levels: debug, info, notice, warn, error,
|
|
93
|
+
crit, alert, emerg
|
|
94
|
+
-j, --job Restrict worker to a specific set of jobs.
|
|
95
|
+
Use more than once for multiple jobs.
|
|
96
|
+
-g, --join=joinKey,joinSecret Join a compute group via plaintext secret
|
|
97
|
+
-g, --join=joinKey,eh1-joinHash Join a compute group via hashed secret.
|
|
96
98
|
Use more than once to join multiple groups.
|
|
97
|
-
-a, --allow-origin=origin Grant all jobs network access to
|
|
99
|
+
-a, --allow-origin=origin Grant all jobs network access to origin
|
|
98
100
|
Use more than once for multiple origins.
|
|
99
|
-
--zero-trust Do not load
|
|
100
|
-
|
|
101
|
-
--
|
|
101
|
+
--zero-trust Do not load config data from scheduler,
|
|
102
|
+
send management statistics, etc.
|
|
103
|
+
--no-global Do not join the Global Compute Group
|
|
104
|
+
--show=type Show configuration data and exit. Types:
|
|
102
105
|
- worker-id: the worker id
|
|
103
|
-
- identity:
|
|
104
|
-
- earnings-account:
|
|
105
|
-
- owner:
|
|
106
|
-
- allowed-origins:
|
|
107
|
-
- compute-groups: credentials
|
|
106
|
+
- identity: identity address
|
|
107
|
+
- earnings-account: bank account address
|
|
108
|
+
- owner: registered owner address
|
|
109
|
+
- allowed-origins: data in origin manager
|
|
110
|
+
- compute-groups: credentials in request
|
|
108
111
|
- config: entire worker.config
|
|
109
112
|
- <dot-path>: a property of worker.config
|
|
110
113
|
--kvin | --json Use KVIN or JSON for show output
|
|
111
114
|
--startup-delay=num Delay for num seconds during startup
|
|
112
|
-
--startup-delay-rnd=num Delay for a random
|
|
113
|
-
--disable-httpd Disable
|
|
114
|
-
--
|
|
115
|
-
|
|
116
|
-
--
|
|
115
|
+
--startup-delay-rnd=num Delay for a random time up to num seconds
|
|
116
|
+
--disable-httpd Disable management service
|
|
117
|
+
--management-hostname= Override the management service hostname
|
|
118
|
+
--management-port= Override the management service port
|
|
119
|
+
--register=key[,label] Use the given key to register this worker
|
|
120
|
+
with your Distributive account and exit
|
|
121
|
+
--unmanaged Do not report statistics to or read config
|
|
117
122
|
from scheduler; implies --no-claim-earnings
|
|
118
|
-
--no-claim-earnings Do not transfer unclaimed earnings to the
|
|
119
|
-
|
|
120
|
-
--claim-earnings= Transfer unclaimed earnings to the
|
|
121
|
-
account and exit
|
|
123
|
+
--no-claim-earnings Do not transfer unclaimed earnings to the
|
|
124
|
+
earnings account when the worker starts
|
|
125
|
+
--claim-earnings= Transfer unclaimed earnings to the
|
|
126
|
+
specified account and exit
|
|
122
127
|
-f, --pidfile=filename Specify the location of the pid file
|
|
123
128
|
--no-pidfile Do not write the pid file
|
|
124
|
-
-v, --verbose Enable verbose output. Use more than once
|
|
125
|
-
increase verbosity.
|
|
129
|
+
-v, --verbose Enable verbose output. Use more than once
|
|
130
|
+
to increase verbosity.
|
|
126
131
|
`);
|
|
127
132
|
}
|
|
128
133
|
|
|
@@ -143,13 +148,20 @@ function* parseCliOptions(argv)
|
|
|
143
148
|
+ 'h(help)*:(earnings-account)*:(identity-keystore)'
|
|
144
149
|
+ 'c:(cores)u:(utilization)m:(max-sandboxes)H:(hostname)p:(port)'
|
|
145
150
|
+ 'o:(output)l:(logger)*:(logfile)*(overwrite-logfile)*:(syslog)*:(min-level)j:(job)g:(join)'
|
|
146
|
-
+ 'a:(allow-origin)*(zero-trust)*(no-global)*(kvin)*(json)*:(show)*
|
|
151
|
+
+ 'a:(allow-origin)*(zero-trust)*(no-global)*(kvin)*(json)*:(show)*'
|
|
152
|
+
+ '(unmanaged)*:(management-hostname)*:(management-port)'
|
|
153
|
+
+ '*(no-masquerade)*:(force-worker-id)*:(lookup-worker-id)'
|
|
147
154
|
+ '*(no-claim-earnings)*:(register)*:(claim-earnings)*(reset)f:(pidfile)*(no-pidfile)v(verbose)';
|
|
148
155
|
|
|
156
|
+
const ck = /(\([^)]*\):?){2}/.exec(optionsDefn);
|
|
157
|
+
if (ck)
|
|
158
|
+
throw new Error(`getopt - option character missing at offset ${ck.index} ${ck[1]}`);
|
|
159
|
+
|
|
149
160
|
optionsDefn = optionsDefn /* patchup long opt * to > \u1000 chars */
|
|
150
161
|
.match(/[A-Za-z*]:?(\([^)]*\))?/g)
|
|
151
162
|
.map(s => s[0] === '*' ? String.fromCharCode(++longOptCount + 0x1000) + s.slice(1) : s).join('');
|
|
152
163
|
const optionChars = optionsDefn.match(/[A-Za-z\u1000-\u1100/](:?\([^)]*\))?/g).map(s=>s[0]);
|
|
164
|
+
|
|
153
165
|
if (optionChars.length !== utils.uniq(optionChars).length)
|
|
154
166
|
throw new Error('getopt - option character used more than once');
|
|
155
167
|
|
|
@@ -225,8 +237,10 @@ function processCliOptsPhase1(argv, operatingMode)
|
|
|
225
237
|
case 'zero-trust':
|
|
226
238
|
operatingMode.zeroTrust = true;
|
|
227
239
|
operatingMode.unmanaged = true;
|
|
240
|
+
operatingMode.startHttpd = false;
|
|
228
241
|
break;
|
|
229
242
|
case 'unmanaged':
|
|
243
|
+
operatingMode.startHttpd = false;
|
|
230
244
|
operatingMode.unmanaged = true;
|
|
231
245
|
break;
|
|
232
246
|
case 'H':
|
|
@@ -278,9 +292,29 @@ function processCliOptsPhase1(argv, operatingMode)
|
|
|
278
292
|
case '?':
|
|
279
293
|
process.exit(exitCodes.error);
|
|
280
294
|
break;
|
|
295
|
+
case 'lookup-worker-id':
|
|
296
|
+
operatingMode.lookupWorkerId = [ optarg ].concat(operatingMode.lookupWorkerId);
|
|
297
|
+
break;
|
|
298
|
+
case 'no-masquerade':
|
|
299
|
+
operatingMode.identityMasquerade = false;
|
|
300
|
+
break;
|
|
301
|
+
case 'force-worker-id':
|
|
302
|
+
{
|
|
303
|
+
const { schedulerConstants } = require('dcp/utils');
|
|
304
|
+
const idRe = new RegExp(`^[${schedulerConstants.workerIdAlphabet}]*`);
|
|
305
|
+
const match = optarg.match(idRe);
|
|
306
|
+
if (match[0].length !== optarg.length)
|
|
307
|
+
reportUserError(`--force-worker-id: invalid character at position ${match[0].length + 1}`);
|
|
308
|
+
else if (optarg.length !== schedulerConstants.workerIdLength)
|
|
309
|
+
reportUserError('--force-worker-id: invalid length');
|
|
310
|
+
DistributiveWorker.obtainWorkerId = () => optarg;
|
|
311
|
+
break;
|
|
312
|
+
}
|
|
313
|
+
/* options below this point are processed in both phase 1 and phase 2 */
|
|
281
314
|
case 'claim-earnings':
|
|
282
315
|
case 'register':
|
|
283
316
|
operatingMode.zeroTrust = true;
|
|
317
|
+
case 'show':
|
|
284
318
|
operatingMode.consoleType = 'stdio';
|
|
285
319
|
/* fallthrough */
|
|
286
320
|
default:
|
|
@@ -294,8 +328,8 @@ function processCliOptsPhase1(argv, operatingMode)
|
|
|
294
328
|
|
|
295
329
|
async function processCliOptsPhase2(argv, operatingMode, loggingOptions)
|
|
296
330
|
{
|
|
297
|
-
|
|
298
|
-
|
|
331
|
+
if (!worker.config.pidfile)
|
|
332
|
+
worker.config.pidfile = pidfile.getDefaultPidFileName();
|
|
299
333
|
for (let { option, optarg } of parseCliOptions(argv))
|
|
300
334
|
{
|
|
301
335
|
switch(option)
|
|
@@ -350,6 +384,7 @@ async function processCliOptsPhase2(argv, operatingMode, loggingOptions)
|
|
|
350
384
|
break;
|
|
351
385
|
case 'show':
|
|
352
386
|
operatingMode.show = optarg;
|
|
387
|
+
operatingMode.consoleType = 'stdio';
|
|
353
388
|
break;
|
|
354
389
|
case 'startup-delay':
|
|
355
390
|
await a$sleep(Number(optarg));
|
|
@@ -358,10 +393,10 @@ async function processCliOptsPhase2(argv, operatingMode, loggingOptions)
|
|
|
358
393
|
await a$sleep(Number(optarg) * Math.random());
|
|
359
394
|
break;
|
|
360
395
|
case 'f':
|
|
361
|
-
|
|
396
|
+
worker.config.pidfile = optarg;
|
|
362
397
|
break;
|
|
363
398
|
case 'no-pidfile':
|
|
364
|
-
|
|
399
|
+
worker.config.pidfile = false;
|
|
365
400
|
break;
|
|
366
401
|
case 'unmanaged':
|
|
367
402
|
worker.config.unmanaged = true;
|
|
@@ -371,24 +406,15 @@ async function processCliOptsPhase2(argv, operatingMode, loggingOptions)
|
|
|
371
406
|
break;
|
|
372
407
|
case 'register':
|
|
373
408
|
{
|
|
374
|
-
if (!wallet.isPrivateKey(optarg))
|
|
375
|
-
await reportUserError('invalid registration key');
|
|
376
409
|
try
|
|
377
410
|
{
|
|
378
|
-
const
|
|
411
|
+
const [ rkey, label ] = optarg.split(',');
|
|
412
|
+
const reg = await utils.registerWorker(worker, rkey, label, reportUserError);
|
|
379
413
|
if (reg instanceof Error)
|
|
380
414
|
console.error(reg);
|
|
381
415
|
else
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
console.error(`Registration failure: ${reg.status}`);
|
|
385
|
-
else
|
|
386
|
-
{
|
|
387
|
-
console.log(`Registration complete; worker ${worker.id} now managed by ${reg.owner}`);
|
|
388
|
-
if (pidfileName)
|
|
389
|
-
await require('../lib/pidfile').signal(pidfileName, 'SIGQUIT', 30, 'stopping worker on pid %i');
|
|
390
|
-
}
|
|
391
|
-
}
|
|
416
|
+
await require('../lib/pidfile').signal(worker.config.pidfile, 'SIGQUIT', 30,
|
|
417
|
+
'stopping worker on pid %i');
|
|
392
418
|
}
|
|
393
419
|
catch(error)
|
|
394
420
|
{
|
|
@@ -428,6 +454,12 @@ async function processCliOptsPhase2(argv, operatingMode, loggingOptions)
|
|
|
428
454
|
await cleanupThenExit(exitCodes.normal);
|
|
429
455
|
break;
|
|
430
456
|
}
|
|
457
|
+
case 'management-hostname':
|
|
458
|
+
httpd.config.hostname = optarg;
|
|
459
|
+
break;
|
|
460
|
+
case 'management-port':
|
|
461
|
+
httpd.config.port = Number(optarg);
|
|
462
|
+
break;
|
|
431
463
|
case 'disable-httpd':
|
|
432
464
|
operatingMode.startHttpd = false;
|
|
433
465
|
break;
|
|
@@ -444,8 +476,6 @@ async function processCliOptsPhase2(argv, operatingMode, loggingOptions)
|
|
|
444
476
|
break;
|
|
445
477
|
}
|
|
446
478
|
}
|
|
447
|
-
|
|
448
|
-
return { pidfileName };
|
|
449
479
|
}
|
|
450
480
|
|
|
451
481
|
/**
|
|
@@ -489,9 +519,10 @@ async function mergeDbAppConfig(remainArgv, operatingMode, loggingOptions)
|
|
|
489
519
|
*/
|
|
490
520
|
async function main()
|
|
491
521
|
{
|
|
522
|
+
var workerIsStopping = false;
|
|
523
|
+
const identity = require('dcp/identity');
|
|
492
524
|
/* handle file-wide dcp-client imports */
|
|
493
|
-
|
|
494
|
-
wallet = require('dcp/wallet');
|
|
525
|
+
wallet = require('dcp/wallet');
|
|
495
526
|
({ a$sleep } = require('dcp/utils'));
|
|
496
527
|
({ DistributiveWorker } = require('dcp/worker'));
|
|
497
528
|
|
|
@@ -502,7 +533,6 @@ async function main()
|
|
|
502
533
|
startHttpd: true
|
|
503
534
|
};
|
|
504
535
|
|
|
505
|
-
identity.interactive = false;
|
|
506
536
|
process.exitCode = exitCodes.lostRef;
|
|
507
537
|
|
|
508
538
|
/* Telnetd is started ASAP to make it possible to troubleshoot very early problems. Then set our
|
|
@@ -515,7 +545,6 @@ async function main()
|
|
|
515
545
|
evaluatorOptions, /* options for sandbox constructor factory */
|
|
516
546
|
loggingOptions, /* options to tell us where logs go besides the console */
|
|
517
547
|
} = processCliOptsPhase1(process.argv, operatingMode);
|
|
518
|
-
await identity.login('$login');
|
|
519
548
|
|
|
520
549
|
if (dcpConfig.worker.logging?.syslog)
|
|
521
550
|
loggingOptions.loggers.push('syslog');
|
|
@@ -523,6 +552,21 @@ async function main()
|
|
|
523
552
|
loggingOptions.loggers.push('logfile');
|
|
524
553
|
if (dcpConfig.worker.eventLog)
|
|
525
554
|
loggingOptions.loggers.push('event-log');
|
|
555
|
+
if (operatingMode.lookupWorkerId)
|
|
556
|
+
await utils.patchWorkerId(operatingMode.lookupWorkerId);
|
|
557
|
+
|
|
558
|
+
identity.interactive = false;
|
|
559
|
+
if (operatingMode.identityMasquerade === false)
|
|
560
|
+
await identity.login('$login');
|
|
561
|
+
else
|
|
562
|
+
{
|
|
563
|
+
/* Acquire a unique identity for this worker so that registration proxies will be unique even in
|
|
564
|
+
* case when the id.keystore is shared across a campus. The masquerade identity is composed from the
|
|
565
|
+
* worker id and either the default id.keystore file or the --dcp-identity parameter.
|
|
566
|
+
*/
|
|
567
|
+
await identity.masquerade(DistributiveWorker.obtainWorkerId(), '$login');
|
|
568
|
+
}
|
|
569
|
+
|
|
526
570
|
if (!operatingMode.unmanaged)
|
|
527
571
|
await mergeDbAppConfig(remainArgv, operatingMode, loggingOptions);
|
|
528
572
|
loggingOptions.loggers = utils.uniq(loggingOptions.loggers);
|
|
@@ -534,8 +578,12 @@ async function main()
|
|
|
534
578
|
trustScheduler: operatingMode.zeroTrust ? false : dcpConfig.worker.trustScheduler,
|
|
535
579
|
unmanaged: operatingMode.unmanaged ? true : dcpConfig.worker.unmanaged,
|
|
536
580
|
}), SandboxConstructor);
|
|
537
|
-
|
|
581
|
+
|
|
538
582
|
worker.runInfo.type = 'dcp-worker';
|
|
583
|
+
worker.on('warning', (...payload) => console.warn (...payload));
|
|
584
|
+
worker.on('claim', amount => console.log(` . transfered ${amount} ⊇ to ${worker.config.paymentAddress}`));
|
|
585
|
+
|
|
586
|
+
await processCliOptsPhase2(remainArgv, operatingMode, loggingOptions);
|
|
539
587
|
|
|
540
588
|
if (operatingMode.show)
|
|
541
589
|
{
|
|
@@ -547,6 +595,9 @@ async function main()
|
|
|
547
595
|
await cleanupThenExit(exitCodes.normal);
|
|
548
596
|
}
|
|
549
597
|
|
|
598
|
+
if (operatingMode.startHttpd && worker.config.unmanaged !== true)
|
|
599
|
+
await httpd.a$init(worker);
|
|
600
|
+
|
|
550
601
|
const sandboxConfig = (new SandboxConstructor()).config;
|
|
551
602
|
console.info(` * Starting Distributive Worker ${worker.id}`);
|
|
552
603
|
console.info(` . Using evaluator at ${utils.shortLoc(sandboxConfig.location)}`);
|
|
@@ -557,16 +608,17 @@ async function main()
|
|
|
557
608
|
else
|
|
558
609
|
console.info(' . Earned funds will be deposited into an unclaimed earnings account - register worker to claim');
|
|
559
610
|
console.info(` . Identity is ${identity.get()?.address}`);
|
|
611
|
+
|
|
560
612
|
if (operatingMode.zeroTrust)
|
|
561
|
-
|
|
613
|
+
console.info(' ! Zero Trust mode enabled');
|
|
562
614
|
else if (!worker.config.trustScheduler)
|
|
563
615
|
console.info(' ! Scheduler Trust disabled');
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
616
|
+
if (operatingMode.zeroTrust !== false)
|
|
617
|
+
{
|
|
618
|
+
assert(worker.config.trustScheduler === false);
|
|
619
|
+
assert(worker.config.unmanaged === true)
|
|
620
|
+
assert(operatingMode.startHttpd === false);
|
|
621
|
+
}
|
|
570
622
|
|
|
571
623
|
require('../lib/default-ui-events').hook(worker, loggingOptions);
|
|
572
624
|
console.setWorker(worker);
|
|
@@ -581,9 +633,6 @@ async function main()
|
|
|
581
633
|
if (loggingOptions.verbose)
|
|
582
634
|
console.info(` + Verbosity level: ${loggingOptions.verbose}`);
|
|
583
635
|
|
|
584
|
-
if (operatingMode.startHttpd)
|
|
585
|
-
await httpd.a$init().catch(e => e);
|
|
586
|
-
|
|
587
636
|
/* Poll the evaluator forever until it accepts a connection */
|
|
588
637
|
let workerInfo = false;
|
|
589
638
|
console.throb(` . Waiting for dcp-evaluator on ${utils.shortLoc(sandboxConfig.location)} to start...`);
|
|
@@ -592,6 +641,8 @@ async function main()
|
|
|
592
641
|
const ts = performance.now();
|
|
593
642
|
if ((workerInfo = await require('../lib/worker-info').getEvaluatorInformation(sandboxConfig)))
|
|
594
643
|
break;
|
|
644
|
+
if (i === 0 && operatingMode.unmanaged !== true)
|
|
645
|
+
utils.noteWorkerStart(worker);
|
|
595
646
|
if (performance.now() - ts > 100)
|
|
596
647
|
console.throb();
|
|
597
648
|
await a$sleep(Math.max(i + 1, 10) / 4);
|
|
@@ -609,6 +660,12 @@ async function main()
|
|
|
609
660
|
{
|
|
610
661
|
console.info(' . WebGPU not enabled');
|
|
611
662
|
systemStateInfo.gpu = false;
|
|
663
|
+
if (worker.runInfo)
|
|
664
|
+
{
|
|
665
|
+
/** @todo move GPU type reporting into DistributiveWorker class /wg jun 2025 */
|
|
666
|
+
worker.runInfo.gpuType = null;
|
|
667
|
+
worker.runInfo.gpuCores = 0;
|
|
668
|
+
}
|
|
612
669
|
}
|
|
613
670
|
else
|
|
614
671
|
{
|
|
@@ -634,16 +691,29 @@ async function main()
|
|
|
634
691
|
}
|
|
635
692
|
|
|
636
693
|
/** @todo move GPU type reporting into DistributiveWorker class /wg jun 2025 */
|
|
637
|
-
worker.runInfo
|
|
638
|
-
|
|
639
|
-
|
|
694
|
+
if (worker.runInfo)
|
|
695
|
+
{
|
|
696
|
+
if (systemStateInfo.gpu.disabled)
|
|
697
|
+
{
|
|
698
|
+
worker.runInfo.gpuType += ' (disabled)';
|
|
699
|
+
worker.runInfo.gpuCores = 0;
|
|
700
|
+
}
|
|
701
|
+
else
|
|
702
|
+
{
|
|
703
|
+
worker.runInfo.gpuType = workerInfo.webgpu.info.device;
|
|
704
|
+
worker.runInfo.gpuCores = 1;
|
|
705
|
+
}
|
|
706
|
+
}
|
|
640
707
|
}
|
|
641
708
|
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
709
|
+
if (!workerIsStopping)
|
|
710
|
+
{
|
|
711
|
+
console.info(` . Utilization Target: cpu: ${String(Math.round(worker.config.utilization.cpu * 100)).padStart(3, ' ')}% `
|
|
712
|
+
+ `gpu: ${Math.round(worker.config.utilization.gpu * 100)}%`);
|
|
713
|
+
console.info(` . Configured Cores: cpu: ${String(worker.config.cores.cpu).padEnd(3, ' ')} `
|
|
714
|
+
+ ` gpu: ${worker.config.cores.gpu}`);
|
|
715
|
+
console.info(` . Maximum Sandboxes: ${worker.config.maxSandboxes}`);
|
|
716
|
+
}
|
|
647
717
|
console.info(' . Supervisor version: ' + worker.supervisorVersion);
|
|
648
718
|
console.info(' . Console type: ' + operatingMode.consoleType);
|
|
649
719
|
console.info(' . Logging: ' + (loggingOptions.loggers.join(', ') || 'none'));
|
|
@@ -659,10 +729,10 @@ async function main()
|
|
|
659
729
|
return;
|
|
660
730
|
require('../lib/check-scheduler-version').check();
|
|
661
731
|
|
|
662
|
-
worker.on('end', () => {
|
|
732
|
+
worker.on('end', async () => {
|
|
663
733
|
if (process.exitCode === exitCodes.lostRef)
|
|
664
734
|
process.exitCode = exitCodes.normal;
|
|
665
|
-
cleanupThenExit(exitCodes.normal);
|
|
735
|
+
await cleanupThenExit(exitCodes.normal);
|
|
666
736
|
});
|
|
667
737
|
|
|
668
738
|
worker.on('error', function handleWorkerError(error) {
|
|
@@ -692,7 +762,7 @@ async function main()
|
|
|
692
762
|
}
|
|
693
763
|
|
|
694
764
|
/* Let the supervisor try to finish slices for up to three minutes after a single unrecoverable error. */
|
|
695
|
-
worker.stop(Boolean(handleWorkerError.count));
|
|
765
|
+
worker && worker.stop(Boolean(handleWorkerError.count));
|
|
696
766
|
handleWorkerError.count = 1 + (handleWorkerError.count || 0);
|
|
697
767
|
if (handleWorkerError.count === 1)
|
|
698
768
|
{
|
|
@@ -701,7 +771,7 @@ async function main()
|
|
|
701
771
|
}
|
|
702
772
|
});
|
|
703
773
|
|
|
704
|
-
if (
|
|
774
|
+
if (worker.config.pidfile && require('../lib/pidfile').write(worker.config.pidfile) !== true)
|
|
705
775
|
await cleanupThenExit(exitCodes.pidConflict);
|
|
706
776
|
|
|
707
777
|
if (worker.config.claimEarnings === false && !worker.config.unmanaged)
|
|
@@ -711,9 +781,11 @@ async function main()
|
|
|
711
781
|
console.info(` ! not claming earnings in the amount of ${amount} ⊇ (can claim later)`);
|
|
712
782
|
}
|
|
713
783
|
|
|
784
|
+
worker.on('stop', () => { console.log(' - Worker is stopping'); workerIsStopping = true });
|
|
785
|
+
worker.on('end', () => { console.log(' * Worker has stopped') });
|
|
786
|
+
worker.on('job', job => console.log(` . Job: ${job.name} ${job.id.slice(0,8)} ${job.description || ''} ${job.link || ''}`));
|
|
714
787
|
console.info(' * Ready.\n');
|
|
715
788
|
await worker.start();
|
|
716
|
-
httpd.setWorker(worker);
|
|
717
789
|
} /* main() */
|
|
718
790
|
|
|
719
791
|
/**
|
|
@@ -730,12 +802,17 @@ async function main()
|
|
|
730
802
|
*/
|
|
731
803
|
function parseResourceOption(option)
|
|
732
804
|
{
|
|
805
|
+
/**
 * Convert one component of a resource option (eg. the cpu or gpu part of --utilization) from its
 * command-line string form into a number.
 *
 * A value with a trailing percent sign is converted to a proportion, so that '50%' becomes 0.5; this
 * matches the 0..1 range documented for --utilization in usage(). Any other value is converted with
 * Number(), so '0.5' becomes 0.5 and '4' becomes 4.
 *
 * @param {string} x  the text of a single resource component, with whitespace already removed
 * @returns {number} the numeric value; NaN when the text is not a number
 */
function pctOrNumber(x)
{
  /* parseFloat stops at the '%', yielding the percentage; divide by 100 to get the proportion */
  return x.endsWith('%') ? parseFloat(x) / 100 : Number(x);
}
|
|
809
|
+
|
|
733
810
|
const [cpu, gpu] = option.replace(/ /g, '').split(',');
|
|
734
811
|
const res = {};
|
|
735
812
|
if (cpu?.length > 0)
|
|
736
|
-
res.cpu =
|
|
813
|
+
res.cpu = pctOrNumber(cpu);
|
|
737
814
|
if (gpu?.length > 0)
|
|
738
|
-
res.gpu =
|
|
815
|
+
res.gpu = pctOrNumber(gpu);
|
|
739
816
|
return res;
|
|
740
817
|
}
|
|
741
818
|
|
|
@@ -745,9 +822,11 @@ function parseResourceOption(option)
|
|
|
745
822
|
* the worker must be restarted. This handler does its best to report the rejection and give the worker a few
|
|
746
823
|
* seconds in which to attempt to return slices to the scheduler before it gives up completely.
|
|
747
824
|
*/
|
|
748
|
-
function handleUnhandled(error)
|
|
825
|
+
async function handleUnhandled(error)
|
|
749
826
|
{
|
|
750
|
-
if (
|
|
827
|
+
if (error instanceof utils.UserError)
|
|
828
|
+
console.error('Error:', error?.message);
|
|
829
|
+
else if (!error.request || !error.code?.startsWith('EHTTP_'))
|
|
751
830
|
console.error(' *** trapped unhandled', error);
|
|
752
831
|
else
|
|
753
832
|
{
|
|
@@ -769,7 +848,7 @@ function handleUnhandled(error)
|
|
|
769
848
|
`${Date.now()}: ${error.message}\n${error.stack}\n\n`);
|
|
770
849
|
} catch(e) {} // eslint-disable-line @distributive/brace-style,no-empty
|
|
771
850
|
|
|
772
|
-
cleanupThenExit(exitCodes.unhandled);
|
|
851
|
+
await cleanupThenExit(exitCodes.unhandled);
|
|
773
852
|
}
|
|
774
853
|
|
|
775
854
|
/**
|
|
@@ -780,9 +859,13 @@ function handleUnhandled(error)
|
|
|
780
859
|
* @returns {Promise} - a promise which never resolves, can be used to "block" execution of the current
|
|
781
860
|
* "thread" while the console and loggers are being unhooked.
|
|
782
861
|
*/
|
|
783
|
-
function cleanupThenExit(exitCode)
|
|
862
|
+
async function cleanupThenExit(exitCode)
|
|
784
863
|
{
|
|
785
864
|
debug('dcp-worker:exit')('cleanup and exit, code', exitCode);
|
|
865
|
+
if (worker)
|
|
866
|
+
worker.stop(true); /* toggle internal worker state so it won't surprise us by starting sandboxes */
|
|
867
|
+
|
|
868
|
+
const p$note = utils.noteWorkerShutdown(worker);
|
|
786
869
|
if (cleanupThenExit.busy)
|
|
787
870
|
{
|
|
788
871
|
if (!process.exitCode)
|
|
@@ -813,7 +896,11 @@ function cleanupThenExit(exitCode)
|
|
|
813
896
|
console.close();
|
|
814
897
|
loggers.unhook()
|
|
815
898
|
.catch(error => { console.error('Error unhooking loggers:', error) })
|
|
816
|
-
.finally(() =>
|
|
899
|
+
.finally(async () => {
|
|
900
|
+
await p$note;
|
|
901
|
+
debug('dcp-worker:exit')('done cleanup, exitCode=' + process.exitCode);
|
|
902
|
+
process.exit();
|
|
903
|
+
});
|
|
817
904
|
}
|
|
818
905
|
catch(error)
|
|
819
906
|
{
|
|
@@ -821,6 +908,9 @@ function cleanupThenExit(exitCode)
|
|
|
821
908
|
process.exit(exitCodes.unhandled);
|
|
822
909
|
}
|
|
823
910
|
|
|
911
|
+
/* Return a promise that never resolves so that async functions which want the program to exit can just
|
|
912
|
+
* await it while the log flushing above goes on.
|
|
913
|
+
*/
|
|
824
914
|
return new Promise(()=>1);
|
|
825
915
|
}
|
|
826
916
|
|
|
@@ -841,9 +931,9 @@ async function handleSigDeath(signalName, signal)
|
|
|
841
931
|
handleSigDeath.count = Number(handleSigDeath.count || 0) + 1;
|
|
842
932
|
if (handleSigDeath.count > 2)
|
|
843
933
|
{
|
|
844
|
-
process.on(signalName, () => {
|
|
934
|
+
process.on(signalName, async () => {
|
|
845
935
|
console.error(signalName);
|
|
846
|
-
cleanupThenExit(exitCode);
|
|
936
|
+
await cleanupThenExit(exitCode);
|
|
847
937
|
});
|
|
848
938
|
}
|
|
849
939
|
|
|
@@ -854,7 +944,7 @@ async function handleSigDeath(signalName, signal)
|
|
|
854
944
|
{
|
|
855
945
|
console.warn(warning);
|
|
856
946
|
await cleanupThenExit(exitCode);
|
|
857
|
-
return;
|
|
947
|
+
return; /* unreached */
|
|
858
948
|
}
|
|
859
949
|
else
|
|
860
950
|
{
|
|
@@ -871,6 +961,12 @@ async function handleSigDeath(signalName, signal)
|
|
|
871
961
|
await cleanupThenExit(exitCode);
|
|
872
962
|
}
|
|
873
963
|
|
|
964
|
+
/**
|
|
965
|
+
* Report a user error and then shut down the process. Await the return value to "hang" the thread until
|
|
966
|
+
* the process has ended.
|
|
967
|
+
*
|
|
968
|
+
* @returns promise from cleanupThenExit
|
|
969
|
+
*/
|
|
874
970
|
function reportUserError(message)
|
|
875
971
|
{
|
|
876
972
|
console.error(message);
|
package/lib/default-ui-events.js
CHANGED
|
@@ -26,7 +26,7 @@
|
|
|
26
26
|
*/
|
|
27
27
|
'use strict';
|
|
28
28
|
|
|
29
|
-
const { slicesFetched
|
|
29
|
+
const { slicesFetched } = require('./utils');
|
|
30
30
|
|
|
31
31
|
const sandboxEventHandlers = {};
|
|
32
32
|
const workerEventHandlers = {};
|
|
@@ -101,7 +101,7 @@ exports.hook = function defaultUiEvents$$hook(worker, options)
|
|
|
101
101
|
|
|
102
102
|
workerEventHandlers.fetch = function fetchHandler(ev) {
|
|
103
103
|
if (ev instanceof Error)
|
|
104
|
-
|
|
104
|
+
options.verbose >= 2 && console.error(' ! Failed to fetch slices:', ev); // redundant
|
|
105
105
|
else
|
|
106
106
|
options.verbose && console.log(' . Fetched', slicesFetched(ev), 'slices');
|
|
107
107
|
};
|
|
@@ -112,7 +112,7 @@ exports.hook = function defaultUiEvents$$hook(worker, options)
|
|
|
112
112
|
|
|
113
113
|
workerEventHandlers.result = function resultHandler(ev) {
|
|
114
114
|
if (ev instanceof Error)
|
|
115
|
-
|
|
115
|
+
options.verbose >= 2 && console.error(" ! Failed to submit results:", ev); // redundant
|
|
116
116
|
else
|
|
117
117
|
options.verbose >= 2 && console.log(' . Submitted');
|
|
118
118
|
};
|
package/lib/pidfile.js
CHANGED
|
@@ -135,7 +135,7 @@ function kill(pid, signal)
|
|
|
135
135
|
}
|
|
136
136
|
|
|
137
137
|
/**
|
|
138
|
-
* Signal another process on this host. Handles the case of a
|
|
138
|
+
* Signal another process on this host. Handles the case of a dangling pidfile by ignoring it.
|
|
139
139
|
* @param {string} filename the path to the pidfile, same semantics as exports.write()
|
|
140
140
|
* @param {string | number } signal the signal to send
|
|
141
141
|
* @param {number | undefined } timeout the number of seconds to wait for the process to die before
|
|
@@ -164,17 +164,21 @@ exports.signal = async function pidfile$$signal(filename, signal, timeout, doing
|
|
|
164
164
|
const loopUntil = Date.now() + (timeout * 1e3);
|
|
165
165
|
while (kill(pid, 0) === true)
|
|
166
166
|
{
|
|
167
|
-
await a$sleep(0.10);
|
|
167
|
+
await a$sleep(0.10).ref();
|
|
168
168
|
if (doingWarning)
|
|
169
169
|
console.throb();
|
|
170
170
|
if (Date.now() >= loopUntil)
|
|
171
171
|
{
|
|
172
172
|
const error = new Error(`timeout waiting for process ${pid} to exit`);
|
|
173
173
|
error.code = 'ETIMEOUT';
|
|
174
|
+
if (doingWarning)
|
|
175
|
+
console.throb(null);
|
|
174
176
|
return error;
|
|
175
177
|
}
|
|
176
|
-
await a$sleep(0.40);
|
|
178
|
+
await a$sleep(0.40).ref();
|
|
177
179
|
}
|
|
178
180
|
|
|
181
|
+
if (doingWarning)
|
|
182
|
+
console.throb(null);
|
|
179
183
|
return true;
|
|
180
184
|
}
|