dcp-worker 3.2.24 → 3.2.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/dcp-evaluator-start +84 -149
- package/bin/dcp-worker +354 -379
- package/docs/CODEOWNERS +36 -0
- package/docs/CODEOWNERS.template +10 -0
- package/etc/dcp-worker-config.js +65 -0
- package/etc/dcp-worker-config.js.md5 +2 -0
- package/lib/pidfile.js +96 -0
- package/lib/remote-console.js +28 -34
- package/lib/startWorkerLogger.js +0 -2
- package/lib/worker-loggers/common-types.js +1 -3
- package/lib/worker-loggers/console.js +0 -10
- package/lib/worker-loggers/dashboard.js +21 -18
- package/npm-hooks/postpublish +11 -0
- package/npm-hooks/prepack +17 -0
- package/npm-hooks/prepublish +42 -0
- package/package.json +6 -4
- package/etc/dcp-config.js +0 -51
package/bin/dcp-worker (CHANGED)
```diff
@@ -9,46 +9,43 @@
 'use strict';

 const process = require('process');
-const os
-const fs
-
-
-const ANALYZE_UNHANDELD_REJECTION = false;
-
-// Set default location for pid file
-let DEFAULT_PID_LOC;
-if (fs.existsSync('/var/dcp/run'))
-  DEFAULT_PID_LOC = '/var/dcp/run/';
-else if (fs.existsSync('/var/run'))
-  DEFAULT_PID_LOC = '/var/run/';
-else
-  DEFAULT_PID_LOC = os.tmpdir();
+const os = require('os');
+const fs = require('fs');
+const crypto = require('crypto');
+const chalk = require('chalk');

+const configName = process.env.DCP_CONFIG || '../etc/dcp-worker-config';
 const TOTAL_CPU_VCORES = os.cpus().length;
-const DEFAULT_CORES = TOTAL_CPU_VCORES - 1;
 var worker, dcpConfig;

-const
-
-
-const
-
-
-
-
-
-
-
-
-
-
+const EXIT_UNHANDLED = 5;
+
+/* Setup the telnet REPL up early to ensure early-failure log messages are captured */
+const replHelpers = {
+  help: {
+    report: 'Print a worker status & slice report',
+    kill: 'Try to kill the worker',
+    die: 'Kill the worker',
+  },
+  commands: {
+    report: printReport,
+    kill: process.exit,
+    die: () => worker && worker.stop()
+  },
+};
+require('../lib/remote-console').init(replHelpers);

-
-
-
+/* Initialize dcp-client with local config defaults and run the main function. DCP_CONFIG_COOKIE becomes dcpConfig.cookie. */
+process.env.DCP_CONFIG_COOKIE = (Math.random().toString(16)).slice(2) + '-' + process.pid + '-' + Date.now();
+require('dcp-client').init({ configName }).then(main).catch(handleUnhandled);

+function parseCliArgs()
+{
+  var defaultPidFileName;
+
   dcpConfig = require('dcp/dcp-config');
-
+  defaultPidFileName = require('../lib/pidfile').getDefaultPidFileName(dcpConfig.worker.pidfile);
+
   const cliArgs = require('dcp/cli')
     .base('Standalone NodeJS DCP Worker')
     .options({
@@ -60,7 +57,7 @@ async function main () {
        alias: 'c',
        describe: 'Number of cores to work with',
        type: 'number',
-        default:
+        default: TOTAL_CPU_VCORES - 1,
      },
      verbose: {
        alias: 'v',
@@ -87,7 +84,7 @@ async function main () {
        alias: 'p',
        describe: 'Evaluator port',
        type: 'number',
-        default: Number(dcpConfig.evaluator.
+        default: Number(dcpConfig.evaluator.listen.port),
      },
      priorityOnly: {
        alias: 'P',
@@ -171,11 +168,6 @@ async function main () {
        type: 'array'
      },

-      replPort: {
-        describe: 'If set, open a REPL on specified TCP port',
-        type: 'number',
-        default: undefined,
-      },
      watchdogInterval: {
        alias: 'W',
        describe: 'Number of milliseconds between watchdog cycles',
@@ -187,9 +179,9 @@ async function main () {
        type: 'boolean',
        hidden: true,
      },
-
+      pidFile: {
        alias: 'f',
-        describe:
+        describe: `create a .pid file for the worker; value overrides default location (${defaultPidFileName})`,
        normalize: true
      },
    })
@@ -200,140 +192,138 @@ async function main () {
   if (cliArgs.dumpConfig)
   {
     console.debug(JSON.stringify(require('dcp/dcp-config'), null, 2));
-    process.exit(
+    process.exit(0);
   }

-  return
+  return cliArgs;
 }

-// Preserve console.error, the dashboard replaces it with a custom logger
-const logError = console.error;
-main()
-  .then(exitcode => process.exit(exitcode || 0))
-  .catch(e => {
-    logError("Script failed:");
-    logError(e);
-    process.exit(1);
-  });
-
 // Imperfect, but handles CG { joinKey, joinHash }.
 function isHash(b) {
   return b && b.length === 68 && b.startsWith('eh1-');
 }

-
-
+/**
+ * Add one or more configuration objects into a target via leaf-merging.
+ */
+function addConfig(target, ...objs)
+{
+  const { leafMerge } = require('dcp/utils');
+  var tmp = target;
+
+  for (let obj of objs)
+    tmp = leafMerge(tmp, obj);
+
+  Object.assign(target, tmp);
+}
+
+/**
+ * Main program entry point. Assumes DCP client is already initialized and console logging is ready.
+ */
+async function main()
+{
   const wallet = require('dcp/wallet');
   const DCPWorker = require('dcp/worker').Worker;
   const { startWorkerLogger } = require('../lib/startWorkerLogger');
+  const cliArgs = parseCliArgs();
   const sawOptions = {
     hostname: cliArgs.hostname,
     port: cliArgs.port
   };

-
-  if (cliArgs.paymentAddress)
-    paymentAddress = new wallet.Address(cliArgs.paymentAddress);
-  else
-    paymentAddress = (await wallet.get()).address;
+  verifyDefaultConfigIntegrity();

-
-
+  process.on('SIGINT', handleSigDeath);
+  process.on('SIGTERM', handleSigDeath);
+  process.on('SIGQUIT', handleSigDeath);
+  process.on('unhandledRejection', handleUnhandled);
+  process.on('uncaughtException', handleUnhandled);

-
-
+  let paymentAddress = false
+    || cliArgs.paymentAddress
+    || dcpConfig.worker.paymentAddress
+    || (await wallet.get()).address;
+  if (typeof paymentAddress === 'string')
+    paymentAddress = new wallet.Address(paymentAddress);
+
+  if (cliArgs.pidFile)
+    require('../lib/pidfile').write(cliArgs.pidFile);

+  /* Figure out of the worker's identity and put that keystore in the wallet */
+  let identityKeystore = false;
   if (cliArgs.identityKey)
     identityKeystore = await new wallet.IdKeystore(cliArgs.identityKey, '');
   else if (cliArgs.identityKeystore)
     identityKeystore = await new wallet.IdKeystore(JSON.parse(cliArgs.identityKeystore), '');
   else
     identityKeystore = await wallet.getId();
-
-  // Set the provided identity as the wallet's default
   await wallet.addId(identityKeystore);

-
-
-
-
-
-
-
-
-
-  // The exitGuard will hold an "exit" method, and a Promise to await for
-  // the exit code passed to exitGuard.exit()
-  let exitcode = EXIT_CLEAN;
-  const exitGuard = {
-    promise: Promise.resolve(0), // will be overwritten when worker starts
-    exit(code) { process.exit(code||exitcode||0) }, // will be overwritten when worker starts
-  };
-  process.on('SIGQUIT', () => {
-    exitcode = EXIT_SIGQUIT;
-    cliArgs.verbose >= 1 && console.info(`240: Caught SIGQUIT; exiting worker with exitcode ${exitcode}`);
-    exitGuard.exit(exitcode);
-  });
-
-
-  /** @type {string[]} */
+  /* Build the worker options, which are largely given by dcpConfig.worker. We use a reference for
+   * dcpConfig.worker rather than copying it, so that runtime modifications to the worker configuration
+   * in memory take effect immediately.
+   *
+   * forceOptions override any setting in dcpConfig; this can be used for settings calculated above
+   * which were derived from dcpConfig in the first place. defaultOptions are overrideable by the usual
+   * dcpConfig mechanisms, but since they are dynamic (or non-user-facing) they don't come from the
+   * etc/dcp-worker-config.js file that ships with the work.
+   */
   const dcpWorkerOptions = dcpConfig.worker;
-
-  Object.assign(dcpWorkerOptions, {
+  const forceOptions = {
     paymentAddress,
+    leavePublicGroup: cliArgs.leavePublicGroup || dcpConfig.worker.leavePublicGroup || cliArgs.publicGroupFallback || false,
     maxWorkingSandboxes: cliArgs.cores,
-
-
+  };
+  const defaultOptions = {
     sandboxOptions: {
       SandboxConstructor: require('dcp-client/lib/standaloneWorker').workerFactory(sawOptions)
     },
-
-
-
+  };
+
+  addConfig(dcpWorkerOptions, defaultOptions, dcpConfig.worker, forceOptions);

   /* cliArgs.join is the list of compute groups to join */
   if (cliArgs.join && cliArgs.join.length)
   {
-
+    const cliComputeGroups = cliArgs.join
       .map((el) => {
         /* Map cliArgs.join to give us [{ joinKey, joinSecret/joinHash }...] */
         const [a, b] = el.split(',');
         return isHash(b) ? { joinKey: a, joinHash: b } : { joinKey: a, joinSecret: b };
       })
       .filter((el) => el.joinKey); /* Filter out entries with no joinKey */
-
+
+    addConfig(dcpWorkerOptions.computeGroups, dcpWorkerOptions.computeGroups, cliComputeGroups);
   }
-
+
   if (cliArgs.jobId)
   {
-    dcpWorkerOptions.jobAddresses
+    dcpWorkerOptions.jobAddresses.push(...cliArgs.jobId);
     dcpWorkerOptions.priorityOnly = true;
   }
+
   if (cliArgs.allowedOrigins)
-
+  {
+    if (!dcpWorkerOptions.allowOrigins)
+      dcpWorkerOptions.allowOrigins = {};
+    if (!dcpWorkerOptions.allowOrigins.any)
+      dcpWorkerOptions.allowOrigins.any = [];
+    dcpWorkerOptions.allowOrigins.any.push(...cliArgs.allowedOrigins);
+  }
   if (cliArgs.watchdogInterval)
     dcpWorkerOptions.watchdogInterval = cliArgs.watchdogInterval;

-
+  worker = new DCPWorker(identityKeystore, dcpWorkerOptions);
+  worker.on('error', console.error);
+  worker.on('warning', console.warn);

-
-   *
-   * When (and if) we stop using Supevisor1, delete this reference to setDefaultIdentityKeystore
-   * and delete the corresponding fucntion from Supervisor2.
-   *
-   * startWorkerLogger needs to be called before the worker is started so that
-   * it can attach event listeners before the events fire, else UI events for
-   * things such as progress will never get attached.
-   *
-   * setDefaultIdentityKeystore needs to be called before the logger because it
-   * tries access the identity of the worker before it has started, i.e. where
-   * it sets its identity, throwing an assertion error.
-   *
-   * FIXME(bryan-hoang): This is a fragile solution that is too coupled with the
-   * implementation of the worker that should be addressed in Supervisor 2
+  /* Let incorrect event-loop references keep us alive when linked with a debug library, but
+   * exit quickly/accurately for production code even when the library isn't perfect.
    */
-
-
+  if (require('dcp/build').config.build !== 'debug')
+    worker.on('end', process.exit);
+  else
+    worker.on('end', () => setTimeout(process.exit, getCleanupTimeoutMs()).unref());

   if (cliArgs.eventDebug)
   {
@@ -341,43 +331,34 @@ async function startWorking(cliArgs) {
     worker.supervisor.debug = true;
   }

+  worker.on('stop', () => { console.log('Worker is stopping') });
+  worker.on('end', () => { logClosing('log', 'Worker has stopped') });
+  startWorkerLogger(worker, cliArgs);

-
-  // changing the saved exitcode
-  worker.on('stop', () => {
-    exitGuard.exit();
-  });
-
-
-  startWorkerLogger(worker, {
-    exitGuard,
-    verbose: cliArgs.verbose,
-    outputMode: cliArgs.outputMode,
-
-    logfile: cliArgs.logfile,
+  require('../lib/remote-console').setMainEval(function mainEval() { return eval(arguments[0]) });

-
-
-
-
-
-
-  {
-    require('../lib/remote-console').init(cliArgs.replPort, {
-      help: {
-        report: 'Print a worker status & slice report',
-        kill: 'Kill the worker',
-      },
-      commands: {
-        report: printReport,
-        kill: exitcode => exitGuard.exit(exitcode),
-      },
-    });
-    require('../lib/remote-console').setMainEval(function mainEval() { return eval(arguments[0]) });
-  }
-  catch (error)
+  // Activate public group fallback
+  // If requested by CLI
+  // OR if requested by dcpConfig and not forbidden by the cli
+  if (cliArgs.publicGroupFallback
+      || (dcpConfig.worker?.leavePublicGroup === 'fallback'
+          && typeof cliArgs.publicGroupFallback !== false))
   {
-
+    dcpWorkerOptions.publicGroupFallback = true;
+
+    // If local config blocks the public group, then complain instead of activating fallback
+    if (dcpConfig.worker?.leavePublicGroup === true)
+    {
+      console.warn('* Public Group fallback has been requested, but the public group is blocked by local configuration');
+    }
+    else
+    {
+      worker.on('fetchend', slicesFetched => {
+        // Iff we got work in this fetch, then leave the public group for the
+        // next fetch
+        dcpConfig.worker.leavePublicGroup = Boolean(slicesFetched > 0);
+      });
+    }
   }

   let introBanner = '';
@@ -395,7 +376,7 @@ async function startWorking(cliArgs) {
     plural = singular + 's';
     if (!amount)
       return plural;
-    if (amount
+    if (Number(amount) === 1)
       return singular;
     return plural;
   }
@@ -417,269 +398,263 @@ async function startWorking(cliArgs) {
   introBanner += ' . ready' + '\n';

   console.log(introBanner);
-
   require('../lib/check-scheduler-version').check();

-
-  /** print the slice report via console.log */
-  function printReport()
+  if (parseFloat(cliArgs.reportInterval))
   {
-
+    if (cliArgs.outputMode !== 'dashboard')
+      setInterval(printReport, parseFloat(cliArgs.reportInterval) * 1000).unref();
+    else
+      console.log('Ignoring --reportInterval in dashboard output mode');
   }

-
-
+  /* Start the worker. Normal process exit happens by virtue of the worker<end> event */
+  await worker.start();
+}
+
+/**
+ * Log a closing message (or messages). Since the dashboard clears the screen on exit, we use the
+ * memoized console property to log the message after we destroy the instance of screen.
+ */
+function logClosing(facility, ...message)
+{
+  var screen = require('../lib/worker-loggers/dashboard').screen;
+
+  if (!screen)
+    console[facility](message);
+  else
   {
-
-
-
-
-
-
-
-
-
-
-    };
-    const stateNames = {
-      WORKING: 'Working',
-      ASSIGNED: 'Assigned',
-      READY: 'Ready',
-      TERMINATED: 'Terminated',
-    };
-    sup.sandboxes.forEach(sb => {
-      const { state } = sb;
-      if (!sbStates[state])
-        sbStates[state] = 0;
-      sbStates[state]++;
-    });
-
-    report += (Date()) + '\n';
-    report += ('Sandboxes:') + '\n';
-    Object.keys(sbStates).forEach(state => {
-      const stateName = stateNames[state] || state;
-      report += (` ${(stateName + ':').padEnd(12)} ${sbStates[state]}`) + '\n';
-    })
-    report += (` * ALL: ${sup.sandboxes.length}`) + '\n';
-
-    report += ('Progress:') + '\n';
-    sup.workingSandboxes.forEach(sb => {
-      const jobName = sb.job && sb.job.public && sb.job.public.name || `idek (${sb.jobAddress})`;
-      let el = Date.now() - sb.sliceStartTime;
-      const t = el < 1000000
-        ? toInterval(el)
-        : 'new';
-
-      el = sb.progressReports && sb.progressReports.last
-        ? Date.now() - (sb.sliceStartTime + sb.progressReports.last.timestamp)
-        : 0;
-      const pct = (typeof sb.progress) === 'number'
-        ? `${Number(sb.progress).toFixed(0).padStart(2)}%`
-        : 'ind';
-      const stale = (el < 2000) ? '' : `(stale: ${toInterval(el)})`;
-
-      report += (` ${String(sb.id).padStart(4)}: ${sb.jobAddress} ${jobName.padEnd(34)} `+ `${t} ${pct} ${stale}`.padStart(13)) + '\n';
-    });
-
-    report += ('Slices:') + '\n';
-    report += (` working: ${sup.allocatedSlices.length}`) + '\n';
-    report += (` queued: ${sup.queuedSlices.length}`) + '\n';
-
-    report += ('='.repeat(78)) + '\n';
-
-    return report;
+    /* Turn off fullscreen TUI and resume "normal" console logging.
+     * FUTURE: dashboard API should know how to unregister its hook so that we don't have to clobber
+     * it here.
+     */
+    screen.log(...message);
+    screen.destroy();
+    screen = false;
+    console = new (require('console').Console)(process);
+    require('../lib/remote-console').reintercept();
+    console[facility](...message);
   }
+}

-
-
-
-
-
-
-
+/**
+ * Fatal error handler: __must not ever throw no matter what__.
+ * If we hit a fatal error, we are by definition no longer confident of our program state, meaning that
+ * the worker must be restarted. This handler does its best to report the rejection and give the worker a few
+ * seconds in which to attempt to return slices to the scheduler before it gives up completely.
+ */
+async function handleUnhandled(error)
+{
+  var _worker = worker;
+  worker = false;
+
+  process.exitCode = process.exitCode || EXIT_UNHANDLED;
+
+  try
   {
-
-
-    return `${m}:${s}`;
-  }
+    logClosing(error);
+  } catch(e) {};

-  if (
+  if (!_worker)
+    console.error('trapped unhandled error:', error)
+  else
   {
-
-
-
-      console.log('Ignoring --reportInterval in dashboard output mode');
+    console.error('trapped unhandled error -- stopping worker:', error);
+    _worker.on('end', process.exit);
+    _worker.stop();
   }

+  setTimeout(() => {
+    logClosing('error', 'handleFatalError timeout - exiting now');
+    process.exit();
+  }, getCleanupTimeoutMs()).unref();

-
-
-
-
-
+  try {
+    let log = dcpConfig && dcpConfig.worker && dcpConfig.worker.unhandledRejectionLog;
+    if (!log) log = process.env.DCP_WORKER_UNHANDLED_REJECTION_LOG;
+    if (log) {
+      fs.appendFileSync(process.env.DCP_WORKER_UNHANDLED_REJECTION_LOG,
+        `${Date.now()}: ${error.message}\n${error.stack}\n\n`);
+    }
+  } catch(e) {};
+}

+/** print the slice report via console.log */
+function printReport()
+{
+  console.log(sliceReport());
+}

-
+/**
+ * Convert a timespan in ms to a human-readable interval in minutes and seconds
+ *
+ * @param {number} el Milliseconds to convert
+ * @return {string} Timespan formatted as `m:ss`
+ */
+function toInterval(el)
+{
+  const m = Math.floor((el / 1000) / 60).toString(10);
+  const s = Math.floor((el / 1000) % 60).toString(10).padStart(2, '0');
+  return `${m}:${s}`;
+}
+
+/** retrieve a slice report screen */
+function sliceReport()
+{
+  const sup = worker.supervisor;
+  let report = '';

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+  report += ('='.repeat(78)) + '\n';
+
+  const sbStates = {
+    WORKING: 0,
+    ASSIGNED: 0,
+    READY: 0,
+    TERMINATED: 0,
+  };
+  const stateNames = {
+    WORKING: 'Working',
+    ASSIGNED: 'Assigned',
+    READY: 'Ready',
+    TERMINATED: 'Terminated',
+  };
+  sup.sandboxes.forEach(sb => {
+    const { state } = sb;
+    if (!sbStates[state])
+      sbStates[state] = 0;
+    sbStates[state]++;
   });

-
+  report += (Date()) + '\n';
+  report += ('Sandboxes:') + '\n';
+  Object.keys(sbStates).forEach(state => {
+    const stateName = stateNames[state] || state;
+    report += (` ${(stateName + ':').padEnd(12)} ${sbStates[state]}`) + '\n';
+  })
+  report += (` * ALL: ${sup.sandboxes.length}`) + '\n';
+
+  report += ('Progress:') + '\n';
+  sup.workingSandboxes.forEach(sb => {
+    const jobName = sb.job && sb.job.public && sb.job.public.name || `idek (${sb.jobAddress})`;
+    let el = Date.now() - sb.sliceStartTime;
+    const t = el < 1000000
+      ? toInterval(el)
+      : 'new';
+
+    el = sb.progressReports && sb.progressReports.last
+      ? Date.now() - (sb.sliceStartTime + sb.progressReports.last.timestamp)
+      : 0;
+    const pct = (typeof sb.progress) === 'number'
+      ? `${Number(sb.progress).toFixed(0).padStart(2)}%`
+      : 'ind';
+    const stale = (el < 2000) ? '' : `(stale: ${toInterval(el)})`;
+
+    report += (` ${String(sb.id).padStart(4)}: ${sb.jobAddress} ${jobName.padEnd(34)} `+ `${t} ${pct} ${stale}`.padStart(13)) + '\n';
+  });
+
+  report += ('Slices:') + '\n';
+  report += (` working: ${sup.allocatedSlices.length}`) + '\n';
+  report += (` queued: ${sup.queuedSlices.length}`) + '\n';
+
+  report += ('='.repeat(78)) + '\n';

-  return
+  return report;
 }

-
-
+/**
+ * Handle a signal which requests our the death of the Worker by
+ * - stopping the worker
+ * - unregistering the handler (this allows a second signal to forcibly terminate the process
+ *   if that is the default behaviour)
+ * - set a long timeout (dcpConfig.worker.cleanupTimeout seconds), after which the process
+ *   exits forcibly with a non-zero exit code (unix standard for various signals)
+ */
+function handleSigDeath(signalName, signal)
 {
-
-  const program = path.basename(require.main.filename, '.js');
-  let location;
-  let filename;
+  process.off(signalName, handleSigDeath);

-  if (
-
-    location = path.dirname(dir);
-    if (fs.existsSync(dir))
-    {
-      if (fs.statSync(dir).isDirectory())
-        filename = program;
-      else
-      {
-        console.warn('Previous PID file was not cleaned up');
-        filename = path.basename(dir);
-      }
-    }
-    else if (dir.endsWith(path.sep))
-      filename = program;
-    else
-      filename = path.basename(dir);
-  }
+  if (!worker)
+    console.error(`trapped ${signalName}, signal ${signal}`);
   else
   {
-
-
+    console.error(`trapped ${signalName}, signal ${signal} -- stopping worker`);
+    worker.stop();
   }

-
-
-    filename + '.pid'
-  )
-  try
-  {
-    if (fs.existsSync(pidfile))
-    {
-      const oldPid = fs.readFileSync(pidfile, 'utf8')
-      console.warn(`Warning: Previous invocation${oldPid.length ? ' pid#' + parseInt(String(oldPid)) : ''} did not remove ${pidfile}`);
-    }
-    else
-      memoizePid.fd = fs.openSync(pidfile, 'wx');
+  setTimeout(() => process.exit(signal - 128), getCleanupTimeoutMs()).unref();
+}

-
-
-
+/**
+ * Returns the duration of the cleanup timeout in milliseconds. It is possible to specify zero.
+ */
+function getCleanupTimeoutMs()
+{
+  const defaultCT = 60;
+  var cleanupTimeout = dcpConfig.worker.cleanupTimeout;
+
+  if (typeof cleanupTimeout === 'undefined')
+    cleanupTimeout = defaultCT;
+  if (typeof cleanupTimeout !== 'number')
+    cleanupTimeout = Number(cleanupTimeout)
+  if (isNaN(cleanupTimeout))
   {
-
-
-    if (typeof memoizePid.fd === 'number')
+    cleanupTimeout = defaultCT;
+    if (!getCleanupTimeoutMs.warned)
     {
-
-
+      console.warn(`warning: dcpConfig.worker.cleanupTimeout is not a number (${dcpConfig.worker.cleanupTimeout})`);
+      getCleanupTimeoutMs.warned = true;
    }
-    return;
  }
+  return cleanupTimeout * 1000;
+}

-
+/**
+ * Ensure the default configuration hasn't been modified by the end-user-sysadmin. It is an
+ * attractive nuisance, as it looks just like the file they should modify, but if they make
+ * security changes there that are overwritten in an subsequent update, it will be a problem.
+ *
+ * Every time a new package is generated, the default config file has its md5 checksum recorded
+ * via the pack npm hook; all we do is make sure it hasn't changed.
+ */
+function verifyDefaultConfigIntegrity()
+{
+  const workerConfPath = require('dcp-client').__cn;
+  const md5sumPath = workerConfPath + '.md5';

-
+  if (!fs.existsSync(md5sumPath))
   {
-
-
-    fs.unlinkSync(pidfile);
-    fs.closeSync(memoizePid.fd);
-    delete memoizePid.fd;
-  }
-  catch (error)
-  {
-    console.warn(`Warning: Could not remove pidfile at ${pidfile} (${error.code})`);
-  }
+    console.log(chalk.bold.red(` ! warning: ${md5sumPath} not found; cannot verify configuration integrity`));
+    require('dcp/utils').sleep(2);
   }
-
-  // Cleanup PID file
-  process.on('dcpExit', exitHandler);
-}
-
-/**
- * Unhandled rejection handler: __must not ever throw no matter what__.
- * If we hit an unhandled rejection, we are by definition no longer confident of our program state, meaning that
- * the worker must be restarted. This handler does its best to report the rejection and give the worker a few
- * seconds in which to attempt to return slices to the scheduler before it gives up completely.
- */
-async function unhandledRejectionHandler (error) {
-  let _worker = worker;
-
-  if (!worker)
-    return;
   else
-
-
-
-
-
-    if (log) {
-      fs.appendFileSync(process.env.DCP_WORKER_UNHANDLED_REJECTION_LOG,
-        `${Date.now()}: ${error.message}\n${error.stack}\n\n`);
-    }
-  } catch(e) {};
+  {
+    const originalMd5sum = fs.readFileSync(md5sumPath, 'ascii');
+    const actualMd5sum = crypto.createHash('md5')
+      .update(fs.readFileSync(workerConfPath, 'ascii'))
+      .digest('hex');

-
-
-
-
-
-
-
-
-    console.
-
-
-
-
-
-
-
-    process.stderr.write(util.inspect(error) + '\n');
-  } catch(e) {
-    console.error(error);
+    if (!originalMd5sum.startsWith(actualMd5sum))
+    {
+      console.warn(chalk.yellow(` ! Detected modified ${workerConfPath};`));
+      console.warn(' . DCP Worker configuration changes should not be made by updating the default');
+      console.warn(' config, as that file will be overwritten on the next npm update. Instead,');
+      console.warn(' make changes via one of the following locations:');
+      console.warn(' - ~/.dcp/dcp-worker/dcp-config.js');
+      console.warn(' - /etc/dcp/dcp-worker/dcp-config.js');
+      console.warn(' - /etc/override/dcp/dcp-worker/dcp-config.js');
+      console.warn(' - the Windows Registry');
+
+      if (require('dcp/build').config.build !== 'debug')
+        process.exit(1);
+
+      console.log(chalk.bold.red.inverse("If this wasn't a debug build, the worker would exit now."));
+      require('dcp/utils').sleep(2);
    }
-  process.exit(exitCode || EXIT_UNHANDLED);
  }
-  setTimeout(bail, 1000 * unhandledRejectionHandler.timeout);

-
-
-
-
+  if (dcpConfig.cookie !== process.env.DCP_CONFIG_COOKIE || !dcpConfig.cookie)
+  {
+    console.error(' ! DCP Worker default configuration was not loaded; exiting.');
+    process.exit(1);
  }
-
-  setImmediate(() => bail(EXIT_UNHANDLED));
-};
-unhandledRejectionHandler.timeout = 5;
+}
```