teraslice 0.87.1 → 0.89.0

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (69)
  1. package/cluster-service.js +24 -18
  2. package/dist/src/index.js +42 -0
  3. package/package.json +10 -14
  4. package/service.js +4 -6
  5. package/worker-service.js +6 -6
  6. package/index.js +0 -21
  7. package/lib/cluster/cluster_master.js +0 -164
  8. package/lib/cluster/node_master.js +0 -393
  9. package/lib/cluster/services/api.js +0 -581
  10. package/lib/cluster/services/assets.js +0 -211
  11. package/lib/cluster/services/cluster/backends/kubernetes/deployments/worker.hbs +0 -86
  12. package/lib/cluster/services/cluster/backends/kubernetes/index.js +0 -225
  13. package/lib/cluster/services/cluster/backends/kubernetes/jobs/execution_controller.hbs +0 -69
  14. package/lib/cluster/services/cluster/backends/kubernetes/k8s.js +0 -450
  15. package/lib/cluster/services/cluster/backends/kubernetes/k8sResource.js +0 -443
  16. package/lib/cluster/services/cluster/backends/kubernetes/k8sState.js +0 -67
  17. package/lib/cluster/services/cluster/backends/kubernetes/utils.js +0 -58
  18. package/lib/cluster/services/cluster/backends/native/index.js +0 -611
  19. package/lib/cluster/services/cluster/backends/native/messaging.js +0 -563
  20. package/lib/cluster/services/cluster/backends/state-utils.js +0 -49
  21. package/lib/cluster/services/cluster/index.js +0 -15
  22. package/lib/cluster/services/execution.js +0 -459
  23. package/lib/cluster/services/jobs.js +0 -303
  24. package/lib/config/default-sysconfig.js +0 -47
  25. package/lib/config/index.js +0 -32
  26. package/lib/config/schemas/system.js +0 -333
  27. package/lib/processors/save_file/index.js +0 -9
  28. package/lib/processors/save_file/processor.js +0 -17
  29. package/lib/processors/save_file/schema.js +0 -17
  30. package/lib/processors/script.js +0 -130
  31. package/lib/processors/stdout/index.js +0 -9
  32. package/lib/processors/stdout/processor.js +0 -19
  33. package/lib/processors/stdout/schema.js +0 -18
  34. package/lib/storage/analytics.js +0 -106
  35. package/lib/storage/assets.js +0 -275
  36. package/lib/storage/backends/elasticsearch_store.js +0 -567
  37. package/lib/storage/backends/mappings/analytics.json +0 -49
  38. package/lib/storage/backends/mappings/asset.json +0 -40
  39. package/lib/storage/backends/mappings/ex.json +0 -55
  40. package/lib/storage/backends/mappings/job.json +0 -31
  41. package/lib/storage/backends/mappings/state.json +0 -37
  42. package/lib/storage/execution.js +0 -331
  43. package/lib/storage/index.js +0 -16
  44. package/lib/storage/jobs.js +0 -97
  45. package/lib/storage/state.js +0 -302
  46. package/lib/utils/api_utils.js +0 -173
  47. package/lib/utils/asset_utils.js +0 -117
  48. package/lib/utils/date_utils.js +0 -58
  49. package/lib/utils/encoding_utils.js +0 -29
  50. package/lib/utils/events.js +0 -7
  51. package/lib/utils/file_utils.js +0 -118
  52. package/lib/utils/id_utils.js +0 -19
  53. package/lib/utils/port_utils.js +0 -83
  54. package/lib/workers/assets/loader.js +0 -109
  55. package/lib/workers/assets/spawn.js +0 -78
  56. package/lib/workers/context/execution-context.js +0 -16
  57. package/lib/workers/context/terafoundation-context.js +0 -10
  58. package/lib/workers/execution-controller/execution-analytics.js +0 -211
  59. package/lib/workers/execution-controller/index.js +0 -1033
  60. package/lib/workers/execution-controller/recovery.js +0 -188
  61. package/lib/workers/execution-controller/scheduler.js +0 -461
  62. package/lib/workers/execution-controller/slice-analytics.js +0 -115
  63. package/lib/workers/helpers/job.js +0 -93
  64. package/lib/workers/helpers/op-analytics.js +0 -22
  65. package/lib/workers/helpers/terafoundation.js +0 -43
  66. package/lib/workers/helpers/worker-shutdown.js +0 -187
  67. package/lib/workers/metrics/index.js +0 -139
  68. package/lib/workers/worker/index.js +0 -344
  69. package/lib/workers/worker/slice.js +0 -143
package/lib/storage/backends/elasticsearch_store.js
@@ -1,567 +0,0 @@
- 'use strict';
-
- const ms = require('ms');
- const fs = require('fs');
- const path = require('path');
- const {
-     TSError, parseError, isTest, pDelay,
-     pRetry, logError, pWhile, isString, getTypeOf,
-     get, random, isInteger
- } = require('@terascope/utils');
- const elasticsearchApi = require('@terascope/elasticsearch-api');
- const { getClientAsync } = require('@terascope/job-components');
- const { makeLogger } = require('../../workers/helpers/terafoundation');
- const { timeseriesIndex } = require('../../utils/date_utils');
-
- module.exports = async function elasticsearchStorage(backendConfig) {
-     const {
-         context, indexName, recordType,
-         idField, storageName, bulkSize = 1000,
-         fullResponse = false, logRecord = true,
-         forceRefresh = true,
-     } = backendConfig;
-
-     const logger = makeLogger(context, 'elasticsearch_backend', { storageName });
-
-     const config = context.sysconfig.teraslice;
-
-     const indexSettings = get(config, ['index_settings', storageName], {
-         number_of_shards: 5,
-         number_of_replicas: 1,
-     });
-
-     let elasticsearch;
-     let client;
-     let flushInterval;
-     let isShutdown = false;
-
-     // Buffer to build up bulk requests.
-     let bulkQueue = [];
-     let savingBulk = false; // serialize save requests.
-
-     function validateId(recordId) {
-         if (!recordId || !isString(recordId)) {
-             throw new TSError(`Invalid ${recordType} id given ${getTypeOf(recordId)}`, {
-                 statusCode: 422
-             });
-         }
-     }
-
-     function validateIdAndRecord(recordId, record) {
-         validateId(recordId);
-
-         const id = record[idField];
-         if (id && id !== recordId) {
-             throw new TSError(`${recordType}.${idField} doesn't match request id`, {
-                 statusCode: 406
-             });
-         }
-     }
-
-     async function getRecord(recordId, indexArg, fields) {
-         validateId(recordId);
-
-         logger.trace(`getting record id: ${recordId}`);
-         const query = {
-             index: indexArg || indexName,
-             type: recordType,
-             id: recordId,
-         };
-
-         if (fields) {
-             if (!elasticsearch.isElasticsearch6()) {
-                 query._sourceIncludes = fields;
-             } else {
-                 query._sourceInclude = fields;
-             }
-         }
-         return elasticsearch.get(query);
-     }
-
-     async function search(query, from, size, sort, fields, indexArg = indexName) {
-         if (from != null && !isInteger(from)) {
-             throw new Error(`from parameter must be a integer, got ${from}`);
-         }
-         if (size != null && !isInteger(size)) {
-             throw new Error(`size parameter must be a integer, got ${size}`);
-         }
-         if (sort != null && !isString(sort)) {
-             throw new Error(`sort parameter must be a string, got ${sort}`);
-         }
-
-         const esQuery = {
-             index: indexArg,
-             from: from != null ? from : 0,
-             size: size != null ? size : 10000,
-             type: recordType,
-             sort,
-         };
-
-         if (typeof query === 'string') {
-             esQuery.q = query;
-         } else {
-             esQuery.body = query;
-         }
-
-         if (fields) {
-             if (!elasticsearch.isElasticsearch6()) {
-                 esQuery._sourceIncludes = fields;
-             } else {
-                 esQuery._sourceInclude = fields;
-             }
-         }
-
-         return elasticsearch.search(esQuery);
-     }
-
-     /*
-      * index saves a record to elasticsearch allowing automatic
-      * ID creation
-      */
-     async function index(record, indexArg = indexName) {
-         logger.trace('indexing record', logRecord ? record : undefined);
-         const query = {
-             index: indexArg,
-             type: recordType,
-             body: record,
-             refresh: forceRefresh,
-         };
-
-         return elasticsearch.index(query);
-     }
-
-     function _getTimeout(timeout) {
-         if (isInteger(timeout)) {
-             // don't allow a timeout of less than 1 second
-             if (timeout <= 1000) return undefined;
-             return ms(timeout);
-         }
-         if (isString(timeout)) {
-             return timeout;
-         }
-         return undefined;
-     }
-
-     /*
-      * index saves a record to elasticsearch with a specified ID.
-      * If the document is already there it will be replaced.
-      */
-     async function indexWithId(recordId, record, indexArg, timeout) {
-         validateIdAndRecord(recordId, record);
-
-         logger.trace(`indexWithId call with id: ${recordId}, record`, logRecord ? record : null);
-
-         const query = {
-             index: indexArg || indexName,
-             type: recordType,
-             id: recordId,
-             body: record,
-             refresh: forceRefresh,
-             timeout: _getTimeout(timeout)
-         };
-
-         return elasticsearch.indexWithId(query);
-     }
-
-     /*
-      * Create saves a record to elasticsearch under the provided id.
-      * If the record already exists it will not be inserted.
-      */
-     async function create(record, indexArg = indexName) {
-         logger.trace('creating record', logRecord ? record : null);
-
-         const query = {
-             index: indexArg,
-             type: recordType,
-             id: record[idField],
-             body: record,
-             refresh: forceRefresh,
-         };
-
-         return elasticsearch.create(query);
-     }
-
-     async function count(query, from, sort, indexArg = indexName) {
-         if (from != null && !isInteger(from)) {
-             throw new Error(`from parameter must be a integer, got ${from}`);
-         }
-         if (sort != null && !isString(sort)) {
-             throw new Error(`sort parameter must be a string, got ${sort}`);
-         }
-
-         const esQuery = {
-             index: indexArg,
-             type: recordType,
-             from,
-             sort,
-         };
-
-         if (isString(query)) {
-             esQuery.q = query;
-         } else {
-             esQuery.body = query;
-         }
-
-         const response = await elasticsearch.count(esQuery);
-
-         return response;
-     }
-
-     async function update(recordId, updateSpec, indexArg = indexName) {
-         validateIdAndRecord(recordId, updateSpec);
-
-         logger.trace(`updating record ${recordId}, `, logRecord ? updateSpec : null);
-
-         const query = {
-             index: indexArg,
-             type: recordType,
-             id: recordId,
-             body: {
-                 doc: updateSpec,
-             },
-             refresh: forceRefresh,
-             retryOnConflict: 3,
-         };
-
-         return elasticsearch.update(query);
-     }
-
-     async function updatePartial(recordId, applyChanges, indexArg = indexName) {
-         if (typeof applyChanges !== 'function') {
-             throw new Error('Update Partial expected a applyChanges function');
-         }
-
-         validateId(recordId);
-         await waitForClient();
-
-         const getParams = {
-             index: indexArg,
-             type: recordType,
-             id: recordId,
-         };
-
-         const existing = await pRetry(() => elasticsearch.get(getParams, true), {
-             matches: ['no_shard_available_action_exception'],
-             delay: 1000,
-             retries: 10,
-             backoff: 5
-         });
-
-         const doc = await applyChanges(Object.assign({}, existing._source));
-
-         logger.trace(`updating partial record ${recordId}, `, logRecord ? doc : null);
-
-         validateIdAndRecord(recordId, doc);
-
-         const query = {
-             index: indexArg,
-             type: recordType,
-             id: recordId,
-             body: doc,
-             refresh: forceRefresh,
-         };
-
-         if (!elasticsearch.isElasticsearch6()) {
-             query.if_seq_no = existing._seq_no;
-             query.if_primary_term = existing._primary_term;
-         } else {
-             query.version = existing._version;
-         }
-
-         try {
-             await elasticsearch.indexWithId(query);
-             return doc;
-         } catch (err) {
-             // if there is a version conflict
-             if (err.statusCode === 409 && err.message.includes('version conflict')) {
-                 logger.debug({ error: err }, `version conflict when updating "${recordId}" (${recordType})`);
-                 return updatePartial(recordId, applyChanges, indexArg);
-             }
-
-             throw new TSError(err);
-         }
-     }
-
-     async function remove(recordId, indexArg = indexName) {
-         validateId(recordId);
-
-         logger.trace(`removing record ${recordId}`);
-         const query = {
-             index: indexArg,
-             type: recordType,
-             id: recordId,
-             refresh: forceRefresh,
-         };
-
-         return elasticsearch.remove(query);
-     }
-
-     async function bulk(record, _type, indexArg = indexName) {
-         if (isShutdown) {
-             throw new TSError('Unable to send bulk record after shutdown', {
-                 context: {
-                     recordType,
-                     record,
-                 },
-             });
-         }
-
-         const type = _type || 'index';
-
-         const action = {
-             [type]: {
-                 _index: indexArg,
-                 _type: recordType,
-             }
-         };
-
-         bulkQueue.push({
-             action,
-             data: type === 'delete' ? undefined : record
-         });
-
-         // We only flush once enough records have accumulated for it to make sense.
-         if (bulkQueue.length >= bulkSize) {
-             logger.trace(`flushing bulk queue ${bulkQueue.length}`);
-             return _flush();
-         }
-
-         // Bulk saving is a background operation so we don't have
-         // anything meaningful to return.
-         return Promise.resolve(true);
-     }
-
-     function shutdown(forceShutdown) {
-         const startTime = Date.now();
-         clearInterval(flushInterval);
-         if (forceShutdown !== true) {
-             return _flush(true);
-         }
-
-         return new Promise((resolve, reject) => {
-             logger.trace(`attempting to shutdown, will destroy in ${config.shutdown_timeout}`);
-             const timeout = setTimeout(_destroy, config.shutdown_timeout).unref();
-
-             function _destroy(err) {
-                 logger.trace(`shutdown store, took ${ms(Date.now() - startTime)}`);
-
-                 bulkQueue.length = [];
-                 isShutdown = true;
-                 clearTimeout(timeout);
-
-                 if (err) reject(err);
-                 else resolve();
-             }
-
-             _flush(true)
-                 .then(() => {
-                     _destroy();
-                 })
-                 .catch((err) => {
-                     _destroy(err);
-                 });
-         });
-     }
-
-     async function bulkSend(bulkRequest) {
-         return elasticsearch.bulkSend(bulkRequest);
-     }
-
-     async function _flush(shuttingDown = false) {
-         if (!bulkQueue.length) return;
-         if (!shuttingDown && savingBulk) return;
-
-         savingBulk = true;
-
-         const bulkRequest = bulkQueue.slice();
-         bulkQueue = [];
-
-         try {
-             const recordCount = await bulkSend(bulkRequest);
-             const extraMsg = shuttingDown ? ', on shutdown' : '';
-             logger.debug(`flushed ${recordCount}${extraMsg} records to index ${indexName}`);
-         } finally {
-             savingBulk = false;
-         }
-     }
-
-     function getMapFile() {
-         const mappingFile = path.join(__dirname, `mappings/${recordType}.json`);
-
-         const mapping = JSON.parse(fs.readFileSync(mappingFile));
-         mapping.settings = {
-             'index.number_of_shards': indexSettings.number_of_shards,
-             'index.number_of_replicas': indexSettings.number_of_replicas,
-         };
-         return mapping;
-     }
-
-     async function sendTemplate(mapping) {
-         if (mapping.template) {
-             const clusterName = context.sysconfig.teraslice.name;
-             const name = `${clusterName}_${recordType}_template`;
-             // setting template name to reflect current teraslice instance name to help prevent
-             // conflicts with differing versions of teraslice with same elastic db
-             if (mapping.template) {
-                 if (!mapping.template.match(clusterName)) {
-                     mapping.template = `${clusterName}${mapping.template}`;
-                 }
-             }
-
-             return putTemplate(mapping, name);
-         }
-
-         return Promise.resolve(true);
-     }
-
-     async function _createIndex(indexArg = indexName) {
-         const existQuery = { index: indexArg };
-         return elasticsearch.index_exists(existQuery).then((exists) => {
-             if (!exists) {
-                 const mapping = getMapFile();
-
-                 // Make sure the index exists before we do anything else.
-                 const createQuery = {
-                     index: indexArg,
-                     body: mapping,
-                 };
-
-                 // add a random delay to stagger requests
-                 return pDelay(isTest ? 0 : random(0, 5000))
-                     .then(() => sendTemplate(mapping))
-                     .then(() => elasticsearch.index_create(createQuery))
-                     .then((results) => results)
-                     .catch((err) => {
-                         // It's not really an error if it's just that the index is already there
-                         if (parseError(err).includes('already_exists_exception')) {
-                             return true;
-                         }
-
-                         const error = new TSError(err, {
-                             reason: `Could not create index: ${indexName}`,
-                         });
-                         return Promise.reject(error);
-                     });
-             }
-
-             // Index already exists. nothing to do.
-             return true;
-         });
-     }
-
-     function refresh(indexArg = indexName) {
-         const query = { index: indexArg };
-         return elasticsearch.index_refresh(query);
-     }
-
-     async function putTemplate(template, name) {
-         return elasticsearch.putTemplate(template, name);
-     }
-
-     function verifyClient() {
-         if (isShutdown) return false;
-         return elasticsearch.verifyClient();
-     }
-
-     async function waitForClient() {
-         if (elasticsearch.verifyClient()) return;
-
-         await pWhile(async () => {
-             if (isShutdown) throw new Error('Elasticsearch store is shutdown');
-             if (elasticsearch.verifyClient()) return true;
-             await pDelay(100);
-             return false;
-         });
-     }
-
-     // Periodically flush the bulkQueue so we don't end up with cached data lingering.
-     flushInterval = setInterval(() => {
-         _flush().catch((err) => {
-             logError(logger, err, 'background flush failure');
-             return null;
-         });
-         // stager the interval to avoid collisions
-     }, random(9000, 11000));
-
-     // javascript is having a fit if you use the shorthand get, so we renamed function to getRecord
-     const api = {
-         get: getRecord,
-         search,
-         refresh,
-         index,
-         indexWithId,
-         create,
-         update,
-         updatePartial,
-         bulk,
-         bulkSend,
-         remove,
-         shutdown,
-         count,
-         putTemplate,
-         waitForClient,
-         verifyClient,
-     };
-
-     const isMultiIndex = indexName[indexName.length - 1] === '*';
-     let newIndex = indexName;
-
-     if (isMultiIndex) {
-         const storeType = indexName.match(/__(.*)\*/)[1];
-         const timeseriesFormat = config.index_rollover_frequency[storeType];
-         const nameSize = indexName.length - 1;
-         newIndex = timeseriesIndex(timeseriesFormat, indexName.slice(0, nameSize)).index;
-     }
-
-     async function setup() {
-         const clientName = JSON.stringify({
-             connection: config.state.connection,
-             index: indexName,
-         });
-
-         const connectionConfig = Object.assign({}, config.state);
-         if (connectionConfig.connection_cache == null) {
-             connectionConfig.connection_cache = true;
-         }
-
-         let { connection } = config.state;
-         if (config.state.endpoint) {
-             connection += `:${config.state.endpoint}`;
-         }
-
-         const options = {
-             full_response: !!fullResponse,
-             connection,
-         };
-
-         await pWhile(async () => {
-             try {
-                 client = await getClientAsync(context, connectionConfig, 'elasticsearch-next');
-                 elasticsearch = elasticsearchApi(client, logger, options);
-
-                 await _createIndex(newIndex);
-                 await elasticsearch.isAvailable(newIndex, recordType);
-
-                 return true;
-             } catch (err) {
-                 const error = new TSError(err, {
-                     reason: `Failure initializing ${recordType} index: ${indexName}`,
-                 });
-
-                 if (error.statusCode >= 400 && error.statusCode < 500) {
-                     throw error;
-                 }
-
-                 logError(logger, error, `Failed attempt connecting to elasticsearch: ${clientName} (will retry)`);
-
-                 await pDelay(isTest ? 0 : random(2000, 4000));
-
-                 return false;
-             }
-         });
-
-         return api;
-     }
-
-     return setup();
- };
package/lib/storage/backends/mappings/analytics.json
@@ -1,49 +0,0 @@
- {
-     "template": "__analytics*",
-     "mappings": {
-         "analytics": {
-             "_all": {
-                 "enabled": false
-             },
-             "dynamic": false,
-             "properties": {
-                 "ex_id": {
-                     "type": "keyword"
-                 },
-                 "job_id": {
-                     "type": "keyword"
-                 },
-                 "worker_id": {
-                     "type": "keyword"
-                 },
-                 "slice_id": {
-                     "type": "keyword"
-                 },
-                 "slicer_id": {
-                     "type": "keyword"
-                 },
-                 "op": {
-                     "type": "keyword"
-                 },
-                 "order": {
-                     "type": "integer"
-                 },
-                 "count": {
-                     "type": "integer"
-                 },
-                 "state": {
-                     "type": "keyword"
-                 },
-                 "time": {
-                     "type": "integer"
-                 },
-                 "memory": {
-                     "type": "long"
-                 },
-                 "@timestamp": {
-                     "type": "date"
-                 }
-             }
-         }
-     }
- }
package/lib/storage/backends/mappings/asset.json
@@ -1,40 +0,0 @@
- {
-     "mappings": {
-         "asset": {
-             "_all": {
-                 "enabled": false
-             },
-             "dynamic": false,
-             "properties": {
-                 "blob": {
-                     "type": "binary",
-                     "doc_values": false
-                 },
-                 "name": {
-                     "type": "keyword"
-                 },
-                 "version": {
-                     "type": "keyword"
-                 },
-                 "id": {
-                     "type": "keyword"
-                 },
-                 "description": {
-                     "type": "keyword"
-                 },
-                 "arch": {
-                     "type": "keyword"
-                 },
-                 "platform": {
-                     "type": "keyword"
-                 },
-                 "node_version": {
-                     "type": "integer"
-                 },
-                 "_created": {
-                     "type": "date"
-                 }
-             }
-         }
-     }
- }
package/lib/storage/backends/mappings/ex.json
@@ -1,55 +0,0 @@
- {
-     "mappings": {
-         "ex": {
-             "_all": {
-                 "enabled": false
-             },
-             "dynamic": false,
-             "properties": {
-                 "active": {
-                     "type": "boolean"
-                 },
-                 "job_id": {
-                     "type": "keyword"
-                 },
-                 "ex_id": {
-                     "type": "keyword"
-                 },
-                 "_context": {
-                     "type": "keyword"
-                 },
-                 "_status": {
-                     "type": "keyword"
-                 },
-                 "_has_errors": {
-                     "type": "keyword"
-                 },
-                 "slicer_hostname": {
-                     "type": "keyword"
-                 },
-                 "slicer_port": {
-                     "type": "keyword"
-                 },
-                 "recovered_execution": {
-                     "type": "keyword"
-                 },
-                 "recovered_slice_type": {
-                     "type": "keyword"
-                 },
-                 "metadata": {
-                     "type": "object",
-                     "enabled": false
-                 },
-                 "_slicer_stats": {
-                     "type": "object"
-                 },
-                 "_created": {
-                     "type": "date"
-                 },
-                 "_updated": {
-                     "type": "date"
-                 }
-             }
-         }
-     }
- }