teraslice 0.77.1 → 0.80.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,10 +8,10 @@ const Request = require('kubernetes-client/backends/request');
8
8
  const { getRetryConfig } = require('./utils');
9
9
 
10
10
  class K8s {
11
- constructor(logger, clientConfig, defaultNamespace = 'default',
11
+ constructor(logger, clientConfig, defaultNamespace,
12
12
  apiPollDelay, shutdownTimeout) {
13
13
  this.apiPollDelay = apiPollDelay;
14
- this.defaultNamespace = defaultNamespace;
14
+ this.defaultNamespace = defaultNamespace || 'default';
15
15
  this.logger = logger;
16
16
  this.shutdownTimeout = shutdownTimeout; // this is in milliseconds
17
17
 
@@ -25,6 +25,7 @@ class K8sResource {
25
25
  constructor(resourceType, resourceName, terasliceConfig, execution) {
26
26
  this.execution = execution;
27
27
  this.jobLabelPrefix = 'job.teraslice.terascope.io';
28
+ this.jobPropertyLabelPrefix = 'job-property.teraslice.terascope.io';
28
29
  this.nodeType = resourceName;
29
30
  this.terasliceConfig = terasliceConfig;
30
31
 
@@ -54,9 +55,10 @@ class K8sResource {
54
55
  this._setImagePullSecret();
55
56
  this._setEphemeralStorage();
56
57
  this._setExternalPorts();
58
+ this._setPriorityClassName();
57
59
 
58
60
  if (resourceName === 'worker') {
59
- this._setAntiAffinity();
61
+ this._setWorkerAntiAffinity();
60
62
  }
61
63
 
62
64
  // Execution controller targets are required nodeAffinities, if
@@ -124,31 +126,40 @@ class K8sResource {
124
126
  };
125
127
  }
126
128
 
127
- _setAntiAffinity() {
129
+ _setWorkerAntiAffinity() {
128
130
  if (this.terasliceConfig.kubernetes_worker_antiaffinity) {
129
- this.resource.spec.template.spec.affinity = {
130
- podAntiAffinity: {
131
- preferredDuringSchedulingIgnoredDuringExecution: [
132
- {
133
- weight: 1,
134
- podAffinityTerm: {
135
- labelSelector: {
136
- matchExpressions: [
137
- {
138
- key: 'app.kubernetes.io/name',
139
- operator: 'In',
140
- values: [
141
- 'teraslice'
142
- ]
143
- }
131
+ const targetKey = 'spec.template.spec.affinity.podAntiAffinity.preferredDuringSchedulingIgnoredDuringExecution';
132
+ if (!_.has(this.resource, targetKey)) {
133
+ _.set(this.resource, targetKey, []);
134
+ }
135
+
136
+ // eslint-disable-next-line max-len
137
+ this.resource.spec.template.spec.affinity.podAntiAffinity.preferredDuringSchedulingIgnoredDuringExecution.push(
138
+ {
139
+ weight: 1,
140
+ podAffinityTerm: {
141
+ labelSelector: {
142
+ matchExpressions: [
143
+ {
144
+ key: 'app.kubernetes.io/name',
145
+ operator: 'In',
146
+ values: [
147
+ 'teraslice'
144
148
  ]
145
149
  },
146
- topologyKey: 'kubernetes.io/hostname'
147
- }
148
- }
149
- ]
150
+ {
151
+ key: 'app.kubernetes.io/instance',
152
+ operator: 'In',
153
+ values: [
154
+ this.templateConfig.clusterNameLabel
155
+ ]
156
+ }
157
+ ]
158
+ },
159
+ topologyKey: 'kubernetes.io/hostname'
160
+ }
150
161
  }
151
- };
162
+ );
152
163
  }
153
164
  }
154
165
 
@@ -210,6 +221,25 @@ class K8sResource {
210
221
  }
211
222
  }
212
223
 
224
+ _setPriorityClassName() {
225
+ if (this.terasliceConfig.kubernetes_priority_class_name) {
226
+ if (this.nodeType === 'execution_controller') {
227
+ // eslint-disable-next-line max-len
228
+ this.resource.spec.template.spec.priorityClassName = this.terasliceConfig.kubernetes_priority_class_name;
229
+ if (this.execution.stateful) {
230
+ // eslint-disable-next-line max-len
231
+ this.resource.spec.template.metadata.labels[`${this.jobPropertyLabelPrefix}/stateful`] = 'true';
232
+ }
233
+ }
234
+ if (this.nodeType === 'worker' && this.execution.stateful) {
235
+ // eslint-disable-next-line max-len
236
+ this.resource.spec.template.spec.priorityClassName = this.terasliceConfig.kubernetes_priority_class_name;
237
+ // eslint-disable-next-line max-len
238
+ this.resource.spec.template.metadata.labels[`${this.jobPropertyLabelPrefix}/stateful`] = 'true';
239
+ }
240
+ }
241
+ }
242
+
213
243
  _setAssetsVolume() {
214
244
  if (this.terasliceConfig.assets_directory && this.terasliceConfig.assets_volume) {
215
245
  this.resource.spec.template.spec.volumes.push({
@@ -257,14 +287,40 @@ class K8sResource {
257
287
  _setResources() {
258
288
  let cpu;
259
289
  let memory;
290
+ let maxMemory;
291
+
292
+ const container = this.resource.spec.template.spec.containers[0];
260
293
 
261
294
  // use teraslice config as defaults and execution config will override it
262
295
  const envVars = Object.assign({}, this.terasliceConfig.env_vars, this.execution.env_vars);
263
296
 
264
297
  if (this.nodeType === 'worker') {
265
- // The settings on the executions override the cluster configs
266
- cpu = this.execution.cpu || this.terasliceConfig.cpu || -1;
267
- memory = this.execution.memory || this.terasliceConfig.memory || -1;
298
+ if (this.execution.resources_requests_cpu
299
+ || this.execution.resources_limits_cpu) {
300
+ if (this.execution.resources_requests_cpu) {
301
+ _.set(container, 'resources.requests.cpu', this.execution.resources_requests_cpu);
302
+ }
303
+ if (this.execution.resources_limits_cpu) {
304
+ _.set(container, 'resources.limits.cpu', this.execution.resources_limits_cpu);
305
+ }
306
+ } else if (this.execution.cpu || this.terasliceConfig.cpu) {
307
+ // The settings on the executions override the cluster configs
308
+ cpu = this.execution.cpu || this.terasliceConfig.cpu || -1;
309
+ _.set(container, 'resources.requests.cpu', cpu);
310
+ _.set(container, 'resources.limits.cpu', cpu);
311
+ }
312
+ if (this.execution.resources_requests_memory
313
+ || this.execution.resources_limits_memory) {
314
+ _.set(container, 'resources.requests.memory', this.execution.resources_requests_memory);
315
+ _.set(container, 'resources.limits.memory', this.execution.resources_limits_memory);
316
+ maxMemory = this.execution.resources_limits_memory;
317
+ } else if (this.execution.memory || this.terasliceConfig.memory) {
318
+ // The settings on the executions override the cluster configs
319
+ memory = this.execution.memory || this.terasliceConfig.memory || -1;
320
+ _.set(container, 'resources.requests.memory', memory);
321
+ _.set(container, 'resources.limits.memory', memory);
322
+ maxMemory = memory;
323
+ }
268
324
  }
269
325
 
270
326
  if (this.nodeType === 'execution_controller') {
@@ -273,21 +329,17 @@ class K8sResource {
273
329
  || this.terasliceConfig.cpu_execution_controller || -1;
274
330
  memory = this.execution.memory_execution_controller
275
331
  || this.terasliceConfig.memory_execution_controller || -1;
276
- }
277
-
278
- const container = this.resource.spec.template.spec.containers[0];
279
-
280
- if (cpu !== -1) {
281
332
  _.set(container, 'resources.requests.cpu', cpu);
282
333
  _.set(container, 'resources.limits.cpu', cpu);
283
- }
284
-
285
- if (memory !== -1) {
286
334
  _.set(container, 'resources.requests.memory', memory);
287
335
  _.set(container, 'resources.limits.memory', memory);
336
+ maxMemory = memory;
288
337
  }
289
338
 
290
- setMaxOldSpaceViaEnv(container.env, envVars, memory);
339
+ // NOTE: This sucks, this manages the memory env var but it ALSO is
340
+ // responsible for doing the config and execution env var merge, which
341
+ // should NOT be in this function
342
+ setMaxOldSpaceViaEnv(container.env, envVars, maxMemory);
291
343
  }
292
344
 
293
345
  _setTargets() {
@@ -16,6 +16,11 @@ function getConnectors() {
16
16
  default: {
17
17
  host: ['localhost:9200']
18
18
  }
19
+ },
20
+ 'elasticsearch-next': {
21
+ default: {
22
+ node: ['localhost:9200']
23
+ }
19
24
  }
20
25
  };
21
26
 
@@ -295,6 +295,11 @@ const schema = {
295
295
  default: 'default',
296
296
  format: 'optional_String'
297
297
  },
298
+ kubernetes_priority_class_name: {
299
+ doc: 'Priority class that the Teraslice master, execution controller, and stateful workers should run with',
300
+ default: undefined,
301
+ format: 'optional_String'
302
+ },
298
303
  kubernetes_config_map_name: {
299
304
  doc: 'Specify the name of the Kubernetes ConfigMap used to configure worker pods',
300
305
  default: 'teraslice-worker',
@@ -50,7 +50,7 @@ module.exports = async function analyticsService(context) {
50
50
  time: stats.time[index],
51
51
  memory: stats.memory[index],
52
52
  },
53
- null,
53
+ 'index',
54
54
  esIndex
55
55
  ));
56
56
 
@@ -18,7 +18,7 @@ const {
18
18
  isInteger
19
19
  } = require('@terascope/utils');
20
20
  const elasticsearchApi = require('@terascope/elasticsearch-api');
21
- const { getClient } = require('@terascope/job-components');
21
+ const { getClientAsync } = require('@terascope/job-components');
22
22
  const { makeLogger } = require('../../workers/helpers/terafoundation');
23
23
  const { timeseriesIndex } = require('../../utils/date_utils');
24
24
 
@@ -29,7 +29,7 @@ module.exports = function elasticsearchStorage(backendConfig) {
29
29
  recordType,
30
30
  idField,
31
31
  storageName,
32
- bulkSize = 500,
32
+ bulkSize = 1000,
33
33
  fullResponse = false,
34
34
  logRecord = true,
35
35
  forceRefresh = true,
@@ -83,8 +83,7 @@ module.exports = function elasticsearchStorage(backendConfig) {
83
83
  };
84
84
 
85
85
  if (fields) {
86
- const esVersion = elasticsearch.getESVersion();
87
- if (esVersion > 6) {
86
+ if (!elasticsearch.isElasticsearch6()) {
88
87
  query._sourceIncludes = fields;
89
88
  } else {
90
89
  query._sourceInclude = fields;
@@ -118,8 +117,7 @@ module.exports = function elasticsearchStorage(backendConfig) {
118
117
  }
119
118
 
120
119
  if (fields) {
121
- const esVersion = elasticsearch.getESVersion();
122
- if (esVersion > 6) {
120
+ if (!elasticsearch.isElasticsearch6()) {
123
121
  esQuery._sourceIncludes = fields;
124
122
  } else {
125
123
  esQuery._sourceInclude = fields;
@@ -161,13 +159,13 @@ module.exports = function elasticsearchStorage(backendConfig) {
161
159
  * index saves a record to elasticsearch with a specified ID.
162
160
  * If the document is already there it will be replaced.
163
161
  */
164
- async function indexWithId(recordId, record, indexArg = indexName, timeout) {
162
+ async function indexWithId(recordId, record, indexArg, timeout) {
165
163
  validateIdAndRecord(recordId, record);
166
164
 
167
165
  logger.trace(`indexWithId call with id: ${recordId}, record`, logRecord ? record : null);
168
166
 
169
167
  const query = {
170
- index: indexArg,
168
+ index: indexArg || indexName,
171
169
  type: recordType,
172
170
  id: recordId,
173
171
  body: record,
@@ -273,9 +271,7 @@ module.exports = function elasticsearchStorage(backendConfig) {
273
271
  refresh: forceRefresh,
274
272
  };
275
273
 
276
- const esVersion = elasticsearch.getESVersion();
277
-
278
- if (esVersion >= 7) {
274
+ if (!elasticsearch.isElasticsearch6()) {
279
275
  query.if_seq_no = existing._seq_no;
280
276
  query.if_primary_term = existing._primary_term;
281
277
  } else {
@@ -322,14 +318,17 @@ module.exports = function elasticsearchStorage(backendConfig) {
322
318
 
323
319
  const type = _type || 'index';
324
320
 
325
- const indexRequest = {
321
+ const action = {
326
322
  [type]: {
327
323
  _index: indexArg,
328
324
  _type: recordType,
329
325
  }
330
326
  };
331
327
 
332
- bulkQueue.push(indexRequest, record);
328
+ bulkQueue.push({
329
+ action,
330
+ data: type === 'delete' ? undefined : record
331
+ });
333
332
 
334
333
  // We only flush once enough records have accumulated for it to make sense.
335
334
  if (bulkQueue.length >= bulkSize) {
@@ -375,17 +374,7 @@ module.exports = function elasticsearchStorage(backendConfig) {
375
374
  }
376
375
 
377
376
  async function bulkSend(bulkRequest) {
378
- const recordCount = (bulkRequest.length / 2);
379
-
380
- await pRetry(async () => elasticsearch.bulkSend(bulkRequest), {
381
- reason: `Failure to bulk create "${recordType}"`,
382
- logError: logger.warn,
383
- delay: isTest ? 100 : 1000,
384
- backoff: 5,
385
- retries: 100,
386
- });
387
-
388
- return recordCount;
377
+ return elasticsearch.bulkSend(bulkRequest);
389
378
  }
390
379
 
391
380
  async function _flush(shuttingDown = false) {
@@ -544,11 +533,6 @@ module.exports = function elasticsearchStorage(backendConfig) {
544
533
  if (connectionConfig.connection_cache == null) {
545
534
  connectionConfig.connection_cache = true;
546
535
  }
547
- client = getClient(context, connectionConfig, 'elasticsearch');
548
- if (!client) {
549
- reject(new Error(`Unable to get client for connection: ${config.state.connection}`));
550
- return;
551
- }
552
536
 
553
537
  let { connection } = config.state;
554
538
  if (config.state.endpoint) {
@@ -560,8 +544,18 @@ module.exports = function elasticsearchStorage(backendConfig) {
560
544
  connection,
561
545
  };
562
546
 
563
- elasticsearch = elasticsearchApi(client, logger, options);
564
- _createIndex(newIndex)
547
+ Promise.resolve()
548
+ .then(() => getClientAsync(context, connectionConfig, 'elasticsearch-next'))
549
+ .then((esClient) => {
550
+ client = esClient;
551
+ if (!client) {
552
+ reject(new Error(`Unable to get client for connection: ${config.state.connection}`));
553
+ return;
554
+ }
555
+ elasticsearch = elasticsearchApi(client, logger, options);
556
+ // eslint-disable-next-line consistent-return
557
+ return _createIndex(newIndex);
558
+ })
565
559
  .then(() => elasticsearch.isAvailable(newIndex, recordType))
566
560
  .then(() => resolve(api))
567
561
  .catch((err) => {
@@ -60,17 +60,19 @@ async function stateStorage(context) {
60
60
  async function createSlices(exId, slices) {
61
61
  await waitForClient();
62
62
 
63
- const bulkRequest = [];
64
- for (const slice of slices) {
63
+ const bulkRequest = slices.map((slice) => {
65
64
  const { record, index } = _createSliceRecord(exId, slice, SliceState.pending);
66
- bulkRequest.push({
67
- index: {
68
- _index: index,
69
- _type: recordType,
70
- _id: record.slice_id,
65
+ return {
66
+ action: {
67
+ index: {
68
+ _index: index,
69
+ _type: recordType,
70
+ _id: record.slice_id,
71
+ },
71
72
  },
72
- }, record);
73
- }
73
+ data: record
74
+ };
75
+ });
74
76
  return backend.bulkSend(bulkRequest);
75
77
  }
76
78
 
@@ -242,8 +244,8 @@ async function stateStorage(context) {
242
244
  }
243
245
  }
244
246
 
245
- async function search(query, from, size, sort = '_updated:desc', fields) {
246
- return backend.search(query, from, size, sort, fields);
247
+ async function search(query, from, size, sort, fields) {
248
+ return backend.search(query, from, size, sort || '_updated:desc', fields);
247
249
  }
248
250
 
249
251
  async function count(query, from = 0, sort = '_updated:desc') {
@@ -4,8 +4,8 @@ const shuffle = require('lodash/shuffle');
4
4
  const {
5
5
  get,
6
6
  times,
7
- toInteger,
8
7
  pDelay,
8
+ toIntegerOrThrow,
9
9
  } = require('@terascope/utils');
10
10
  const porty = require('porty');
11
11
 
@@ -57,11 +57,11 @@ async function findPort(options = {}) {
57
57
 
58
58
  function getPorts(context) {
59
59
  const portConfig = get(context, 'sysconfig.teraslice.slicer_port_range');
60
- const dataArray = portConfig.split(':');
61
- const assetsPort = toInteger(dataArray[0]);
60
+ const dataArray = portConfig.split(':', 2);
61
+ const assetsPort = toIntegerOrThrow(dataArray[0]);
62
62
  const start = assetsPort + 1;
63
63
  // range end is exclusive, so we need to add one
64
- const end = toInteger(dataArray[1]) + 1;
64
+ const end = toIntegerOrThrow(dataArray[1]) + 1;
65
65
  return { assetsPort, start, end };
66
66
  }
67
67
 
@@ -192,7 +192,7 @@ class Worker {
192
192
  this.slicesProcessed += 1;
193
193
  }
194
194
 
195
- async shutdown(block = true, event, shutdownError) {
195
+ async shutdown(block, event, shutdownError) {
196
196
  if (this.isShutdown) return;
197
197
  if (!this.isInitialized) return;
198
198
  const { exId } = this.executionContext;
@@ -202,11 +202,11 @@ class Worker {
202
202
  'worker',
203
203
  `shutdown was called for ${exId}`,
204
204
  'but it was already shutting down',
205
- block ? ', will block until done' : ''
205
+ block !== false ? ', will block until done' : ''
206
206
  ];
207
207
  this.logger.debug(msgs.join(' '));
208
208
 
209
- if (block) {
209
+ if (block !== false) {
210
210
  await waitForWorkerShutdown(this.context, 'worker:shutdown:complete');
211
211
  }
212
212
  return;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "teraslice",
3
3
  "displayName": "Teraslice",
4
- "version": "0.77.1",
4
+ "version": "0.80.0",
5
5
  "description": "Distributed computing platform for processing JSON data",
6
6
  "homepage": "https://github.com/terascope/teraslice#readme",
7
7
  "bugs": {
@@ -33,50 +33,50 @@
33
33
  "test:watch": "ts-scripts test --watch . --"
34
34
  },
35
35
  "resolutions": {
36
- "debug": "^4.3.1",
36
+ "debug": "^4.3.4",
37
37
  "ms": "^2.1.3"
38
38
  },
39
39
  "dependencies": {
40
- "@terascope/elasticsearch-api": "^2.20.0",
41
- "@terascope/job-components": "^0.51.0",
42
- "@terascope/teraslice-messaging": "^0.22.0",
43
- "@terascope/utils": "^0.39.0",
44
- "async-mutex": "^0.3.1",
40
+ "@terascope/elasticsearch-api": "^3.2.0",
41
+ "@terascope/job-components": "^0.57.0",
42
+ "@terascope/teraslice-messaging": "^0.27.2",
43
+ "@terascope/utils": "^0.44.2",
44
+ "async-mutex": "^0.3.2",
45
45
  "barbe": "^3.0.16",
46
- "body-parser": "^1.19.0",
46
+ "body-parser": "^1.20.0",
47
47
  "convict": "^4.4.1",
48
48
  "decompress": "^4.2.1",
49
- "easy-table": "^1.1.1",
50
- "event-loop-stats": "^1.2.0",
51
- "express": "^4.17.1",
52
- "fs-extra": "^9.1.0",
49
+ "easy-table": "^1.2.0",
50
+ "event-loop-stats": "^1.4.1",
51
+ "express": "^4.17.3",
52
+ "fs-extra": "^10.0.1",
53
53
  "gc-stats": "^1.4.0",
54
- "got": "^11.8.2",
54
+ "got": "^11.8.3",
55
55
  "ip": "^1.1.5",
56
56
  "kubernetes-client": "^9.0.0",
57
57
  "lodash": "^4.17.21",
58
58
  "ms": "^2.1.3",
59
- "nanoid": "^3.1.21",
59
+ "nanoid": "^3.3.2",
60
60
  "porty": "^3.1.1",
61
- "semver": "^7.3.5",
61
+ "semver": "^7.3.6",
62
62
  "socket.io": "^1.7.4",
63
63
  "socket.io-client": "^1.7.4",
64
- "terafoundation": "^0.32.0",
64
+ "terafoundation": "^0.39.0",
65
65
  "uuid": "^8.3.2"
66
66
  },
67
67
  "devDependencies": {
68
- "@terascope/teraslice-op-test-harness": "^1.24.0",
68
+ "@terascope/teraslice-op-test-harness": "^1.24.1",
69
69
  "archiver": "^5.3.0",
70
70
  "bufferstreams": "^3.0.0",
71
- "chance": "^1.1.7",
71
+ "chance": "^1.1.8",
72
72
  "elasticsearch": "^15.4.1",
73
- "got": "^11.8.2",
73
+ "got": "^11.8.3",
74
74
  "jest-fixtures": "^0.6.0",
75
75
  "js-yaml": "^4.1.0",
76
- "nock": "^13.0.11"
76
+ "nock": "^13.2.4"
77
77
  },
78
78
  "engines": {
79
- "node": "^12.20.0 || >=14.17.0",
79
+ "node": "^12.22.0 || >=14.17.0",
80
80
  "yarn": ">=1.16.0"
81
81
  },
82
82
  "publishConfig": {