teraslice 3.1.1 → 3.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,13 +2,14 @@ import ms from 'ms';
2
2
  import { TSError, parseError, isTest, pDelay, pRetry, logError, pWhile, isString, getTypeOf, get, random, isInteger } from '@terascope/core-utils';
3
3
  import elasticsearchApi from '@terascope/elasticsearch-api';
4
4
  import { getClient } from '@terascope/job-components';
5
+ import { DataType } from '@terascope/data-types';
5
6
  import { makeLogger } from '../../workers/helpers/terafoundation.js';
6
7
  import { timeseriesIndex } from '../../utils/date_utils.js';
7
- import analyticsSchema from './mappings/analytics.js';
8
- import assetSchema from './mappings/asset.js';
9
- import executionSchema from './mappings/ex.js';
10
- import jobsSchema from './mappings/job.js';
11
- import stateSchema from './mappings/state.js';
8
+ import { analyticsDataTypeConfig, analyticsTemplate } from './mappings/analytics.js';
9
+ import { assetDataTypeConfig, assetMappingOverrides } from './mappings/asset.js';
10
+ import { executionDataTypeConfig } from './mappings/ex.js';
11
+ import { jobDataTypeConfig } from './mappings/job.js';
12
+ import { stateDataTypeConfig, stateTemplate } from './mappings/state.js';
12
13
  function validateId(recordId, recordType) {
13
14
  if (!recordId || !isString(recordId)) {
14
15
  throw new TSError(`Invalid ${recordType} id given ${getTypeOf(recordId)}`, {
@@ -56,39 +57,23 @@ export class TerasliceElasticsearchStorage {
56
57
  options;
57
58
  mapping;
58
59
  api;
60
+ indexSettings;
59
61
  constructor(backendConfig) {
60
62
  const { context, indexName, recordType, idField, storageName, bulkSize = 1000, fullResponse = false, logRecord = true, forceRefresh = true, logger } = backendConfig;
61
63
  this.context = context;
62
64
  this.storageName = storageName;
63
65
  this.logger = logger ?? makeLogger(context, 'elasticsearch_backend', { storageName });
64
66
  this.recordType = recordType;
65
- if (recordType === 'analytics') {
66
- this.mapping = analyticsSchema;
67
- }
68
- else if (recordType === 'asset') {
69
- this.mapping = assetSchema;
70
- }
71
- else if (recordType === 'ex') {
72
- this.mapping = executionSchema;
73
- }
74
- else if (recordType === 'job') {
75
- this.mapping = jobsSchema;
76
- }
77
- else if (recordType === 'state') {
78
- this.mapping = stateSchema;
79
- }
80
- else {
67
+ // Validate recordType
68
+ const validRecordTypes = ['analytics', 'asset', 'ex', 'job', 'state'];
69
+ if (!validRecordTypes.includes(recordType)) {
81
70
  throw new Error(`Could not find mapping for recordType: ${recordType}`);
82
71
  }
83
72
  const config = this.context.sysconfig.teraslice;
84
- const indexSettings = get(config, ['index_settings', this.storageName], {
73
+ this.indexSettings = get(config, ['index_settings', this.storageName], {
85
74
  number_of_shards: 5,
86
75
  number_of_replicas: 1,
87
76
  });
88
- this.mapping.settings = {
89
- 'index.number_of_shards': indexSettings.number_of_shards,
90
- 'index.number_of_replicas': indexSettings.number_of_replicas,
91
- };
92
77
  this.defaultIndexName = indexName;
93
78
  this.idField = idField;
94
79
  this.options = {
@@ -126,6 +111,7 @@ export class TerasliceElasticsearchStorage {
126
111
  try {
127
112
  const client = await getClient(this.context, connectionConfig, 'elasticsearch-next');
128
113
  this.api = elasticsearchApi(client, this.logger, options);
114
+ this._generateMapping();
129
115
  await this._createIndex(newIndex);
130
116
  await this.api.isAvailable(newIndex);
131
117
  return true;
@@ -407,6 +393,52 @@ export class TerasliceElasticsearchStorage {
407
393
  }
408
394
  return Promise.resolve(true);
409
395
  }
396
+ _getDataTypeConfig() {
397
+ switch (this.recordType) {
398
+ case 'analytics':
399
+ return {
400
+ config: analyticsDataTypeConfig,
401
+ template: analyticsTemplate
402
+ };
403
+ case 'asset':
404
+ return {
405
+ config: assetDataTypeConfig,
406
+ overrides: assetMappingOverrides
407
+ };
408
+ case 'ex':
409
+ return { config: executionDataTypeConfig };
410
+ case 'job':
411
+ return { config: jobDataTypeConfig };
412
+ case 'state':
413
+ return {
414
+ config: stateDataTypeConfig,
415
+ template: stateTemplate
416
+ };
417
+ default:
418
+ throw new Error(`Could not find DataType config for recordType: ${this.recordType}`);
419
+ }
420
+ }
421
+ _generateMapping() {
422
+ const clientMetadata = this.api.getClientMetadata();
423
+ const { config, overrides, template } = this._getDataTypeConfig();
424
+ const dataType = new DataType(config);
425
+ // Generate the mapping using DataType with client metadata for version-specific mappings
426
+ this.mapping = dataType.toESMapping({
427
+ ...clientMetadata,
428
+ overrides: {
429
+ ...overrides,
430
+ settings: {
431
+ 'index.number_of_shards': this.indexSettings.number_of_shards,
432
+ 'index.number_of_replicas': this.indexSettings.number_of_replicas,
433
+ }
434
+ }
435
+ });
436
+ // Add template pattern if this is an index that
437
+ // adds time at the end of the index (analytics/state)
438
+ if (template) {
439
+ this.mapping.template = template;
440
+ }
441
+ }
410
442
  async _createIndex(index = this.defaultIndexName) {
411
443
  // @ts-expect-error TODO: check type missing id
412
444
  const existQuery = { index };
@@ -1,45 +1,20 @@
1
- export default {
2
- template: '__analytics*',
3
- mappings: {
4
- dynamic: false,
5
- properties: {
6
- ex_id: {
7
- type: 'keyword'
8
- },
9
- job_id: {
10
- type: 'keyword'
11
- },
12
- worker_id: {
13
- type: 'keyword'
14
- },
15
- slice_id: {
16
- type: 'keyword'
17
- },
18
- slicer_id: {
19
- type: 'keyword'
20
- },
21
- op: {
22
- type: 'keyword'
23
- },
24
- order: {
25
- type: 'integer'
26
- },
27
- count: {
28
- type: 'integer'
29
- },
30
- state: {
31
- type: 'keyword'
32
- },
33
- time: {
34
- type: 'integer'
35
- },
36
- memory: {
37
- type: 'long'
38
- },
39
- '@timestamp': {
40
- type: 'date'
41
- }
42
- }
1
+ import { FieldType } from '@terascope/types';
2
+ export const analyticsDataTypeConfig = {
3
+ version: 1,
4
+ fields: {
5
+ ex_id: { type: FieldType.Keyword },
6
+ job_id: { type: FieldType.Keyword },
7
+ worker_id: { type: FieldType.Keyword },
8
+ slice_id: { type: FieldType.Keyword },
9
+ slicer_id: { type: FieldType.Keyword },
10
+ op: { type: FieldType.Keyword },
11
+ order: { type: FieldType.Integer },
12
+ count: { type: FieldType.Integer },
13
+ state: { type: FieldType.Keyword },
14
+ time: { type: FieldType.Integer },
15
+ memory: { type: FieldType.Long },
16
+ '@timestamp': { type: FieldType.Date }
43
17
  }
44
18
  };
19
+ export const analyticsTemplate = '__analytics*';
45
20
  //# sourceMappingURL=analytics.js.map
@@ -1,34 +1,30 @@
1
- export default {
1
+ import { FieldType } from '@terascope/types';
2
+ export const assetDataTypeConfig = {
3
+ version: 1,
4
+ fields: {
5
+ name: { type: FieldType.Keyword },
6
+ version: { type: FieldType.Keyword },
7
+ id: { type: FieldType.Keyword },
8
+ description: { type: FieldType.Keyword },
9
+ arch: { type: FieldType.Keyword },
10
+ platform: { type: FieldType.Keyword },
11
+ node_version: { type: FieldType.Integer },
12
+ _created: { type: FieldType.Date }
13
+ }
14
+ };
15
+ /**
16
+ * Override for the blob field which uses ES binary type.
17
+ * I could not find the datatype equivalent
18
+ * so we must add this field manually via mapping overrides.
19
+ * Once the issue below is resolved we can remove this.
20
+ * https://github.com/terascope/teraslice/issues/4296
21
+ */
22
+ export const assetMappingOverrides = {
2
23
  mappings: {
3
- dynamic: false,
4
24
  properties: {
5
25
  blob: {
6
26
  type: 'binary',
7
27
  doc_values: false
8
- },
9
- name: {
10
- type: 'keyword'
11
- },
12
- version: {
13
- type: 'keyword'
14
- },
15
- id: {
16
- type: 'keyword'
17
- },
18
- description: {
19
- type: 'keyword'
20
- },
21
- arch: {
22
- type: 'keyword'
23
- },
24
- platform: {
25
- type: 'keyword'
26
- },
27
- node_version: {
28
- type: 'integer'
29
- },
30
- _created: {
31
- type: 'date'
32
28
  }
33
29
  }
34
30
  }
@@ -1,60 +1,53 @@
1
- export default {
2
- mappings: {
3
- dynamic: false,
4
- properties: {
5
- active: {
6
- type: 'boolean'
7
- },
8
- job_id: {
9
- type: 'keyword'
10
- },
11
- ex_id: {
12
- type: 'keyword'
13
- },
14
- _context: {
15
- type: 'keyword'
16
- },
17
- _status: {
18
- type: 'keyword'
19
- },
20
- _has_errors: {
21
- type: 'keyword'
22
- },
23
- slicer_hostname: {
24
- type: 'keyword'
25
- },
26
- slicer_port: {
27
- type: 'keyword'
28
- },
29
- recovered_execution: {
30
- type: 'keyword'
31
- },
32
- recovered_slice_type: {
33
- type: 'keyword'
34
- },
35
- metadata: {
36
- type: 'object',
37
- enabled: false
38
- },
39
- _slicer_stats: {
40
- type: 'object'
41
- },
42
- _created: {
43
- type: 'date'
44
- },
45
- _updated: {
46
- type: 'date'
47
- },
48
- _deleted: {
49
- type: 'boolean'
50
- },
51
- _deleted_on: {
52
- type: 'date'
53
- },
54
- teraslice_version: {
55
- type: 'keyword'
56
- }
57
- }
1
+ import { FieldType } from '@terascope/types';
2
+ export const executionDataTypeConfig = {
3
+ version: 1,
4
+ fields: {
5
+ // ExecutionConfig fields
6
+ job_id: { type: FieldType.Keyword },
7
+ ex_id: { type: FieldType.Keyword },
8
+ _context: { type: FieldType.Keyword },
9
+ _created: { type: FieldType.Date },
10
+ _updated: { type: FieldType.Date },
11
+ _deleted: { type: FieldType.Boolean },
12
+ _deleted_on: { type: FieldType.Date },
13
+ _status: { type: FieldType.Keyword },
14
+ _has_errors: { type: FieldType.Boolean },
15
+ _slicer_stats: { type: FieldType.Object },
16
+ _failureReason: { type: FieldType.Text },
17
+ metadata: { type: FieldType.Object, indexed: false },
18
+ recovered_execution: { type: FieldType.Keyword },
19
+ recovered_slice_type: { type: FieldType.Keyword },
20
+ slicer_port: { type: FieldType.Integer },
21
+ slicer_hostname: { type: FieldType.Keyword },
22
+ teraslice_version: { type: FieldType.Keyword },
23
+ // ValidatedJobConfig fields
24
+ active: { type: FieldType.Boolean },
25
+ analytics: { type: FieldType.Boolean },
26
+ assets: { type: FieldType.Keyword },
27
+ autorecover: { type: FieldType.Boolean },
28
+ lifecycle: { type: FieldType.Keyword },
29
+ max_retries: { type: FieldType.Integer },
30
+ name: { type: FieldType.Text },
31
+ probation_window: { type: FieldType.Integer },
32
+ performance_metrics: { type: FieldType.Boolean },
33
+ log_level: { type: FieldType.Keyword },
34
+ slicers: { type: FieldType.Integer },
35
+ workers: { type: FieldType.Integer },
36
+ stateful: { type: FieldType.Boolean },
37
+ // k8s field
38
+ cpu: { type: FieldType.Float },
39
+ cpu_execution_controller: { type: FieldType.Float },
40
+ ephemeral_storage: { type: FieldType.Boolean },
41
+ memory: { type: FieldType.Integer },
42
+ memory_execution_controller: { type: FieldType.Integer },
43
+ resources_requests_cpu: { type: FieldType.Float },
44
+ resources_requests_memory: { type: FieldType.Integer },
45
+ resources_limits_cpu: { type: FieldType.Float },
46
+ resources_limits_memory: { type: FieldType.Integer },
47
+ kubernetes_image: { type: FieldType.Keyword },
48
+ prom_metrics_enabled: { type: FieldType.Boolean },
49
+ prom_metrics_port: { type: FieldType.Integer },
50
+ prom_metrics_add_default: { type: FieldType.Boolean }
58
51
  }
59
52
  };
60
53
  //# sourceMappingURL=ex.js.map
@@ -1,33 +1,42 @@
1
- export default {
2
- settings: {
3
- 'index.number_of_shards': 5,
4
- 'index.number_of_replicas': 1
5
- },
6
- mappings: {
7
- dynamic: false,
8
- properties: {
9
- active: {
10
- type: 'boolean'
11
- },
12
- job_id: {
13
- type: 'keyword'
14
- },
15
- _context: {
16
- type: 'keyword'
17
- },
18
- _created: {
19
- type: 'date'
20
- },
21
- _updated: {
22
- type: 'date'
23
- },
24
- _deleted: {
25
- type: 'boolean'
26
- },
27
- _deleted_on: {
28
- type: 'date'
29
- }
30
- }
1
+ import { FieldType } from '@terascope/types';
2
+ export const jobDataTypeConfig = {
3
+ version: 1,
4
+ fields: {
5
+ // JobConfig fields
6
+ job_id: { type: FieldType.Keyword },
7
+ _context: { type: FieldType.Keyword },
8
+ _created: { type: FieldType.Date },
9
+ _updated: { type: FieldType.Date },
10
+ _deleted: { type: FieldType.Boolean },
11
+ _deleted_on: { type: FieldType.Date },
12
+ // ValidatedJobConfig fields
13
+ active: { type: FieldType.Boolean },
14
+ analytics: { type: FieldType.Boolean },
15
+ assets: { type: FieldType.Keyword },
16
+ autorecover: { type: FieldType.Boolean },
17
+ lifecycle: { type: FieldType.Keyword },
18
+ max_retries: { type: FieldType.Integer },
19
+ name: { type: FieldType.Text },
20
+ probation_window: { type: FieldType.Integer },
21
+ performance_metrics: { type: FieldType.Boolean },
22
+ log_level: { type: FieldType.Keyword },
23
+ slicers: { type: FieldType.Integer },
24
+ workers: { type: FieldType.Integer },
25
+ stateful: { type: FieldType.Boolean },
26
+ // K8s fields
27
+ cpu: { type: FieldType.Float },
28
+ cpu_execution_controller: { type: FieldType.Float },
29
+ ephemeral_storage: { type: FieldType.Boolean },
30
+ memory: { type: FieldType.Integer },
31
+ memory_execution_controller: { type: FieldType.Integer },
32
+ resources_requests_cpu: { type: FieldType.Float },
33
+ resources_requests_memory: { type: FieldType.Integer },
34
+ resources_limits_cpu: { type: FieldType.Float },
35
+ resources_limits_memory: { type: FieldType.Integer },
36
+ kubernetes_image: { type: FieldType.Keyword },
37
+ prom_metrics_enabled: { type: FieldType.Boolean },
38
+ prom_metrics_port: { type: FieldType.Integer },
39
+ prom_metrics_add_default: { type: FieldType.Boolean }
31
40
  }
32
41
  };
33
42
  //# sourceMappingURL=job.js.map
@@ -1,33 +1,16 @@
1
- export default {
2
- template: '__state*',
3
- mappings: {
4
- dynamic: false,
5
- properties: {
6
- ex_id: {
7
- type: 'keyword'
8
- },
9
- slice_id: {
10
- type: 'keyword'
11
- },
12
- slicer_id: {
13
- type: 'keyword'
14
- },
15
- slicer_order: {
16
- type: 'integer'
17
- },
18
- state: {
19
- type: 'keyword'
20
- },
21
- _created: {
22
- type: 'date'
23
- },
24
- _updated: {
25
- type: 'date'
26
- },
27
- error: {
28
- type: 'keyword'
29
- }
30
- }
1
+ import { FieldType } from '@terascope/types';
2
+ export const stateDataTypeConfig = {
3
+ version: 1,
4
+ fields: {
5
+ ex_id: { type: FieldType.Keyword },
6
+ slice_id: { type: FieldType.Keyword },
7
+ slicer_id: { type: FieldType.Keyword },
8
+ slicer_order: { type: FieldType.Integer },
9
+ state: { type: FieldType.Keyword },
10
+ _created: { type: FieldType.Date },
11
+ _updated: { type: FieldType.Date },
12
+ error: { type: FieldType.Keyword }
31
13
  }
32
14
  };
15
+ export const stateTemplate = '__state*';
33
16
  //# sourceMappingURL=state.js.map
@@ -282,7 +282,7 @@ describe('k8s', () => {
282
282
  it('can patch a deployment by name', async () => {
283
283
  nock(_url, { encodedQueryParams: true })
284
284
  .patch('/apis/apps/v1/namespaces/default/deployments/test1')
285
- .reply(204, {});
285
+ .reply(200, {});
286
286
  const response = await k8s.patch({ name: 'testName' }, 'test1');
287
287
  expect(response).toEqual({});
288
288
  });
@@ -41,7 +41,7 @@ describe('S3 backend test', () => {
41
41
  connection: 'default',
42
42
  bucket: 'Invalid-Bucket-Name@'
43
43
  });
44
- await expect(s3Backend.initialize()).rejects.toThrow('Bucket name does not follow S3 naming rules: The specified bucket is not valid.');
44
+ await expect(s3Backend.initialize()).rejects.toThrow('Bucket name does not follow S3 naming rules: InvalidBucketName');
45
45
  });
46
46
  });
47
47
  describe('->verifyClient', () => {
@@ -80,8 +80,7 @@ const newSysConfig = (options) => {
80
80
  'elasticsearch-next': {
81
81
  default: {
82
82
  node: [SEARCH_TEST_HOST],
83
- requestTimeout: timeout,
84
- deadTimeout: timeout
83
+ requestTimeout: timeout
85
84
  }
86
85
  }
87
86
  }
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "teraslice",
3
3
  "displayName": "Teraslice",
4
- "version": "3.1.1",
4
+ "version": "3.2.1",
5
5
  "description": "Distributed computing platform for processing JSON data",
6
6
  "homepage": "https://github.com/terascope/teraslice#readme",
7
7
  "bugs": {
@@ -39,12 +39,12 @@
39
39
  },
40
40
  "dependencies": {
41
41
  "@kubernetes/client-node": "~1.4.0",
42
- "@terascope/core-utils": "~2.0.1",
43
- "@terascope/elasticsearch-api": "~5.0.1",
44
- "@terascope/file-asset-apis": "~2.0.0",
45
- "@terascope/job-components": "~2.0.1",
46
- "@terascope/teraslice-messaging": "~2.0.1",
47
- "@terascope/types": "~2.0.0",
42
+ "@terascope/core-utils": "~2.1.0",
43
+ "@terascope/elasticsearch-api": "~5.1.0",
44
+ "@terascope/file-asset-apis": "~2.0.1",
45
+ "@terascope/job-components": "~2.1.0",
46
+ "@terascope/teraslice-messaging": "~2.1.0",
47
+ "@terascope/types": "~2.1.0",
48
48
  "async-mutex": "~0.5.0",
49
49
  "barbe": "~3.0.17",
50
50
  "body-parser": "~2.2.2",
@@ -62,12 +62,12 @@
62
62
  "semver": "~7.7.3",
63
63
  "socket.io": "~4.8.3",
64
64
  "socket.io-client": "~4.8.3",
65
- "terafoundation": "~2.0.2",
66
- "terafoundation_kafka_connector": "~2.0.0",
65
+ "terafoundation": "~2.1.0",
66
+ "terafoundation_kafka_connector": "~2.0.1",
67
67
  "uuid": "~13.0.0"
68
68
  },
69
69
  "devDependencies": {
70
- "@terascope/opensearch-client": "~2.0.1",
70
+ "@terascope/opensearch-client": "~2.1.0",
71
71
  "@types/archiver": "~7.0.0",
72
72
  "@types/body-parser": "~1.19.6",
73
73
  "@types/decompress": "~4.2.7",
@@ -82,7 +82,7 @@
82
82
  "chance": "~1.1.13",
83
83
  "jest-fixtures": "~0.6.0",
84
84
  "js-yaml": "~4.1.1",
85
- "nock": "~13.5.6"
85
+ "nock": "~14.0.10"
86
86
  },
87
87
  "engines": {
88
88
  "node": ">=22.0.0",