@terascope/elasticsearch-api 3.3.7 → 3.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -278,6 +278,7 @@ module.exports = function elasticsearchApi(client, logger, _opConfig) {
      */
     function _filterRetryRecords(actionRecords, result) {
         const retry = [];
+        const deadLetter = [];
         const { items } = result;
 
         let nonRetriableError = false;
@@ -309,6 +310,11 @@ module.exports = function elasticsearchApi(client, logger, _opConfig) {
                ) {
                    nonRetriableError = true;
                    reason = `${item.error.type}--${item.error.reason}`;
+
+                   if (config._dead_letter_action === 'kafka_dead_letter') {
+                       deadLetter.push({ doc: actionRecords[i].data, reason });
+                       continue;
+                   }
                    break;
                }
            } else if (item.status == null || item.status < 400) {
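The new deadLetter branch only applies when the op config sets _dead_letter_action to 'kafka_dead_letter': instead of bailing out at the first non-retriable item, the loop records the offending document and its failure reason and moves on. A minimal sketch of one collected entry, using a made-up bulk response item:

// Sketch only: hypothetical non-retriable bulk item (already unwrapped from
// its action key) and the dead-letter entry the new branch collects for it.
const item = {
    status: 400,
    error: { type: 'mapper_parsing_exception', reason: 'failed to parse field [bytes]' }
};
const actionRecord = { action: { index: { _index: 'example' } }, data: { bytes: 'not a number' } };

const reason = `${item.error.type}--${item.error.reason}`;
const deadLetterEntry = { doc: actionRecord.data, reason };
// => { doc: { bytes: 'not a number' },
//      reason: 'mapper_parsing_exception--failed to parse field [bytes]' }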
@@ -318,7 +324,7 @@ module.exports = function elasticsearchApi(client, logger, _opConfig) {
 
         if (nonRetriableError) {
             return {
-                retry: [], successful, error: true, reason
+                retry: [], successful, error: true, reason, deadLetter
             };
         }
 
@@ -331,7 +337,7 @@ module.exports = function elasticsearchApi(client, logger, _opConfig) {
 
     /**
      * @param data {Array<{ action: data }>}
-     * @returns {Promise<number>}
+     * @returns {Promise<{ recordCount: number, deadLetter: record[] }>}
      */
     async function _bulkSend(actionRecords, previousCount = 0, previousRetryDelay = 0) {
         const body = actionRecords.flatMap((record, index) => {
@@ -343,18 +349,7 @@ module.exports = function elasticsearchApi(client, logger, _opConfig) {
                 throw new Error(`Bulk send record is missing the action property${dbg}`);
             }
 
-            if (!isElasticsearch6()) {
-                const actionKey = getFirstKey(record.action);
-                const { _type, ...withoutTypeAction } = record.action[actionKey];
-                // if data is specified return both
-                return record.data ? [{
-                    ...record.action,
-                    [actionKey]: withoutTypeAction
-                }, record.data] : [{
-                    ...record.action,
-                    [actionKey]: withoutTypeAction
-                }];
-            }
+            if (!isElasticsearch6()) return _nonEs6Prep(record);
 
             // if data is specified return both
             return record.data ? [record.action, record.data] : [record.action];
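For context, the Elasticsearch bulk body is a flat array that alternates action-metadata entries with their documents; this refactor is behavior-preserving and simply moves the non-ES6 handling into the new _nonEs6Prep helper shown further down. A small sketch of the body shape, with hypothetical records:

// Sketch only: what the flatMap produces for two hypothetical records on a
// non-ES6 client (action metadata entry, then its document when data is set).
const actionRecords = [
    { action: { index: { _index: 'example', _id: 1 } }, data: { name: 'a' } },
    { action: { delete: { _index: 'example', _id: 2 } } } // deletes carry no data
];
// resulting body:
// [
//     { index: { _index: 'example', _id: 1 } },
//     { name: 'a' },
//     { delete: { _index: 'example', _id: 2 } }
// ]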
@@ -363,25 +358,25 @@ module.exports = function elasticsearchApi(client, logger, _opConfig) {
         const response = await _clientRequest('bulk', { body });
         const results = response.body ? response.body : response;
 
-        if (!results.errors) {
-            return results.items.reduce((c, item) => {
-                const [value] = Object.values(item);
-                // ignore non-successful status codes
-                if (value.status != null && value.status >= 400) return c;
-                return c + 1;
-            }, 0);
-        }
+        if (!results.errors) return { recordCount: _affectedRowsCount(results) };
 
         const {
-            retry, successful, error, reason
+            retry, successful, error, reason, deadLetter
         } = _filterRetryRecords(actionRecords, results);
 
         if (error) {
+            if (config._dead_letter_action === 'kafka_dead_letter') {
+                return {
+                    recordCount: previousCount + successful,
+                    deadLetter
+                };
+            }
+
             throw new Error(`bulk send error: ${reason}`);
         }
 
         if (retry.length === 0) {
-            return previousCount + successful;
+            return { recordCount: previousCount + successful };
         }
 
         warning();
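The caller-visible change is the resolved value: _bulkSend now always resolves to an object, and when the kafka dead-letter action is configured, non-retriable failures no longer reject the promise; they come back in deadLetter alongside the count of records that did succeed. A sketch of how a caller might consume the new shape; sendToDeadLetterQueue is a hypothetical handler, not part of this package:

// Sketch only: consuming the new { recordCount, deadLetter } return value.
const { recordCount, deadLetter } = await api.bulkSend(actionRecords);

logger.info(`indexed ${recordCount} records`);

if (deadLetter && deadLetter.length > 0) {
    for (const { doc, reason } of deadLetter) {
        // hypothetical handler that forwards the record to a dead letter queue
        await sendToDeadLetterQueue(doc, reason);
    }
}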
@@ -390,10 +385,24 @@ module.exports = function elasticsearchApi(client, logger, _opConfig) {
         return _bulkSend(retry, previousCount + successful, nextRetryDelay);
     }
 
+    function _nonEs6Prep(record) {
+        const actionKey = getFirstKey(record.action);
+
+        const { _type, ...withoutTypeAction } = record.action[actionKey];
+        // if data is specified return both
+
+        const body = [{ ...record.action, [actionKey]: withoutTypeAction }];
+
+        if (record.data != null) body.push(record.data);
+
+        return body;
+    }
+
     /**
      * The new and improved bulk send with proper retry support
      *
-     * @returns {Promise<number>} the number of affected rows
+     * @returns {Promise<{ recordCount: number, deadLetter: record[] }>}
+     * the number of affected rows and records for kafka dead letter queue
      */
     function bulkSend(data) {
         if (!Array.isArray(data)) {
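_nonEs6Prep carries over the old inline logic: for non-ES6 clients it strips the deprecated _type field from the action metadata before the pair is flattened into the bulk body. A sketch of the transformation on a hypothetical record:

// Sketch only: what _nonEs6Prep returns for a hypothetical index record.
const record = {
    action: { index: { _index: 'example', _type: '_doc', _id: 1 } },
    data: { name: 'a' }
};
// _nonEs6Prep(record) =>
// [
//     { index: { _index: 'example', _id: 1 } }, // _type removed
//     { name: 'a' }                             // included because record.data is set
// ]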
@@ -403,6 +412,15 @@ module.exports = function elasticsearchApi(client, logger, _opConfig) {
         return Promise.resolve(_bulkSend(data));
     }
 
+    function _affectedRowsCount(results) {
+        return results.items.reduce((c, item) => {
+            const [value] = Object.values(item);
+            // ignore non-successful status codes
+            if (value.status != null && value.status >= 400) return c;
+            return c + 1;
+        }, 0);
+    }
+
     function _warn(warnLogger, msg) {
         let _lastTime = null;
         return () => {
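_affectedRowsCount is the old inline reduce extracted into a helper: it walks results.items and counts every item whose status is missing or below 400. A sketch against a hypothetical bulk response:

// Sketch only: counting affected rows from a hypothetical bulk response.
const results = {
    items: [
        { index: { status: 201 } },  // counted
        { index: { status: 200 } },  // counted
        { index: { status: 429 } },  // rejected, not counted
        { delete: { status: 404 } }  // not found, not counted
    ]
};
// _affectedRowsCount(results) === 2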
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
     "name": "@terascope/elasticsearch-api",
     "displayName": "Elasticsearch API",
-    "version": "3.3.7",
+    "version": "3.4.0",
     "description": "Elasticsearch client api used across multiple services, handles retries and exponential backoff",
     "homepage": "https://github.com/terascope/teraslice/tree/master/packages/elasticsearch-api#readme",
     "bugs": {
package/test/api-spec.js CHANGED
@@ -768,7 +768,7 @@ describe('elasticsearch-api', () => {
                 { delete: { _index: 'some_index', _type: 'events', _id: 5 } }
             ]
         });
-        return expect(result).toBe(2);
+        return expect(result).toEqual({ recordCount: 2 });
     });
 
     it('can remove type from bulkSend', async () => {
@@ -877,7 +877,7 @@ describe('elasticsearch-api', () => {
         });
         const result = await api.bulkSend(myBulkData);
 
-        expect(result).toBe(2);
+        expect(result).toEqual({ recordCount: 2 });
 
         bulkError = ['some_thing_else', 'some_thing_else'];
 
@@ -0,0 +1,106 @@
+'use strict';
+
+const {
+    debugLogger,
+    cloneDeep,
+    DataEntity
+} = require('@terascope/utils');
+const { ElasticsearchTestHelpers } = require('elasticsearch-store');
+const elasticsearchAPI = require('../index');
+
+const {
+    makeClient, cleanupIndex,
+    EvenDateData, TEST_INDEX_PREFIX,
+    createMappingFromDatatype
+} = ElasticsearchTestHelpers;
+
+jest.setTimeout(10000);
+
+function formatUploadData(
+    index, data, isES8ClientTest = false
+) {
+    const results = [];
+
+    data.forEach((record, i) => {
+        const meta = { _index: index, _id: i + 1 };
+
+        if (!isES8ClientTest) {
+            meta._type = '_doc';
+        }
+
+        results.push({ action: { index: meta }, data: record });
+    });
+
+    return results;
+}
+
+describe('bulkSend', () => {
+    let client;
+    let api;
+    let isElasticsearch8 = false;
+
+    beforeAll(async () => {
+        client = await makeClient();
+    });
+
+    describe('can return non-retryable records', () => {
+        const logger = debugLogger('congested_test');
+        const index = `${TEST_INDEX_PREFIX}_non-retryable-records`;
+
+        beforeAll(async () => {
+            await cleanupIndex(client, index);
+            api = elasticsearchAPI(client, logger, { _dead_letter_action: 'kafka_dead_letter' });
+            isElasticsearch8 = api.isElasticsearch8();
+
+            const overrides = {
+                settings: {
+                    'index.number_of_shards': 1,
+                    'index.number_of_replicas': 0,
+                },
+            };
+
+            const mapping = await createMappingFromDatatype(
+                client, EvenDateData.EvenDataType, '_doc', overrides
+            );
+
+            mapping.index = index;
+
+            await client.indices.create(mapping);
+        });
+
+        afterAll(async () => {
+            await cleanupIndex(client, index);
+        });
+
+        it('returns records that cannot be tried again if dlq config is set', async () => {
+            const docs = cloneDeep(EvenDateData.data.slice(0, 2));
+
+            docs[0].bytes = 'this is a bad value';
+
+            const result = await api.bulkSend(formatUploadData(index, docs, isElasticsearch8));
+
+            expect(result.recordCount).toBe(1);
+
+            expect(result.deadLetter[0].doc).toEqual(DataEntity.make({
+                ip: '120.67.248.156',
+                userAgent: 'Mozilla/5.0 (Windows; U; Windows NT 6.1) AppleWebKit/533.1.2 (KHTML, like Gecko) Chrome/35.0.894.0 Safari/533.1.2',
+                url: 'http://lucious.biz',
+                uuid: 'b23a8550-0081-453f-9e80-93a90782a5bd',
+                created: '2019-04-26T15:00:23.225+00:00',
+                ipv6: '9e79:7798:585a:b847:f1c4:81eb:0c3d:7eb8',
+                location: '50.15003, -94.89355',
+                bytes: 'this is a bad value'
+            }));
+
+            expect(result.deadLetter[0].reason).toBeDefined();
+        });
+
+        it('should return a count if not un-retryable records', async () => {
+            const docs = cloneDeep(EvenDateData.data.slice(0, 2));
+
+            const result = await api.bulkSend(formatUploadData(index, docs, isElasticsearch8));
+
+            expect(result).toEqual({ recordCount: 2 });
+        });
+    });
+});
@@ -1,6 +1,10 @@
 'use strict';
 
-const { debugLogger, chunk, pMap } = require('@terascope/utils');
+const {
+    debugLogger,
+    chunk,
+    pMap
+} = require('@terascope/utils');
 const { ElasticsearchTestHelpers } = require('elasticsearch-store');
 const elasticsearchAPI = require('../index');
 
@@ -11,7 +15,7 @@ const {
 
 const THREE_MINUTES = 3 * 60 * 1000;
 
-jest.setTimeout(THREE_MINUTES + 30000);
+jest.setTimeout(THREE_MINUTES + 60000);
 
 function formatUploadData(
     index, data, isES8ClientTest = false
@@ -31,33 +35,38 @@ function formatUploadData(
     return results;
 }
 
-describe('bulkSend can work with congested queues', () => {
-    const logger = debugLogger('congested_test');
-    const index = `${TEST_INDEX_PREFIX}_congested_queues_`;
-
+describe('bulkSend', () => {
     let client;
     let api;
     let isElasticsearch8 = false;
 
     beforeAll(async () => {
         client = await makeClient();
-        await cleanupIndex(client, index);
-        api = elasticsearchAPI(client, logger);
-        isElasticsearch8 = api.isElasticsearch8();
     });
 
-    afterAll(async () => {
-        await cleanupIndex(client, index);
-    });
+    describe('can work with congested queues', () => {
+        const logger = debugLogger('congested_test');
+        const index = `${TEST_INDEX_PREFIX}_congested_queues_`;
+
+        beforeAll(async () => {
+            await cleanupIndex(client, index);
+            api = elasticsearchAPI(client, logger);
+            isElasticsearch8 = api.isElasticsearch8();
+        });
+
+        afterAll(async () => {
+            await cleanupIndex(client, index);
+        });
 
-    it('can get correct data even with congested queues', async () => {
-        const chunkedData = chunk(EvenDateData.data, 50);
+        it('can get correct data even with congested queues', async () => {
+            const chunkedData = chunk(EvenDateData.data, 50);
 
-        await pMap(chunkedData, async (cData) => {
-            const formattedData = formatUploadData(index, cData, isElasticsearch8);
-            return api.bulkSend(formattedData);
-        }, { concurrency: 9 });
+            await pMap(chunkedData, async (cData) => {
+                const formattedData = formatUploadData(index, cData, isElasticsearch8);
+                return api.bulkSend(formattedData);
+            }, { concurrency: 9 });
 
-        await waitForData(client, index, EvenDateData.data.length, logger, THREE_MINUTES);
+            await waitForData(client, index, EvenDateData.data.length, logger, THREE_MINUTES);
+        });
     });
 });
package/types/index.d.ts CHANGED
@@ -32,9 +32,9 @@ declare namespace elasticsearchAPI {
     /**
      * The new and improved bulk send with proper retry support
      *
-     * @returns the number of affected rows
+     * @returns the number of affected rows, and deadLetter records if config is set
      */
-    bulkSend: (data: BulkRecord[]) => Promise<number>;
+    bulkSend: (data: BulkRecord[]) => Promise<{ recordCount: number; deadLetter?: any[]; }>;
     nodeInfo: (query: any) => Promise<any>;
     nodeStats: (query: any) => Promise<any>;
     buildQuery: (opConfig: Config, msg: any) => ClientParams.SearchParams;
@@ -62,7 +62,7 @@ declare namespace elasticsearchAPI {
     /**
     * This is used for improved bulk sending function
     */
-    export interface AnyBulkAction {
+    export interface AnyBulkAction {
         update?: Partial<BulkActionMetadata>;
         index?: Partial<BulkActionMetadata>;
         create?: Partial<BulkActionMetadata>;
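Note that deadLetter is optional in the declared type: it is only populated when _dead_letter_action is set to 'kafka_dead_letter' and the bulk response contained non-retriable errors, so consumers should guard before reading it. A minimal sketch:

// Sketch only: deadLetter may be undefined, so guard before using it.
const result = await api.bulkSend(records);

if (result.deadLetter && result.deadLetter.length > 0) {
    logger.warn(`${result.deadLetter.length} records could not be indexed`);
}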