node-es-transformer 1.0.0-beta2 → 1.0.0-beta3

This diff shows the changes between publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
package/README.md CHANGED
@@ -110,7 +110,8 @@ transformer({
 
 - `deleteIndex`: Setting to automatically delete an existing index, default is `false`.
 - `sourceClientConfig`/`targetClientConfig`: Optional Elasticsearch client options, defaults to `{ node: 'http://localhost:9200' }`.
-- `bufferSize`: The amount of documents inserted with each Elasticsearch bulk insert request, default is `1000`.
+- `bufferSize`: The threshold to flush bulk index request in KBytes, defaults to `5120`.
+- `searchSize`: The amount of documents to be fetched with each search request when reindexing from another source index.
 - `fileName`: Source filename to ingest, supports wildcards. If this is set, `sourceIndexName` is not allowed.
 - `splitRegex`: Custom line split regex, defaults to `/\n/`.
 - `sourceIndexName`: The source Elasticsearch index to reindex from. If this is set, `fileName` is not allowed.
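
Taken together, the two options now separate read batching from write batching. A minimal sketch of a reindexing call using both, as documented above (the index names and import style are illustrative placeholders, not taken from this diff):

```js
const transformer = require('node-es-transformer');

transformer({
  sourceIndexName: 'my-source-index', // placeholder
  targetIndexName: 'my-target-index', // placeholder
  searchSize: 1000, // docs fetched per search/scroll request (read side)
  bufferSize: 5120, // bulk flush threshold in KBytes, i.e. ~5 MiB (write side)
});
```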
@@ -147,10 +148,10 @@ yarn
 
 ```bash
 # Download the docker image
-docker pull docker.elastic.co/elasticsearch/elasticsearch:8.10.4
+docker pull docker.elastic.co/elasticsearch/elasticsearch:8.15.0
 
 # Run the container
-docker run --name es01 --net elastic -p 9200:9200 -it -m 1GB -e "discovery.type=single-node" -e "xpack.security.enabled=false" docker.elastic.co/elasticsearch/elasticsearch:8.10.4
+docker run --name es01 --net elastic -p 9200:9200 -it -m 1GB -e "discovery.type=single-node" -e "xpack.security.enabled=false" docker.elastic.co/elasticsearch/elasticsearch:8.15.0
 ```
 
 To commit, use `cz`. To prepare a release, use e.g. `yarn release -- --release-as 1.0.0-beta2`.
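
Assuming the single-node container above is running with security disabled, a quick sanity check from Node using the same client the library depends on:

```js
const { Client } = require('@elastic/elasticsearch');

const client = new Client({ node: 'http://localhost:9200' });

// Prints the cluster name and version if the container is reachable.
client.info().then((info) => {
  console.log(info.cluster_name, info.version.number);
});
```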
package/dist/node-es-transformer.cjs.js CHANGED
@@ -5,10 +5,20 @@ function _interopDefault (ex) { return (ex && (typeof ex === 'object') && 'defau
 var fs = _interopDefault(require('fs'));
 var es = _interopDefault(require('event-stream'));
 var glob = _interopDefault(require('glob'));
+var split = _interopDefault(require('split2'));
+var stream = require('stream');
 var cliProgress = _interopDefault(require('cli-progress'));
 var elasticsearch = _interopDefault(require('@elastic/elasticsearch'));
 
-var DEFAULT_BUFFER_SIZE = 1000;
+// In earlier versions this was used to set the number of docs to index in a
+// single bulk request. Since we switched to use the helpers.bulk() method from
+// the ES client, this now translates to the `flushBytes` option of the helper.
+// However, for kind of a backwards compability with the old values, this uses
+// KBytes instead of Bytes. It will be multiplied by 1024 in the index queue.
+var DEFAULT_BUFFER_SIZE = 5120;
+
+// The default number of docs to fetch in a single search request when reindexing.
+var DEFAULT_SEARCH_SIZE = 1000;
 
 function createMappingFactory(ref) {
   var sourceClient = ref.sourceClient;
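
As a quick check of the comment above, the default works out to a 5 MiB flush threshold (a sketch of the arithmetic, not part of the bundle):

```js
const bufferSize = 5120;              // documented default, in KBytes
const flushBytes = bufferSize * 1024; // 5242880 bytes = 5 MiB per bulk flush
```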
@@ -19,6 +29,7 @@ function createMappingFactory(ref) {
   var mappingsOverride = ref.mappingsOverride;
   var indexMappingTotalFieldsLimit = ref.indexMappingTotalFieldsLimit;
   var verbose = ref.verbose;
+  var deleteIndex = ref.deleteIndex;
 
   return async function () {
     var targetMappings = mappingsOverride ? undefined : mappings;
@@ -28,7 +39,14 @@ function createMappingFactory(ref) {
       var mapping = await sourceClient.indices.getMapping({
         index: sourceIndexName,
       });
-      targetMappings = mapping[sourceIndexName].mappings;
+      if (mapping[sourceIndexName]) {
+        targetMappings = mapping[sourceIndexName].mappings;
+      } else {
+        var allMappings = Object.values(mapping);
+        if (allMappings.length > 0) {
+          targetMappings = Object.values(mapping)[0].mappings;
+        }
+      }
     } catch (err) {
       console.log('Error reading source mapping', err);
       return;
@@ -43,18 +61,28 @@ function createMappingFactory(ref) {
     }
 
     try {
-      var resp = await targetClient.indices.create({
-        index: targetIndexName,
-        body: Object.assign({}, {mappings: targetMappings},
-          (indexMappingTotalFieldsLimit !== undefined
-            ? {
-                settings: {
-                  'index.mapping.total_fields.limit': indexMappingTotalFieldsLimit,
-                },
-              }
-            : {})),
-      });
-      if (verbose) { console.log('Created target mapping', resp); }
+      var indexExists = await targetClient.indices.exists({ index: targetIndexName });
+
+      if (indexExists === true && deleteIndex === true) {
+        await targetClient.indices.delete({ index: targetIndexName });
+      }
+
+      if (indexExists === false || deleteIndex === true) {
+        var resp = await targetClient.indices.create({
+          index: targetIndexName,
+          body: Object.assign({}, {mappings: targetMappings},
+            (indexMappingTotalFieldsLimit !== undefined
+              ? {
+                  settings: {
+                    'index.mapping.total_fields.limit': indexMappingTotalFieldsLimit,
+                    'index.number_of_shards': 1,
+                    'index.number_of_replicas': 0,
+                  },
+                }
+              : {})),
+        });
+        if (verbose) { console.log('Created target mapping', resp); }
+      }
     } catch (err) {
       console.log('Error creating target mapping', err);
     }
@@ -62,17 +90,14 @@ function createMappingFactory(ref) {
   };
 }
 
-var MAX_QUEUE_SIZE = 15;
-
 function fileReaderFactory(indexer, fileName, transform, splitRegex, verbose) {
   function startIndex(files) {
-    var ingestQueueSize = 0;
     var finished = false;
 
     var file = files.shift();
     var s = fs
       .createReadStream(file)
-      .pipe(es.split(splitRegex))
+      .pipe(split(splitRegex))
       .pipe(
         es
           .mapSync(function (line) {
@@ -120,20 +145,13 @@ function fileReaderFactory(indexer, fileName, transform, splitRegex, verbose) {
           })
       );
 
-    indexer.queueEmitter.on('queue-size', async function (size) {
+    indexer.queueEmitter.on('pause', function () {
       if (finished) { return; }
-      ingestQueueSize = size;
-
-      if (ingestQueueSize < MAX_QUEUE_SIZE) {
-        s.resume();
-      } else {
-        s.pause();
-      }
+      s.pause();
     });
 
     indexer.queueEmitter.on('resume', function () {
       if (finished) { return; }
-      ingestQueueSize = 0;
       s.resume();
     });
   }
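
The `split2` package used above is a drop-in replacement for `es.split()`. A minimal sketch of the same pipeline in isolation (`file.ndjson` is a placeholder path):

```js
const fs = require('fs');
const split = require('split2');

fs.createReadStream('file.ndjson')
  .pipe(split(/\n/)) // emits one 'data' event per line, like es.split() did
  .on('data', (line) => console.log(line));
```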
@@ -149,7 +167,7 @@ var EventEmitter = require('events');
 
 var queueEmitter = new EventEmitter();
 
-var parallelCalls = 1;
+var parallelCalls = 5;
 
 // a simple helper queue to bulk index documents
 function indexQueueFactory(ref) {
@@ -159,78 +177,76 @@ function indexQueueFactory(ref) {
   var skipHeader = ref.skipHeader; if ( skipHeader === void 0 ) skipHeader = false;
   var verbose = ref.verbose; if ( verbose === void 0 ) verbose = true;
 
-  var buffer = [];
-  var queue = [];
-  var ingesting = 0;
-  var ingestTimes = [];
-  var finished = false;
+  var flushBytes = bufferSize * 1024; // Convert KB to Bytes
+  var highWaterMark = flushBytes * parallelCalls;
 
-  var ingest = function (b) {
-    if (typeof b !== 'undefined') {
-      queue.push(b);
-      queueEmitter.emit('queue-size', queue.length);
-    }
+  // Create a Readable stream
+  var stream$$1 = new stream.Readable({
+    read: function read() {}, // Implement read but we manage pushing manually
+    highWaterMark: highWaterMark, // Buffer size for backpressure management
+  });
 
-    if (ingestTimes.length > 5) { ingestTimes = ingestTimes.slice(-5); }
+  async function* ndjsonStreamIterator(readableStream) {
+    var buffer = ''; // To hold the incomplete data
+    var skippedHeader = false;
 
-    if (ingesting < parallelCalls) {
-      var docs = queue.shift();
+    // Iterate over the stream using async iteration
+    for await (var chunk of readableStream) {
+      buffer += chunk.toString(); // Accumulate the chunk data in the buffer
 
-      queueEmitter.emit('queue-size', queue.length);
-      if (queue.length <= 5) {
-        queueEmitter.emit('resume');
-      }
+      // Split the buffer into lines (NDJSON items)
+      var lines = buffer.split('\n');
 
-      ingesting += 1;
-
-      if (verbose)
-        { console.log(("bulk ingest docs: " + (docs.length / 2) + ", queue length: " + (queue.length))); }
-
-      var start = Date.now();
-      client
-        .bulk({ body: docs })
-        .then(function () {
-          var end = Date.now();
-          var delta = end - start;
-          ingestTimes.push(delta);
-          ingesting -= 1;
-
-          var ingestTimesMovingAverage =
-            ingestTimes.length > 0
-              ? ingestTimes.reduce(function (p, c) { return p + c; }, 0) / ingestTimes.length
-              : 0;
-          var ingestTimesMovingAverageSeconds = Math.floor(ingestTimesMovingAverage / 1000);
-
-          if (
-            ingestTimes.length > 0 &&
-            ingestTimesMovingAverageSeconds < 30 &&
-            parallelCalls < 10
-          ) {
-            parallelCalls += 1;
-          } else if (
-            ingestTimes.length > 0 &&
-            ingestTimesMovingAverageSeconds >= 30 &&
-            parallelCalls > 1
-          ) {
-            parallelCalls -= 1;
-          }
+      // The last line might be incomplete, so hold it back in the buffer
+      buffer = lines.pop();
 
-          if (queue.length > 0) {
-            ingest();
-          } else if (queue.length === 0 && finished) {
-            queueEmitter.emit('finish');
-          }
-        })
-        .catch(function (error) {
-          console.error(error);
-          ingesting -= 1;
-          parallelCalls = 1;
-          if (queue.length > 0) {
-            ingest();
+      // Yield each complete JSON object
+      for (var line of lines) {
+        if (line.trim()) {
+          try {
+            if (!skipHeader || (skipHeader && !skippedHeader)) {
+              yield JSON.parse(line); // Parse and yield the JSON object
+              skippedHeader = true;
+            }
+          } catch (err) {
+            // Handle JSON parse errors if necessary
+            console.error('Failed to parse JSON:', err);
           }
-        });
+        }
+      }
     }
-  };
+
+    // Handle any remaining data in the buffer after the stream ends
+    if (buffer.trim()) {
+      try {
+        yield JSON.parse(buffer);
+      } catch (err) {
+        console.error('Failed to parse final JSON:', err);
+      }
+    }
+  }
+
+  var finished = false;
+
+  // Async IIFE to start bulk indexing
+  (async function () {
+    console.log('START BULK INDEXING');
+    await client.helpers.bulk({
+      concurrency: parallelCalls,
+      flushBytes: flushBytes,
+      flushInterval: 1000,
+      refreshOnCompletion: true,
+      datasource: ndjsonStreamIterator(stream$$1),
+      onDocument: function onDocument(doc) {
+        return {
+          index: { _index: targetIndexName },
+        };
+      },
+    });
+    console.log('FINISHED BULK INDEXING');
+
+    queueEmitter.emit('finish');
+  })();
 
   return {
     add: function (doc) {
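
The async IIFE above hands the whole ingest loop to the official client's bulk helper. A self-contained sketch of that pattern with a hardcoded datasource (the node URL and index name are placeholders):

```js
const { Client } = require('@elastic/elasticsearch');

const client = new Client({ node: 'http://localhost:9200' });

// Any async iterable works as a datasource for the bulk helper.
async function* docs() {
  yield { user: 'a' };
  yield { user: 'b' };
}

client.helpers
  .bulk({
    datasource: docs(),
    concurrency: 5,          // parallel bulk requests
    flushBytes: 5120 * 1024, // flush threshold in bytes
    flushInterval: 1000,     // also flush after 1s of inactivity
    refreshOnCompletion: true,
    onDocument: () => ({ index: { _index: 'my-target-index' } }),
  })
  .then((stats) => console.log(stats.successful, stats.failed));
```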
@@ -238,37 +254,22 @@ function indexQueueFactory(ref) {
         throw new Error('Unexpected doc added after indexer should finish.');
       }
 
-      if (!skipHeader) {
-        var header = { index: { _index: targetIndexName } };
-        buffer.push(header);
-      }
-      buffer.push(doc);
-
-      if (queue.length === 0) {
-        queueEmitter.emit('resume');
-      }
-
-      if (buffer.length >= bufferSize * 2) {
-        ingest(buffer);
-        buffer = [];
+      var canContinue = stream$$1.push(((JSON.stringify(doc)) + "\n"));
+      if (!canContinue) {
+        queueEmitter.emit('pause');
+        stream$$1.once('drain', function () {
+          queueEmitter.emit('resume');
+        });
       }
     },
     finish: function () {
       finished = true;
-
-      if (buffer.length > 0) {
-        ingest(buffer);
-        buffer = [];
-      } else if (queue.length === 0 && ingesting === 0) {
-        queueEmitter.emit('finish');
-      }
+      stream$$1.push(null);
     },
     queueEmitter: queueEmitter,
   };
 }
 
-var MAX_QUEUE_SIZE$1 = 15;
-
 // create a new progress bar instance and use shades_classic theme
 var progressBar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
 
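The `add()`/`finish()` pair above drives backpressure off the return value of `Readable.push()`. Note that plain `Readable` streams have no built-in `'drain'` event (it belongs to `Writable`), so a generic, self-contained sketch of this idiom usually signals demand from the `read()` callback instead:

```js
const { Readable } = require('stream');

let paused = false;

const source = new Readable({
  // read() fires when the consumer wants more data, i.e. the internal
  // buffer has room again, a natural place to lift a pause flag.
  read() {
    if (paused) {
      paused = false;
      // resume the upstream producer here
    }
  },
  highWaterMark: 5120 * 1024,
});

function produce(doc) {
  // push() returns false once the buffer reaches the highWaterMark
  if (!source.push(JSON.stringify(doc) + '\n')) {
    paused = true; // stop the upstream producer until read() fires
  }
}
```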
@@ -278,21 +279,24 @@ function indexReaderFactory(
   transform,
   client,
   query,
-  bufferSize,
+  searchSize,
   populatedFields
 ) {
-  if ( bufferSize === void 0 ) bufferSize = DEFAULT_BUFFER_SIZE;
+  if ( searchSize === void 0 ) searchSize = DEFAULT_SEARCH_SIZE;
   if ( populatedFields === void 0 ) populatedFields = false;
 
   return async function indexReader() {
-    var responseQueue = [];
     var docsNum = 0;
+    var scrollId;
+    var finished = false;
+    var readActive = false;
+    var backPressurePause = false;
 
     async function fetchPopulatedFields() {
       try {
         var response = await client.search({
           index: sourceIndexName,
-          size: bufferSize,
+          size: searchSize,
           query: {
             function_score: {
               query: query,
@@ -303,7 +307,7 @@ function indexReaderFactory(
 
         // Get all field names for each returned doc and flatten it
         // to a list of unique field names used across all docs.
-        return new Set(response.hits.hits.map(function (d) { return Object.keys(d._source); }).flat(1));
+        return Array.from(new Set(response.hits.hits.map(function (d) { return Object.keys(d._source); }).flat(1)));
       } catch (e) {
        console.log('error', e);
       }
@@ -312,7 +316,7 @@ function indexReaderFactory(
     function search(fields) {
       return client.search(Object.assign({}, {index: sourceIndexName,
         scroll: '600s',
-        size: bufferSize,
+        size: searchSize,
         query: query},
         (fields ? { _source: fields } : {})));
     }
@@ -329,21 +333,14 @@ function indexReaderFactory(
     // identify populated fields
     if (populatedFields) {
       fieldsWithData = await fetchPopulatedFields();
-      console.log('fieldsWithData', fieldsWithData);
     }
 
-    // start things off by searching, setting a scroll timeout, and pushing
-    // our first response into the queue to be processed
-    var se = await search(fieldsWithData);
-    responseQueue.push(se);
-    progressBar.start(se.hits.total.value, 0);
-    console.log('se', se.hits.hits[0]);
+    await fetchNextResponse();
 
     function processHit(hit) {
       docsNum += 1;
       try {
         var doc = typeof transform === 'function' ? transform(hit._source) : hit._source; // eslint-disable-line no-underscore-dangle
-        // console.log('doc', doc);
 
         // if doc is undefined we'll skip indexing it
         if (typeof doc === 'undefined') {
@@ -363,68 +360,117 @@ function indexReaderFactory(
       }
     }
 
-    var ingestQueueSize = 0;
-    var scrollId = se._scroll_id; // eslint-disable-line no-underscore-dangle
-    var readActive = false;
-
-    async function processResponseQueue() {
-      while (responseQueue.length) {
-        readActive = true;
-        var response = responseQueue.shift();
+    async function fetchNextResponse() {
+      readActive = true;
 
-        // collect the docs from this response
-        response.hits.hits.forEach(processHit);
+      var sc = scrollId ? await scroll(scrollId) : await search(fieldsWithData);
 
-        progressBar.update(docsNum);
+      if (!scrollId) {
+        progressBar.start(sc.hits.total.value, 0);
+      }
 
-        // check to see if we have collected all of the docs
-        if (response.hits.total.value === docsNum) {
-          indexer.finish();
-          break;
-        }
+      scrollId = sc._scroll_id;
+      readActive = false;
 
-        if (ingestQueueSize < MAX_QUEUE_SIZE$1) {
-          // get the next response if there are more docs to fetch
-          var sc = await scroll(response._scroll_id); // eslint-disable-line no-await-in-loop,no-underscore-dangle,max-len
-          scrollId = sc._scroll_id; // eslint-disable-line no-underscore-dangle
-          responseQueue.push(sc);
-        } else {
-          readActive = false;
-        }
-      }
+      processResponse(sc);
     }
 
-    indexer.queueEmitter.on('queue-size', async function (size) {
-      ingestQueueSize = size;
+    async function processResponse(response) {
+      // collect the docs from this response
+      response.hits.hits.forEach(processHit);
+
+      progressBar.update(docsNum);
+
+      // check to see if we have collected all of the docs
+      if (response.hits.total.value === docsNum) {
+        indexer.finish();
+        return;
+      }
 
-      if (!readActive && ingestQueueSize < MAX_QUEUE_SIZE$1) {
-        // get the next response if there are more docs to fetch
-        var sc = await scroll(scrollId); // eslint-disable-line no-await-in-loop,no-underscore-dangle,max-len
-        scrollId = sc._scroll_id; // eslint-disable-line no-underscore-dangle
-        responseQueue.push(sc);
-        processResponseQueue();
+      if (!backPressurePause) {
+        await fetchNextResponse();
       }
+    }
+
+    indexer.queueEmitter.on('pause', async function () {
+      backPressurePause = true;
     });
 
     indexer.queueEmitter.on('resume', async function () {
-      ingestQueueSize = 0;
+      backPressurePause = false;
 
-      if (readActive) {
+      if (readActive || finished) {
         return;
       }
 
-      // get the next response if there are more docs to fetch
-      var sc = await scroll(scrollId); // eslint-disable-line no-await-in-loop,no-underscore-dangle,max-len
-      scrollId = sc._scroll_id; // eslint-disable-line no-underscore-dangle
-      responseQueue.push(sc);
-      processResponseQueue();
+      await fetchNextResponse();
     });
 
     indexer.queueEmitter.on('finish', function () {
+      finished = true;
       progressBar.stop();
     });
+  };
+}
+
+function streamReaderFactory(indexer, stream$$1, transform, splitRegex, verbose) {
+  function startIndex() {
+    console.log('START INDEX', splitRegex);
+    var finished = false;
+
+    var s = stream$$1.pipe(split(splitRegex)).pipe(
+      es
+        .mapSync(function (line) {
+          try {
+            // skip empty lines
+            if (line === '') {
+              return;
+            }
+
+            var doc =
+              typeof transform === 'function' ? JSON.stringify(transform(JSON.parse(line))) : line;
+
+            // if doc is undefined we'll skip indexing it
+            if (typeof doc === 'undefined') {
+              s.resume();
+              return;
+            }
+
+            // the transform callback may return an array of docs so we can emit
+            // multiple docs from a single line
+            if (Array.isArray(doc)) {
+              doc.forEach(function (d) { return indexer.add(d); });
+              return;
+            }
+
+            indexer.add(doc);
+          } catch (e) {
+            console.log('error', e);
+          }
+        })
+        .on('error', function (err) {
+          console.log('Error while reading file.', err);
+        })
+        .on('end', function () {
+          if (verbose) { console.log('Read entire stream.'); }
+          indexer.finish();
+          finished = true;
+        })
+    );
+
+    indexer.queueEmitter.on('pause', function () {
+      if (finished) { return; }
+      s.pause();
+    });
 
-    processResponseQueue();
+    indexer.queueEmitter.on('resume', function () {
+      if (finished) { return; }
+      s.resume();
+    });
+  }
+
+  return function () {
+    startIndex();
   };
 }
 
@@ -433,6 +479,8 @@ async function transformer(ref) {
   var sourceClientConfig = ref.sourceClientConfig;
   var targetClientConfig = ref.targetClientConfig;
   var bufferSize = ref.bufferSize; if ( bufferSize === void 0 ) bufferSize = DEFAULT_BUFFER_SIZE;
+  var searchSize = ref.searchSize; if ( searchSize === void 0 ) searchSize = DEFAULT_SEARCH_SIZE;
+  var stream$$1 = ref.stream;
   var fileName = ref.fileName;
   var splitRegex = ref.splitRegex; if ( splitRegex === void 0 ) splitRegex = /\n/;
   var sourceIndexName = ref.sourceIndexName;
@@ -446,6 +494,7 @@ async function transformer(ref) {
   var transform = ref.transform;
   var verbose = ref.verbose; if ( verbose === void 0 ) verbose = true;
 
+  console.log('TRANSFORMER');
   if (typeof targetIndexName === 'undefined') {
     throw Error('targetIndexName must be specified.');
   }
@@ -468,6 +517,7 @@ async function transformer(ref) {
     mappingsOverride: mappingsOverride,
     indexMappingTotalFieldsLimit: indexMappingTotalFieldsLimit,
     verbose: verbose,
+    deleteIndex: deleteIndex,
   });
   var indexer = indexQueueFactory({
     targetClient: targetClient,
@@ -482,8 +532,12 @@ async function transformer(ref) {
     throw Error('Only either one of fileName or sourceIndexName can be specified.');
   }
 
-  if (typeof fileName === 'undefined' && typeof sourceIndexName === 'undefined') {
-    throw Error('Either fileName or sourceIndexName must be specified.');
+  if (
+    (typeof fileName !== 'undefined' && typeof sourceIndexName !== 'undefined') ||
+    (typeof fileName !== 'undefined' && typeof stream$$1 !== 'undefined') ||
+    (typeof sourceIndexName !== 'undefined' && typeof stream$$1 !== 'undefined')
+  ) {
+    throw Error('Only one of fileName, sourceIndexName, or stream can be specified.');
   }
 
   if (typeof fileName !== 'undefined') {
@@ -497,18 +551,25 @@ async function transformer(ref) {
        transform,
        sourceClient,
        query,
-        bufferSize,
+        searchSize,
        populatedFields
      );
    }
 
+    if (typeof stream$$1 !== 'undefined') {
+      console.log('STREAM READER');
+      return streamReaderFactory(indexer, stream$$1, transform, splitRegex, verbose);
+    }
+
    return null;
  }
 
  var reader = getReader();
+  console.log('READER INITIALIZED');
 
  try {
    var indexExists = await targetClient.indices.exists({ index: targetIndexName });
+    console.log('INDEX EXISTS', indexExists);
 
    if (indexExists === false) {
      await createMapping();
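
The hunks above also wire a new `stream` input option through `transformer()` to the new `streamReaderFactory()`. A minimal sketch of how it could be used (the import style and index name are illustrative assumptions, not taken from this diff):

```js
const { Readable } = require('stream');
const transformer = require('node-es-transformer');

transformer({
  // any readable stream of newline-delimited JSON
  stream: Readable.from('{"user":"a"}\n{"user":"b"}\n'),
  targetIndexName: 'my-target-index', // placeholder
});
```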
package/dist/node-es-transformer.esm.js CHANGED
@@ -1,10 +1,20 @@
 import fs from 'fs';
 import es from 'event-stream';
 import glob from 'glob';
+import split from 'split2';
+import { Readable } from 'stream';
 import cliProgress from 'cli-progress';
 import elasticsearch from '@elastic/elasticsearch';
 
-var DEFAULT_BUFFER_SIZE = 1000;
+// In earlier versions this was used to set the number of docs to index in a
+// single bulk request. Since we switched to use the helpers.bulk() method from
+// the ES client, this now translates to the `flushBytes` option of the helper.
+// However, for kind of a backwards compability with the old values, this uses
+// KBytes instead of Bytes. It will be multiplied by 1024 in the index queue.
+var DEFAULT_BUFFER_SIZE = 5120;
+
+// The default number of docs to fetch in a single search request when reindexing.
+var DEFAULT_SEARCH_SIZE = 1000;
 
 function createMappingFactory(ref) {
   var sourceClient = ref.sourceClient;
@@ -15,6 +25,7 @@ function createMappingFactory(ref) {
   var mappingsOverride = ref.mappingsOverride;
   var indexMappingTotalFieldsLimit = ref.indexMappingTotalFieldsLimit;
   var verbose = ref.verbose;
+  var deleteIndex = ref.deleteIndex;
 
   return async function () {
     var targetMappings = mappingsOverride ? undefined : mappings;
@@ -24,7 +35,14 @@ function createMappingFactory(ref) {
       var mapping = await sourceClient.indices.getMapping({
         index: sourceIndexName,
       });
-      targetMappings = mapping[sourceIndexName].mappings;
+      if (mapping[sourceIndexName]) {
+        targetMappings = mapping[sourceIndexName].mappings;
+      } else {
+        var allMappings = Object.values(mapping);
+        if (allMappings.length > 0) {
+          targetMappings = Object.values(mapping)[0].mappings;
+        }
+      }
     } catch (err) {
       console.log('Error reading source mapping', err);
       return;
@@ -39,18 +57,28 @@ function createMappingFactory(ref) {
     }
 
     try {
-      var resp = await targetClient.indices.create({
-        index: targetIndexName,
-        body: Object.assign({}, {mappings: targetMappings},
-          (indexMappingTotalFieldsLimit !== undefined
-            ? {
-                settings: {
-                  'index.mapping.total_fields.limit': indexMappingTotalFieldsLimit,
-                },
-              }
-            : {})),
-      });
-      if (verbose) { console.log('Created target mapping', resp); }
+      var indexExists = await targetClient.indices.exists({ index: targetIndexName });
+
+      if (indexExists === true && deleteIndex === true) {
+        await targetClient.indices.delete({ index: targetIndexName });
+      }
+
+      if (indexExists === false || deleteIndex === true) {
+        var resp = await targetClient.indices.create({
+          index: targetIndexName,
+          body: Object.assign({}, {mappings: targetMappings},
+            (indexMappingTotalFieldsLimit !== undefined
+              ? {
+                  settings: {
+                    'index.mapping.total_fields.limit': indexMappingTotalFieldsLimit,
+                    'index.number_of_shards': 1,
+                    'index.number_of_replicas': 0,
+                  },
+                }
+              : {})),
+        });
+        if (verbose) { console.log('Created target mapping', resp); }
+      }
     } catch (err) {
       console.log('Error creating target mapping', err);
     }
@@ -58,17 +86,14 @@ function createMappingFactory(ref) {
   };
 }
 
-var MAX_QUEUE_SIZE = 15;
-
 function fileReaderFactory(indexer, fileName, transform, splitRegex, verbose) {
   function startIndex(files) {
-    var ingestQueueSize = 0;
     var finished = false;
 
     var file = files.shift();
     var s = fs
       .createReadStream(file)
-      .pipe(es.split(splitRegex))
+      .pipe(split(splitRegex))
       .pipe(
         es
           .mapSync(function (line) {
@@ -116,20 +141,13 @@ function fileReaderFactory(indexer, fileName, transform, splitRegex, verbose) {
           })
      );
 
-    indexer.queueEmitter.on('queue-size', async function (size) {
+    indexer.queueEmitter.on('pause', function () {
       if (finished) { return; }
-      ingestQueueSize = size;
-
-      if (ingestQueueSize < MAX_QUEUE_SIZE) {
-        s.resume();
-      } else {
-        s.pause();
-      }
+      s.pause();
     });
 
     indexer.queueEmitter.on('resume', function () {
       if (finished) { return; }
-      ingestQueueSize = 0;
       s.resume();
     });
   }
@@ -145,7 +163,7 @@ var EventEmitter = require('events');
 
 var queueEmitter = new EventEmitter();
 
-var parallelCalls = 1;
+var parallelCalls = 5;
 
 // a simple helper queue to bulk index documents
 function indexQueueFactory(ref) {
@@ -155,78 +173,76 @@ function indexQueueFactory(ref) {
   var skipHeader = ref.skipHeader; if ( skipHeader === void 0 ) skipHeader = false;
   var verbose = ref.verbose; if ( verbose === void 0 ) verbose = true;
 
-  var buffer = [];
-  var queue = [];
-  var ingesting = 0;
-  var ingestTimes = [];
-  var finished = false;
+  var flushBytes = bufferSize * 1024; // Convert KB to Bytes
+  var highWaterMark = flushBytes * parallelCalls;
 
-  var ingest = function (b) {
-    if (typeof b !== 'undefined') {
-      queue.push(b);
-      queueEmitter.emit('queue-size', queue.length);
-    }
+  // Create a Readable stream
+  var stream = new Readable({
+    read: function read() {}, // Implement read but we manage pushing manually
+    highWaterMark: highWaterMark, // Buffer size for backpressure management
+  });
 
-    if (ingestTimes.length > 5) { ingestTimes = ingestTimes.slice(-5); }
+  async function* ndjsonStreamIterator(readableStream) {
+    var buffer = ''; // To hold the incomplete data
+    var skippedHeader = false;
 
-    if (ingesting < parallelCalls) {
-      var docs = queue.shift();
+    // Iterate over the stream using async iteration
+    for await (var chunk of readableStream) {
+      buffer += chunk.toString(); // Accumulate the chunk data in the buffer
 
-      queueEmitter.emit('queue-size', queue.length);
-      if (queue.length <= 5) {
-        queueEmitter.emit('resume');
-      }
+      // Split the buffer into lines (NDJSON items)
+      var lines = buffer.split('\n');
 
-      ingesting += 1;
-
-      if (verbose)
-        { console.log(("bulk ingest docs: " + (docs.length / 2) + ", queue length: " + (queue.length))); }
-
-      var start = Date.now();
-      client
-        .bulk({ body: docs })
-        .then(function () {
-          var end = Date.now();
-          var delta = end - start;
-          ingestTimes.push(delta);
-          ingesting -= 1;
-
-          var ingestTimesMovingAverage =
-            ingestTimes.length > 0
-              ? ingestTimes.reduce(function (p, c) { return p + c; }, 0) / ingestTimes.length
-              : 0;
-          var ingestTimesMovingAverageSeconds = Math.floor(ingestTimesMovingAverage / 1000);
-
-          if (
-            ingestTimes.length > 0 &&
-            ingestTimesMovingAverageSeconds < 30 &&
-            parallelCalls < 10
-          ) {
-            parallelCalls += 1;
-          } else if (
-            ingestTimes.length > 0 &&
-            ingestTimesMovingAverageSeconds >= 30 &&
-            parallelCalls > 1
-          ) {
-            parallelCalls -= 1;
-          }
+      // The last line might be incomplete, so hold it back in the buffer
+      buffer = lines.pop();
 
-          if (queue.length > 0) {
-            ingest();
-          } else if (queue.length === 0 && finished) {
-            queueEmitter.emit('finish');
-          }
-        })
-        .catch(function (error) {
-          console.error(error);
-          ingesting -= 1;
-          parallelCalls = 1;
-          if (queue.length > 0) {
-            ingest();
+      // Yield each complete JSON object
+      for (var line of lines) {
+        if (line.trim()) {
+          try {
+            if (!skipHeader || (skipHeader && !skippedHeader)) {
+              yield JSON.parse(line); // Parse and yield the JSON object
+              skippedHeader = true;
+            }
+          } catch (err) {
+            // Handle JSON parse errors if necessary
+            console.error('Failed to parse JSON:', err);
          }
-        });
+        }
+      }
    }
-  };
+
+    // Handle any remaining data in the buffer after the stream ends
+    if (buffer.trim()) {
+      try {
+        yield JSON.parse(buffer);
+      } catch (err) {
+        console.error('Failed to parse final JSON:', err);
+      }
+    }
+  }
+
+  var finished = false;
+
+  // Async IIFE to start bulk indexing
+  (async function () {
+    console.log('START BULK INDEXING');
+    await client.helpers.bulk({
+      concurrency: parallelCalls,
+      flushBytes: flushBytes,
+      flushInterval: 1000,
+      refreshOnCompletion: true,
+      datasource: ndjsonStreamIterator(stream),
+      onDocument: function onDocument(doc) {
+        return {
+          index: { _index: targetIndexName },
+        };
+      },
+    });
+    console.log('FINISHED BULK INDEXING');
+
+    queueEmitter.emit('finish');
+  })();
 
   return {
     add: function (doc) {
@@ -234,37 +250,22 @@ function indexQueueFactory(ref) {
        throw new Error('Unexpected doc added after indexer should finish.');
      }
 
-      if (!skipHeader) {
-        var header = { index: { _index: targetIndexName } };
-        buffer.push(header);
-      }
-      buffer.push(doc);
-
-      if (queue.length === 0) {
-        queueEmitter.emit('resume');
-      }
-
-      if (buffer.length >= bufferSize * 2) {
-        ingest(buffer);
-        buffer = [];
+      var canContinue = stream.push(((JSON.stringify(doc)) + "\n"));
+      if (!canContinue) {
+        queueEmitter.emit('pause');
+        stream.once('drain', function () {
+          queueEmitter.emit('resume');
+        });
       }
     },
     finish: function () {
       finished = true;
-
-      if (buffer.length > 0) {
-        ingest(buffer);
-        buffer = [];
-      } else if (queue.length === 0 && ingesting === 0) {
-        queueEmitter.emit('finish');
-      }
+      stream.push(null);
     },
     queueEmitter: queueEmitter,
   };
 }
 
-var MAX_QUEUE_SIZE$1 = 15;
-
 // create a new progress bar instance and use shades_classic theme
 var progressBar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
 
@@ -274,21 +275,24 @@ function indexReaderFactory(
   transform,
   client,
   query,
-  bufferSize,
+  searchSize,
   populatedFields
 ) {
-  if ( bufferSize === void 0 ) bufferSize = DEFAULT_BUFFER_SIZE;
+  if ( searchSize === void 0 ) searchSize = DEFAULT_SEARCH_SIZE;
   if ( populatedFields === void 0 ) populatedFields = false;
 
   return async function indexReader() {
-    var responseQueue = [];
     var docsNum = 0;
+    var scrollId;
+    var finished = false;
+    var readActive = false;
+    var backPressurePause = false;
 
     async function fetchPopulatedFields() {
       try {
         var response = await client.search({
           index: sourceIndexName,
-          size: bufferSize,
+          size: searchSize,
           query: {
             function_score: {
               query: query,
@@ -299,7 +303,7 @@ function indexReaderFactory(
 
         // Get all field names for each returned doc and flatten it
        // to a list of unique field names used across all docs.
-        return new Set(response.hits.hits.map(function (d) { return Object.keys(d._source); }).flat(1));
+        return Array.from(new Set(response.hits.hits.map(function (d) { return Object.keys(d._source); }).flat(1)));
      } catch (e) {
        console.log('error', e);
      }
@@ -308,7 +312,7 @@ function indexReaderFactory(
    function search(fields) {
      return client.search(Object.assign({}, {index: sourceIndexName,
        scroll: '600s',
-        size: bufferSize,
+        size: searchSize,
        query: query},
        (fields ? { _source: fields } : {})));
    }
@@ -325,21 +329,14 @@ function indexReaderFactory(
    // identify populated fields
    if (populatedFields) {
      fieldsWithData = await fetchPopulatedFields();
-      console.log('fieldsWithData', fieldsWithData);
    }
 
-    // start things off by searching, setting a scroll timeout, and pushing
-    // our first response into the queue to be processed
-    var se = await search(fieldsWithData);
-    responseQueue.push(se);
-    progressBar.start(se.hits.total.value, 0);
-    console.log('se', se.hits.hits[0]);
+    await fetchNextResponse();
 
    function processHit(hit) {
      docsNum += 1;
      try {
        var doc = typeof transform === 'function' ? transform(hit._source) : hit._source; // eslint-disable-line no-underscore-dangle
-        // console.log('doc', doc);
 
        // if doc is undefined we'll skip indexing it
        if (typeof doc === 'undefined') {
@@ -359,68 +356,117 @@ function indexReaderFactory(
      }
    }
 
-    var ingestQueueSize = 0;
-    var scrollId = se._scroll_id; // eslint-disable-line no-underscore-dangle
-    var readActive = false;
-
-    async function processResponseQueue() {
-      while (responseQueue.length) {
-        readActive = true;
-        var response = responseQueue.shift();
+    async function fetchNextResponse() {
+      readActive = true;
 
-        // collect the docs from this response
-        response.hits.hits.forEach(processHit);
+      var sc = scrollId ? await scroll(scrollId) : await search(fieldsWithData);
 
-        progressBar.update(docsNum);
+      if (!scrollId) {
+        progressBar.start(sc.hits.total.value, 0);
+      }
 
-        // check to see if we have collected all of the docs
-        if (response.hits.total.value === docsNum) {
-          indexer.finish();
-          break;
-        }
+      scrollId = sc._scroll_id;
+      readActive = false;
 
-        if (ingestQueueSize < MAX_QUEUE_SIZE$1) {
-          // get the next response if there are more docs to fetch
-          var sc = await scroll(response._scroll_id); // eslint-disable-line no-await-in-loop,no-underscore-dangle,max-len
-          scrollId = sc._scroll_id; // eslint-disable-line no-underscore-dangle
-          responseQueue.push(sc);
-        } else {
-          readActive = false;
-        }
-      }
+      processResponse(sc);
    }
 
-    indexer.queueEmitter.on('queue-size', async function (size) {
-      ingestQueueSize = size;
+    async function processResponse(response) {
+      // collect the docs from this response
+      response.hits.hits.forEach(processHit);
+
+      progressBar.update(docsNum);
+
+      // check to see if we have collected all of the docs
+      if (response.hits.total.value === docsNum) {
+        indexer.finish();
+        return;
+      }
 
-      if (!readActive && ingestQueueSize < MAX_QUEUE_SIZE$1) {
-        // get the next response if there are more docs to fetch
-        var sc = await scroll(scrollId); // eslint-disable-line no-await-in-loop,no-underscore-dangle,max-len
-        scrollId = sc._scroll_id; // eslint-disable-line no-underscore-dangle
-        responseQueue.push(sc);
-        processResponseQueue();
+      if (!backPressurePause) {
+        await fetchNextResponse();
      }
+    }
+
+    indexer.queueEmitter.on('pause', async function () {
+      backPressurePause = true;
    });
 
    indexer.queueEmitter.on('resume', async function () {
-      ingestQueueSize = 0;
+      backPressurePause = false;
 
-      if (readActive) {
+      if (readActive || finished) {
        return;
      }
 
-      // get the next response if there are more docs to fetch
-      var sc = await scroll(scrollId); // eslint-disable-line no-await-in-loop,no-underscore-dangle,max-len
-      scrollId = sc._scroll_id; // eslint-disable-line no-underscore-dangle
-      responseQueue.push(sc);
-      processResponseQueue();
+      await fetchNextResponse();
    });
 
    indexer.queueEmitter.on('finish', function () {
+      finished = true;
      progressBar.stop();
    });
+  };
+}
+
+function streamReaderFactory(indexer, stream, transform, splitRegex, verbose) {
+  function startIndex() {
+    console.log('START INDEX', splitRegex);
+    var finished = false;
+
+    var s = stream.pipe(split(splitRegex)).pipe(
+      es
+        .mapSync(function (line) {
+          try {
+            // skip empty lines
+            if (line === '') {
+              return;
+            }
+
+            var doc =
+              typeof transform === 'function' ? JSON.stringify(transform(JSON.parse(line))) : line;
+
+            // if doc is undefined we'll skip indexing it
+            if (typeof doc === 'undefined') {
+              s.resume();
+              return;
+            }
+
+            // the transform callback may return an array of docs so we can emit
+            // multiple docs from a single line
+            if (Array.isArray(doc)) {
+              doc.forEach(function (d) { return indexer.add(d); });
+              return;
+            }
+
+            indexer.add(doc);
+          } catch (e) {
+            console.log('error', e);
+          }
+        })
+        .on('error', function (err) {
+          console.log('Error while reading file.', err);
+        })
+        .on('end', function () {
+          if (verbose) { console.log('Read entire stream.'); }
+          indexer.finish();
+          finished = true;
+        })
+    );
+
+    indexer.queueEmitter.on('pause', function () {
+      if (finished) { return; }
+      s.pause();
+    });
 
-    processResponseQueue();
+    indexer.queueEmitter.on('resume', function () {
+      if (finished) { return; }
+      s.resume();
+    });
+  }
+
+  return function () {
+    startIndex();
  };
 }
 
@@ -429,6 +475,8 @@ async function transformer(ref) {
  var sourceClientConfig = ref.sourceClientConfig;
  var targetClientConfig = ref.targetClientConfig;
  var bufferSize = ref.bufferSize; if ( bufferSize === void 0 ) bufferSize = DEFAULT_BUFFER_SIZE;
+  var searchSize = ref.searchSize; if ( searchSize === void 0 ) searchSize = DEFAULT_SEARCH_SIZE;
+  var stream = ref.stream;
  var fileName = ref.fileName;
  var splitRegex = ref.splitRegex; if ( splitRegex === void 0 ) splitRegex = /\n/;
  var sourceIndexName = ref.sourceIndexName;
@@ -442,6 +490,7 @@ async function transformer(ref) {
  var transform = ref.transform;
  var verbose = ref.verbose; if ( verbose === void 0 ) verbose = true;
 
+  console.log('TRANSFORMER');
  if (typeof targetIndexName === 'undefined') {
    throw Error('targetIndexName must be specified.');
  }
@@ -464,6 +513,7 @@ async function transformer(ref) {
    mappingsOverride: mappingsOverride,
    indexMappingTotalFieldsLimit: indexMappingTotalFieldsLimit,
    verbose: verbose,
+    deleteIndex: deleteIndex,
  });
  var indexer = indexQueueFactory({
    targetClient: targetClient,
@@ -478,8 +528,12 @@ async function transformer(ref) {
    throw Error('Only either one of fileName or sourceIndexName can be specified.');
  }
 
-  if (typeof fileName === 'undefined' && typeof sourceIndexName === 'undefined') {
-    throw Error('Either fileName or sourceIndexName must be specified.');
+  if (
+    (typeof fileName !== 'undefined' && typeof sourceIndexName !== 'undefined') ||
+    (typeof fileName !== 'undefined' && typeof stream !== 'undefined') ||
+    (typeof sourceIndexName !== 'undefined' && typeof stream !== 'undefined')
+  ) {
+    throw Error('Only one of fileName, sourceIndexName, or stream can be specified.');
  }
 
  if (typeof fileName !== 'undefined') {
@@ -493,18 +547,25 @@ async function transformer(ref) {
        transform,
        sourceClient,
        query,
-        bufferSize,
+        searchSize,
        populatedFields
      );
    }
 
+    if (typeof stream !== 'undefined') {
+      console.log('STREAM READER');
+      return streamReaderFactory(indexer, stream, transform, splitRegex, verbose);
+    }
+
    return null;
  }
 
  var reader = getReader();
+  console.log('READER INITIALIZED');
 
  try {
    var indexExists = await targetClient.indices.exists({ index: targetIndexName });
+    console.log('INDEX EXISTS', indexExists);
 
    if (indexExists === false) {
      await createMapping();
package/package.json CHANGED
@@ -14,20 +14,21 @@
   "license": "Apache-2.0",
   "author": "Walter Rafelsberger <walter@rafelsberger.at>",
   "contributors": [],
-  "version": "1.0.0-beta2",
+  "version": "1.0.0-beta3",
   "main": "dist/node-es-transformer.cjs.js",
   "module": "dist/node-es-transformer.esm.js",
   "dependencies": {
-    "@elastic/elasticsearch": "^8.10.0",
+    "@elastic/elasticsearch": "^8.15.0",
     "cli-progress": "^3.12.0",
     "event-stream": "3.3.4",
-    "glob": "7.1.2"
+    "git-cz": "^4.9.0",
+    "glob": "7.1.2",
+    "split2": "^4.2.0"
   },
   "devDependencies": {
     "acorn": "^6.4.2",
     "async-retry": "^1.3.3",
     "commit-and-tag-version": "^11.3.0",
-    "cz-conventional-changelog": "^3.3.0",
     "eslint": "^8.51.0",
     "eslint-config-airbnb": "19.0.4",
     "eslint-config-prettier": "^9.0.0",
@@ -57,7 +58,7 @@
   ],
   "config": {
     "commitizen": {
-      "path": "./node_modules/cz-conventional-changelog"
+      "path": "git-cz"
     }
   },
   "jest": {