node-es-transformer 1.0.0-alpha10 → 1.0.0-alpha12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,7 @@
1
1
  [![npm](https://img.shields.io/npm/v/node-es-transformer.svg?maxAge=2592000)](https://www.npmjs.com/package/node-es-transformer)
2
2
  [![npm](https://img.shields.io/npm/l/node-es-transformer.svg?maxAge=2592000)](https://www.npmjs.com/package/node-es-transformer)
3
3
  [![npm](https://img.shields.io/npm/dt/node-es-transformer.svg?maxAge=2592000)](https://www.npmjs.com/package/node-es-transformer)
4
+ [![Commitizen friendly](https://img.shields.io/badge/commitizen-friendly-brightgreen.svg)](http://commitizen.github.io/cz-cli/)
4
5
 
5
6
  # node-es-transformer
6
7
 
@@ -115,6 +116,7 @@ transformer({
115
116
  - `targetIndexName`: The target Elasticsearch index where documents will be indexed.
116
117
  - `mappings`: Optional Elasticsearch document mappings. If not set and you're reindexing from another index, the mappings from the existing index will be used.
117
118
  - `mappingsOverride`: If you're reindexing and this is set to `true`, `mappings` will be applied on top of the source index's mappings. Defaults to `false`.
119
+ - `indexMappingTotalFieldsLimit`: Optional limit on the total number of fields in the target index's mapping. When set, it is applied to the newly created target index as the `index.mapping.total_fields.limit` setting.
118
120
  - `query`: Optional Elasticsearch [DSL query](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html) to filter documents from the source index.
119
121
  - `skipHeader`: If true, skips the first line of the source file. Defaults to `false`.
120
122
  - `transform(line)`: A callback function which allows the transformation of a source line into one or several documents.
@@ -8,6 +8,8 @@ var glob = _interopDefault(require('glob'));
8
8
  var cliProgress = _interopDefault(require('cli-progress'));
9
9
  var elasticsearch = _interopDefault(require('@elastic/elasticsearch'));
10
10
 
11
+ var DEFAULT_BUFFER_SIZE = 1000;
12
+
11
13
  function createMappingFactory(ref) {
12
14
  var sourceClient = ref.sourceClient;
13
15
  var sourceIndexName = ref.sourceIndexName;
@@ -15,6 +17,7 @@ function createMappingFactory(ref) {
15
17
  var targetIndexName = ref.targetIndexName;
16
18
  var mappings = ref.mappings;
17
19
  var mappingsOverride = ref.mappingsOverride;
20
+ var indexMappingTotalFieldsLimit = ref.indexMappingTotalFieldsLimit;
18
21
  var verbose = ref.verbose;
19
22
 
20
23
  return async function () {
@@ -22,7 +25,9 @@ function createMappingFactory(ref) {
22
25
 
23
26
  if (sourceClient && sourceIndexName && typeof targetMappings === 'undefined') {
24
27
  try {
25
- var mapping = await sourceClient.indices.getMapping({ index: sourceIndexName });
28
+ var mapping = await sourceClient.indices.getMapping({
29
+ index: sourceIndexName,
30
+ });
26
31
  targetMappings = mapping[sourceIndexName].mappings;
27
32
  } catch (err) {
28
33
  console.log('Error reading source mapping', err);
@@ -38,12 +43,17 @@ function createMappingFactory(ref) {
38
43
  }
39
44
 
40
45
  try {
41
- var resp = await targetClient.indices.create(
42
- {
43
- index: targetIndexName,
44
- body: { mappings: targetMappings },
45
- }
46
- );
46
+ var resp = await targetClient.indices.create({
47
+ index: targetIndexName,
48
+ body: Object.assign({}, {mappings: targetMappings},
49
+ (indexMappingTotalFieldsLimit !== undefined
50
+ ? {
51
+ settings: {
52
+ 'index.mapping.total_fields.limit': indexMappingTotalFieldsLimit,
53
+ },
54
+ }
55
+ : {})),
56
+ });
47
57
  if (verbose) { console.log('Created target mapping', resp); }
48
58
  } catch (err) {
49
59
  console.log('Error creating target mapping', err);
@@ -55,40 +65,44 @@ function createMappingFactory(ref) {
55
65
  function fileReaderFactory(indexer, fileName, transform, splitRegex, verbose) {
56
66
  function startIndex(files) {
57
67
  var file = files.shift();
58
- var s = fs.createReadStream(file)
68
+ var s = fs
69
+ .createReadStream(file)
59
70
  .pipe(es.split(splitRegex))
60
- .pipe(es.mapSync(function (line) {
61
- s.pause();
62
- try {
63
- var doc = (typeof transform === 'function') ? transform(line) : line;
64
- // if doc is undefined we'll skip indexing it
65
- if (typeof doc === 'undefined') {
66
- s.resume();
67
- return;
68
- }
69
-
70
- // the transform callback may return an array of docs so we can emit
71
- // multiple docs from a single line
72
- if (Array.isArray(doc)) {
73
- doc.forEach(function (d) { return indexer.add(d); });
74
- return;
75
- }
76
-
77
- indexer.add(doc);
78
- } catch (e) {
79
- console.log('error', e);
80
- }
81
- })
82
- .on('error', function (err) {
83
- console.log('Error while reading file.', err);
84
- })
85
- .on('end', function () {
86
- if (verbose) { console.log('Read entire file: ', file); }
87
- indexer.finish();
88
- if (files.length > 0) {
89
- startIndex(files);
90
- }
91
- }));
71
+ .pipe(
72
+ es
73
+ .mapSync(function (line) {
74
+ s.pause();
75
+ try {
76
+ var doc = typeof transform === 'function' ? transform(line) : line;
77
+ // if doc is undefined we'll skip indexing it
78
+ if (typeof doc === 'undefined') {
79
+ s.resume();
80
+ return;
81
+ }
82
+
83
+ // the transform callback may return an array of docs so we can emit
84
+ // multiple docs from a single line
85
+ if (Array.isArray(doc)) {
86
+ doc.forEach(function (d) { return indexer.add(d); });
87
+ return;
88
+ }
89
+
90
+ indexer.add(doc);
91
+ } catch (e) {
92
+ console.log('error', e);
93
+ }
94
+ })
95
+ .on('error', function (err) {
96
+ console.log('Error while reading file.', err);
97
+ })
98
+ .on('end', function () {
99
+ if (verbose) { console.log('Read entire file: ', file); }
100
+ indexer.finish();
101
+ if (files.length > 0) {
102
+ startIndex(files);
103
+ }
104
+ })
105
+ );
92
106
 
93
107
  indexer.queueEmitter.on('resume', function () {
94
108
  s.resume();
@@ -106,45 +120,66 @@ var EventEmitter = require('events');
106
120
 
107
121
  var queueEmitter = new EventEmitter();
108
122
 
123
+ var parallelCalls = 1;
124
+
109
125
  // a simple helper queue to bulk index documents
110
126
  function indexQueueFactory(ref) {
111
127
  var client = ref.targetClient;
112
128
  var targetIndexName = ref.targetIndexName;
113
- var bufferSize = ref.bufferSize; if ( bufferSize === void 0 ) bufferSize = 1000;
129
+ var bufferSize = ref.bufferSize; if ( bufferSize === void 0 ) bufferSize = DEFAULT_BUFFER_SIZE;
114
130
  var skipHeader = ref.skipHeader; if ( skipHeader === void 0 ) skipHeader = false;
115
131
  var verbose = ref.verbose; if ( verbose === void 0 ) verbose = true;
116
132
 
117
133
  var buffer = [];
118
134
  var queue = [];
119
- var ingesting = false;
135
+ var ingesting = 0;
136
+ var ingestTimes = [];
120
137
 
121
- var ingest = async function (b) {
138
+ var ingest = function (b) {
122
139
  if (typeof b !== 'undefined') {
123
140
  queue.push(b);
124
141
  queueEmitter.emit('queue-size', queue.length);
125
142
  }
126
143
 
127
- if (ingesting === false) {
144
+ if (ingestTimes.length > 5) { ingestTimes = ingestTimes.slice(-5); }
145
+
146
+ if (ingesting < parallelCalls) {
128
147
  var docs = queue.shift();
148
+
129
149
  queueEmitter.emit('queue-size', queue.length);
130
- ingesting = true;
131
- if (verbose) { console.log(("bulk ingest docs: " + (docs.length / 2) + ", queue length: " + (queue.length))); }
150
+ if (queue.length <= 5) {
151
+ queueEmitter.emit('resume');
152
+ }
132
153
 
133
- try {
134
- await client.bulk({ body: docs });
135
- ingesting = false;
154
+ ingesting += 1;
155
+
156
+ if (verbose)
157
+ { console.log(("bulk ingest docs: " + (docs.length / 2) + ", queue length: " + (queue.length))); }
158
+
159
+ var start = Date.now();
160
+ client.bulk({ body: docs }).then(function () {
161
+ var end = Date.now();
162
+ var delta = end - start;
163
+ ingestTimes.push(delta);
164
+ ingesting -= 1;
165
+
166
+ var ingestTimesMovingAverage =
167
+ ingestTimes.length > 0 ? ingestTimes.reduce(function (p, c) { return p + c; }, 0) / ingestTimes.length : 0;
168
+ var ingestTimesMovingAverageSeconds = Math.floor(ingestTimesMovingAverage / 1000);
169
+
170
+ if (ingestTimes.length > 0 && ingestTimesMovingAverageSeconds < 30 && parallelCalls < 10) {
171
+ parallelCalls += 1;
172
+ } else if (
173
+ ingestTimes.length > 0 &&
174
+ ingestTimesMovingAverageSeconds >= 30 &&
175
+ parallelCalls > 1
176
+ ) {
177
+ parallelCalls -= 1;
178
+ }
136
179
  if (queue.length > 0) {
137
180
  ingest();
138
181
  }
139
- } catch (err) {
140
- console.log('bulk index error', err);
141
- }
142
- }
143
-
144
- // console.log(`ingest: queue.length ${queue.length}`);
145
- if (queue.length === 0) {
146
- queueEmitter.emit('queue-size', 0);
147
- queueEmitter.emit('resume');
182
+ });
148
183
  }
149
184
  };
150
185
 
@@ -161,7 +196,7 @@ function indexQueueFactory(ref) {
161
196
  queueEmitter.emit('resume');
162
197
  }
163
198
 
164
- if (buffer.length >= (bufferSize * 2)) {
199
+ if (buffer.length >= bufferSize * 2) {
165
200
  ingest(buffer);
166
201
  buffer = [];
167
202
  }
@@ -175,12 +210,21 @@ function indexQueueFactory(ref) {
175
210
  };
176
211
  }
177
212
 
178
- var MAX_QUEUE_SIZE = 5;
213
+ var MAX_QUEUE_SIZE = 15;
179
214
 
180
215
  // create a new progress bar instance and use shades_classic theme
181
216
  var progressBar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
182
217
 
183
- function indexReaderFactory(indexer, sourceIndexName, transform, client, query) {
218
+ function indexReaderFactory(
219
+ indexer,
220
+ sourceIndexName,
221
+ transform,
222
+ client,
223
+ query,
224
+ bufferSize
225
+ ) {
226
+ if ( bufferSize === void 0 ) bufferSize = DEFAULT_BUFFER_SIZE;
227
+
184
228
  return async function indexReader() {
185
229
  var responseQueue = [];
186
230
  var docsNum = 0;
@@ -189,7 +233,7 @@ function indexReaderFactory(indexer, sourceIndexName, transform, client, query)
189
233
  return client.search({
190
234
  index: sourceIndexName,
191
235
  scroll: '30s',
192
- size: 10000,
236
+ size: bufferSize,
193
237
  query: query,
194
238
  });
195
239
  }
@@ -210,7 +254,7 @@ function indexReaderFactory(indexer, sourceIndexName, transform, client, query)
210
254
  function processHit(hit) {
211
255
  docsNum += 1;
212
256
  try {
213
- var doc = (typeof transform === 'function') ? transform(hit._source) : hit._source; // eslint-disable-line no-underscore-dangle
257
+ var doc = typeof transform === 'function' ? transform(hit._source) : hit._source; // eslint-disable-line no-underscore-dangle
214
258
  // if doc is undefined we'll skip indexing it
215
259
  if (typeof doc === 'undefined') {
216
260
  return;
@@ -252,7 +296,7 @@ function indexReaderFactory(indexer, sourceIndexName, transform, client, query)
252
296
  }
253
297
 
254
298
  if (ingestQueueSize < MAX_QUEUE_SIZE) {
255
- // get the next response if there are more docs to fetch
299
+ // get the next response if there are more docs to fetch
256
300
  var sc = await scroll(response._scroll_id); // eslint-disable-line no-await-in-loop,no-underscore-dangle,max-len
257
301
  scrollId = sc._scroll_id; // eslint-disable-line no-underscore-dangle
258
302
  responseQueue.push(sc);
@@ -266,7 +310,7 @@ function indexReaderFactory(indexer, sourceIndexName, transform, client, query)
266
310
  ingestQueueSize = size;
267
311
 
268
312
  if (!readActive && ingestQueueSize < MAX_QUEUE_SIZE) {
269
- // get the next response if there are more docs to fetch
313
+ // get the next response if there are more docs to fetch
270
314
  var sc = await scroll(scrollId); // eslint-disable-line no-await-in-loop,no-underscore-dangle,max-len
271
315
  scrollId = sc._scroll_id; // eslint-disable-line no-underscore-dangle
272
316
  responseQueue.push(sc);
@@ -296,13 +340,14 @@ async function transformer(ref) {
296
340
  var deleteIndex = ref.deleteIndex; if ( deleteIndex === void 0 ) deleteIndex = false;
297
341
  var sourceClientConfig = ref.sourceClientConfig;
298
342
  var targetClientConfig = ref.targetClientConfig;
299
- var bufferSize = ref.bufferSize; if ( bufferSize === void 0 ) bufferSize = 1000;
343
+ var bufferSize = ref.bufferSize; if ( bufferSize === void 0 ) bufferSize = DEFAULT_BUFFER_SIZE;
300
344
  var fileName = ref.fileName;
301
345
  var splitRegex = ref.splitRegex; if ( splitRegex === void 0 ) splitRegex = /\n/;
302
346
  var sourceIndexName = ref.sourceIndexName;
303
347
  var targetIndexName = ref.targetIndexName;
304
348
  var mappings = ref.mappings;
305
349
  var mappingsOverride = ref.mappingsOverride; if ( mappingsOverride === void 0 ) mappingsOverride = false;
350
+ var indexMappingTotalFieldsLimit = ref.indexMappingTotalFieldsLimit;
306
351
  var query = ref.query;
307
352
  var skipHeader = ref.skipHeader; if ( skipHeader === void 0 ) skipHeader = false;
308
353
  var transform = ref.transform;
@@ -328,6 +373,7 @@ async function transformer(ref) {
328
373
  targetIndexName: targetIndexName,
329
374
  mappings: mappings,
330
375
  mappingsOverride: mappingsOverride,
376
+ indexMappingTotalFieldsLimit: indexMappingTotalFieldsLimit,
331
377
  verbose: verbose,
332
378
  });
333
379
  var indexer = indexQueueFactory({
@@ -339,30 +385,16 @@ async function transformer(ref) {
339
385
  });
340
386
 
341
387
  function getReader() {
342
- if (
343
- typeof fileName !== 'undefined'
344
- && typeof sourceIndexName !== 'undefined'
345
- ) {
346
- throw Error(
347
- 'Only either one of fileName or sourceIndexName can be specified.'
348
- );
388
+ if (typeof fileName !== 'undefined' && typeof sourceIndexName !== 'undefined') {
389
+ throw Error('Only either one of fileName or sourceIndexName can be specified.');
349
390
  }
350
391
 
351
- if (
352
- typeof fileName === 'undefined'
353
- && typeof sourceIndexName === 'undefined'
354
- ) {
392
+ if (typeof fileName === 'undefined' && typeof sourceIndexName === 'undefined') {
355
393
  throw Error('Either fileName or sourceIndexName must be specified.');
356
394
  }
357
395
 
358
396
  if (typeof fileName !== 'undefined') {
359
- return fileReaderFactory(
360
- indexer,
361
- fileName,
362
- transform,
363
- splitRegex,
364
- verbose
365
- );
397
+ return fileReaderFactory(indexer, fileName, transform, splitRegex, verbose);
366
398
  }
367
399
 
368
400
  if (typeof sourceIndexName !== 'undefined') {
@@ -371,7 +403,8 @@ async function transformer(ref) {
371
403
  sourceIndexName,
372
404
  transform,
373
405
  sourceClient,
374
- query
406
+ query,
407
+ bufferSize
375
408
  );
376
409
  }
377
410
 
@@ -4,6 +4,8 @@ import glob from 'glob';
4
4
  import cliProgress from 'cli-progress';
5
5
  import elasticsearch from '@elastic/elasticsearch';
6
6
 
7
+ var DEFAULT_BUFFER_SIZE = 1000;
8
+
7
9
  function createMappingFactory(ref) {
8
10
  var sourceClient = ref.sourceClient;
9
11
  var sourceIndexName = ref.sourceIndexName;
@@ -11,6 +13,7 @@ function createMappingFactory(ref) {
11
13
  var targetIndexName = ref.targetIndexName;
12
14
  var mappings = ref.mappings;
13
15
  var mappingsOverride = ref.mappingsOverride;
16
+ var indexMappingTotalFieldsLimit = ref.indexMappingTotalFieldsLimit;
14
17
  var verbose = ref.verbose;
15
18
 
16
19
  return async function () {
@@ -18,7 +21,9 @@ function createMappingFactory(ref) {
18
21
 
19
22
  if (sourceClient && sourceIndexName && typeof targetMappings === 'undefined') {
20
23
  try {
21
- var mapping = await sourceClient.indices.getMapping({ index: sourceIndexName });
24
+ var mapping = await sourceClient.indices.getMapping({
25
+ index: sourceIndexName,
26
+ });
22
27
  targetMappings = mapping[sourceIndexName].mappings;
23
28
  } catch (err) {
24
29
  console.log('Error reading source mapping', err);
@@ -34,12 +39,17 @@ function createMappingFactory(ref) {
34
39
  }
35
40
 
36
41
  try {
37
- var resp = await targetClient.indices.create(
38
- {
39
- index: targetIndexName,
40
- body: { mappings: targetMappings },
41
- }
42
- );
42
+ var resp = await targetClient.indices.create({
43
+ index: targetIndexName,
44
+ body: Object.assign({}, {mappings: targetMappings},
45
+ (indexMappingTotalFieldsLimit !== undefined
46
+ ? {
47
+ settings: {
48
+ 'index.mapping.total_fields.limit': indexMappingTotalFieldsLimit,
49
+ },
50
+ }
51
+ : {})),
52
+ });
43
53
  if (verbose) { console.log('Created target mapping', resp); }
44
54
  } catch (err) {
45
55
  console.log('Error creating target mapping', err);
@@ -51,40 +61,44 @@ function createMappingFactory(ref) {
51
61
  function fileReaderFactory(indexer, fileName, transform, splitRegex, verbose) {
52
62
  function startIndex(files) {
53
63
  var file = files.shift();
54
- var s = fs.createReadStream(file)
64
+ var s = fs
65
+ .createReadStream(file)
55
66
  .pipe(es.split(splitRegex))
56
- .pipe(es.mapSync(function (line) {
57
- s.pause();
58
- try {
59
- var doc = (typeof transform === 'function') ? transform(line) : line;
60
- // if doc is undefined we'll skip indexing it
61
- if (typeof doc === 'undefined') {
62
- s.resume();
63
- return;
64
- }
65
-
66
- // the transform callback may return an array of docs so we can emit
67
- // multiple docs from a single line
68
- if (Array.isArray(doc)) {
69
- doc.forEach(function (d) { return indexer.add(d); });
70
- return;
71
- }
72
-
73
- indexer.add(doc);
74
- } catch (e) {
75
- console.log('error', e);
76
- }
77
- })
78
- .on('error', function (err) {
79
- console.log('Error while reading file.', err);
80
- })
81
- .on('end', function () {
82
- if (verbose) { console.log('Read entire file: ', file); }
83
- indexer.finish();
84
- if (files.length > 0) {
85
- startIndex(files);
86
- }
87
- }));
67
+ .pipe(
68
+ es
69
+ .mapSync(function (line) {
70
+ s.pause();
71
+ try {
72
+ var doc = typeof transform === 'function' ? transform(line) : line;
73
+ // if doc is undefined we'll skip indexing it
74
+ if (typeof doc === 'undefined') {
75
+ s.resume();
76
+ return;
77
+ }
78
+
79
+ // the transform callback may return an array of docs so we can emit
80
+ // multiple docs from a single line
81
+ if (Array.isArray(doc)) {
82
+ doc.forEach(function (d) { return indexer.add(d); });
83
+ return;
84
+ }
85
+
86
+ indexer.add(doc);
87
+ } catch (e) {
88
+ console.log('error', e);
89
+ }
90
+ })
91
+ .on('error', function (err) {
92
+ console.log('Error while reading file.', err);
93
+ })
94
+ .on('end', function () {
95
+ if (verbose) { console.log('Read entire file: ', file); }
96
+ indexer.finish();
97
+ if (files.length > 0) {
98
+ startIndex(files);
99
+ }
100
+ })
101
+ );
88
102
 
89
103
  indexer.queueEmitter.on('resume', function () {
90
104
  s.resume();
@@ -102,45 +116,66 @@ var EventEmitter = require('events');
102
116
 
103
117
  var queueEmitter = new EventEmitter();
104
118
 
119
+ var parallelCalls = 1;
120
+
105
121
  // a simple helper queue to bulk index documents
106
122
  function indexQueueFactory(ref) {
107
123
  var client = ref.targetClient;
108
124
  var targetIndexName = ref.targetIndexName;
109
- var bufferSize = ref.bufferSize; if ( bufferSize === void 0 ) bufferSize = 1000;
125
+ var bufferSize = ref.bufferSize; if ( bufferSize === void 0 ) bufferSize = DEFAULT_BUFFER_SIZE;
110
126
  var skipHeader = ref.skipHeader; if ( skipHeader === void 0 ) skipHeader = false;
111
127
  var verbose = ref.verbose; if ( verbose === void 0 ) verbose = true;
112
128
 
113
129
  var buffer = [];
114
130
  var queue = [];
115
- var ingesting = false;
131
+ var ingesting = 0;
132
+ var ingestTimes = [];
116
133
 
117
- var ingest = async function (b) {
134
+ var ingest = function (b) {
118
135
  if (typeof b !== 'undefined') {
119
136
  queue.push(b);
120
137
  queueEmitter.emit('queue-size', queue.length);
121
138
  }
122
139
 
123
- if (ingesting === false) {
140
+ if (ingestTimes.length > 5) { ingestTimes = ingestTimes.slice(-5); }
141
+
142
+ if (ingesting < parallelCalls) {
124
143
  var docs = queue.shift();
144
+
125
145
  queueEmitter.emit('queue-size', queue.length);
126
- ingesting = true;
127
- if (verbose) { console.log(("bulk ingest docs: " + (docs.length / 2) + ", queue length: " + (queue.length))); }
146
+ if (queue.length <= 5) {
147
+ queueEmitter.emit('resume');
148
+ }
128
149
 
129
- try {
130
- await client.bulk({ body: docs });
131
- ingesting = false;
150
+ ingesting += 1;
151
+
152
+ if (verbose)
153
+ { console.log(("bulk ingest docs: " + (docs.length / 2) + ", queue length: " + (queue.length))); }
154
+
155
+ var start = Date.now();
156
+ client.bulk({ body: docs }).then(function () {
157
+ var end = Date.now();
158
+ var delta = end - start;
159
+ ingestTimes.push(delta);
160
+ ingesting -= 1;
161
+
162
+ var ingestTimesMovingAverage =
163
+ ingestTimes.length > 0 ? ingestTimes.reduce(function (p, c) { return p + c; }, 0) / ingestTimes.length : 0;
164
+ var ingestTimesMovingAverageSeconds = Math.floor(ingestTimesMovingAverage / 1000);
165
+
166
+ if (ingestTimes.length > 0 && ingestTimesMovingAverageSeconds < 30 && parallelCalls < 10) {
167
+ parallelCalls += 1;
168
+ } else if (
169
+ ingestTimes.length > 0 &&
170
+ ingestTimesMovingAverageSeconds >= 30 &&
171
+ parallelCalls > 1
172
+ ) {
173
+ parallelCalls -= 1;
174
+ }
132
175
  if (queue.length > 0) {
133
176
  ingest();
134
177
  }
135
- } catch (err) {
136
- console.log('bulk index error', err);
137
- }
138
- }
139
-
140
- // console.log(`ingest: queue.length ${queue.length}`);
141
- if (queue.length === 0) {
142
- queueEmitter.emit('queue-size', 0);
143
- queueEmitter.emit('resume');
178
+ });
144
179
  }
145
180
  };
146
181
 
@@ -157,7 +192,7 @@ function indexQueueFactory(ref) {
157
192
  queueEmitter.emit('resume');
158
193
  }
159
194
 
160
- if (buffer.length >= (bufferSize * 2)) {
195
+ if (buffer.length >= bufferSize * 2) {
161
196
  ingest(buffer);
162
197
  buffer = [];
163
198
  }
@@ -171,12 +206,21 @@ function indexQueueFactory(ref) {
171
206
  };
172
207
  }
173
208
 
174
- var MAX_QUEUE_SIZE = 5;
209
+ var MAX_QUEUE_SIZE = 15;
175
210
 
176
211
  // create a new progress bar instance and use shades_classic theme
177
212
  var progressBar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
178
213
 
179
- function indexReaderFactory(indexer, sourceIndexName, transform, client, query) {
214
+ function indexReaderFactory(
215
+ indexer,
216
+ sourceIndexName,
217
+ transform,
218
+ client,
219
+ query,
220
+ bufferSize
221
+ ) {
222
+ if ( bufferSize === void 0 ) bufferSize = DEFAULT_BUFFER_SIZE;
223
+
180
224
  return async function indexReader() {
181
225
  var responseQueue = [];
182
226
  var docsNum = 0;
@@ -185,7 +229,7 @@ function indexReaderFactory(indexer, sourceIndexName, transform, client, query)
185
229
  return client.search({
186
230
  index: sourceIndexName,
187
231
  scroll: '30s',
188
- size: 10000,
232
+ size: bufferSize,
189
233
  query: query,
190
234
  });
191
235
  }
@@ -206,7 +250,7 @@ function indexReaderFactory(indexer, sourceIndexName, transform, client, query)
206
250
  function processHit(hit) {
207
251
  docsNum += 1;
208
252
  try {
209
- var doc = (typeof transform === 'function') ? transform(hit._source) : hit._source; // eslint-disable-line no-underscore-dangle
253
+ var doc = typeof transform === 'function' ? transform(hit._source) : hit._source; // eslint-disable-line no-underscore-dangle
210
254
  // if doc is undefined we'll skip indexing it
211
255
  if (typeof doc === 'undefined') {
212
256
  return;
@@ -248,7 +292,7 @@ function indexReaderFactory(indexer, sourceIndexName, transform, client, query)
248
292
  }
249
293
 
250
294
  if (ingestQueueSize < MAX_QUEUE_SIZE) {
251
- // get the next response if there are more docs to fetch
295
+ // get the next response if there are more docs to fetch
252
296
  var sc = await scroll(response._scroll_id); // eslint-disable-line no-await-in-loop,no-underscore-dangle,max-len
253
297
  scrollId = sc._scroll_id; // eslint-disable-line no-underscore-dangle
254
298
  responseQueue.push(sc);
@@ -262,7 +306,7 @@ function indexReaderFactory(indexer, sourceIndexName, transform, client, query)
262
306
  ingestQueueSize = size;
263
307
 
264
308
  if (!readActive && ingestQueueSize < MAX_QUEUE_SIZE) {
265
- // get the next response if there are more docs to fetch
309
+ // get the next response if there are more docs to fetch
266
310
  var sc = await scroll(scrollId); // eslint-disable-line no-await-in-loop,no-underscore-dangle,max-len
267
311
  scrollId = sc._scroll_id; // eslint-disable-line no-underscore-dangle
268
312
  responseQueue.push(sc);
@@ -292,13 +336,14 @@ async function transformer(ref) {
292
336
  var deleteIndex = ref.deleteIndex; if ( deleteIndex === void 0 ) deleteIndex = false;
293
337
  var sourceClientConfig = ref.sourceClientConfig;
294
338
  var targetClientConfig = ref.targetClientConfig;
295
- var bufferSize = ref.bufferSize; if ( bufferSize === void 0 ) bufferSize = 1000;
339
+ var bufferSize = ref.bufferSize; if ( bufferSize === void 0 ) bufferSize = DEFAULT_BUFFER_SIZE;
296
340
  var fileName = ref.fileName;
297
341
  var splitRegex = ref.splitRegex; if ( splitRegex === void 0 ) splitRegex = /\n/;
298
342
  var sourceIndexName = ref.sourceIndexName;
299
343
  var targetIndexName = ref.targetIndexName;
300
344
  var mappings = ref.mappings;
301
345
  var mappingsOverride = ref.mappingsOverride; if ( mappingsOverride === void 0 ) mappingsOverride = false;
346
+ var indexMappingTotalFieldsLimit = ref.indexMappingTotalFieldsLimit;
302
347
  var query = ref.query;
303
348
  var skipHeader = ref.skipHeader; if ( skipHeader === void 0 ) skipHeader = false;
304
349
  var transform = ref.transform;
@@ -324,6 +369,7 @@ async function transformer(ref) {
324
369
  targetIndexName: targetIndexName,
325
370
  mappings: mappings,
326
371
  mappingsOverride: mappingsOverride,
372
+ indexMappingTotalFieldsLimit: indexMappingTotalFieldsLimit,
327
373
  verbose: verbose,
328
374
  });
329
375
  var indexer = indexQueueFactory({
@@ -335,30 +381,16 @@ async function transformer(ref) {
335
381
  });
336
382
 
337
383
  function getReader() {
338
- if (
339
- typeof fileName !== 'undefined'
340
- && typeof sourceIndexName !== 'undefined'
341
- ) {
342
- throw Error(
343
- 'Only either one of fileName or sourceIndexName can be specified.'
344
- );
384
+ if (typeof fileName !== 'undefined' && typeof sourceIndexName !== 'undefined') {
385
+ throw Error('Only either one of fileName or sourceIndexName can be specified.');
345
386
  }
346
387
 
347
- if (
348
- typeof fileName === 'undefined'
349
- && typeof sourceIndexName === 'undefined'
350
- ) {
388
+ if (typeof fileName === 'undefined' && typeof sourceIndexName === 'undefined') {
351
389
  throw Error('Either fileName or sourceIndexName must be specified.');
352
390
  }
353
391
 
354
392
  if (typeof fileName !== 'undefined') {
355
- return fileReaderFactory(
356
- indexer,
357
- fileName,
358
- transform,
359
- splitRegex,
360
- verbose
361
- );
393
+ return fileReaderFactory(indexer, fileName, transform, splitRegex, verbose);
362
394
  }
363
395
 
364
396
  if (typeof sourceIndexName !== 'undefined') {
@@ -367,7 +399,8 @@ async function transformer(ref) {
367
399
  sourceIndexName,
368
400
  transform,
369
401
  sourceClient,
370
- query
402
+ query,
403
+ bufferSize
371
404
  );
372
405
  }
373
406
 
package/package.json CHANGED
@@ -14,7 +14,7 @@
14
14
  "license": "Apache-2.0",
15
15
  "author": "Walter Rafelsberger <walter@rafelsberger.at>",
16
16
  "contributors": [],
17
- "version": "1.0.0-alpha10",
17
+ "version": "1.0.0-alpha12",
18
18
  "main": "dist/node-es-transformer.cjs.js",
19
19
  "module": "dist/node-es-transformer.esm.js",
20
20
  "dependencies": {
@@ -25,11 +25,16 @@
25
25
  },
26
26
  "devDependencies": {
27
27
  "acorn": "^6.4.2",
28
+ "commit-and-tag-version": "^11.3.0",
29
+ "cz-conventional-changelog": "^3.3.0",
28
30
  "eslint": "8.2.0",
29
31
  "eslint-config-airbnb": "19.0.4",
32
+ "eslint-config-prettier": "^9.0.0",
30
33
  "eslint-plugin-import": "2.27.5",
31
34
  "eslint-plugin-jsx-a11y": "6.7.1",
35
+ "eslint-plugin-prettier": "^3.3.1",
32
36
  "eslint-plugin-react": "7.32.2",
37
+ "prettier": "^2.2.1",
33
38
  "rollup": "0.66.6",
34
39
  "rollup-plugin-buble": "0.19.6",
35
40
  "rollup-plugin-commonjs": "8.0.2",
@@ -39,9 +44,15 @@
39
44
  "build": "rollup -c",
40
45
  "dev": "rollup -c -w",
41
46
  "test": "node test/test.js",
42
- "pretest": "npm run build"
47
+ "pretest": "npm run build",
48
+ "release": "commit-and-tag-version"
43
49
  },
44
50
  "files": [
45
51
  "dist"
46
- ]
52
+ ],
53
+ "config": {
54
+ "commitizen": {
55
+ "path": "./node_modules/cz-conventional-changelog"
56
+ }
57
+ }
47
58
  }