node-es-transformer 1.0.0-alpha10 → 1.0.0-alpha12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/dist/node-es-transformer.cjs.js +118 -85
- package/dist/node-es-transformer.esm.js +118 -85
- package/package.json +14 -3
package/README.md
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
[](https://www.npmjs.com/package/node-es-transformer)
|
|
2
2
|
[](https://www.npmjs.com/package/node-es-transformer)
|
|
3
3
|
[](https://www.npmjs.com/package/node-es-transformer)
|
|
4
|
+
[](http://commitizen.github.io/cz-cli/)
|
|
4
5
|
|
|
5
6
|
# node-es-transformer
|
|
6
7
|
|
|
@@ -115,6 +116,7 @@ transformer({
|
|
|
115
116
|
- `targetIndexName`: The target Elasticsearch index where documents will be indexed.
|
|
116
117
|
- `mappings`: Optional Elasticsearch document mappings. If not set and you're reindexing from another index, the mappings from the existing index will be used.
|
|
117
118
|
- `mappingsOverride`: If you're reindexing and this is set to `true`, `mappings` will be applied on top of the source index's mappings. Defaults to `false`.
|
|
119
|
+
- `indexMappingTotalFieldsLimit`: Optional field limit for the target index to be created that will be passed on as the `index.mapping.total_fields.limit` setting.
|
|
118
120
|
- `query`: Optional Elasticsearch [DSL query](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html) to filter documents from the source index.
|
|
119
121
|
- `skipHeader`: If true, skips the first line of the source file. Defaults to `false`.
|
|
120
122
|
- `transform(line)`: A callback function which allows the transformation of a source line into one or several documents.
|
|
@@ -8,6 +8,8 @@ var glob = _interopDefault(require('glob'));
|
|
|
8
8
|
var cliProgress = _interopDefault(require('cli-progress'));
|
|
9
9
|
var elasticsearch = _interopDefault(require('@elastic/elasticsearch'));
|
|
10
10
|
|
|
11
|
+
var DEFAULT_BUFFER_SIZE = 1000;
|
|
12
|
+
|
|
11
13
|
function createMappingFactory(ref) {
|
|
12
14
|
var sourceClient = ref.sourceClient;
|
|
13
15
|
var sourceIndexName = ref.sourceIndexName;
|
|
@@ -15,6 +17,7 @@ function createMappingFactory(ref) {
|
|
|
15
17
|
var targetIndexName = ref.targetIndexName;
|
|
16
18
|
var mappings = ref.mappings;
|
|
17
19
|
var mappingsOverride = ref.mappingsOverride;
|
|
20
|
+
var indexMappingTotalFieldsLimit = ref.indexMappingTotalFieldsLimit;
|
|
18
21
|
var verbose = ref.verbose;
|
|
19
22
|
|
|
20
23
|
return async function () {
|
|
@@ -22,7 +25,9 @@ function createMappingFactory(ref) {
|
|
|
22
25
|
|
|
23
26
|
if (sourceClient && sourceIndexName && typeof targetMappings === 'undefined') {
|
|
24
27
|
try {
|
|
25
|
-
var mapping = await sourceClient.indices.getMapping({
|
|
28
|
+
var mapping = await sourceClient.indices.getMapping({
|
|
29
|
+
index: sourceIndexName,
|
|
30
|
+
});
|
|
26
31
|
targetMappings = mapping[sourceIndexName].mappings;
|
|
27
32
|
} catch (err) {
|
|
28
33
|
console.log('Error reading source mapping', err);
|
|
@@ -38,12 +43,17 @@ function createMappingFactory(ref) {
|
|
|
38
43
|
}
|
|
39
44
|
|
|
40
45
|
try {
|
|
41
|
-
var resp = await targetClient.indices.create(
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
46
|
+
var resp = await targetClient.indices.create({
|
|
47
|
+
index: targetIndexName,
|
|
48
|
+
body: Object.assign({}, {mappings: targetMappings},
|
|
49
|
+
(indexMappingTotalFieldsLimit !== undefined
|
|
50
|
+
? {
|
|
51
|
+
settings: {
|
|
52
|
+
'index.mapping.total_fields.limit': indexMappingTotalFieldsLimit,
|
|
53
|
+
},
|
|
54
|
+
}
|
|
55
|
+
: {})),
|
|
56
|
+
});
|
|
47
57
|
if (verbose) { console.log('Created target mapping', resp); }
|
|
48
58
|
} catch (err) {
|
|
49
59
|
console.log('Error creating target mapping', err);
|
|
@@ -55,40 +65,44 @@ function createMappingFactory(ref) {
|
|
|
55
65
|
function fileReaderFactory(indexer, fileName, transform, splitRegex, verbose) {
|
|
56
66
|
function startIndex(files) {
|
|
57
67
|
var file = files.shift();
|
|
58
|
-
var s = fs
|
|
68
|
+
var s = fs
|
|
69
|
+
.createReadStream(file)
|
|
59
70
|
.pipe(es.split(splitRegex))
|
|
60
|
-
.pipe(
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
71
|
+
.pipe(
|
|
72
|
+
es
|
|
73
|
+
.mapSync(function (line) {
|
|
74
|
+
s.pause();
|
|
75
|
+
try {
|
|
76
|
+
var doc = typeof transform === 'function' ? transform(line) : line;
|
|
77
|
+
// if doc is undefined we'll skip indexing it
|
|
78
|
+
if (typeof doc === 'undefined') {
|
|
79
|
+
s.resume();
|
|
80
|
+
return;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
// the transform callback may return an array of docs so we can emit
|
|
84
|
+
// multiple docs from a single line
|
|
85
|
+
if (Array.isArray(doc)) {
|
|
86
|
+
doc.forEach(function (d) { return indexer.add(d); });
|
|
87
|
+
return;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
indexer.add(doc);
|
|
91
|
+
} catch (e) {
|
|
92
|
+
console.log('error', e);
|
|
93
|
+
}
|
|
94
|
+
})
|
|
95
|
+
.on('error', function (err) {
|
|
96
|
+
console.log('Error while reading file.', err);
|
|
97
|
+
})
|
|
98
|
+
.on('end', function () {
|
|
99
|
+
if (verbose) { console.log('Read entire file: ', file); }
|
|
100
|
+
indexer.finish();
|
|
101
|
+
if (files.length > 0) {
|
|
102
|
+
startIndex(files);
|
|
103
|
+
}
|
|
104
|
+
})
|
|
105
|
+
);
|
|
92
106
|
|
|
93
107
|
indexer.queueEmitter.on('resume', function () {
|
|
94
108
|
s.resume();
|
|
@@ -106,45 +120,66 @@ var EventEmitter = require('events');
|
|
|
106
120
|
|
|
107
121
|
var queueEmitter = new EventEmitter();
|
|
108
122
|
|
|
123
|
+
var parallelCalls = 1;
|
|
124
|
+
|
|
109
125
|
// a simple helper queue to bulk index documents
|
|
110
126
|
function indexQueueFactory(ref) {
|
|
111
127
|
var client = ref.targetClient;
|
|
112
128
|
var targetIndexName = ref.targetIndexName;
|
|
113
|
-
var bufferSize = ref.bufferSize; if ( bufferSize === void 0 ) bufferSize =
|
|
129
|
+
var bufferSize = ref.bufferSize; if ( bufferSize === void 0 ) bufferSize = DEFAULT_BUFFER_SIZE;
|
|
114
130
|
var skipHeader = ref.skipHeader; if ( skipHeader === void 0 ) skipHeader = false;
|
|
115
131
|
var verbose = ref.verbose; if ( verbose === void 0 ) verbose = true;
|
|
116
132
|
|
|
117
133
|
var buffer = [];
|
|
118
134
|
var queue = [];
|
|
119
|
-
var ingesting =
|
|
135
|
+
var ingesting = 0;
|
|
136
|
+
var ingestTimes = [];
|
|
120
137
|
|
|
121
|
-
var ingest =
|
|
138
|
+
var ingest = function (b) {
|
|
122
139
|
if (typeof b !== 'undefined') {
|
|
123
140
|
queue.push(b);
|
|
124
141
|
queueEmitter.emit('queue-size', queue.length);
|
|
125
142
|
}
|
|
126
143
|
|
|
127
|
-
if (
|
|
144
|
+
if (ingestTimes.length > 5) { ingestTimes = ingestTimes.slice(-5); }
|
|
145
|
+
|
|
146
|
+
if (ingesting < parallelCalls) {
|
|
128
147
|
var docs = queue.shift();
|
|
148
|
+
|
|
129
149
|
queueEmitter.emit('queue-size', queue.length);
|
|
130
|
-
|
|
131
|
-
|
|
150
|
+
if (queue.length <= 5) {
|
|
151
|
+
queueEmitter.emit('resume');
|
|
152
|
+
}
|
|
132
153
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
154
|
+
ingesting += 1;
|
|
155
|
+
|
|
156
|
+
if (verbose)
|
|
157
|
+
{ console.log(("bulk ingest docs: " + (docs.length / 2) + ", queue length: " + (queue.length))); }
|
|
158
|
+
|
|
159
|
+
var start = Date.now();
|
|
160
|
+
client.bulk({ body: docs }).then(function () {
|
|
161
|
+
var end = Date.now();
|
|
162
|
+
var delta = end - start;
|
|
163
|
+
ingestTimes.push(delta);
|
|
164
|
+
ingesting -= 1;
|
|
165
|
+
|
|
166
|
+
var ingestTimesMovingAverage =
|
|
167
|
+
ingestTimes.length > 0 ? ingestTimes.reduce(function (p, c) { return p + c; }, 0) / ingestTimes.length : 0;
|
|
168
|
+
var ingestTimesMovingAverageSeconds = Math.floor(ingestTimesMovingAverage / 1000);
|
|
169
|
+
|
|
170
|
+
if (ingestTimes.length > 0 && ingestTimesMovingAverageSeconds < 30 && parallelCalls < 10) {
|
|
171
|
+
parallelCalls += 1;
|
|
172
|
+
} else if (
|
|
173
|
+
ingestTimes.length > 0 &&
|
|
174
|
+
ingestTimesMovingAverageSeconds >= 30 &&
|
|
175
|
+
parallelCalls > 1
|
|
176
|
+
) {
|
|
177
|
+
parallelCalls -= 1;
|
|
178
|
+
}
|
|
136
179
|
if (queue.length > 0) {
|
|
137
180
|
ingest();
|
|
138
181
|
}
|
|
139
|
-
}
|
|
140
|
-
console.log('bulk index error', err);
|
|
141
|
-
}
|
|
142
|
-
}
|
|
143
|
-
|
|
144
|
-
// console.log(`ingest: queue.length ${queue.length}`);
|
|
145
|
-
if (queue.length === 0) {
|
|
146
|
-
queueEmitter.emit('queue-size', 0);
|
|
147
|
-
queueEmitter.emit('resume');
|
|
182
|
+
});
|
|
148
183
|
}
|
|
149
184
|
};
|
|
150
185
|
|
|
@@ -161,7 +196,7 @@ function indexQueueFactory(ref) {
|
|
|
161
196
|
queueEmitter.emit('resume');
|
|
162
197
|
}
|
|
163
198
|
|
|
164
|
-
if (buffer.length >=
|
|
199
|
+
if (buffer.length >= bufferSize * 2) {
|
|
165
200
|
ingest(buffer);
|
|
166
201
|
buffer = [];
|
|
167
202
|
}
|
|
@@ -175,12 +210,21 @@ function indexQueueFactory(ref) {
|
|
|
175
210
|
};
|
|
176
211
|
}
|
|
177
212
|
|
|
178
|
-
var MAX_QUEUE_SIZE =
|
|
213
|
+
var MAX_QUEUE_SIZE = 15;
|
|
179
214
|
|
|
180
215
|
// create a new progress bar instance and use shades_classic theme
|
|
181
216
|
var progressBar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
|
|
182
217
|
|
|
183
|
-
function indexReaderFactory(
|
|
218
|
+
function indexReaderFactory(
|
|
219
|
+
indexer,
|
|
220
|
+
sourceIndexName,
|
|
221
|
+
transform,
|
|
222
|
+
client,
|
|
223
|
+
query,
|
|
224
|
+
bufferSize
|
|
225
|
+
) {
|
|
226
|
+
if ( bufferSize === void 0 ) bufferSize = DEFAULT_BUFFER_SIZE;
|
|
227
|
+
|
|
184
228
|
return async function indexReader() {
|
|
185
229
|
var responseQueue = [];
|
|
186
230
|
var docsNum = 0;
|
|
@@ -189,7 +233,7 @@ function indexReaderFactory(indexer, sourceIndexName, transform, client, query)
|
|
|
189
233
|
return client.search({
|
|
190
234
|
index: sourceIndexName,
|
|
191
235
|
scroll: '30s',
|
|
192
|
-
size:
|
|
236
|
+
size: bufferSize,
|
|
193
237
|
query: query,
|
|
194
238
|
});
|
|
195
239
|
}
|
|
@@ -210,7 +254,7 @@ function indexReaderFactory(indexer, sourceIndexName, transform, client, query)
|
|
|
210
254
|
function processHit(hit) {
|
|
211
255
|
docsNum += 1;
|
|
212
256
|
try {
|
|
213
|
-
var doc =
|
|
257
|
+
var doc = typeof transform === 'function' ? transform(hit._source) : hit._source; // eslint-disable-line no-underscore-dangle
|
|
214
258
|
// if doc is undefined we'll skip indexing it
|
|
215
259
|
if (typeof doc === 'undefined') {
|
|
216
260
|
return;
|
|
@@ -252,7 +296,7 @@ function indexReaderFactory(indexer, sourceIndexName, transform, client, query)
|
|
|
252
296
|
}
|
|
253
297
|
|
|
254
298
|
if (ingestQueueSize < MAX_QUEUE_SIZE) {
|
|
255
|
-
|
|
299
|
+
// get the next response if there are more docs to fetch
|
|
256
300
|
var sc = await scroll(response._scroll_id); // eslint-disable-line no-await-in-loop,no-underscore-dangle,max-len
|
|
257
301
|
scrollId = sc._scroll_id; // eslint-disable-line no-underscore-dangle
|
|
258
302
|
responseQueue.push(sc);
|
|
@@ -266,7 +310,7 @@ function indexReaderFactory(indexer, sourceIndexName, transform, client, query)
|
|
|
266
310
|
ingestQueueSize = size;
|
|
267
311
|
|
|
268
312
|
if (!readActive && ingestQueueSize < MAX_QUEUE_SIZE) {
|
|
269
|
-
|
|
313
|
+
// get the next response if there are more docs to fetch
|
|
270
314
|
var sc = await scroll(scrollId); // eslint-disable-line no-await-in-loop,no-underscore-dangle,max-len
|
|
271
315
|
scrollId = sc._scroll_id; // eslint-disable-line no-underscore-dangle
|
|
272
316
|
responseQueue.push(sc);
|
|
@@ -296,13 +340,14 @@ async function transformer(ref) {
|
|
|
296
340
|
var deleteIndex = ref.deleteIndex; if ( deleteIndex === void 0 ) deleteIndex = false;
|
|
297
341
|
var sourceClientConfig = ref.sourceClientConfig;
|
|
298
342
|
var targetClientConfig = ref.targetClientConfig;
|
|
299
|
-
var bufferSize = ref.bufferSize; if ( bufferSize === void 0 ) bufferSize =
|
|
343
|
+
var bufferSize = ref.bufferSize; if ( bufferSize === void 0 ) bufferSize = DEFAULT_BUFFER_SIZE;
|
|
300
344
|
var fileName = ref.fileName;
|
|
301
345
|
var splitRegex = ref.splitRegex; if ( splitRegex === void 0 ) splitRegex = /\n/;
|
|
302
346
|
var sourceIndexName = ref.sourceIndexName;
|
|
303
347
|
var targetIndexName = ref.targetIndexName;
|
|
304
348
|
var mappings = ref.mappings;
|
|
305
349
|
var mappingsOverride = ref.mappingsOverride; if ( mappingsOverride === void 0 ) mappingsOverride = false;
|
|
350
|
+
var indexMappingTotalFieldsLimit = ref.indexMappingTotalFieldsLimit;
|
|
306
351
|
var query = ref.query;
|
|
307
352
|
var skipHeader = ref.skipHeader; if ( skipHeader === void 0 ) skipHeader = false;
|
|
308
353
|
var transform = ref.transform;
|
|
@@ -328,6 +373,7 @@ async function transformer(ref) {
|
|
|
328
373
|
targetIndexName: targetIndexName,
|
|
329
374
|
mappings: mappings,
|
|
330
375
|
mappingsOverride: mappingsOverride,
|
|
376
|
+
indexMappingTotalFieldsLimit: indexMappingTotalFieldsLimit,
|
|
331
377
|
verbose: verbose,
|
|
332
378
|
});
|
|
333
379
|
var indexer = indexQueueFactory({
|
|
@@ -339,30 +385,16 @@ async function transformer(ref) {
|
|
|
339
385
|
});
|
|
340
386
|
|
|
341
387
|
function getReader() {
|
|
342
|
-
if (
|
|
343
|
-
|
|
344
|
-
&& typeof sourceIndexName !== 'undefined'
|
|
345
|
-
) {
|
|
346
|
-
throw Error(
|
|
347
|
-
'Only either one of fileName or sourceIndexName can be specified.'
|
|
348
|
-
);
|
|
388
|
+
if (typeof fileName !== 'undefined' && typeof sourceIndexName !== 'undefined') {
|
|
389
|
+
throw Error('Only either one of fileName or sourceIndexName can be specified.');
|
|
349
390
|
}
|
|
350
391
|
|
|
351
|
-
if (
|
|
352
|
-
typeof fileName === 'undefined'
|
|
353
|
-
&& typeof sourceIndexName === 'undefined'
|
|
354
|
-
) {
|
|
392
|
+
if (typeof fileName === 'undefined' && typeof sourceIndexName === 'undefined') {
|
|
355
393
|
throw Error('Either fileName or sourceIndexName must be specified.');
|
|
356
394
|
}
|
|
357
395
|
|
|
358
396
|
if (typeof fileName !== 'undefined') {
|
|
359
|
-
return fileReaderFactory(
|
|
360
|
-
indexer,
|
|
361
|
-
fileName,
|
|
362
|
-
transform,
|
|
363
|
-
splitRegex,
|
|
364
|
-
verbose
|
|
365
|
-
);
|
|
397
|
+
return fileReaderFactory(indexer, fileName, transform, splitRegex, verbose);
|
|
366
398
|
}
|
|
367
399
|
|
|
368
400
|
if (typeof sourceIndexName !== 'undefined') {
|
|
@@ -371,7 +403,8 @@ async function transformer(ref) {
|
|
|
371
403
|
sourceIndexName,
|
|
372
404
|
transform,
|
|
373
405
|
sourceClient,
|
|
374
|
-
query
|
|
406
|
+
query,
|
|
407
|
+
bufferSize
|
|
375
408
|
);
|
|
376
409
|
}
|
|
377
410
|
|
|
@@ -4,6 +4,8 @@ import glob from 'glob';
|
|
|
4
4
|
import cliProgress from 'cli-progress';
|
|
5
5
|
import elasticsearch from '@elastic/elasticsearch';
|
|
6
6
|
|
|
7
|
+
var DEFAULT_BUFFER_SIZE = 1000;
|
|
8
|
+
|
|
7
9
|
function createMappingFactory(ref) {
|
|
8
10
|
var sourceClient = ref.sourceClient;
|
|
9
11
|
var sourceIndexName = ref.sourceIndexName;
|
|
@@ -11,6 +13,7 @@ function createMappingFactory(ref) {
|
|
|
11
13
|
var targetIndexName = ref.targetIndexName;
|
|
12
14
|
var mappings = ref.mappings;
|
|
13
15
|
var mappingsOverride = ref.mappingsOverride;
|
|
16
|
+
var indexMappingTotalFieldsLimit = ref.indexMappingTotalFieldsLimit;
|
|
14
17
|
var verbose = ref.verbose;
|
|
15
18
|
|
|
16
19
|
return async function () {
|
|
@@ -18,7 +21,9 @@ function createMappingFactory(ref) {
|
|
|
18
21
|
|
|
19
22
|
if (sourceClient && sourceIndexName && typeof targetMappings === 'undefined') {
|
|
20
23
|
try {
|
|
21
|
-
var mapping = await sourceClient.indices.getMapping({
|
|
24
|
+
var mapping = await sourceClient.indices.getMapping({
|
|
25
|
+
index: sourceIndexName,
|
|
26
|
+
});
|
|
22
27
|
targetMappings = mapping[sourceIndexName].mappings;
|
|
23
28
|
} catch (err) {
|
|
24
29
|
console.log('Error reading source mapping', err);
|
|
@@ -34,12 +39,17 @@ function createMappingFactory(ref) {
|
|
|
34
39
|
}
|
|
35
40
|
|
|
36
41
|
try {
|
|
37
|
-
var resp = await targetClient.indices.create(
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
42
|
+
var resp = await targetClient.indices.create({
|
|
43
|
+
index: targetIndexName,
|
|
44
|
+
body: Object.assign({}, {mappings: targetMappings},
|
|
45
|
+
(indexMappingTotalFieldsLimit !== undefined
|
|
46
|
+
? {
|
|
47
|
+
settings: {
|
|
48
|
+
'index.mapping.total_fields.limit': indexMappingTotalFieldsLimit,
|
|
49
|
+
},
|
|
50
|
+
}
|
|
51
|
+
: {})),
|
|
52
|
+
});
|
|
43
53
|
if (verbose) { console.log('Created target mapping', resp); }
|
|
44
54
|
} catch (err) {
|
|
45
55
|
console.log('Error creating target mapping', err);
|
|
@@ -51,40 +61,44 @@ function createMappingFactory(ref) {
|
|
|
51
61
|
function fileReaderFactory(indexer, fileName, transform, splitRegex, verbose) {
|
|
52
62
|
function startIndex(files) {
|
|
53
63
|
var file = files.shift();
|
|
54
|
-
var s = fs
|
|
64
|
+
var s = fs
|
|
65
|
+
.createReadStream(file)
|
|
55
66
|
.pipe(es.split(splitRegex))
|
|
56
|
-
.pipe(
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
67
|
+
.pipe(
|
|
68
|
+
es
|
|
69
|
+
.mapSync(function (line) {
|
|
70
|
+
s.pause();
|
|
71
|
+
try {
|
|
72
|
+
var doc = typeof transform === 'function' ? transform(line) : line;
|
|
73
|
+
// if doc is undefined we'll skip indexing it
|
|
74
|
+
if (typeof doc === 'undefined') {
|
|
75
|
+
s.resume();
|
|
76
|
+
return;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
// the transform callback may return an array of docs so we can emit
|
|
80
|
+
// multiple docs from a single line
|
|
81
|
+
if (Array.isArray(doc)) {
|
|
82
|
+
doc.forEach(function (d) { return indexer.add(d); });
|
|
83
|
+
return;
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
indexer.add(doc);
|
|
87
|
+
} catch (e) {
|
|
88
|
+
console.log('error', e);
|
|
89
|
+
}
|
|
90
|
+
})
|
|
91
|
+
.on('error', function (err) {
|
|
92
|
+
console.log('Error while reading file.', err);
|
|
93
|
+
})
|
|
94
|
+
.on('end', function () {
|
|
95
|
+
if (verbose) { console.log('Read entire file: ', file); }
|
|
96
|
+
indexer.finish();
|
|
97
|
+
if (files.length > 0) {
|
|
98
|
+
startIndex(files);
|
|
99
|
+
}
|
|
100
|
+
})
|
|
101
|
+
);
|
|
88
102
|
|
|
89
103
|
indexer.queueEmitter.on('resume', function () {
|
|
90
104
|
s.resume();
|
|
@@ -102,45 +116,66 @@ var EventEmitter = require('events');
|
|
|
102
116
|
|
|
103
117
|
var queueEmitter = new EventEmitter();
|
|
104
118
|
|
|
119
|
+
var parallelCalls = 1;
|
|
120
|
+
|
|
105
121
|
// a simple helper queue to bulk index documents
|
|
106
122
|
function indexQueueFactory(ref) {
|
|
107
123
|
var client = ref.targetClient;
|
|
108
124
|
var targetIndexName = ref.targetIndexName;
|
|
109
|
-
var bufferSize = ref.bufferSize; if ( bufferSize === void 0 ) bufferSize =
|
|
125
|
+
var bufferSize = ref.bufferSize; if ( bufferSize === void 0 ) bufferSize = DEFAULT_BUFFER_SIZE;
|
|
110
126
|
var skipHeader = ref.skipHeader; if ( skipHeader === void 0 ) skipHeader = false;
|
|
111
127
|
var verbose = ref.verbose; if ( verbose === void 0 ) verbose = true;
|
|
112
128
|
|
|
113
129
|
var buffer = [];
|
|
114
130
|
var queue = [];
|
|
115
|
-
var ingesting =
|
|
131
|
+
var ingesting = 0;
|
|
132
|
+
var ingestTimes = [];
|
|
116
133
|
|
|
117
|
-
var ingest =
|
|
134
|
+
var ingest = function (b) {
|
|
118
135
|
if (typeof b !== 'undefined') {
|
|
119
136
|
queue.push(b);
|
|
120
137
|
queueEmitter.emit('queue-size', queue.length);
|
|
121
138
|
}
|
|
122
139
|
|
|
123
|
-
if (
|
|
140
|
+
if (ingestTimes.length > 5) { ingestTimes = ingestTimes.slice(-5); }
|
|
141
|
+
|
|
142
|
+
if (ingesting < parallelCalls) {
|
|
124
143
|
var docs = queue.shift();
|
|
144
|
+
|
|
125
145
|
queueEmitter.emit('queue-size', queue.length);
|
|
126
|
-
|
|
127
|
-
|
|
146
|
+
if (queue.length <= 5) {
|
|
147
|
+
queueEmitter.emit('resume');
|
|
148
|
+
}
|
|
128
149
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
150
|
+
ingesting += 1;
|
|
151
|
+
|
|
152
|
+
if (verbose)
|
|
153
|
+
{ console.log(("bulk ingest docs: " + (docs.length / 2) + ", queue length: " + (queue.length))); }
|
|
154
|
+
|
|
155
|
+
var start = Date.now();
|
|
156
|
+
client.bulk({ body: docs }).then(function () {
|
|
157
|
+
var end = Date.now();
|
|
158
|
+
var delta = end - start;
|
|
159
|
+
ingestTimes.push(delta);
|
|
160
|
+
ingesting -= 1;
|
|
161
|
+
|
|
162
|
+
var ingestTimesMovingAverage =
|
|
163
|
+
ingestTimes.length > 0 ? ingestTimes.reduce(function (p, c) { return p + c; }, 0) / ingestTimes.length : 0;
|
|
164
|
+
var ingestTimesMovingAverageSeconds = Math.floor(ingestTimesMovingAverage / 1000);
|
|
165
|
+
|
|
166
|
+
if (ingestTimes.length > 0 && ingestTimesMovingAverageSeconds < 30 && parallelCalls < 10) {
|
|
167
|
+
parallelCalls += 1;
|
|
168
|
+
} else if (
|
|
169
|
+
ingestTimes.length > 0 &&
|
|
170
|
+
ingestTimesMovingAverageSeconds >= 30 &&
|
|
171
|
+
parallelCalls > 1
|
|
172
|
+
) {
|
|
173
|
+
parallelCalls -= 1;
|
|
174
|
+
}
|
|
132
175
|
if (queue.length > 0) {
|
|
133
176
|
ingest();
|
|
134
177
|
}
|
|
135
|
-
}
|
|
136
|
-
console.log('bulk index error', err);
|
|
137
|
-
}
|
|
138
|
-
}
|
|
139
|
-
|
|
140
|
-
// console.log(`ingest: queue.length ${queue.length}`);
|
|
141
|
-
if (queue.length === 0) {
|
|
142
|
-
queueEmitter.emit('queue-size', 0);
|
|
143
|
-
queueEmitter.emit('resume');
|
|
178
|
+
});
|
|
144
179
|
}
|
|
145
180
|
};
|
|
146
181
|
|
|
@@ -157,7 +192,7 @@ function indexQueueFactory(ref) {
|
|
|
157
192
|
queueEmitter.emit('resume');
|
|
158
193
|
}
|
|
159
194
|
|
|
160
|
-
if (buffer.length >=
|
|
195
|
+
if (buffer.length >= bufferSize * 2) {
|
|
161
196
|
ingest(buffer);
|
|
162
197
|
buffer = [];
|
|
163
198
|
}
|
|
@@ -171,12 +206,21 @@ function indexQueueFactory(ref) {
|
|
|
171
206
|
};
|
|
172
207
|
}
|
|
173
208
|
|
|
174
|
-
var MAX_QUEUE_SIZE =
|
|
209
|
+
var MAX_QUEUE_SIZE = 15;
|
|
175
210
|
|
|
176
211
|
// create a new progress bar instance and use shades_classic theme
|
|
177
212
|
var progressBar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
|
|
178
213
|
|
|
179
|
-
function indexReaderFactory(
|
|
214
|
+
function indexReaderFactory(
|
|
215
|
+
indexer,
|
|
216
|
+
sourceIndexName,
|
|
217
|
+
transform,
|
|
218
|
+
client,
|
|
219
|
+
query,
|
|
220
|
+
bufferSize
|
|
221
|
+
) {
|
|
222
|
+
if ( bufferSize === void 0 ) bufferSize = DEFAULT_BUFFER_SIZE;
|
|
223
|
+
|
|
180
224
|
return async function indexReader() {
|
|
181
225
|
var responseQueue = [];
|
|
182
226
|
var docsNum = 0;
|
|
@@ -185,7 +229,7 @@ function indexReaderFactory(indexer, sourceIndexName, transform, client, query)
|
|
|
185
229
|
return client.search({
|
|
186
230
|
index: sourceIndexName,
|
|
187
231
|
scroll: '30s',
|
|
188
|
-
size:
|
|
232
|
+
size: bufferSize,
|
|
189
233
|
query: query,
|
|
190
234
|
});
|
|
191
235
|
}
|
|
@@ -206,7 +250,7 @@ function indexReaderFactory(indexer, sourceIndexName, transform, client, query)
|
|
|
206
250
|
function processHit(hit) {
|
|
207
251
|
docsNum += 1;
|
|
208
252
|
try {
|
|
209
|
-
var doc =
|
|
253
|
+
var doc = typeof transform === 'function' ? transform(hit._source) : hit._source; // eslint-disable-line no-underscore-dangle
|
|
210
254
|
// if doc is undefined we'll skip indexing it
|
|
211
255
|
if (typeof doc === 'undefined') {
|
|
212
256
|
return;
|
|
@@ -248,7 +292,7 @@ function indexReaderFactory(indexer, sourceIndexName, transform, client, query)
|
|
|
248
292
|
}
|
|
249
293
|
|
|
250
294
|
if (ingestQueueSize < MAX_QUEUE_SIZE) {
|
|
251
|
-
|
|
295
|
+
// get the next response if there are more docs to fetch
|
|
252
296
|
var sc = await scroll(response._scroll_id); // eslint-disable-line no-await-in-loop,no-underscore-dangle,max-len
|
|
253
297
|
scrollId = sc._scroll_id; // eslint-disable-line no-underscore-dangle
|
|
254
298
|
responseQueue.push(sc);
|
|
@@ -262,7 +306,7 @@ function indexReaderFactory(indexer, sourceIndexName, transform, client, query)
|
|
|
262
306
|
ingestQueueSize = size;
|
|
263
307
|
|
|
264
308
|
if (!readActive && ingestQueueSize < MAX_QUEUE_SIZE) {
|
|
265
|
-
|
|
309
|
+
// get the next response if there are more docs to fetch
|
|
266
310
|
var sc = await scroll(scrollId); // eslint-disable-line no-await-in-loop,no-underscore-dangle,max-len
|
|
267
311
|
scrollId = sc._scroll_id; // eslint-disable-line no-underscore-dangle
|
|
268
312
|
responseQueue.push(sc);
|
|
@@ -292,13 +336,14 @@ async function transformer(ref) {
|
|
|
292
336
|
var deleteIndex = ref.deleteIndex; if ( deleteIndex === void 0 ) deleteIndex = false;
|
|
293
337
|
var sourceClientConfig = ref.sourceClientConfig;
|
|
294
338
|
var targetClientConfig = ref.targetClientConfig;
|
|
295
|
-
var bufferSize = ref.bufferSize; if ( bufferSize === void 0 ) bufferSize =
|
|
339
|
+
var bufferSize = ref.bufferSize; if ( bufferSize === void 0 ) bufferSize = DEFAULT_BUFFER_SIZE;
|
|
296
340
|
var fileName = ref.fileName;
|
|
297
341
|
var splitRegex = ref.splitRegex; if ( splitRegex === void 0 ) splitRegex = /\n/;
|
|
298
342
|
var sourceIndexName = ref.sourceIndexName;
|
|
299
343
|
var targetIndexName = ref.targetIndexName;
|
|
300
344
|
var mappings = ref.mappings;
|
|
301
345
|
var mappingsOverride = ref.mappingsOverride; if ( mappingsOverride === void 0 ) mappingsOverride = false;
|
|
346
|
+
var indexMappingTotalFieldsLimit = ref.indexMappingTotalFieldsLimit;
|
|
302
347
|
var query = ref.query;
|
|
303
348
|
var skipHeader = ref.skipHeader; if ( skipHeader === void 0 ) skipHeader = false;
|
|
304
349
|
var transform = ref.transform;
|
|
@@ -324,6 +369,7 @@ async function transformer(ref) {
|
|
|
324
369
|
targetIndexName: targetIndexName,
|
|
325
370
|
mappings: mappings,
|
|
326
371
|
mappingsOverride: mappingsOverride,
|
|
372
|
+
indexMappingTotalFieldsLimit: indexMappingTotalFieldsLimit,
|
|
327
373
|
verbose: verbose,
|
|
328
374
|
});
|
|
329
375
|
var indexer = indexQueueFactory({
|
|
@@ -335,30 +381,16 @@ async function transformer(ref) {
|
|
|
335
381
|
});
|
|
336
382
|
|
|
337
383
|
function getReader() {
|
|
338
|
-
if (
|
|
339
|
-
|
|
340
|
-
&& typeof sourceIndexName !== 'undefined'
|
|
341
|
-
) {
|
|
342
|
-
throw Error(
|
|
343
|
-
'Only either one of fileName or sourceIndexName can be specified.'
|
|
344
|
-
);
|
|
384
|
+
if (typeof fileName !== 'undefined' && typeof sourceIndexName !== 'undefined') {
|
|
385
|
+
throw Error('Only either one of fileName or sourceIndexName can be specified.');
|
|
345
386
|
}
|
|
346
387
|
|
|
347
|
-
if (
|
|
348
|
-
typeof fileName === 'undefined'
|
|
349
|
-
&& typeof sourceIndexName === 'undefined'
|
|
350
|
-
) {
|
|
388
|
+
if (typeof fileName === 'undefined' && typeof sourceIndexName === 'undefined') {
|
|
351
389
|
throw Error('Either fileName or sourceIndexName must be specified.');
|
|
352
390
|
}
|
|
353
391
|
|
|
354
392
|
if (typeof fileName !== 'undefined') {
|
|
355
|
-
return fileReaderFactory(
|
|
356
|
-
indexer,
|
|
357
|
-
fileName,
|
|
358
|
-
transform,
|
|
359
|
-
splitRegex,
|
|
360
|
-
verbose
|
|
361
|
-
);
|
|
393
|
+
return fileReaderFactory(indexer, fileName, transform, splitRegex, verbose);
|
|
362
394
|
}
|
|
363
395
|
|
|
364
396
|
if (typeof sourceIndexName !== 'undefined') {
|
|
@@ -367,7 +399,8 @@ async function transformer(ref) {
|
|
|
367
399
|
sourceIndexName,
|
|
368
400
|
transform,
|
|
369
401
|
sourceClient,
|
|
370
|
-
query
|
|
402
|
+
query,
|
|
403
|
+
bufferSize
|
|
371
404
|
);
|
|
372
405
|
}
|
|
373
406
|
|
package/package.json
CHANGED
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
"license": "Apache-2.0",
|
|
15
15
|
"author": "Walter Rafelsberger <walter@rafelsberger.at>",
|
|
16
16
|
"contributors": [],
|
|
17
|
-
"version": "1.0.0-
|
|
17
|
+
"version": "1.0.0-alpha12",
|
|
18
18
|
"main": "dist/node-es-transformer.cjs.js",
|
|
19
19
|
"module": "dist/node-es-transformer.esm.js",
|
|
20
20
|
"dependencies": {
|
|
@@ -25,11 +25,16 @@
|
|
|
25
25
|
},
|
|
26
26
|
"devDependencies": {
|
|
27
27
|
"acorn": "^6.4.2",
|
|
28
|
+
"commit-and-tag-version": "^11.3.0",
|
|
29
|
+
"cz-conventional-changelog": "^3.3.0",
|
|
28
30
|
"eslint": "8.2.0",
|
|
29
31
|
"eslint-config-airbnb": "19.0.4",
|
|
32
|
+
"eslint-config-prettier": "^9.0.0",
|
|
30
33
|
"eslint-plugin-import": "2.27.5",
|
|
31
34
|
"eslint-plugin-jsx-a11y": "6.7.1",
|
|
35
|
+
"eslint-plugin-prettier": "^3.3.1",
|
|
32
36
|
"eslint-plugin-react": "7.32.2",
|
|
37
|
+
"prettier": "^2.2.1",
|
|
33
38
|
"rollup": "0.66.6",
|
|
34
39
|
"rollup-plugin-buble": "0.19.6",
|
|
35
40
|
"rollup-plugin-commonjs": "8.0.2",
|
|
@@ -39,9 +44,15 @@
|
|
|
39
44
|
"build": "rollup -c",
|
|
40
45
|
"dev": "rollup -c -w",
|
|
41
46
|
"test": "node test/test.js",
|
|
42
|
-
"pretest": "npm run build"
|
|
47
|
+
"pretest": "npm run build",
|
|
48
|
+
"release": "commit-and-tag-version"
|
|
43
49
|
},
|
|
44
50
|
"files": [
|
|
45
51
|
"dist"
|
|
46
|
-
]
|
|
52
|
+
],
|
|
53
|
+
"config": {
|
|
54
|
+
"commitizen": {
|
|
55
|
+
"path": "./node_modules/cz-conventional-changelog"
|
|
56
|
+
}
|
|
57
|
+
}
|
|
47
58
|
}
|