node-es-transformer 1.0.0-alpha10 → 1.0.0-alpha11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/README.md +2 -0
- package/dist/node-es-transformer.cjs.js +81 -68
- package/dist/node-es-transformer.esm.js +81 -68
- package/package.json +14 -3
package/CHANGELOG.md
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file. See [commit-and-tag-version](https://github.com/absolute-version/commit-and-tag-version) for commit guidelines.
|
|
4
|
+
|
|
5
|
+
## [1.0.0-alpha11](https://github.com/walterra/node-es-transformer/compare/v1.0.0-alpha10...v1.0.0-alpha11) (2023-10-12)
|
|
6
|
+
|
|
7
|
+
### Features
|
|
8
|
+
|
|
9
|
+
- new option 'indexMappingTotalFieldsLimit' ([92edad1](https://github.com/walterra/node-es-transformer/commit/92edad18da7186d3881fc181e6e88b7929bed2d4))
|
|
10
|
+
|
|
11
|
+
### Bug Fixes
|
|
12
|
+
|
|
13
|
+
- fixes bufferSize to be applied to index reader too ([ffc3749](https://github.com/walterra/node-es-transformer/commit/ffc3749e296cd39f39924571c197986addc756ff))
|
|
14
|
+
|
|
15
|
+
## [`v1.0.0-alpha10`](https://github.com/walterra/node-es-transformer/releases/tag/v1.0.0-alpha10)
|
|
16
|
+
|
|
17
|
+
- New option `mappingsOverride` (0b951e1).
|
|
18
|
+
- New option `query` (45f91db).
|
|
19
|
+
|
|
20
|
+
## [`v1.0.0-alpha9`](https://github.com/walterra/node-es-transformer/releases/tag/v1.0.0-alpha9)
|
|
21
|
+
|
|
22
|
+
- Source and target configs are now expected to be passed in as complete client configs instead of individual parameters (5e6d0c7).
|
|
23
|
+
|
|
24
|
+
## [`v1.0.0-alpha8`](https://github.com/walterra/node-es-transformer/releases/tag/v1.0.0-alpha8)
|
|
25
|
+
|
|
26
|
+
- Exposes events and introduces `finish` event (a3e5810).
|
|
27
|
+
- Drop support for `_type` from `6.x` indices (3a26a84).
|
|
28
|
+
|
|
29
|
+
## [`v1.0.0-alpha7`](https://github.com/walterra/node-es-transformer/releases/tag/v1.0.0-alpha7)
|
|
30
|
+
|
|
31
|
+
- This version locks down `event-stream` to version `3.3.4` because of the security issue described here: https://github.com/dominictarr/event-stream/issues/116
|
|
32
|
+
- Last version to support `_type` from `6.x` indices.
|
package/README.md
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
[](https://www.npmjs.com/package/node-es-transformer)
|
|
2
2
|
[](https://www.npmjs.com/package/node-es-transformer)
|
|
3
3
|
[](https://www.npmjs.com/package/node-es-transformer)
|
|
4
|
+
[Commitizen friendly](http://commitizen.github.io/cz-cli/)
|
|
4
5
|
|
|
5
6
|
# node-es-transformer
|
|
6
7
|
|
|
@@ -115,6 +116,7 @@ transformer({
|
|
|
115
116
|
- `targetIndexName`: The target Elasticsearch index where documents will be indexed.
|
|
116
117
|
- `mappings`: Optional Elasticsearch document mappings. If not set and you're reindexing from another index, the mappings from the existing index will be used.
|
|
117
118
|
- `mappingsOverride`: If you're reindexing and this is set to `true`, `mappings` will be applied on top of the source index's mappings. Defaults to `false`.
|
|
119
|
+
- `indexMappingTotalFieldsLimit`: Optional limit on the total number of fields in the target index. When set, it is passed as the `index.mapping.total_fields.limit` setting when the target index is created.
|
|
118
120
|
- `query`: Optional Elasticsearch [DSL query](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html) to filter documents from the source index.
|
|
119
121
|
- `skipHeader`: If true, skips the first line of the source file. Defaults to `false`.
|
|
120
122
|
- `transform(line)`: A callback function which allows the transformation of a source line into one or several documents.
|
|
@@ -8,6 +8,8 @@ var glob = _interopDefault(require('glob'));
|
|
|
8
8
|
var cliProgress = _interopDefault(require('cli-progress'));
|
|
9
9
|
var elasticsearch = _interopDefault(require('@elastic/elasticsearch'));
|
|
10
10
|
|
|
11
|
+
var DEFAULT_BUFFER_SIZE = 1000;
|
|
12
|
+
|
|
11
13
|
function createMappingFactory(ref) {
|
|
12
14
|
var sourceClient = ref.sourceClient;
|
|
13
15
|
var sourceIndexName = ref.sourceIndexName;
|
|
@@ -15,6 +17,7 @@ function createMappingFactory(ref) {
|
|
|
15
17
|
var targetIndexName = ref.targetIndexName;
|
|
16
18
|
var mappings = ref.mappings;
|
|
17
19
|
var mappingsOverride = ref.mappingsOverride;
|
|
20
|
+
var indexMappingTotalFieldsLimit = ref.indexMappingTotalFieldsLimit;
|
|
18
21
|
var verbose = ref.verbose;
|
|
19
22
|
|
|
20
23
|
return async function () {
|
|
@@ -22,7 +25,9 @@ function createMappingFactory(ref) {
|
|
|
22
25
|
|
|
23
26
|
if (sourceClient && sourceIndexName && typeof targetMappings === 'undefined') {
|
|
24
27
|
try {
|
|
25
|
-
var mapping = await sourceClient.indices.getMapping({
|
|
28
|
+
var mapping = await sourceClient.indices.getMapping({
|
|
29
|
+
index: sourceIndexName,
|
|
30
|
+
});
|
|
26
31
|
targetMappings = mapping[sourceIndexName].mappings;
|
|
27
32
|
} catch (err) {
|
|
28
33
|
console.log('Error reading source mapping', err);
|
|
@@ -38,12 +43,17 @@ function createMappingFactory(ref) {
|
|
|
38
43
|
}
|
|
39
44
|
|
|
40
45
|
try {
|
|
41
|
-
var resp = await targetClient.indices.create(
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
46
|
+
var resp = await targetClient.indices.create({
|
|
47
|
+
index: targetIndexName,
|
|
48
|
+
body: Object.assign({}, {mappings: targetMappings},
|
|
49
|
+
(indexMappingTotalFieldsLimit !== undefined
|
|
50
|
+
? {
|
|
51
|
+
settings: {
|
|
52
|
+
'index.mapping.total_fields.limit': indexMappingTotalFieldsLimit,
|
|
53
|
+
},
|
|
54
|
+
}
|
|
55
|
+
: {})),
|
|
56
|
+
});
|
|
47
57
|
if (verbose) { console.log('Created target mapping', resp); }
|
|
48
58
|
} catch (err) {
|
|
49
59
|
console.log('Error creating target mapping', err);
|
|
@@ -55,40 +65,44 @@ function createMappingFactory(ref) {
|
|
|
55
65
|
function fileReaderFactory(indexer, fileName, transform, splitRegex, verbose) {
|
|
56
66
|
function startIndex(files) {
|
|
57
67
|
var file = files.shift();
|
|
58
|
-
var s = fs
|
|
68
|
+
var s = fs
|
|
69
|
+
.createReadStream(file)
|
|
59
70
|
.pipe(es.split(splitRegex))
|
|
60
|
-
.pipe(
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
71
|
+
.pipe(
|
|
72
|
+
es
|
|
73
|
+
.mapSync(function (line) {
|
|
74
|
+
s.pause();
|
|
75
|
+
try {
|
|
76
|
+
var doc = typeof transform === 'function' ? transform(line) : line;
|
|
77
|
+
// if doc is undefined we'll skip indexing it
|
|
78
|
+
if (typeof doc === 'undefined') {
|
|
79
|
+
s.resume();
|
|
80
|
+
return;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
// the transform callback may return an array of docs so we can emit
|
|
84
|
+
// multiple docs from a single line
|
|
85
|
+
if (Array.isArray(doc)) {
|
|
86
|
+
doc.forEach(function (d) { return indexer.add(d); });
|
|
87
|
+
return;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
indexer.add(doc);
|
|
91
|
+
} catch (e) {
|
|
92
|
+
console.log('error', e);
|
|
93
|
+
}
|
|
94
|
+
})
|
|
95
|
+
.on('error', function (err) {
|
|
96
|
+
console.log('Error while reading file.', err);
|
|
97
|
+
})
|
|
98
|
+
.on('end', function () {
|
|
99
|
+
if (verbose) { console.log('Read entire file: ', file); }
|
|
100
|
+
indexer.finish();
|
|
101
|
+
if (files.length > 0) {
|
|
102
|
+
startIndex(files);
|
|
103
|
+
}
|
|
104
|
+
})
|
|
105
|
+
);
|
|
92
106
|
|
|
93
107
|
indexer.queueEmitter.on('resume', function () {
|
|
94
108
|
s.resume();
|
|
@@ -110,7 +124,7 @@ var queueEmitter = new EventEmitter();
|
|
|
110
124
|
function indexQueueFactory(ref) {
|
|
111
125
|
var client = ref.targetClient;
|
|
112
126
|
var targetIndexName = ref.targetIndexName;
|
|
113
|
-
var bufferSize = ref.bufferSize; if ( bufferSize === void 0 ) bufferSize =
|
|
127
|
+
var bufferSize = ref.bufferSize; if ( bufferSize === void 0 ) bufferSize = DEFAULT_BUFFER_SIZE;
|
|
114
128
|
var skipHeader = ref.skipHeader; if ( skipHeader === void 0 ) skipHeader = false;
|
|
115
129
|
var verbose = ref.verbose; if ( verbose === void 0 ) verbose = true;
|
|
116
130
|
|
|
@@ -128,7 +142,8 @@ function indexQueueFactory(ref) {
|
|
|
128
142
|
var docs = queue.shift();
|
|
129
143
|
queueEmitter.emit('queue-size', queue.length);
|
|
130
144
|
ingesting = true;
|
|
131
|
-
if (verbose)
|
|
145
|
+
if (verbose)
|
|
146
|
+
{ console.log(("bulk ingest docs: " + (docs.length / 2) + ", queue length: " + (queue.length))); }
|
|
132
147
|
|
|
133
148
|
try {
|
|
134
149
|
await client.bulk({ body: docs });
|
|
@@ -161,7 +176,7 @@ function indexQueueFactory(ref) {
|
|
|
161
176
|
queueEmitter.emit('resume');
|
|
162
177
|
}
|
|
163
178
|
|
|
164
|
-
if (buffer.length >=
|
|
179
|
+
if (buffer.length >= bufferSize * 2) {
|
|
165
180
|
ingest(buffer);
|
|
166
181
|
buffer = [];
|
|
167
182
|
}
|
|
@@ -180,7 +195,16 @@ var MAX_QUEUE_SIZE = 5;
|
|
|
180
195
|
// create a new progress bar instance and use shades_classic theme
|
|
181
196
|
var progressBar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
|
|
182
197
|
|
|
183
|
-
function indexReaderFactory(
|
|
198
|
+
function indexReaderFactory(
|
|
199
|
+
indexer,
|
|
200
|
+
sourceIndexName,
|
|
201
|
+
transform,
|
|
202
|
+
client,
|
|
203
|
+
query,
|
|
204
|
+
bufferSize
|
|
205
|
+
) {
|
|
206
|
+
if ( bufferSize === void 0 ) bufferSize = DEFAULT_BUFFER_SIZE;
|
|
207
|
+
|
|
184
208
|
return async function indexReader() {
|
|
185
209
|
var responseQueue = [];
|
|
186
210
|
var docsNum = 0;
|
|
@@ -189,7 +213,7 @@ function indexReaderFactory(indexer, sourceIndexName, transform, client, query)
|
|
|
189
213
|
return client.search({
|
|
190
214
|
index: sourceIndexName,
|
|
191
215
|
scroll: '30s',
|
|
192
|
-
size:
|
|
216
|
+
size: bufferSize,
|
|
193
217
|
query: query,
|
|
194
218
|
});
|
|
195
219
|
}
|
|
@@ -210,7 +234,7 @@ function indexReaderFactory(indexer, sourceIndexName, transform, client, query)
|
|
|
210
234
|
function processHit(hit) {
|
|
211
235
|
docsNum += 1;
|
|
212
236
|
try {
|
|
213
|
-
var doc =
|
|
237
|
+
var doc = typeof transform === 'function' ? transform(hit._source) : hit._source; // eslint-disable-line no-underscore-dangle
|
|
214
238
|
// if doc is undefined we'll skip indexing it
|
|
215
239
|
if (typeof doc === 'undefined') {
|
|
216
240
|
return;
|
|
@@ -252,7 +276,7 @@ function indexReaderFactory(indexer, sourceIndexName, transform, client, query)
|
|
|
252
276
|
}
|
|
253
277
|
|
|
254
278
|
if (ingestQueueSize < MAX_QUEUE_SIZE) {
|
|
255
|
-
|
|
279
|
+
// get the next response if there are more docs to fetch
|
|
256
280
|
var sc = await scroll(response._scroll_id); // eslint-disable-line no-await-in-loop,no-underscore-dangle,max-len
|
|
257
281
|
scrollId = sc._scroll_id; // eslint-disable-line no-underscore-dangle
|
|
258
282
|
responseQueue.push(sc);
|
|
@@ -266,7 +290,7 @@ function indexReaderFactory(indexer, sourceIndexName, transform, client, query)
|
|
|
266
290
|
ingestQueueSize = size;
|
|
267
291
|
|
|
268
292
|
if (!readActive && ingestQueueSize < MAX_QUEUE_SIZE) {
|
|
269
|
-
|
|
293
|
+
// get the next response if there are more docs to fetch
|
|
270
294
|
var sc = await scroll(scrollId); // eslint-disable-line no-await-in-loop,no-underscore-dangle,max-len
|
|
271
295
|
scrollId = sc._scroll_id; // eslint-disable-line no-underscore-dangle
|
|
272
296
|
responseQueue.push(sc);
|
|
@@ -296,13 +320,14 @@ async function transformer(ref) {
|
|
|
296
320
|
var deleteIndex = ref.deleteIndex; if ( deleteIndex === void 0 ) deleteIndex = false;
|
|
297
321
|
var sourceClientConfig = ref.sourceClientConfig;
|
|
298
322
|
var targetClientConfig = ref.targetClientConfig;
|
|
299
|
-
var bufferSize = ref.bufferSize; if ( bufferSize === void 0 ) bufferSize =
|
|
323
|
+
var bufferSize = ref.bufferSize; if ( bufferSize === void 0 ) bufferSize = DEFAULT_BUFFER_SIZE;
|
|
300
324
|
var fileName = ref.fileName;
|
|
301
325
|
var splitRegex = ref.splitRegex; if ( splitRegex === void 0 ) splitRegex = /\n/;
|
|
302
326
|
var sourceIndexName = ref.sourceIndexName;
|
|
303
327
|
var targetIndexName = ref.targetIndexName;
|
|
304
328
|
var mappings = ref.mappings;
|
|
305
329
|
var mappingsOverride = ref.mappingsOverride; if ( mappingsOverride === void 0 ) mappingsOverride = false;
|
|
330
|
+
var indexMappingTotalFieldsLimit = ref.indexMappingTotalFieldsLimit;
|
|
306
331
|
var query = ref.query;
|
|
307
332
|
var skipHeader = ref.skipHeader; if ( skipHeader === void 0 ) skipHeader = false;
|
|
308
333
|
var transform = ref.transform;
|
|
@@ -328,6 +353,7 @@ async function transformer(ref) {
|
|
|
328
353
|
targetIndexName: targetIndexName,
|
|
329
354
|
mappings: mappings,
|
|
330
355
|
mappingsOverride: mappingsOverride,
|
|
356
|
+
indexMappingTotalFieldsLimit: indexMappingTotalFieldsLimit,
|
|
331
357
|
verbose: verbose,
|
|
332
358
|
});
|
|
333
359
|
var indexer = indexQueueFactory({
|
|
@@ -339,30 +365,16 @@ async function transformer(ref) {
|
|
|
339
365
|
});
|
|
340
366
|
|
|
341
367
|
function getReader() {
|
|
342
|
-
if (
|
|
343
|
-
|
|
344
|
-
&& typeof sourceIndexName !== 'undefined'
|
|
345
|
-
) {
|
|
346
|
-
throw Error(
|
|
347
|
-
'Only either one of fileName or sourceIndexName can be specified.'
|
|
348
|
-
);
|
|
368
|
+
if (typeof fileName !== 'undefined' && typeof sourceIndexName !== 'undefined') {
|
|
369
|
+
throw Error('Only either one of fileName or sourceIndexName can be specified.');
|
|
349
370
|
}
|
|
350
371
|
|
|
351
|
-
if (
|
|
352
|
-
typeof fileName === 'undefined'
|
|
353
|
-
&& typeof sourceIndexName === 'undefined'
|
|
354
|
-
) {
|
|
372
|
+
if (typeof fileName === 'undefined' && typeof sourceIndexName === 'undefined') {
|
|
355
373
|
throw Error('Either fileName or sourceIndexName must be specified.');
|
|
356
374
|
}
|
|
357
375
|
|
|
358
376
|
if (typeof fileName !== 'undefined') {
|
|
359
|
-
return fileReaderFactory(
|
|
360
|
-
indexer,
|
|
361
|
-
fileName,
|
|
362
|
-
transform,
|
|
363
|
-
splitRegex,
|
|
364
|
-
verbose
|
|
365
|
-
);
|
|
377
|
+
return fileReaderFactory(indexer, fileName, transform, splitRegex, verbose);
|
|
366
378
|
}
|
|
367
379
|
|
|
368
380
|
if (typeof sourceIndexName !== 'undefined') {
|
|
@@ -371,7 +383,8 @@ async function transformer(ref) {
|
|
|
371
383
|
sourceIndexName,
|
|
372
384
|
transform,
|
|
373
385
|
sourceClient,
|
|
374
|
-
query
|
|
386
|
+
query,
|
|
387
|
+
bufferSize
|
|
375
388
|
);
|
|
376
389
|
}
|
|
377
390
|
|
|
@@ -4,6 +4,8 @@ import glob from 'glob';
|
|
|
4
4
|
import cliProgress from 'cli-progress';
|
|
5
5
|
import elasticsearch from '@elastic/elasticsearch';
|
|
6
6
|
|
|
7
|
+
var DEFAULT_BUFFER_SIZE = 1000;
|
|
8
|
+
|
|
7
9
|
function createMappingFactory(ref) {
|
|
8
10
|
var sourceClient = ref.sourceClient;
|
|
9
11
|
var sourceIndexName = ref.sourceIndexName;
|
|
@@ -11,6 +13,7 @@ function createMappingFactory(ref) {
|
|
|
11
13
|
var targetIndexName = ref.targetIndexName;
|
|
12
14
|
var mappings = ref.mappings;
|
|
13
15
|
var mappingsOverride = ref.mappingsOverride;
|
|
16
|
+
var indexMappingTotalFieldsLimit = ref.indexMappingTotalFieldsLimit;
|
|
14
17
|
var verbose = ref.verbose;
|
|
15
18
|
|
|
16
19
|
return async function () {
|
|
@@ -18,7 +21,9 @@ function createMappingFactory(ref) {
|
|
|
18
21
|
|
|
19
22
|
if (sourceClient && sourceIndexName && typeof targetMappings === 'undefined') {
|
|
20
23
|
try {
|
|
21
|
-
var mapping = await sourceClient.indices.getMapping({
|
|
24
|
+
var mapping = await sourceClient.indices.getMapping({
|
|
25
|
+
index: sourceIndexName,
|
|
26
|
+
});
|
|
22
27
|
targetMappings = mapping[sourceIndexName].mappings;
|
|
23
28
|
} catch (err) {
|
|
24
29
|
console.log('Error reading source mapping', err);
|
|
@@ -34,12 +39,17 @@ function createMappingFactory(ref) {
|
|
|
34
39
|
}
|
|
35
40
|
|
|
36
41
|
try {
|
|
37
|
-
var resp = await targetClient.indices.create(
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
42
|
+
var resp = await targetClient.indices.create({
|
|
43
|
+
index: targetIndexName,
|
|
44
|
+
body: Object.assign({}, {mappings: targetMappings},
|
|
45
|
+
(indexMappingTotalFieldsLimit !== undefined
|
|
46
|
+
? {
|
|
47
|
+
settings: {
|
|
48
|
+
'index.mapping.total_fields.limit': indexMappingTotalFieldsLimit,
|
|
49
|
+
},
|
|
50
|
+
}
|
|
51
|
+
: {})),
|
|
52
|
+
});
|
|
43
53
|
if (verbose) { console.log('Created target mapping', resp); }
|
|
44
54
|
} catch (err) {
|
|
45
55
|
console.log('Error creating target mapping', err);
|
|
@@ -51,40 +61,44 @@ function createMappingFactory(ref) {
|
|
|
51
61
|
function fileReaderFactory(indexer, fileName, transform, splitRegex, verbose) {
|
|
52
62
|
function startIndex(files) {
|
|
53
63
|
var file = files.shift();
|
|
54
|
-
var s = fs
|
|
64
|
+
var s = fs
|
|
65
|
+
.createReadStream(file)
|
|
55
66
|
.pipe(es.split(splitRegex))
|
|
56
|
-
.pipe(
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
67
|
+
.pipe(
|
|
68
|
+
es
|
|
69
|
+
.mapSync(function (line) {
|
|
70
|
+
s.pause();
|
|
71
|
+
try {
|
|
72
|
+
var doc = typeof transform === 'function' ? transform(line) : line;
|
|
73
|
+
// if doc is undefined we'll skip indexing it
|
|
74
|
+
if (typeof doc === 'undefined') {
|
|
75
|
+
s.resume();
|
|
76
|
+
return;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
// the transform callback may return an array of docs so we can emit
|
|
80
|
+
// multiple docs from a single line
|
|
81
|
+
if (Array.isArray(doc)) {
|
|
82
|
+
doc.forEach(function (d) { return indexer.add(d); });
|
|
83
|
+
return;
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
indexer.add(doc);
|
|
87
|
+
} catch (e) {
|
|
88
|
+
console.log('error', e);
|
|
89
|
+
}
|
|
90
|
+
})
|
|
91
|
+
.on('error', function (err) {
|
|
92
|
+
console.log('Error while reading file.', err);
|
|
93
|
+
})
|
|
94
|
+
.on('end', function () {
|
|
95
|
+
if (verbose) { console.log('Read entire file: ', file); }
|
|
96
|
+
indexer.finish();
|
|
97
|
+
if (files.length > 0) {
|
|
98
|
+
startIndex(files);
|
|
99
|
+
}
|
|
100
|
+
})
|
|
101
|
+
);
|
|
88
102
|
|
|
89
103
|
indexer.queueEmitter.on('resume', function () {
|
|
90
104
|
s.resume();
|
|
@@ -106,7 +120,7 @@ var queueEmitter = new EventEmitter();
|
|
|
106
120
|
function indexQueueFactory(ref) {
|
|
107
121
|
var client = ref.targetClient;
|
|
108
122
|
var targetIndexName = ref.targetIndexName;
|
|
109
|
-
var bufferSize = ref.bufferSize; if ( bufferSize === void 0 ) bufferSize =
|
|
123
|
+
var bufferSize = ref.bufferSize; if ( bufferSize === void 0 ) bufferSize = DEFAULT_BUFFER_SIZE;
|
|
110
124
|
var skipHeader = ref.skipHeader; if ( skipHeader === void 0 ) skipHeader = false;
|
|
111
125
|
var verbose = ref.verbose; if ( verbose === void 0 ) verbose = true;
|
|
112
126
|
|
|
@@ -124,7 +138,8 @@ function indexQueueFactory(ref) {
|
|
|
124
138
|
var docs = queue.shift();
|
|
125
139
|
queueEmitter.emit('queue-size', queue.length);
|
|
126
140
|
ingesting = true;
|
|
127
|
-
if (verbose)
|
|
141
|
+
if (verbose)
|
|
142
|
+
{ console.log(("bulk ingest docs: " + (docs.length / 2) + ", queue length: " + (queue.length))); }
|
|
128
143
|
|
|
129
144
|
try {
|
|
130
145
|
await client.bulk({ body: docs });
|
|
@@ -157,7 +172,7 @@ function indexQueueFactory(ref) {
|
|
|
157
172
|
queueEmitter.emit('resume');
|
|
158
173
|
}
|
|
159
174
|
|
|
160
|
-
if (buffer.length >=
|
|
175
|
+
if (buffer.length >= bufferSize * 2) {
|
|
161
176
|
ingest(buffer);
|
|
162
177
|
buffer = [];
|
|
163
178
|
}
|
|
@@ -176,7 +191,16 @@ var MAX_QUEUE_SIZE = 5;
|
|
|
176
191
|
// create a new progress bar instance and use shades_classic theme
|
|
177
192
|
var progressBar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
|
|
178
193
|
|
|
179
|
-
function indexReaderFactory(
|
|
194
|
+
function indexReaderFactory(
|
|
195
|
+
indexer,
|
|
196
|
+
sourceIndexName,
|
|
197
|
+
transform,
|
|
198
|
+
client,
|
|
199
|
+
query,
|
|
200
|
+
bufferSize
|
|
201
|
+
) {
|
|
202
|
+
if ( bufferSize === void 0 ) bufferSize = DEFAULT_BUFFER_SIZE;
|
|
203
|
+
|
|
180
204
|
return async function indexReader() {
|
|
181
205
|
var responseQueue = [];
|
|
182
206
|
var docsNum = 0;
|
|
@@ -185,7 +209,7 @@ function indexReaderFactory(indexer, sourceIndexName, transform, client, query)
|
|
|
185
209
|
return client.search({
|
|
186
210
|
index: sourceIndexName,
|
|
187
211
|
scroll: '30s',
|
|
188
|
-
size:
|
|
212
|
+
size: bufferSize,
|
|
189
213
|
query: query,
|
|
190
214
|
});
|
|
191
215
|
}
|
|
@@ -206,7 +230,7 @@ function indexReaderFactory(indexer, sourceIndexName, transform, client, query)
|
|
|
206
230
|
function processHit(hit) {
|
|
207
231
|
docsNum += 1;
|
|
208
232
|
try {
|
|
209
|
-
var doc =
|
|
233
|
+
var doc = typeof transform === 'function' ? transform(hit._source) : hit._source; // eslint-disable-line no-underscore-dangle
|
|
210
234
|
// if doc is undefined we'll skip indexing it
|
|
211
235
|
if (typeof doc === 'undefined') {
|
|
212
236
|
return;
|
|
@@ -248,7 +272,7 @@ function indexReaderFactory(indexer, sourceIndexName, transform, client, query)
|
|
|
248
272
|
}
|
|
249
273
|
|
|
250
274
|
if (ingestQueueSize < MAX_QUEUE_SIZE) {
|
|
251
|
-
|
|
275
|
+
// get the next response if there are more docs to fetch
|
|
252
276
|
var sc = await scroll(response._scroll_id); // eslint-disable-line no-await-in-loop,no-underscore-dangle,max-len
|
|
253
277
|
scrollId = sc._scroll_id; // eslint-disable-line no-underscore-dangle
|
|
254
278
|
responseQueue.push(sc);
|
|
@@ -262,7 +286,7 @@ function indexReaderFactory(indexer, sourceIndexName, transform, client, query)
|
|
|
262
286
|
ingestQueueSize = size;
|
|
263
287
|
|
|
264
288
|
if (!readActive && ingestQueueSize < MAX_QUEUE_SIZE) {
|
|
265
|
-
|
|
289
|
+
// get the next response if there are more docs to fetch
|
|
266
290
|
var sc = await scroll(scrollId); // eslint-disable-line no-await-in-loop,no-underscore-dangle,max-len
|
|
267
291
|
scrollId = sc._scroll_id; // eslint-disable-line no-underscore-dangle
|
|
268
292
|
responseQueue.push(sc);
|
|
@@ -292,13 +316,14 @@ async function transformer(ref) {
|
|
|
292
316
|
var deleteIndex = ref.deleteIndex; if ( deleteIndex === void 0 ) deleteIndex = false;
|
|
293
317
|
var sourceClientConfig = ref.sourceClientConfig;
|
|
294
318
|
var targetClientConfig = ref.targetClientConfig;
|
|
295
|
-
var bufferSize = ref.bufferSize; if ( bufferSize === void 0 ) bufferSize =
|
|
319
|
+
var bufferSize = ref.bufferSize; if ( bufferSize === void 0 ) bufferSize = DEFAULT_BUFFER_SIZE;
|
|
296
320
|
var fileName = ref.fileName;
|
|
297
321
|
var splitRegex = ref.splitRegex; if ( splitRegex === void 0 ) splitRegex = /\n/;
|
|
298
322
|
var sourceIndexName = ref.sourceIndexName;
|
|
299
323
|
var targetIndexName = ref.targetIndexName;
|
|
300
324
|
var mappings = ref.mappings;
|
|
301
325
|
var mappingsOverride = ref.mappingsOverride; if ( mappingsOverride === void 0 ) mappingsOverride = false;
|
|
326
|
+
var indexMappingTotalFieldsLimit = ref.indexMappingTotalFieldsLimit;
|
|
302
327
|
var query = ref.query;
|
|
303
328
|
var skipHeader = ref.skipHeader; if ( skipHeader === void 0 ) skipHeader = false;
|
|
304
329
|
var transform = ref.transform;
|
|
@@ -324,6 +349,7 @@ async function transformer(ref) {
|
|
|
324
349
|
targetIndexName: targetIndexName,
|
|
325
350
|
mappings: mappings,
|
|
326
351
|
mappingsOverride: mappingsOverride,
|
|
352
|
+
indexMappingTotalFieldsLimit: indexMappingTotalFieldsLimit,
|
|
327
353
|
verbose: verbose,
|
|
328
354
|
});
|
|
329
355
|
var indexer = indexQueueFactory({
|
|
@@ -335,30 +361,16 @@ async function transformer(ref) {
|
|
|
335
361
|
});
|
|
336
362
|
|
|
337
363
|
function getReader() {
|
|
338
|
-
if (
|
|
339
|
-
|
|
340
|
-
&& typeof sourceIndexName !== 'undefined'
|
|
341
|
-
) {
|
|
342
|
-
throw Error(
|
|
343
|
-
'Only either one of fileName or sourceIndexName can be specified.'
|
|
344
|
-
);
|
|
364
|
+
if (typeof fileName !== 'undefined' && typeof sourceIndexName !== 'undefined') {
|
|
365
|
+
throw Error('Only either one of fileName or sourceIndexName can be specified.');
|
|
345
366
|
}
|
|
346
367
|
|
|
347
|
-
if (
|
|
348
|
-
typeof fileName === 'undefined'
|
|
349
|
-
&& typeof sourceIndexName === 'undefined'
|
|
350
|
-
) {
|
|
368
|
+
if (typeof fileName === 'undefined' && typeof sourceIndexName === 'undefined') {
|
|
351
369
|
throw Error('Either fileName or sourceIndexName must be specified.');
|
|
352
370
|
}
|
|
353
371
|
|
|
354
372
|
if (typeof fileName !== 'undefined') {
|
|
355
|
-
return fileReaderFactory(
|
|
356
|
-
indexer,
|
|
357
|
-
fileName,
|
|
358
|
-
transform,
|
|
359
|
-
splitRegex,
|
|
360
|
-
verbose
|
|
361
|
-
);
|
|
373
|
+
return fileReaderFactory(indexer, fileName, transform, splitRegex, verbose);
|
|
362
374
|
}
|
|
363
375
|
|
|
364
376
|
if (typeof sourceIndexName !== 'undefined') {
|
|
@@ -367,7 +379,8 @@ async function transformer(ref) {
|
|
|
367
379
|
sourceIndexName,
|
|
368
380
|
transform,
|
|
369
381
|
sourceClient,
|
|
370
|
-
query
|
|
382
|
+
query,
|
|
383
|
+
bufferSize
|
|
371
384
|
);
|
|
372
385
|
}
|
|
373
386
|
|
package/package.json
CHANGED
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
"license": "Apache-2.0",
|
|
15
15
|
"author": "Walter Rafelsberger <walter@rafelsberger.at>",
|
|
16
16
|
"contributors": [],
|
|
17
|
-
"version": "1.0.0-
|
|
17
|
+
"version": "1.0.0-alpha11",
|
|
18
18
|
"main": "dist/node-es-transformer.cjs.js",
|
|
19
19
|
"module": "dist/node-es-transformer.esm.js",
|
|
20
20
|
"dependencies": {
|
|
@@ -25,11 +25,16 @@
|
|
|
25
25
|
},
|
|
26
26
|
"devDependencies": {
|
|
27
27
|
"acorn": "^6.4.2",
|
|
28
|
+
"commit-and-tag-version": "^11.3.0",
|
|
29
|
+
"cz-conventional-changelog": "^3.3.0",
|
|
28
30
|
"eslint": "8.2.0",
|
|
29
31
|
"eslint-config-airbnb": "19.0.4",
|
|
32
|
+
"eslint-config-prettier": "^9.0.0",
|
|
30
33
|
"eslint-plugin-import": "2.27.5",
|
|
31
34
|
"eslint-plugin-jsx-a11y": "6.7.1",
|
|
35
|
+
"eslint-plugin-prettier": "^3.3.1",
|
|
32
36
|
"eslint-plugin-react": "7.32.2",
|
|
37
|
+
"prettier": "^2.2.1",
|
|
33
38
|
"rollup": "0.66.6",
|
|
34
39
|
"rollup-plugin-buble": "0.19.6",
|
|
35
40
|
"rollup-plugin-commonjs": "8.0.2",
|
|
@@ -39,9 +44,15 @@
|
|
|
39
44
|
"build": "rollup -c",
|
|
40
45
|
"dev": "rollup -c -w",
|
|
41
46
|
"test": "node test/test.js",
|
|
42
|
-
"pretest": "npm run build"
|
|
47
|
+
"pretest": "npm run build",
|
|
48
|
+
"release": "commit-and-tag-version"
|
|
43
49
|
},
|
|
44
50
|
"files": [
|
|
45
51
|
"dist"
|
|
46
|
-
]
|
|
52
|
+
],
|
|
53
|
+
"config": {
|
|
54
|
+
"commitizen": {
|
|
55
|
+
"path": "./node_modules/cz-conventional-changelog"
|
|
56
|
+
}
|
|
57
|
+
}
|
|
47
58
|
}
|