node-es-transformer 1.0.0-beta1 → 1.0.0-beta3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -3
- package/dist/node-es-transformer.cjs.js +280 -175
- package/dist/node-es-transformer.esm.js +280 -175
- package/package.json +13 -7
package/README.md
CHANGED
|
@@ -110,7 +110,8 @@ transformer({
|
|
|
110
110
|
|
|
111
111
|
- `deleteIndex`: Setting to automatically delete an existing index, default is `false`.
|
|
112
112
|
- `sourceClientConfig`/`targetClientConfig`: Optional Elasticsearch client options, defaults to `{ node: 'http://localhost:9200' }`.
|
|
113
|
-
- `bufferSize`: The
|
|
113
|
+
- `bufferSize`: The size threshold, in KBytes, at which a bulk index request is flushed. Defaults to `5120`.
|
|
114
|
+
- `searchSize`: The number of documents to fetch with each search request when reindexing from another source index. Defaults to `1000`.
|
|
114
115
|
- `fileName`: Source filename to ingest, supports wildcards. If this is set, `sourceIndexName` is not allowed.
|
|
115
116
|
- `splitRegex`: Custom line split regex, defaults to `/\n/`.
|
|
116
117
|
- `sourceIndexName`: The source Elasticsearch index to reindex from. If this is set, `fileName` is not allowed.
|
|
@@ -118,6 +119,7 @@ transformer({
|
|
|
118
119
|
- `mappings`: Optional Elasticsearch document mappings. If not set and you're reindexing from another index, the mappings from the existing index will be used.
|
|
119
120
|
- `mappingsOverride`: If you're reindexing and this is set to `true`, `mappings` will be applied on top of the source index's mappings. Defaults to `false`.
|
|
120
121
|
- `indexMappingTotalFieldsLimit`: Optional field limit for the target index to be created that will be passed on as the `index.mapping.total_fields.limit` setting.
|
|
122
|
+
- `populatedFields`: If `true`, fetches a set of random documents to identify which fields are actually used by documents. Can be useful for indices with lots of field mappings to increase query/reindex performance. Defaults to `false`.
|
|
121
123
|
- `query`: Optional Elasticsearch [DSL query](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html) to filter documents from the source index.
|
|
122
124
|
- `skipHeader`: If true, skips the first line of the source file. Defaults to `false`.
|
|
123
125
|
- `transform(line)`: A callback function which allows the transformation of a source line into one or several documents.
|
|
@@ -146,12 +148,14 @@ yarn
|
|
|
146
148
|
|
|
147
149
|
```bash
|
|
148
150
|
# Download the docker image
|
|
149
|
-
docker pull docker.elastic.co/elasticsearch/elasticsearch:8.
|
|
151
|
+
docker pull docker.elastic.co/elasticsearch/elasticsearch:8.15.0
|
|
150
152
|
|
|
151
153
|
# Run the container
|
|
152
|
-
docker run --name es01 --net elastic -p 9200:9200 -it -m 1GB -e "discovery.type=single-node" -e "xpack.security.enabled=false" docker.elastic.co/elasticsearch/elasticsearch:8.
|
|
154
|
+
docker run --name es01 --net elastic -p 9200:9200 -it -m 1GB -e "discovery.type=single-node" -e "xpack.security.enabled=false" docker.elastic.co/elasticsearch/elasticsearch:8.15.0
|
|
153
155
|
```
|
|
154
156
|
|
|
157
|
+
To commit, use `cz`. To prepare a release, use e.g. `yarn release -- --release-as 1.0.0-beta2`.
|
|
158
|
+
|
|
155
159
|
## License
|
|
156
160
|
|
|
157
161
|
[Apache 2.0](LICENSE).
|
|
@@ -5,10 +5,20 @@ function _interopDefault (ex) { return (ex && (typeof ex === 'object') && 'defau
|
|
|
5
5
|
var fs = _interopDefault(require('fs'));
|
|
6
6
|
var es = _interopDefault(require('event-stream'));
|
|
7
7
|
var glob = _interopDefault(require('glob'));
|
|
8
|
+
var split = _interopDefault(require('split2'));
|
|
9
|
+
var stream = require('stream');
|
|
8
10
|
var cliProgress = _interopDefault(require('cli-progress'));
|
|
9
11
|
var elasticsearch = _interopDefault(require('@elastic/elasticsearch'));
|
|
10
12
|
|
|
11
|
-
|
|
13
|
+
// In earlier versions this was used to set the number of docs to index in a
|
|
14
|
+
// single bulk request. Since we switched to use the helpers.bulk() method from
|
|
15
|
+
// the ES client, this now translates to the `flushBytes` option of the helper.
|
|
16
|
+
// However, for a degree of backwards compatibility with the old values, this uses
|
|
17
|
+
// KBytes instead of Bytes. It will be multiplied by 1024 in the index queue.
|
|
18
|
+
var DEFAULT_BUFFER_SIZE = 5120;
|
|
19
|
+
|
|
20
|
+
// The default number of docs to fetch in a single search request when reindexing.
|
|
21
|
+
var DEFAULT_SEARCH_SIZE = 1000;
|
|
12
22
|
|
|
13
23
|
function createMappingFactory(ref) {
|
|
14
24
|
var sourceClient = ref.sourceClient;
|
|
@@ -19,6 +29,7 @@ function createMappingFactory(ref) {
|
|
|
19
29
|
var mappingsOverride = ref.mappingsOverride;
|
|
20
30
|
var indexMappingTotalFieldsLimit = ref.indexMappingTotalFieldsLimit;
|
|
21
31
|
var verbose = ref.verbose;
|
|
32
|
+
var deleteIndex = ref.deleteIndex;
|
|
22
33
|
|
|
23
34
|
return async function () {
|
|
24
35
|
var targetMappings = mappingsOverride ? undefined : mappings;
|
|
@@ -28,7 +39,14 @@ function createMappingFactory(ref) {
|
|
|
28
39
|
var mapping = await sourceClient.indices.getMapping({
|
|
29
40
|
index: sourceIndexName,
|
|
30
41
|
});
|
|
31
|
-
|
|
42
|
+
if (mapping[sourceIndexName]) {
|
|
43
|
+
targetMappings = mapping[sourceIndexName].mappings;
|
|
44
|
+
} else {
|
|
45
|
+
var allMappings = Object.values(mapping);
|
|
46
|
+
if (allMappings.length > 0) {
|
|
47
|
+
targetMappings = Object.values(mapping)[0].mappings;
|
|
48
|
+
}
|
|
49
|
+
}
|
|
32
50
|
} catch (err) {
|
|
33
51
|
console.log('Error reading source mapping', err);
|
|
34
52
|
return;
|
|
@@ -43,18 +61,28 @@ function createMappingFactory(ref) {
|
|
|
43
61
|
}
|
|
44
62
|
|
|
45
63
|
try {
|
|
46
|
-
var
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
64
|
+
var indexExists = await targetClient.indices.exists({ index: targetIndexName });
|
|
65
|
+
|
|
66
|
+
if (indexExists === true && deleteIndex === true) {
|
|
67
|
+
await targetClient.indices.delete({ index: targetIndexName });
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
if (indexExists === false || deleteIndex === true) {
|
|
71
|
+
var resp = await targetClient.indices.create({
|
|
72
|
+
index: targetIndexName,
|
|
73
|
+
body: Object.assign({}, {mappings: targetMappings},
|
|
74
|
+
(indexMappingTotalFieldsLimit !== undefined
|
|
75
|
+
? {
|
|
76
|
+
settings: {
|
|
77
|
+
'index.mapping.total_fields.limit': indexMappingTotalFieldsLimit,
|
|
78
|
+
'index.number_of_shards': 1,
|
|
79
|
+
'index.number_of_replicas': 0,
|
|
80
|
+
},
|
|
81
|
+
}
|
|
82
|
+
: {})),
|
|
83
|
+
});
|
|
84
|
+
if (verbose) { console.log('Created target mapping', resp); }
|
|
85
|
+
}
|
|
58
86
|
} catch (err) {
|
|
59
87
|
console.log('Error creating target mapping', err);
|
|
60
88
|
}
|
|
@@ -62,22 +90,28 @@ function createMappingFactory(ref) {
|
|
|
62
90
|
};
|
|
63
91
|
}
|
|
64
92
|
|
|
65
|
-
var MAX_QUEUE_SIZE = 15;
|
|
66
|
-
|
|
67
93
|
function fileReaderFactory(indexer, fileName, transform, splitRegex, verbose) {
|
|
68
94
|
function startIndex(files) {
|
|
69
|
-
var ingestQueueSize = 0;
|
|
70
95
|
var finished = false;
|
|
71
96
|
|
|
72
97
|
var file = files.shift();
|
|
73
98
|
var s = fs
|
|
74
99
|
.createReadStream(file)
|
|
75
|
-
.pipe(
|
|
100
|
+
.pipe(split(splitRegex))
|
|
76
101
|
.pipe(
|
|
77
102
|
es
|
|
78
103
|
.mapSync(function (line) {
|
|
79
104
|
try {
|
|
80
|
-
|
|
105
|
+
// skip empty lines
|
|
106
|
+
if (line === '') {
|
|
107
|
+
return;
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
var doc =
|
|
111
|
+
typeof transform === 'function'
|
|
112
|
+
? JSON.stringify(transform(JSON.parse(line)))
|
|
113
|
+
: line;
|
|
114
|
+
|
|
81
115
|
// if doc is undefined we'll skip indexing it
|
|
82
116
|
if (typeof doc === 'undefined') {
|
|
83
117
|
s.resume();
|
|
@@ -111,20 +145,13 @@ function fileReaderFactory(indexer, fileName, transform, splitRegex, verbose) {
|
|
|
111
145
|
})
|
|
112
146
|
);
|
|
113
147
|
|
|
114
|
-
indexer.queueEmitter.on('
|
|
148
|
+
indexer.queueEmitter.on('pause', function () {
|
|
115
149
|
if (finished) { return; }
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
if (ingestQueueSize < MAX_QUEUE_SIZE) {
|
|
119
|
-
s.resume();
|
|
120
|
-
} else {
|
|
121
|
-
s.pause();
|
|
122
|
-
}
|
|
150
|
+
s.pause();
|
|
123
151
|
});
|
|
124
152
|
|
|
125
153
|
indexer.queueEmitter.on('resume', function () {
|
|
126
154
|
if (finished) { return; }
|
|
127
|
-
ingestQueueSize = 0;
|
|
128
155
|
s.resume();
|
|
129
156
|
});
|
|
130
157
|
}
|
|
@@ -140,7 +167,7 @@ var EventEmitter = require('events');
|
|
|
140
167
|
|
|
141
168
|
var queueEmitter = new EventEmitter();
|
|
142
169
|
|
|
143
|
-
var parallelCalls =
|
|
170
|
+
var parallelCalls = 5;
|
|
144
171
|
|
|
145
172
|
// a simple helper queue to bulk index documents
|
|
146
173
|
function indexQueueFactory(ref) {
|
|
@@ -150,78 +177,76 @@ function indexQueueFactory(ref) {
|
|
|
150
177
|
var skipHeader = ref.skipHeader; if ( skipHeader === void 0 ) skipHeader = false;
|
|
151
178
|
var verbose = ref.verbose; if ( verbose === void 0 ) verbose = true;
|
|
152
179
|
|
|
153
|
-
var
|
|
154
|
-
var
|
|
155
|
-
var ingesting = 0;
|
|
156
|
-
var ingestTimes = [];
|
|
157
|
-
var finished = false;
|
|
180
|
+
var flushBytes = bufferSize * 1024; // Convert KB to Bytes
|
|
181
|
+
var highWaterMark = flushBytes * parallelCalls;
|
|
158
182
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
183
|
+
// Create a Readable stream
|
|
184
|
+
var stream$$1 = new stream.Readable({
|
|
185
|
+
read: function read() {}, // Implement read but we manage pushing manually
|
|
186
|
+
highWaterMark: highWaterMark, // Buffer size for backpressure management
|
|
187
|
+
});
|
|
164
188
|
|
|
165
|
-
|
|
189
|
+
async function* ndjsonStreamIterator(readableStream) {
|
|
190
|
+
var buffer = ''; // To hold the incomplete data
|
|
191
|
+
var skippedHeader = false;
|
|
166
192
|
|
|
167
|
-
|
|
168
|
-
|
|
193
|
+
// Iterate over the stream using async iteration
|
|
194
|
+
for await (var chunk of readableStream) {
|
|
195
|
+
buffer += chunk.toString(); // Accumulate the chunk data in the buffer
|
|
169
196
|
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
queueEmitter.emit('resume');
|
|
173
|
-
}
|
|
197
|
+
// Split the buffer into lines (NDJSON items)
|
|
198
|
+
var lines = buffer.split('\n');
|
|
174
199
|
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
if (verbose)
|
|
178
|
-
{ console.log(("bulk ingest docs: " + (docs.length / 2) + ", queue length: " + (queue.length))); }
|
|
179
|
-
|
|
180
|
-
var start = Date.now();
|
|
181
|
-
client
|
|
182
|
-
.bulk({ body: docs })
|
|
183
|
-
.then(function () {
|
|
184
|
-
var end = Date.now();
|
|
185
|
-
var delta = end - start;
|
|
186
|
-
ingestTimes.push(delta);
|
|
187
|
-
ingesting -= 1;
|
|
188
|
-
|
|
189
|
-
var ingestTimesMovingAverage =
|
|
190
|
-
ingestTimes.length > 0
|
|
191
|
-
? ingestTimes.reduce(function (p, c) { return p + c; }, 0) / ingestTimes.length
|
|
192
|
-
: 0;
|
|
193
|
-
var ingestTimesMovingAverageSeconds = Math.floor(ingestTimesMovingAverage / 1000);
|
|
194
|
-
|
|
195
|
-
if (
|
|
196
|
-
ingestTimes.length > 0 &&
|
|
197
|
-
ingestTimesMovingAverageSeconds < 30 &&
|
|
198
|
-
parallelCalls < 10
|
|
199
|
-
) {
|
|
200
|
-
parallelCalls += 1;
|
|
201
|
-
} else if (
|
|
202
|
-
ingestTimes.length > 0 &&
|
|
203
|
-
ingestTimesMovingAverageSeconds >= 30 &&
|
|
204
|
-
parallelCalls > 1
|
|
205
|
-
) {
|
|
206
|
-
parallelCalls -= 1;
|
|
207
|
-
}
|
|
200
|
+
// The last line might be incomplete, so hold it back in the buffer
|
|
201
|
+
buffer = lines.pop();
|
|
208
202
|
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
ingest();
|
|
203
|
+
// Yield each complete JSON object
|
|
204
|
+
for (var line of lines) {
|
|
205
|
+
if (line.trim()) {
|
|
206
|
+
try {
|
|
207
|
+
if (!skipHeader || (skipHeader && !skippedHeader)) {
|
|
208
|
+
yield JSON.parse(line); // Parse and yield the JSON object
|
|
209
|
+
skippedHeader = true;
|
|
210
|
+
}
|
|
211
|
+
} catch (err) {
|
|
212
|
+
// Handle JSON parse errors if necessary
|
|
213
|
+
console.error('Failed to parse JSON:', err);
|
|
221
214
|
}
|
|
222
|
-
}
|
|
215
|
+
}
|
|
216
|
+
}
|
|
223
217
|
}
|
|
224
|
-
|
|
218
|
+
|
|
219
|
+
// Handle any remaining data in the buffer after the stream ends
|
|
220
|
+
if (buffer.trim()) {
|
|
221
|
+
try {
|
|
222
|
+
yield JSON.parse(buffer);
|
|
223
|
+
} catch (err) {
|
|
224
|
+
console.error('Failed to parse final JSON:', err);
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
var finished = false;
|
|
230
|
+
|
|
231
|
+
// Async IIFE to start bulk indexing
|
|
232
|
+
(async function () {
|
|
233
|
+
console.log('START BULK INDEXING');
|
|
234
|
+
await client.helpers.bulk({
|
|
235
|
+
concurrency: parallelCalls,
|
|
236
|
+
flushBytes: flushBytes,
|
|
237
|
+
flushInterval: 1000,
|
|
238
|
+
refreshOnCompletion: true,
|
|
239
|
+
datasource: ndjsonStreamIterator(stream$$1),
|
|
240
|
+
onDocument: function onDocument(doc) {
|
|
241
|
+
return {
|
|
242
|
+
index: { _index: targetIndexName },
|
|
243
|
+
};
|
|
244
|
+
},
|
|
245
|
+
});
|
|
246
|
+
console.log('FINISHED BULK INDEXING');
|
|
247
|
+
|
|
248
|
+
queueEmitter.emit('finish');
|
|
249
|
+
})();
|
|
225
250
|
|
|
226
251
|
return {
|
|
227
252
|
add: function (doc) {
|
|
@@ -229,37 +254,22 @@ function indexQueueFactory(ref) {
|
|
|
229
254
|
throw new Error('Unexpected doc added after indexer should finish.');
|
|
230
255
|
}
|
|
231
256
|
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
if (queue.length === 0) {
|
|
239
|
-
queueEmitter.emit('resume');
|
|
240
|
-
}
|
|
241
|
-
|
|
242
|
-
if (buffer.length >= bufferSize * 2) {
|
|
243
|
-
ingest(buffer);
|
|
244
|
-
buffer = [];
|
|
257
|
+
var canContinue = stream$$1.push(((JSON.stringify(doc)) + "\n"));
|
|
258
|
+
if (!canContinue) {
|
|
259
|
+
queueEmitter.emit('pause');
|
|
260
|
+
stream$$1.once('drain', function () {
|
|
261
|
+
queueEmitter.emit('resume');
|
|
262
|
+
});
|
|
245
263
|
}
|
|
246
264
|
},
|
|
247
265
|
finish: function () {
|
|
248
266
|
finished = true;
|
|
249
|
-
|
|
250
|
-
if (buffer.length > 0) {
|
|
251
|
-
ingest(buffer);
|
|
252
|
-
buffer = [];
|
|
253
|
-
} else if (queue.length === 0 && ingesting === 0) {
|
|
254
|
-
queueEmitter.emit('finish');
|
|
255
|
-
}
|
|
267
|
+
stream$$1.push(null);
|
|
256
268
|
},
|
|
257
269
|
queueEmitter: queueEmitter,
|
|
258
270
|
};
|
|
259
271
|
}
|
|
260
272
|
|
|
261
|
-
var MAX_QUEUE_SIZE$1 = 15;
|
|
262
|
-
|
|
263
273
|
// create a new progress bar instance and use shades_classic theme
|
|
264
274
|
var progressBar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
|
|
265
275
|
|
|
@@ -269,40 +279,69 @@ function indexReaderFactory(
|
|
|
269
279
|
transform,
|
|
270
280
|
client,
|
|
271
281
|
query,
|
|
272
|
-
|
|
282
|
+
searchSize,
|
|
283
|
+
populatedFields
|
|
273
284
|
) {
|
|
274
|
-
if (
|
|
285
|
+
if ( searchSize === void 0 ) searchSize = DEFAULT_SEARCH_SIZE;
|
|
286
|
+
if ( populatedFields === void 0 ) populatedFields = false;
|
|
275
287
|
|
|
276
288
|
return async function indexReader() {
|
|
277
|
-
var responseQueue = [];
|
|
278
289
|
var docsNum = 0;
|
|
290
|
+
var scrollId;
|
|
291
|
+
var finished = false;
|
|
292
|
+
var readActive = false;
|
|
293
|
+
var backPressurePause = false;
|
|
279
294
|
|
|
280
|
-
function
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
295
|
+
async function fetchPopulatedFields() {
|
|
296
|
+
try {
|
|
297
|
+
var response = await client.search({
|
|
298
|
+
index: sourceIndexName,
|
|
299
|
+
size: searchSize,
|
|
300
|
+
query: {
|
|
301
|
+
function_score: {
|
|
302
|
+
query: query,
|
|
303
|
+
random_score: {},
|
|
304
|
+
},
|
|
305
|
+
},
|
|
306
|
+
});
|
|
307
|
+
|
|
308
|
+
// Get all field names for each returned doc and flatten it
|
|
309
|
+
// to a list of unique field names used across all docs.
|
|
310
|
+
return Array.from(new Set(response.hits.hits.map(function (d) { return Object.keys(d._source); }).flat(1)));
|
|
311
|
+
} catch (e) {
|
|
312
|
+
console.log('error', e);
|
|
313
|
+
}
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
function search(fields) {
|
|
317
|
+
return client.search(Object.assign({}, {index: sourceIndexName,
|
|
318
|
+
scroll: '600s',
|
|
319
|
+
size: searchSize,
|
|
320
|
+
query: query},
|
|
321
|
+
(fields ? { _source: fields } : {})));
|
|
287
322
|
}
|
|
288
323
|
|
|
289
324
|
function scroll(id) {
|
|
290
325
|
return client.scroll({
|
|
291
326
|
scroll_id: id,
|
|
292
|
-
scroll: '
|
|
327
|
+
scroll: '600s',
|
|
293
328
|
});
|
|
294
329
|
}
|
|
295
330
|
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
331
|
+
var fieldsWithData;
|
|
332
|
+
|
|
333
|
+
// identify populated fields
|
|
334
|
+
if (populatedFields) {
|
|
335
|
+
fieldsWithData = await fetchPopulatedFields();
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
await fetchNextResponse();
|
|
301
339
|
|
|
302
340
|
function processHit(hit) {
|
|
303
341
|
docsNum += 1;
|
|
304
342
|
try {
|
|
305
343
|
var doc = typeof transform === 'function' ? transform(hit._source) : hit._source; // eslint-disable-line no-underscore-dangle
|
|
344
|
+
|
|
306
345
|
// if doc is undefined we'll skip indexing it
|
|
307
346
|
if (typeof doc === 'undefined') {
|
|
308
347
|
return;
|
|
@@ -321,68 +360,117 @@ function indexReaderFactory(
|
|
|
321
360
|
}
|
|
322
361
|
}
|
|
323
362
|
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
var readActive = false;
|
|
363
|
+
async function fetchNextResponse() {
|
|
364
|
+
readActive = true;
|
|
327
365
|
|
|
328
|
-
|
|
329
|
-
while (responseQueue.length) {
|
|
330
|
-
readActive = true;
|
|
331
|
-
var response = responseQueue.shift();
|
|
366
|
+
var sc = scrollId ? await scroll(scrollId) : await search(fieldsWithData);
|
|
332
367
|
|
|
333
|
-
|
|
334
|
-
|
|
368
|
+
if (!scrollId) {
|
|
369
|
+
progressBar.start(sc.hits.total.value, 0);
|
|
370
|
+
}
|
|
335
371
|
|
|
336
|
-
|
|
372
|
+
scrollId = sc._scroll_id;
|
|
373
|
+
readActive = false;
|
|
337
374
|
|
|
338
|
-
|
|
339
|
-
if (response.hits.total.value === docsNum) {
|
|
340
|
-
indexer.finish();
|
|
341
|
-
break;
|
|
342
|
-
}
|
|
343
|
-
|
|
344
|
-
if (ingestQueueSize < MAX_QUEUE_SIZE$1) {
|
|
345
|
-
// get the next response if there are more docs to fetch
|
|
346
|
-
var sc = await scroll(response._scroll_id); // eslint-disable-line no-await-in-loop,no-underscore-dangle,max-len
|
|
347
|
-
scrollId = sc._scroll_id; // eslint-disable-line no-underscore-dangle
|
|
348
|
-
responseQueue.push(sc);
|
|
349
|
-
} else {
|
|
350
|
-
readActive = false;
|
|
351
|
-
}
|
|
352
|
-
}
|
|
375
|
+
processResponse(sc);
|
|
353
376
|
}
|
|
354
377
|
|
|
355
|
-
|
|
356
|
-
|
|
378
|
+
async function processResponse(response) {
|
|
379
|
+
// collect the docs from this response
|
|
380
|
+
response.hits.hits.forEach(processHit);
|
|
357
381
|
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
382
|
+
progressBar.update(docsNum);
|
|
383
|
+
|
|
384
|
+
// check to see if we have collected all of the docs
|
|
385
|
+
if (response.hits.total.value === docsNum) {
|
|
386
|
+
indexer.finish();
|
|
387
|
+
return;
|
|
364
388
|
}
|
|
389
|
+
|
|
390
|
+
if (!backPressurePause) {
|
|
391
|
+
await fetchNextResponse();
|
|
392
|
+
}
|
|
393
|
+
}
|
|
394
|
+
|
|
395
|
+
indexer.queueEmitter.on('pause', async function () {
|
|
396
|
+
backPressurePause = true;
|
|
365
397
|
});
|
|
366
398
|
|
|
367
399
|
indexer.queueEmitter.on('resume', async function () {
|
|
368
|
-
|
|
400
|
+
backPressurePause = false;
|
|
369
401
|
|
|
370
|
-
if (readActive) {
|
|
402
|
+
if (readActive || finished) {
|
|
371
403
|
return;
|
|
372
404
|
}
|
|
373
405
|
|
|
374
|
-
|
|
375
|
-
var sc = await scroll(scrollId); // eslint-disable-line no-await-in-loop,no-underscore-dangle,max-len
|
|
376
|
-
scrollId = sc._scroll_id; // eslint-disable-line no-underscore-dangle
|
|
377
|
-
responseQueue.push(sc);
|
|
378
|
-
processResponseQueue();
|
|
406
|
+
await fetchNextResponse();
|
|
379
407
|
});
|
|
380
408
|
|
|
381
409
|
indexer.queueEmitter.on('finish', function () {
|
|
410
|
+
finished = true;
|
|
382
411
|
progressBar.stop();
|
|
383
412
|
});
|
|
413
|
+
};
|
|
414
|
+
}
|
|
384
415
|
|
|
385
|
-
|
|
416
|
+
function streamReaderFactory(indexer, stream$$1, transform, splitRegex, verbose) {
|
|
417
|
+
function startIndex() {
|
|
418
|
+
console.log('START INDEX', splitRegex);
|
|
419
|
+
var finished = false;
|
|
420
|
+
|
|
421
|
+
var s = stream$$1.pipe(split(splitRegex)).pipe(
|
|
422
|
+
es
|
|
423
|
+
.mapSync(function (line) {
|
|
424
|
+
try {
|
|
425
|
+
// skip empty lines
|
|
426
|
+
if (line === '') {
|
|
427
|
+
return;
|
|
428
|
+
}
|
|
429
|
+
|
|
430
|
+
var doc =
|
|
431
|
+
typeof transform === 'function' ? JSON.stringify(transform(JSON.parse(line))) : line;
|
|
432
|
+
|
|
433
|
+
// if doc is undefined we'll skip indexing it
|
|
434
|
+
if (typeof doc === 'undefined') {
|
|
435
|
+
s.resume();
|
|
436
|
+
return;
|
|
437
|
+
}
|
|
438
|
+
|
|
439
|
+
// the transform callback may return an array of docs so we can emit
|
|
440
|
+
// multiple docs from a single line
|
|
441
|
+
if (Array.isArray(doc)) {
|
|
442
|
+
doc.forEach(function (d) { return indexer.add(d); });
|
|
443
|
+
return;
|
|
444
|
+
}
|
|
445
|
+
|
|
446
|
+
indexer.add(doc);
|
|
447
|
+
} catch (e) {
|
|
448
|
+
console.log('error', e);
|
|
449
|
+
}
|
|
450
|
+
})
|
|
451
|
+
.on('error', function (err) {
|
|
452
|
+
console.log('Error while reading file.', err);
|
|
453
|
+
})
|
|
454
|
+
.on('end', function () {
|
|
455
|
+
if (verbose) { console.log('Read entire stream.'); }
|
|
456
|
+
indexer.finish();
|
|
457
|
+
finished = true;
|
|
458
|
+
})
|
|
459
|
+
);
|
|
460
|
+
|
|
461
|
+
indexer.queueEmitter.on('pause', function () {
|
|
462
|
+
if (finished) { return; }
|
|
463
|
+
s.pause();
|
|
464
|
+
});
|
|
465
|
+
|
|
466
|
+
indexer.queueEmitter.on('resume', function () {
|
|
467
|
+
if (finished) { return; }
|
|
468
|
+
s.resume();
|
|
469
|
+
});
|
|
470
|
+
}
|
|
471
|
+
|
|
472
|
+
return function () {
|
|
473
|
+
startIndex();
|
|
386
474
|
};
|
|
387
475
|
}
|
|
388
476
|
|
|
@@ -391,6 +479,8 @@ async function transformer(ref) {
|
|
|
391
479
|
var sourceClientConfig = ref.sourceClientConfig;
|
|
392
480
|
var targetClientConfig = ref.targetClientConfig;
|
|
393
481
|
var bufferSize = ref.bufferSize; if ( bufferSize === void 0 ) bufferSize = DEFAULT_BUFFER_SIZE;
|
|
482
|
+
var searchSize = ref.searchSize; if ( searchSize === void 0 ) searchSize = DEFAULT_SEARCH_SIZE;
|
|
483
|
+
var stream$$1 = ref.stream;
|
|
394
484
|
var fileName = ref.fileName;
|
|
395
485
|
var splitRegex = ref.splitRegex; if ( splitRegex === void 0 ) splitRegex = /\n/;
|
|
396
486
|
var sourceIndexName = ref.sourceIndexName;
|
|
@@ -398,11 +488,13 @@ async function transformer(ref) {
|
|
|
398
488
|
var mappings = ref.mappings;
|
|
399
489
|
var mappingsOverride = ref.mappingsOverride; if ( mappingsOverride === void 0 ) mappingsOverride = false;
|
|
400
490
|
var indexMappingTotalFieldsLimit = ref.indexMappingTotalFieldsLimit;
|
|
491
|
+
var populatedFields = ref.populatedFields; if ( populatedFields === void 0 ) populatedFields = false;
|
|
401
492
|
var query = ref.query;
|
|
402
493
|
var skipHeader = ref.skipHeader; if ( skipHeader === void 0 ) skipHeader = false;
|
|
403
494
|
var transform = ref.transform;
|
|
404
495
|
var verbose = ref.verbose; if ( verbose === void 0 ) verbose = true;
|
|
405
496
|
|
|
497
|
+
console.log('TRANSFORMER');
|
|
406
498
|
if (typeof targetIndexName === 'undefined') {
|
|
407
499
|
throw Error('targetIndexName must be specified.');
|
|
408
500
|
}
|
|
@@ -425,6 +517,7 @@ async function transformer(ref) {
|
|
|
425
517
|
mappingsOverride: mappingsOverride,
|
|
426
518
|
indexMappingTotalFieldsLimit: indexMappingTotalFieldsLimit,
|
|
427
519
|
verbose: verbose,
|
|
520
|
+
deleteIndex: deleteIndex,
|
|
428
521
|
});
|
|
429
522
|
var indexer = indexQueueFactory({
|
|
430
523
|
targetClient: targetClient,
|
|
@@ -439,8 +532,12 @@ async function transformer(ref) {
|
|
|
439
532
|
throw Error('Only either one of fileName or sourceIndexName can be specified.');
|
|
440
533
|
}
|
|
441
534
|
|
|
442
|
-
if (
|
|
443
|
-
|
|
535
|
+
if (
|
|
536
|
+
(typeof fileName !== 'undefined' && typeof sourceIndexName !== 'undefined') ||
|
|
537
|
+
(typeof fileName !== 'undefined' && typeof stream$$1 !== 'undefined') ||
|
|
538
|
+
(typeof sourceIndexName !== 'undefined' && typeof stream$$1 !== 'undefined')
|
|
539
|
+
) {
|
|
540
|
+
throw Error('Only one of fileName, sourceIndexName, or stream can be specified.');
|
|
444
541
|
}
|
|
445
542
|
|
|
446
543
|
if (typeof fileName !== 'undefined') {
|
|
@@ -454,17 +551,25 @@ async function transformer(ref) {
|
|
|
454
551
|
transform,
|
|
455
552
|
sourceClient,
|
|
456
553
|
query,
|
|
457
|
-
|
|
554
|
+
searchSize,
|
|
555
|
+
populatedFields
|
|
458
556
|
);
|
|
459
557
|
}
|
|
460
558
|
|
|
559
|
+
if (typeof stream$$1 !== 'undefined') {
|
|
560
|
+
console.log('STREAM READER');
|
|
561
|
+
return streamReaderFactory(indexer, stream$$1, transform, splitRegex, verbose);
|
|
562
|
+
}
|
|
563
|
+
|
|
461
564
|
return null;
|
|
462
565
|
}
|
|
463
566
|
|
|
464
567
|
var reader = getReader();
|
|
568
|
+
console.log('READER INITIALIZED');
|
|
465
569
|
|
|
466
570
|
try {
|
|
467
571
|
var indexExists = await targetClient.indices.exists({ index: targetIndexName });
|
|
572
|
+
console.log('INDEX EXISTS', indexExists);
|
|
468
573
|
|
|
469
574
|
if (indexExists === false) {
|
|
470
575
|
await createMapping();
|