node-es-transformer 1.0.0-beta1 → 1.0.0-beta3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -3
- package/dist/node-es-transformer.cjs.js +280 -175
- package/dist/node-es-transformer.esm.js +280 -175
- package/package.json +13 -7
|
@@ -1,10 +1,20 @@
|
|
|
1
1
|
import fs from 'fs';
|
|
2
2
|
import es from 'event-stream';
|
|
3
3
|
import glob from 'glob';
|
|
4
|
+
import split from 'split2';
|
|
5
|
+
import { Readable } from 'stream';
|
|
4
6
|
import cliProgress from 'cli-progress';
|
|
5
7
|
import elasticsearch from '@elastic/elasticsearch';
|
|
6
8
|
|
|
7
|
-
|
|
9
|
+
// In earlier versions this was used to set the number of docs to index in a
|
|
10
|
+
// single bulk request. Since we switched to use the helpers.bulk() method from
|
|
11
|
+
// the ES client, this now translates to the `flushBytes` option of the helper.
|
|
12
|
+
// However, for a degree of backwards compatibility with the old values, this uses
|
|
13
|
+
// KBytes instead of Bytes. It will be multiplied by 1024 in the index queue.
|
|
14
|
+
var DEFAULT_BUFFER_SIZE = 5120;
|
|
15
|
+
|
|
16
|
+
// The default number of docs to fetch in a single search request when reindexing.
|
|
17
|
+
var DEFAULT_SEARCH_SIZE = 1000;
|
|
8
18
|
|
|
9
19
|
function createMappingFactory(ref) {
|
|
10
20
|
var sourceClient = ref.sourceClient;
|
|
@@ -15,6 +25,7 @@ function createMappingFactory(ref) {
|
|
|
15
25
|
var mappingsOverride = ref.mappingsOverride;
|
|
16
26
|
var indexMappingTotalFieldsLimit = ref.indexMappingTotalFieldsLimit;
|
|
17
27
|
var verbose = ref.verbose;
|
|
28
|
+
var deleteIndex = ref.deleteIndex;
|
|
18
29
|
|
|
19
30
|
return async function () {
|
|
20
31
|
var targetMappings = mappingsOverride ? undefined : mappings;
|
|
@@ -24,7 +35,14 @@ function createMappingFactory(ref) {
|
|
|
24
35
|
var mapping = await sourceClient.indices.getMapping({
|
|
25
36
|
index: sourceIndexName,
|
|
26
37
|
});
|
|
27
|
-
|
|
38
|
+
if (mapping[sourceIndexName]) {
|
|
39
|
+
targetMappings = mapping[sourceIndexName].mappings;
|
|
40
|
+
} else {
|
|
41
|
+
var allMappings = Object.values(mapping);
|
|
42
|
+
if (allMappings.length > 0) {
|
|
43
|
+
targetMappings = Object.values(mapping)[0].mappings;
|
|
44
|
+
}
|
|
45
|
+
}
|
|
28
46
|
} catch (err) {
|
|
29
47
|
console.log('Error reading source mapping', err);
|
|
30
48
|
return;
|
|
@@ -39,18 +57,28 @@ function createMappingFactory(ref) {
|
|
|
39
57
|
}
|
|
40
58
|
|
|
41
59
|
try {
|
|
42
|
-
var
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
60
|
+
var indexExists = await targetClient.indices.exists({ index: targetIndexName });
|
|
61
|
+
|
|
62
|
+
if (indexExists === true && deleteIndex === true) {
|
|
63
|
+
await targetClient.indices.delete({ index: targetIndexName });
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
if (indexExists === false || deleteIndex === true) {
|
|
67
|
+
var resp = await targetClient.indices.create({
|
|
68
|
+
index: targetIndexName,
|
|
69
|
+
body: Object.assign({}, {mappings: targetMappings},
|
|
70
|
+
(indexMappingTotalFieldsLimit !== undefined
|
|
71
|
+
? {
|
|
72
|
+
settings: {
|
|
73
|
+
'index.mapping.total_fields.limit': indexMappingTotalFieldsLimit,
|
|
74
|
+
'index.number_of_shards': 1,
|
|
75
|
+
'index.number_of_replicas': 0,
|
|
76
|
+
},
|
|
77
|
+
}
|
|
78
|
+
: {})),
|
|
79
|
+
});
|
|
80
|
+
if (verbose) { console.log('Created target mapping', resp); }
|
|
81
|
+
}
|
|
54
82
|
} catch (err) {
|
|
55
83
|
console.log('Error creating target mapping', err);
|
|
56
84
|
}
|
|
@@ -58,22 +86,28 @@ function createMappingFactory(ref) {
|
|
|
58
86
|
};
|
|
59
87
|
}
|
|
60
88
|
|
|
61
|
-
var MAX_QUEUE_SIZE = 15;
|
|
62
|
-
|
|
63
89
|
function fileReaderFactory(indexer, fileName, transform, splitRegex, verbose) {
|
|
64
90
|
function startIndex(files) {
|
|
65
|
-
var ingestQueueSize = 0;
|
|
66
91
|
var finished = false;
|
|
67
92
|
|
|
68
93
|
var file = files.shift();
|
|
69
94
|
var s = fs
|
|
70
95
|
.createReadStream(file)
|
|
71
|
-
.pipe(
|
|
96
|
+
.pipe(split(splitRegex))
|
|
72
97
|
.pipe(
|
|
73
98
|
es
|
|
74
99
|
.mapSync(function (line) {
|
|
75
100
|
try {
|
|
76
|
-
|
|
101
|
+
// skip empty lines
|
|
102
|
+
if (line === '') {
|
|
103
|
+
return;
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
var doc =
|
|
107
|
+
typeof transform === 'function'
|
|
108
|
+
? JSON.stringify(transform(JSON.parse(line)))
|
|
109
|
+
: line;
|
|
110
|
+
|
|
77
111
|
// if doc is undefined we'll skip indexing it
|
|
78
112
|
if (typeof doc === 'undefined') {
|
|
79
113
|
s.resume();
|
|
@@ -107,20 +141,13 @@ function fileReaderFactory(indexer, fileName, transform, splitRegex, verbose) {
|
|
|
107
141
|
})
|
|
108
142
|
);
|
|
109
143
|
|
|
110
|
-
indexer.queueEmitter.on('
|
|
144
|
+
indexer.queueEmitter.on('pause', function () {
|
|
111
145
|
if (finished) { return; }
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
if (ingestQueueSize < MAX_QUEUE_SIZE) {
|
|
115
|
-
s.resume();
|
|
116
|
-
} else {
|
|
117
|
-
s.pause();
|
|
118
|
-
}
|
|
146
|
+
s.pause();
|
|
119
147
|
});
|
|
120
148
|
|
|
121
149
|
indexer.queueEmitter.on('resume', function () {
|
|
122
150
|
if (finished) { return; }
|
|
123
|
-
ingestQueueSize = 0;
|
|
124
151
|
s.resume();
|
|
125
152
|
});
|
|
126
153
|
}
|
|
@@ -136,7 +163,7 @@ var EventEmitter = require('events');
|
|
|
136
163
|
|
|
137
164
|
var queueEmitter = new EventEmitter();
|
|
138
165
|
|
|
139
|
-
var parallelCalls =
|
|
166
|
+
var parallelCalls = 5;
|
|
140
167
|
|
|
141
168
|
// a simple helper queue to bulk index documents
|
|
142
169
|
function indexQueueFactory(ref) {
|
|
@@ -146,78 +173,76 @@ function indexQueueFactory(ref) {
|
|
|
146
173
|
var skipHeader = ref.skipHeader; if ( skipHeader === void 0 ) skipHeader = false;
|
|
147
174
|
var verbose = ref.verbose; if ( verbose === void 0 ) verbose = true;
|
|
148
175
|
|
|
149
|
-
var
|
|
150
|
-
var
|
|
151
|
-
var ingesting = 0;
|
|
152
|
-
var ingestTimes = [];
|
|
153
|
-
var finished = false;
|
|
176
|
+
var flushBytes = bufferSize * 1024; // Convert KB to Bytes
|
|
177
|
+
var highWaterMark = flushBytes * parallelCalls;
|
|
154
178
|
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
179
|
+
// Create a Readable stream
|
|
180
|
+
var stream = new Readable({
|
|
181
|
+
read: function read() {}, // Implement read but we manage pushing manually
|
|
182
|
+
highWaterMark: highWaterMark, // Buffer size for backpressure management
|
|
183
|
+
});
|
|
160
184
|
|
|
161
|
-
|
|
185
|
+
async function* ndjsonStreamIterator(readableStream) {
|
|
186
|
+
var buffer = ''; // To hold the incomplete data
|
|
187
|
+
var skippedHeader = false;
|
|
162
188
|
|
|
163
|
-
|
|
164
|
-
|
|
189
|
+
// Iterate over the stream using async iteration
|
|
190
|
+
for await (var chunk of readableStream) {
|
|
191
|
+
buffer += chunk.toString(); // Accumulate the chunk data in the buffer
|
|
165
192
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
queueEmitter.emit('resume');
|
|
169
|
-
}
|
|
193
|
+
// Split the buffer into lines (NDJSON items)
|
|
194
|
+
var lines = buffer.split('\n');
|
|
170
195
|
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
if (verbose)
|
|
174
|
-
{ console.log(("bulk ingest docs: " + (docs.length / 2) + ", queue length: " + (queue.length))); }
|
|
175
|
-
|
|
176
|
-
var start = Date.now();
|
|
177
|
-
client
|
|
178
|
-
.bulk({ body: docs })
|
|
179
|
-
.then(function () {
|
|
180
|
-
var end = Date.now();
|
|
181
|
-
var delta = end - start;
|
|
182
|
-
ingestTimes.push(delta);
|
|
183
|
-
ingesting -= 1;
|
|
184
|
-
|
|
185
|
-
var ingestTimesMovingAverage =
|
|
186
|
-
ingestTimes.length > 0
|
|
187
|
-
? ingestTimes.reduce(function (p, c) { return p + c; }, 0) / ingestTimes.length
|
|
188
|
-
: 0;
|
|
189
|
-
var ingestTimesMovingAverageSeconds = Math.floor(ingestTimesMovingAverage / 1000);
|
|
190
|
-
|
|
191
|
-
if (
|
|
192
|
-
ingestTimes.length > 0 &&
|
|
193
|
-
ingestTimesMovingAverageSeconds < 30 &&
|
|
194
|
-
parallelCalls < 10
|
|
195
|
-
) {
|
|
196
|
-
parallelCalls += 1;
|
|
197
|
-
} else if (
|
|
198
|
-
ingestTimes.length > 0 &&
|
|
199
|
-
ingestTimesMovingAverageSeconds >= 30 &&
|
|
200
|
-
parallelCalls > 1
|
|
201
|
-
) {
|
|
202
|
-
parallelCalls -= 1;
|
|
203
|
-
}
|
|
196
|
+
// The last line might be incomplete, so hold it back in the buffer
|
|
197
|
+
buffer = lines.pop();
|
|
204
198
|
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
ingest();
|
|
199
|
+
// Yield each complete JSON object
|
|
200
|
+
for (var line of lines) {
|
|
201
|
+
if (line.trim()) {
|
|
202
|
+
try {
|
|
203
|
+
if (!skipHeader || (skipHeader && !skippedHeader)) {
|
|
204
|
+
yield JSON.parse(line); // Parse and yield the JSON object
|
|
205
|
+
skippedHeader = true;
|
|
206
|
+
}
|
|
207
|
+
} catch (err) {
|
|
208
|
+
// Handle JSON parse errors if necessary
|
|
209
|
+
console.error('Failed to parse JSON:', err);
|
|
217
210
|
}
|
|
218
|
-
}
|
|
211
|
+
}
|
|
212
|
+
}
|
|
219
213
|
}
|
|
220
|
-
|
|
214
|
+
|
|
215
|
+
// Handle any remaining data in the buffer after the stream ends
|
|
216
|
+
if (buffer.trim()) {
|
|
217
|
+
try {
|
|
218
|
+
yield JSON.parse(buffer);
|
|
219
|
+
} catch (err) {
|
|
220
|
+
console.error('Failed to parse final JSON:', err);
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
var finished = false;
|
|
226
|
+
|
|
227
|
+
// Async IIFE to start bulk indexing
|
|
228
|
+
(async function () {
|
|
229
|
+
console.log('START BULK INDEXING');
|
|
230
|
+
await client.helpers.bulk({
|
|
231
|
+
concurrency: parallelCalls,
|
|
232
|
+
flushBytes: flushBytes,
|
|
233
|
+
flushInterval: 1000,
|
|
234
|
+
refreshOnCompletion: true,
|
|
235
|
+
datasource: ndjsonStreamIterator(stream),
|
|
236
|
+
onDocument: function onDocument(doc) {
|
|
237
|
+
return {
|
|
238
|
+
index: { _index: targetIndexName },
|
|
239
|
+
};
|
|
240
|
+
},
|
|
241
|
+
});
|
|
242
|
+
console.log('FINISHED BULK INDEXING');
|
|
243
|
+
|
|
244
|
+
queueEmitter.emit('finish');
|
|
245
|
+
})();
|
|
221
246
|
|
|
222
247
|
return {
|
|
223
248
|
add: function (doc) {
|
|
@@ -225,37 +250,22 @@ function indexQueueFactory(ref) {
|
|
|
225
250
|
throw new Error('Unexpected doc added after indexer should finish.');
|
|
226
251
|
}
|
|
227
252
|
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
if (queue.length === 0) {
|
|
235
|
-
queueEmitter.emit('resume');
|
|
236
|
-
}
|
|
237
|
-
|
|
238
|
-
if (buffer.length >= bufferSize * 2) {
|
|
239
|
-
ingest(buffer);
|
|
240
|
-
buffer = [];
|
|
253
|
+
var canContinue = stream.push(((JSON.stringify(doc)) + "\n"));
|
|
254
|
+
if (!canContinue) {
|
|
255
|
+
queueEmitter.emit('pause');
|
|
256
|
+
stream.once('drain', function () {
|
|
257
|
+
queueEmitter.emit('resume');
|
|
258
|
+
});
|
|
241
259
|
}
|
|
242
260
|
},
|
|
243
261
|
finish: function () {
|
|
244
262
|
finished = true;
|
|
245
|
-
|
|
246
|
-
if (buffer.length > 0) {
|
|
247
|
-
ingest(buffer);
|
|
248
|
-
buffer = [];
|
|
249
|
-
} else if (queue.length === 0 && ingesting === 0) {
|
|
250
|
-
queueEmitter.emit('finish');
|
|
251
|
-
}
|
|
263
|
+
stream.push(null);
|
|
252
264
|
},
|
|
253
265
|
queueEmitter: queueEmitter,
|
|
254
266
|
};
|
|
255
267
|
}
|
|
256
268
|
|
|
257
|
-
var MAX_QUEUE_SIZE$1 = 15;
|
|
258
|
-
|
|
259
269
|
// create a new progress bar instance and use shades_classic theme
|
|
260
270
|
var progressBar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
|
|
261
271
|
|
|
@@ -265,40 +275,69 @@ function indexReaderFactory(
|
|
|
265
275
|
transform,
|
|
266
276
|
client,
|
|
267
277
|
query,
|
|
268
|
-
|
|
278
|
+
searchSize,
|
|
279
|
+
populatedFields
|
|
269
280
|
) {
|
|
270
|
-
if (
|
|
281
|
+
if ( searchSize === void 0 ) searchSize = DEFAULT_SEARCH_SIZE;
|
|
282
|
+
if ( populatedFields === void 0 ) populatedFields = false;
|
|
271
283
|
|
|
272
284
|
return async function indexReader() {
|
|
273
|
-
var responseQueue = [];
|
|
274
285
|
var docsNum = 0;
|
|
286
|
+
var scrollId;
|
|
287
|
+
var finished = false;
|
|
288
|
+
var readActive = false;
|
|
289
|
+
var backPressurePause = false;
|
|
275
290
|
|
|
276
|
-
function
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
291
|
+
async function fetchPopulatedFields() {
|
|
292
|
+
try {
|
|
293
|
+
var response = await client.search({
|
|
294
|
+
index: sourceIndexName,
|
|
295
|
+
size: searchSize,
|
|
296
|
+
query: {
|
|
297
|
+
function_score: {
|
|
298
|
+
query: query,
|
|
299
|
+
random_score: {},
|
|
300
|
+
},
|
|
301
|
+
},
|
|
302
|
+
});
|
|
303
|
+
|
|
304
|
+
// Get all field names for each returned doc and flatten it
|
|
305
|
+
// to a list of unique field names used across all docs.
|
|
306
|
+
return Array.from(new Set(response.hits.hits.map(function (d) { return Object.keys(d._source); }).flat(1)));
|
|
307
|
+
} catch (e) {
|
|
308
|
+
console.log('error', e);
|
|
309
|
+
}
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
function search(fields) {
|
|
313
|
+
return client.search(Object.assign({}, {index: sourceIndexName,
|
|
314
|
+
scroll: '600s',
|
|
315
|
+
size: searchSize,
|
|
316
|
+
query: query},
|
|
317
|
+
(fields ? { _source: fields } : {})));
|
|
283
318
|
}
|
|
284
319
|
|
|
285
320
|
function scroll(id) {
|
|
286
321
|
return client.scroll({
|
|
287
322
|
scroll_id: id,
|
|
288
|
-
scroll: '
|
|
323
|
+
scroll: '600s',
|
|
289
324
|
});
|
|
290
325
|
}
|
|
291
326
|
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
327
|
+
var fieldsWithData;
|
|
328
|
+
|
|
329
|
+
// identify populated fields
|
|
330
|
+
if (populatedFields) {
|
|
331
|
+
fieldsWithData = await fetchPopulatedFields();
|
|
332
|
+
}
|
|
333
|
+
|
|
334
|
+
await fetchNextResponse();
|
|
297
335
|
|
|
298
336
|
function processHit(hit) {
|
|
299
337
|
docsNum += 1;
|
|
300
338
|
try {
|
|
301
339
|
var doc = typeof transform === 'function' ? transform(hit._source) : hit._source; // eslint-disable-line no-underscore-dangle
|
|
340
|
+
|
|
302
341
|
// if doc is undefined we'll skip indexing it
|
|
303
342
|
if (typeof doc === 'undefined') {
|
|
304
343
|
return;
|
|
@@ -317,68 +356,117 @@ function indexReaderFactory(
|
|
|
317
356
|
}
|
|
318
357
|
}
|
|
319
358
|
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
var readActive = false;
|
|
359
|
+
async function fetchNextResponse() {
|
|
360
|
+
readActive = true;
|
|
323
361
|
|
|
324
|
-
|
|
325
|
-
while (responseQueue.length) {
|
|
326
|
-
readActive = true;
|
|
327
|
-
var response = responseQueue.shift();
|
|
362
|
+
var sc = scrollId ? await scroll(scrollId) : await search(fieldsWithData);
|
|
328
363
|
|
|
329
|
-
|
|
330
|
-
|
|
364
|
+
if (!scrollId) {
|
|
365
|
+
progressBar.start(sc.hits.total.value, 0);
|
|
366
|
+
}
|
|
331
367
|
|
|
332
|
-
|
|
368
|
+
scrollId = sc._scroll_id;
|
|
369
|
+
readActive = false;
|
|
333
370
|
|
|
334
|
-
|
|
335
|
-
if (response.hits.total.value === docsNum) {
|
|
336
|
-
indexer.finish();
|
|
337
|
-
break;
|
|
338
|
-
}
|
|
339
|
-
|
|
340
|
-
if (ingestQueueSize < MAX_QUEUE_SIZE$1) {
|
|
341
|
-
// get the next response if there are more docs to fetch
|
|
342
|
-
var sc = await scroll(response._scroll_id); // eslint-disable-line no-await-in-loop,no-underscore-dangle,max-len
|
|
343
|
-
scrollId = sc._scroll_id; // eslint-disable-line no-underscore-dangle
|
|
344
|
-
responseQueue.push(sc);
|
|
345
|
-
} else {
|
|
346
|
-
readActive = false;
|
|
347
|
-
}
|
|
348
|
-
}
|
|
371
|
+
processResponse(sc);
|
|
349
372
|
}
|
|
350
373
|
|
|
351
|
-
|
|
352
|
-
|
|
374
|
+
async function processResponse(response) {
|
|
375
|
+
// collect the docs from this response
|
|
376
|
+
response.hits.hits.forEach(processHit);
|
|
353
377
|
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
378
|
+
progressBar.update(docsNum);
|
|
379
|
+
|
|
380
|
+
// check to see if we have collected all of the docs
|
|
381
|
+
if (response.hits.total.value === docsNum) {
|
|
382
|
+
indexer.finish();
|
|
383
|
+
return;
|
|
360
384
|
}
|
|
385
|
+
|
|
386
|
+
if (!backPressurePause) {
|
|
387
|
+
await fetchNextResponse();
|
|
388
|
+
}
|
|
389
|
+
}
|
|
390
|
+
|
|
391
|
+
indexer.queueEmitter.on('pause', async function () {
|
|
392
|
+
backPressurePause = true;
|
|
361
393
|
});
|
|
362
394
|
|
|
363
395
|
indexer.queueEmitter.on('resume', async function () {
|
|
364
|
-
|
|
396
|
+
backPressurePause = false;
|
|
365
397
|
|
|
366
|
-
if (readActive) {
|
|
398
|
+
if (readActive || finished) {
|
|
367
399
|
return;
|
|
368
400
|
}
|
|
369
401
|
|
|
370
|
-
|
|
371
|
-
var sc = await scroll(scrollId); // eslint-disable-line no-await-in-loop,no-underscore-dangle,max-len
|
|
372
|
-
scrollId = sc._scroll_id; // eslint-disable-line no-underscore-dangle
|
|
373
|
-
responseQueue.push(sc);
|
|
374
|
-
processResponseQueue();
|
|
402
|
+
await fetchNextResponse();
|
|
375
403
|
});
|
|
376
404
|
|
|
377
405
|
indexer.queueEmitter.on('finish', function () {
|
|
406
|
+
finished = true;
|
|
378
407
|
progressBar.stop();
|
|
379
408
|
});
|
|
409
|
+
};
|
|
410
|
+
}
|
|
380
411
|
|
|
381
|
-
|
|
412
|
+
function streamReaderFactory(indexer, stream, transform, splitRegex, verbose) {
|
|
413
|
+
function startIndex() {
|
|
414
|
+
console.log('START INDEX', splitRegex);
|
|
415
|
+
var finished = false;
|
|
416
|
+
|
|
417
|
+
var s = stream.pipe(split(splitRegex)).pipe(
|
|
418
|
+
es
|
|
419
|
+
.mapSync(function (line) {
|
|
420
|
+
try {
|
|
421
|
+
// skip empty lines
|
|
422
|
+
if (line === '') {
|
|
423
|
+
return;
|
|
424
|
+
}
|
|
425
|
+
|
|
426
|
+
var doc =
|
|
427
|
+
typeof transform === 'function' ? JSON.stringify(transform(JSON.parse(line))) : line;
|
|
428
|
+
|
|
429
|
+
// if doc is undefined we'll skip indexing it
|
|
430
|
+
if (typeof doc === 'undefined') {
|
|
431
|
+
s.resume();
|
|
432
|
+
return;
|
|
433
|
+
}
|
|
434
|
+
|
|
435
|
+
// the transform callback may return an array of docs so we can emit
|
|
436
|
+
// multiple docs from a single line
|
|
437
|
+
if (Array.isArray(doc)) {
|
|
438
|
+
doc.forEach(function (d) { return indexer.add(d); });
|
|
439
|
+
return;
|
|
440
|
+
}
|
|
441
|
+
|
|
442
|
+
indexer.add(doc);
|
|
443
|
+
} catch (e) {
|
|
444
|
+
console.log('error', e);
|
|
445
|
+
}
|
|
446
|
+
})
|
|
447
|
+
.on('error', function (err) {
|
|
448
|
+
console.log('Error while reading file.', err);
|
|
449
|
+
})
|
|
450
|
+
.on('end', function () {
|
|
451
|
+
if (verbose) { console.log('Read entire stream.'); }
|
|
452
|
+
indexer.finish();
|
|
453
|
+
finished = true;
|
|
454
|
+
})
|
|
455
|
+
);
|
|
456
|
+
|
|
457
|
+
indexer.queueEmitter.on('pause', function () {
|
|
458
|
+
if (finished) { return; }
|
|
459
|
+
s.pause();
|
|
460
|
+
});
|
|
461
|
+
|
|
462
|
+
indexer.queueEmitter.on('resume', function () {
|
|
463
|
+
if (finished) { return; }
|
|
464
|
+
s.resume();
|
|
465
|
+
});
|
|
466
|
+
}
|
|
467
|
+
|
|
468
|
+
return function () {
|
|
469
|
+
startIndex();
|
|
382
470
|
};
|
|
383
471
|
}
|
|
384
472
|
|
|
@@ -387,6 +475,8 @@ async function transformer(ref) {
|
|
|
387
475
|
var sourceClientConfig = ref.sourceClientConfig;
|
|
388
476
|
var targetClientConfig = ref.targetClientConfig;
|
|
389
477
|
var bufferSize = ref.bufferSize; if ( bufferSize === void 0 ) bufferSize = DEFAULT_BUFFER_SIZE;
|
|
478
|
+
var searchSize = ref.searchSize; if ( searchSize === void 0 ) searchSize = DEFAULT_SEARCH_SIZE;
|
|
479
|
+
var stream = ref.stream;
|
|
390
480
|
var fileName = ref.fileName;
|
|
391
481
|
var splitRegex = ref.splitRegex; if ( splitRegex === void 0 ) splitRegex = /\n/;
|
|
392
482
|
var sourceIndexName = ref.sourceIndexName;
|
|
@@ -394,11 +484,13 @@ async function transformer(ref) {
|
|
|
394
484
|
var mappings = ref.mappings;
|
|
395
485
|
var mappingsOverride = ref.mappingsOverride; if ( mappingsOverride === void 0 ) mappingsOverride = false;
|
|
396
486
|
var indexMappingTotalFieldsLimit = ref.indexMappingTotalFieldsLimit;
|
|
487
|
+
var populatedFields = ref.populatedFields; if ( populatedFields === void 0 ) populatedFields = false;
|
|
397
488
|
var query = ref.query;
|
|
398
489
|
var skipHeader = ref.skipHeader; if ( skipHeader === void 0 ) skipHeader = false;
|
|
399
490
|
var transform = ref.transform;
|
|
400
491
|
var verbose = ref.verbose; if ( verbose === void 0 ) verbose = true;
|
|
401
492
|
|
|
493
|
+
console.log('TRANSFORMER');
|
|
402
494
|
if (typeof targetIndexName === 'undefined') {
|
|
403
495
|
throw Error('targetIndexName must be specified.');
|
|
404
496
|
}
|
|
@@ -421,6 +513,7 @@ async function transformer(ref) {
|
|
|
421
513
|
mappingsOverride: mappingsOverride,
|
|
422
514
|
indexMappingTotalFieldsLimit: indexMappingTotalFieldsLimit,
|
|
423
515
|
verbose: verbose,
|
|
516
|
+
deleteIndex: deleteIndex,
|
|
424
517
|
});
|
|
425
518
|
var indexer = indexQueueFactory({
|
|
426
519
|
targetClient: targetClient,
|
|
@@ -435,8 +528,12 @@ async function transformer(ref) {
|
|
|
435
528
|
throw Error('Only either one of fileName or sourceIndexName can be specified.');
|
|
436
529
|
}
|
|
437
530
|
|
|
438
|
-
if (
|
|
439
|
-
|
|
531
|
+
if (
|
|
532
|
+
(typeof fileName !== 'undefined' && typeof sourceIndexName !== 'undefined') ||
|
|
533
|
+
(typeof fileName !== 'undefined' && typeof stream !== 'undefined') ||
|
|
534
|
+
(typeof sourceIndexName !== 'undefined' && typeof stream !== 'undefined')
|
|
535
|
+
) {
|
|
536
|
+
throw Error('Only one of fileName, sourceIndexName, or stream can be specified.');
|
|
440
537
|
}
|
|
441
538
|
|
|
442
539
|
if (typeof fileName !== 'undefined') {
|
|
@@ -450,17 +547,25 @@ async function transformer(ref) {
|
|
|
450
547
|
transform,
|
|
451
548
|
sourceClient,
|
|
452
549
|
query,
|
|
453
|
-
|
|
550
|
+
searchSize,
|
|
551
|
+
populatedFields
|
|
454
552
|
);
|
|
455
553
|
}
|
|
456
554
|
|
|
555
|
+
if (typeof stream !== 'undefined') {
|
|
556
|
+
console.log('STREAM READER');
|
|
557
|
+
return streamReaderFactory(indexer, stream, transform, splitRegex, verbose);
|
|
558
|
+
}
|
|
559
|
+
|
|
457
560
|
return null;
|
|
458
561
|
}
|
|
459
562
|
|
|
460
563
|
var reader = getReader();
|
|
564
|
+
console.log('READER INITIALIZED');
|
|
461
565
|
|
|
462
566
|
try {
|
|
463
567
|
var indexExists = await targetClient.indices.exists({ index: targetIndexName });
|
|
568
|
+
console.log('INDEX EXISTS', indexExists);
|
|
464
569
|
|
|
465
570
|
if (indexExists === false) {
|
|
466
571
|
await createMapping();
|