node-es-transformer 1.0.2 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +129 -13
- package/dist/node-es-transformer.cjs.js +722 -128
- package/dist/node-es-transformer.cjs.js.map +1 -1
- package/dist/node-es-transformer.esm.js +704 -129
- package/dist/node-es-transformer.esm.js.map +1 -1
- package/index.d.ts +81 -2
- package/package.json +12 -6
|
@@ -1,11 +1,15 @@
|
|
|
1
1
|
import elasticsearch9 from 'es9';
|
|
2
2
|
import elasticsearch8 from 'es8';
|
|
3
|
+
import parquet from '@dsnp/parquetjs';
|
|
4
|
+
import * as arrow from 'apache-arrow';
|
|
3
5
|
import fs from 'fs';
|
|
6
|
+
import { parse } from 'csv-parse';
|
|
4
7
|
import es from 'event-stream';
|
|
5
8
|
import { globSync } from 'glob';
|
|
6
9
|
import split from 'split2';
|
|
7
|
-
import {
|
|
10
|
+
import { PassThrough } from 'stream';
|
|
8
11
|
import cliProgress from 'cli-progress';
|
|
12
|
+
import pino from 'pino';
|
|
9
13
|
|
|
10
14
|
// In earlier versions this was used to set the number of docs to index in a
|
|
11
15
|
// single bulk request. Since we switched to use the helpers.bulk() method from
|
|
@@ -23,14 +27,16 @@ function createMappingFactory({
|
|
|
23
27
|
targetClient,
|
|
24
28
|
targetIndexName,
|
|
25
29
|
mappings,
|
|
30
|
+
inferredIngestPipeline,
|
|
26
31
|
mappingsOverride,
|
|
27
32
|
indexMappingTotalFieldsLimit,
|
|
28
|
-
verbose,
|
|
29
33
|
deleteIndex,
|
|
30
|
-
pipeline
|
|
34
|
+
pipeline,
|
|
35
|
+
logger
|
|
31
36
|
}) {
|
|
32
37
|
return async () => {
|
|
33
38
|
let targetMappings = mappingsOverride ? undefined : mappings;
|
|
39
|
+
let defaultPipeline = pipeline;
|
|
34
40
|
if (sourceClient && sourceIndexName && typeof targetMappings === 'undefined') {
|
|
35
41
|
try {
|
|
36
42
|
const mapping = await sourceClient.indices.getMapping({
|
|
@@ -45,7 +51,10 @@ function createMappingFactory({
|
|
|
45
51
|
}
|
|
46
52
|
}
|
|
47
53
|
} catch (err) {
|
|
48
|
-
|
|
54
|
+
logger.error({
|
|
55
|
+
err,
|
|
56
|
+
sourceIndexName
|
|
57
|
+
}, 'Error reading source mapping');
|
|
49
58
|
return;
|
|
50
59
|
}
|
|
51
60
|
}
|
|
@@ -69,93 +78,312 @@ function createMappingFactory({
|
|
|
69
78
|
});
|
|
70
79
|
}
|
|
71
80
|
if (indexExists === false || deleteIndex === true) {
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
+
if (typeof defaultPipeline === 'undefined' && typeof inferredIngestPipeline === 'object' && inferredIngestPipeline !== null && typeof targetClient?.ingest?.putPipeline === 'function') {
|
|
82
|
+
const inferredPipelineName = `${targetIndexName}-inferred-pipeline`;
|
|
83
|
+
try {
|
|
84
|
+
await targetClient.ingest.putPipeline({
|
|
85
|
+
id: inferredPipelineName,
|
|
86
|
+
...inferredIngestPipeline
|
|
87
|
+
});
|
|
88
|
+
defaultPipeline = inferredPipelineName;
|
|
89
|
+
logger.info({
|
|
90
|
+
inferredPipelineName
|
|
91
|
+
}, 'Created inferred ingest pipeline');
|
|
92
|
+
} catch (err) {
|
|
93
|
+
logger.error({
|
|
94
|
+
err,
|
|
95
|
+
inferredPipelineName
|
|
96
|
+
}, 'Error creating inferred ingest pipeline');
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
const settings = {
|
|
100
|
+
...(defaultPipeline !== undefined ? {
|
|
101
|
+
'index.default_pipeline': defaultPipeline
|
|
81
102
|
} : {}),
|
|
82
103
|
...(indexMappingTotalFieldsLimit !== undefined ? {
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
104
|
+
'index.mapping.total_fields.limit': indexMappingTotalFieldsLimit,
|
|
105
|
+
'index.number_of_shards': 1,
|
|
106
|
+
'index.number_of_replicas': 0
|
|
107
|
+
} : {})
|
|
108
|
+
};
|
|
109
|
+
const response = await targetClient.indices.create({
|
|
110
|
+
index: targetIndexName,
|
|
111
|
+
mappings: targetMappings,
|
|
112
|
+
...(Object.keys(settings).length > 0 ? {
|
|
113
|
+
settings
|
|
88
114
|
} : {})
|
|
89
115
|
});
|
|
90
|
-
|
|
116
|
+
logger.info({
|
|
117
|
+
targetIndexName,
|
|
118
|
+
response
|
|
119
|
+
}, 'Created target mapping');
|
|
91
120
|
}
|
|
92
121
|
} catch (err) {
|
|
93
|
-
|
|
122
|
+
logger.error({
|
|
123
|
+
err,
|
|
124
|
+
targetIndexName
|
|
125
|
+
}, 'Error creating target mapping');
|
|
94
126
|
}
|
|
95
127
|
}
|
|
96
128
|
};
|
|
97
129
|
}
|
|
98
130
|
|
|
99
|
-
function
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
131
|
+
/**
 * Builds the options object passed to the CSV parser.
 *
 * The defaults `bom`, `columns`, `trim` and `skip_empty_lines` (all `true`)
 * are applied first; every key present in `csvOptions` overrides them.
 *
 * @param {object} [csvOptions={}] Caller-supplied parser options.
 * @param {boolean} [skipHeader=false] Whether the first line should be skipped.
 * @returns {object} A new options object; `csvOptions` itself is not modified.
 */
function getCsvParserOptions(csvOptions = {}, skipHeader = false) {
  const merged = Object.assign(
    {
      bom: true,
      columns: true,
      trim: true,
      skip_empty_lines: true
    },
    csvOptions
  );

  if (!skipHeader) {
    return merged;
  }

  // With `columns` set to `true` or a function the header line is already
  // consumed, so no explicit line offset is added in that case.
  const { columns } = merged;
  if (columns === true || typeof columns === 'function') {
    return merged;
  }

  // An explicit `from_line` supplied by the caller always wins.
  if (typeof merged.from_line !== 'undefined') {
    return merged;
  }

  merged.from_line = 2;
  return merged;
}
|
|
145
|
+
|
|
146
|
+
/**
 * Creates a promise-based gate that follows the `pause` / `resume` events of
 * `queueEmitter`, so pull-based readers can await until the queue resumes.
 *
 * @param {object} queueEmitter Emitter exposing `on` and `removeListener`
 *   that emits `pause` and `resume` events.
 * @returns {{waitIfPaused: function(): Promise<void>, cleanup: function(): void}}
 *   `waitIfPaused` resolves immediately when not paused, otherwise on the next
 *   `resume` (or on `cleanup`). `cleanup` detaches both listeners and releases
 *   every pending waiter.
 */
function createPauseWaiter$1(queueEmitter) {
  let paused = false;
  let waiters = [];

  // Resolve every pending waiter exactly once and reset the list.
  const releaseWaiters = () => {
    const pending = waiters;
    waiters = [];
    pending.forEach(resolve => resolve());
  };
  const onPause = () => {
    paused = true;
  };
  const onResume = () => {
    paused = false;
    releaseWaiters();
  };
  queueEmitter.on('pause', onPause);
  queueEmitter.on('resume', onResume);
  return {
    async waitIfPaused() {
      if (!paused) return;
      await new Promise(resolve => {
        waiters.push(resolve);
      });
    },
    cleanup() {
      queueEmitter.removeListener('pause', onPause);
      queueEmitter.removeListener('resume', onResume);
      // Once the listeners are gone no `resume` can ever be observed, so the
      // flag is cleared to keep later waitIfPaused() calls from hanging.
      paused = false;
      releaseWaiters();
    }
  };
}
|
|
174
|
+
/**
 * Creates the reader that feeds every file matching `fileName` into `indexer`.
 *
 * Files are processed one after another. Each parsed record is passed through
 * the optional `transform` callback and the result is queued with
 * `indexer.add()`. `indexer.finish()` is always called once reading ends,
 * whether it succeeded or failed.
 *
 * @param {object} indexer Queue exposing `add`, `finish` and `queueEmitter`.
 * @param {string} fileName Glob pattern handed to `globSync`.
 * @param {function} [transform] Called as `transform(parsed, { fileName })`;
 *   may return a doc, an array of docs, or null/undefined to skip the record.
 * @param {RegExp|string} splitRegex Line separator used for NDJSON input.
 * @param {boolean} [skipHeader=false] Skip the first non-empty NDJSON line /
 *   forwarded to the CSV parser options.
 * @param {string} [sourceFormat='ndjson'] One of `csv`, `ndjson`, `parquet`, `arrow`.
 * @param {object} [csvOptions={}] Extra options for the CSV parser.
 * @param {object} logger Logger exposing `info` and `error`.
 * @returns {function(): void} Function that starts reading when invoked.
 */
function fileReaderFactory(indexer, fileName, transform, splitRegex, skipHeader = false, sourceFormat = 'ndjson', csvOptions = {}, logger) {
  // Runs the optional transform on one parsed record and queues the result(s).
  function addParsedDoc(parsed, file) {
    const context = {
      fileName: file
    };
    const doc = typeof transform === 'function' ? transform(parsed, context) : parsed;

    // if doc is null/undefined we'll skip indexing it
    if (doc === null || typeof doc === 'undefined') {
      return;
    }

    // the transform callback may return an array of docs so we can emit
    // multiple docs from a single line
    if (Array.isArray(doc)) {
      doc.forEach(d => {
        if (d === null || typeof d === 'undefined') return;
        indexer.add(d);
      });
      return;
    }
    indexer.add(doc);
  }

  // Reads a Parquet file row by row, waiting whenever the queue is paused.
  async function processParquetFile(file) {
    const {
      waitIfPaused,
      cleanup
    } = createPauseWaiter$1(indexer.queueEmitter);
    let reader;
    try {
      // Opened inside the try block so the pause listeners are also removed
      // when the file cannot be opened.
      reader = await parquet.ParquetReader.openFile(file);
      const cursor = reader.getCursor();
      while (true) {
        // eslint-disable-next-line no-await-in-loop
        const row = await cursor.next();
        if (row === null || typeof row === 'undefined') {
          break;
        }
        addParsedDoc(row, file);
        // eslint-disable-next-line no-await-in-loop
        await waitIfPaused();
      }
      logger.info({
        file
      }, 'Read entire file');
    } finally {
      cleanup();
      if (reader) {
        await reader.close();
      }
    }
  }

  // Reads an Arrow file batch by batch and turns every row into a plain object.
  async function processArrowFile(file) {
    const {
      waitIfPaused,
      cleanup
    } = createPauseWaiter$1(indexer.queueEmitter);
    let input;
    try {
      input = fs.createReadStream(file);
      const reader = await arrow.RecordBatchReader.from(input);
      for await (const recordBatch of reader) {
        const {
          fields
        } = recordBatch.schema;
        for (let rowIndex = 0; rowIndex < recordBatch.numRows; rowIndex++) {
          const row = {};
          fields.forEach(field => {
            const vector = recordBatch.getChild(field.name);
            row[field.name] = vector ? vector.get(rowIndex) : undefined;
          });
          addParsedDoc(row, file);
          // eslint-disable-next-line no-await-in-loop
          await waitIfPaused();
        }
      }
      logger.info({
        file
      }, 'Read entire file');
    } finally {
      cleanup();
      // Release the file descriptor even when reading stopped early.
      if (input) {
        input.destroy();
      }
    }
  }

  // Pipes the stages returned by `buildStages` (source first) and settles once
  // the last stage ends or ANY stage emits an error. `.pipe()` does not forward
  // errors downstream, so every stage needs its own listener.
  function processStreamFile(file, buildStages, errorMessage) {
    return new Promise((resolve, reject) => {
      let finished = false;
      const stages = buildStages();
      const source = stages[0];
      const s = stages.reduce((upstream, stage) => upstream.pipe(stage));
      const onPause = () => {
        if (finished) return;
        s.pause();
      };
      const onResume = () => {
        if (finished) return;
        s.resume();
      };
      function cleanup() {
        indexer.queueEmitter.removeListener('pause', onPause);
        indexer.queueEmitter.removeListener('resume', onResume);
      }
      function succeed() {
        if (finished) return;
        finished = true;
        cleanup();
        logger.info({
          file
        }, 'Read entire file');
        resolve();
      }
      function fail(err) {
        if (finished) return;
        finished = true;
        cleanup();
        logger.error({
          err,
          file
        }, errorMessage);
        // Stop reading and close the underlying file.
        source.destroy();
        reject(err);
      }
      indexer.queueEmitter.on('pause', onPause);
      indexer.queueEmitter.on('resume', onResume);
      s.on('end', succeed);
      stages.forEach(stage => {
        stage.on('error', fail);
      });
    });
  }

  // Streams an NDJSON file line by line; lines that fail to parse are logged
  // and skipped.
  function processNdjsonFile(file) {
    let skippedHeader = false;
    return processStreamFile(file, () => [fs.createReadStream(file), split(splitRegex), es.mapSync(line => {
      try {
        // skip empty lines
        if (line === '') {
          return;
        }
        if (skipHeader && !skippedHeader) {
          skippedHeader = true;
          return;
        }
        const parsed = JSON.parse(line);
        addParsedDoc(parsed, file);
      } catch (err) {
        logger.error({
          err,
          file
        }, 'Failed to process NDJSON line');
      }
    })], 'Error while reading file');
  }

  // Streams a CSV file record by record; records that fail in the transform
  // or while queueing are logged and skipped.
  function processCsvFile(file) {
    const parserOptions = getCsvParserOptions(csvOptions, skipHeader);
    return processStreamFile(file, () => [fs.createReadStream(file), parse(parserOptions), es.mapSync(record => {
      try {
        addParsedDoc(record, file);
      } catch (err) {
        logger.error({
          err,
          file
        }, 'Failed to process CSV record');
      }
    })], 'Error while reading CSV file');
  }

  // Dispatches one file to the reader matching `sourceFormat`.
  async function processFile(file) {
    if (sourceFormat === 'csv') {
      await processCsvFile(file);
      return;
    }
    if (sourceFormat === 'ndjson') {
      await processNdjsonFile(file);
      return;
    }
    if (sourceFormat === 'parquet') {
      await processParquetFile(file);
      return;
    }
    if (sourceFormat === 'arrow') {
      await processArrowFile(file);
      return;
    }
    throw Error(`Unsupported sourceFormat: ${sourceFormat}`);
  }

  // Processes the files sequentially; the first failure is logged and stops
  // the loop, and the indexer is finished in every case.
  async function startIndex(files) {
    if (files.length === 0) {
      indexer.finish();
      return;
    }
    try {
      for (const file of files) {
        // eslint-disable-next-line no-await-in-loop
        await processFile(file);
      }
    } catch (err) {
      logger.error({
        err,
        files
      }, 'Error while processing files');
    } finally {
      indexer.finish();
    }
  }
  return () => {
    try {
      const files = globSync(fileName);
      // startIndex handles read errors itself; this guards against a throwing
      // indexer.finish() turning into an unhandled rejection.
      startIndex(files).catch(err => {
        logger.error({
          err,
          fileName
        }, 'Error while processing files');
      });
    } catch (err) {
      logger.error({
        err,
        fileName
      }, 'Error matching files');
      indexer.finish();
    }
  };
}
|
|
156
385
|
|
|
157
386
|
const EventEmitter = require('events');
|
|
158
|
-
const queueEmitter = new EventEmitter();
|
|
159
387
|
const parallelCalls = 5;
|
|
160
388
|
|
|
161
389
|
// a simple helper queue to bulk index documents
|
|
@@ -163,21 +391,20 @@ function indexQueueFactory({
|
|
|
163
391
|
targetClient: client,
|
|
164
392
|
targetIndexName,
|
|
165
393
|
bufferSize = DEFAULT_BUFFER_SIZE,
|
|
166
|
-
|
|
394
|
+
logger
|
|
167
395
|
}) {
|
|
396
|
+
const queueEmitter = new EventEmitter();
|
|
168
397
|
let docsPerSecond = 0;
|
|
169
398
|
const flushBytes = bufferSize * 1024; // Convert KB to Bytes
|
|
170
399
|
const highWaterMark = flushBytes * parallelCalls;
|
|
171
400
|
|
|
172
|
-
// Create a
|
|
173
|
-
const stream = new
|
|
174
|
-
read() {},
|
|
175
|
-
// Implement read but we manage pushing manually
|
|
401
|
+
// Create a PassThrough stream (readable + writable) for proper backpressure
|
|
402
|
+
const stream = new PassThrough({
|
|
176
403
|
highWaterMark // Buffer size for backpressure management
|
|
177
404
|
});
|
|
178
405
|
async function* ndjsonStreamIterator(readableStream) {
|
|
179
406
|
let buffer = ''; // To hold the incomplete data
|
|
180
|
-
|
|
407
|
+
|
|
181
408
|
try {
|
|
182
409
|
// Iterate over the stream using async iteration
|
|
183
410
|
for await (const chunk of readableStream) {
|
|
@@ -191,16 +418,15 @@ function indexQueueFactory({
|
|
|
191
418
|
|
|
192
419
|
// Yield each complete JSON object
|
|
193
420
|
for (const line of lines) {
|
|
194
|
-
if (line.trim()) {
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
}
|
|
421
|
+
if (!line.trim()) {
|
|
422
|
+
continue;
|
|
423
|
+
}
|
|
424
|
+
try {
|
|
425
|
+
yield JSON.parse(line); // Parse and yield the JSON object
|
|
426
|
+
} catch (err) {
|
|
427
|
+
logger.error({
|
|
428
|
+
err
|
|
429
|
+
}, 'Failed to parse JSON from NDJSON stream');
|
|
204
430
|
}
|
|
205
431
|
}
|
|
206
432
|
}
|
|
@@ -210,7 +436,9 @@ function indexQueueFactory({
|
|
|
210
436
|
try {
|
|
211
437
|
yield JSON.parse(buffer);
|
|
212
438
|
} catch (err) {
|
|
213
|
-
|
|
439
|
+
logger.error({
|
|
440
|
+
err
|
|
441
|
+
}, 'Failed to parse final JSON from NDJSON stream');
|
|
214
442
|
}
|
|
215
443
|
}
|
|
216
444
|
} finally {
|
|
@@ -236,7 +464,7 @@ function indexQueueFactory({
|
|
|
236
464
|
flushInterval: 1000,
|
|
237
465
|
refreshOnCompletion: true,
|
|
238
466
|
datasource: ndjsonStreamIterator(stream),
|
|
239
|
-
onDocument(
|
|
467
|
+
onDocument() {
|
|
240
468
|
docsPerSecond++;
|
|
241
469
|
return {
|
|
242
470
|
index: {
|
|
@@ -245,9 +473,13 @@ function indexQueueFactory({
|
|
|
245
473
|
};
|
|
246
474
|
}
|
|
247
475
|
});
|
|
248
|
-
} catch (
|
|
249
|
-
|
|
250
|
-
|
|
476
|
+
} catch (err) {
|
|
477
|
+
logger.error({
|
|
478
|
+
err,
|
|
479
|
+
targetIndexName
|
|
480
|
+
}, 'Error during bulk indexing');
|
|
481
|
+
queueEmitter.emit('error', err);
|
|
482
|
+
throw err;
|
|
251
483
|
} finally {
|
|
252
484
|
// Clean up interval
|
|
253
485
|
clearInterval(interval);
|
|
@@ -276,7 +508,7 @@ function indexQueueFactory({
|
|
|
276
508
|
if (finished) {
|
|
277
509
|
throw new Error('Unexpected doc added after indexer should finish.');
|
|
278
510
|
}
|
|
279
|
-
const canContinue = stream.
|
|
511
|
+
const canContinue = stream.write(`${JSON.stringify(doc)}\n`);
|
|
280
512
|
if (!canContinue) {
|
|
281
513
|
queueEmitter.emit('pause');
|
|
282
514
|
|
|
@@ -289,7 +521,7 @@ function indexQueueFactory({
|
|
|
289
521
|
},
|
|
290
522
|
finish: () => {
|
|
291
523
|
finished = true;
|
|
292
|
-
stream.
|
|
524
|
+
stream.end();
|
|
293
525
|
},
|
|
294
526
|
queueEmitter
|
|
295
527
|
};
|
|
@@ -297,7 +529,7 @@ function indexQueueFactory({
|
|
|
297
529
|
|
|
298
530
|
// create a new progress bar instance and use shades_classic theme
|
|
299
531
|
const progressBar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
|
|
300
|
-
function indexReaderFactory(indexer, sourceIndexName, transform, client, query, searchSize = DEFAULT_SEARCH_SIZE, populatedFields = false) {
|
|
532
|
+
function indexReaderFactory(indexer, sourceIndexName, transform, client, query, searchSize = DEFAULT_SEARCH_SIZE, populatedFields = false, logger) {
|
|
301
533
|
return async function indexReader() {
|
|
302
534
|
let docsNum = 0;
|
|
303
535
|
let scrollId;
|
|
@@ -316,8 +548,11 @@ function indexReaderFactory(indexer, sourceIndexName, transform, client, query,
|
|
|
316
548
|
maxRetries: 0
|
|
317
549
|
});
|
|
318
550
|
return Object.keys(response.fields);
|
|
319
|
-
} catch (
|
|
320
|
-
|
|
551
|
+
} catch (err) {
|
|
552
|
+
logger.error({
|
|
553
|
+
err,
|
|
554
|
+
sourceIndexName
|
|
555
|
+
}, 'Failed to fetch populated fields');
|
|
321
556
|
}
|
|
322
557
|
}
|
|
323
558
|
function search(fields) {
|
|
@@ -361,8 +596,10 @@ function indexReaderFactory(indexer, sourceIndexName, transform, client, query,
|
|
|
361
596
|
return;
|
|
362
597
|
}
|
|
363
598
|
indexer.add(doc);
|
|
364
|
-
} catch (
|
|
365
|
-
|
|
599
|
+
} catch (err) {
|
|
600
|
+
logger.error({
|
|
601
|
+
err
|
|
602
|
+
}, 'Failed to process source index document');
|
|
366
603
|
}
|
|
367
604
|
}
|
|
368
605
|
async function fetchNextResponse() {
|
|
@@ -406,48 +643,339 @@ function indexReaderFactory(indexer, sourceIndexName, transform, client, query,
|
|
|
406
643
|
};
|
|
407
644
|
}
|
|
408
645
|
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
646
|
+
const DEFAULT_INFER_MAPPINGS_SAMPLE_BYTES = 100000;
|
|
647
|
+
const DEFAULT_INFER_MAPPINGS_LINES_TO_SAMPLE = 1000;
|
|
648
|
+
/**
 * Reads up to `sampleBytes` bytes from the start of a file and returns them
 * decoded as UTF-8.
 *
 * When the sample fills the whole buffer it may stop in the middle of a
 * multi-byte character; that incomplete trailing sequence is dropped instead
 * of being decoded to U+FFFD.
 *
 * @param {string} filePath Path of the file to sample.
 * @param {number} sampleBytes Maximum number of bytes to read.
 * @returns {string} The decoded sample (a leading byte order mark is kept).
 */
function readSample(filePath, sampleBytes) {
  const fd = fs.openSync(filePath, 'r');
  try {
    const buffer = Buffer.alloc(sampleBytes);
    const bytesRead = fs.readSync(fd, buffer, 0, sampleBytes, 0);
    const sample = buffer.subarray(0, bytesRead);
    if (bytesRead < sampleBytes) {
      // The read ended before the buffer was full, so nothing was cut off.
      return sample.toString('utf8');
    }
    // `stream: true` holds back an incomplete trailing sequence; `ignoreBOM`
    // keeps a byte order mark in the output, matching Buffer#toString.
    return new TextDecoder('utf-8', {
      ignoreBOM: true
    }).decode(sample, {
      stream: true
    });
  } finally {
    fs.closeSync(fd);
  }
}
|
|
658
|
+
/**
 * Builds the inference result used when no inference took place: the given
 * mappings are passed through and no ingest pipeline is set.
 *
 * @param {object|undefined} mappings Mappings to pass through unchanged.
 * @returns {{mappings: (object|undefined), ingestPipeline: undefined}}
 */
function emptyInferenceResult(mappings) {
  const result = { mappings };
  result.ingestPipeline = undefined;
  return result;
}
|
|
664
|
+
/**
 * Tries to infer index mappings (and an ingest pipeline) for a file source by
 * sending a sample of the first matching file to the target client's
 * `textStructure.findStructure` API.
 *
 * Inference is skipped, and the given `mappings` are returned unchanged, when
 * `inferMappings` is falsy, `mappings` are already defined, no `fileName` is
 * given, the format is neither `ndjson` nor `csv`, the client has no
 * `textStructure.findStructure` function, no file matches, or the sample is
 * empty. A failing inference request is logged as a warning and also falls
 * back to the given `mappings`.
 *
 * @param {object} options
 * @param {object} options.targetClient Client used for the inference request.
 * @param {string} [options.fileName] Glob pattern; the first match is sampled.
 * @param {string} options.sourceFormat Source format (`ndjson` or `csv` to infer).
 * @param {object} [options.csvOptions] CSV parser options; `delimiter`, `quote`
 *   and a boolean `columns` are forwarded unless already set in the request.
 * @param {boolean} [options.skipHeader] Marks the sample as having a header
 *   row when `has_header_row` is otherwise undefined.
 * @param {object} [options.mappings] Explicit mappings; disables inference.
 * @param {boolean} [options.inferMappings] Enables inference.
 * @param {object} [options.inferMappingsOptions] `sampleBytes` plus extra
 *   request parameters that override the defaults.
 * @param {object} options.logger Logger exposing `info` and `warn`.
 * @returns {Promise<{mappings: (object|undefined), ingestPipeline: (object|undefined)}>}
 */
async function inferMappingsFromSource({
  targetClient,
  fileName,
  sourceFormat,
  csvOptions,
  skipHeader,
  mappings,
  inferMappings,
  inferMappingsOptions,
  logger
}) {
  if (!inferMappings || typeof mappings !== 'undefined' || typeof fileName === 'undefined') {
    return emptyInferenceResult(mappings);
  }
  if (sourceFormat !== 'ndjson' && sourceFormat !== 'csv') {
    logger.info({
      sourceFormat
    }, 'Skipping mapping inference. Inference is only supported for ndjson and csv.');
    return emptyInferenceResult(mappings);
  }
  // Only ndjson/csv reach this point, so only the client capability is checked.
  if (typeof targetClient?.textStructure?.findStructure !== 'function') {
    return emptyInferenceResult(mappings);
  }
  const files = globSync(fileName);
  if (files.length === 0) {
    logger.info({
      fileName
    }, 'No files matched for mapping inference');
    return emptyInferenceResult(mappings);
  }
  const {
    sampleBytes = DEFAULT_INFER_MAPPINGS_SAMPLE_BYTES,
    ...requestParams
  } = inferMappingsOptions || {};
  const sampleText = readSample(files[0], sampleBytes);
  if (!sampleText || sampleText.trim() === '') {
    logger.info('Skipping mapping inference because the sample text is empty');
    return emptyInferenceResult(mappings);
  }

  // Caller-supplied request parameters take precedence over the defaults.
  const params = {
    body: sampleText,
    lines_to_sample: DEFAULT_INFER_MAPPINGS_LINES_TO_SAMPLE,
    ...requestParams
  };
  if (typeof params.format === 'undefined') {
    params.format = sourceFormat === 'csv' ? 'delimited' : 'ndjson';
  }
  if (sourceFormat === 'csv') {
    // Mirror the CSV parser settings unless the request already sets them.
    if (typeof params.delimiter === 'undefined' && typeof csvOptions?.delimiter === 'string') {
      params.delimiter = csvOptions.delimiter;
    }
    if (typeof params.quote === 'undefined' && typeof csvOptions?.quote === 'string') {
      params.quote = csvOptions.quote;
    }
    if (typeof params.has_header_row === 'undefined' && typeof csvOptions?.columns === 'boolean') {
      params.has_header_row = csvOptions.columns;
    }
    if (typeof params.has_header_row === 'undefined' && skipHeader) {
      params.has_header_row = true;
    }
  }
  try {
    const response = await targetClient.textStructure.findStructure(params);
    if (response?.mappings) {
      logger.info({
        file: files[0]
      }, 'Inferred mappings via _text_structure/find_structure');
    }
    if (response?.ingest_pipeline) {
      logger.info('Inferred ingest pipeline via _text_structure/find_structure');
    }
    return {
      mappings: response?.mappings || mappings,
      ingestPipeline: response?.ingest_pipeline
    };
  } catch (err) {
    // Inference is best effort: fall back to the caller's mappings.
    logger.warn({
      err
    }, 'Could not infer mappings via _text_structure/find_structure');
    return emptyInferenceResult(mappings);
  }
}
|
|
746
|
+
|
|
747
|
+
// Log level used when `verbose` is truthy and LOG_LEVEL is not set.
const DEFAULT_LOG_LEVEL = 'info';

/**
 * Determines the log level for the default logger.
 *
 * A non-blank `LOG_LEVEL` environment variable wins and is returned with the
 * surrounding whitespace removed; otherwise the level follows `verbose`.
 *
 * @param {boolean} [verbose=true] Whether informational output is wanted.
 * @returns {string} The trimmed `LOG_LEVEL` value, `'info'` when verbose, or
 *   `'error'` otherwise.
 */
function resolveLogLevel(verbose = true) {
  const envLevel = typeof process.env.LOG_LEVEL === 'string' ? process.env.LOG_LEVEL.trim() : '';
  if (envLevel !== '') {
    // Return the trimmed value: the blank check already ignores whitespace,
    // so the returned level should not carry it either.
    return envLevel;
  }
  return verbose ? DEFAULT_LOG_LEVEL : 'error';
}
|
|
754
|
+
/**
 * Returns the logger to use for a run.
 *
 * A truthy `logger` of type object is returned as is. Otherwise a pino logger
 * named `node-es-transformer` is created with ISO timestamps, the standard
 * error serializer for the `err` and `error` keys, and the level computed by
 * `resolveLogLevel(verbose)`.
 *
 * @param {object} [options]
 * @param {object} [options.logger] Caller-supplied logger instance.
 * @param {boolean} [options.verbose=true] Forwarded to `resolveLogLevel`.
 * @returns {object} The supplied logger or a new pino logger.
 */
function createLogger({
  logger,
  verbose = true
} = {}) {
  const hasCustomLogger = Boolean(logger) && typeof logger === 'object';
  if (hasCustomLogger) {
    return logger;
  }

  const options = {
    name: 'node-es-transformer',
    level: resolveLogLevel(verbose),
    timestamp: pino.stdTimeFunctions.isoTime,
    serializers: {
      err: pino.stdSerializers.err,
      error: pino.stdSerializers.err
    }
  };
  return pino(options);
}
|
|
771
|
+
/**
 * Derives a child logger carrying `bindings` when the logger supports it.
 *
 * @param {object} logger Parent logger; may lack a `child` method.
 * @param {object} bindings Passed to `logger.child()`.
 * @returns {object} `logger.child(bindings)` when `child` is a function,
 *   otherwise `logger` itself (including when it is null/undefined).
 */
function createChildLogger(logger, bindings) {
  const supportsChild = Boolean(logger) && typeof logger.child === 'function';
  return supportsChild ? logger.child(bindings) : logger;
}
|
|
777
|
+
|
|
778
|
+
/**
 * Creates a promise-based gate that follows the `pause` / `resume` events of
 * `queueEmitter`, so pull-based readers can await until the queue resumes.
 *
 * @param {object} queueEmitter Emitter exposing `on` and `removeListener`
 *   that emits `pause` and `resume` events.
 * @returns {{waitIfPaused: function(): Promise<void>, cleanup: function(): void}}
 *   `waitIfPaused` resolves immediately when not paused, otherwise on the next
 *   `resume` (or on `cleanup`). `cleanup` detaches both listeners and releases
 *   every pending waiter.
 */
function createPauseWaiter(queueEmitter) {
  let paused = false;
  let waiters = [];

  // Resolve every pending waiter exactly once and reset the list.
  const releaseWaiters = () => {
    const pending = waiters;
    waiters = [];
    pending.forEach(resolve => resolve());
  };
  const onPause = () => {
    paused = true;
  };
  const onResume = () => {
    paused = false;
    releaseWaiters();
  };
  queueEmitter.on('pause', onPause);
  queueEmitter.on('resume', onResume);
  return {
    async waitIfPaused() {
      if (!paused) return;
      await new Promise(resolve => {
        waiters.push(resolve);
      });
    },
    cleanup() {
      queueEmitter.removeListener('pause', onPause);
      queueEmitter.removeListener('resume', onResume);
      // Once the listeners are gone no `resume` can ever be observed, so the
      // flag is cleared to keep later waitIfPaused() calls from hanging.
      paused = false;
      releaseWaiters();
    }
  };
}
|
|
806
|
+
/**
 * Consumes an async-iterable stream completely and returns its content as
 * one Buffer.
 *
 * @param {AsyncIterable<Buffer|string|Uint8Array>} stream Source to drain.
 * @returns {Promise<Buffer>} All chunks concatenated in arrival order.
 */
async function readStreamToBuffer(stream) {
  const parts = [];
  for await (const piece of stream) {
    // Chunks that are not Buffers yet are converted as they arrive.
    if (Buffer.isBuffer(piece)) {
      parts.push(piece);
    } else {
      parts.push(Buffer.from(piece));
    }
  }
  return Buffer.concat(parts);
}
|
|
813
|
+
function streamReaderFactory(indexer, stream, transform, splitRegex, skipHeader = false, sourceFormat = 'ndjson', csvOptions = {}, logger) {
|
|
814
|
+
function addParsedDoc(parsed) {
|
|
815
|
+
const doc = typeof transform === 'function' ? transform(parsed) : parsed;
|
|
816
|
+
|
|
817
|
+
// if doc is null/undefined we'll skip indexing it
|
|
818
|
+
if (doc === null || typeof doc === 'undefined') {
|
|
819
|
+
return;
|
|
820
|
+
}
|
|
821
|
+
|
|
822
|
+
// the transform callback may return an array of docs so we can emit
|
|
823
|
+
// multiple docs from a single line
|
|
824
|
+
if (Array.isArray(doc)) {
|
|
825
|
+
doc.forEach(d => {
|
|
826
|
+
if (d === null || typeof d === 'undefined') return;
|
|
827
|
+
indexer.add(d);
|
|
828
|
+
});
|
|
829
|
+
return;
|
|
830
|
+
}
|
|
831
|
+
indexer.add(doc);
|
|
832
|
+
}
|
|
833
|
+
async function processParquetStream() {
|
|
834
|
+
const {
|
|
835
|
+
waitIfPaused,
|
|
836
|
+
cleanup
|
|
837
|
+
} = createPauseWaiter(indexer.queueEmitter);
|
|
838
|
+
const parquetBuffer = await readStreamToBuffer(stream);
|
|
839
|
+
const reader = await parquet.ParquetReader.openBuffer(parquetBuffer);
|
|
840
|
+
try {
|
|
841
|
+
const cursor = reader.getCursor();
|
|
842
|
+
while (true) {
|
|
843
|
+
// eslint-disable-next-line no-await-in-loop
|
|
844
|
+
const row = await cursor.next();
|
|
845
|
+
if (row === null || typeof row === 'undefined') {
|
|
846
|
+
break;
|
|
847
|
+
}
|
|
848
|
+
addParsedDoc(row);
|
|
849
|
+
// eslint-disable-next-line no-await-in-loop
|
|
850
|
+
await waitIfPaused();
|
|
851
|
+
}
|
|
852
|
+
logger.info('Read entire stream');
|
|
853
|
+
} finally {
|
|
854
|
+
cleanup();
|
|
855
|
+
await reader.close();
|
|
856
|
+
}
|
|
857
|
+
}
|
|
858
|
+
async function processArrowStream() {
|
|
859
|
+
const {
|
|
860
|
+
waitIfPaused,
|
|
861
|
+
cleanup
|
|
862
|
+
} = createPauseWaiter(indexer.queueEmitter);
|
|
863
|
+
try {
|
|
864
|
+
const reader = await arrow.RecordBatchReader.from(stream);
|
|
865
|
+
for await (const recordBatch of reader) {
|
|
866
|
+
const {
|
|
867
|
+
fields
|
|
868
|
+
} = recordBatch.schema;
|
|
869
|
+
for (let rowIndex = 0; rowIndex < recordBatch.numRows; rowIndex++) {
|
|
870
|
+
const row = {};
|
|
871
|
+
fields.forEach(field => {
|
|
872
|
+
const vector = recordBatch.getChild(field.name);
|
|
873
|
+
row[field.name] = vector ? vector.get(rowIndex) : undefined;
|
|
874
|
+
});
|
|
875
|
+
addParsedDoc(row);
|
|
876
|
+
// eslint-disable-next-line no-await-in-loop
|
|
877
|
+
await waitIfPaused();
|
|
878
|
+
}
|
|
879
|
+
}
|
|
880
|
+
logger.info('Read entire stream');
|
|
881
|
+
} finally {
|
|
882
|
+
cleanup();
|
|
883
|
+
}
|
|
884
|
+
}
|
|
885
|
+
function processPipeline(buildPipeline, errorMessage) {
|
|
886
|
+
return new Promise((resolve, reject) => {
|
|
887
|
+
let finished = false;
|
|
888
|
+
const s = buildPipeline();
|
|
889
|
+
const onPause = () => {
|
|
890
|
+
if (finished) return;
|
|
891
|
+
s.pause();
|
|
892
|
+
};
|
|
893
|
+
const onResume = () => {
|
|
894
|
+
if (finished) return;
|
|
895
|
+
s.resume();
|
|
896
|
+
};
|
|
897
|
+
function cleanup() {
|
|
898
|
+
indexer.queueEmitter.removeListener('pause', onPause);
|
|
899
|
+
indexer.queueEmitter.removeListener('resume', onResume);
|
|
900
|
+
}
|
|
901
|
+
indexer.queueEmitter.on('pause', onPause);
|
|
902
|
+
indexer.queueEmitter.on('resume', onResume);
|
|
903
|
+
s.on('end', () => {
|
|
904
|
+
finished = true;
|
|
905
|
+
cleanup();
|
|
906
|
+
logger.info('Read entire stream');
|
|
907
|
+
resolve();
|
|
908
|
+
});
|
|
909
|
+
s.on('error', err => {
|
|
910
|
+
finished = true;
|
|
911
|
+
cleanup();
|
|
912
|
+
logger.error({
|
|
913
|
+
err
|
|
914
|
+
}, errorMessage);
|
|
915
|
+
reject(err);
|
|
916
|
+
});
|
|
917
|
+
});
|
|
918
|
+
}
|
|
919
|
+
function processCsvStream() {
|
|
920
|
+
return processPipeline(() => stream.pipe(parse(getCsvParserOptions(csvOptions, skipHeader))).pipe(es.mapSync(record => {
|
|
921
|
+
try {
|
|
922
|
+
addParsedDoc(record);
|
|
923
|
+
} catch (err) {
|
|
924
|
+
logger.error({
|
|
925
|
+
err
|
|
926
|
+
}, 'Failed to process CSV stream record');
|
|
927
|
+
}
|
|
928
|
+
}).on('error', err => {
|
|
929
|
+
logger.error({
|
|
930
|
+
err
|
|
931
|
+
}, 'Error while reading CSV stream');
|
|
932
|
+
})), 'Error while reading CSV stream');
|
|
933
|
+
}
|
|
934
|
+
function processNdjsonStream() {
|
|
935
|
+
let skippedHeader = false;
|
|
936
|
+
return processPipeline(() => stream.pipe(split(splitRegex)).pipe(es.mapSync(line => {
|
|
413
937
|
try {
|
|
414
938
|
// skip empty lines
|
|
415
939
|
if (line === '') {
|
|
416
940
|
return;
|
|
417
941
|
}
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
// if doc is undefined we'll skip indexing it
|
|
421
|
-
if (typeof doc === 'undefined') {
|
|
422
|
-
s.resume();
|
|
423
|
-
return;
|
|
424
|
-
}
|
|
425
|
-
|
|
426
|
-
// the transform callback may return an array of docs so we can emit
|
|
427
|
-
// multiple docs from a single line
|
|
428
|
-
if (Array.isArray(doc)) {
|
|
429
|
-
doc.forEach(d => indexer.add(d));
|
|
942
|
+
if (skipHeader && !skippedHeader) {
|
|
943
|
+
skippedHeader = true;
|
|
430
944
|
return;
|
|
431
945
|
}
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
946
|
+
const parsed = JSON.parse(line);
|
|
947
|
+
addParsedDoc(parsed);
|
|
948
|
+
} catch (err) {
|
|
949
|
+
logger.error({
|
|
950
|
+
err
|
|
951
|
+
}, 'Failed to process NDJSON stream line');
|
|
435
952
|
}
|
|
436
953
|
}).on('error', err => {
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
954
|
+
logger.error({
|
|
955
|
+
err
|
|
956
|
+
}, 'Error while reading stream');
|
|
957
|
+
})), 'Error while reading stream');
|
|
958
|
+
}
|
|
959
|
+
async function startIndex() {
|
|
960
|
+
try {
|
|
961
|
+
if (sourceFormat === 'csv') {
|
|
962
|
+
await processCsvStream();
|
|
963
|
+
} else if (sourceFormat === 'ndjson') {
|
|
964
|
+
await processNdjsonStream();
|
|
965
|
+
} else if (sourceFormat === 'parquet') {
|
|
966
|
+
await processParquetStream();
|
|
967
|
+
} else if (sourceFormat === 'arrow') {
|
|
968
|
+
await processArrowStream();
|
|
969
|
+
} else {
|
|
970
|
+
throw Error(`Unsupported sourceFormat: ${sourceFormat}`);
|
|
971
|
+
}
|
|
972
|
+
} catch (err) {
|
|
973
|
+
logger.error({
|
|
974
|
+
err
|
|
975
|
+
}, 'Error while reading stream');
|
|
976
|
+
} finally {
|
|
440
977
|
indexer.finish();
|
|
441
|
-
|
|
442
|
-
}));
|
|
443
|
-
indexer.queueEmitter.on('pause', () => {
|
|
444
|
-
if (finished) return;
|
|
445
|
-
s.pause();
|
|
446
|
-
});
|
|
447
|
-
indexer.queueEmitter.on('resume', () => {
|
|
448
|
-
if (finished) return;
|
|
449
|
-
s.resume();
|
|
450
|
-
});
|
|
978
|
+
}
|
|
451
979
|
}
|
|
452
980
|
return () => {
|
|
453
981
|
startIndex();
|
|
@@ -528,22 +1056,31 @@ async function transformer({
|
|
|
528
1056
|
searchSize = DEFAULT_SEARCH_SIZE,
|
|
529
1057
|
stream,
|
|
530
1058
|
fileName,
|
|
1059
|
+
sourceFormat = 'ndjson',
|
|
1060
|
+
csvOptions = {},
|
|
531
1061
|
splitRegex = /\n/,
|
|
532
1062
|
sourceIndexName,
|
|
533
1063
|
targetIndexName,
|
|
534
1064
|
mappings,
|
|
535
1065
|
mappingsOverride = false,
|
|
1066
|
+
inferMappings = false,
|
|
1067
|
+
inferMappingsOptions = {},
|
|
536
1068
|
indexMappingTotalFieldsLimit,
|
|
537
1069
|
pipeline,
|
|
538
1070
|
populatedFields = false,
|
|
539
1071
|
query,
|
|
540
1072
|
skipHeader = false,
|
|
541
1073
|
transform,
|
|
542
|
-
verbose = true
|
|
1074
|
+
verbose = true,
|
|
1075
|
+
logger: loggerInput
|
|
543
1076
|
}) {
|
|
544
1077
|
if (typeof targetIndexName === 'undefined') {
|
|
545
1078
|
throw Error('targetIndexName must be specified.');
|
|
546
1079
|
}
|
|
1080
|
+
const logger = createLogger({
|
|
1081
|
+
logger: loggerInput,
|
|
1082
|
+
verbose
|
|
1083
|
+
});
|
|
547
1084
|
const defaultClientConfig = {
|
|
548
1085
|
node: process.env.ELASTICSEARCH_URL || 'http://localhost:9200'
|
|
549
1086
|
};
|
|
@@ -551,23 +1088,47 @@ async function transformer({
|
|
|
551
1088
|
// Support both old (config) and new (client instance) patterns
|
|
552
1089
|
const sourceClient = await getOrCreateClient(sourceClientInput || sourceClientConfig, defaultClientConfig, sourceClientVersion);
|
|
553
1090
|
const targetClient = await getOrCreateClient(targetClientInput || targetClientConfig || sourceClientInput || sourceClientConfig, defaultClientConfig, targetClientVersion);
|
|
1091
|
+
const inferenceResult = await inferMappingsFromSource({
|
|
1092
|
+
targetClient,
|
|
1093
|
+
fileName,
|
|
1094
|
+
sourceFormat,
|
|
1095
|
+
csvOptions,
|
|
1096
|
+
skipHeader,
|
|
1097
|
+
mappings,
|
|
1098
|
+
inferMappings,
|
|
1099
|
+
inferMappingsOptions,
|
|
1100
|
+
logger: createChildLogger(logger, {
|
|
1101
|
+
component: 'mapping-inference'
|
|
1102
|
+
})
|
|
1103
|
+
});
|
|
554
1104
|
const createMapping = createMappingFactory({
|
|
555
1105
|
sourceClient,
|
|
556
1106
|
sourceIndexName,
|
|
557
1107
|
targetClient,
|
|
558
1108
|
targetIndexName,
|
|
559
|
-
mappings,
|
|
1109
|
+
mappings: inferenceResult.mappings,
|
|
1110
|
+
inferredIngestPipeline: inferenceResult.ingestPipeline,
|
|
560
1111
|
mappingsOverride,
|
|
561
1112
|
indexMappingTotalFieldsLimit,
|
|
562
|
-
verbose,
|
|
563
1113
|
deleteIndex,
|
|
564
|
-
pipeline
|
|
1114
|
+
pipeline,
|
|
1115
|
+
logger: createChildLogger(logger, {
|
|
1116
|
+
component: 'create-mapping'
|
|
1117
|
+
})
|
|
565
1118
|
});
|
|
566
1119
|
const indexer = indexQueueFactory({
|
|
567
1120
|
targetClient,
|
|
568
1121
|
targetIndexName,
|
|
569
1122
|
bufferSize,
|
|
570
|
-
|
|
1123
|
+
logger: createChildLogger(logger, {
|
|
1124
|
+
component: 'index-queue'
|
|
1125
|
+
})
|
|
1126
|
+
});
|
|
1127
|
+
function validateSourceFormat() {
|
|
1128
|
+
if (sourceFormat !== 'ndjson' && sourceFormat !== 'csv' && sourceFormat !== 'parquet' && sourceFormat !== 'arrow') {
|
|
1129
|
+
throw Error(`Unsupported sourceFormat: ${sourceFormat}. Use "ndjson", "csv", "parquet", or "arrow".`);
|
|
1130
|
+
}
|
|
1131
|
+
}
|
|
571
1132
|
function getReader() {
|
|
572
1133
|
if (typeof fileName !== 'undefined' && typeof sourceIndexName !== 'undefined') {
|
|
573
1134
|
throw Error('Only either one of fileName or sourceIndexName can be specified.');
|
|
@@ -576,17 +1137,28 @@ async function transformer({
|
|
|
576
1137
|
throw Error('Only one of fileName, sourceIndexName, or stream can be specified.');
|
|
577
1138
|
}
|
|
578
1139
|
if (typeof fileName !== 'undefined') {
|
|
579
|
-
|
|
1140
|
+
validateSourceFormat();
|
|
1141
|
+
return fileReaderFactory(indexer, fileName, transform, splitRegex, skipHeader, sourceFormat, csvOptions, createChildLogger(logger, {
|
|
1142
|
+
component: 'file-reader'
|
|
1143
|
+
}));
|
|
580
1144
|
}
|
|
581
1145
|
if (typeof sourceIndexName !== 'undefined') {
|
|
582
|
-
return indexReaderFactory(indexer, sourceIndexName, transform, sourceClient, query, searchSize, populatedFields
|
|
1146
|
+
return indexReaderFactory(indexer, sourceIndexName, transform, sourceClient, query, searchSize, populatedFields, createChildLogger(logger, {
|
|
1147
|
+
component: 'index-reader'
|
|
1148
|
+
}));
|
|
583
1149
|
}
|
|
584
1150
|
if (typeof stream !== 'undefined') {
|
|
585
|
-
|
|
1151
|
+
validateSourceFormat();
|
|
1152
|
+
return streamReaderFactory(indexer, stream, transform, splitRegex, skipHeader, sourceFormat, csvOptions, createChildLogger(logger, {
|
|
1153
|
+
component: 'stream-reader'
|
|
1154
|
+
}));
|
|
586
1155
|
}
|
|
587
1156
|
return null;
|
|
588
1157
|
}
|
|
589
1158
|
const reader = getReader();
|
|
1159
|
+
if (typeof reader !== 'function') {
|
|
1160
|
+
throw Error('One of fileName, sourceIndexName, or stream must be specified.');
|
|
1161
|
+
}
|
|
590
1162
|
try {
|
|
591
1163
|
const indexExists = await targetClient.indices.exists({
|
|
592
1164
|
index: targetIndexName
|
|
@@ -603,8 +1175,11 @@ async function transformer({
|
|
|
603
1175
|
} else {
|
|
604
1176
|
reader();
|
|
605
1177
|
}
|
|
606
|
-
} catch (
|
|
607
|
-
|
|
1178
|
+
} catch (err) {
|
|
1179
|
+
logger.error({
|
|
1180
|
+
err,
|
|
1181
|
+
targetIndexName
|
|
1182
|
+
}, 'Error checking index existence');
|
|
608
1183
|
} finally {
|
|
609
1184
|
// targetClient.close();
|
|
610
1185
|
}
|