node-es-transformer 1.0.2 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -96,7 +96,7 @@ yarn add node-es-transformer
 
  ## Usage
 
- ### Read from a file
+ ### Read NDJSON from a file
 
  ```javascript
  const transformer = require('node-es-transformer');
@@ -129,6 +129,50 @@ transformer({
  });
  ```
 
+ ### Read CSV from a file
+
+ ```javascript
+ const transformer = require('node-es-transformer');
+
+ transformer({
+   fileName: 'users.csv',
+   sourceFormat: 'csv',
+   targetIndexName: 'users-index',
+   mappings: {
+     properties: {
+       id: { type: 'integer' },
+       first_name: { type: 'keyword' },
+       last_name: { type: 'keyword' },
+       full_name: { type: 'keyword' },
+     },
+   },
+   transform(row) {
+     return {
+       ...row,
+       id: Number(row.id),
+       full_name: `${row.first_name} ${row.last_name}`,
+     };
+   },
+ });
+ ```
+
+ ### Infer mappings from CSV sample
+
+ ```javascript
+ const transformer = require('node-es-transformer');
+
+ transformer({
+   fileName: 'users.csv',
+   sourceFormat: 'csv',
+   targetIndexName: 'users-index',
+   inferMappings: true,
+   inferMappingsOptions: {
+     sampleBytes: 200000,
+     lines_to_sample: 2000,
+   },
+ });
+ ```
+
  ### Read from another index
 
  ```javascript
@@ -242,9 +286,11 @@ All options are passed to the main `transformer()` function.
 
  Choose **one** of these sources:
 
- - **`fileName`** (string): Source filename to ingest. Supports wildcards (e.g., `logs/*.json`).
+ - **`fileName`** (string): Source filename to ingest. Supports wildcards (e.g., `logs/*.json` or `data/*.csv`).
  - **`sourceIndexName`** (string): Source Elasticsearch index to reindex from.
  - **`stream`** (Readable): Node.js readable stream to ingest from.
+ - **`sourceFormat`** (`'ndjson' | 'csv'`): Format for file/stream sources. Default: `'ndjson'`.
+ - **`csvOptions`** (object): CSV parser options (delimiter, quote, columns, etc.) used when `sourceFormat: 'csv'`.
 
  #### Client Configuration
 
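The `csvOptions` object documented in the hunk above is passed through to `csv-parse` on top of the package defaults visible in the bundled code further down in this diff (`bom: true`, `columns: true`, `trim: true`, `skip_empty_lines: true`). A minimal sketch of a semicolon-delimited source; the file, index, and option values here are illustrative, not from the package README:

```javascript
const transformer = require('node-es-transformer');

// Illustrative sketch: ingest a semicolon-delimited CSV file.
// File name and index name are assumptions for the example.
transformer({
  fileName: 'orders.csv',
  sourceFormat: 'csv',
  targetIndexName: 'orders-index',
  csvOptions: {
    delimiter: ';', // csv-parse option: field delimiter
    quote: '"',     // csv-parse option: quote character
    columns: true,  // csv-parse option: read field names from the header row
  },
});
```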
@@ -259,10 +305,14 @@ Choose **one** of these sources:
 
  - **`mappings`** (object): Elasticsearch document mappings for target index. If reindexing and not provided, mappings are copied from source index.
  - **`mappingsOverride`** (boolean): When reindexing, apply `mappings` on top of source index mappings. Default: `false`.
+ - **`inferMappings`** (boolean): Infer mappings for `fileName` sources via `/_text_structure/find_structure`. Ignored when `mappings` is provided. If inference returns `ingest_pipeline`, it is created as `<targetIndexName>-inferred-pipeline` and applied as the index default pipeline (unless `pipeline` is explicitly set). Default: `false`.
+ - **`inferMappingsOptions`** (object): Options for `/_text_structure/find_structure` (for example `sampleBytes`, `lines_to_sample`, `delimiter`, `quote`, `has_header_row`, `timeout`).
  - **`deleteIndex`** (boolean): Delete target index if it exists before starting. Default: `false`.
  - **`indexMappingTotalFieldsLimit`** (number): Field limit for target index (`index.mapping.total_fields.limit` setting).
  - **`pipeline`** (string): Elasticsearch ingest pipeline name to use during indexing.
 
+ When `inferMappings` is enabled, the target cluster must allow `/_text_structure/find_structure` (cluster privilege: `monitor_text_structure`). If inferred ingest pipelines are used, the target cluster must also allow creating ingest pipelines (`_ingest/pipeline`).
+
  #### Performance Options
 
  - **`bufferSize`** (number): Buffer size threshold in KBytes for bulk indexing. Default: `5120` (5 MB).
@@ -276,8 +326,12 @@ Choose **one** of these sources:
  - Return array of documents to split one source into multiple targets
  - Return `null`/`undefined` to skip document
  - **`query`** (object): Elasticsearch [DSL query](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html) to filter source documents.
- - **`splitRegex`** (RegExp): Line split regex for file/stream sources. Default: `/\n/`.
- - **`skipHeader`** (boolean): Skip first line of source file (e.g., CSV header). Default: `false`.
+ - **`splitRegex`** (RegExp): Line split regex for file/stream sources when `sourceFormat` is `'ndjson'`. Default: `/\n/`.
+ - **`skipHeader`** (boolean): Header skipping for file/stream sources.
+   - NDJSON: skips the first non-empty line
+   - CSV: skips the first data line only when `csvOptions.columns` does not consume headers
+   - Default: `false`
+   - Applies only to `fileName`/`stream` sources
  - **`verbose`** (boolean): Enable logging and progress bars. Default: `true`.
 
  ### Return Value
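The CSV branch of `skipHeader` described in the hunk above maps onto `csv-parse` options in the implementation below: when `csvOptions.columns` is an explicit array (so the parser does not consume the header row itself), `skipHeader: true` is translated to `from_line: 2`. A hedged sketch with illustrative file, index, and column names:

```javascript
const transformer = require('node-es-transformer');

// Illustrative sketch: the file has a header row, but column names are supplied
// explicitly, so without skipHeader the header row would be indexed as data.
transformer({
  fileName: 'users.csv',          // assumed file name
  sourceFormat: 'csv',
  targetIndexName: 'users-index', // assumed index name
  skipHeader: true,               // becomes csv-parse's from_line: 2 in this configuration
  csvOptions: {
    columns: ['id', 'first_name', 'last_name'], // header row is not consumed by the parser
  },
});
```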
@@ -3,6 +3,7 @@
  var elasticsearch9 = require('es9');
  var elasticsearch8 = require('es8');
  var fs = require('fs');
+ var csvParse = require('csv-parse');
  var es = require('event-stream');
  var glob = require('glob');
  var split = require('split2');
@@ -25,6 +26,7 @@ function createMappingFactory({
    targetClient,
    targetIndexName,
    mappings,
+   inferredIngestPipeline,
    mappingsOverride,
    indexMappingTotalFieldsLimit,
    verbose,
@@ -33,6 +35,7 @@ function createMappingFactory({
  }) {
    return async () => {
      let targetMappings = mappingsOverride ? undefined : mappings;
+     let defaultPipeline = pipeline;
      if (sourceClient && sourceIndexName && typeof targetMappings === 'undefined') {
        try {
          const mapping = await sourceClient.indices.getMapping({
@@ -71,22 +74,34 @@ function createMappingFactory({
        });
      }
      if (indexExists === false || deleteIndex === true) {
+       if (typeof defaultPipeline === 'undefined' && typeof inferredIngestPipeline === 'object' && inferredIngestPipeline !== null && typeof targetClient?.ingest?.putPipeline === 'function') {
+         const inferredPipelineName = `${targetIndexName}-inferred-pipeline`;
+         try {
+           await targetClient.ingest.putPipeline({
+             id: inferredPipelineName,
+             ...inferredIngestPipeline
+           });
+           defaultPipeline = inferredPipelineName;
+           if (verbose) console.log(`Created inferred ingest pipeline ${inferredPipelineName}`);
+         } catch (err) {
+           console.log('Error creating inferred ingest pipeline', err);
+         }
+       }
+       const settings = {
+         ...(defaultPipeline !== undefined ? {
+           'index.default_pipeline': defaultPipeline
+         } : {}),
+         ...(indexMappingTotalFieldsLimit !== undefined ? {
+           'index.mapping.total_fields.limit': indexMappingTotalFieldsLimit,
+           'index.number_of_shards': 1,
+           'index.number_of_replicas': 0
+         } : {})
+       };
        const resp = await targetClient.indices.create({
          index: targetIndexName,
          mappings: targetMappings,
-         ...(pipeline !== undefined ? {
-           settings: {
-             index: {
-               default_pipeline: pipeline
-             }
-           }
-         } : {}),
-         ...(indexMappingTotalFieldsLimit !== undefined ? {
-           settings: {
-             'index.mapping.total_fields.limit': indexMappingTotalFieldsLimit,
-             'index.number_of_shards': 1,
-             'index.number_of_replicas': 0
-           }
+         ...(Object.keys(settings).length > 0 ? {
+           settings
          } : {})
        });
        if (verbose) console.log('Created target mapping', resp);
@@ -98,37 +113,89 @@ function createMappingFactory({
    };
  }
 
- function fileReaderFactory(indexer, fileName, transform, splitRegex, verbose) {
-   function startIndex(files) {
-     let finished = false;
-     const file = files.shift();
+ function getCsvParserOptions(csvOptions = {}, skipHeader = false) {
+   const options = {
+     bom: true,
+     columns: true,
+     trim: true,
+     skip_empty_lines: true,
+     ...csvOptions
+   };
+   const consumesHeader = options.columns === true || typeof options.columns === 'function';
+   if (skipHeader && !consumesHeader && typeof options.from_line === 'undefined') {
+     options.from_line = 2;
+   }
+   return options;
+ }
+
+ function fileReaderFactory(indexer, fileName, transform, splitRegex, verbose, skipHeader = false, sourceFormat = 'ndjson', csvOptions = {}) {
+   function addParsedDoc(parsed, file, streamRef) {
+     const context = {
+       fileName: file
+     };
+     const doc = typeof transform === 'function' ? transform(parsed, context) : parsed;
+
+     // if doc is null/undefined we'll skip indexing it
+     if (doc === null || typeof doc === 'undefined') {
+       streamRef.resume();
+       return;
+     }
+
+     // the transform callback may return an array of docs so we can emit
+     // multiple docs from a single line
+     if (Array.isArray(doc)) {
+       doc.forEach(d => {
+         if (d === null || typeof d === 'undefined') return;
+         indexer.add(d);
+       });
+       return;
+     }
+     indexer.add(doc);
+   }
+   function createNdjsonReader(file) {
+     let skippedHeader = false;
      const s = fs.createReadStream(file).pipe(split(splitRegex)).pipe(es.mapSync(line => {
        try {
          // skip empty lines
          if (line === '') {
            return;
          }
-         const doc = typeof transform === 'function' ? JSON.stringify(transform(JSON.parse(line))) : line;
-
-         // if doc is undefined we'll skip indexing it
-         if (typeof doc === 'undefined') {
-           s.resume();
-           return;
-         }
-
-         // the transform callback may return an array of docs so we can emit
-         // multiple docs from a single line
-         if (Array.isArray(doc)) {
-           doc.forEach(d => indexer.add(d));
+         if (skipHeader && !skippedHeader) {
+           skippedHeader = true;
            return;
          }
-         indexer.add(doc);
+         const parsed = JSON.parse(line);
+         addParsedDoc(parsed, file, s);
        } catch (e) {
          console.log('error', e);
        }
      }).on('error', err => {
        console.log('Error while reading file.', err);
-     }).on('end', () => {
+     }));
+     return s;
+   }
+   function createCsvReader(file) {
+     const parserOptions = getCsvParserOptions(csvOptions, skipHeader);
+     const s = fs.createReadStream(file).pipe(csvParse.parse(parserOptions)).pipe(es.mapSync(record => {
+       try {
+         addParsedDoc(record, file, s);
+       } catch (e) {
+         console.log('error', e);
+       }
+     }).on('error', err => {
+       console.log('Error while reading CSV file.', err);
+     }));
+     return s;
+   }
+   function startIndex(files) {
+     let finished = false;
+     if (files.length === 0) {
+       indexer.finish();
+       return;
+     }
+     const file = files.shift();
+     const s = sourceFormat === 'csv' ? createCsvReader(file) : createNdjsonReader(file);
+     s.on('end', () => {
        if (verbose) console.log('Read entire file: ', file);
        if (files.length > 0) {
          startIndex(files);
@@ -136,7 +203,7 @@ function fileReaderFactory(indexer, fileName, transform, splitRegex, verbose) {
        }
        indexer.finish();
        finished = true;
-     }));
+     });
      indexer.queueEmitter.on('pause', () => {
        if (finished) return;
        s.pause();
@@ -157,29 +224,26 @@ function fileReaderFactory(indexer, fileName, transform, splitRegex, verbose) {
  }
 
  const EventEmitter = require('events');
- const queueEmitter = new EventEmitter();
  const parallelCalls = 5;
 
  // a simple helper queue to bulk index documents
  function indexQueueFactory({
    targetClient: client,
    targetIndexName,
-   bufferSize = DEFAULT_BUFFER_SIZE,
-   skipHeader = false
+   bufferSize = DEFAULT_BUFFER_SIZE
  }) {
+   const queueEmitter = new EventEmitter();
    let docsPerSecond = 0;
    const flushBytes = bufferSize * 1024; // Convert KB to Bytes
    const highWaterMark = flushBytes * parallelCalls;
 
-   // Create a Readable stream
-   const stream$1 = new stream.Readable({
-     read() {},
-     // Implement read but we manage pushing manually
+   // Create a PassThrough stream (readable + writable) for proper backpressure
+   const stream$1 = new stream.PassThrough({
      highWaterMark // Buffer size for backpressure management
    });
    async function* ndjsonStreamIterator(readableStream) {
      let buffer = ''; // To hold the incomplete data
-     let skippedHeader = false;
+
      try {
        // Iterate over the stream using async iteration
        for await (const chunk of readableStream) {
@@ -193,16 +257,14 @@ function indexQueueFactory({
 
        // Yield each complete JSON object
        for (const line of lines) {
-         if (line.trim()) {
-           try {
-             if (!skipHeader || skipHeader && !skippedHeader) {
-               yield JSON.parse(line); // Parse and yield the JSON object
-               skippedHeader = true;
-             }
-           } catch (err) {
-             // Handle JSON parse errors if necessary
-             console.error('Failed to parse JSON:', err);
-           }
+         if (!line.trim()) {
+           continue;
+         }
+         try {
+           yield JSON.parse(line); // Parse and yield the JSON object
+         } catch (err) {
+           // Handle JSON parse errors if necessary
+           console.error('Failed to parse JSON:', err);
          }
        }
      }
@@ -278,7 +340,7 @@ function indexQueueFactory({
      if (finished) {
        throw new Error('Unexpected doc added after indexer should finish.');
      }
-     const canContinue = stream$1.push(`${JSON.stringify(doc)}\n`);
+     const canContinue = stream$1.write(`${JSON.stringify(doc)}\n`);
      if (!canContinue) {
        queueEmitter.emit('pause');
 
@@ -291,7 +353,7 @@ function indexQueueFactory({
    },
    finish: () => {
      finished = true;
-     stream$1.push(null);
+     stream$1.end();
    },
    queueEmitter
  };
@@ -408,40 +470,154 @@ function indexReaderFactory(indexer, sourceIndexName, transform, client, query,
    };
  }
 
- function streamReaderFactory(indexer, stream, transform, splitRegex, verbose) {
+ const DEFAULT_INFER_MAPPINGS_SAMPLE_BYTES = 100000;
+ const DEFAULT_INFER_MAPPINGS_LINES_TO_SAMPLE = 1000;
+ function readSample(filePath, sampleBytes) {
+   const fd = fs.openSync(filePath, 'r');
+   try {
+     const buffer = Buffer.alloc(sampleBytes);
+     const bytesRead = fs.readSync(fd, buffer, 0, sampleBytes, 0);
+     return buffer.subarray(0, bytesRead).toString('utf8');
+   } finally {
+     fs.closeSync(fd);
+   }
+ }
+ function emptyInferenceResult(mappings) {
+   return {
+     mappings,
+     ingestPipeline: undefined
+   };
+ }
+ async function inferMappingsFromSource({
+   targetClient,
+   fileName,
+   sourceFormat,
+   csvOptions,
+   skipHeader,
+   mappings,
+   inferMappings,
+   inferMappingsOptions,
+   verbose
+ }) {
+   if (!inferMappings || typeof mappings !== 'undefined' || typeof fileName === 'undefined') {
+     return emptyInferenceResult(mappings);
+   }
+   if (typeof targetClient?.textStructure?.findStructure !== 'function' || sourceFormat === 'xml' || sourceFormat === 'semi_structured_text') {
+     return emptyInferenceResult(mappings);
+   }
+   const files = glob.globSync(fileName);
+   if (files.length === 0) {
+     if (verbose) console.log(`No files matched for mapping inference: ${fileName}`);
+     return emptyInferenceResult(mappings);
+   }
+   const {
+     sampleBytes = DEFAULT_INFER_MAPPINGS_SAMPLE_BYTES,
+     ...requestParams
+   } = inferMappingsOptions || {};
+   const sampleText = readSample(files[0], sampleBytes);
+   if (!sampleText || sampleText.trim() === '') {
+     if (verbose) console.log('Skipping mapping inference because the sample text is empty.');
+     return emptyInferenceResult(mappings);
+   }
+   const params = {
+     body: sampleText,
+     lines_to_sample: DEFAULT_INFER_MAPPINGS_LINES_TO_SAMPLE,
+     ...requestParams
+   };
+   if (typeof params.format === 'undefined') {
+     params.format = sourceFormat === 'csv' ? 'delimited' : 'ndjson';
+   }
+   if (sourceFormat === 'csv') {
+     if (typeof params.delimiter === 'undefined' && typeof csvOptions?.delimiter === 'string') {
+       params.delimiter = csvOptions.delimiter;
+     }
+     if (typeof params.quote === 'undefined' && typeof csvOptions?.quote === 'string') {
+       params.quote = csvOptions.quote;
+     }
+     if (typeof params.has_header_row === 'undefined' && typeof csvOptions?.columns === 'boolean') {
+       params.has_header_row = csvOptions.columns;
+     }
+     if (typeof params.has_header_row === 'undefined' && skipHeader) {
+       params.has_header_row = true;
+     }
+   }
+   try {
+     const response = await targetClient.textStructure.findStructure(params);
+     if (response?.mappings && verbose) {
+       console.log(`Inferred mappings via _text_structure/find_structure from ${files[0]}`);
+     }
+     if (response?.ingest_pipeline && verbose) {
+       console.log('Inferred ingest pipeline via _text_structure/find_structure');
+     }
+     return {
+       mappings: response?.mappings || mappings,
+       ingestPipeline: response?.ingest_pipeline
+     };
+   } catch (error) {
+     if (verbose) {
+       console.log('Could not infer mappings via _text_structure/find_structure:', error.message);
+     }
+     return emptyInferenceResult(mappings);
+   }
+ }
+
+ function streamReaderFactory(indexer, stream, transform, splitRegex, verbose, skipHeader = false, sourceFormat = 'ndjson', csvOptions = {}) {
+   function addParsedDoc(parsed, streamRef) {
+     const doc = typeof transform === 'function' ? transform(parsed) : parsed;
+
+     // if doc is null/undefined we'll skip indexing it
+     if (doc === null || typeof doc === 'undefined') {
+       streamRef.resume();
+       return;
+     }
+
+     // the transform callback may return an array of docs so we can emit
+     // multiple docs from a single line
+     if (Array.isArray(doc)) {
+       doc.forEach(d => {
+         if (d === null || typeof d === 'undefined') return;
+         indexer.add(d);
+       });
+       return;
+     }
+     indexer.add(doc);
+   }
    function startIndex() {
      let finished = false;
-     const s = stream.pipe(split(splitRegex)).pipe(es.mapSync(line => {
+     const s = sourceFormat === 'csv' ? stream.pipe(csvParse.parse(getCsvParserOptions(csvOptions, skipHeader))).pipe(es.mapSync(record => {
        try {
-         // skip empty lines
-         if (line === '') {
-           return;
-         }
-         const doc = typeof transform === 'function' ? JSON.stringify(transform(JSON.parse(line))) : line;
-
-         // if doc is undefined we'll skip indexing it
-         if (typeof doc === 'undefined') {
-           s.resume();
-           return;
-         }
-
-         // the transform callback may return an array of docs so we can emit
-         // multiple docs from a single line
-         if (Array.isArray(doc)) {
-           doc.forEach(d => indexer.add(d));
-           return;
-         }
-         indexer.add(doc);
+         addParsedDoc(record, s);
        } catch (e) {
          console.log('error', e);
        }
      }).on('error', err => {
-       console.log('Error while reading stream.', err);
-     }).on('end', () => {
+       console.log('Error while reading CSV stream.', err);
+     })) : (() => {
+       let skippedHeader = false;
+       return stream.pipe(split(splitRegex)).pipe(es.mapSync(line => {
+         try {
+           // skip empty lines
+           if (line === '') {
+             return;
+           }
+           if (skipHeader && !skippedHeader) {
+             skippedHeader = true;
+             return;
+           }
+           const parsed = JSON.parse(line);
+           addParsedDoc(parsed, s);
+         } catch (e) {
+           console.log('error', e);
+         }
+       }).on('error', err => {
+         console.log('Error while reading stream.', err);
+       }));
+     })();
+     s.on('end', () => {
        if (verbose) console.log('Read entire stream.');
        indexer.finish();
        finished = true;
-     }));
+     });
      indexer.queueEmitter.on('pause', () => {
        if (finished) return;
        s.pause();
@@ -530,11 +706,15 @@ async function transformer({
    searchSize = DEFAULT_SEARCH_SIZE,
    stream,
    fileName,
+   sourceFormat = 'ndjson',
+   csvOptions = {},
    splitRegex = /\n/,
    sourceIndexName,
    targetIndexName,
    mappings,
    mappingsOverride = false,
+   inferMappings = false,
+   inferMappingsOptions = {},
    indexMappingTotalFieldsLimit,
    pipeline,
    populatedFields = false,
@@ -553,12 +733,24 @@ async function transformer({
    // Support both old (config) and new (client instance) patterns
    const sourceClient = await getOrCreateClient(sourceClientInput || sourceClientConfig, defaultClientConfig, sourceClientVersion);
    const targetClient = await getOrCreateClient(targetClientInput || targetClientConfig || sourceClientInput || sourceClientConfig, defaultClientConfig, targetClientVersion);
+   const inferenceResult = await inferMappingsFromSource({
+     targetClient,
+     fileName,
+     sourceFormat,
+     csvOptions,
+     skipHeader,
+     mappings,
+     inferMappings,
+     inferMappingsOptions,
+     verbose
+   });
    const createMapping = createMappingFactory({
      sourceClient,
      sourceIndexName,
      targetClient,
      targetIndexName,
-     mappings,
+     mappings: inferenceResult.mappings,
+     inferredIngestPipeline: inferenceResult.ingestPipeline,
      mappingsOverride,
      indexMappingTotalFieldsLimit,
      verbose,
@@ -568,8 +760,12 @@ async function transformer({
    const indexer = indexQueueFactory({
      targetClient,
      targetIndexName,
-     bufferSize,
-     skipHeader});
+     bufferSize});
+   function validateSourceFormat() {
+     if (sourceFormat !== 'ndjson' && sourceFormat !== 'csv') {
+       throw Error(`Unsupported sourceFormat: ${sourceFormat}. Use "ndjson" or "csv".`);
+     }
+   }
    function getReader() {
      if (typeof fileName !== 'undefined' && typeof sourceIndexName !== 'undefined') {
        throw Error('Only either one of fileName or sourceIndexName can be specified.');
@@ -578,13 +774,15 @@ async function transformer({
        throw Error('Only one of fileName, sourceIndexName, or stream can be specified.');
      }
      if (typeof fileName !== 'undefined') {
-       return fileReaderFactory(indexer, fileName, transform, splitRegex, verbose);
+       validateSourceFormat();
+       return fileReaderFactory(indexer, fileName, transform, splitRegex, verbose, skipHeader, sourceFormat, csvOptions);
      }
      if (typeof sourceIndexName !== 'undefined') {
        return indexReaderFactory(indexer, sourceIndexName, transform, sourceClient, query, searchSize, populatedFields);
      }
      if (typeof stream !== 'undefined') {
-       return streamReaderFactory(indexer, stream, transform, splitRegex, verbose);
+       validateSourceFormat();
+       return streamReaderFactory(indexer, stream, transform, splitRegex, verbose, skipHeader, sourceFormat, csvOptions);
      }
      return null;
    }
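To close the loop on the reader wiring shown in the last two hunks (`validateSourceFormat()` plus the extended `fileReaderFactory`/`streamReaderFactory` signatures), here is a hedged end-to-end sketch of the stream path; the stream contents, index name, and transform are illustrative, not from the package docs:

```javascript
const { Readable } = require('stream');
const transformer = require('node-es-transformer');

// Illustrative sketch: feed CSV text through the `stream` source option.
// sourceFormat must be 'ndjson' or 'csv'; anything else makes validateSourceFormat() throw.
const csvStream = Readable.from(['id,name\n', '1,Alice\n', '2,Bob\n']);

transformer({
  stream: csvStream,
  sourceFormat: 'csv',
  targetIndexName: 'people-index', // assumed index name
  transform(row) {
    // rows arrive as objects because csv-parse's default columns: true reads the header
    return { ...row, id: Number(row.id) };
  },
});
```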