node-es-transformer 1.0.0-beta4 → 1.0.0-beta6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1,44 +1,44 @@
|
|
|
1
|
+
import elasticsearch from '@elastic/elasticsearch';
|
|
1
2
|
import fs from 'fs';
|
|
2
3
|
import es from 'event-stream';
|
|
3
4
|
import glob from 'glob';
|
|
4
5
|
import split from 'split2';
|
|
5
6
|
import { Readable } from 'stream';
|
|
6
7
|
import cliProgress from 'cli-progress';
|
|
7
|
-
import elasticsearch from '@elastic/elasticsearch';
|
|
8
8
|
|
|
9
9
|
// In earlier versions this was used to set the number of docs to index in a
|
|
10
10
|
// single bulk request. Since we switched to use the helpers.bulk() method from
|
|
11
11
|
// the ES client, this now translates to the `flushBytes` option of the helper.
|
|
12
12
|
// However, for a degree of backwards compatibility with the old values, this uses
|
|
13
13
|
// KBytes instead of Bytes. It will be multiplied by 1024 in the index queue.
|
|
14
|
-
|
|
14
|
+
const DEFAULT_BUFFER_SIZE = 5120;
|
|
15
15
|
|
|
16
16
|
// The default number of docs to fetch in a single search request when reindexing.
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
function createMappingFactory(ref) {
|
|
20
|
-
var sourceClient = ref.sourceClient;
|
|
21
|
-
var sourceIndexName = ref.sourceIndexName;
|
|
22
|
-
var targetClient = ref.targetClient;
|
|
23
|
-
var targetIndexName = ref.targetIndexName;
|
|
24
|
-
var mappings = ref.mappings;
|
|
25
|
-
var mappingsOverride = ref.mappingsOverride;
|
|
26
|
-
var indexMappingTotalFieldsLimit = ref.indexMappingTotalFieldsLimit;
|
|
27
|
-
var verbose = ref.verbose;
|
|
28
|
-
var deleteIndex = ref.deleteIndex;
|
|
29
|
-
|
|
30
|
-
return async function () {
|
|
31
|
-
var targetMappings = mappingsOverride ? undefined : mappings;
|
|
17
|
+
const DEFAULT_SEARCH_SIZE = 1000;
|
|
32
18
|
|
|
19
|
+
function createMappingFactory({
|
|
20
|
+
sourceClient,
|
|
21
|
+
sourceIndexName,
|
|
22
|
+
targetClient,
|
|
23
|
+
targetIndexName,
|
|
24
|
+
mappings,
|
|
25
|
+
mappingsOverride,
|
|
26
|
+
indexMappingTotalFieldsLimit,
|
|
27
|
+
verbose,
|
|
28
|
+
deleteIndex,
|
|
29
|
+
pipeline
|
|
30
|
+
}) {
|
|
31
|
+
return async () => {
|
|
32
|
+
let targetMappings = mappingsOverride ? undefined : mappings;
|
|
33
33
|
if (sourceClient && sourceIndexName && typeof targetMappings === 'undefined') {
|
|
34
34
|
try {
|
|
35
|
-
|
|
36
|
-
index: sourceIndexName
|
|
35
|
+
const mapping = await sourceClient.indices.getMapping({
|
|
36
|
+
index: sourceIndexName
|
|
37
37
|
});
|
|
38
38
|
if (mapping[sourceIndexName]) {
|
|
39
39
|
targetMappings = mapping[sourceIndexName].mappings;
|
|
40
40
|
} else {
|
|
41
|
-
|
|
41
|
+
const allMappings = Object.values(mapping);
|
|
42
42
|
if (allMappings.length > 0) {
|
|
43
43
|
targetMappings = Object.values(mapping)[0].mappings;
|
|
44
44
|
}
|
|
@@ -48,36 +48,47 @@ function createMappingFactory(ref) {
|
|
|
48
48
|
return;
|
|
49
49
|
}
|
|
50
50
|
}
|
|
51
|
-
|
|
52
51
|
if (typeof targetMappings === 'object' && targetMappings !== null) {
|
|
53
52
|
if (mappingsOverride) {
|
|
54
|
-
targetMappings =
|
|
55
|
-
|
|
56
|
-
|
|
53
|
+
targetMappings = {
|
|
54
|
+
...targetMappings,
|
|
55
|
+
properties: {
|
|
56
|
+
...targetMappings.properties,
|
|
57
|
+
...mappings
|
|
58
|
+
}
|
|
59
|
+
};
|
|
57
60
|
}
|
|
58
|
-
|
|
59
61
|
try {
|
|
60
|
-
|
|
61
|
-
|
|
62
|
+
const indexExists = await targetClient.indices.exists({
|
|
63
|
+
index: targetIndexName
|
|
64
|
+
});
|
|
62
65
|
if (indexExists === true && deleteIndex === true) {
|
|
63
|
-
await targetClient.indices.delete({
|
|
66
|
+
await targetClient.indices.delete({
|
|
67
|
+
index: targetIndexName
|
|
68
|
+
});
|
|
64
69
|
}
|
|
65
|
-
|
|
66
70
|
if (indexExists === false || deleteIndex === true) {
|
|
67
|
-
|
|
71
|
+
const resp = await targetClient.indices.create({
|
|
68
72
|
index: targetIndexName,
|
|
69
|
-
body:
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
'index.number_of_replicas': 0,
|
|
76
|
-
},
|
|
73
|
+
body: {
|
|
74
|
+
mappings: targetMappings,
|
|
75
|
+
...(pipeline !== undefined ? {
|
|
76
|
+
settings: {
|
|
77
|
+
index: {
|
|
78
|
+
default_pipeline: pipeline
|
|
77
79
|
}
|
|
78
|
-
|
|
80
|
+
}
|
|
81
|
+
} : {}),
|
|
82
|
+
...(indexMappingTotalFieldsLimit !== undefined ? {
|
|
83
|
+
settings: {
|
|
84
|
+
'index.mapping.total_fields.limit': indexMappingTotalFieldsLimit,
|
|
85
|
+
'index.number_of_shards': 1,
|
|
86
|
+
'index.number_of_replicas': 0
|
|
87
|
+
}
|
|
88
|
+
} : {})
|
|
89
|
+
}
|
|
79
90
|
});
|
|
80
|
-
if (verbose)
|
|
91
|
+
if (verbose) console.log('Created target mapping', resp);
|
|
81
92
|
}
|
|
82
93
|
} catch (err) {
|
|
83
94
|
console.log('Error creating target mapping', err);
|
|
@@ -88,119 +99,99 @@ function createMappingFactory(ref) {
|
|
|
88
99
|
|
|
89
100
|
function fileReaderFactory(indexer, fileName, transform, splitRegex, verbose) {
|
|
90
101
|
function startIndex(files) {
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
try {
|
|
101
|
-
// skip empty lines
|
|
102
|
-
if (line === '') {
|
|
103
|
-
return;
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
var doc =
|
|
107
|
-
typeof transform === 'function'
|
|
108
|
-
? JSON.stringify(transform(JSON.parse(line)))
|
|
109
|
-
: line;
|
|
110
|
-
|
|
111
|
-
// if doc is undefined we'll skip indexing it
|
|
112
|
-
if (typeof doc === 'undefined') {
|
|
113
|
-
s.resume();
|
|
114
|
-
return;
|
|
115
|
-
}
|
|
116
|
-
|
|
117
|
-
// the transform callback may return an array of docs so we can emit
|
|
118
|
-
// multiple docs from a single line
|
|
119
|
-
if (Array.isArray(doc)) {
|
|
120
|
-
doc.forEach(function (d) { return indexer.add(d); });
|
|
121
|
-
return;
|
|
122
|
-
}
|
|
123
|
-
|
|
124
|
-
indexer.add(doc);
|
|
125
|
-
} catch (e) {
|
|
126
|
-
console.log('error', e);
|
|
127
|
-
}
|
|
128
|
-
})
|
|
129
|
-
.on('error', function (err) {
|
|
130
|
-
console.log('Error while reading file.', err);
|
|
131
|
-
})
|
|
132
|
-
.on('end', function () {
|
|
133
|
-
if (verbose) { console.log('Read entire file: ', file); }
|
|
134
|
-
if (files.length > 0) {
|
|
135
|
-
startIndex(files);
|
|
136
|
-
return;
|
|
137
|
-
}
|
|
102
|
+
let finished = false;
|
|
103
|
+
const file = files.shift();
|
|
104
|
+
const s = fs.createReadStream(file).pipe(split(splitRegex)).pipe(es.mapSync(line => {
|
|
105
|
+
try {
|
|
106
|
+
// skip empty lines
|
|
107
|
+
if (line === '') {
|
|
108
|
+
return;
|
|
109
|
+
}
|
|
110
|
+
const doc = typeof transform === 'function' ? JSON.stringify(transform(JSON.parse(line))) : line;
|
|
138
111
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
112
|
+
// if doc is undefined we'll skip indexing it
|
|
113
|
+
if (typeof doc === 'undefined') {
|
|
114
|
+
s.resume();
|
|
115
|
+
return;
|
|
116
|
+
}
|
|
143
117
|
|
|
144
|
-
|
|
145
|
-
|
|
118
|
+
// the transform callback may return an array of docs so we can emit
|
|
119
|
+
// multiple docs from a single line
|
|
120
|
+
if (Array.isArray(doc)) {
|
|
121
|
+
doc.forEach(d => indexer.add(d));
|
|
122
|
+
return;
|
|
123
|
+
}
|
|
124
|
+
indexer.add(doc);
|
|
125
|
+
} catch (e) {
|
|
126
|
+
console.log('error', e);
|
|
127
|
+
}
|
|
128
|
+
}).on('error', err => {
|
|
129
|
+
console.log('Error while reading file.', err);
|
|
130
|
+
}).on('end', () => {
|
|
131
|
+
if (verbose) console.log('Read entire file: ', file);
|
|
132
|
+
if (files.length > 0) {
|
|
133
|
+
startIndex(files);
|
|
134
|
+
return;
|
|
135
|
+
}
|
|
136
|
+
indexer.finish();
|
|
137
|
+
finished = true;
|
|
138
|
+
}));
|
|
139
|
+
indexer.queueEmitter.on('pause', () => {
|
|
140
|
+
if (finished) return;
|
|
146
141
|
s.pause();
|
|
147
142
|
});
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
if (finished) { return; }
|
|
143
|
+
indexer.queueEmitter.on('resume', () => {
|
|
144
|
+
if (finished) return;
|
|
151
145
|
s.resume();
|
|
152
146
|
});
|
|
153
147
|
}
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
glob(fileName, function (er, files) {
|
|
148
|
+
return () => {
|
|
149
|
+
glob(fileName, (er, files) => {
|
|
157
150
|
startIndex(files);
|
|
158
151
|
});
|
|
159
152
|
};
|
|
160
153
|
}
|
|
161
154
|
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
var parallelCalls = 5;
|
|
155
|
+
const EventEmitter = require('events');
|
|
156
|
+
const queueEmitter = new EventEmitter();
|
|
157
|
+
const parallelCalls = 5;
|
|
167
158
|
|
|
168
159
|
// a simple helper queue to bulk index documents
|
|
169
|
-
function indexQueueFactory(
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
160
|
+
function indexQueueFactory({
|
|
161
|
+
targetClient: client,
|
|
162
|
+
targetIndexName,
|
|
163
|
+
bufferSize = DEFAULT_BUFFER_SIZE,
|
|
164
|
+
skipHeader = false
|
|
165
|
+
}) {
|
|
166
|
+
let docsPerSecond = 0;
|
|
167
|
+
const flushBytes = bufferSize * 1024; // Convert KB to Bytes
|
|
168
|
+
const highWaterMark = flushBytes * parallelCalls;
|
|
178
169
|
|
|
179
170
|
// Create a Readable stream
|
|
180
|
-
|
|
181
|
-
read
|
|
182
|
-
|
|
171
|
+
const stream = new Readable({
|
|
172
|
+
read() {},
|
|
173
|
+
// Implement read but we manage pushing manually
|
|
174
|
+
highWaterMark // Buffer size for backpressure management
|
|
183
175
|
});
|
|
184
|
-
|
|
185
176
|
async function* ndjsonStreamIterator(readableStream) {
|
|
186
|
-
|
|
187
|
-
|
|
177
|
+
let buffer = ''; // To hold the incomplete data
|
|
178
|
+
let skippedHeader = false;
|
|
188
179
|
|
|
189
180
|
// Iterate over the stream using async iteration
|
|
190
|
-
for await (
|
|
181
|
+
for await (const chunk of readableStream) {
|
|
191
182
|
buffer += chunk.toString(); // Accumulate the chunk data in the buffer
|
|
192
183
|
|
|
193
184
|
// Split the buffer into lines (NDJSON items)
|
|
194
|
-
|
|
185
|
+
const lines = buffer.split('\n');
|
|
195
186
|
|
|
196
187
|
// The last line might be incomplete, so hold it back in the buffer
|
|
197
188
|
buffer = lines.pop();
|
|
198
189
|
|
|
199
190
|
// Yield each complete JSON object
|
|
200
|
-
for (
|
|
191
|
+
for (const line of lines) {
|
|
201
192
|
if (line.trim()) {
|
|
202
193
|
try {
|
|
203
|
-
if (!skipHeader ||
|
|
194
|
+
if (!skipHeader || skipHeader && !skippedHeader) {
|
|
204
195
|
yield JSON.parse(line); // Parse and yield the JSON object
|
|
205
196
|
skippedHeader = true;
|
|
206
197
|
}
|
|
@@ -221,118 +212,106 @@ function indexQueueFactory(ref) {
|
|
|
221
212
|
}
|
|
222
213
|
}
|
|
223
214
|
}
|
|
224
|
-
|
|
225
|
-
var finished = false;
|
|
215
|
+
let finished = false;
|
|
226
216
|
|
|
227
217
|
// Async IIFE to start bulk indexing
|
|
228
|
-
(async
|
|
218
|
+
(async () => {
|
|
219
|
+
const interval = setInterval(() => {
|
|
220
|
+
queueEmitter.emit('docsPerSecond', docsPerSecond);
|
|
221
|
+
docsPerSecond = 0;
|
|
222
|
+
}, 1000);
|
|
229
223
|
await client.helpers.bulk({
|
|
230
224
|
concurrency: parallelCalls,
|
|
231
|
-
flushBytes
|
|
225
|
+
flushBytes,
|
|
232
226
|
flushInterval: 1000,
|
|
233
227
|
refreshOnCompletion: true,
|
|
234
228
|
datasource: ndjsonStreamIterator(stream),
|
|
235
|
-
onDocument
|
|
229
|
+
onDocument(doc) {
|
|
230
|
+
docsPerSecond++;
|
|
236
231
|
return {
|
|
237
|
-
index: {
|
|
232
|
+
index: {
|
|
233
|
+
_index: targetIndexName
|
|
234
|
+
}
|
|
238
235
|
};
|
|
239
|
-
}
|
|
236
|
+
}
|
|
240
237
|
});
|
|
241
|
-
|
|
238
|
+
clearInterval(interval);
|
|
242
239
|
queueEmitter.emit('finish');
|
|
243
240
|
})();
|
|
244
|
-
|
|
245
241
|
return {
|
|
246
|
-
add:
|
|
242
|
+
add: doc => {
|
|
247
243
|
if (finished) {
|
|
248
244
|
throw new Error('Unexpected doc added after indexer should finish.');
|
|
249
245
|
}
|
|
250
|
-
|
|
251
|
-
var canContinue = stream.push(((JSON.stringify(doc)) + "\n"));
|
|
246
|
+
const canContinue = stream.push(`${JSON.stringify(doc)}\n`);
|
|
252
247
|
if (!canContinue) {
|
|
253
248
|
queueEmitter.emit('pause');
|
|
254
|
-
stream.once('drain',
|
|
249
|
+
stream.once('drain', () => {
|
|
255
250
|
queueEmitter.emit('resume');
|
|
256
251
|
});
|
|
257
252
|
}
|
|
258
253
|
},
|
|
259
|
-
finish:
|
|
254
|
+
finish: () => {
|
|
260
255
|
finished = true;
|
|
261
256
|
stream.push(null);
|
|
262
257
|
},
|
|
263
|
-
queueEmitter
|
|
258
|
+
queueEmitter
|
|
264
259
|
};
|
|
265
260
|
}
|
|
266
261
|
|
|
267
262
|
// create a new progress bar instance and use shades_classic theme
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
function indexReaderFactory(
|
|
271
|
-
indexer,
|
|
272
|
-
sourceIndexName,
|
|
273
|
-
transform,
|
|
274
|
-
client,
|
|
275
|
-
query,
|
|
276
|
-
searchSize,
|
|
277
|
-
populatedFields
|
|
278
|
-
) {
|
|
279
|
-
if ( searchSize === void 0 ) searchSize = DEFAULT_SEARCH_SIZE;
|
|
280
|
-
if ( populatedFields === void 0 ) populatedFields = false;
|
|
281
|
-
|
|
263
|
+
const progressBar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
|
|
264
|
+
function indexReaderFactory(indexer, sourceIndexName, transform, client, query, searchSize = DEFAULT_SEARCH_SIZE, populatedFields = false) {
|
|
282
265
|
return async function indexReader() {
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
266
|
+
let docsNum = 0;
|
|
267
|
+
let scrollId;
|
|
268
|
+
let finished = false;
|
|
269
|
+
let readActive = false;
|
|
270
|
+
let backPressurePause = false;
|
|
289
271
|
async function fetchPopulatedFields() {
|
|
290
272
|
try {
|
|
291
273
|
// Get all populated fields from the index
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
);
|
|
301
|
-
|
|
274
|
+
const response = await client.fieldCaps({
|
|
275
|
+
index: sourceIndexName,
|
|
276
|
+
fields: '*',
|
|
277
|
+
include_empty_fields: false,
|
|
278
|
+
filters: '-metadata'
|
|
279
|
+
}, {
|
|
280
|
+
maxRetries: 0
|
|
281
|
+
});
|
|
302
282
|
return Object.keys(response.fields);
|
|
303
283
|
} catch (e) {
|
|
304
284
|
console.log('error', e);
|
|
305
285
|
}
|
|
306
286
|
}
|
|
307
|
-
|
|
308
287
|
function search(fields) {
|
|
309
|
-
return client.search(
|
|
288
|
+
return client.search({
|
|
289
|
+
index: sourceIndexName,
|
|
310
290
|
scroll: '600s',
|
|
311
291
|
size: searchSize,
|
|
312
|
-
query
|
|
313
|
-
(fields ? {
|
|
292
|
+
query,
|
|
293
|
+
...(fields ? {
|
|
294
|
+
_source: fields
|
|
295
|
+
} : {})
|
|
296
|
+
});
|
|
314
297
|
}
|
|
315
|
-
|
|
316
298
|
function scroll(id) {
|
|
317
299
|
return client.scroll({
|
|
318
300
|
scroll_id: id,
|
|
319
|
-
scroll: '600s'
|
|
301
|
+
scroll: '600s'
|
|
320
302
|
});
|
|
321
303
|
}
|
|
322
|
-
|
|
323
|
-
var fieldsWithData;
|
|
304
|
+
let fieldsWithData;
|
|
324
305
|
|
|
325
306
|
// identify populated fields
|
|
326
307
|
if (populatedFields) {
|
|
327
308
|
fieldsWithData = await fetchPopulatedFields();
|
|
328
309
|
}
|
|
329
|
-
|
|
330
310
|
await fetchNextResponse();
|
|
331
|
-
|
|
332
311
|
function processHit(hit) {
|
|
333
312
|
docsNum += 1;
|
|
334
313
|
try {
|
|
335
|
-
|
|
314
|
+
const doc = typeof transform === 'function' ? transform(hit._source) : hit._source; // eslint-disable-line no-underscore-dangle
|
|
336
315
|
|
|
337
316
|
// if doc is undefined we'll skip indexing it
|
|
338
317
|
if (typeof doc === 'undefined') {
|
|
@@ -342,35 +321,27 @@ function indexReaderFactory(
|
|
|
342
321
|
// the transform callback may return an array of docs so we can emit
|
|
343
322
|
// multiple docs from a single line
|
|
344
323
|
if (Array.isArray(doc)) {
|
|
345
|
-
doc.forEach(
|
|
324
|
+
doc.forEach(d => indexer.add(d));
|
|
346
325
|
return;
|
|
347
326
|
}
|
|
348
|
-
|
|
349
327
|
indexer.add(doc);
|
|
350
328
|
} catch (e) {
|
|
351
329
|
console.log('error', e);
|
|
352
330
|
}
|
|
353
331
|
}
|
|
354
|
-
|
|
355
332
|
async function fetchNextResponse() {
|
|
356
333
|
readActive = true;
|
|
357
|
-
|
|
358
|
-
var sc = scrollId ? await scroll(scrollId) : await search(fieldsWithData);
|
|
359
|
-
|
|
334
|
+
const sc = scrollId ? await scroll(scrollId) : await search(fieldsWithData);
|
|
360
335
|
if (!scrollId) {
|
|
361
336
|
progressBar.start(sc.hits.total.value, 0);
|
|
362
337
|
}
|
|
363
|
-
|
|
364
338
|
scrollId = sc._scroll_id;
|
|
365
339
|
readActive = false;
|
|
366
|
-
|
|
367
340
|
processResponse(sc);
|
|
368
341
|
}
|
|
369
|
-
|
|
370
342
|
async function processResponse(response) {
|
|
371
343
|
// collect the docs from this response
|
|
372
344
|
response.hits.hits.forEach(processHit);
|
|
373
|
-
|
|
374
345
|
progressBar.update(docsNum);
|
|
375
346
|
|
|
376
347
|
// check to see if we have collected all of the docs
|
|
@@ -378,27 +349,21 @@ function indexReaderFactory(
|
|
|
378
349
|
indexer.finish();
|
|
379
350
|
return;
|
|
380
351
|
}
|
|
381
|
-
|
|
382
352
|
if (!backPressurePause) {
|
|
383
353
|
await fetchNextResponse();
|
|
384
354
|
}
|
|
385
355
|
}
|
|
386
|
-
|
|
387
|
-
indexer.queueEmitter.on('pause', async function () {
|
|
356
|
+
indexer.queueEmitter.on('pause', async () => {
|
|
388
357
|
backPressurePause = true;
|
|
389
358
|
});
|
|
390
|
-
|
|
391
|
-
indexer.queueEmitter.on('resume', async function () {
|
|
359
|
+
indexer.queueEmitter.on('resume', async () => {
|
|
392
360
|
backPressurePause = false;
|
|
393
|
-
|
|
394
361
|
if (readActive || finished) {
|
|
395
362
|
return;
|
|
396
363
|
}
|
|
397
|
-
|
|
398
364
|
await fetchNextResponse();
|
|
399
365
|
});
|
|
400
|
-
|
|
401
|
-
indexer.queueEmitter.on('finish', function () {
|
|
366
|
+
indexer.queueEmitter.on('finish', () => {
|
|
402
367
|
finished = true;
|
|
403
368
|
progressBar.stop();
|
|
404
369
|
});
|
|
@@ -407,162 +372,130 @@ function indexReaderFactory(
|
|
|
407
372
|
|
|
408
373
|
function streamReaderFactory(indexer, stream, transform, splitRegex, verbose) {
|
|
409
374
|
function startIndex() {
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
return;
|
|
419
|
-
}
|
|
420
|
-
|
|
421
|
-
var doc =
|
|
422
|
-
typeof transform === 'function' ? JSON.stringify(transform(JSON.parse(line))) : line;
|
|
423
|
-
|
|
424
|
-
// if doc is undefined we'll skip indexing it
|
|
425
|
-
if (typeof doc === 'undefined') {
|
|
426
|
-
s.resume();
|
|
427
|
-
return;
|
|
428
|
-
}
|
|
429
|
-
|
|
430
|
-
// the transform callback may return an array of docs so we can emit
|
|
431
|
-
// multiple docs from a single line
|
|
432
|
-
if (Array.isArray(doc)) {
|
|
433
|
-
doc.forEach(function (d) { return indexer.add(d); });
|
|
434
|
-
return;
|
|
435
|
-
}
|
|
375
|
+
let finished = false;
|
|
376
|
+
const s = stream.pipe(split(splitRegex)).pipe(es.mapSync(line => {
|
|
377
|
+
try {
|
|
378
|
+
// skip empty lines
|
|
379
|
+
if (line === '') {
|
|
380
|
+
return;
|
|
381
|
+
}
|
|
382
|
+
const doc = typeof transform === 'function' ? JSON.stringify(transform(JSON.parse(line))) : line;
|
|
436
383
|
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
}
|
|
442
|
-
.on('error', function (err) {
|
|
443
|
-
console.log('Error while reading stream.', err);
|
|
444
|
-
})
|
|
445
|
-
.on('end', function () {
|
|
446
|
-
if (verbose) { console.log('Read entire stream.'); }
|
|
447
|
-
indexer.finish();
|
|
448
|
-
finished = true;
|
|
449
|
-
})
|
|
450
|
-
);
|
|
384
|
+
// if doc is undefined we'll skip indexing it
|
|
385
|
+
if (typeof doc === 'undefined') {
|
|
386
|
+
s.resume();
|
|
387
|
+
return;
|
|
388
|
+
}
|
|
451
389
|
|
|
452
|
-
|
|
453
|
-
|
|
390
|
+
// the transform callback may return an array of docs so we can emit
|
|
391
|
+
// multiple docs from a single line
|
|
392
|
+
if (Array.isArray(doc)) {
|
|
393
|
+
doc.forEach(d => indexer.add(d));
|
|
394
|
+
return;
|
|
395
|
+
}
|
|
396
|
+
indexer.add(doc);
|
|
397
|
+
} catch (e) {
|
|
398
|
+
console.log('error', e);
|
|
399
|
+
}
|
|
400
|
+
}).on('error', err => {
|
|
401
|
+
console.log('Error while reading stream.', err);
|
|
402
|
+
}).on('end', () => {
|
|
403
|
+
if (verbose) console.log('Read entire stream.');
|
|
404
|
+
indexer.finish();
|
|
405
|
+
finished = true;
|
|
406
|
+
}));
|
|
407
|
+
indexer.queueEmitter.on('pause', () => {
|
|
408
|
+
if (finished) return;
|
|
454
409
|
s.pause();
|
|
455
410
|
});
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
if (finished) { return; }
|
|
411
|
+
indexer.queueEmitter.on('resume', () => {
|
|
412
|
+
if (finished) return;
|
|
459
413
|
s.resume();
|
|
460
414
|
});
|
|
461
415
|
}
|
|
462
|
-
|
|
463
|
-
return function () {
|
|
416
|
+
return () => {
|
|
464
417
|
startIndex();
|
|
465
418
|
};
|
|
466
419
|
}
|
|
467
420
|
|
|
468
|
-
async function transformer(
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
421
|
+
async function transformer({
|
|
422
|
+
deleteIndex = false,
|
|
423
|
+
sourceClientConfig,
|
|
424
|
+
targetClientConfig,
|
|
425
|
+
bufferSize = DEFAULT_BUFFER_SIZE,
|
|
426
|
+
searchSize = DEFAULT_SEARCH_SIZE,
|
|
427
|
+
stream,
|
|
428
|
+
fileName,
|
|
429
|
+
splitRegex = /\n/,
|
|
430
|
+
sourceIndexName,
|
|
431
|
+
targetIndexName,
|
|
432
|
+
mappings,
|
|
433
|
+
mappingsOverride = false,
|
|
434
|
+
indexMappingTotalFieldsLimit,
|
|
435
|
+
pipeline,
|
|
436
|
+
populatedFields = false,
|
|
437
|
+
query,
|
|
438
|
+
skipHeader = false,
|
|
439
|
+
transform,
|
|
440
|
+
verbose = true
|
|
441
|
+
}) {
|
|
488
442
|
if (typeof targetIndexName === 'undefined') {
|
|
489
443
|
throw Error('targetIndexName must be specified.');
|
|
490
444
|
}
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
node: 'http://localhost:9200',
|
|
445
|
+
const defaultClientConfig = {
|
|
446
|
+
node: 'http://localhost:9200'
|
|
494
447
|
};
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
indexMappingTotalFieldsLimit: indexMappingTotalFieldsLimit,
|
|
509
|
-
verbose: verbose,
|
|
510
|
-
deleteIndex: deleteIndex,
|
|
448
|
+
const sourceClient = new elasticsearch.Client(sourceClientConfig || defaultClientConfig);
|
|
449
|
+
const targetClient = new elasticsearch.Client(targetClientConfig || sourceClientConfig || defaultClientConfig);
|
|
450
|
+
const createMapping = createMappingFactory({
|
|
451
|
+
sourceClient,
|
|
452
|
+
sourceIndexName,
|
|
453
|
+
targetClient,
|
|
454
|
+
targetIndexName,
|
|
455
|
+
mappings,
|
|
456
|
+
mappingsOverride,
|
|
457
|
+
indexMappingTotalFieldsLimit,
|
|
458
|
+
verbose,
|
|
459
|
+
deleteIndex,
|
|
460
|
+
pipeline
|
|
511
461
|
});
|
|
512
|
-
|
|
513
|
-
targetClient
|
|
514
|
-
targetIndexName
|
|
515
|
-
bufferSize
|
|
516
|
-
skipHeader
|
|
517
|
-
verbose
|
|
462
|
+
const indexer = indexQueueFactory({
|
|
463
|
+
targetClient,
|
|
464
|
+
targetIndexName,
|
|
465
|
+
bufferSize,
|
|
466
|
+
skipHeader,
|
|
467
|
+
verbose
|
|
518
468
|
});
|
|
519
|
-
|
|
520
469
|
function getReader() {
|
|
521
470
|
if (typeof fileName !== 'undefined' && typeof sourceIndexName !== 'undefined') {
|
|
522
471
|
throw Error('Only either one of fileName or sourceIndexName can be specified.');
|
|
523
472
|
}
|
|
524
|
-
|
|
525
|
-
if (
|
|
526
|
-
(typeof fileName !== 'undefined' && typeof sourceIndexName !== 'undefined') ||
|
|
527
|
-
(typeof fileName !== 'undefined' && typeof stream !== 'undefined') ||
|
|
528
|
-
(typeof sourceIndexName !== 'undefined' && typeof stream !== 'undefined')
|
|
529
|
-
) {
|
|
473
|
+
if (typeof fileName !== 'undefined' && typeof sourceIndexName !== 'undefined' || typeof fileName !== 'undefined' && typeof stream !== 'undefined' || typeof sourceIndexName !== 'undefined' && typeof stream !== 'undefined') {
|
|
530
474
|
throw Error('Only one of fileName, sourceIndexName, or stream can be specified.');
|
|
531
475
|
}
|
|
532
|
-
|
|
533
476
|
if (typeof fileName !== 'undefined') {
|
|
534
477
|
return fileReaderFactory(indexer, fileName, transform, splitRegex, verbose);
|
|
535
478
|
}
|
|
536
|
-
|
|
537
479
|
if (typeof sourceIndexName !== 'undefined') {
|
|
538
|
-
return indexReaderFactory(
|
|
539
|
-
indexer,
|
|
540
|
-
sourceIndexName,
|
|
541
|
-
transform,
|
|
542
|
-
sourceClient,
|
|
543
|
-
query,
|
|
544
|
-
searchSize,
|
|
545
|
-
populatedFields
|
|
546
|
-
);
|
|
480
|
+
return indexReaderFactory(indexer, sourceIndexName, transform, sourceClient, query, searchSize, populatedFields);
|
|
547
481
|
}
|
|
548
|
-
|
|
549
482
|
if (typeof stream !== 'undefined') {
|
|
550
483
|
return streamReaderFactory(indexer, stream, transform, splitRegex, verbose);
|
|
551
484
|
}
|
|
552
|
-
|
|
553
485
|
return null;
|
|
554
486
|
}
|
|
555
|
-
|
|
556
|
-
var reader = getReader();
|
|
557
|
-
|
|
487
|
+
const reader = getReader();
|
|
558
488
|
try {
|
|
559
|
-
|
|
560
|
-
|
|
489
|
+
const indexExists = await targetClient.indices.exists({
|
|
490
|
+
index: targetIndexName
|
|
491
|
+
});
|
|
561
492
|
if (indexExists === false) {
|
|
562
493
|
await createMapping();
|
|
563
494
|
reader();
|
|
564
495
|
} else if (deleteIndex === true) {
|
|
565
|
-
await targetClient.indices.delete({
|
|
496
|
+
await targetClient.indices.delete({
|
|
497
|
+
index: targetIndexName
|
|
498
|
+
});
|
|
566
499
|
await createMapping();
|
|
567
500
|
reader();
|
|
568
501
|
} else {
|
|
@@ -573,8 +506,10 @@ async function transformer(ref) {
|
|
|
573
506
|
} finally {
|
|
574
507
|
// targetClient.close();
|
|
575
508
|
}
|
|
576
|
-
|
|
577
|
-
|
|
509
|
+
return {
|
|
510
|
+
events: indexer.queueEmitter
|
|
511
|
+
};
|
|
578
512
|
}
|
|
579
513
|
|
|
580
|
-
export default
|
|
514
|
+
export { transformer as default };
|
|
515
|
+
//# sourceMappingURL=node-es-transformer.esm.js.map
|