node-es-transformer 1.0.0-beta4 → 1.0.0-beta6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1,48 +1,46 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
var
|
|
6
|
-
var
|
|
7
|
-
var
|
|
8
|
-
var split = _interopDefault(require('split2'));
|
|
3
|
+
var elasticsearch = require('@elastic/elasticsearch');
|
|
4
|
+
var fs = require('fs');
|
|
5
|
+
var es = require('event-stream');
|
|
6
|
+
var glob = require('glob');
|
|
7
|
+
var split = require('split2');
|
|
9
8
|
var stream = require('stream');
|
|
10
|
-
var cliProgress =
|
|
11
|
-
var elasticsearch = _interopDefault(require('@elastic/elasticsearch'));
|
|
9
|
+
var cliProgress = require('cli-progress');
|
|
12
10
|
|
|
13
11
|
// In earlier versions this was used to set the number of docs to index in a
|
|
14
12
|
// single bulk request. Since we switched to use the helpers.bulk() method from
|
|
15
13
|
// the ES client, this now translates to the `flushBytes` option of the helper.
|
|
16
14
|
// However, for kind of a backwards compability with the old values, this uses
|
|
17
15
|
// KBytes instead of Bytes. It will be multiplied by 1024 in the index queue.
|
|
18
|
-
|
|
16
|
+
const DEFAULT_BUFFER_SIZE = 5120;
|
|
19
17
|
|
|
20
18
|
// The default number of docs to fetch in a single search request when reindexing.
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
function createMappingFactory(ref) {
|
|
24
|
-
var sourceClient = ref.sourceClient;
|
|
25
|
-
var sourceIndexName = ref.sourceIndexName;
|
|
26
|
-
var targetClient = ref.targetClient;
|
|
27
|
-
var targetIndexName = ref.targetIndexName;
|
|
28
|
-
var mappings = ref.mappings;
|
|
29
|
-
var mappingsOverride = ref.mappingsOverride;
|
|
30
|
-
var indexMappingTotalFieldsLimit = ref.indexMappingTotalFieldsLimit;
|
|
31
|
-
var verbose = ref.verbose;
|
|
32
|
-
var deleteIndex = ref.deleteIndex;
|
|
33
|
-
|
|
34
|
-
return async function () {
|
|
35
|
-
var targetMappings = mappingsOverride ? undefined : mappings;
|
|
19
|
+
const DEFAULT_SEARCH_SIZE = 1000;
|
|
36
20
|
|
|
21
|
+
function createMappingFactory({
|
|
22
|
+
sourceClient,
|
|
23
|
+
sourceIndexName,
|
|
24
|
+
targetClient,
|
|
25
|
+
targetIndexName,
|
|
26
|
+
mappings,
|
|
27
|
+
mappingsOverride,
|
|
28
|
+
indexMappingTotalFieldsLimit,
|
|
29
|
+
verbose,
|
|
30
|
+
deleteIndex,
|
|
31
|
+
pipeline
|
|
32
|
+
}) {
|
|
33
|
+
return async () => {
|
|
34
|
+
let targetMappings = mappingsOverride ? undefined : mappings;
|
|
37
35
|
if (sourceClient && sourceIndexName && typeof targetMappings === 'undefined') {
|
|
38
36
|
try {
|
|
39
|
-
|
|
40
|
-
index: sourceIndexName
|
|
37
|
+
const mapping = await sourceClient.indices.getMapping({
|
|
38
|
+
index: sourceIndexName
|
|
41
39
|
});
|
|
42
40
|
if (mapping[sourceIndexName]) {
|
|
43
41
|
targetMappings = mapping[sourceIndexName].mappings;
|
|
44
42
|
} else {
|
|
45
|
-
|
|
43
|
+
const allMappings = Object.values(mapping);
|
|
46
44
|
if (allMappings.length > 0) {
|
|
47
45
|
targetMappings = Object.values(mapping)[0].mappings;
|
|
48
46
|
}
|
|
@@ -52,36 +50,47 @@ function createMappingFactory(ref) {
|
|
|
52
50
|
return;
|
|
53
51
|
}
|
|
54
52
|
}
|
|
55
|
-
|
|
56
53
|
if (typeof targetMappings === 'object' && targetMappings !== null) {
|
|
57
54
|
if (mappingsOverride) {
|
|
58
|
-
targetMappings =
|
|
59
|
-
|
|
60
|
-
|
|
55
|
+
targetMappings = {
|
|
56
|
+
...targetMappings,
|
|
57
|
+
properties: {
|
|
58
|
+
...targetMappings.properties,
|
|
59
|
+
...mappings
|
|
60
|
+
}
|
|
61
|
+
};
|
|
61
62
|
}
|
|
62
|
-
|
|
63
63
|
try {
|
|
64
|
-
|
|
65
|
-
|
|
64
|
+
const indexExists = await targetClient.indices.exists({
|
|
65
|
+
index: targetIndexName
|
|
66
|
+
});
|
|
66
67
|
if (indexExists === true && deleteIndex === true) {
|
|
67
|
-
await targetClient.indices.delete({
|
|
68
|
+
await targetClient.indices.delete({
|
|
69
|
+
index: targetIndexName
|
|
70
|
+
});
|
|
68
71
|
}
|
|
69
|
-
|
|
70
72
|
if (indexExists === false || deleteIndex === true) {
|
|
71
|
-
|
|
73
|
+
const resp = await targetClient.indices.create({
|
|
72
74
|
index: targetIndexName,
|
|
73
|
-
body:
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
'index.number_of_replicas': 0,
|
|
80
|
-
},
|
|
75
|
+
body: {
|
|
76
|
+
mappings: targetMappings,
|
|
77
|
+
...(pipeline !== undefined ? {
|
|
78
|
+
settings: {
|
|
79
|
+
index: {
|
|
80
|
+
default_pipeline: pipeline
|
|
81
81
|
}
|
|
82
|
-
|
|
82
|
+
}
|
|
83
|
+
} : {}),
|
|
84
|
+
...(indexMappingTotalFieldsLimit !== undefined ? {
|
|
85
|
+
settings: {
|
|
86
|
+
'index.mapping.total_fields.limit': indexMappingTotalFieldsLimit,
|
|
87
|
+
'index.number_of_shards': 1,
|
|
88
|
+
'index.number_of_replicas': 0
|
|
89
|
+
}
|
|
90
|
+
} : {})
|
|
91
|
+
}
|
|
83
92
|
});
|
|
84
|
-
if (verbose)
|
|
93
|
+
if (verbose) console.log('Created target mapping', resp);
|
|
85
94
|
}
|
|
86
95
|
} catch (err) {
|
|
87
96
|
console.log('Error creating target mapping', err);
|
|
@@ -92,119 +101,99 @@ function createMappingFactory(ref) {
|
|
|
92
101
|
|
|
93
102
|
function fileReaderFactory(indexer, fileName, transform, splitRegex, verbose) {
|
|
94
103
|
function startIndex(files) {
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
try {
|
|
105
|
-
// skip empty lines
|
|
106
|
-
if (line === '') {
|
|
107
|
-
return;
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
var doc =
|
|
111
|
-
typeof transform === 'function'
|
|
112
|
-
? JSON.stringify(transform(JSON.parse(line)))
|
|
113
|
-
: line;
|
|
114
|
-
|
|
115
|
-
// if doc is undefined we'll skip indexing it
|
|
116
|
-
if (typeof doc === 'undefined') {
|
|
117
|
-
s.resume();
|
|
118
|
-
return;
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
// the transform callback may return an array of docs so we can emit
|
|
122
|
-
// multiple docs from a single line
|
|
123
|
-
if (Array.isArray(doc)) {
|
|
124
|
-
doc.forEach(function (d) { return indexer.add(d); });
|
|
125
|
-
return;
|
|
126
|
-
}
|
|
127
|
-
|
|
128
|
-
indexer.add(doc);
|
|
129
|
-
} catch (e) {
|
|
130
|
-
console.log('error', e);
|
|
131
|
-
}
|
|
132
|
-
})
|
|
133
|
-
.on('error', function (err) {
|
|
134
|
-
console.log('Error while reading file.', err);
|
|
135
|
-
})
|
|
136
|
-
.on('end', function () {
|
|
137
|
-
if (verbose) { console.log('Read entire file: ', file); }
|
|
138
|
-
if (files.length > 0) {
|
|
139
|
-
startIndex(files);
|
|
140
|
-
return;
|
|
141
|
-
}
|
|
104
|
+
let finished = false;
|
|
105
|
+
const file = files.shift();
|
|
106
|
+
const s = fs.createReadStream(file).pipe(split(splitRegex)).pipe(es.mapSync(line => {
|
|
107
|
+
try {
|
|
108
|
+
// skip empty lines
|
|
109
|
+
if (line === '') {
|
|
110
|
+
return;
|
|
111
|
+
}
|
|
112
|
+
const doc = typeof transform === 'function' ? JSON.stringify(transform(JSON.parse(line))) : line;
|
|
142
113
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
114
|
+
// if doc is undefined we'll skip indexing it
|
|
115
|
+
if (typeof doc === 'undefined') {
|
|
116
|
+
s.resume();
|
|
117
|
+
return;
|
|
118
|
+
}
|
|
147
119
|
|
|
148
|
-
|
|
149
|
-
|
|
120
|
+
// the transform callback may return an array of docs so we can emit
|
|
121
|
+
// multiple docs from a single line
|
|
122
|
+
if (Array.isArray(doc)) {
|
|
123
|
+
doc.forEach(d => indexer.add(d));
|
|
124
|
+
return;
|
|
125
|
+
}
|
|
126
|
+
indexer.add(doc);
|
|
127
|
+
} catch (e) {
|
|
128
|
+
console.log('error', e);
|
|
129
|
+
}
|
|
130
|
+
}).on('error', err => {
|
|
131
|
+
console.log('Error while reading file.', err);
|
|
132
|
+
}).on('end', () => {
|
|
133
|
+
if (verbose) console.log('Read entire file: ', file);
|
|
134
|
+
if (files.length > 0) {
|
|
135
|
+
startIndex(files);
|
|
136
|
+
return;
|
|
137
|
+
}
|
|
138
|
+
indexer.finish();
|
|
139
|
+
finished = true;
|
|
140
|
+
}));
|
|
141
|
+
indexer.queueEmitter.on('pause', () => {
|
|
142
|
+
if (finished) return;
|
|
150
143
|
s.pause();
|
|
151
144
|
});
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
if (finished) { return; }
|
|
145
|
+
indexer.queueEmitter.on('resume', () => {
|
|
146
|
+
if (finished) return;
|
|
155
147
|
s.resume();
|
|
156
148
|
});
|
|
157
149
|
}
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
glob(fileName, function (er, files) {
|
|
150
|
+
return () => {
|
|
151
|
+
glob(fileName, (er, files) => {
|
|
161
152
|
startIndex(files);
|
|
162
153
|
});
|
|
163
154
|
};
|
|
164
155
|
}
|
|
165
156
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
var parallelCalls = 5;
|
|
157
|
+
const EventEmitter = require('events');
|
|
158
|
+
const queueEmitter = new EventEmitter();
|
|
159
|
+
const parallelCalls = 5;
|
|
171
160
|
|
|
172
161
|
// a simple helper queue to bulk index documents
|
|
173
|
-
function indexQueueFactory(
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
162
|
+
function indexQueueFactory({
|
|
163
|
+
targetClient: client,
|
|
164
|
+
targetIndexName,
|
|
165
|
+
bufferSize = DEFAULT_BUFFER_SIZE,
|
|
166
|
+
skipHeader = false
|
|
167
|
+
}) {
|
|
168
|
+
let docsPerSecond = 0;
|
|
169
|
+
const flushBytes = bufferSize * 1024; // Convert KB to Bytes
|
|
170
|
+
const highWaterMark = flushBytes * parallelCalls;
|
|
182
171
|
|
|
183
172
|
// Create a Readable stream
|
|
184
|
-
|
|
185
|
-
read
|
|
186
|
-
|
|
173
|
+
const stream$1 = new stream.Readable({
|
|
174
|
+
read() {},
|
|
175
|
+
// Implement read but we manage pushing manually
|
|
176
|
+
highWaterMark // Buffer size for backpressure management
|
|
187
177
|
});
|
|
188
|
-
|
|
189
178
|
async function* ndjsonStreamIterator(readableStream) {
|
|
190
|
-
|
|
191
|
-
|
|
179
|
+
let buffer = ''; // To hold the incomplete data
|
|
180
|
+
let skippedHeader = false;
|
|
192
181
|
|
|
193
182
|
// Iterate over the stream using async iteration
|
|
194
|
-
for await (
|
|
183
|
+
for await (const chunk of readableStream) {
|
|
195
184
|
buffer += chunk.toString(); // Accumulate the chunk data in the buffer
|
|
196
185
|
|
|
197
186
|
// Split the buffer into lines (NDJSON items)
|
|
198
|
-
|
|
187
|
+
const lines = buffer.split('\n');
|
|
199
188
|
|
|
200
189
|
// The last line might be incomplete, so hold it back in the buffer
|
|
201
190
|
buffer = lines.pop();
|
|
202
191
|
|
|
203
192
|
// Yield each complete JSON object
|
|
204
|
-
for (
|
|
193
|
+
for (const line of lines) {
|
|
205
194
|
if (line.trim()) {
|
|
206
195
|
try {
|
|
207
|
-
if (!skipHeader ||
|
|
196
|
+
if (!skipHeader || skipHeader && !skippedHeader) {
|
|
208
197
|
yield JSON.parse(line); // Parse and yield the JSON object
|
|
209
198
|
skippedHeader = true;
|
|
210
199
|
}
|
|
@@ -225,118 +214,106 @@ function indexQueueFactory(ref) {
|
|
|
225
214
|
}
|
|
226
215
|
}
|
|
227
216
|
}
|
|
228
|
-
|
|
229
|
-
var finished = false;
|
|
217
|
+
let finished = false;
|
|
230
218
|
|
|
231
219
|
// Async IIFE to start bulk indexing
|
|
232
|
-
(async
|
|
220
|
+
(async () => {
|
|
221
|
+
const interval = setInterval(() => {
|
|
222
|
+
queueEmitter.emit('docsPerSecond', docsPerSecond);
|
|
223
|
+
docsPerSecond = 0;
|
|
224
|
+
}, 1000);
|
|
233
225
|
await client.helpers.bulk({
|
|
234
226
|
concurrency: parallelCalls,
|
|
235
|
-
flushBytes
|
|
227
|
+
flushBytes,
|
|
236
228
|
flushInterval: 1000,
|
|
237
229
|
refreshOnCompletion: true,
|
|
238
|
-
datasource: ndjsonStreamIterator(stream
|
|
239
|
-
onDocument
|
|
230
|
+
datasource: ndjsonStreamIterator(stream$1),
|
|
231
|
+
onDocument(doc) {
|
|
232
|
+
docsPerSecond++;
|
|
240
233
|
return {
|
|
241
|
-
index: {
|
|
234
|
+
index: {
|
|
235
|
+
_index: targetIndexName
|
|
236
|
+
}
|
|
242
237
|
};
|
|
243
|
-
}
|
|
238
|
+
}
|
|
244
239
|
});
|
|
245
|
-
|
|
240
|
+
clearInterval(interval);
|
|
246
241
|
queueEmitter.emit('finish');
|
|
247
242
|
})();
|
|
248
|
-
|
|
249
243
|
return {
|
|
250
|
-
add:
|
|
244
|
+
add: doc => {
|
|
251
245
|
if (finished) {
|
|
252
246
|
throw new Error('Unexpected doc added after indexer should finish.');
|
|
253
247
|
}
|
|
254
|
-
|
|
255
|
-
var canContinue = stream$$1.push(((JSON.stringify(doc)) + "\n"));
|
|
248
|
+
const canContinue = stream$1.push(`${JSON.stringify(doc)}\n`);
|
|
256
249
|
if (!canContinue) {
|
|
257
250
|
queueEmitter.emit('pause');
|
|
258
|
-
stream
|
|
251
|
+
stream$1.once('drain', () => {
|
|
259
252
|
queueEmitter.emit('resume');
|
|
260
253
|
});
|
|
261
254
|
}
|
|
262
255
|
},
|
|
263
|
-
finish:
|
|
256
|
+
finish: () => {
|
|
264
257
|
finished = true;
|
|
265
|
-
stream
|
|
258
|
+
stream$1.push(null);
|
|
266
259
|
},
|
|
267
|
-
queueEmitter
|
|
260
|
+
queueEmitter
|
|
268
261
|
};
|
|
269
262
|
}
|
|
270
263
|
|
|
271
264
|
// create a new progress bar instance and use shades_classic theme
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
function indexReaderFactory(
|
|
275
|
-
indexer,
|
|
276
|
-
sourceIndexName,
|
|
277
|
-
transform,
|
|
278
|
-
client,
|
|
279
|
-
query,
|
|
280
|
-
searchSize,
|
|
281
|
-
populatedFields
|
|
282
|
-
) {
|
|
283
|
-
if ( searchSize === void 0 ) searchSize = DEFAULT_SEARCH_SIZE;
|
|
284
|
-
if ( populatedFields === void 0 ) populatedFields = false;
|
|
285
|
-
|
|
265
|
+
const progressBar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
|
|
266
|
+
function indexReaderFactory(indexer, sourceIndexName, transform, client, query, searchSize = DEFAULT_SEARCH_SIZE, populatedFields = false) {
|
|
286
267
|
return async function indexReader() {
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
268
|
+
let docsNum = 0;
|
|
269
|
+
let scrollId;
|
|
270
|
+
let finished = false;
|
|
271
|
+
let readActive = false;
|
|
272
|
+
let backPressurePause = false;
|
|
293
273
|
async function fetchPopulatedFields() {
|
|
294
274
|
try {
|
|
295
275
|
// Get all populated fields from the index
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
);
|
|
305
|
-
|
|
276
|
+
const response = await client.fieldCaps({
|
|
277
|
+
index: sourceIndexName,
|
|
278
|
+
fields: '*',
|
|
279
|
+
include_empty_fields: false,
|
|
280
|
+
filters: '-metadata'
|
|
281
|
+
}, {
|
|
282
|
+
maxRetries: 0
|
|
283
|
+
});
|
|
306
284
|
return Object.keys(response.fields);
|
|
307
285
|
} catch (e) {
|
|
308
286
|
console.log('error', e);
|
|
309
287
|
}
|
|
310
288
|
}
|
|
311
|
-
|
|
312
289
|
function search(fields) {
|
|
313
|
-
return client.search(
|
|
290
|
+
return client.search({
|
|
291
|
+
index: sourceIndexName,
|
|
314
292
|
scroll: '600s',
|
|
315
293
|
size: searchSize,
|
|
316
|
-
query
|
|
317
|
-
(fields ? {
|
|
294
|
+
query,
|
|
295
|
+
...(fields ? {
|
|
296
|
+
_source: fields
|
|
297
|
+
} : {})
|
|
298
|
+
});
|
|
318
299
|
}
|
|
319
|
-
|
|
320
300
|
function scroll(id) {
|
|
321
301
|
return client.scroll({
|
|
322
302
|
scroll_id: id,
|
|
323
|
-
scroll: '600s'
|
|
303
|
+
scroll: '600s'
|
|
324
304
|
});
|
|
325
305
|
}
|
|
326
|
-
|
|
327
|
-
var fieldsWithData;
|
|
306
|
+
let fieldsWithData;
|
|
328
307
|
|
|
329
308
|
// identify populated fields
|
|
330
309
|
if (populatedFields) {
|
|
331
310
|
fieldsWithData = await fetchPopulatedFields();
|
|
332
311
|
}
|
|
333
|
-
|
|
334
312
|
await fetchNextResponse();
|
|
335
|
-
|
|
336
313
|
function processHit(hit) {
|
|
337
314
|
docsNum += 1;
|
|
338
315
|
try {
|
|
339
|
-
|
|
316
|
+
const doc = typeof transform === 'function' ? transform(hit._source) : hit._source; // eslint-disable-line no-underscore-dangle
|
|
340
317
|
|
|
341
318
|
// if doc is undefined we'll skip indexing it
|
|
342
319
|
if (typeof doc === 'undefined') {
|
|
@@ -346,35 +323,27 @@ function indexReaderFactory(
|
|
|
346
323
|
// the transform callback may return an array of docs so we can emit
|
|
347
324
|
// multiple docs from a single line
|
|
348
325
|
if (Array.isArray(doc)) {
|
|
349
|
-
doc.forEach(
|
|
326
|
+
doc.forEach(d => indexer.add(d));
|
|
350
327
|
return;
|
|
351
328
|
}
|
|
352
|
-
|
|
353
329
|
indexer.add(doc);
|
|
354
330
|
} catch (e) {
|
|
355
331
|
console.log('error', e);
|
|
356
332
|
}
|
|
357
333
|
}
|
|
358
|
-
|
|
359
334
|
async function fetchNextResponse() {
|
|
360
335
|
readActive = true;
|
|
361
|
-
|
|
362
|
-
var sc = scrollId ? await scroll(scrollId) : await search(fieldsWithData);
|
|
363
|
-
|
|
336
|
+
const sc = scrollId ? await scroll(scrollId) : await search(fieldsWithData);
|
|
364
337
|
if (!scrollId) {
|
|
365
338
|
progressBar.start(sc.hits.total.value, 0);
|
|
366
339
|
}
|
|
367
|
-
|
|
368
340
|
scrollId = sc._scroll_id;
|
|
369
341
|
readActive = false;
|
|
370
|
-
|
|
371
342
|
processResponse(sc);
|
|
372
343
|
}
|
|
373
|
-
|
|
374
344
|
async function processResponse(response) {
|
|
375
345
|
// collect the docs from this response
|
|
376
346
|
response.hits.hits.forEach(processHit);
|
|
377
|
-
|
|
378
347
|
progressBar.update(docsNum);
|
|
379
348
|
|
|
380
349
|
// check to see if we have collected all of the docs
|
|
@@ -382,191 +351,153 @@ function indexReaderFactory(
|
|
|
382
351
|
indexer.finish();
|
|
383
352
|
return;
|
|
384
353
|
}
|
|
385
|
-
|
|
386
354
|
if (!backPressurePause) {
|
|
387
355
|
await fetchNextResponse();
|
|
388
356
|
}
|
|
389
357
|
}
|
|
390
|
-
|
|
391
|
-
indexer.queueEmitter.on('pause', async function () {
|
|
358
|
+
indexer.queueEmitter.on('pause', async () => {
|
|
392
359
|
backPressurePause = true;
|
|
393
360
|
});
|
|
394
|
-
|
|
395
|
-
indexer.queueEmitter.on('resume', async function () {
|
|
361
|
+
indexer.queueEmitter.on('resume', async () => {
|
|
396
362
|
backPressurePause = false;
|
|
397
|
-
|
|
398
363
|
if (readActive || finished) {
|
|
399
364
|
return;
|
|
400
365
|
}
|
|
401
|
-
|
|
402
366
|
await fetchNextResponse();
|
|
403
367
|
});
|
|
404
|
-
|
|
405
|
-
indexer.queueEmitter.on('finish', function () {
|
|
368
|
+
indexer.queueEmitter.on('finish', () => {
|
|
406
369
|
finished = true;
|
|
407
370
|
progressBar.stop();
|
|
408
371
|
});
|
|
409
372
|
};
|
|
410
373
|
}
|
|
411
374
|
|
|
412
|
-
function streamReaderFactory(indexer, stream
|
|
375
|
+
function streamReaderFactory(indexer, stream, transform, splitRegex, verbose) {
|
|
413
376
|
function startIndex() {
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
return;
|
|
423
|
-
}
|
|
424
|
-
|
|
425
|
-
var doc =
|
|
426
|
-
typeof transform === 'function' ? JSON.stringify(transform(JSON.parse(line))) : line;
|
|
427
|
-
|
|
428
|
-
// if doc is undefined we'll skip indexing it
|
|
429
|
-
if (typeof doc === 'undefined') {
|
|
430
|
-
s.resume();
|
|
431
|
-
return;
|
|
432
|
-
}
|
|
433
|
-
|
|
434
|
-
// the transform callback may return an array of docs so we can emit
|
|
435
|
-
// multiple docs from a single line
|
|
436
|
-
if (Array.isArray(doc)) {
|
|
437
|
-
doc.forEach(function (d) { return indexer.add(d); });
|
|
438
|
-
return;
|
|
439
|
-
}
|
|
377
|
+
let finished = false;
|
|
378
|
+
const s = stream.pipe(split(splitRegex)).pipe(es.mapSync(line => {
|
|
379
|
+
try {
|
|
380
|
+
// skip empty lines
|
|
381
|
+
if (line === '') {
|
|
382
|
+
return;
|
|
383
|
+
}
|
|
384
|
+
const doc = typeof transform === 'function' ? JSON.stringify(transform(JSON.parse(line))) : line;
|
|
440
385
|
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
}
|
|
446
|
-
.on('error', function (err) {
|
|
447
|
-
console.log('Error while reading stream.', err);
|
|
448
|
-
})
|
|
449
|
-
.on('end', function () {
|
|
450
|
-
if (verbose) { console.log('Read entire stream.'); }
|
|
451
|
-
indexer.finish();
|
|
452
|
-
finished = true;
|
|
453
|
-
})
|
|
454
|
-
);
|
|
386
|
+
// if doc is undefined we'll skip indexing it
|
|
387
|
+
if (typeof doc === 'undefined') {
|
|
388
|
+
s.resume();
|
|
389
|
+
return;
|
|
390
|
+
}
|
|
455
391
|
|
|
456
|
-
|
|
457
|
-
|
|
392
|
+
// the transform callback may return an array of docs so we can emit
|
|
393
|
+
// multiple docs from a single line
|
|
394
|
+
if (Array.isArray(doc)) {
|
|
395
|
+
doc.forEach(d => indexer.add(d));
|
|
396
|
+
return;
|
|
397
|
+
}
|
|
398
|
+
indexer.add(doc);
|
|
399
|
+
} catch (e) {
|
|
400
|
+
console.log('error', e);
|
|
401
|
+
}
|
|
402
|
+
}).on('error', err => {
|
|
403
|
+
console.log('Error while reading stream.', err);
|
|
404
|
+
}).on('end', () => {
|
|
405
|
+
if (verbose) console.log('Read entire stream.');
|
|
406
|
+
indexer.finish();
|
|
407
|
+
finished = true;
|
|
408
|
+
}));
|
|
409
|
+
indexer.queueEmitter.on('pause', () => {
|
|
410
|
+
if (finished) return;
|
|
458
411
|
s.pause();
|
|
459
412
|
});
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
if (finished) { return; }
|
|
413
|
+
indexer.queueEmitter.on('resume', () => {
|
|
414
|
+
if (finished) return;
|
|
463
415
|
s.resume();
|
|
464
416
|
});
|
|
465
417
|
}
|
|
466
|
-
|
|
467
|
-
return function () {
|
|
418
|
+
return () => {
|
|
468
419
|
startIndex();
|
|
469
420
|
};
|
|
470
421
|
}
|
|
471
422
|
|
|
472
|
-
async function transformer(
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
423
|
+
async function transformer({
|
|
424
|
+
deleteIndex = false,
|
|
425
|
+
sourceClientConfig,
|
|
426
|
+
targetClientConfig,
|
|
427
|
+
bufferSize = DEFAULT_BUFFER_SIZE,
|
|
428
|
+
searchSize = DEFAULT_SEARCH_SIZE,
|
|
429
|
+
stream,
|
|
430
|
+
fileName,
|
|
431
|
+
splitRegex = /\n/,
|
|
432
|
+
sourceIndexName,
|
|
433
|
+
targetIndexName,
|
|
434
|
+
mappings,
|
|
435
|
+
mappingsOverride = false,
|
|
436
|
+
indexMappingTotalFieldsLimit,
|
|
437
|
+
pipeline,
|
|
438
|
+
populatedFields = false,
|
|
439
|
+
query,
|
|
440
|
+
skipHeader = false,
|
|
441
|
+
transform,
|
|
442
|
+
verbose = true
|
|
443
|
+
}) {
|
|
492
444
|
if (typeof targetIndexName === 'undefined') {
|
|
493
445
|
throw Error('targetIndexName must be specified.');
|
|
494
446
|
}
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
node: 'http://localhost:9200',
|
|
447
|
+
const defaultClientConfig = {
|
|
448
|
+
node: 'http://localhost:9200'
|
|
498
449
|
};
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
indexMappingTotalFieldsLimit: indexMappingTotalFieldsLimit,
|
|
513
|
-
verbose: verbose,
|
|
514
|
-
deleteIndex: deleteIndex,
|
|
450
|
+
const sourceClient = new elasticsearch.Client(sourceClientConfig || defaultClientConfig);
|
|
451
|
+
const targetClient = new elasticsearch.Client(targetClientConfig || sourceClientConfig || defaultClientConfig);
|
|
452
|
+
const createMapping = createMappingFactory({
|
|
453
|
+
sourceClient,
|
|
454
|
+
sourceIndexName,
|
|
455
|
+
targetClient,
|
|
456
|
+
targetIndexName,
|
|
457
|
+
mappings,
|
|
458
|
+
mappingsOverride,
|
|
459
|
+
indexMappingTotalFieldsLimit,
|
|
460
|
+
verbose,
|
|
461
|
+
deleteIndex,
|
|
462
|
+
pipeline
|
|
515
463
|
});
|
|
516
|
-
|
|
517
|
-
targetClient
|
|
518
|
-
targetIndexName
|
|
519
|
-
bufferSize
|
|
520
|
-
skipHeader
|
|
521
|
-
verbose
|
|
464
|
+
const indexer = indexQueueFactory({
|
|
465
|
+
targetClient,
|
|
466
|
+
targetIndexName,
|
|
467
|
+
bufferSize,
|
|
468
|
+
skipHeader,
|
|
469
|
+
verbose
|
|
522
470
|
});
|
|
523
|
-
|
|
524
471
|
function getReader() {
|
|
525
472
|
if (typeof fileName !== 'undefined' && typeof sourceIndexName !== 'undefined') {
|
|
526
473
|
throw Error('Only either one of fileName or sourceIndexName can be specified.');
|
|
527
474
|
}
|
|
528
|
-
|
|
529
|
-
if (
|
|
530
|
-
(typeof fileName !== 'undefined' && typeof sourceIndexName !== 'undefined') ||
|
|
531
|
-
(typeof fileName !== 'undefined' && typeof stream$$1 !== 'undefined') ||
|
|
532
|
-
(typeof sourceIndexName !== 'undefined' && typeof stream$$1 !== 'undefined')
|
|
533
|
-
) {
|
|
475
|
+
if (typeof fileName !== 'undefined' && typeof sourceIndexName !== 'undefined' || typeof fileName !== 'undefined' && typeof stream !== 'undefined' || typeof sourceIndexName !== 'undefined' && typeof stream !== 'undefined') {
|
|
534
476
|
throw Error('Only one of fileName, sourceIndexName, or stream can be specified.');
|
|
535
477
|
}
|
|
536
|
-
|
|
537
478
|
if (typeof fileName !== 'undefined') {
|
|
538
479
|
return fileReaderFactory(indexer, fileName, transform, splitRegex, verbose);
|
|
539
480
|
}
|
|
540
|
-
|
|
541
481
|
if (typeof sourceIndexName !== 'undefined') {
|
|
542
|
-
return indexReaderFactory(
|
|
543
|
-
indexer,
|
|
544
|
-
sourceIndexName,
|
|
545
|
-
transform,
|
|
546
|
-
sourceClient,
|
|
547
|
-
query,
|
|
548
|
-
searchSize,
|
|
549
|
-
populatedFields
|
|
550
|
-
);
|
|
482
|
+
return indexReaderFactory(indexer, sourceIndexName, transform, sourceClient, query, searchSize, populatedFields);
|
|
551
483
|
}
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
return streamReaderFactory(indexer, stream$$1, transform, splitRegex, verbose);
|
|
484
|
+
if (typeof stream !== 'undefined') {
|
|
485
|
+
return streamReaderFactory(indexer, stream, transform, splitRegex, verbose);
|
|
555
486
|
}
|
|
556
|
-
|
|
557
487
|
return null;
|
|
558
488
|
}
|
|
559
|
-
|
|
560
|
-
var reader = getReader();
|
|
561
|
-
|
|
489
|
+
const reader = getReader();
|
|
562
490
|
try {
|
|
563
|
-
|
|
564
|
-
|
|
491
|
+
const indexExists = await targetClient.indices.exists({
|
|
492
|
+
index: targetIndexName
|
|
493
|
+
});
|
|
565
494
|
if (indexExists === false) {
|
|
566
495
|
await createMapping();
|
|
567
496
|
reader();
|
|
568
497
|
} else if (deleteIndex === true) {
|
|
569
|
-
await targetClient.indices.delete({
|
|
498
|
+
await targetClient.indices.delete({
|
|
499
|
+
index: targetIndexName
|
|
500
|
+
});
|
|
570
501
|
await createMapping();
|
|
571
502
|
reader();
|
|
572
503
|
} else {
|
|
@@ -577,8 +508,10 @@ async function transformer(ref) {
|
|
|
577
508
|
} finally {
|
|
578
509
|
// targetClient.close();
|
|
579
510
|
}
|
|
580
|
-
|
|
581
|
-
|
|
511
|
+
return {
|
|
512
|
+
events: indexer.queueEmitter
|
|
513
|
+
};
|
|
582
514
|
}
|
|
583
515
|
|
|
584
516
|
module.exports = transformer;
|
|
517
|
+
//# sourceMappingURL=node-es-transformer.cjs.js.map
|