node-es-transformer 1.0.0-beta3 → 1.0.0-beta5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +95 -0
- package/README.md +6 -16
- package/changelog.config.js +3 -0
- package/dist/node-es-transformer.cjs.js +263 -339
- package/dist/node-es-transformer.cjs.js.map +1 -0
- package/dist/node-es-transformer.esm.js +254 -328
- package/dist/node-es-transformer.esm.js.map +1 -0
- package/package.json +7 -7
|
@@ -1,44 +1,44 @@
|
|
|
1
|
+
import elasticsearch from '@elastic/elasticsearch';
|
|
1
2
|
import fs from 'fs';
|
|
2
3
|
import es from 'event-stream';
|
|
3
4
|
import glob from 'glob';
|
|
4
5
|
import split from 'split2';
|
|
5
6
|
import { Readable } from 'stream';
|
|
6
7
|
import cliProgress from 'cli-progress';
|
|
7
|
-
import elasticsearch from '@elastic/elasticsearch';
|
|
8
8
|
|
|
9
9
|
// In earlier versions this was used to set the number of docs to index in a
|
|
10
10
|
// single bulk request. Since we switched to use the helpers.bulk() method from
|
|
11
11
|
// the ES client, this now translates to the `flushBytes` option of the helper.
|
|
12
12
|
// However, for a degree of backwards compatibility with the old values, this uses
|
|
13
13
|
// KBytes instead of Bytes. It will be multiplied by 1024 in the index queue.
|
|
14
|
-
|
|
14
|
+
const DEFAULT_BUFFER_SIZE = 5120;
|
|
15
15
|
|
|
16
16
|
// The default number of docs to fetch in a single search request when reindexing.
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
function createMappingFactory(ref) {
|
|
20
|
-
var sourceClient = ref.sourceClient;
|
|
21
|
-
var sourceIndexName = ref.sourceIndexName;
|
|
22
|
-
var targetClient = ref.targetClient;
|
|
23
|
-
var targetIndexName = ref.targetIndexName;
|
|
24
|
-
var mappings = ref.mappings;
|
|
25
|
-
var mappingsOverride = ref.mappingsOverride;
|
|
26
|
-
var indexMappingTotalFieldsLimit = ref.indexMappingTotalFieldsLimit;
|
|
27
|
-
var verbose = ref.verbose;
|
|
28
|
-
var deleteIndex = ref.deleteIndex;
|
|
29
|
-
|
|
30
|
-
return async function () {
|
|
31
|
-
var targetMappings = mappingsOverride ? undefined : mappings;
|
|
17
|
+
const DEFAULT_SEARCH_SIZE = 1000;
|
|
32
18
|
|
|
19
|
+
function createMappingFactory({
|
|
20
|
+
sourceClient,
|
|
21
|
+
sourceIndexName,
|
|
22
|
+
targetClient,
|
|
23
|
+
targetIndexName,
|
|
24
|
+
mappings,
|
|
25
|
+
mappingsOverride,
|
|
26
|
+
indexMappingTotalFieldsLimit,
|
|
27
|
+
verbose,
|
|
28
|
+
deleteIndex,
|
|
29
|
+
pipeline
|
|
30
|
+
}) {
|
|
31
|
+
return async () => {
|
|
32
|
+
let targetMappings = mappingsOverride ? undefined : mappings;
|
|
33
33
|
if (sourceClient && sourceIndexName && typeof targetMappings === 'undefined') {
|
|
34
34
|
try {
|
|
35
|
-
|
|
36
|
-
index: sourceIndexName
|
|
35
|
+
const mapping = await sourceClient.indices.getMapping({
|
|
36
|
+
index: sourceIndexName
|
|
37
37
|
});
|
|
38
38
|
if (mapping[sourceIndexName]) {
|
|
39
39
|
targetMappings = mapping[sourceIndexName].mappings;
|
|
40
40
|
} else {
|
|
41
|
-
|
|
41
|
+
const allMappings = Object.values(mapping);
|
|
42
42
|
if (allMappings.length > 0) {
|
|
43
43
|
targetMappings = Object.values(mapping)[0].mappings;
|
|
44
44
|
}
|
|
@@ -48,36 +48,47 @@ function createMappingFactory(ref) {
|
|
|
48
48
|
return;
|
|
49
49
|
}
|
|
50
50
|
}
|
|
51
|
-
|
|
52
51
|
if (typeof targetMappings === 'object' && targetMappings !== null) {
|
|
53
52
|
if (mappingsOverride) {
|
|
54
|
-
targetMappings =
|
|
55
|
-
|
|
56
|
-
|
|
53
|
+
targetMappings = {
|
|
54
|
+
...targetMappings,
|
|
55
|
+
properties: {
|
|
56
|
+
...targetMappings.properties,
|
|
57
|
+
...mappings
|
|
58
|
+
}
|
|
59
|
+
};
|
|
57
60
|
}
|
|
58
|
-
|
|
59
61
|
try {
|
|
60
|
-
|
|
61
|
-
|
|
62
|
+
const indexExists = await targetClient.indices.exists({
|
|
63
|
+
index: targetIndexName
|
|
64
|
+
});
|
|
62
65
|
if (indexExists === true && deleteIndex === true) {
|
|
63
|
-
await targetClient.indices.delete({
|
|
66
|
+
await targetClient.indices.delete({
|
|
67
|
+
index: targetIndexName
|
|
68
|
+
});
|
|
64
69
|
}
|
|
65
|
-
|
|
66
70
|
if (indexExists === false || deleteIndex === true) {
|
|
67
|
-
|
|
71
|
+
const resp = await targetClient.indices.create({
|
|
68
72
|
index: targetIndexName,
|
|
69
|
-
body:
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
'index.number_of_replicas': 0,
|
|
76
|
-
},
|
|
73
|
+
body: {
|
|
74
|
+
mappings: targetMappings,
|
|
75
|
+
...(pipeline !== undefined ? {
|
|
76
|
+
settings: {
|
|
77
|
+
index: {
|
|
78
|
+
default_pipeline: pipeline
|
|
77
79
|
}
|
|
78
|
-
|
|
80
|
+
}
|
|
81
|
+
} : {}),
|
|
82
|
+
...(indexMappingTotalFieldsLimit !== undefined ? {
|
|
83
|
+
settings: {
|
|
84
|
+
'index.mapping.total_fields.limit': indexMappingTotalFieldsLimit,
|
|
85
|
+
'index.number_of_shards': 1,
|
|
86
|
+
'index.number_of_replicas': 0
|
|
87
|
+
}
|
|
88
|
+
} : {})
|
|
89
|
+
}
|
|
79
90
|
});
|
|
80
|
-
if (verbose)
|
|
91
|
+
if (verbose) console.log('Created target mapping', resp);
|
|
81
92
|
}
|
|
82
93
|
} catch (err) {
|
|
83
94
|
console.log('Error creating target mapping', err);
|
|
@@ -88,119 +99,99 @@ function createMappingFactory(ref) {
|
|
|
88
99
|
|
|
89
100
|
function fileReaderFactory(indexer, fileName, transform, splitRegex, verbose) {
|
|
90
101
|
function startIndex(files) {
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
try {
|
|
101
|
-
// skip empty lines
|
|
102
|
-
if (line === '') {
|
|
103
|
-
return;
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
var doc =
|
|
107
|
-
typeof transform === 'function'
|
|
108
|
-
? JSON.stringify(transform(JSON.parse(line)))
|
|
109
|
-
: line;
|
|
110
|
-
|
|
111
|
-
// if doc is undefined we'll skip indexing it
|
|
112
|
-
if (typeof doc === 'undefined') {
|
|
113
|
-
s.resume();
|
|
114
|
-
return;
|
|
115
|
-
}
|
|
116
|
-
|
|
117
|
-
// the transform callback may return an array of docs so we can emit
|
|
118
|
-
// multiple docs from a single line
|
|
119
|
-
if (Array.isArray(doc)) {
|
|
120
|
-
doc.forEach(function (d) { return indexer.add(d); });
|
|
121
|
-
return;
|
|
122
|
-
}
|
|
123
|
-
|
|
124
|
-
indexer.add(doc);
|
|
125
|
-
} catch (e) {
|
|
126
|
-
console.log('error', e);
|
|
127
|
-
}
|
|
128
|
-
})
|
|
129
|
-
.on('error', function (err) {
|
|
130
|
-
console.log('Error while reading file.', err);
|
|
131
|
-
})
|
|
132
|
-
.on('end', function () {
|
|
133
|
-
if (verbose) { console.log('Read entire file: ', file); }
|
|
134
|
-
if (files.length > 0) {
|
|
135
|
-
startIndex(files);
|
|
136
|
-
return;
|
|
137
|
-
}
|
|
102
|
+
let finished = false;
|
|
103
|
+
const file = files.shift();
|
|
104
|
+
const s = fs.createReadStream(file).pipe(split(splitRegex)).pipe(es.mapSync(line => {
|
|
105
|
+
try {
|
|
106
|
+
// skip empty lines
|
|
107
|
+
if (line === '') {
|
|
108
|
+
return;
|
|
109
|
+
}
|
|
110
|
+
const doc = typeof transform === 'function' ? JSON.stringify(transform(JSON.parse(line))) : line;
|
|
138
111
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
112
|
+
// if doc is undefined we'll skip indexing it
|
|
113
|
+
if (typeof doc === 'undefined') {
|
|
114
|
+
s.resume();
|
|
115
|
+
return;
|
|
116
|
+
}
|
|
143
117
|
|
|
144
|
-
|
|
145
|
-
|
|
118
|
+
// the transform callback may return an array of docs so we can emit
|
|
119
|
+
// multiple docs from a single line
|
|
120
|
+
if (Array.isArray(doc)) {
|
|
121
|
+
doc.forEach(d => indexer.add(d));
|
|
122
|
+
return;
|
|
123
|
+
}
|
|
124
|
+
indexer.add(doc);
|
|
125
|
+
} catch (e) {
|
|
126
|
+
console.log('error', e);
|
|
127
|
+
}
|
|
128
|
+
}).on('error', err => {
|
|
129
|
+
console.log('Error while reading file.', err);
|
|
130
|
+
}).on('end', () => {
|
|
131
|
+
if (verbose) console.log('Read entire file: ', file);
|
|
132
|
+
if (files.length > 0) {
|
|
133
|
+
startIndex(files);
|
|
134
|
+
return;
|
|
135
|
+
}
|
|
136
|
+
indexer.finish();
|
|
137
|
+
finished = true;
|
|
138
|
+
}));
|
|
139
|
+
indexer.queueEmitter.on('pause', () => {
|
|
140
|
+
if (finished) return;
|
|
146
141
|
s.pause();
|
|
147
142
|
});
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
if (finished) { return; }
|
|
143
|
+
indexer.queueEmitter.on('resume', () => {
|
|
144
|
+
if (finished) return;
|
|
151
145
|
s.resume();
|
|
152
146
|
});
|
|
153
147
|
}
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
glob(fileName, function (er, files) {
|
|
148
|
+
return () => {
|
|
149
|
+
glob(fileName, (er, files) => {
|
|
157
150
|
startIndex(files);
|
|
158
151
|
});
|
|
159
152
|
};
|
|
160
153
|
}
|
|
161
154
|
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
var parallelCalls = 5;
|
|
155
|
+
const EventEmitter = require('events');
|
|
156
|
+
const queueEmitter = new EventEmitter();
|
|
157
|
+
const parallelCalls = 5;
|
|
167
158
|
|
|
168
159
|
// a simple helper queue to bulk index documents
|
|
169
|
-
function indexQueueFactory(
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
160
|
+
function indexQueueFactory({
|
|
161
|
+
targetClient: client,
|
|
162
|
+
targetIndexName,
|
|
163
|
+
bufferSize = DEFAULT_BUFFER_SIZE,
|
|
164
|
+
skipHeader = false
|
|
165
|
+
}) {
|
|
166
|
+
let docsPerSecond = 0;
|
|
167
|
+
const flushBytes = bufferSize * 1024; // Convert KB to Bytes
|
|
168
|
+
const highWaterMark = flushBytes * parallelCalls;
|
|
178
169
|
|
|
179
170
|
// Create a Readable stream
|
|
180
|
-
|
|
181
|
-
read
|
|
182
|
-
|
|
171
|
+
const stream = new Readable({
|
|
172
|
+
read() {},
|
|
173
|
+
// Implement read but we manage pushing manually
|
|
174
|
+
highWaterMark // Buffer size for backpressure management
|
|
183
175
|
});
|
|
184
|
-
|
|
185
176
|
async function* ndjsonStreamIterator(readableStream) {
|
|
186
|
-
|
|
187
|
-
|
|
177
|
+
let buffer = ''; // To hold the incomplete data
|
|
178
|
+
let skippedHeader = false;
|
|
188
179
|
|
|
189
180
|
// Iterate over the stream using async iteration
|
|
190
|
-
for await (
|
|
181
|
+
for await (const chunk of readableStream) {
|
|
191
182
|
buffer += chunk.toString(); // Accumulate the chunk data in the buffer
|
|
192
183
|
|
|
193
184
|
// Split the buffer into lines (NDJSON items)
|
|
194
|
-
|
|
185
|
+
const lines = buffer.split('\n');
|
|
195
186
|
|
|
196
187
|
// The last line might be incomplete, so hold it back in the buffer
|
|
197
188
|
buffer = lines.pop();
|
|
198
189
|
|
|
199
190
|
// Yield each complete JSON object
|
|
200
|
-
for (
|
|
191
|
+
for (const line of lines) {
|
|
201
192
|
if (line.trim()) {
|
|
202
193
|
try {
|
|
203
|
-
if (!skipHeader ||
|
|
194
|
+
if (!skipHeader || skipHeader && !skippedHeader) {
|
|
204
195
|
yield JSON.parse(line); // Parse and yield the JSON object
|
|
205
196
|
skippedHeader = true;
|
|
206
197
|
}
|
|
@@ -221,122 +212,106 @@ function indexQueueFactory(ref) {
|
|
|
221
212
|
}
|
|
222
213
|
}
|
|
223
214
|
}
|
|
224
|
-
|
|
225
|
-
var finished = false;
|
|
215
|
+
let finished = false;
|
|
226
216
|
|
|
227
217
|
// Async IIFE to start bulk indexing
|
|
228
|
-
(async
|
|
229
|
-
|
|
218
|
+
(async () => {
|
|
219
|
+
const interval = setInterval(() => {
|
|
220
|
+
queueEmitter.emit('docsPerSecond', docsPerSecond);
|
|
221
|
+
docsPerSecond = 0;
|
|
222
|
+
}, 1000);
|
|
230
223
|
await client.helpers.bulk({
|
|
231
224
|
concurrency: parallelCalls,
|
|
232
|
-
flushBytes
|
|
225
|
+
flushBytes,
|
|
233
226
|
flushInterval: 1000,
|
|
234
227
|
refreshOnCompletion: true,
|
|
235
228
|
datasource: ndjsonStreamIterator(stream),
|
|
236
|
-
onDocument
|
|
229
|
+
onDocument(doc) {
|
|
230
|
+
docsPerSecond++;
|
|
237
231
|
return {
|
|
238
|
-
index: {
|
|
232
|
+
index: {
|
|
233
|
+
_index: targetIndexName
|
|
234
|
+
}
|
|
239
235
|
};
|
|
240
|
-
}
|
|
236
|
+
}
|
|
241
237
|
});
|
|
242
|
-
|
|
243
|
-
|
|
238
|
+
clearInterval(interval);
|
|
244
239
|
queueEmitter.emit('finish');
|
|
245
240
|
})();
|
|
246
|
-
|
|
247
241
|
return {
|
|
248
|
-
add:
|
|
242
|
+
add: doc => {
|
|
249
243
|
if (finished) {
|
|
250
244
|
throw new Error('Unexpected doc added after indexer should finish.');
|
|
251
245
|
}
|
|
252
|
-
|
|
253
|
-
var canContinue = stream.push(((JSON.stringify(doc)) + "\n"));
|
|
246
|
+
const canContinue = stream.push(`${JSON.stringify(doc)}\n`);
|
|
254
247
|
if (!canContinue) {
|
|
255
248
|
queueEmitter.emit('pause');
|
|
256
|
-
stream.once('drain',
|
|
249
|
+
stream.once('drain', () => {
|
|
257
250
|
queueEmitter.emit('resume');
|
|
258
251
|
});
|
|
259
252
|
}
|
|
260
253
|
},
|
|
261
|
-
finish:
|
|
254
|
+
finish: () => {
|
|
262
255
|
finished = true;
|
|
263
256
|
stream.push(null);
|
|
264
257
|
},
|
|
265
|
-
queueEmitter
|
|
258
|
+
queueEmitter
|
|
266
259
|
};
|
|
267
260
|
}
|
|
268
261
|
|
|
269
262
|
// create a new progress bar instance and use shades_classic theme
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
function indexReaderFactory(
|
|
273
|
-
indexer,
|
|
274
|
-
sourceIndexName,
|
|
275
|
-
transform,
|
|
276
|
-
client,
|
|
277
|
-
query,
|
|
278
|
-
searchSize,
|
|
279
|
-
populatedFields
|
|
280
|
-
) {
|
|
281
|
-
if ( searchSize === void 0 ) searchSize = DEFAULT_SEARCH_SIZE;
|
|
282
|
-
if ( populatedFields === void 0 ) populatedFields = false;
|
|
283
|
-
|
|
263
|
+
const progressBar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
|
|
264
|
+
function indexReaderFactory(indexer, sourceIndexName, transform, client, query, searchSize = DEFAULT_SEARCH_SIZE, populatedFields = false) {
|
|
284
265
|
return async function indexReader() {
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
266
|
+
let docsNum = 0;
|
|
267
|
+
let scrollId;
|
|
268
|
+
let finished = false;
|
|
269
|
+
let readActive = false;
|
|
270
|
+
let backPressurePause = false;
|
|
291
271
|
async function fetchPopulatedFields() {
|
|
292
272
|
try {
|
|
293
|
-
|
|
273
|
+
// Get all populated fields from the index
|
|
274
|
+
const response = await client.fieldCaps({
|
|
294
275
|
index: sourceIndexName,
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
},
|
|
301
|
-
},
|
|
276
|
+
fields: '*',
|
|
277
|
+
include_empty_fields: false,
|
|
278
|
+
filters: '-metadata'
|
|
279
|
+
}, {
|
|
280
|
+
maxRetries: 0
|
|
302
281
|
});
|
|
303
|
-
|
|
304
|
-
// Get all field names for each returned doc and flatten it
|
|
305
|
-
// to a list of unique field names used across all docs.
|
|
306
|
-
return Array.from(new Set(response.hits.hits.map(function (d) { return Object.keys(d._source); }).flat(1)));
|
|
282
|
+
return Object.keys(response.fields);
|
|
307
283
|
} catch (e) {
|
|
308
284
|
console.log('error', e);
|
|
309
285
|
}
|
|
310
286
|
}
|
|
311
|
-
|
|
312
287
|
function search(fields) {
|
|
313
|
-
return client.search(
|
|
288
|
+
return client.search({
|
|
289
|
+
index: sourceIndexName,
|
|
314
290
|
scroll: '600s',
|
|
315
291
|
size: searchSize,
|
|
316
|
-
query
|
|
317
|
-
(fields ? {
|
|
292
|
+
query,
|
|
293
|
+
...(fields ? {
|
|
294
|
+
_source: fields
|
|
295
|
+
} : {})
|
|
296
|
+
});
|
|
318
297
|
}
|
|
319
|
-
|
|
320
298
|
function scroll(id) {
|
|
321
299
|
return client.scroll({
|
|
322
300
|
scroll_id: id,
|
|
323
|
-
scroll: '600s'
|
|
301
|
+
scroll: '600s'
|
|
324
302
|
});
|
|
325
303
|
}
|
|
326
|
-
|
|
327
|
-
var fieldsWithData;
|
|
304
|
+
let fieldsWithData;
|
|
328
305
|
|
|
329
306
|
// identify populated fields
|
|
330
307
|
if (populatedFields) {
|
|
331
308
|
fieldsWithData = await fetchPopulatedFields();
|
|
332
309
|
}
|
|
333
|
-
|
|
334
310
|
await fetchNextResponse();
|
|
335
|
-
|
|
336
311
|
function processHit(hit) {
|
|
337
312
|
docsNum += 1;
|
|
338
313
|
try {
|
|
339
|
-
|
|
314
|
+
const doc = typeof transform === 'function' ? transform(hit._source) : hit._source; // eslint-disable-line no-underscore-dangle
|
|
340
315
|
|
|
341
316
|
// if doc is undefined we'll skip indexing it
|
|
342
317
|
if (typeof doc === 'undefined') {
|
|
@@ -346,35 +321,27 @@ function indexReaderFactory(
|
|
|
346
321
|
// the transform callback may return an array of docs so we can emit
|
|
347
322
|
// multiple docs from a single line
|
|
348
323
|
if (Array.isArray(doc)) {
|
|
349
|
-
doc.forEach(
|
|
324
|
+
doc.forEach(d => indexer.add(d));
|
|
350
325
|
return;
|
|
351
326
|
}
|
|
352
|
-
|
|
353
327
|
indexer.add(doc);
|
|
354
328
|
} catch (e) {
|
|
355
329
|
console.log('error', e);
|
|
356
330
|
}
|
|
357
331
|
}
|
|
358
|
-
|
|
359
332
|
async function fetchNextResponse() {
|
|
360
333
|
readActive = true;
|
|
361
|
-
|
|
362
|
-
var sc = scrollId ? await scroll(scrollId) : await search(fieldsWithData);
|
|
363
|
-
|
|
334
|
+
const sc = scrollId ? await scroll(scrollId) : await search(fieldsWithData);
|
|
364
335
|
if (!scrollId) {
|
|
365
336
|
progressBar.start(sc.hits.total.value, 0);
|
|
366
337
|
}
|
|
367
|
-
|
|
368
338
|
scrollId = sc._scroll_id;
|
|
369
339
|
readActive = false;
|
|
370
|
-
|
|
371
340
|
processResponse(sc);
|
|
372
341
|
}
|
|
373
|
-
|
|
374
342
|
async function processResponse(response) {
|
|
375
343
|
// collect the docs from this response
|
|
376
344
|
response.hits.hits.forEach(processHit);
|
|
377
|
-
|
|
378
345
|
progressBar.update(docsNum);
|
|
379
346
|
|
|
380
347
|
// check to see if we have collected all of the docs
|
|
@@ -382,27 +349,21 @@ function indexReaderFactory(
|
|
|
382
349
|
indexer.finish();
|
|
383
350
|
return;
|
|
384
351
|
}
|
|
385
|
-
|
|
386
352
|
if (!backPressurePause) {
|
|
387
353
|
await fetchNextResponse();
|
|
388
354
|
}
|
|
389
355
|
}
|
|
390
|
-
|
|
391
|
-
indexer.queueEmitter.on('pause', async function () {
|
|
356
|
+
indexer.queueEmitter.on('pause', async () => {
|
|
392
357
|
backPressurePause = true;
|
|
393
358
|
});
|
|
394
|
-
|
|
395
|
-
indexer.queueEmitter.on('resume', async function () {
|
|
359
|
+
indexer.queueEmitter.on('resume', async () => {
|
|
396
360
|
backPressurePause = false;
|
|
397
|
-
|
|
398
361
|
if (readActive || finished) {
|
|
399
362
|
return;
|
|
400
363
|
}
|
|
401
|
-
|
|
402
364
|
await fetchNextResponse();
|
|
403
365
|
});
|
|
404
|
-
|
|
405
|
-
indexer.queueEmitter.on('finish', function () {
|
|
366
|
+
indexer.queueEmitter.on('finish', () => {
|
|
406
367
|
finished = true;
|
|
407
368
|
progressBar.stop();
|
|
408
369
|
});
|
|
@@ -411,167 +372,130 @@ function indexReaderFactory(
|
|
|
411
372
|
|
|
412
373
|
function streamReaderFactory(indexer, stream, transform, splitRegex, verbose) {
|
|
413
374
|
function startIndex() {
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
if (line === '') {
|
|
423
|
-
return;
|
|
424
|
-
}
|
|
425
|
-
|
|
426
|
-
var doc =
|
|
427
|
-
typeof transform === 'function' ? JSON.stringify(transform(JSON.parse(line))) : line;
|
|
428
|
-
|
|
429
|
-
// if doc is undefined we'll skip indexing it
|
|
430
|
-
if (typeof doc === 'undefined') {
|
|
431
|
-
s.resume();
|
|
432
|
-
return;
|
|
433
|
-
}
|
|
434
|
-
|
|
435
|
-
// the transform callback may return an array of docs so we can emit
|
|
436
|
-
// multiple docs from a single line
|
|
437
|
-
if (Array.isArray(doc)) {
|
|
438
|
-
doc.forEach(function (d) { return indexer.add(d); });
|
|
439
|
-
return;
|
|
440
|
-
}
|
|
375
|
+
let finished = false;
|
|
376
|
+
const s = stream.pipe(split(splitRegex)).pipe(es.mapSync(line => {
|
|
377
|
+
try {
|
|
378
|
+
// skip empty lines
|
|
379
|
+
if (line === '') {
|
|
380
|
+
return;
|
|
381
|
+
}
|
|
382
|
+
const doc = typeof transform === 'function' ? JSON.stringify(transform(JSON.parse(line))) : line;
|
|
441
383
|
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
}
|
|
447
|
-
.on('error', function (err) {
|
|
448
|
-
console.log('Error while reading file.', err);
|
|
449
|
-
})
|
|
450
|
-
.on('end', function () {
|
|
451
|
-
if (verbose) { console.log('Read entire stream.'); }
|
|
452
|
-
indexer.finish();
|
|
453
|
-
finished = true;
|
|
454
|
-
})
|
|
455
|
-
);
|
|
384
|
+
// if doc is undefined we'll skip indexing it
|
|
385
|
+
if (typeof doc === 'undefined') {
|
|
386
|
+
s.resume();
|
|
387
|
+
return;
|
|
388
|
+
}
|
|
456
389
|
|
|
457
|
-
|
|
458
|
-
|
|
390
|
+
// the transform callback may return an array of docs so we can emit
|
|
391
|
+
// multiple docs from a single line
|
|
392
|
+
if (Array.isArray(doc)) {
|
|
393
|
+
doc.forEach(d => indexer.add(d));
|
|
394
|
+
return;
|
|
395
|
+
}
|
|
396
|
+
indexer.add(doc);
|
|
397
|
+
} catch (e) {
|
|
398
|
+
console.log('error', e);
|
|
399
|
+
}
|
|
400
|
+
}).on('error', err => {
|
|
401
|
+
console.log('Error while reading stream.', err);
|
|
402
|
+
}).on('end', () => {
|
|
403
|
+
if (verbose) console.log('Read entire stream.');
|
|
404
|
+
indexer.finish();
|
|
405
|
+
finished = true;
|
|
406
|
+
}));
|
|
407
|
+
indexer.queueEmitter.on('pause', () => {
|
|
408
|
+
if (finished) return;
|
|
459
409
|
s.pause();
|
|
460
410
|
});
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
if (finished) { return; }
|
|
411
|
+
indexer.queueEmitter.on('resume', () => {
|
|
412
|
+
if (finished) return;
|
|
464
413
|
s.resume();
|
|
465
414
|
});
|
|
466
415
|
}
|
|
467
|
-
|
|
468
|
-
return function () {
|
|
416
|
+
return () => {
|
|
469
417
|
startIndex();
|
|
470
418
|
};
|
|
471
419
|
}
|
|
472
420
|
|
|
473
|
-
async function transformer(
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
421
|
+
async function transformer({
|
|
422
|
+
deleteIndex = false,
|
|
423
|
+
sourceClientConfig,
|
|
424
|
+
targetClientConfig,
|
|
425
|
+
bufferSize = DEFAULT_BUFFER_SIZE,
|
|
426
|
+
searchSize = DEFAULT_SEARCH_SIZE,
|
|
427
|
+
stream,
|
|
428
|
+
fileName,
|
|
429
|
+
splitRegex = /\n/,
|
|
430
|
+
sourceIndexName,
|
|
431
|
+
targetIndexName,
|
|
432
|
+
mappings,
|
|
433
|
+
mappingsOverride = false,
|
|
434
|
+
indexMappingTotalFieldsLimit,
|
|
435
|
+
pipeline,
|
|
436
|
+
populatedFields = false,
|
|
437
|
+
query,
|
|
438
|
+
skipHeader = false,
|
|
439
|
+
transform,
|
|
440
|
+
verbose = true
|
|
441
|
+
}) {
|
|
494
442
|
if (typeof targetIndexName === 'undefined') {
|
|
495
443
|
throw Error('targetIndexName must be specified.');
|
|
496
444
|
}
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
node: 'http://localhost:9200',
|
|
445
|
+
const defaultClientConfig = {
|
|
446
|
+
node: 'http://localhost:9200'
|
|
500
447
|
};
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
indexMappingTotalFieldsLimit: indexMappingTotalFieldsLimit,
|
|
515
|
-
verbose: verbose,
|
|
516
|
-
deleteIndex: deleteIndex,
|
|
448
|
+
const sourceClient = new elasticsearch.Client(sourceClientConfig || defaultClientConfig);
|
|
449
|
+
const targetClient = new elasticsearch.Client(targetClientConfig || sourceClientConfig || defaultClientConfig);
|
|
450
|
+
const createMapping = createMappingFactory({
|
|
451
|
+
sourceClient,
|
|
452
|
+
sourceIndexName,
|
|
453
|
+
targetClient,
|
|
454
|
+
targetIndexName,
|
|
455
|
+
mappings,
|
|
456
|
+
mappingsOverride,
|
|
457
|
+
indexMappingTotalFieldsLimit,
|
|
458
|
+
verbose,
|
|
459
|
+
deleteIndex,
|
|
460
|
+
pipeline
|
|
517
461
|
});
|
|
518
|
-
|
|
519
|
-
targetClient
|
|
520
|
-
targetIndexName
|
|
521
|
-
bufferSize
|
|
522
|
-
skipHeader
|
|
523
|
-
verbose
|
|
462
|
+
const indexer = indexQueueFactory({
|
|
463
|
+
targetClient,
|
|
464
|
+
targetIndexName,
|
|
465
|
+
bufferSize,
|
|
466
|
+
skipHeader,
|
|
467
|
+
verbose
|
|
524
468
|
});
|
|
525
|
-
|
|
526
469
|
function getReader() {
|
|
527
470
|
if (typeof fileName !== 'undefined' && typeof sourceIndexName !== 'undefined') {
|
|
528
471
|
throw Error('Only either one of fileName or sourceIndexName can be specified.');
|
|
529
472
|
}
|
|
530
|
-
|
|
531
|
-
if (
|
|
532
|
-
(typeof fileName !== 'undefined' && typeof sourceIndexName !== 'undefined') ||
|
|
533
|
-
(typeof fileName !== 'undefined' && typeof stream !== 'undefined') ||
|
|
534
|
-
(typeof sourceIndexName !== 'undefined' && typeof stream !== 'undefined')
|
|
535
|
-
) {
|
|
473
|
+
if (typeof fileName !== 'undefined' && typeof sourceIndexName !== 'undefined' || typeof fileName !== 'undefined' && typeof stream !== 'undefined' || typeof sourceIndexName !== 'undefined' && typeof stream !== 'undefined') {
|
|
536
474
|
throw Error('Only one of fileName, sourceIndexName, or stream can be specified.');
|
|
537
475
|
}
|
|
538
|
-
|
|
539
476
|
if (typeof fileName !== 'undefined') {
|
|
540
477
|
return fileReaderFactory(indexer, fileName, transform, splitRegex, verbose);
|
|
541
478
|
}
|
|
542
|
-
|
|
543
479
|
if (typeof sourceIndexName !== 'undefined') {
|
|
544
|
-
return indexReaderFactory(
|
|
545
|
-
indexer,
|
|
546
|
-
sourceIndexName,
|
|
547
|
-
transform,
|
|
548
|
-
sourceClient,
|
|
549
|
-
query,
|
|
550
|
-
searchSize,
|
|
551
|
-
populatedFields
|
|
552
|
-
);
|
|
480
|
+
return indexReaderFactory(indexer, sourceIndexName, transform, sourceClient, query, searchSize, populatedFields);
|
|
553
481
|
}
|
|
554
|
-
|
|
555
482
|
if (typeof stream !== 'undefined') {
|
|
556
|
-
console.log('STREAM READER');
|
|
557
483
|
return streamReaderFactory(indexer, stream, transform, splitRegex, verbose);
|
|
558
484
|
}
|
|
559
|
-
|
|
560
485
|
return null;
|
|
561
486
|
}
|
|
562
|
-
|
|
563
|
-
var reader = getReader();
|
|
564
|
-
console.log('READER INITIALIZED');
|
|
565
|
-
|
|
487
|
+
const reader = getReader();
|
|
566
488
|
try {
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
489
|
+
const indexExists = await targetClient.indices.exists({
|
|
490
|
+
index: targetIndexName
|
|
491
|
+
});
|
|
570
492
|
if (indexExists === false) {
|
|
571
493
|
await createMapping();
|
|
572
494
|
reader();
|
|
573
495
|
} else if (deleteIndex === true) {
|
|
574
|
-
await targetClient.indices.delete({
|
|
496
|
+
await targetClient.indices.delete({
|
|
497
|
+
index: targetIndexName
|
|
498
|
+
});
|
|
575
499
|
await createMapping();
|
|
576
500
|
reader();
|
|
577
501
|
} else {
|
|
@@ -582,8 +506,10 @@ async function transformer(ref) {
|
|
|
582
506
|
} finally {
|
|
583
507
|
// targetClient.close();
|
|
584
508
|
}
|
|
585
|
-
|
|
586
|
-
|
|
509
|
+
return {
|
|
510
|
+
events: indexer.queueEmitter
|
|
511
|
+
};
|
|
587
512
|
}
|
|
588
513
|
|
|
589
|
-
export default
|
|
514
|
+
export { transformer as default };
|
|
515
|
+
//# sourceMappingURL=node-es-transformer.esm.js.map
|