node-es-transformer 1.0.0-beta3 → 1.0.0-beta5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +95 -0
- package/README.md +6 -16
- package/changelog.config.js +3 -0
- package/dist/node-es-transformer.cjs.js +263 -339
- package/dist/node-es-transformer.cjs.js.map +1 -0
- package/dist/node-es-transformer.esm.js +254 -328
- package/dist/node-es-transformer.esm.js.map +1 -0
- package/package.json +7 -7
|
@@ -1,48 +1,46 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
var
|
|
6
|
-
var
|
|
7
|
-
var
|
|
8
|
-
var split = _interopDefault(require('split2'));
|
|
3
|
+
var elasticsearch = require('@elastic/elasticsearch');
|
|
4
|
+
var fs = require('fs');
|
|
5
|
+
var es = require('event-stream');
|
|
6
|
+
var glob = require('glob');
|
|
7
|
+
var split = require('split2');
|
|
9
8
|
var stream = require('stream');
|
|
10
|
-
var cliProgress =
|
|
11
|
-
var elasticsearch = _interopDefault(require('@elastic/elasticsearch'));
|
|
9
|
+
var cliProgress = require('cli-progress');
|
|
12
10
|
|
|
13
11
|
// In earlier versions this was used to set the number of docs to index in a
|
|
14
12
|
// single bulk request. Since we switched to use the helpers.bulk() method from
|
|
15
13
|
// the ES client, this now translates to the `flushBytes` option of the helper.
|
|
16
14
|
// However, for a kind of backwards compatibility with the old values, this uses
|
|
17
15
|
// KBytes instead of Bytes. It will be multiplied by 1024 in the index queue.
|
|
18
|
-
|
|
16
|
+
const DEFAULT_BUFFER_SIZE = 5120;
|
|
19
17
|
|
|
20
18
|
// The default number of docs to fetch in a single search request when reindexing.
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
function createMappingFactory(ref) {
|
|
24
|
-
var sourceClient = ref.sourceClient;
|
|
25
|
-
var sourceIndexName = ref.sourceIndexName;
|
|
26
|
-
var targetClient = ref.targetClient;
|
|
27
|
-
var targetIndexName = ref.targetIndexName;
|
|
28
|
-
var mappings = ref.mappings;
|
|
29
|
-
var mappingsOverride = ref.mappingsOverride;
|
|
30
|
-
var indexMappingTotalFieldsLimit = ref.indexMappingTotalFieldsLimit;
|
|
31
|
-
var verbose = ref.verbose;
|
|
32
|
-
var deleteIndex = ref.deleteIndex;
|
|
33
|
-
|
|
34
|
-
return async function () {
|
|
35
|
-
var targetMappings = mappingsOverride ? undefined : mappings;
|
|
19
|
+
const DEFAULT_SEARCH_SIZE = 1000;
|
|
36
20
|
|
|
21
|
+
function createMappingFactory({
|
|
22
|
+
sourceClient,
|
|
23
|
+
sourceIndexName,
|
|
24
|
+
targetClient,
|
|
25
|
+
targetIndexName,
|
|
26
|
+
mappings,
|
|
27
|
+
mappingsOverride,
|
|
28
|
+
indexMappingTotalFieldsLimit,
|
|
29
|
+
verbose,
|
|
30
|
+
deleteIndex,
|
|
31
|
+
pipeline
|
|
32
|
+
}) {
|
|
33
|
+
return async () => {
|
|
34
|
+
let targetMappings = mappingsOverride ? undefined : mappings;
|
|
37
35
|
if (sourceClient && sourceIndexName && typeof targetMappings === 'undefined') {
|
|
38
36
|
try {
|
|
39
|
-
|
|
40
|
-
index: sourceIndexName
|
|
37
|
+
const mapping = await sourceClient.indices.getMapping({
|
|
38
|
+
index: sourceIndexName
|
|
41
39
|
});
|
|
42
40
|
if (mapping[sourceIndexName]) {
|
|
43
41
|
targetMappings = mapping[sourceIndexName].mappings;
|
|
44
42
|
} else {
|
|
45
|
-
|
|
43
|
+
const allMappings = Object.values(mapping);
|
|
46
44
|
if (allMappings.length > 0) {
|
|
47
45
|
targetMappings = Object.values(mapping)[0].mappings;
|
|
48
46
|
}
|
|
@@ -52,36 +50,47 @@ function createMappingFactory(ref) {
|
|
|
52
50
|
return;
|
|
53
51
|
}
|
|
54
52
|
}
|
|
55
|
-
|
|
56
53
|
if (typeof targetMappings === 'object' && targetMappings !== null) {
|
|
57
54
|
if (mappingsOverride) {
|
|
58
|
-
targetMappings =
|
|
59
|
-
|
|
60
|
-
|
|
55
|
+
targetMappings = {
|
|
56
|
+
...targetMappings,
|
|
57
|
+
properties: {
|
|
58
|
+
...targetMappings.properties,
|
|
59
|
+
...mappings
|
|
60
|
+
}
|
|
61
|
+
};
|
|
61
62
|
}
|
|
62
|
-
|
|
63
63
|
try {
|
|
64
|
-
|
|
65
|
-
|
|
64
|
+
const indexExists = await targetClient.indices.exists({
|
|
65
|
+
index: targetIndexName
|
|
66
|
+
});
|
|
66
67
|
if (indexExists === true && deleteIndex === true) {
|
|
67
|
-
await targetClient.indices.delete({
|
|
68
|
+
await targetClient.indices.delete({
|
|
69
|
+
index: targetIndexName
|
|
70
|
+
});
|
|
68
71
|
}
|
|
69
|
-
|
|
70
72
|
if (indexExists === false || deleteIndex === true) {
|
|
71
|
-
|
|
73
|
+
const resp = await targetClient.indices.create({
|
|
72
74
|
index: targetIndexName,
|
|
73
|
-
body:
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
'index.number_of_replicas': 0,
|
|
80
|
-
},
|
|
75
|
+
body: {
|
|
76
|
+
mappings: targetMappings,
|
|
77
|
+
...(pipeline !== undefined ? {
|
|
78
|
+
settings: {
|
|
79
|
+
index: {
|
|
80
|
+
default_pipeline: pipeline
|
|
81
81
|
}
|
|
82
|
-
|
|
82
|
+
}
|
|
83
|
+
} : {}),
|
|
84
|
+
...(indexMappingTotalFieldsLimit !== undefined ? {
|
|
85
|
+
settings: {
|
|
86
|
+
'index.mapping.total_fields.limit': indexMappingTotalFieldsLimit,
|
|
87
|
+
'index.number_of_shards': 1,
|
|
88
|
+
'index.number_of_replicas': 0
|
|
89
|
+
}
|
|
90
|
+
} : {})
|
|
91
|
+
}
|
|
83
92
|
});
|
|
84
|
-
if (verbose)
|
|
93
|
+
if (verbose) console.log('Created target mapping', resp);
|
|
85
94
|
}
|
|
86
95
|
} catch (err) {
|
|
87
96
|
console.log('Error creating target mapping', err);
|
|
@@ -92,119 +101,99 @@ function createMappingFactory(ref) {
|
|
|
92
101
|
|
|
93
102
|
function fileReaderFactory(indexer, fileName, transform, splitRegex, verbose) {
|
|
94
103
|
function startIndex(files) {
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
try {
|
|
105
|
-
// skip empty lines
|
|
106
|
-
if (line === '') {
|
|
107
|
-
return;
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
var doc =
|
|
111
|
-
typeof transform === 'function'
|
|
112
|
-
? JSON.stringify(transform(JSON.parse(line)))
|
|
113
|
-
: line;
|
|
114
|
-
|
|
115
|
-
// if doc is undefined we'll skip indexing it
|
|
116
|
-
if (typeof doc === 'undefined') {
|
|
117
|
-
s.resume();
|
|
118
|
-
return;
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
// the transform callback may return an array of docs so we can emit
|
|
122
|
-
// multiple docs from a single line
|
|
123
|
-
if (Array.isArray(doc)) {
|
|
124
|
-
doc.forEach(function (d) { return indexer.add(d); });
|
|
125
|
-
return;
|
|
126
|
-
}
|
|
127
|
-
|
|
128
|
-
indexer.add(doc);
|
|
129
|
-
} catch (e) {
|
|
130
|
-
console.log('error', e);
|
|
131
|
-
}
|
|
132
|
-
})
|
|
133
|
-
.on('error', function (err) {
|
|
134
|
-
console.log('Error while reading file.', err);
|
|
135
|
-
})
|
|
136
|
-
.on('end', function () {
|
|
137
|
-
if (verbose) { console.log('Read entire file: ', file); }
|
|
138
|
-
if (files.length > 0) {
|
|
139
|
-
startIndex(files);
|
|
140
|
-
return;
|
|
141
|
-
}
|
|
104
|
+
let finished = false;
|
|
105
|
+
const file = files.shift();
|
|
106
|
+
const s = fs.createReadStream(file).pipe(split(splitRegex)).pipe(es.mapSync(line => {
|
|
107
|
+
try {
|
|
108
|
+
// skip empty lines
|
|
109
|
+
if (line === '') {
|
|
110
|
+
return;
|
|
111
|
+
}
|
|
112
|
+
const doc = typeof transform === 'function' ? JSON.stringify(transform(JSON.parse(line))) : line;
|
|
142
113
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
114
|
+
// if doc is undefined we'll skip indexing it
|
|
115
|
+
if (typeof doc === 'undefined') {
|
|
116
|
+
s.resume();
|
|
117
|
+
return;
|
|
118
|
+
}
|
|
147
119
|
|
|
148
|
-
|
|
149
|
-
|
|
120
|
+
// the transform callback may return an array of docs so we can emit
|
|
121
|
+
// multiple docs from a single line
|
|
122
|
+
if (Array.isArray(doc)) {
|
|
123
|
+
doc.forEach(d => indexer.add(d));
|
|
124
|
+
return;
|
|
125
|
+
}
|
|
126
|
+
indexer.add(doc);
|
|
127
|
+
} catch (e) {
|
|
128
|
+
console.log('error', e);
|
|
129
|
+
}
|
|
130
|
+
}).on('error', err => {
|
|
131
|
+
console.log('Error while reading file.', err);
|
|
132
|
+
}).on('end', () => {
|
|
133
|
+
if (verbose) console.log('Read entire file: ', file);
|
|
134
|
+
if (files.length > 0) {
|
|
135
|
+
startIndex(files);
|
|
136
|
+
return;
|
|
137
|
+
}
|
|
138
|
+
indexer.finish();
|
|
139
|
+
finished = true;
|
|
140
|
+
}));
|
|
141
|
+
indexer.queueEmitter.on('pause', () => {
|
|
142
|
+
if (finished) return;
|
|
150
143
|
s.pause();
|
|
151
144
|
});
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
if (finished) { return; }
|
|
145
|
+
indexer.queueEmitter.on('resume', () => {
|
|
146
|
+
if (finished) return;
|
|
155
147
|
s.resume();
|
|
156
148
|
});
|
|
157
149
|
}
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
glob(fileName, function (er, files) {
|
|
150
|
+
return () => {
|
|
151
|
+
glob(fileName, (er, files) => {
|
|
161
152
|
startIndex(files);
|
|
162
153
|
});
|
|
163
154
|
};
|
|
164
155
|
}
|
|
165
156
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
var parallelCalls = 5;
|
|
157
|
+
const EventEmitter = require('events');
|
|
158
|
+
const queueEmitter = new EventEmitter();
|
|
159
|
+
const parallelCalls = 5;
|
|
171
160
|
|
|
172
161
|
// a simple helper queue to bulk index documents
|
|
173
|
-
function indexQueueFactory(
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
162
|
+
function indexQueueFactory({
|
|
163
|
+
targetClient: client,
|
|
164
|
+
targetIndexName,
|
|
165
|
+
bufferSize = DEFAULT_BUFFER_SIZE,
|
|
166
|
+
skipHeader = false
|
|
167
|
+
}) {
|
|
168
|
+
let docsPerSecond = 0;
|
|
169
|
+
const flushBytes = bufferSize * 1024; // Convert KB to Bytes
|
|
170
|
+
const highWaterMark = flushBytes * parallelCalls;
|
|
182
171
|
|
|
183
172
|
// Create a Readable stream
|
|
184
|
-
|
|
185
|
-
read
|
|
186
|
-
|
|
173
|
+
const stream$1 = new stream.Readable({
|
|
174
|
+
read() {},
|
|
175
|
+
// Implement read but we manage pushing manually
|
|
176
|
+
highWaterMark // Buffer size for backpressure management
|
|
187
177
|
});
|
|
188
|
-
|
|
189
178
|
async function* ndjsonStreamIterator(readableStream) {
|
|
190
|
-
|
|
191
|
-
|
|
179
|
+
let buffer = ''; // To hold the incomplete data
|
|
180
|
+
let skippedHeader = false;
|
|
192
181
|
|
|
193
182
|
// Iterate over the stream using async iteration
|
|
194
|
-
for await (
|
|
183
|
+
for await (const chunk of readableStream) {
|
|
195
184
|
buffer += chunk.toString(); // Accumulate the chunk data in the buffer
|
|
196
185
|
|
|
197
186
|
// Split the buffer into lines (NDJSON items)
|
|
198
|
-
|
|
187
|
+
const lines = buffer.split('\n');
|
|
199
188
|
|
|
200
189
|
// The last line might be incomplete, so hold it back in the buffer
|
|
201
190
|
buffer = lines.pop();
|
|
202
191
|
|
|
203
192
|
// Yield each complete JSON object
|
|
204
|
-
for (
|
|
193
|
+
for (const line of lines) {
|
|
205
194
|
if (line.trim()) {
|
|
206
195
|
try {
|
|
207
|
-
if (!skipHeader ||
|
|
196
|
+
if (!skipHeader || skipHeader && !skippedHeader) {
|
|
208
197
|
yield JSON.parse(line); // Parse and yield the JSON object
|
|
209
198
|
skippedHeader = true;
|
|
210
199
|
}
|
|
@@ -225,122 +214,106 @@ function indexQueueFactory(ref) {
|
|
|
225
214
|
}
|
|
226
215
|
}
|
|
227
216
|
}
|
|
228
|
-
|
|
229
|
-
var finished = false;
|
|
217
|
+
let finished = false;
|
|
230
218
|
|
|
231
219
|
// Async IIFE to start bulk indexing
|
|
232
|
-
(async
|
|
233
|
-
|
|
220
|
+
(async () => {
|
|
221
|
+
const interval = setInterval(() => {
|
|
222
|
+
queueEmitter.emit('docsPerSecond', docsPerSecond);
|
|
223
|
+
docsPerSecond = 0;
|
|
224
|
+
}, 1000);
|
|
234
225
|
await client.helpers.bulk({
|
|
235
226
|
concurrency: parallelCalls,
|
|
236
|
-
flushBytes
|
|
227
|
+
flushBytes,
|
|
237
228
|
flushInterval: 1000,
|
|
238
229
|
refreshOnCompletion: true,
|
|
239
|
-
datasource: ndjsonStreamIterator(stream
|
|
240
|
-
onDocument
|
|
230
|
+
datasource: ndjsonStreamIterator(stream$1),
|
|
231
|
+
onDocument(doc) {
|
|
232
|
+
docsPerSecond++;
|
|
241
233
|
return {
|
|
242
|
-
index: {
|
|
234
|
+
index: {
|
|
235
|
+
_index: targetIndexName
|
|
236
|
+
}
|
|
243
237
|
};
|
|
244
|
-
}
|
|
238
|
+
}
|
|
245
239
|
});
|
|
246
|
-
|
|
247
|
-
|
|
240
|
+
clearInterval(interval);
|
|
248
241
|
queueEmitter.emit('finish');
|
|
249
242
|
})();
|
|
250
|
-
|
|
251
243
|
return {
|
|
252
|
-
add:
|
|
244
|
+
add: doc => {
|
|
253
245
|
if (finished) {
|
|
254
246
|
throw new Error('Unexpected doc added after indexer should finish.');
|
|
255
247
|
}
|
|
256
|
-
|
|
257
|
-
var canContinue = stream$$1.push(((JSON.stringify(doc)) + "\n"));
|
|
248
|
+
const canContinue = stream$1.push(`${JSON.stringify(doc)}\n`);
|
|
258
249
|
if (!canContinue) {
|
|
259
250
|
queueEmitter.emit('pause');
|
|
260
|
-
stream
|
|
251
|
+
stream$1.once('drain', () => {
|
|
261
252
|
queueEmitter.emit('resume');
|
|
262
253
|
});
|
|
263
254
|
}
|
|
264
255
|
},
|
|
265
|
-
finish:
|
|
256
|
+
finish: () => {
|
|
266
257
|
finished = true;
|
|
267
|
-
stream
|
|
258
|
+
stream$1.push(null);
|
|
268
259
|
},
|
|
269
|
-
queueEmitter
|
|
260
|
+
queueEmitter
|
|
270
261
|
};
|
|
271
262
|
}
|
|
272
263
|
|
|
273
264
|
// create a new progress bar instance and use shades_classic theme
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
function indexReaderFactory(
|
|
277
|
-
indexer,
|
|
278
|
-
sourceIndexName,
|
|
279
|
-
transform,
|
|
280
|
-
client,
|
|
281
|
-
query,
|
|
282
|
-
searchSize,
|
|
283
|
-
populatedFields
|
|
284
|
-
) {
|
|
285
|
-
if ( searchSize === void 0 ) searchSize = DEFAULT_SEARCH_SIZE;
|
|
286
|
-
if ( populatedFields === void 0 ) populatedFields = false;
|
|
287
|
-
|
|
265
|
+
const progressBar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
|
|
266
|
+
function indexReaderFactory(indexer, sourceIndexName, transform, client, query, searchSize = DEFAULT_SEARCH_SIZE, populatedFields = false) {
|
|
288
267
|
return async function indexReader() {
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
268
|
+
let docsNum = 0;
|
|
269
|
+
let scrollId;
|
|
270
|
+
let finished = false;
|
|
271
|
+
let readActive = false;
|
|
272
|
+
let backPressurePause = false;
|
|
295
273
|
async function fetchPopulatedFields() {
|
|
296
274
|
try {
|
|
297
|
-
|
|
275
|
+
// Get all populated fields from the index
|
|
276
|
+
const response = await client.fieldCaps({
|
|
298
277
|
index: sourceIndexName,
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
},
|
|
305
|
-
},
|
|
278
|
+
fields: '*',
|
|
279
|
+
include_empty_fields: false,
|
|
280
|
+
filters: '-metadata'
|
|
281
|
+
}, {
|
|
282
|
+
maxRetries: 0
|
|
306
283
|
});
|
|
307
|
-
|
|
308
|
-
// Get all field names for each returned doc and flatten it
|
|
309
|
-
// to a list of unique field names used across all docs.
|
|
310
|
-
return Array.from(new Set(response.hits.hits.map(function (d) { return Object.keys(d._source); }).flat(1)));
|
|
284
|
+
return Object.keys(response.fields);
|
|
311
285
|
} catch (e) {
|
|
312
286
|
console.log('error', e);
|
|
313
287
|
}
|
|
314
288
|
}
|
|
315
|
-
|
|
316
289
|
function search(fields) {
|
|
317
|
-
return client.search(
|
|
290
|
+
return client.search({
|
|
291
|
+
index: sourceIndexName,
|
|
318
292
|
scroll: '600s',
|
|
319
293
|
size: searchSize,
|
|
320
|
-
query
|
|
321
|
-
(fields ? {
|
|
294
|
+
query,
|
|
295
|
+
...(fields ? {
|
|
296
|
+
_source: fields
|
|
297
|
+
} : {})
|
|
298
|
+
});
|
|
322
299
|
}
|
|
323
|
-
|
|
324
300
|
function scroll(id) {
|
|
325
301
|
return client.scroll({
|
|
326
302
|
scroll_id: id,
|
|
327
|
-
scroll: '600s'
|
|
303
|
+
scroll: '600s'
|
|
328
304
|
});
|
|
329
305
|
}
|
|
330
|
-
|
|
331
|
-
var fieldsWithData;
|
|
306
|
+
let fieldsWithData;
|
|
332
307
|
|
|
333
308
|
// identify populated fields
|
|
334
309
|
if (populatedFields) {
|
|
335
310
|
fieldsWithData = await fetchPopulatedFields();
|
|
336
311
|
}
|
|
337
|
-
|
|
338
312
|
await fetchNextResponse();
|
|
339
|
-
|
|
340
313
|
function processHit(hit) {
|
|
341
314
|
docsNum += 1;
|
|
342
315
|
try {
|
|
343
|
-
|
|
316
|
+
const doc = typeof transform === 'function' ? transform(hit._source) : hit._source; // eslint-disable-line no-underscore-dangle
|
|
344
317
|
|
|
345
318
|
// if doc is undefined we'll skip indexing it
|
|
346
319
|
if (typeof doc === 'undefined') {
|
|
@@ -350,35 +323,27 @@ function indexReaderFactory(
|
|
|
350
323
|
// the transform callback may return an array of docs so we can emit
|
|
351
324
|
// multiple docs from a single line
|
|
352
325
|
if (Array.isArray(doc)) {
|
|
353
|
-
doc.forEach(
|
|
326
|
+
doc.forEach(d => indexer.add(d));
|
|
354
327
|
return;
|
|
355
328
|
}
|
|
356
|
-
|
|
357
329
|
indexer.add(doc);
|
|
358
330
|
} catch (e) {
|
|
359
331
|
console.log('error', e);
|
|
360
332
|
}
|
|
361
333
|
}
|
|
362
|
-
|
|
363
334
|
async function fetchNextResponse() {
|
|
364
335
|
readActive = true;
|
|
365
|
-
|
|
366
|
-
var sc = scrollId ? await scroll(scrollId) : await search(fieldsWithData);
|
|
367
|
-
|
|
336
|
+
const sc = scrollId ? await scroll(scrollId) : await search(fieldsWithData);
|
|
368
337
|
if (!scrollId) {
|
|
369
338
|
progressBar.start(sc.hits.total.value, 0);
|
|
370
339
|
}
|
|
371
|
-
|
|
372
340
|
scrollId = sc._scroll_id;
|
|
373
341
|
readActive = false;
|
|
374
|
-
|
|
375
342
|
processResponse(sc);
|
|
376
343
|
}
|
|
377
|
-
|
|
378
344
|
async function processResponse(response) {
|
|
379
345
|
// collect the docs from this response
|
|
380
346
|
response.hits.hits.forEach(processHit);
|
|
381
|
-
|
|
382
347
|
progressBar.update(docsNum);
|
|
383
348
|
|
|
384
349
|
// check to see if we have collected all of the docs
|
|
@@ -386,196 +351,153 @@ function indexReaderFactory(
|
|
|
386
351
|
indexer.finish();
|
|
387
352
|
return;
|
|
388
353
|
}
|
|
389
|
-
|
|
390
354
|
if (!backPressurePause) {
|
|
391
355
|
await fetchNextResponse();
|
|
392
356
|
}
|
|
393
357
|
}
|
|
394
|
-
|
|
395
|
-
indexer.queueEmitter.on('pause', async function () {
|
|
358
|
+
indexer.queueEmitter.on('pause', async () => {
|
|
396
359
|
backPressurePause = true;
|
|
397
360
|
});
|
|
398
|
-
|
|
399
|
-
indexer.queueEmitter.on('resume', async function () {
|
|
361
|
+
indexer.queueEmitter.on('resume', async () => {
|
|
400
362
|
backPressurePause = false;
|
|
401
|
-
|
|
402
363
|
if (readActive || finished) {
|
|
403
364
|
return;
|
|
404
365
|
}
|
|
405
|
-
|
|
406
366
|
await fetchNextResponse();
|
|
407
367
|
});
|
|
408
|
-
|
|
409
|
-
indexer.queueEmitter.on('finish', function () {
|
|
368
|
+
indexer.queueEmitter.on('finish', () => {
|
|
410
369
|
finished = true;
|
|
411
370
|
progressBar.stop();
|
|
412
371
|
});
|
|
413
372
|
};
|
|
414
373
|
}
|
|
415
374
|
|
|
416
|
-
function streamReaderFactory(indexer, stream
|
|
375
|
+
function streamReaderFactory(indexer, stream, transform, splitRegex, verbose) {
|
|
417
376
|
function startIndex() {
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
if (line === '') {
|
|
427
|
-
return;
|
|
428
|
-
}
|
|
429
|
-
|
|
430
|
-
var doc =
|
|
431
|
-
typeof transform === 'function' ? JSON.stringify(transform(JSON.parse(line))) : line;
|
|
432
|
-
|
|
433
|
-
// if doc is undefined we'll skip indexing it
|
|
434
|
-
if (typeof doc === 'undefined') {
|
|
435
|
-
s.resume();
|
|
436
|
-
return;
|
|
437
|
-
}
|
|
438
|
-
|
|
439
|
-
// the transform callback may return an array of docs so we can emit
|
|
440
|
-
// multiple docs from a single line
|
|
441
|
-
if (Array.isArray(doc)) {
|
|
442
|
-
doc.forEach(function (d) { return indexer.add(d); });
|
|
443
|
-
return;
|
|
444
|
-
}
|
|
377
|
+
let finished = false;
|
|
378
|
+
const s = stream.pipe(split(splitRegex)).pipe(es.mapSync(line => {
|
|
379
|
+
try {
|
|
380
|
+
// skip empty lines
|
|
381
|
+
if (line === '') {
|
|
382
|
+
return;
|
|
383
|
+
}
|
|
384
|
+
const doc = typeof transform === 'function' ? JSON.stringify(transform(JSON.parse(line))) : line;
|
|
445
385
|
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
}
|
|
451
|
-
.on('error', function (err) {
|
|
452
|
-
console.log('Error while reading file.', err);
|
|
453
|
-
})
|
|
454
|
-
.on('end', function () {
|
|
455
|
-
if (verbose) { console.log('Read entire stream.'); }
|
|
456
|
-
indexer.finish();
|
|
457
|
-
finished = true;
|
|
458
|
-
})
|
|
459
|
-
);
|
|
386
|
+
// if doc is undefined we'll skip indexing it
|
|
387
|
+
if (typeof doc === 'undefined') {
|
|
388
|
+
s.resume();
|
|
389
|
+
return;
|
|
390
|
+
}
|
|
460
391
|
|
|
461
|
-
|
|
462
|
-
|
|
392
|
+
// the transform callback may return an array of docs so we can emit
|
|
393
|
+
// multiple docs from a single line
|
|
394
|
+
if (Array.isArray(doc)) {
|
|
395
|
+
doc.forEach(d => indexer.add(d));
|
|
396
|
+
return;
|
|
397
|
+
}
|
|
398
|
+
indexer.add(doc);
|
|
399
|
+
} catch (e) {
|
|
400
|
+
console.log('error', e);
|
|
401
|
+
}
|
|
402
|
+
}).on('error', err => {
|
|
403
|
+
console.log('Error while reading stream.', err);
|
|
404
|
+
}).on('end', () => {
|
|
405
|
+
if (verbose) console.log('Read entire stream.');
|
|
406
|
+
indexer.finish();
|
|
407
|
+
finished = true;
|
|
408
|
+
}));
|
|
409
|
+
indexer.queueEmitter.on('pause', () => {
|
|
410
|
+
if (finished) return;
|
|
463
411
|
s.pause();
|
|
464
412
|
});
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
if (finished) { return; }
|
|
413
|
+
indexer.queueEmitter.on('resume', () => {
|
|
414
|
+
if (finished) return;
|
|
468
415
|
s.resume();
|
|
469
416
|
});
|
|
470
417
|
}
|
|
471
|
-
|
|
472
|
-
return function () {
|
|
418
|
+
return () => {
|
|
473
419
|
startIndex();
|
|
474
420
|
};
|
|
475
421
|
}
|
|
476
422
|
|
|
477
|
-
async function transformer(
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
423
|
+
async function transformer({
|
|
424
|
+
deleteIndex = false,
|
|
425
|
+
sourceClientConfig,
|
|
426
|
+
targetClientConfig,
|
|
427
|
+
bufferSize = DEFAULT_BUFFER_SIZE,
|
|
428
|
+
searchSize = DEFAULT_SEARCH_SIZE,
|
|
429
|
+
stream,
|
|
430
|
+
fileName,
|
|
431
|
+
splitRegex = /\n/,
|
|
432
|
+
sourceIndexName,
|
|
433
|
+
targetIndexName,
|
|
434
|
+
mappings,
|
|
435
|
+
mappingsOverride = false,
|
|
436
|
+
indexMappingTotalFieldsLimit,
|
|
437
|
+
pipeline,
|
|
438
|
+
populatedFields = false,
|
|
439
|
+
query,
|
|
440
|
+
skipHeader = false,
|
|
441
|
+
transform,
|
|
442
|
+
verbose = true
|
|
443
|
+
}) {
|
|
498
444
|
if (typeof targetIndexName === 'undefined') {
|
|
499
445
|
throw Error('targetIndexName must be specified.');
|
|
500
446
|
}
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
node: 'http://localhost:9200',
|
|
447
|
+
const defaultClientConfig = {
|
|
448
|
+
node: 'http://localhost:9200'
|
|
504
449
|
};
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
indexMappingTotalFieldsLimit: indexMappingTotalFieldsLimit,
|
|
519
|
-
verbose: verbose,
|
|
520
|
-
deleteIndex: deleteIndex,
|
|
450
|
+
const sourceClient = new elasticsearch.Client(sourceClientConfig || defaultClientConfig);
|
|
451
|
+
const targetClient = new elasticsearch.Client(targetClientConfig || sourceClientConfig || defaultClientConfig);
|
|
452
|
+
const createMapping = createMappingFactory({
|
|
453
|
+
sourceClient,
|
|
454
|
+
sourceIndexName,
|
|
455
|
+
targetClient,
|
|
456
|
+
targetIndexName,
|
|
457
|
+
mappings,
|
|
458
|
+
mappingsOverride,
|
|
459
|
+
indexMappingTotalFieldsLimit,
|
|
460
|
+
verbose,
|
|
461
|
+
deleteIndex,
|
|
462
|
+
pipeline
|
|
521
463
|
});
|
|
522
|
-
|
|
523
|
-
targetClient
|
|
524
|
-
targetIndexName
|
|
525
|
-
bufferSize
|
|
526
|
-
skipHeader
|
|
527
|
-
verbose
|
|
464
|
+
const indexer = indexQueueFactory({
|
|
465
|
+
targetClient,
|
|
466
|
+
targetIndexName,
|
|
467
|
+
bufferSize,
|
|
468
|
+
skipHeader,
|
|
469
|
+
verbose
|
|
528
470
|
});
|
|
529
|
-
|
|
530
471
|
function getReader() {
|
|
531
472
|
if (typeof fileName !== 'undefined' && typeof sourceIndexName !== 'undefined') {
|
|
532
473
|
throw Error('Only either one of fileName or sourceIndexName can be specified.');
|
|
533
474
|
}
|
|
534
|
-
|
|
535
|
-
if (
|
|
536
|
-
(typeof fileName !== 'undefined' && typeof sourceIndexName !== 'undefined') ||
|
|
537
|
-
(typeof fileName !== 'undefined' && typeof stream$$1 !== 'undefined') ||
|
|
538
|
-
(typeof sourceIndexName !== 'undefined' && typeof stream$$1 !== 'undefined')
|
|
539
|
-
) {
|
|
475
|
+
if (typeof fileName !== 'undefined' && typeof sourceIndexName !== 'undefined' || typeof fileName !== 'undefined' && typeof stream !== 'undefined' || typeof sourceIndexName !== 'undefined' && typeof stream !== 'undefined') {
|
|
540
476
|
throw Error('Only one of fileName, sourceIndexName, or stream can be specified.');
|
|
541
477
|
}
|
|
542
|
-
|
|
543
478
|
if (typeof fileName !== 'undefined') {
|
|
544
479
|
return fileReaderFactory(indexer, fileName, transform, splitRegex, verbose);
|
|
545
480
|
}
|
|
546
|
-
|
|
547
481
|
if (typeof sourceIndexName !== 'undefined') {
|
|
548
|
-
return indexReaderFactory(
|
|
549
|
-
indexer,
|
|
550
|
-
sourceIndexName,
|
|
551
|
-
transform,
|
|
552
|
-
sourceClient,
|
|
553
|
-
query,
|
|
554
|
-
searchSize,
|
|
555
|
-
populatedFields
|
|
556
|
-
);
|
|
482
|
+
return indexReaderFactory(indexer, sourceIndexName, transform, sourceClient, query, searchSize, populatedFields);
|
|
557
483
|
}
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
console.log('STREAM READER');
|
|
561
|
-
return streamReaderFactory(indexer, stream$$1, transform, splitRegex, verbose);
|
|
484
|
+
if (typeof stream !== 'undefined') {
|
|
485
|
+
return streamReaderFactory(indexer, stream, transform, splitRegex, verbose);
|
|
562
486
|
}
|
|
563
|
-
|
|
564
487
|
return null;
|
|
565
488
|
}
|
|
566
|
-
|
|
567
|
-
var reader = getReader();
|
|
568
|
-
console.log('READER INITIALIZED');
|
|
569
|
-
|
|
489
|
+
const reader = getReader();
|
|
570
490
|
try {
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
491
|
+
const indexExists = await targetClient.indices.exists({
|
|
492
|
+
index: targetIndexName
|
|
493
|
+
});
|
|
574
494
|
if (indexExists === false) {
|
|
575
495
|
await createMapping();
|
|
576
496
|
reader();
|
|
577
497
|
} else if (deleteIndex === true) {
|
|
578
|
-
await targetClient.indices.delete({
|
|
498
|
+
await targetClient.indices.delete({
|
|
499
|
+
index: targetIndexName
|
|
500
|
+
});
|
|
579
501
|
await createMapping();
|
|
580
502
|
reader();
|
|
581
503
|
} else {
|
|
@@ -586,8 +508,10 @@ async function transformer(ref) {
|
|
|
586
508
|
} finally {
|
|
587
509
|
// targetClient.close();
|
|
588
510
|
}
|
|
589
|
-
|
|
590
|
-
|
|
511
|
+
return {
|
|
512
|
+
events: indexer.queueEmitter
|
|
513
|
+
};
|
|
591
514
|
}
|
|
592
515
|
|
|
593
516
|
module.exports = transformer;
|
|
517
|
+
//# sourceMappingURL=node-es-transformer.cjs.js.map
|