@engine9/input-tools 2.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/file/tools.js ADDED
@@ -0,0 +1,359 @@
1
+ import fs from 'node:fs';
2
+ import path from 'node:path';
3
+ import debug$0 from 'debug';
4
+ import os from 'node:os';
5
+ import { mkdirp } from 'mkdirp';
6
+ import nodestream from 'node:stream';
7
+ import JSON5 from 'json5';
8
+ import unzipper from 'unzipper';
9
+ import dayjs from 'dayjs';
10
+ import clientS3 from '@aws-sdk/client-s3';
11
+ import { v7 as uuidv7 } from 'uuid';
12
+
13
// Promise-returning variants of the fs API.
const fsp = fs.promises;

// Stream classes used by the batch/debatch helpers and the S3 zip reader.
const { Transform, PassThrough } = nodestream;

// AWS SDK v3 pieces needed to read a zip packet straight out of S3.
const { S3Client, HeadObjectCommand, GetObjectCommand } = clientS3;

// Two debug channels: one for general tracing, one for progress messages.
const debug = debug$0('@engine9/input-tools');
const progress = debug$0('info:@engine9/input-tools');
20
+
21
/**
 * Resolve (and create if needed) the per-account, per-day temp directory:
 * `<os tmpdir>/<accountId>/<YYYY-MM-DD>` where the date is the current UTC day.
 *
 * @param {object} [options]
 * @param {string} [options.accountId='engine9'] - directory segment for the account
 * @returns {Promise<string>} the directory path
 * @throws rethrows any mkdirp failure other than EEXIST
 */
async function getTempDir({ accountId = 'engine9' } = {}) {
  // toISOString is always UTC, so the first 10 characters are the UTC date.
  const day = new Date().toISOString().substring(0, 10);
  const dir = [os.tmpdir(), accountId, day].join(path.sep);
  try {
    await mkdirp(dir);
  } catch (err) {
    // An already-existing directory is fine; anything else is a real failure.
    if (err.code !== 'EEXIST') throw err;
  }
  return dir;
}
30
/**
 * Get a new temp filename, creating any necessary directories.
 *
 * @param {object} [options] - also forwarded to getTempDir (e.g. accountId)
 * @param {string} [options.targetFilename] - exact file name to use. A value starting
 *   with `/` or `\` is treated as a full path and returned untouched; otherwise the
 *   file is placed in a fresh timestamp-named subdirectory of the temp dir.
 * @param {string} [options.prefix] - prepended (with `_`) to the generated name
 * @param {string} [options.postfix] - appended to the generated name (default `.txt`)
 * @param {string} [options.targetFormat] - `csv` implies a `.csv` postfix when none is given
 * @param {string} [options.source] - source file; its sanitized base name replaces the postfix
 * @returns {Promise<string>} path of the file to write (the file itself is not created)
 */
async function getTempFilename(options = {}) {
  let dir = await getTempDir(options);
  const target = options.targetFilename;
  if (target) {
    if (target.startsWith('/') || target.startsWith('\\')) {
      // assume a full directory path has been specified
      return target;
    }
    // make a distinct directory, so we don't overwrite the file
    const stamp = new Date()
      .toISOString()
      .slice(0, -6)
      .replace(/[^0-9]/g, '_');
    dir = `${dir}/${stamp}`;
    // mkdirp resolves to the first directory it had to create, or undefined when
    // everything already exists -- so build the result from `dir`, not its return value.
    await mkdirp(dir);
    return `${dir}/${target}`;
  }

  let { prefix, postfix } = options;
  const { targetFormat } = options;
  if (!postfix && targetFormat === 'csv') postfix = '.csv';
  if (options.source) {
    // Use the source's base name as a friendly suffix, stripped of unsafe characters.
    postfix = `_${options.source.split('/').pop()}`;
    postfix = postfix.replace(/['"\\]/g, '').replace(/[^a-zA-Z0-9_.-]/g, '_');
  }
  if (prefix) prefix += '_';
  return `${dir}/${prefix || ''}${uuidv7()}${postfix || '.txt'}`;
}
64
/**
 * Write `options.content` to a freshly named temp file.
 *
 * @param {object} options - passed through to getTempFilename
 * @param {string|Buffer} options.content - data handed to fs.promises.writeFile
 * @param {string} [options.postfix='.txt'] - file name postfix
 * @returns {Promise<{filename: string}>} the path that was written
 */
async function writeTempFile(options) {
  const { content } = options;
  // Only an undefined postfix falls back to '.txt', same as a destructuring default.
  const postfix = options.postfix === undefined ? '.txt' : options.postfix;
  const filename = await getTempFilename({ ...options, postfix });
  await fsp.writeFile(filename, content);
  return { filename };
}
70
/**
 * Open a zip packet and return unzipper's directory object (which exposes `files`).
 *
 * @param {object} options
 * @param {string} options.packet - local path to a zip, or an `s3://bucket/key` URI
 * @returns {Promise<object>} the unzipper directory for the packet
 */
async function getPacketFiles({ packet }) {
  if (packet.indexOf('s3://') !== 0) {
    const localDirectory = await unzipper.Open.file(packet);
    return localDirectory;
  }

  // 's3://bucket/some/key' splits into ['s3:', '', 'bucket', 'some', 'key'].
  const parts = packet.split('/');
  const Bucket = parts[2];
  const Key = parts.slice(3).join('/');
  const s3Client = new S3Client({});
  debug('Getting ', { Bucket, Key });

  const directory = await unzipper.Open.custom({
    // Total object size, needed so the zip's trailing directory can be located.
    async size() {
      const info = await s3Client.send(new HeadObjectCommand({ Bucket, Key }));
      progress(`Retrieving file of size ${info.ContentLength / (1024 * 1024)} MB`);
      return info.ContentLength;
    },
    // Ranged read. unzipper's custom-source contract passes a byte COUNT as the
    // second argument, so the range end is offset + length (open-ended when omitted),
    // matching unzipper's own S3 adapter.
    stream(offset, length) {
      const ptStream = new PassThrough();
      const end = length ? offset + length : '';
      s3Client
        .send(
          new GetObjectCommand({
            Bucket,
            Key,
            Range: `bytes=${offset}-${end}`
          })
        )
        .then((response) => {
          // pipe() does not forward source errors, so surface them on the returned stream.
          response.Body.on('error', (error) => ptStream.destroy(error));
          response.Body.pipe(ptStream);
        })
        .catch((error) => {
          ptStream.destroy(error);
        });
      return ptStream;
    }
  });
  return directory;
}
115
/**
 * Read and parse `manifest.json` from the root of a packet.
 *
 * @param {object} options
 * @param {string} options.packet - packet location, as accepted by getPacketFiles
 * @returns {Promise<object>} the parsed manifest
 * @throws {Error} when no packet is given, the packet has no manifest.json,
 *   or the manifest is not valid JSON
 */
async function getManifest({ packet } = {}) {
  if (!packet) throw new Error('no packet option specififed');
  const { files } = await getPacketFiles({ packet });
  const file = files.find((d) => d.path === 'manifest.json');
  // Fail with a clear message instead of a TypeError on `undefined.buffer()`.
  if (!file) throw new Error(`No manifest.json found in packet ${packet}`);
  const content = await file.buffer();
  return JSON.parse(content.toString());
}
123
/**
 * Build an object-mode Transform that groups incoming chunks into arrays.
 *
 * A batch is emitted as soon as it holds at least `batchSize` items; whatever is
 * left over is emitted when the stream ends. Note that chunks are accumulated with
 * Array.prototype.concat, so a chunk that is itself an array contributes its
 * elements rather than being kept as one nested item.
 *
 * @param {object} [options]
 * @param {number} [options.batchSize=100] - minimum number of items per emitted batch
 * @returns {{transform: Transform}} wrapper holding the stream
 */
function getBatchTransform({ batchSize = 100 } = {}) {
  // Items collected since the last emitted batch.
  let pending = [];
  const transform = new Transform({
    objectMode: true,
    transform(chunk, encoding, cb) {
      pending = pending.concat(chunk);
      if (pending.length >= batchSize) {
        this.push(pending);
        pending = [];
      }
      cb();
    },
    flush(cb) {
      // Emit the final, possibly short, batch.
      if (pending.length > 0) this.push(pending);
      cb();
    }
  });
  return { transform };
}
142
/**
 * Build an object-mode Transform that undoes batching: each incoming chunk is
 * iterated with forEach and every element is pushed downstream individually.
 *
 * @returns {{transform: Transform}} wrapper holding the stream
 */
function getDebatchTransform() {
  const debatcher = new Transform({
    objectMode: true,
    transform(batch, _encoding, done) {
      batch.forEach((item) => {
        this.push(item);
      });
      done();
    }
  });
  return { transform: debatcher };
}
153
/**
 * Load a file either from a packet (selected by manifest `type`) or from disk.
 *
 * Content whose path ends in `.json` or `.json5` is parsed with JSON5 and the
 * parsed value is returned. Otherwise the raw content is returned: a string when
 * read from a packet, a Buffer when read from `filename` (readFile is called
 * without an encoding).
 *
 * @param {object} options
 * @param {string} [options.filename] - local file to read when no packet is given
 * @param {string} [options.packet] - packet location, as accepted by getPacketFiles
 * @param {string} [options.type] - manifest file type to select inside the packet
 * @returns {Promise<*>} parsed JSON5 value, or the raw content
 * @throws {Error} when neither packet nor filename is given, when the packet does
 *   not contain exactly one file of `type`, when the manifest entry is missing
 *   from the archive, or when JSON5 parsing fails
 */
async function getFile({ filename, packet, type } = {}) {
  if (!packet && !filename) throw new Error('no packet option specififed');
  let content = null;
  let filePath = null;
  if (packet) {
    const manifest = await getManifest({ packet });
    const manifestFiles = manifest.files?.filter((d) => d.type === type);
    if (!manifestFiles?.length) throw new Error(`No files of type ${type} found in packet`);
    if (manifestFiles.length > 1) throw new Error(`Multiple files of type ${type} found in packet`);
    filePath = manifestFiles[0].path;
    const { files } = await getPacketFiles({ packet });
    const handle = files.find((d) => d.path === filePath);
    // The manifest can reference a path the archive does not actually contain.
    if (!handle) throw new Error(`File ${filePath} listed in manifest not found in packet`);
    const buffer = await handle.buffer();
    content = buffer.toString();
  } else {
    content = await fsp.readFile(filename);
    filePath = filename.split('/').pop();
  }
  if (filePath.slice(-5) === '.json' || filePath.slice(-6) === '.json5') {
    try {
      return JSON5.parse(content);
    } catch (e) {
      // Log the file being parsed (not the node:path module) before rethrowing.
      debug(`Erroring parsing json content from ${filePath}`, content);
      throw e;
    }
  }
  return content;
}
181
/**
 * Open a read stream for the single file of the given `type` inside a packet.
 *
 * @param {object} options
 * @param {string} options.packet - packet location, as accepted by getPacketFiles
 * @param {string} [options.type] - manifest file type to select
 * @returns {Promise<{stream: object, path: string}>} the entry's stream and its
 *   path inside the packet
 * @throws {Error} when no packet is given, when the packet does not contain exactly
 *   one file of `type`, or when the manifest entry is missing from the archive
 */
async function streamPacket({ packet, type } = {}) {
  if (!packet) throw new Error('no packet option specififed');
  const manifest = await getManifest({ packet });
  const manifestFiles = manifest.files?.filter((d) => d.type === type);
  if (!manifestFiles?.length) throw new Error(`No files of type ${type} found in packet`);
  if (manifestFiles.length > 1) throw new Error(`Multiple files of type ${type} found in packet`);
  const filePath = manifestFiles[0].path;
  const { files } = await getPacketFiles({ packet });
  const handle = files.find((d) => d.path === filePath);
  // The manifest can reference a path the archive does not actually contain.
  if (!handle) throw new Error(`File ${filePath} listed in manifest not found in packet`);
  return { stream: handle.stream(), path: filePath };
}
192
/**
 * Extract the single file of the given `type` from a packet into a temp file.
 *
 * @param {object} options
 * @param {string} options.packet - packet location, as accepted by getPacketFiles
 * @param {string} [options.type='person'] - manifest file type to extract
 * @returns {Promise<{filename: string}>} path of the extracted file
 * @throws rejects if the packet cannot be read or either stream fails
 */
async function downloadFile({ packet, type = 'person' } = {}) {
  const { stream: fileStream, path: filePath } = await streamPacket({ packet, type });
  const filename = await getTempFilename({ targetFilename: filePath.split('/').pop() });
  // pipeline rejects on an error from EITHER side and tears both streams down;
  // a bare pipe() with a handler on the writer would hang on a source error.
  await nodestream.promises.pipeline(fileStream, fs.createWriteStream(filename));
  return { filename };
}
204
/**
 * Tell whether `d` is a Date instance holding a real point in time
 * (i.e. not an "Invalid Date").
 *
 * @param {*} d - value to check
 * @returns {boolean}
 */
function isValidDate(d) {
  if (!(d instanceof Date)) return false;
  // An invalid Date has NaN as its time value.
  return !Number.isNaN(d.getTime());
}
208
/**
 * Loosely interpret a value as a boolean.
 *
 * - undefined, null and '' yield the default (false unless `_defaultVal` is given)
 * - non-strings are judged by their truthiness
 * - the string '1' is true
 * - any other string is true when it contains a 'y' or a 't', case-insensitively
 *   (so 'yes', 'Y', 'true', 'T' are true; 'no', 'false', '0' are false)
 *
 * @param {*} x - value to interpret
 * @param {*} [_defaultVal=false] - returned for undefined/null/''
 * @returns {*} a boolean, or `_defaultVal` as given
 */
function bool(x, _defaultVal) {
  if (x === undefined || x === null || x === '') {
    return _defaultVal === undefined ? false : _defaultVal;
  }
  if (typeof x !== 'string') return Boolean(x);
  if (x === '1') return true; // the only digit string treated as true
  const lowered = x.toLowerCase();
  return lowered.includes('y') || lowered.includes('t');
}
216
/**
 * Normalize a value into an array of trimmed, non-empty strings.
 *
 * - a string is split on commas
 * - a non-zero number becomes a one-element array
 * - any falsy value (including 0) becomes an empty array
 * - anything else is used as-is and must provide `map` (e.g. an array)
 *
 * Null/undefined entries and entries that trim to '' are dropped.
 *
 * @param {string|number|Array|null|undefined} s - value to normalize
 * @param {boolean} [nonZeroLength] - when truthy, an empty result is replaced by `[0]`
 *   (the number zero, not a string)
 * @returns {Array} the normalized values
 */
function getStringArray(s, nonZeroLength) {
  let values;
  if (typeof s === 'string') values = s.split(',');
  else if (!s) values = [];
  else if (typeof s === 'number') values = [String(s)];
  else values = s;

  const cleaned = values
    // null/undefined have no toString; map them to '' so they are filtered out below
    .map((x) => (x === null || x === undefined ? '' : x.toString().trim()))
    .filter(Boolean);

  if (nonZeroLength && cleaned.length === 0) return [0];
  return cleaned;
}
225
/**
 * Turn a flexible date specification into a Date.
 *
 * Accepted forms of `s`:
 * - falsy or 'none'            -> null
 * - anything with a getMonth() -> returned unchanged
 * - an integer (number or numeric string) -> new Date(integer)
 * - a relative expression `[+-]<count><unit>[.start|.end[.<unit>]]` where unit is one of
 *   Y/y (years), M (months), w (weeks), d (days), h (hours), m (minutes), s (seconds),
 *   applied to `_initialDate` (or now); `.start`/`.end` snap to the start/end of the
 *   named dayjs unit, defaulting to 'day'
 * - 'now'                      -> the current time
 * - anything else              -> new Date(s)
 *
 * @param {*} s - the date specification
 * @param {*} [_initialDate] - base for relative expressions; defaults to the current time
 * @returns {Date|null}
 * @throws {Error} for values that do not produce a valid date, or unknown options
 */
function relativeDate(s, _initialDate) {
  if (!s || s === 'none') return null;
  if (typeof s.getMonth === 'function') return s;

  // We actually want a double equals here to test strings as well
  const epochMillis = parseInt(s, 10);
  if (epochMillis == s) {
    const fromEpoch = new Date(epochMillis);
    if (!isValidDate(fromEpoch)) throw new Error(`Invalid integer date:${s}`);
    return fromEpoch;
  }

  const baseDate = _initialDate ? new Date(_initialDate) : new Date();

  const relative = s.match(/^([+-]{1})([0-9]+)([YyMwdhms]{1})([.a-z]*)$/);
  if (relative) {
    const [, sign, amount, unit, modifiers] = relative;
    const periodByUnit = {
      Y: 'years',
      y: 'years',
      M: 'months',
      w: 'weeks',
      d: 'days',
      h: 'hours',
      m: 'minutes',
      s: 'seconds'
    };
    // The regex only admits the keys above; 'minutes' mirrors the original fallback.
    const period = periodByUnit[unit] ?? 'minutes';
    const count = parseInt(amount, 10);

    const start = dayjs(baseDate);
    let shifted = sign === '+' ? start.add(count, period) : start.subtract(count, period);
    if (!isValidDate(shifted.toDate())) throw new Error(`Invalid date configuration:${relative}`);

    if (modifiers) {
      const [anchor, anchorUnit] = modifiers.split('.').filter(Boolean);
      if (anchor === 'start') {
        shifted = shifted.startOf(anchorUnit || 'day');
      } else if (anchor === 'end') {
        shifted = shifted.endOf(anchorUnit || 'day');
      } else {
        throw new Error(`Invalid relative date,unknown options:${modifiers}`);
      }
    }
    return shifted.toDate();
  }

  if (s === 'now') {
    return dayjs(new Date()).toDate();
  }

  const absolute = dayjs(new Date(s)).toDate();
  if (!isValidDate(absolute)) throw new Error(`Invalid Date: ${s}`);
  return absolute;
}
293
/**
 * Return a shallow copy of `o` with every value converted to a string.
 *
 * Records being compared may come from a file (all strings) or from a database
 * or elsewhere (mixed types), so deduping needs a uniform representation.
 * Values whose typeof is 'object' (including null and arrays) are JSON-stringified;
 * everything else goes through String().
 *
 * @param {object} o - source record
 * @returns {object} new object with the same keys and string values
 */
function makeStrings(o) {
  const stringified = {};
  for (const [key, value] of Object.entries(o)) {
    if (typeof value === 'object') {
      stringified[key] = JSON.stringify(value);
    } else {
      stringified[key] = String(value);
    }
  }
  return stringified;
}
303
/**
 * Insert `postfix` as a dot-separated segment just before a file's extension.
 *
 *   appendPostfix('dir/file.csv', 'x')    -> 'dir/file.x.csv'
 *   appendPostfix('dir/file.csv.gz', 'x') -> 'dir/file.x.csv.gz'  (".gz" travels with the extension)
 *   appendPostfix('dir/file', 'x')        -> 'dir/file.x'          (no extension)
 *
 * Segments already equal to `postfix` are removed first, so applying the same
 * postfix twice does not duplicate it. Empty segments (from leading or doubled
 * dots) are dropped.
 *
 * @param {string} filename - '/'-separated path or bare file name
 * @param {string} postfix - segment to insert
 * @returns {string} the path with the postfix inserted into its last component
 */
function appendPostfix(filename, postfix) {
  const pathParts = filename.split('/');
  const nameParts = pathParts
    .slice(-1)[0]
    .split('.')
    .filter(Boolean)
    .filter((d) => d !== postfix);

  // How many trailing segments count as the extension: two for "*.ext.gz", else one.
  const wanted = nameParts[nameParts.length - 1] === 'gz' ? 2 : 1;
  // Always leave the base name in front of the postfix, so extension-less names
  // ('file', 'file.gz') do not end up as 'x.file' / 'x.file.gz'.
  const extensionCount = Math.min(wanted, Math.max(nameParts.length - 1, 0));
  const splitAt = nameParts.length - extensionCount;

  const targetFile = nameParts
    .slice(0, splitAt)
    .concat(postfix)
    .concat(nameParts.slice(splitAt))
    .join('.');
  return pathParts.slice(0, -1).concat(targetFile).join('/');
}
318
/**
 * Coerce a value into an object using JSON5.
 *
 * - falsy input returns `defaultVal` when that is truthy, otherwise the input itself
 * - objects are returned unchanged
 * - strings are parsed with JSON5.parse
 *
 * @param {*} o - object, JSON5 string, or falsy value
 * @param {*} [defaultVal] - fallback for falsy input
 * @returns {*} the object, parsed value, or fallback
 * @throws {Error} for truthy values that are neither object nor string, and
 *   whatever JSON5.parse throws on malformed text
 */
function parseJSON5(o, defaultVal) {
  if (!o) return defaultVal || o;
  switch (typeof o) {
    case 'object':
      return o;
    case 'string':
      return JSON5.parse(o);
    default:
      throw new Error(`Could not parse object:${o}`);
  }
}
326
+ export { appendPostfix };
327
+ export { bool };
328
+ export { downloadFile };
329
+ export { getTempFilename };
330
+ export { getTempDir };
331
+ export { getBatchTransform };
332
+ export { getDebatchTransform };
333
+ export { getFile };
334
+ export { getManifest };
335
+ export { getPacketFiles };
336
+ export { getStringArray };
337
+ export { makeStrings };
338
+ export { parseJSON5 };
339
+ export { relativeDate };
340
+ export { streamPacket };
341
+ export { writeTempFile };
342
+ export default {
343
+ appendPostfix,
344
+ bool,
345
+ downloadFile,
346
+ getTempFilename,
347
+ getTempDir,
348
+ getBatchTransform,
349
+ getDebatchTransform,
350
+ getFile,
351
+ getManifest,
352
+ getPacketFiles,
353
+ getStringArray,
354
+ makeStrings,
355
+ parseJSON5,
356
+ relativeDate,
357
+ streamPacket,
358
+ writeTempFile
359
+ };