@engine9-io/input-tools 1.7.5 → 1.7.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/ForEachEntry.js CHANGED
@@ -172,7 +172,7 @@ class ForEachEntry {
172
172
  batches += 1;
173
173
  records += batch?.length || 0;
174
174
 
175
- debugThrottle(`Processed ${batches} batches for a total of ${records} records`);
175
+ debugThrottle(`Processed ${batches} batches for a total of ${records} outbound records`);
176
176
  cb();
177
177
  },
178
178
  }),
@@ -0,0 +1,13 @@
1
+ import js from "@eslint/js";
2
+ import globals from "globals";
3
+ import { defineConfig } from "eslint/config";
4
+
5
+ export default defineConfig([
6
+ { files: ["**/*.{js,mjs,cjs}"], plugins: { js }, extends: ["js/recommended"], languageOptions: {
7
+ globals: {
8
+ ...globals.node, // This includes 'process' and other Node.js globals
9
+ // globals.browser
10
+ }
11
+ } },
12
+ { files: ["**/*.js"], languageOptions: { sourceType: "commonjs" } },
13
+ ]);
@@ -20,7 +20,7 @@ const S3Worker = require('./S3');
20
20
  const ParquetWorker = require('./Parquet');
21
21
 
22
22
  const {
23
- bool, getStringArray, getTempDir, makeStrings, streamPacket,
23
+ bool, getStringArray, getTempDir, makeStrings, streamPacket,relativeDate
24
24
  } = require('./tools');
25
25
 
26
26
  function Worker({ accountId }) { this.accountId = accountId; }
@@ -153,7 +153,7 @@ Worker.prototype.detectEncoding.metadata = {
153
153
  Internal method to transform a file into a stream of objects.
154
154
  */
155
155
  Worker.prototype.fileToObjectStream = async function (options) {
156
- const { filename, columns, limit: limitOption } = options;
156
+ const { filename, columns, limit: limitOption,format:formatOverride } = options;
157
157
 
158
158
  // handle stream item
159
159
  if (options.stream) {
@@ -203,14 +203,15 @@ Worker.prototype.fileToObjectStream = async function (options) {
203
203
  } else {
204
204
  stream.setEncoding(encoding);
205
205
  }
206
+ let format=formatOverride || postfix;
206
207
 
207
- if (postfix === 'csv') {
208
+ if (format === 'csv') {
208
209
  const csvTransforms = this.csvToObjectTransforms({ ...options });
209
210
  transforms = transforms.concat(csvTransforms.transforms);
210
- } else if (postfix === 'txt') {
211
+ } else if (format === 'txt') {
211
212
  const csvTransforms = this.csvToObjectTransforms({ ...options, delimiter: '\t' });
212
213
  transforms = transforms.concat(csvTransforms.transforms);
213
- } else if (postfix === 'jsonl') {
214
+ } else if (format === 'jsonl') {
214
215
  /* Type of JSON that has the names in an array in the first record,
215
216
  and the values in JSON arrays thereafter
216
217
  */
@@ -609,17 +610,38 @@ Worker.prototype.json.metadata = {
609
610
  },
610
611
  };
611
612
 
612
- Worker.prototype.list = async function ({ directory }) {
613
+ Worker.prototype.list = async function ({ directory, start:s, end:e }) {
613
614
  if (!directory) throw new Error('directory is required');
615
+ let start=null;
616
+ let end=null;
617
+ if (s) start=relativeDate(s);
618
+ if (e) end=relativeDate(e);
619
+
614
620
  if (directory.startsWith('s3://') || directory.startsWith('r2://')) {
615
621
  const worker = new (directory.startsWith('r2://') ? R2Worker : S3Worker)(this);
616
- return worker.list({ directory });
622
+ return worker.list({ directory, start, end });
617
623
  }
618
624
  const a = await fsp.readdir(directory, { withFileTypes: true });
619
- return a.map((f) => ({
620
- name: f.name,
621
- type: f.isDirectory() ? 'directory' : 'file',
622
- }));
625
+
626
+ const withModified=[];
627
+ for (const file of a) {
628
+ const fullPath = path.join(directory, file.name);
629
+ const stats = await fsp.stat(fullPath);
630
+ if (start && stats.mtime<start.getTime()){
631
+ //do not include
632
+ }else if (end && stats.mtime>end.getTime()){
633
+ //do nothing
634
+ }else{
635
+ withModified.push({
636
+ name:file.name,
637
+ type: file.isDirectory() ? 'directory' : 'file',
638
+ modifiedAt:new Date(stats.mtime).toISOString(),
639
+ });
640
+ }
641
+ }
642
+
643
+ return withModified;
644
+
623
645
  };
624
646
  Worker.prototype.list.metadata = {
625
647
  options: {
package/file/S3.js CHANGED
@@ -1,6 +1,5 @@
1
1
  const debug = require('debug')('@engine9-io/input/S3');
2
2
  const fs = require('node:fs');
3
- // eslint-disable-next-line import/no-unresolved
4
3
  const { mimeType: mime } = require('mime-type/with-db');
5
4
  const {
6
5
  S3Client,
@@ -187,7 +186,7 @@ Worker.prototype.write.metadata = {
187
186
  },
188
187
  };
189
188
 
190
- Worker.prototype.list = async function ({ directory, raw }) {
189
+ Worker.prototype.list = async function ({ directory, start,end,raw }) {
191
190
  if (!directory) throw new Error('directory is required');
192
191
  let dir = directory;
193
192
  while (dir.slice(-1) === '/') dir = dir.slice(0, -1);
@@ -206,7 +205,16 @@ Worker.prototype.list = async function ({ directory, raw }) {
206
205
  name: f.Prefix.slice(Prefix.length + 1, -1),
207
206
  type: 'directory',
208
207
  })))
209
- .concat((files || []).map(({ Key, Size, LastModified }) => ({
208
+ .concat((files || [])
209
+ .filter(({LastModified})=>{
210
+ if (start && new Date(LastModified)<start){
211
+ return false;
212
+ }else if (end && new Date(LastModified)>end){
213
+ return false;
214
+ }else{
215
+ return true;
216
+ }
217
+ }).map(({ Key, Size, LastModified }) => ({
210
218
  name: Key.slice(Prefix.length + 1),
211
219
  type: 'file',
212
220
  size: Size,
package/file/tools.js CHANGED
@@ -12,6 +12,15 @@ const { PassThrough } = require('node:stream');
12
12
  const progress = require('debug')('info:@engine9/input-tools');
13
13
  const unzipper = require('unzipper');
14
14
 
15
+ const dayjs = require('dayjs');
16
+
17
+ const {
18
+ S3Client,
19
+ HeadObjectCommand,
20
+ GetObjectCommand,
21
+ } = require('@aws-sdk/client-s3');
22
+
23
+
15
24
  const {
16
25
  v7: uuidv7,
17
26
  } = require('uuid');
@@ -72,12 +81,6 @@ async function writeTempFile(options) {
72
81
  return { filename };
73
82
  }
74
83
 
75
- const {
76
- S3Client,
77
- HeadObjectCommand,
78
- GetObjectCommand,
79
- } = require('@aws-sdk/client-s3');
80
-
81
84
  async function getPacketFiles({ packet }) {
82
85
  if (packet.indexOf('s3://') === 0) {
83
86
  const parts = packet.split('/');
@@ -128,6 +131,7 @@ async function getPacketFiles({ packet }) {
128
131
  return directory;
129
132
  }
130
133
 
134
+
131
135
  async function getManifest({ packet }) {
132
136
  if (!packet) throw new Error('no packet option specififed');
133
137
  const { files } = await getPacketFiles({ packet });
@@ -222,6 +226,12 @@ async function downloadFile({ packet, type = 'person' }) {
222
226
  });
223
227
  }
224
228
 
229
+ function isValidDate(d) {
230
+ // we WANT to use isNaN, not the Number.isNaN -- we're checking the date type
231
+ // eslint-disable-next-line no-restricted-globals
232
+ return d instanceof Date && !isNaN(d);
233
+ }
234
+
225
235
  function bool(x, _defaultVal) {
226
236
  const defaultVal = (_defaultVal === undefined) ? false : _defaultVal;
227
237
  if (x === undefined || x === null || x === '') return defaultVal;
@@ -240,6 +250,67 @@ function getStringArray(s, nonZeroLength) {
240
250
  if (nonZeroLength && a.length === 0) a = [0];
241
251
  return a;
242
252
  }
253
+ function relativeDate(s, _initialDate) {
254
+ let initialDate = _initialDate;
255
+ if (!s || s === 'none') return null;
256
+ if (typeof s.getMonth === 'function') return s;
257
+ // We actually want a double equals here to test strings as well
258
+ // eslint-disable-next-line eqeqeq
259
+ if (parseInt(s, 10) == s) {
260
+ const r = new Date(parseInt(s, 10));
261
+ if (!isValidDate(r)) throw new Error(`Invalid integer date:${s}`);
262
+ return r;
263
+ }
264
+
265
+ if (initialDate) {
266
+ initialDate = new Date(initialDate);
267
+ } else {
268
+ initialDate = new Date();
269
+ }
270
+
271
+ let r = s.match(/^([+-]{1})([0-9]+)([YyMwdhms]{1})([.a-z]*)$/);
272
+
273
+ if (r) {
274
+ let period = null;
275
+ switch (r[3]) {
276
+ case 'Y':
277
+ case 'y': period = 'years'; break;
278
+
279
+ case 'M': period = 'months'; break;
280
+ case 'w': period = 'weeks'; break;
281
+ case 'd': period = 'days'; break;
282
+ case 'h': period = 'hours'; break;
283
+ case 'm': period = 'minutes'; break;
284
+ case 's': period = 'seconds'; break;
285
+ default: period = 'minutes'; break;
286
+ }
287
+
288
+ let d = dayjs(initialDate);
289
+
290
+ if (r[1] === '+') {
291
+ d = d.add(parseInt(r[2], 10), period);
292
+ } else {
293
+ d = d.subtract(parseInt(r[2], 10), period);
294
+ }
295
+ if (!isValidDate(d.toDate())) throw new Error(`Invalid date configuration:${r}`);
296
+ if (r[4]) {
297
+ const opts = r[4].split('.').filter(Boolean);
298
+ if (opts[0] === 'start') d = d.startOf(opts[1] || 'day');
299
+ else if (opts[0] === 'end') d = d.endOf(opts[1] || 'day');
300
+ else throw new Error(`Invalid relative date,unknown options:${r[4]}`);
301
+ }
302
+
303
+ return d.toDate();
304
+ }
305
+ if (s === 'now') {
306
+ r = dayjs(new Date()).toDate();
307
+ return r;
308
+ }
309
+ r = dayjs(new Date(s)).toDate();
310
+ if (!isValidDate(r)) throw new Error(`Invalid Date: ${s}`);
311
+ return r;
312
+ }
313
+
243
314
  /*
244
315
  When comparing two objects, some may come from a file (thus strings), and some from
245
316
  a database or elsewhere (not strings), so for deduping make sure to make them all strings
@@ -263,6 +334,7 @@ module.exports = {
263
334
  getPacketFiles,
264
335
  getStringArray,
265
336
  makeStrings,
337
+ relativeDate,
266
338
  streamPacket,
267
339
  writeTempFile,
268
340
  };
package/index.js CHANGED
@@ -21,6 +21,8 @@ const {
21
21
  downloadFile,
22
22
  getTempFilename,
23
23
  getTempDir,
24
+ isValidDate,
25
+ relativeDate,
24
26
  streamPacket,
25
27
  getPacketFiles,
26
28
  getBatchTransform,
@@ -54,73 +56,6 @@ handlebars.registerHelper('percent', (a, b) => `${((100 * a) / b).toFixed(2)}%`)
54
56
 
55
57
  handlebars.registerHelper('or', (a, b, c) => a || b || c);
56
58
 
57
- function isValidDate(d) {
58
- // we WANT to use isNaN, not the Number.isNaN -- we're checking the date type
59
- // eslint-disable-next-line no-restricted-globals
60
- return d instanceof Date && !isNaN(d);
61
- }
62
-
63
- function relativeDate(s, _initialDate) {
64
- let initialDate = _initialDate;
65
- if (!s || s === 'none') return null;
66
- if (typeof s.getMonth === 'function') return s;
67
- // We actually want a double equals here to test strings as well
68
- // eslint-disable-next-line eqeqeq
69
- if (parseInt(s, 10) == s) {
70
- const r = new Date(parseInt(s, 10));
71
- if (!isValidDate(r)) throw new Error(`Invalid integer date:${s}`);
72
- return r;
73
- }
74
-
75
- if (initialDate) {
76
- initialDate = new Date(initialDate);
77
- } else {
78
- initialDate = new Date();
79
- }
80
-
81
- let r = s.match(/^([+-]{1})([0-9]+)([YyMwdhms]{1})([.a-z]*)$/);
82
-
83
- if (r) {
84
- let period = null;
85
- switch (r[3]) {
86
- case 'Y':
87
- case 'y': period = 'years'; break;
88
-
89
- case 'M': period = 'months'; break;
90
- case 'w': period = 'weeks'; break;
91
- case 'd': period = 'days'; break;
92
- case 'h': period = 'hours'; break;
93
- case 'm': period = 'minutes'; break;
94
- case 's': period = 'seconds'; break;
95
- default: period = 'minutes'; break;
96
- }
97
-
98
- let d = dayjs(initialDate);
99
-
100
- if (r[1] === '+') {
101
- d = d.add(parseInt(r[2], 10), period);
102
- } else {
103
- d = d.subtract(parseInt(r[2], 10), period);
104
- }
105
- if (!isValidDate(d.toDate())) throw new Error(`Invalid date configuration:${r}`);
106
- if (r[4]) {
107
- const opts = r[4].split('.').filter(Boolean);
108
- if (opts[0] === 'start') d = d.startOf(opts[1] || 'day');
109
- else if (opts[0] === 'end') d = d.endOf(opts[1] || 'day');
110
- else throw new Error(`Invalid relative date,unknown options:${r[4]}`);
111
- }
112
-
113
- return d.toDate();
114
- }
115
- if (s === 'now') {
116
- r = dayjs(new Date()).toDate();
117
- return r;
118
- }
119
- r = dayjs(new Date(s)).toDate();
120
- if (!isValidDate(r)) throw new Error(`Invalid Date: ${s}`);
121
- return r;
122
- }
123
-
124
59
  async function list(_path) {
125
60
  const directory = await unzipper.Open.file(_path);
126
61
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@engine9-io/input-tools",
3
- "version": "1.7.5",
3
+ "version": "1.7.7",
4
4
  "description": "Tools for dealing with Engine9 inputs",
5
5
  "main": "index.js",
6
6
  "scripts": {
@@ -10,10 +10,7 @@
10
10
  "author": "Engine9",
11
11
  "license": "GPL-3.0-or-later",
12
12
  "devDependencies": {
13
- "eslint": "^8.57.0",
14
- "eslint-config-airbnb-base": "^15.0.0",
15
- "eslint-plugin-import": "^2.29.0",
16
- "eslint-plugin-jsonc": "^2.15.1"
13
+ "eslint": "^9.33.0"
17
14
  },
18
15
  "dependencies": {
19
16
  "@aws-sdk/client-s3": "^3.723.0",
package/test/file.js ADDED
@@ -0,0 +1,18 @@
1
+ const {
2
+ it,
3
+ } = require('node:test');
4
+ const assert = require('node:assert');
5
+ const debug = require('debug')('files');
6
+
7
+ const { FileUtilities } = require('../index');
8
+
9
+ it('Should list a directory', async () => {
10
+ const futil=new FileUtilities({accountId:'test'});
11
+ let files=await futil.list({directory:'.'});
12
+ assert(files.length,"Should have some files");
13
+ debug(files);
14
+ let startTest=await futil.list({directory:'.',start:'2040-01-01'});
15
+ assert(startTest.length===0,"Should NOT have any files before future start date");
16
+ let endTest=await futil.list({directory:'.',end:'1900-01-01'});
17
+ assert(endTest.length===0,"Should NOT have any files before past end date");
18
+ });
package/.eslintignore DELETED
@@ -1,5 +0,0 @@
1
- node_modules
2
- **/node_modules
3
- ./node_modules/*
4
- ./node_modules/**
5
- *.schema
package/.eslintrc.js DELETED
@@ -1,36 +0,0 @@
1
- module.exports = {
2
- env: {
3
- browser: true,
4
- commonjs: true,
5
- es2021: true,
6
- },
7
- extends: [
8
- 'airbnb-base',
9
- 'plugin:jsonc/base',
10
- 'plugin:jsonc/recommended-with-json5'
11
- ],
12
- overrides: [
13
- {
14
- env: {
15
- node: true,
16
- },
17
- files: [
18
- '.eslintrc.{js,cjs}',
19
- ],
20
- parserOptions: {
21
- sourceType: 'script',
22
- },
23
- plugins: [
24
- 'json5',
25
- ],
26
- },
27
- ],
28
- parserOptions: {
29
- ecmaVersion: 'latest',
30
- },
31
-
32
- rules: {
33
- 'func-names': 'off', // Anonymous functions have their useful cases
34
- 'no-param-reassign': [2, { props: false }], // We often assign props of an object in a function, and that's generally safe.
35
- },
36
- };