@engine9-io/input-tools 1.9.11 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,45 +1,39 @@
- const { google } = require('googleapis');
- const fs = require('node:fs');
-
+ import { google } from "googleapis";
+ import fs from "node:fs";
  const fsp = fs.promises;
-
- function Worker() {}
-
+ function Worker() { }
  Worker.prototype.setAuth = async function () {
-   const keyFile = process.env.GOOGLE_APPLICATION_CREDENTIALS;
-   const settings = JSON.parse(await fsp.readFile(keyFile));
-   if (!settings.subject_to_impersonate) throw new Error(`You should include subject_to_impersonate in file ${keyFile}`);
-
-   const auth = new google.auth.GoogleAuth({
-     clientOptions: {
-       subject: settings.subject_to_impersonate,
-     },
-     keyFile,
-     scopes: ['https://www.googleapis.com/auth/drive'],
-   });
-   google.options({
-     auth,
-   });
+     const keyFile = process.env.GOOGLE_APPLICATION_CREDENTIALS;
+     const settings = JSON.parse(await fsp.readFile(keyFile));
+     if (!settings.subject_to_impersonate)
+         throw new Error(`You should include subject_to_impersonate in file ${keyFile}`);
+     const auth = new google.auth.GoogleAuth({
+         clientOptions: {
+             subject: settings.subject_to_impersonate,
+         },
+         keyFile,
+         scopes: ['https://www.googleapis.com/auth/drive'],
+     });
+     google.options({
+         auth,
+     });
  };
-
  Worker.prototype.list = async function ({ path }) {
-   await this.setAuth();
-   const drive = google.drive({ version: 'v3' });
-   const folderId = path;
-   const q = `'${folderId}' in parents and trashed=false`;
-   const raw = await drive.files.list({
-     pageSize: 150,
-     q,
-     supportsAllDrives: true, // include share drives as well
-     includeItemsFromAllDrives: true,
-   });
-
-   return raw.data?.files;
+     await this.setAuth();
+     const drive = google.drive({ version: 'v3' });
+     const folderId = path;
+     const q = `'${folderId}' in parents and trashed=false`;
+     const raw = await drive.files.list({
+         pageSize: 150,
+         q,
+         supportsAllDrives: true, // include share drives as well
+         includeItemsFromAllDrives: true,
+     });
+     return raw.data?.files;
  };
  Worker.prototype.list.metadata = {
-   options: {
-     path: {},
-   },
+     options: {
+         path: {},
+     },
  };
-
- module.exports = Worker;
+ export default Worker;
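
The hunk above converts this worker (a Google Drive folder lister built on googleapis) from CommonJS to an ES module with a default export, which is the breaking change behind the 1.9.11 → 2.0.0 major bump. A minimal consumer-side sketch of the before and after, based only on the code shown; the worker's file path is not included in this diff, so the module specifier below is hypothetical:

    // 1.x (CommonJS), hypothetical path
    // const Worker = require('./GoogleDrive');

    // 2.x (ESM), hypothetical path
    import Worker from './GoogleDrive.js';

    const worker = new Worker();
    // setAuth() reads the key file named by GOOGLE_APPLICATION_CREDENTIALS and requires it
    // to contain subject_to_impersonate; list() calls setAuth() itself before querying Drive.
    const files = await worker.list({ path: 'drive-folder-id' }); // folder id is a placeholder
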
package/file/Parquet.js CHANGED
@@ -1,149 +1,137 @@
- const parquet = require('@dsnp/parquetjs');
-
- const { Readable } = require('node:stream');
- const debug = require('debug')('Parquet');
- const { S3Client } = require('@aws-sdk/client-s3');
- const FileWorker = require('./FileUtilities');
-
- function Worker() {}
-
+ import parquet from "@dsnp/parquetjs";
+ import nodestream from "node:stream";
+ import debug$0 from "debug";
+ import clientS3 from "@aws-sdk/client-s3";
+ import FileWorker from "./FileUtilities.js";
+ const { Readable } = nodestream;
+ const debug = debug$0('Parquet');
+ const { S3Client } = clientS3;
+ function Worker() { }
  async function getReader(options) {
-   const { filename } = options;
-   if (!filename) throw new Error('filename is required');
-   if (filename.indexOf('s3://') === 0) {
-     const client = new S3Client({});
-     const parts = filename.split('/');
-
-     return parquet.ParquetReader.openS3(client, {
-       Bucket: parts[2],
-       Key: parts.slice(3).join('/')
-     });
-   }
-   return parquet.ParquetReader.openFile(filename);
+     const { filename } = options;
+     if (!filename)
+         throw new Error('filename is required');
+     if (filename.indexOf('s3://') === 0) {
+         const client = new S3Client({});
+         const parts = filename.split('/');
+         return parquet.ParquetReader.openS3(client, {
+             Bucket: parts[2],
+             Key: parts.slice(3).join('/')
+         });
+     }
+     return parquet.ParquetReader.openFile(filename);
  }
-
  Worker.prototype.meta = async function (options) {
-   const reader = await getReader(options);
-   const schema = reader.getSchema();
-   return {
-     //stored as a buffer
-     schema,
-     records: parseInt(reader.metadata?.num_rows?.toString(), 10)
-   };
-   // getMetadata();
+     const reader = await getReader(options);
+     const schema = reader.getSchema();
+     return {
+         //stored as a buffer
+         schema,
+         records: parseInt(reader.metadata?.num_rows?.toString(), 10)
+     };
+     // getMetadata();
  };
  Worker.prototype.meta.metadata = {
-   options: {
-     path: {}
-   }
+     options: {
+         path: {}
+     }
  };
  Worker.prototype.schema = async function (options) {
-   const reader = await getReader(options);
-   return reader.getSchema();
+     const reader = await getReader(options);
+     return reader.getSchema();
  };
  Worker.prototype.schema.metadata = {
-   options: {
-     path: {}
-   }
+     options: {
+         path: {}
+     }
  };
-
  function cleanColumnName(name) {
-   return name.toLowerCase().replace(/[^a-z0-9_]/g, '_');
+     return name.toLowerCase().replace(/[^a-z0-9_]/g, '_');
  }
-
  Worker.prototype.stream = async function (options) {
-   const reader = await getReader(options);
-   let columns;
-   if (options.columns) {
-     const { fieldList } = await this.schema(options);
-     columns = [];
-     let requestedColumns = options.columns;
-     if (typeof options.columns === 'string') requestedColumns = options.columns.split(',').map((d) => d.trim());
-     else requestedColumns = options.columns.map((d) => (d.name ? d.name.trim() : d.trim()));
-     requestedColumns.forEach((c) => {
-       const matchingCols = fieldList
-         .filter((f) => f.name === c || cleanColumnName(f.name) === cleanColumnName(c))
-         .map((f) => f.name);
-       columns = columns.concat(matchingCols);
-     });
-   }
-   let limit = 0;
-   if (parseInt(options.limit, 10) === options.limit) limit = parseInt(options.limit, 10);
-   // create a new cursor
-   debug(`Reading parquet file ${options.filename} with columns ${columns?.join(',')} and limit ${limit}`);
-   const cursor = reader.getCursor(columns);
-
-   let counter = 0;
-
-   const start = new Date().getTime();
-
-   const stream = new Readable({
-     objectMode: true,
-     async read() {
-       const token = await cursor.next();
-       if (token) {
-         counter += 1;
-         if (limit && counter > limit) {
-           debug(`Reached limit of ${limit}, stopping`);
-           this.push(null);
-           await reader.close();
-           return;
-         }
-         if (counter % 10000 === 0) {
-           let m = process.memoryUsage().heapTotal;
-           const end = new Date().getTime();
-           debug(
-             `Read ${counter} ${(counter * 1000) / (end - start)}/sec, Node reported memory usage: ${
-               m / 1024 / 1024
-             } MBs`
-           );
-         }
-         this.push(token);
-       } else {
-         await reader.close();
-         this.push(null);
-       }
+     const reader = await getReader(options);
+     let columns;
+     if (options.columns) {
+         const { fieldList } = await this.schema(options);
+         columns = [];
+         let requestedColumns = options.columns;
+         if (typeof options.columns === 'string')
+             requestedColumns = options.columns.split(',').map((d) => d.trim());
+         else
+             requestedColumns = options.columns.map((d) => (d.name ? d.name.trim() : d.trim()));
+         requestedColumns.forEach((c) => {
+             const matchingCols = fieldList
+                 .filter((f) => f.name === c || cleanColumnName(f.name) === cleanColumnName(c))
+                 .map((f) => f.name);
+             columns = columns.concat(matchingCols);
+         });
      }
-   });
-
-   return { stream };
+     let limit = 0;
+     if (parseInt(options.limit, 10) === options.limit)
+         limit = parseInt(options.limit, 10);
+     // create a new cursor
+     debug(`Reading parquet file ${options.filename} with columns ${columns?.join(',')} and limit ${limit}`);
+     const cursor = reader.getCursor(columns);
+     let counter = 0;
+     const start = new Date().getTime();
+     const stream = new Readable({
+         objectMode: true,
+         async read() {
+             const token = await cursor.next();
+             if (token) {
+                 counter += 1;
+                 if (limit && counter > limit) {
+                     debug(`Reached limit of ${limit}, stopping`);
+                     this.push(null);
+                     await reader.close();
+                     return;
+                 }
+                 if (counter % 10000 === 0) {
+                     let m = process.memoryUsage().heapTotal;
+                     const end = new Date().getTime();
+                     debug(`Read ${counter} ${(counter * 1000) / (end - start)}/sec, Node reported memory usage: ${m / 1024 / 1024} MBs`);
+                 }
+                 this.push(token);
+             }
+             else {
+                 await reader.close();
+                 this.push(null);
+             }
+         }
+     });
+     return { stream };
  };
-
  Worker.prototype.stream.metadata = {
-   options: {
-     path: {}
-   }
+     options: {
+         path: {}
+     }
  };
-
  Worker.prototype.toFile = async function (options) {
-   const { stream } = await this.stream(options);
-   const fworker = new FileWorker(this);
-   return fworker.objectStreamToFile({ ...options, stream });
+     const { stream } = await this.stream(options);
+     const fworker = new FileWorker(this);
+     return fworker.objectStreamToFile({ ...options, stream });
  };
  Worker.prototype.toFile.metadata = {
-   options: {
-     path: {}
-   }
+     options: {
+         path: {}
+     }
  };
-
  Worker.prototype.stats = async function (options) {
-   const reader = await getReader(options);
-   const schema = reader.getSchema();
-   const fileMetadata = reader.getFileMetaData();
-   const rowGroups = reader.getRowGroups();
-
-   // const reader = await parquet.ParquetReader.openS3(client, getParams(options));
-   // return reader.getSchema();
-   return {
-     schema,
-     fileMetadata,
-     rowGroups
-   };
+     const reader = await getReader(options);
+     const schema = reader.getSchema();
+     const fileMetadata = reader.getFileMetaData();
+     const rowGroups = reader.getRowGroups();
+     // const reader = await parquet.ParquetReader.openS3(client, getParams(options));
+     // return reader.getSchema();
+     return {
+         schema,
+         fileMetadata,
+         rowGroups
+     };
  };
  Worker.prototype.stats.metadata = {
-   options: {
-     path: {}
-   }
+     options: {
+         path: {}
+     }
  };
-
- module.exports = Worker;
+ export default Worker;
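
Parquet.js keeps the same behavior; the change is the module format (require/module.exports to import/export default) plus reformatted output. For orientation, a hedged sketch of driving the stream method shown above; the filename, column list, and limit are illustrative values:

    import ParquetWorker from './Parquet.js';

    const worker = new ParquetWorker();
    // stream() resolves to { stream }, where stream is an object-mode Readable of rows.
    // filename may be a local path or an s3:// URI (read via ParquetReader.openS3).
    const { stream } = await worker.stream({
      filename: 's3://example-bucket/events.parquet', // illustrative
      columns: 'id,email', // optional: comma-separated string or array of column names
      limit: 1000, // optional cap on rows pushed before the stream closes
    });
    for await (const row of stream) {
      console.log(row);
    }
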
package/file/R2.js CHANGED
@@ -1,37 +1,32 @@
- const util = require('node:util');
- const {
-   S3Client,
- } = require('@aws-sdk/client-s3');
- const S3 = require('./S3');
-
+ import util from "node:util";
+ import clientS3 from "@aws-sdk/client-s3";
+ import S3 from "./S3.js";
+ const { S3Client, } = clientS3;
  function R2(worker) {
-   S3.call(this, worker);
-   this.prefix='r2';
+     S3.call(this, worker);
+     this.prefix = 'r2';
  }
  util.inherits(R2, S3);
-
  R2.prototype.getClient = function () {
-   const missing = ['CLOUDFLARE_R2_ACCOUNT_ID', 'CLOUDFLARE_R2_ACCESS_KEY_ID', 'CLOUDFLARE_R2_SECRET_ACCESS_KEY']
-     .filter((r) => !process.env[r]);
-   if (missing.length > 0) throw new Error(`Missing environment variables for Cloudflare access:${missing.join(',')}`);
-   const ACCOUNT_ID = process.env.CLOUDFLARE_R2_ACCOUNT_ID;
-   const ACCESS_KEY_ID = process.env.CLOUDFLARE_R2_ACCESS_KEY_ID;
-   const SECRET_ACCESS_KEY = process.env.CLOUDFLARE_R2_SECRET_ACCESS_KEY;
-
-   if (!this.client) {
-     this.client = new S3Client({
-       // R2 does not strictly require a region, but the SDK expects one. 'auto' works fine.
-       region: 'auto',
-       endpoint: `https://${ACCOUNT_ID}.r2.cloudflarestorage.com`,
-       credentials: {
-         accessKeyId: ACCESS_KEY_ID,
-         secretAccessKey: SECRET_ACCESS_KEY,
-       },
-       forcePathStyle: true, // Important for R2 compatibility
-
-     });
-   }
-   return this.client;
+     const missing = ['CLOUDFLARE_R2_ACCOUNT_ID', 'CLOUDFLARE_R2_ACCESS_KEY_ID', 'CLOUDFLARE_R2_SECRET_ACCESS_KEY']
+         .filter((r) => !process.env[r]);
+     if (missing.length > 0)
+         throw new Error(`Missing environment variables for Cloudflare access:${missing.join(',')}`);
+     const ACCOUNT_ID = process.env.CLOUDFLARE_R2_ACCOUNT_ID;
+     const ACCESS_KEY_ID = process.env.CLOUDFLARE_R2_ACCESS_KEY_ID;
+     const SECRET_ACCESS_KEY = process.env.CLOUDFLARE_R2_SECRET_ACCESS_KEY;
+     if (!this.client) {
+         this.client = new S3Client({
+             // R2 does not strictly require a region, but the SDK expects one. 'auto' works fine.
+             region: 'auto',
+             endpoint: `https://${ACCOUNT_ID}.r2.cloudflarestorage.com`,
+             credentials: {
+                 accessKeyId: ACCESS_KEY_ID,
+                 secretAccessKey: SECRET_ACCESS_KEY,
+             },
+             forcePathStyle: true, // Important for R2 compatibility
+         });
+     }
+     return this.client;
  };
-
- module.exports = R2;
+ export default R2;
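
R2.js likewise changes only its module format. As a reminder of what getClient, shown above, expects at runtime, a hedged sketch; the constructor argument and credential values are placeholders:

    import R2 from './R2.js';

    // getClient() throws unless CLOUDFLARE_R2_ACCOUNT_ID, CLOUDFLARE_R2_ACCESS_KEY_ID and
    // CLOUDFLARE_R2_SECRET_ACCESS_KEY are set; it caches a single S3Client pointed at
    // https://<ACCOUNT_ID>.r2.cloudflarestorage.com with region 'auto' and path-style URLs.
    const r2 = new R2(parentWorker); // parentWorker: whatever the parent S3 worker normally receives
    const client = r2.getClient();
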