@epfml/discojs-node 3.0.1-p20240821133014.0 → 3.0.1-p20240826092658.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,2 +1,2 @@
1
- export * from './data/index.js';
2
- export { saveModelToDisk, loadModelFromDisk } from './models/model_loader.js';
1
+ export * from './loaders/index.js';
2
+ export { saveModelToDisk, loadModelFromDisk } from './model_loader.js';
package/dist/index.js CHANGED
@@ -1,2 +1,2 @@
1
- export * from './data/index.js';
2
- export { saveModelToDisk, loadModelFromDisk } from './models/model_loader.js';
1
+ export * from './loaders/index.js';
2
+ export { saveModelToDisk, loadModelFromDisk } from './model_loader.js';
@@ -0,0 +1,2 @@
1
+ import { Dataset } from "@epfml/discojs";
2
+ export declare function load(path: string): Dataset<Partial<Record<string, string>>>;
@@ -0,0 +1,24 @@
1
+ import * as fs from "node:fs/promises";
2
+ import { parse as csvParser } from "csv-parse";
3
+ import { Dataset } from "@epfml/discojs";
4
/**
 * Checks whether a value is a plain record whose values are all strings.
 *
 * Arrays are rejected even when every element is a string: they are objects
 * too, but they are not column-name-to-value records.
 *
 * @param {unknown} raw value to inspect
 * @returns {boolean} `true` only for a non-null, non-array object whose own
 *   enumerable values are all strings (an empty object qualifies)
 */
function isRecordOfString(raw) {
    if (typeof raw !== "object" || raw === null || Array.isArray(raw))
        return false;
    // own enumerable keys are always strings, so only the values need checking
    return Object.values(raw).every((value) => typeof value === "string");
}
13
/**
 * Builds a dataset that reads a CSV file row by row.
 *
 * The file is opened inside the generator handed to `Dataset`, then streamed
 * through `csv-parse` with `columns: true`, so each row is yielded as an
 * object rather than an array of cells.
 *
 * @param {string} path location of the CSV file
 * @returns dataset yielding one string-to-string record per parsed row
 * @throws {Error} during iteration, when a parsed row is not an object of
 *   string values, or when opening/reading the file fails
 */
export function load(path) {
    return new Dataset(async function* () {
        const source = (await fs.open(path)).createReadStream();
        const parser = csvParser({ columns: true });
        // `pipe` does not forward errors: without this, a failed read would be
        // emitted on `source` with no listener instead of failing the iteration
        source.on("error", (err) => parser.destroy(err));
        source.pipe(parser);
        try {
            for await (const row of parser) {
                if (!isRecordOfString(row))
                    throw new Error("expected object of string to string");
                yield row;
            }
        }
        finally {
            // release the file when iteration stops early or fails;
            // harmless once the stream has already ended
            source.destroy();
        }
    });
}
@@ -0,0 +1,3 @@
1
+ import { Dataset, Image } from "@epfml/discojs";
2
+ export declare function load(path: string): Promise<Image<1 | 3 | 4>>;
3
+ export declare function loadAllInDir(dir: string): Promise<Dataset<Image>>;
@@ -0,0 +1,17 @@
1
+ import sharp from "sharp";
2
+ import * as path from "node:path";
3
+ import * as fs from "node:fs/promises";
4
+ import { Dataset, Image } from "@epfml/discojs";
5
/**
 * Decodes the image file at `path` into raw pixel data using `sharp`.
 *
 * @param {string} path location of the image file
 * @returns an `Image` built from the raw buffer together with the width,
 *   height and channel count reported by `sharp`
 * @throws {Error} when the decoded image has exactly two channels
 */
export async function load(path) {
    const decoded = await sharp(path)
        .raw()
        .toBuffer({ resolveWithObject: true });
    const { width, height, channels } = decoded.info;
    // two-channel images are the only channel count rejected here
    if (channels === 2) {
        throw new Error("unsupported channel count");
    }
    return new Image(decoded.data, width, height, channels);
}
13
/**
 * Builds a dataset of images from every entry of a directory.
 *
 * Each name returned by `readdir` is joined onto `dir` and mapped through
 * `load`; entries are not filtered, so every one of them is handed to `load`.
 *
 * @param {string} dir directory to list
 * @returns dataset of the listed paths mapped through `load`
 */
export async function loadAllInDir(dir) {
    const entries = await fs.readdir(dir);
    const fullPaths = [];
    for (const entry of entries) {
        fullPaths.push(path.join(dir, entry));
    }
    const pathDataset = new Dataset(fullPaths);
    return pathDataset.map(load);
}
@@ -0,0 +1,3 @@
1
+ export { load as loadCSV } from "./csv.js";
2
+ export { load as loadImage, loadAllInDir as loadImagesInDir, } from "./image.js";
3
+ export { load as loadText } from "./text.js";
@@ -0,0 +1,3 @@
1
+ export { load as loadCSV } from "./csv.js";
2
+ export { load as loadImage, loadAllInDir as loadImagesInDir, } from "./image.js";
3
+ export { load as loadText } from "./text.js";
@@ -0,0 +1,2 @@
1
+ import { Dataset, Text } from "@epfml/discojs";
2
+ export declare function load(path: string): Dataset<Text>;
@@ -0,0 +1,11 @@
1
+ import * as fs from "node:fs/promises";
2
+ import * as readline from "node:readline/promises";
3
+ import { Dataset } from "@epfml/discojs";
4
/**
 * Builds a dataset that yields the lines of a UTF-8 text file.
 *
 * @param {string} path location of the text file
 * @returns dataset yielding each line produced by the `readline` interface
 */
export function load(path) {
    async function* readLines() {
        const handle = await fs.open(path);
        const input = handle.createReadStream({ encoding: "utf8" });
        // `readline` is more than strictly needed but is the documented way
        // to read a file stream line by line:
        // https://nodejs.org/api/readline.html#example-read-file-stream-line-by-line
        // `crlfDelay: Infinity` treats "\r\n" as a single line break
        const lines = readline.createInterface({ input, crlfDelay: Infinity });
        yield* lines;
    }
    return new Dataset(readLines);
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@epfml/discojs-node",
3
- "version": "3.0.1-p20240821133014.0",
3
+ "version": "3.0.1-p20240826092658.0",
4
4
  "type": "module",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -21,11 +21,14 @@
21
21
  "dependencies": {
22
22
  "@epfml/discojs": "*",
23
23
  "@koush/wrtc": "0.5",
24
- "@tensorflow/tfjs-node": "4"
24
+ "@tensorflow/tfjs-node": "4",
25
+ "csv-parse": "5",
26
+ "sharp": "0.33"
25
27
  },
26
28
  "devDependencies": {
27
29
  "@types/node": "22",
28
30
  "nodemon": "3",
31
+ "tmp-promise": "3",
29
32
  "ts-node": "10"
30
33
  }
31
34
  }
@@ -1,5 +0,0 @@
1
- import type tf from '@tensorflow/tfjs';
2
- import { data } from '@epfml/discojs';
3
- export declare class ImageLoader extends data.ImageLoader<string> {
4
- readImageFrom(source: string, channels?: number): Promise<tf.Tensor3D>;
5
- }
@@ -1,11 +0,0 @@
1
- import fs from 'node:fs/promises';
2
- import { node as tfNode } from '@tensorflow/tfjs-node';
3
- import { data } from '@epfml/discojs';
4
- export class ImageLoader extends data.ImageLoader {
5
- async readImageFrom(source, channels) {
6
- // We allow specifying the number of channels because the default number of channels
7
- // differs between web and node for the same image
8
- // E.g. lus covid images have 1 channel with fs.readFile but 3 when loaded with discojs-web
9
- return tfNode.decodeImage(await fs.readFile(source), channels);
10
- }
11
- }
@@ -1,3 +0,0 @@
1
- export { ImageLoader as NodeImageLoader } from './image_loader.js';
2
- export { TabularLoader as NodeTabularLoader } from './tabular_loader.js';
3
- export { TextLoader as NodeTextLoader } from './text_loader.js';
@@ -1,3 +0,0 @@
1
- export { ImageLoader as NodeImageLoader } from './image_loader.js';
2
- export { TabularLoader as NodeTabularLoader } from './tabular_loader.js';
3
- export { TextLoader as NodeTextLoader } from './text_loader.js';
@@ -1,4 +0,0 @@
1
- import { data } from '@epfml/discojs';
2
- export declare class TabularLoader extends data.TabularLoader<string> {
3
- loadDatasetFrom(source: string, csvConfig: Record<string, unknown>): Promise<data.Dataset>;
4
- }
@@ -1,11 +0,0 @@
1
- import { data as tfData } from '@tensorflow/tfjs-node';
2
- import { data } from '@epfml/discojs';
3
- export class TabularLoader extends data.TabularLoader {
4
- loadDatasetFrom(source, csvConfig) {
5
- const prefix = 'file://';
6
- if (source.slice(0, 7) !== prefix) {
7
- source = prefix + source;
8
- }
9
- return Promise.resolve(tfData.csv(source, csvConfig));
10
- }
11
- }
@@ -1,4 +0,0 @@
1
- import { data } from '@epfml/discojs';
2
- export declare class TextLoader extends data.TextLoader<string> {
3
- loadDatasetFrom(source: string): Promise<data.Dataset>;
4
- }
@@ -1,14 +0,0 @@
1
- import { data as tfData } from '@tensorflow/tfjs-node';
2
- import fs from 'node:fs/promises';
3
- import { data } from '@epfml/discojs';
4
- export class TextLoader extends data.TextLoader {
5
- async loadDatasetFrom(source) {
6
- // TODO: reads all the file at once,
7
- // inputting the file path to FileDataSource isn't supported anymore
8
- const inputFile = await fs.readFile(source);
9
- const file = new tfData.FileDataSource(inputFile, { chunkSize: 1024 });
10
- // TODO: reading files line by line is an issue for LLM tokenization
11
- const dataset = new tfData.TextLineDataset(file).filter(s => s !== ' '); // newline creates empty strings
12
- return Promise.resolve(dataset);
13
- }
14
- }