@epfml/discojs 3.0.1-p20241024094708.0 → 3.0.1-p20241028120035.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. package/dist/aggregator/get.d.ts +3 -3
  2. package/dist/client/client.d.ts +5 -5
  3. package/dist/client/decentralized/decentralized_client.d.ts +2 -2
  4. package/dist/client/federated/federated_client.d.ts +2 -2
  5. package/dist/client/utils.d.ts +2 -2
  6. package/dist/dataset/dataset.d.ts +9 -2
  7. package/dist/dataset/dataset.js +83 -36
  8. package/dist/dataset/image.d.ts +5 -0
  9. package/dist/dataset/image.js +6 -1
  10. package/dist/dataset/index.d.ts +0 -1
  11. package/dist/dataset/index.js +0 -1
  12. package/dist/dataset/types.d.ts +2 -0
  13. package/dist/default_tasks/cifar10.d.ts +1 -1
  14. package/dist/default_tasks/cifar10.js +2 -3
  15. package/dist/default_tasks/lus_covid.d.ts +1 -1
  16. package/dist/default_tasks/lus_covid.js +2 -3
  17. package/dist/default_tasks/mnist.d.ts +1 -1
  18. package/dist/default_tasks/mnist.js +2 -4
  19. package/dist/default_tasks/simple_face.d.ts +1 -1
  20. package/dist/default_tasks/simple_face.js +2 -3
  21. package/dist/default_tasks/titanic.d.ts +1 -1
  22. package/dist/default_tasks/titanic.js +3 -6
  23. package/dist/default_tasks/wikitext.d.ts +1 -1
  24. package/dist/default_tasks/wikitext.js +1 -2
  25. package/dist/index.d.ts +4 -5
  26. package/dist/index.js +4 -5
  27. package/dist/models/gpt/index.d.ts +13 -16
  28. package/dist/models/gpt/index.js +62 -43
  29. package/dist/models/gpt/model.d.ts +1 -15
  30. package/dist/models/gpt/model.js +1 -75
  31. package/dist/models/model.d.ts +7 -12
  32. package/dist/models/tfjs.d.ts +10 -8
  33. package/dist/models/tfjs.js +106 -44
  34. package/dist/models/tokenizer.d.ts +1 -1
  35. package/dist/privacy.js +1 -1
  36. package/dist/processing/image.d.ts +18 -0
  37. package/dist/processing/image.js +75 -0
  38. package/dist/processing/index.d.ts +8 -0
  39. package/dist/processing/index.js +106 -0
  40. package/dist/processing/tabular.d.ts +19 -0
  41. package/dist/processing/tabular.js +33 -0
  42. package/dist/processing/text.d.ts +11 -0
  43. package/dist/processing/text.js +33 -0
  44. package/dist/serialization/model.d.ts +3 -3
  45. package/dist/serialization/model.js +19 -6
  46. package/dist/task/task.d.ts +4 -3
  47. package/dist/task/task.js +5 -3
  48. package/dist/task/task_handler.d.ts +3 -3
  49. package/dist/task/task_provider.d.ts +4 -4
  50. package/dist/task/training_information.d.ts +25 -16
  51. package/dist/task/training_information.js +76 -72
  52. package/dist/training/disco.d.ts +20 -12
  53. package/dist/training/disco.js +32 -13
  54. package/dist/training/trainer.d.ts +6 -7
  55. package/dist/training/trainer.js +6 -6
  56. package/dist/types/data_format.d.ts +40 -0
  57. package/dist/types/index.d.ts +2 -0
  58. package/dist/types/index.js +1 -0
  59. package/dist/validator.d.ts +10 -0
  60. package/dist/validator.js +30 -0
  61. package/package.json +4 -2
  62. package/dist/dataset/data/data.d.ts +0 -47
  63. package/dist/dataset/data/data.js +0 -88
  64. package/dist/dataset/data/data_split.d.ts +0 -8
  65. package/dist/dataset/data/helpers.d.ts +0 -10
  66. package/dist/dataset/data/helpers.js +0 -97
  67. package/dist/dataset/data/image_data.d.ts +0 -11
  68. package/dist/dataset/data/image_data.js +0 -43
  69. package/dist/dataset/data/index.d.ts +0 -5
  70. package/dist/dataset/data/index.js +0 -5
  71. package/dist/dataset/data/preprocessing/base.d.ts +0 -16
  72. package/dist/dataset/data/preprocessing/base.js +0 -1
  73. package/dist/dataset/data/preprocessing/image_preprocessing.d.ts +0 -13
  74. package/dist/dataset/data/preprocessing/image_preprocessing.js +0 -42
  75. package/dist/dataset/data/preprocessing/index.d.ts +0 -4
  76. package/dist/dataset/data/preprocessing/index.js +0 -3
  77. package/dist/dataset/data/preprocessing/tabular_preprocessing.d.ts +0 -13
  78. package/dist/dataset/data/preprocessing/tabular_preprocessing.js +0 -45
  79. package/dist/dataset/data/preprocessing/text_preprocessing.d.ts +0 -13
  80. package/dist/dataset/data/preprocessing/text_preprocessing.js +0 -100
  81. package/dist/dataset/data/tabular_data.d.ts +0 -11
  82. package/dist/dataset/data/tabular_data.js +0 -24
  83. package/dist/dataset/data/text_data.d.ts +0 -11
  84. package/dist/dataset/data/text_data.js +0 -14
  85. package/dist/processing.d.ts +0 -35
  86. package/dist/processing.js +0 -89
  87. package/dist/types.d.ts +0 -3
  88. package/dist/types.js +0 -1
  89. package/dist/validation/index.d.ts +0 -1
  90. package/dist/validation/index.js +0 -1
  91. package/dist/validation/validator.d.ts +0 -10
  92. package/dist/validation/validator.js +0 -113
  93. /package/dist/{dataset/data/data_split.js → types/data_format.js} +0 -0
package/dist/dataset/data/image_data.js DELETED
@@ -1,43 +0,0 @@
- import * as tf from '@tensorflow/tfjs';
- import { Data } from './data.js';
- import { ImagePreprocessing, IMAGE_PREPROCESSING } from './preprocessing/index.js';
- /**
- * Disco data made of image samples (.jpg, .png, etc.).
- */
- export class ImageData extends Data {
- availablePreprocessing = IMAGE_PREPROCESSING;
- static async init(dataset, task, size) {
- // Here we do our best to check data format before proceeding to training, for
- // better error handling. An incorrectly formatted image in the dataset might still
- // cause an error during training, because of the lazy aspect of the dataset; we only
- // verify the first sample.
- if (task.trainingInformation.preprocessingFunctions?.includes(ImagePreprocessing.Resize) !== true) {
- const iteration = await dataset.iterator().then((iter) => iter.next());
- if (iteration.done === true)
- throw new Error("empty dataset");
- const sample = iteration.value;
- // TODO: We suppose the presence of labels
- // TODO: Typing (discojs-node/src/dataset/data_loader/image_loader.spec.ts)
- if (typeof sample !== 'object' || sample === null || sample === undefined) {
- throw new Error("Image is undefined or is not an object");
- }
- let shape;
- if ('xs' in sample) {
- shape = sample.xs.shape;
- }
- else {
- shape = sample.shape;
- }
- const { IMAGE_H, IMAGE_W } = task.trainingInformation;
- if (IMAGE_W !== undefined && IMAGE_H !== undefined &&
- (shape[0] !== IMAGE_W || shape[1] !== IMAGE_H)) {
- throw new Error(`Image doesn't have the dimensions specified in the task's training information. Expected ${IMAGE_H}x${IMAGE_W} but got ${shape[0]}x${shape[1]}.`);
- }
- tf.dispose(sample);
- }
- return new ImageData(dataset, task, size);
- }
- create(dataset, task, size) {
- return new ImageData(dataset, task, size);
- }
- }
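
The comments in the removed init() above describe a general pattern: tf.data pipelines are lazy, so a malformed sample would otherwise only surface mid-training. A minimal standalone sketch of that fail-fast check (hypothetical helper, not part of this package):

async function checkFirstSample<T>(
  samples: AsyncIterable<T>,
  check: (sample: T) => void, // throws on malformed input
): Promise<void> {
  for await (const sample of samples) {
    check(sample); // fail fast here instead of mid-training
    return; // only the first sample is verified, as in the code above
  }
  throw new Error("empty dataset");
}
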
package/dist/dataset/data/index.d.ts DELETED
@@ -1,5 +0,0 @@
- export { Data } from './data.js';
- export { ImageData } from './image_data.js';
- export { TabularData } from './tabular_data.js';
- export { TextData } from './text_data.js';
- export { ImagePreprocessing, TabularPreprocessing, TextPreprocessing, IMAGE_PREPROCESSING, TABULAR_PREPROCESSING, TEXT_PREPROCESSING } from './preprocessing/index.js';
package/dist/dataset/data/index.js DELETED
@@ -1,5 +0,0 @@
- export { Data } from './data.js';
- export { ImageData } from './image_data.js';
- export { TabularData } from './tabular_data.js';
- export { TextData } from './text_data.js';
- export { ImagePreprocessing, TabularPreprocessing, TextPreprocessing, IMAGE_PREPROCESSING, TABULAR_PREPROCESSING, TEXT_PREPROCESSING } from './preprocessing/index.js';
package/dist/dataset/data/preprocessing/base.d.ts DELETED
@@ -1,16 +0,0 @@
- import type tf from '@tensorflow/tfjs';
- import type { Task } from '../../../index.js';
- import type { ImagePreprocessing } from './image_preprocessing.js';
- import type { TabularPreprocessing } from './tabular_preprocessing.js';
- import type { TextPreprocessing } from './text_preprocessing.js';
- /**
- * All available preprocessing type enums.
- */
- export type Preprocessing = ImagePreprocessing | TextPreprocessing | TabularPreprocessing;
- /**
- * Preprocessing function associating a preprocessing type enum to a sample transformation.
- */
- export interface PreprocessingFunction {
- type: Preprocessing;
- apply: (x: Promise<tf.TensorContainer>, task: Task) => Promise<tf.TensorContainer>;
- }
package/dist/dataset/data/preprocessing/base.js DELETED
@@ -1 +0,0 @@
- export {};
package/dist/dataset/data/preprocessing/image_preprocessing.d.ts DELETED
@@ -1,13 +0,0 @@
- import { List } from 'immutable';
- import type { PreprocessingFunction } from './base.js';
- /**
- * Available image preprocessing types.
- */
- export declare enum ImagePreprocessing {
- Resize = 0,
- Normalize = 1
- }
- /**
- * Available image preprocessing functions.
- */
- export declare const AVAILABLE_PREPROCESSING: List<PreprocessingFunction>;
package/dist/dataset/data/preprocessing/image_preprocessing.js DELETED
@@ -1,42 +0,0 @@
- import { List } from 'immutable';
- import * as tf from '@tensorflow/tfjs';
- /**
- * Available image preprocessing types.
- */
- export var ImagePreprocessing;
- (function (ImagePreprocessing) {
- ImagePreprocessing[ImagePreprocessing["Resize"] = 0] = "Resize";
- ImagePreprocessing[ImagePreprocessing["Normalize"] = 1] = "Normalize";
- })(ImagePreprocessing || (ImagePreprocessing = {}));
- const resize = {
- type: ImagePreprocessing.Resize,
- apply: async (entry, task) => {
- const { xs, ys } = await entry;
- const params = task.trainingInformation;
- return {
- xs: params.IMAGE_W !== undefined && params.IMAGE_H !== undefined
- ? xs.resizeBilinear([params.IMAGE_H, params.IMAGE_W])
- : xs,
- ys
- };
- }
- };
- const normalize = {
- type: ImagePreprocessing.Normalize,
- apply: async (entry) => {
- const { xs, ys } = await entry;
- return tf.tidy(() => {
- return {
- xs: xs.div(tf.scalar(255)),
- ys
- };
- });
- }
- };
- /**
- * Available image preprocessing functions.
- */
- export const AVAILABLE_PREPROCESSING = List([
- resize,
- normalize
- ]).sortBy((e) => e.type);
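
For reference, the two steps this removed file implemented, bilinear resizing and scaling pixels from [0, 255] into [0, 1], can be reproduced directly with @tensorflow/tfjs. A minimal sketch on an invented 2x2 single-channel input:

import * as tf from '@tensorflow/tfjs';

const image = tf.tensor3d([0, 64, 128, 255], [2, 2, 1]); // values in [0, 255]
const resized = tf.image.resizeBilinear(image, [32, 32]); // the Resize step
const normalized = resized.div(tf.scalar(255)); // the Normalize step: values now in [0, 1]
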
package/dist/dataset/data/preprocessing/index.d.ts DELETED
@@ -1,4 +0,0 @@
- export type { Preprocessing, PreprocessingFunction } from './base.js';
- export { AVAILABLE_PREPROCESSING as IMAGE_PREPROCESSING, ImagePreprocessing } from './image_preprocessing.js';
- export { AVAILABLE_PREPROCESSING as TABULAR_PREPROCESSING, TabularPreprocessing } from './tabular_preprocessing.js';
- export { AVAILABLE_PREPROCESSING as TEXT_PREPROCESSING, TextPreprocessing } from './text_preprocessing.js';
package/dist/dataset/data/preprocessing/index.js DELETED
@@ -1,3 +0,0 @@
- export { AVAILABLE_PREPROCESSING as IMAGE_PREPROCESSING, ImagePreprocessing } from './image_preprocessing.js';
- export { AVAILABLE_PREPROCESSING as TABULAR_PREPROCESSING, TabularPreprocessing } from './tabular_preprocessing.js';
- export { AVAILABLE_PREPROCESSING as TEXT_PREPROCESSING, TextPreprocessing } from './text_preprocessing.js';
package/dist/dataset/data/preprocessing/tabular_preprocessing.d.ts DELETED
@@ -1,13 +0,0 @@
- import { List } from 'immutable';
- import type { PreprocessingFunction } from './base.js';
- /**
- * Available tabular preprocessing types.
- */
- export declare enum TabularPreprocessing {
- Sanitize = 0,
- Normalize = 1
- }
- /**
- * Available tabular preprocessing functions.
- */
- export declare const AVAILABLE_PREPROCESSING: List<PreprocessingFunction>;
package/dist/dataset/data/preprocessing/tabular_preprocessing.js DELETED
@@ -1,45 +0,0 @@
- import { List } from 'immutable';
- /**
- * Available tabular preprocessing types.
- */
- export var TabularPreprocessing;
- (function (TabularPreprocessing) {
- TabularPreprocessing[TabularPreprocessing["Sanitize"] = 0] = "Sanitize";
- TabularPreprocessing[TabularPreprocessing["Normalize"] = 1] = "Normalize";
- })(TabularPreprocessing || (TabularPreprocessing = {}));
- const sanitize = {
- type: TabularPreprocessing.Sanitize,
- apply: async (entry) => {
- const entryContainer = await entry;
- // if preprocessing a dataset without labels, then the entry is an array of numbers
- if (Array.isArray(entryContainer)) {
- const entry = entryContainer;
- return entry.map((i) => i ?? 0);
- // if it is an object
- }
- else if (typeof entryContainer === 'object' && entry !== null) {
- // if the object is a tensor container with features xs and labels ys
- if (Object.hasOwn(entryContainer, 'xs')) {
- const { xs, ys } = entryContainer;
- return {
- xs: xs.map(i => i ?? 0),
- ys
- };
- // if the object contains features as a dict of feature names-values
- }
- else {
- const entry = Object.values(entryContainer);
- return entry.map((i) => i ?? 0);
- }
- }
- else {
- throw new Error('Unrecognized format during tabular preprocessing');
- }
- }
- };
- /**
- * Available tabular preprocessing functions.
- */
- export const AVAILABLE_PREPROCESSING = List([
- sanitize
- ]).sortBy((e) => e.type);
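
Whatever the entry shape, the Sanitize step above reduces to one rule: replace missing feature values with 0. The same rule on a plain array (hypothetical helper name):

function sanitizeRow(row: (number | null | undefined)[]): number[] {
  return row.map((value) => value ?? 0); // nullish values become 0
}

sanitizeRow([1.5, null, undefined, 3]); // [1.5, 0, 0, 3]
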
package/dist/dataset/data/preprocessing/text_preprocessing.d.ts DELETED
@@ -1,13 +0,0 @@
- import { List } from 'immutable';
- import type { PreprocessingFunction } from './base.js';
- /**
- * Available text preprocessing types.
- */
- export declare enum TextPreprocessing {
- Tokenize = 0,
- LeftPadding = 1
- }
- /**
- * Available text preprocessing functions.
- */
- export declare const AVAILABLE_PREPROCESSING: List<PreprocessingFunction>;
package/dist/dataset/data/preprocessing/text_preprocessing.js DELETED
@@ -1,100 +0,0 @@
- import { List } from 'immutable';
- import * as tf from '@tensorflow/tfjs';
- import { models } from '../../../index.js';
- /**
- * Available text preprocessing types.
- */
- export var TextPreprocessing;
- (function (TextPreprocessing) {
- TextPreprocessing[TextPreprocessing["Tokenize"] = 0] = "Tokenize";
- TextPreprocessing[TextPreprocessing["LeftPadding"] = 1] = "LeftPadding";
- })(TextPreprocessing || (TextPreprocessing = {}));
- function isNumberArray(raw) {
- if (!Array.isArray(raw))
- return false;
- const arr = raw; // isArray is unsafely guarding with any[]
- return arr.every((e) => typeof e === "number");
- }
- function isTokenizedEntry(raw) {
- if (typeof raw !== "object" || raw === null)
- return false;
- const { tokens } = raw;
- if (!isNumberArray(tokens))
- return false;
- const _ = { tokens };
- return true;
- }
- /**
- * LeftPadding pads all incoming inputs to be a fixed length, which should be specified
- * in `task.trainingInformation.maxSequenceLength`.
- *
- * We are currently only implementing left padding for text generation
- * https://huggingface.co/docs/transformers/en/llm_tutorial#wrong-padding-side
- * The function can easily be extended to support right padding if needed
- *
- * Once Transformers.js supports left padding, it will be possible to pad inputs
- * directly when tokenizing
- * https://github.com/xenova/transformers.js/blob/8804c36591d11d8456788d1bb4b16489121b3be2/src/tokenizers.js#L2517
- */
- const leftPadding = {
- type: TextPreprocessing.LeftPadding,
- apply: async (input, task) => {
- const x = await input;
- if (!isTokenizedEntry(x))
- throw new Error("The leftPadding preprocessing expects a non empty 1D array of number");
- const { tokens } = x;
- const tokenizer = await models.getTaskTokenizer(task);
- return tf.tidy(() => {
- // maxLength is the final length of xs
- // Because ys the contains the tokens in xs shifted by one (to predict the next token), we need
- // to include one more token than maxSequenceLength in order to have the next token's label of the maxSequenceLength'th token
- const maxLength = task.trainingInformation.maxSequenceLength ?? tokenizer.model_max_length;
- const maxLengthPlusLabel = maxLength + 1;
- let fixedLengthTokens = tf.tensor1d(tokens, 'int32'); // cast tokens from float to int for gpt-tfjs
- if (fixedLengthTokens.size > maxLengthPlusLabel) { // Should never happen because tokenization truncates inputs
- throw Error("There are more tokens than expected after tokenization and truncation");
- }
- else if (fixedLengthTokens.size < maxLengthPlusLabel) { // Pad inputs to fixed length
- const paddingToken = tokenizer.pad_token_id;
- fixedLengthTokens = fixedLengthTokens.pad([[Math.max(0, maxLengthPlusLabel - fixedLengthTokens.size), 0]], paddingToken);
- }
- // if tokens.size == maxLengthPlusLabel we can leave it as it is
- // ys is a one-hot encoding of the next token (i.e. xs shifted by one)
- // cast because oneHot isn't size-typing its return value
- const ys = tf.oneHot(fixedLengthTokens.slice([1]), tokenizer.model.vocab.length + 1);
- // remove the extra token now that ys is created
- const xs = fixedLengthTokens.slice([0], maxLength);
- return { xs, ys };
- });
- }
- };
- /**
- * Tokenize and truncates input strings
- */
- const tokenize = {
- type: TextPreprocessing.Tokenize,
- apply: async (x, task) => {
- const xs = await x;
- if (typeof xs !== 'string')
- throw new Error("The tokenize preprocessing expects a string as input");
- const tokenizer = await models.getTaskTokenizer(task);
- // Add plus one to include the next token label of the last token in the input sequence
- // The inputs are truncated down to exactly maxSequenceLength in leftPadding
- const maxLength = task.trainingInformation.maxSequenceLength ?? tokenizer.model_max_length;
- const maxLengthPlusLabel = maxLength + 1;
- const { input_ids: tokens } = tokenizer(xs, {
- // Transformers.js currently only supports right padding while we need left for text generation
- // Right padding should be supported in the future, once it is, we can directly pad while tokenizing
- // https://github.com/xenova/transformers.js/blob/8804c36591d11d8456788d1bb4b16489121b3be2/src/tokenizers.js#L2517
- padding: false,
- truncation: true,
- return_tensor: false,
- max_length: maxLengthPlusLabel,
- });
- return { tokens };
- }
- };
- /**
- * Available text preprocessing functions.
- */
- export const AVAILABLE_PREPROCESSING = List.of(tokenize, leftPadding).sortBy((e) => e.type);
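
The padding arithmetic above keeps maxSequenceLength + 1 tokens so the last input token still has a next-token label, then splits the sequence into xs and a ys shifted by one. A plain-array sketch of that scheme (hypothetical helper; ys stays as token ids here rather than the one-hot encoding the removed code builds):

function leftPadWithLabels(
  tokens: number[],
  maxLength: number,
  padTokenId: number,
): { xs: number[]; ys: number[] } {
  const withLabel = maxLength + 1; // one extra token to label the last input
  if (tokens.length > withLabel)
    throw new Error("more tokens than expected after truncation");
  const padded = new Array(withLabel - tokens.length)
    .fill(padTokenId)
    .concat(tokens); // pad on the left, as text generation requires
  return { xs: padded.slice(0, maxLength), ys: padded.slice(1) };
}

leftPadWithLabels([7, 8, 9], 4, 0); // { xs: [0, 0, 7, 8], ys: [0, 7, 8, 9] }
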
package/dist/dataset/data/tabular_data.d.ts DELETED
@@ -1,11 +0,0 @@
- import * as tf from '@tensorflow/tfjs';
- import type { Task } from '../../index.js';
- import { Data } from './data.js';
- /**
- * Disco data made of tabular (.csv, .tsv, etc.) files.
- */
- export declare class TabularData extends Data {
- readonly availablePreprocessing: import("immutable").List<import("./preprocessing/base.js").PreprocessingFunction>;
- static init(dataset: tf.data.Dataset<tf.TensorContainer>, task: Task, size?: number): Promise<TabularData>;
- protected create(dataset: tf.data.Dataset<tf.TensorContainer>, task: Task, size: number): TabularData;
- }
package/dist/dataset/data/tabular_data.js DELETED
@@ -1,24 +0,0 @@
- import { Data } from './data.js';
- import { TABULAR_PREPROCESSING } from './preprocessing/index.js';
- /**
- * Disco data made of tabular (.csv, .tsv, etc.) files.
- */
- export class TabularData extends Data {
- availablePreprocessing = TABULAR_PREPROCESSING;
- static async init(dataset, task, size) {
- // Force the check of the data column format (among other things) before proceeding
- // to training, for better error handling. An incorrectly formatted line might still
- // cause an error during training, because of the lazy aspect of the dataset; we only
- // load/read the tabular file's lines on training.
- try {
- await dataset.iterator();
- }
- catch (cause) {
- throw new Error('data input format not compatible with chosen task', { cause });
- }
- return new TabularData(dataset, task, size);
- }
- create(dataset, task, size) {
- return new TabularData(dataset, task, size);
- }
- }
package/dist/dataset/data/text_data.d.ts DELETED
@@ -1,11 +0,0 @@
- import * as tf from '@tensorflow/tfjs';
- import type { Task } from '../../index.js';
- import { Data } from './data.js';
- /**
- * Disco data made of textual samples.
- */
- export declare class TextData extends Data {
- readonly availablePreprocessing: import("immutable").List<import("./preprocessing/base.js").PreprocessingFunction>;
- static init(dataset: tf.data.Dataset<tf.TensorContainer>, task: Task, size?: number): Promise<TextData>;
- protected create(dataset: tf.data.Dataset<tf.TensorContainer>, task: Task, size?: number): TextData;
- }
package/dist/dataset/data/text_data.js DELETED
@@ -1,14 +0,0 @@
- import { Data } from './data.js';
- import { TEXT_PREPROCESSING } from './preprocessing/index.js';
- /**
- * Disco data made of textual samples.
- */
- export class TextData extends Data {
- availablePreprocessing = TEXT_PREPROCESSING;
- static init(dataset, task, size) {
- return Promise.resolve(new TextData(dataset, task, size));
- }
- create(dataset, task, size) {
- return new TextData(dataset, task, size);
- }
- }
package/dist/processing.d.ts DELETED
@@ -1,35 +0,0 @@
- /** Dataset shapers, convenient to map with */
- import { PreTrainedTokenizer } from "@xenova/transformers";
- import { List } from "immutable";
- import { Image } from "./dataset/image.js";
- /**
- * Convert a string to a number
- *
- * @throws if it isn't written as a number
- */
- export declare function convertToNumber(raw: string): number;
- /**
- * Return the named field of an object with string values
- *
- * @throws if the named field isn't there
- */
- export declare function extractColumn(row: Partial<Record<string, string>>, column: string): string;
- /**
- * Return the index of the element in the given list
- *
- * @throws if not found
- */
- export declare function indexInList(element: string, elements: List<string>): number;
- /**
- * Tokenize and truncates input strings
- *
- * @param length number of tokens
- * @returns encoded string in an array of token, size of max_length
- */
- export declare function tokenizeAndLeftPad(line: string, tokenizer: PreTrainedTokenizer, length: number): number[];
- /** Remove the alpha channel of an image */
- export declare function removeAlpha<W extends number, H extends number>(image: Image<4, W, H>): Image<3, W, H>;
- export declare function removeAlpha<D extends 1 | 3, W extends number, H extends number>(image: Image<D | 4, W, H>): Image<D, W, H>;
- /** Convert monochrome images to multicolor */
- export declare function expandToMulticolor<W extends number, H extends number>(image: Image<1, W, H>): Image<3, W, H>;
- export declare function expandToMulticolor<D extends 3 | 4, W extends number, H extends number>(image: Image<1 | D, W, H>): Image<D, W, H>;
package/dist/processing.js DELETED
@@ -1,89 +0,0 @@
- /** Dataset shapers, convenient to map with */
- import { Repeat, Seq } from "immutable";
- import { Image } from "./dataset/image.js";
- /**
- * Convert a string to a number
- *
- * @throws if it isn't written as a number
- */
- export function convertToNumber(raw) {
- const num = Number.parseFloat(raw);
- if (Number.isNaN(num))
- throw new Error(`unable to parse "${raw}" as number`);
- return num;
- }
- /**
- * Return the named field of an object with string values
- *
- * @throws if the named field isn't there
- */
- export function extractColumn(row, column) {
- const raw = row[column];
- if (raw === undefined)
- throw new Error(`${column} not found in row`);
- return raw;
- }
- /**
- * Return the index of the element in the given list
- *
- * @throws if not found
- */
- export function indexInList(element, elements) {
- const ret = elements.indexOf(element);
- if (ret === -1)
- throw new Error(`${element} not found in list`);
- return ret;
- }
- function isArrayOfNumber(raw) {
- return Array.isArray(raw) && raw.every((e) => typeof e === "number");
- }
- /**
- * Tokenize and truncates input strings
- *
- * @param length number of tokens
- * @returns encoded string in an array of token, size of max_length
- */
- export function tokenizeAndLeftPad(line, tokenizer, length) {
- if (!Number.isInteger(length))
- throw new Error("length should be an integer");
- // Transformers.js currently only supports right padding while we need left for text generation
- // Right padding should be supported in the future, once it is, we can directly pad while tokenizing
- // https://github.com/xenova/transformers.js/blob/8804c36591d11d8456788d1bb4b16489121b3be2/src/tokenizers.js#L2517
- const tokenized = tokenizer(line, {
- padding: false,
- truncation: true,
- return_tensor: false,
- max_length: length,
- });
- if (typeof tokenized !== "object" ||
- tokenized === null ||
- !("input_ids" in tokenized) ||
- !isArrayOfNumber(tokenized.input_ids))
- throw new Error("tokenizer returns unexcepted type");
- const tokens = tokenized.input_ids;
- const paddingSize = length - tokens.length;
- if (paddingSize < 0)
- throw new Error("tokenized returned more token than excepted");
- const padding = new Array(paddingSize);
- padding.fill(tokenizer.pad_token_id);
- const padded = padding.concat(tokens);
- return padded;
- }
- export function removeAlpha(image) {
- switch (image.depth) {
- case 1:
- case 3:
- return new Image(image.data, image.width, image.height, image.depth);
- case 4:
- return new Image(image.data.filter((_, i) => i % 4 !== 3), image.width, image.height, 3);
- }
- }
- export function expandToMulticolor(image) {
- switch (image.depth) {
- case 1:
- return new Image(Uint8Array.from(Seq(image.data).flatMap((v) => Repeat(v, 3))), image.width, image.height, 3);
- case 3:
- case 4:
- return new Image(image.data, image.width, image.height, image.depth);
- }
- }
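
These helpers presumably live on under the new processing/ directory added in this release (files 36-43 above). A hypothetical usage of the column helpers, assuming their signatures are unchanged (the import path is a guess):

// import path is a guess; these helpers moved during this release
import { convertToNumber, extractColumn } from "@epfml/discojs";

const row: Partial<Record<string, string>> = { PassengerId: "42", Age: "29.5" };
const age = convertToNumber(extractColumn(row, "Age")); // 29.5
extractColumn(row, "Fare"); // throws: "Fare not found in row"
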
package/dist/types.d.ts DELETED
@@ -1,3 +0,0 @@
- import { Dataset, Image, Tabular, Text } from "./dataset/index.js";
- export type TypedDataset = ["image", Dataset<Image>] | ["tabular", Dataset<Tabular>] | ["text", Dataset<Text>];
- export type TypedLabeledDataset = ["image", Dataset<[Image, label: string]>] | ["tabular", Dataset<Tabular>] | ["text", Dataset<Text>];
package/dist/types.js DELETED
@@ -1 +0,0 @@
- export {};
package/dist/validation/index.d.ts DELETED
@@ -1 +0,0 @@
- export { Validator } from './validator.js';
package/dist/validation/index.js DELETED
@@ -1 +0,0 @@
- export { Validator } from './validator.js';
package/dist/validation/validator.d.ts DELETED
@@ -1,10 +0,0 @@
- import type { Model, Task, TypedDataset, TypedLabeledDataset } from "../index.js";
- export declare class Validator {
- #private;
- readonly task: Task;
- constructor(task: Task, model: Model);
- /** infer every line of the dataset and check that it is as labeled */
- test(dataset: TypedLabeledDataset): AsyncGenerator<boolean>;
- /** use the model to predict every line of the dataset */
- infer(dataset: TypedDataset): AsyncGenerator<number, void>;
- }
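
Since test() yields one boolean per sample, accuracy is the ratio of true values. A hypothetical usage sketch of this removed API, assuming a task, a model, and a labeled tabular dataset are already in hand:

const validator = new Validator(task, model); // task and model assumed given
let correct = 0;
let total = 0;
for await (const hit of validator.test(["tabular", labeledDataset])) {
  if (hit) correct++;
  total++;
}
console.log(`accuracy: ${correct / total}`);
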
package/dist/validation/validator.js DELETED
@@ -1,113 +0,0 @@
- import * as tf from "@tensorflow/tfjs";
- import { datasetToData, labeledDatasetToData, } from "../dataset/data/helpers.js";
- function intoTFDataset(iter) {
- // @ts-expect-error generator
- return tf.data.generator(async function* () {
- yield* iter;
- });
- }
- export class Validator {
- task;
- #model;
- constructor(task, model) {
- this.task = task;
- this.#model = model;
- }
- /** infer every line of the dataset and check that it is as labeled */
- async *test(dataset) {
- const preprocessed = (await labeledDatasetToData(this.task, dataset)).preprocess();
- const batched = preprocessed.batch().dataset;
- const iterator = await tf.data
- .zip([
- preprocessed.dataset.map((t) => {
- if (typeof t !== "object" ||
- !("ys" in t) ||
- !(t.ys instanceof tf.Tensor) ||
- !(t.ys.rank === 1 || t.ys.rank === 2))
- throw new Error("unexpected preprocessed dataset");
- if ("xs" in t)
- tf.dispose(t.xs);
- return t.ys;
- }),
- intoTFDataset(this.#inferOnBatchedData(batched)),
- ])
- .iterator();
- for (let iter = await iterator.next(); iter.done !== true; iter = await iterator.next()) {
- const zipped = iter.value;
- const label = await getLabel(zipped[0]);
- tf.dispose(zipped[0]);
- const infered = zipped[1];
- yield label === infered;
- }
- }
- /** use the model to predict every line of the dataset */
- async *infer(dataset) {
- const data = await datasetToData(this.task, dataset);
- const batched = data.preprocess().batch().dataset;
- yield* this.#inferOnBatchedData(batched);
- }
- async *#inferOnBatchedData(batched) {
- const iterator = await batched.iterator();
- for (let iter = await iterator.next(); iter.done !== true; iter = await iterator.next()) {
- const row = iter.value;
- if (typeof row !== "object" ||
- !("xs" in row) ||
- !(row.xs instanceof tf.Tensor))
- throw new Error("unexpected shape of dataset");
- const prediction = await this.#model.predict(row.xs);
- tf.dispose(row);
- let predictions;
- switch (prediction.rank) {
- case 2:
- case 3:
- predictions = await getLabels(
- // cast as rank was just checked
- prediction);
- prediction.dispose();
- break;
- default:
- throw new Error("unexpected batched prediction shape");
- }
- prediction.dispose();
- for (const prediction of predictions)
- yield prediction;
- }
- }
- }
- async function getLabels(ys) {
- // cast as unstack drop a dimension and tfjs doesn't type correctly
- return Promise.all(tf.unstack(ys).map((y) => {
- const ret = getLabel(y);
- y.dispose();
- return ret;
- }));
- }
- async function getLabel(ys) {
- switch (ys.rank) {
- case 1: {
- if (ys.shape[0] == 1) {
- // Binary classification
- const threshold = tf.scalar(0.5);
- const binaryTensor = ys.greaterEqual(threshold);
- const binaryArray = await binaryTensor.data();
- tf.dispose([binaryTensor, threshold]);
- return binaryArray[0];
- }
- // Multi-class classification
- const indexTensor = ys.argMax();
- const indexArray = await indexTensor.data();
- tf.dispose([indexTensor]);
- return indexArray[0];
- // Multi-label classification is not supported
- }
- case 2: {
- // it's LLM, we only extract the next token
- const firstToken = tf.tidy(() => ys.gather([0]).squeeze().argMax());
- const raw = await firstToken.data();
- firstToken.dispose();
- return raw[0];
- }
- default:
- throw new Error("unexpected tensor rank");
- }
- }
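
getLabel above encodes a simple rule: a length-1 output is a sigmoid score thresholded at 0.5, while longer outputs are argmax'd into a class index. Restated on a plain probability array (a sketch, not the removed API):

function labelOf(output: number[]): number {
  if (output.length === 1)
    return output[0] >= 0.5 ? 1 : 0; // binary: threshold the single sigmoid score
  return output.indexOf(Math.max(...output)); // multi-class: index of highest probability
}

labelOf([0.8]); // 1
labelOf([0.1, 0.7, 0.2]); // 1
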