npm - @epfml/discojs - Versions diffs - 3.0.1-p20241025115642.0 → 3.0.1-p20241107104659.0 - Mend

@epfml/discojs 3.0.1-p20241025115642.0 → 3.0.1-p20241107104659.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (93)

package/dist/aggregator/get.d.ts +3 -3
package/dist/client/client.d.ts +5 -5
package/dist/client/decentralized/decentralized_client.d.ts +2 -2
package/dist/client/federated/federated_client.d.ts +2 -2
package/dist/client/utils.d.ts +2 -2
package/dist/dataset/dataset.d.ts +9 -2
package/dist/dataset/dataset.js +83 -36
package/dist/dataset/image.d.ts +5 -0
package/dist/dataset/image.js +6 -1
package/dist/dataset/index.d.ts +0 -1
package/dist/dataset/index.js +0 -1
package/dist/dataset/types.d.ts +2 -0
package/dist/default_tasks/cifar10.d.ts +1 -1
package/dist/default_tasks/cifar10.js +2 -3
package/dist/default_tasks/lus_covid.d.ts +1 -1
package/dist/default_tasks/lus_covid.js +2 -3
package/dist/default_tasks/mnist.d.ts +1 -1
package/dist/default_tasks/mnist.js +3 -5
package/dist/default_tasks/simple_face.d.ts +1 -1
package/dist/default_tasks/simple_face.js +2 -3
package/dist/default_tasks/titanic.d.ts +1 -1
package/dist/default_tasks/titanic.js +3 -6
package/dist/default_tasks/wikitext.d.ts +1 -1
package/dist/default_tasks/wikitext.js +1 -2
package/dist/index.d.ts +4 -5
package/dist/index.js +4 -5
package/dist/models/gpt/index.d.ts +13 -16
package/dist/models/gpt/index.js +62 -43
package/dist/models/gpt/model.d.ts +1 -15
package/dist/models/gpt/model.js +1 -75
package/dist/models/model.d.ts +7 -12
package/dist/models/tfjs.d.ts +10 -8
package/dist/models/tfjs.js +106 -44
package/dist/models/tokenizer.d.ts +1 -1
package/dist/privacy.js +1 -1
package/dist/processing/image.d.ts +18 -0
package/dist/processing/image.js +75 -0
package/dist/processing/index.d.ts +8 -0
package/dist/processing/index.js +106 -0
package/dist/processing/tabular.d.ts +19 -0
package/dist/processing/tabular.js +33 -0
package/dist/processing/text.d.ts +11 -0
package/dist/processing/text.js +33 -0
package/dist/serialization/model.d.ts +3 -3
package/dist/serialization/model.js +19 -6
package/dist/task/task.d.ts +4 -3
package/dist/task/task.js +5 -3
package/dist/task/task_handler.d.ts +3 -3
package/dist/task/task_provider.d.ts +4 -4
package/dist/task/training_information.d.ts +25 -16
package/dist/task/training_information.js +76 -72
package/dist/training/disco.d.ts +20 -12
package/dist/training/disco.js +32 -13
package/dist/training/trainer.d.ts +6 -7
package/dist/training/trainer.js +6 -6
package/dist/types/data_format.d.ts +40 -0
package/dist/types/index.d.ts +2 -0
package/dist/types/index.js +1 -0
package/dist/validator.d.ts +10 -0
package/dist/validator.js +30 -0
package/package.json +4 -2
package/dist/dataset/data/data.d.ts +0 -47
package/dist/dataset/data/data.js +0 -88
package/dist/dataset/data/data_split.d.ts +0 -8
package/dist/dataset/data/helpers.d.ts +0 -10
package/dist/dataset/data/helpers.js +0 -97
package/dist/dataset/data/image_data.d.ts +0 -11
package/dist/dataset/data/image_data.js +0 -43
package/dist/dataset/data/index.d.ts +0 -5
package/dist/dataset/data/index.js +0 -5
package/dist/dataset/data/preprocessing/base.d.ts +0 -16
package/dist/dataset/data/preprocessing/base.js +0 -1
package/dist/dataset/data/preprocessing/image_preprocessing.d.ts +0 -13
package/dist/dataset/data/preprocessing/image_preprocessing.js +0 -42
package/dist/dataset/data/preprocessing/index.d.ts +0 -4
package/dist/dataset/data/preprocessing/index.js +0 -3
package/dist/dataset/data/preprocessing/tabular_preprocessing.d.ts +0 -13
package/dist/dataset/data/preprocessing/tabular_preprocessing.js +0 -45
package/dist/dataset/data/preprocessing/text_preprocessing.d.ts +0 -13
package/dist/dataset/data/preprocessing/text_preprocessing.js +0 -100
package/dist/dataset/data/tabular_data.d.ts +0 -11
package/dist/dataset/data/tabular_data.js +0 -24
package/dist/dataset/data/text_data.d.ts +0 -11
package/dist/dataset/data/text_data.js +0 -14
package/dist/processing.d.ts +0 -35
package/dist/processing.js +0 -89
package/dist/types.d.ts +0 -3
package/dist/types.js +0 -1
package/dist/validation/index.d.ts +0 -1
package/dist/validation/index.js +0 -1
package/dist/validation/validator.d.ts +0 -10
package/dist/validation/validator.js +0 -113
/package/dist/{dataset/data/data_split.js → types/data_format.js} +0 -0

package/dist/models/gpt/index.js CHANGED Viewed

@@ -2,21 +2,27 @@
  * this code is taken from gpt-tfjs with modifications from @peacefulotter and @lukemovement
  **/
 import createDebug from "debug";
-import { List } from 'immutable';
-import * as tf from '@tensorflow/tfjs';
-import { WeightsContainer } from '../../index.js';
+import { List, Range } from "immutable";
+import * as tf from "@tensorflow/tfjs";
+import { WeightsContainer } from "../../index.js";
 import { Model, EpochLogs } from "../index.js";
-import { GPTForCausalLM } from './model.js';
-import { DEFAULT_CONFIG } from './config.js';
-import evaluate from './evaluate.js';
+import { GPTModel } from "./model.js";
+import { DEFAULT_CONFIG } from "./config.js";
+import evaluate from "./evaluate.js";
 const debug = createDebug("discojs:models:gpt");
 export class GPT extends Model {
     model;
+    #blockSize;
     #maxBatchCount;
+    #vocabSize;
     constructor(partialConfig, layersModel) {
         super();
-        this.model = new GPTForCausalLM(partialConfig, layersModel);
+        const model = new GPTModel(partialConfig, layersModel);
+        model.compile();
+        this.model = model;
+        this.#blockSize = partialConfig?.blockSize ?? DEFAULT_CONFIG.blockSize;
         this.#maxBatchCount = partialConfig?.maxIter ?? DEFAULT_CONFIG.maxIter;
+        this.#vocabSize = partialConfig?.vocabSize ?? DEFAULT_CONFIG.vocabSize;
     }
     /**
      * The GPT train methods wraps the model.fitDataset call in a for loop to act as a generator (of logs)
@@ -27,26 +33,20 @@ export class GPT extends Model {
      * @param epochs the number of passes of the training dataset
      * @param tracker
      */
-    async *train(trainingData, validationData) {
-        this.model.compile();
-        const batches = await trainingData.iterator(); // tf.LazyIterator isn't an AsyncGenerator
+    async *train(trainingDataset, validationDataset) {
         let batchesLogs = List();
-        for (let batchNumber = 0; batchNumber < this.#maxBatchCount; batchNumber++) {
-            const iteration = await batches.next();
-            if (iteration.done)
-                break;
-            const batch = iteration.value;
+        for await (const [batch, _] of trainingDataset.zip(Range(0, this.#maxBatchCount))) {
             const batchLogs = await this.#runBatch(batch);
-            tf.dispose(batch);
             yield batchLogs;
             batchesLogs = batchesLogs.push(batchLogs);
         }
-        const validation = validationData && (await this.#evaluate(validationData));
+        const validation = validationDataset && (await this.#evaluate(validationDataset));
         return new EpochLogs(batchesLogs, validation);
     }
     async #runBatch(batch) {
+        const tfBatch = this.#batchToTF(batch);
         let logs;
-        await this.model.fitDataset(tf.data.array([batch]), {
+        await this.model.fitDataset(tf.data.array([tfBatch]), {
             epochs: 1,
             verbose: 0, // don't pollute
             callbacks: {
@@ -55,6 +55,7 @@ export class GPT extends Model {
                 },
             },
         });
+        tf.dispose(tfBatch);
         if (logs === undefined)
             throw new Error("batch didn't gave any logs");
         const { loss, acc: accuracy } = logs;
@@ -67,38 +68,56 @@ export class GPT extends Model {
         };
     }
     async #evaluate(dataset) {
-        const evaluation = await evaluate(this.model, dataset.map((t) => {
-            switch (t) {
-                case null:
-                case undefined:
-                    throw new Error("nullish value in dataset");
-                default:
-                    // TODO unsafe cast
-                    return t;
-            }
-        }), this.config.maxEvalBatches);
+        const evaluation = await evaluate(this.model, tf.data.generator(async function* () {
+            yield* dataset.map((batch) => this.#batchToTF(batch));
+        }.bind(this)), this.config.maxEvalBatches);
         return {
             accuracy: evaluation.val_acc,
             loss: evaluation.val_loss,
         };
     }
-    predict(input) {
-        const ret = this.model.predict(input);
-        if (Array.isArray(ret)) {
-            throw new Error("prediction yield many Tensors but should have only returned one");
-        }
-        return Promise.resolve(ret);
+    #batchToTF(batch) {
+        return tf.tidy(() => ({
+            xs: tf.stack(batch.map(([line]) => tf.tensor1d(line.toArray(), "int32")).toArray()), // cast as stack doesn't type
+            ys: tf.stack(batch
+                .map(([line, next]) => tf.oneHot(line.shift().push(next).toArray(), this.#vocabSize))
+                .toArray()), // cast as oneHot/stack doesn't type
+        }));
     }
-    async generate(input, tokenizer, newTokens = 10) {
-        const { input_ids: tokens } = await tokenizer(input, { return_tensor: false });
-        const generationConfig = {
-            maxNewTokens: newTokens,
+    async predict(batch, options) {
+        const config = {
             temperature: 1.0,
-            doSample: false
+            doSample: false,
+            ...options,
         };
-        const predictedTokens = await this.model.generate(tokens, generationConfig);
-        const generatedWords = tokenizer.decode(predictedTokens[0]);
-        return generatedWords;
+        return List(await Promise.all(batch.map((tokens) => this.#predictSingle(tokens, config))));
+    }
+    async #predictSingle(tokens, config) {
+        // slice input tokens if longer than context length
+        tokens = tokens.slice(-this.#blockSize);
+        const input = tf.tidy(() => tf.tensor1d(tokens.toArray(), "int32").expandDims(0));
+        const logits = tf.tidy(() => {
+            const output = this.model.predict(input);
+            if (Array.isArray(output))
+                throw new Error("The model outputs too multiple values");
+            if (output.rank !== 3)
+                throw new Error("The model outputs wrong shape");
+            return output.squeeze([0]);
+        });
+        input.dispose();
+        const probs = tf.tidy(() => logits
+            .slice([logits.shape[0] - 1])
+            .squeeze([0])
+            .div(config.temperature)
+            .softmax());
+        logits.dispose();
+        const next = tf.tidy(() => config.doSample
+            ? tf.multinomial(probs, 1).squeeze([0])
+            : probs.argMax());
+        probs.dispose();
+        const ret = await next.array();
+        next.dispose();
+        return ret;
     }
     get config() {
         return this.model.getGPTConfig;
@@ -117,7 +136,7 @@ export class GPT extends Model {
     serialize() {
         return {
             weights: this.weights,
-            config: this.config
+            config: this.config,
         };
     }
     extract() {

package/dist/models/gpt/model.d.ts CHANGED Viewed

@@ -14,25 +14,11 @@ export declare abstract class Dataset<T> {
  * GPTModel extends tf.LayersModel and overrides tfjs' default training loop
  *
  */
-declare class GPTModel extends tf.LayersModel {
+export declare class GPTModel extends tf.LayersModel {
     protected readonly config: Required<GPTConfig>;
     constructor(partialConfig?: GPTConfig, layersModel?: tf.LayersModel);
     get getGPTConfig(): Required<GPTConfig>;
     compile(): void;
     fitDataset<T>(dataset: Dataset<T>, trainingArgs: tf.ModelFitDatasetArgs<T>): Promise<tf.History>;
 }
-interface GenerateConfig {
-    maxNewTokens: number;
-    temperature: number;
-    doSample: boolean;
-}
-/**
- * GPTForCausalLM stands for GPT model for Causal Language Modeling. Causal because it only looks at past tokens and not future ones
- * This class extends GPTModel and adds supports for text generation
- *
- */
-export declare class GPTForCausalLM extends GPTModel {
-    generate(idxRaw: tf.TensorLike, conf: GenerateConfig): Promise<number[][]>;
-    private generateOnce;
-}
 export {};

package/dist/models/gpt/model.js CHANGED Viewed

@@ -9,7 +9,7 @@ const debug = createDebug("discojs:models:gpt");
  * GPTModel extends tf.LayersModel and overrides tfjs' default training loop
  *
  */
-class GPTModel extends tf.LayersModel {
+export class GPTModel extends tf.LayersModel {
     config;
     constructor(partialConfig, layersModel) {
         // Fill missing config parameters with default values
@@ -124,77 +124,3 @@ class GPTModel extends tf.LayersModel {
         return new tf.History();
     }
 }
-const defaultGenerateConfig = {
-    maxNewTokens: 20,
-    temperature: 1.0,
-    doSample: false
-};
-function prepareIdx(idx) {
-    return tf.tidy(() => {
-        let ret;
-        if (idx instanceof tf.Tensor) {
-            ret = idx.clone();
-        }
-        else {
-            ret = tf.tensor(idx);
-        }
-        if (ret.dtype !== 'int32') {
-            ret = ret.toInt();
-        }
-        switch (ret.shape.length) {
-            case 1:
-                return ret.expandDims(0);
-            case 2:
-                return ret;
-            default:
-                throw new Error('unexpected shape');
-        }
-    });
-}
-/**
- * GPTForCausalLM stands for GPT model for Causal Language Modeling. Causal because it only looks at past tokens and not future ones
- * This class extends GPTModel and adds supports for text generation
- *
- */
-export class GPTForCausalLM extends GPTModel {
-    async generate(idxRaw, conf) {
-        const config = Object.assign({}, defaultGenerateConfig, conf);
-        let idx = prepareIdx(idxRaw);
-        for (let step = 0; step < config.maxNewTokens; step++) {
-            const idxNext = this.generateOnce(this, idx, config);
-            const idxNew = idx.concat(idxNext, 1);
-            tf.dispose(idx);
-            idx = idxNew;
-            tf.dispose(idxNext);
-        }
-        const idxArr = await idx.array();
-        tf.dispose(idx);
-        return idxArr;
-    }
-    generateOnce(model, idx, config) {
-        const idxNext = tf.tidy(() => {
-            // slice input tokens if longer than context length
-            const blockSize = this.config.blockSize;
-            idx = idx.shape[1] <= blockSize
-                ? idx : idx.slice([0, idx.shape[1] - blockSize]);
-            const output = model.predict(idx);
-            if (Array.isArray(output))
-                throw new Error('The model outputs too multiple values');
-            if (output.shape.length !== 3)
-                throw new Error('The model outputs wrong shape');
-            const logits = output;
-            const logitsScaled = logits
-                .slice([0, idx.shape[1] - 1, 0])
-                .reshape([logits.shape[0], logits.shape[2]])
-                .div(tf.scalar(config.temperature));
-            const probs = logitsScaled.softmax(-1);
-            if (config.doSample) {
-                return tf.multinomial(probs, 1);
-            }
-            else {
-                return probs.argMax(-1).expandDims(1);
-            }
-        });
-        return idxNext;
-    }
-}

package/dist/models/model.d.ts CHANGED Viewed

@@ -1,14 +1,11 @@
-import type tf from "@tensorflow/tfjs";
-import type { WeightsContainer } from "../index.js";
+import type { Batched, Dataset, DataFormat, DataType, WeightsContainer } from "../index.js";
 import type { BatchLogs, EpochLogs } from "./logs.js";
-export type Prediction = tf.Tensor;
-export type Sample = tf.Tensor;
 /**
  * Trainable predictor
  *
  * Allow for various implementation of models (various train function, tensor-library, ...)
  **/
-export declare abstract class Model implements Disposable {
+export declare abstract class Model<D extends DataType> implements Disposable {
     /** Return training state */
     abstract get weights(): WeightsContainer;
     /** Set training state */
@@ -16,15 +13,13 @@ export declare abstract class Model implements Disposable {
     /**
      * Improve predictor
      *
-     * @param trainingData dataset to optimize for
-     * @param validationData dataset to measure how well it is training
-     * @param epochs number of pass over the training dataset
-     * @param tracker watch the various steps
-     * @yields on every epoch, training can be stop by `return`ing it
+     * @param trainingDataset dataset to optimize for
+     * @param validationDataset dataset to measure how well it is training
+     * @yields on every epoch, training can be stop by `return`ing or `throw`ing it
      */
-    abstract train(trainingData: tf.data.Dataset<tf.TensorContainer>, validationData?: tf.data.Dataset<tf.TensorContainer>): AsyncGenerator<BatchLogs, EpochLogs>;
+    abstract train(trainingDataset: Dataset<Batched<DataFormat.ModelEncoded[D]>>, validationDataset?: Dataset<Batched<DataFormat.ModelEncoded[D]>>): AsyncGenerator<BatchLogs, EpochLogs>;
     /** Predict likely values */
-    abstract predict(input: Sample): Promise<Prediction>;
+    abstract predict(batch: Batched<DataFormat.ModelEncoded[D][0]>): Promise<Batched<DataFormat.ModelEncoded[D][1]>>;
     /**
      * This method is automatically called to cleanup the memory occupied by the model
      * when leaving the definition scope if the instance has been defined with the `using` keyword.

package/dist/models/tfjs.d.ts CHANGED Viewed

@@ -1,21 +1,22 @@
 import * as tf from '@tensorflow/tfjs';
-import { WeightsContainer } from '../index.js';
+import { Batched, Dataset, DataFormat, DataType, WeightsContainer } from "../index.js";
 import { BatchLogs } from './index.js';
 import { Model } from './index.js';
-import { Prediction, Sample } from './model.js';
 import { EpochLogs } from './logs.js';
+type Serialized<D extends DataType> = [D, tf.io.ModelArtifacts];
 /** TensorFlow JavaScript model with standard training */
-export declare class TFJS extends Model {
+export declare class TFJS<D extends "image" | "tabular"> extends Model<D> {
     #private;
+    readonly datatype: D;
     private readonly model;
     /** Wrap the given trainable model */
-    constructor(model: tf.LayersModel);
+    constructor(datatype: D, model: tf.LayersModel);
     get weights(): WeightsContainer;
     set weights(ws: WeightsContainer);
-    train(trainingData: tf.data.Dataset<tf.TensorContainer>, validationData?: tf.data.Dataset<tf.TensorContainer>): AsyncGenerator<BatchLogs, EpochLogs>;
-    predict(input: Sample): Promise<Prediction>;
-    static deserialize(raw: tf.io.ModelArtifacts): Promise<Model>;
-    serialize(): Promise<tf.io.ModelArtifacts>;
+    train(trainingDataset: Dataset<Batched<DataFormat.ModelEncoded[D]>>, validationDataset?: Dataset<Batched<DataFormat.ModelEncoded[D]>>): AsyncGenerator<BatchLogs, EpochLogs>;
+    predict(batch: Batched<DataFormat.ModelEncoded[D][0]>): Promise<Batched<DataFormat.ModelEncoded[D][1]>>;
+    static deserialize<D extends "image" | "tabular">([datatype, artifacts,]: Serialized<D>): Promise<TFJS<D>>;
+    serialize(): Promise<Serialized<D>>;
     [Symbol.dispose](): void;
     /**
      * extract wrapped model
@@ -24,3 +25,4 @@ export declare class TFJS extends Model {
      */
     extract(): tf.LayersModel;
 }
+export {};

package/dist/models/tfjs.js CHANGED Viewed

@@ -1,18 +1,22 @@
-import { List, Map } from 'immutable';
+import { List, Map, Range } from "immutable";
 import * as tf from '@tensorflow/tfjs';
-import { WeightsContainer } from '../index.js';
+import { WeightsContainer, } from "../index.js";
 import { Model } from './index.js';
 import { EpochLogs } from './logs.js';
 /** TensorFlow JavaScript model with standard training */
 export class TFJS extends Model {
+    datatype;
     model;
     /** Wrap the given trainable model */
-    constructor(model) {
+    constructor(datatype, model) {
         super();
+        this.datatype = datatype;
         this.model = model;
         if (model.loss === undefined) {
             throw new Error('TFJS models need to be compiled to be used');
         }
+        if (model.outputs.length !== 1)
+            throw new Error("only support single output model");
     }
     get weights() {
         return new WeightsContainer(this.model.weights.map((w) => w.read()));
@@ -20,57 +24,43 @@ export class TFJS extends Model {
     set weights(ws) {
         this.model.setWeights(ws.weights);
     }
-    async *train(trainingData, validationData) {
-        const batches = await trainingData.iterator(); // tf.LazyIterator isn't an AsyncGenerator
+    async *train(trainingDataset, validationDataset) {
         let batchesLogs = List();
-        for (let batchNumber = 0; true; batchNumber++) {
-            const iteration = await batches.next();
-            if (iteration.done)
-                break;
-            const batch = iteration.value;
+        for await (const [batch, batchNumber] of trainingDataset.zip(Range())) {
             const batchLogs = {
                 batch: batchNumber,
                 ...(await this.#runBatch(batch)),
             };
-            tf.dispose(batch);
             yield batchLogs;
             batchesLogs = batchesLogs.push(batchLogs);
         }
-        const validation = validationData && (await this.#evaluate(validationData));
+        const validation = validationDataset && (await this.#evaluate(validationDataset));
         return new EpochLogs(batchesLogs, validation);
     }
     async #runBatch(batch) {
-        let logs;
-        await this.model.fitDataset(tf.data.array([batch]), {
+        const { xs, ys } = this.#batchToTF(batch);
+        const { history } = await this.model.fit(xs, ys, {
             epochs: 1,
             verbose: 0, // don't pollute
-            callbacks: {
-                onEpochEnd: (_, cur) => {
-                    logs = cur;
-                },
-            },
         });
-        if (logs === undefined)
-            throw new Error("batch didn't gave any logs");
-        const { loss, acc: accuracy } = logs;
-        if (loss === undefined || isNaN(loss))
-            throw new Error("training loss is undefined or NaN");
+        const { loss: losses, acc: accuracies } = history;
+        if (losses === undefined ||
+            accuracies === undefined ||
+            typeof losses[0] !== "number" ||
+            typeof accuracies[0] !== "number" ||
+            isNaN(losses[0]) ||
+            isNaN(accuracies[0]))
+            throw new Error("training loss or accuracy is undefined or NaN");
         return {
-            accuracy,
-            loss,
+            accuracy: accuracies[0],
+            loss: losses[0],
             memoryUsage: tf.memory().numBytes / 1024 / 1024 / 1024,
         };
     }
     async #evaluate(dataset) {
-        const evaluation = await this.model.evaluateDataset(dataset.map((t) => {
-            switch (t) {
-                case null:
-                case undefined:
-                    throw new Error("nullish value in dataset");
-                default:
-                    return t;
-            }
-        }));
+        const evaluation = await this.model.evaluateDataset(tf.data.generator(async function* () {
+            yield* dataset.map((batch) => this.#batchToTF(batch));
+        }.bind(this)));
         const metricToValue = Map(List(this.model.metricsNames).zip(Array.isArray(evaluation)
             ? List(await Promise.all(evaluation.map((t) => t.data())))
             : List.of(await evaluation.data()))).map((values) => {
@@ -87,16 +77,39 @@ export class TFJS extends Model {
             throw new Error("some needed metrics are missing");
         return { accuracy, loss };
     }
-    predict(input) {
-        const ret = this.model.predict(input);
-        if (Array.isArray(ret)) {
-            throw new Error('prediction yield many Tensors but should have only returned one');
+    async predict(batch) {
+        async function cleanupPredicted(y) {
+            if (y.shape[0] === 1) {
+                // Binary classification
+                const threshold = tf.scalar(0.5);
+                const binaryTensor = y.greaterEqual(threshold);
+                const binaryArray = await binaryTensor.data();
+                tf.dispose([y, binaryTensor, threshold]);
+                return binaryArray[0];
+            }
+            // Multi-class classification
+            const indexTensor = y.argMax();
+            const indexArray = await indexTensor.data();
+            tf.dispose([y, indexTensor]);
+            return indexArray[0];
+            // Multi-label classification is not supported
         }
-        return Promise.resolve(ret);
+        const xs = this.#batchWithoutLabelToTF(batch);
+        const prediction = this.model.predict(xs);
+        if (Array.isArray(prediction))
+            throw new Error("prediction yield many Tensors but should have only returned one");
+        tf.dispose(xs);
+        if (prediction.rank !== 2)
+            throw new Error("unexpected batched prediction shape");
+        const ret = List(await Promise.all(tf.unstack(prediction).map((y) => cleanupPredicted(
+        // cast as unstack reduce by one the rank
+        y))));
+        prediction.dispose();
+        return ret;
     }
-    static async deserialize(raw) {
-        return new this(await tf.loadLayersModel({
-            load: () => Promise.resolve(raw)
+    static async deserialize([datatype, artifacts,]) {
+        return new this(datatype, await tf.loadLayersModel({
+            load: () => Promise.resolve(artifacts),
         }));
     }
     async serialize() {
@@ -115,7 +128,7 @@ export class TFJS extends Model {
         }, {
             includeOptimizer: true // keep model compiled
         });
-        return await ret;
+        return [this.datatype, await ret];
     }
     [Symbol.dispose]() {
         this.model.dispose();
@@ -128,4 +141,53 @@ export class TFJS extends Model {
     extract() {
         return this.model;
     }
+    #batchToTF(batch) {
+        const outputSize = tf.util.sizeFromShape(this.model.outputShape.map((dim) => {
+            if (Array.isArray(dim))
+                throw new Error("TODO support multiple outputs");
+            return dim ?? 1;
+        }));
+        switch (this.datatype) {
+            case "image": {
+                // cast as typescript doesn't reduce generic type
+                const b = batch;
+                return tf.tidy(() => ({
+                    xs: tf.stack(b
+                        .map(([image]) => tf.tensor3d(image.data, [image.width, image.height, 3], "float32"))
+                        .toArray()),
+                    ys: tf.stack(b
+                        .map(([_, label]) => tf.oneHot(label, outputSize, 1, 0, "int32"))
+                        .toArray()),
+                }));
+            }
+            case "tabular": {
+                // cast as typescript doesn't reduce generic type
+                const b = batch;
+                return tf.tidy(() => ({
+                    xs: tf.stack(b.map(([inputs, _]) => tf.tensor1d(inputs.toArray())).toArray()),
+                    ys: tf.stack(b.map(([_, output]) => tf.tensor1d([output])).toArray()),
+                }));
+            }
+        }
+        const _ = this.datatype;
+        throw new Error("should never happen");
+    }
+    #batchWithoutLabelToTF(batch) {
+        switch (this.datatype) {
+            case "image": {
+                // cast as typescript doesn't reduce generic type
+                const b = batch;
+                return tf.tidy(() => tf.stack(b
+                    .map((image) => tf.tensor3d(image.data, [image.width, image.height, 3], "float32"))
+                    .toArray()));
+            }
+            case "tabular": {
+                // cast as typescript doesn't reduce generic type
+                const b = batch;
+                return tf.tidy(() => tf.stack(b.map((inputs) => tf.tensor1d(inputs.toArray())).toArray()));
+            }
+        }
+        const _ = this.datatype;
+        throw new Error("should never happen");
+    }
 }

package/dist/models/tokenizer.d.ts CHANGED Viewed

@@ -11,4 +11,4 @@ import { PreTrainedTokenizer } from '@xenova/transformers';
  * @param task the task object specifying which tokenizer to use
  * @returns an initialized tokenizer object
  */
-export declare function getTaskTokenizer(task: Task): Promise<PreTrainedTokenizer>;
+export declare function getTaskTokenizer(task: Task<'text'>): Promise<PreTrainedTokenizer>;

package/dist/privacy.js CHANGED Viewed

@@ -5,7 +5,7 @@ async function frobeniusNorm(weights) {
         .reduce((a, b) => a.add(b))
         .data();
     if (squared.length !== 1)
-        throw new Error("unexcepted weights shape");
+        throw new Error("unexpected weights shape");
     return Math.sqrt(squared[0]);
 }
 /** Scramble weights */

package/dist/processing/image.d.ts ADDED Viewed

@@ -0,0 +1,18 @@
+import { Image } from "../index.js";
+/** Image where intensity is represented in the range 0..1 */
+export declare class NormalizedImage<D extends 1 | 3 | 4 = 1 | 3 | 4, W extends number = number, H extends number = number> {
+    readonly data: Readonly<Float32Array>;
+    readonly width: W;
+    readonly height: H;
+    readonly depth: D;
+    private constructor();
+    static from<D extends 1 | 3 | 4 = 1 | 3 | 4, W extends number = number, H extends number = number>(image: Image<D, W, H>): NormalizedImage<D, W, H>;
+}
+/** Remove the alpha channel of an image */
+export declare function removeAlpha<W extends number, H extends number>(image: Image<4, W, H>): Image<3, W, H>;
+export declare function removeAlpha<D extends 1 | 3, W extends number, H extends number>(image: Image<D | 4, W, H>): Image<D, W, H>;
+/** Convert monochrome images to multicolor */
+export declare function expandToMulticolor<W extends number, H extends number>(image: Image<1, W, H>): Image<3, W, H>;
+export declare function expandToMulticolor<D extends 3 | 4, W extends number, H extends number>(image: Image<1 | D, W, H>): Image<D, W, H>;
+export declare function resize<D extends 1 | 3 | 4, W extends number, H extends number>(width: W, height: H, image: Image<D, number, number>): Image<4, W, H>;
+export declare function normalize<D extends 1 | 3 | 4, W extends number, H extends number>(image: Image<D, W, H>): NormalizedImage<D, W, H>;