npm - @epfml/discojs - Versions diffs - 3.0.1-p20240821133014.0 → 3.0.1-p20240826092658.0 - Mend

@epfml/discojs 3.0.1-p20240821133014.0 → 3.0.1-p20240826092658.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

package/dist/dataset/data/data.d.ts +6 -7
package/dist/dataset/data/data.js +12 -7
package/dist/dataset/data/helpers.d.ts +10 -0
package/dist/dataset/data/helpers.js +97 -0
package/dist/dataset/data/image_data.d.ts +3 -3
package/dist/dataset/data/image_data.js +7 -2
package/dist/dataset/data/index.d.ts +0 -1
package/dist/dataset/data/preprocessing/text_preprocessing.js +23 -9
package/dist/dataset/data/tabular_data.d.ts +3 -3
package/dist/dataset/data/text_data.d.ts +3 -3
package/dist/dataset/dataset.d.ts +48 -5
package/dist/dataset/dataset.js +155 -1
package/dist/dataset/image.d.ts +14 -0
package/dist/dataset/image.js +21 -0
package/dist/dataset/index.d.ts +3 -5
package/dist/dataset/index.js +3 -3
package/dist/dataset/types.d.ts +4 -0
package/dist/dataset/types.js +2 -0
package/dist/index.d.ts +4 -0
package/dist/index.js +4 -0
package/dist/models/gpt/model.js +2 -0
package/dist/models/model.d.ts +1 -2
package/dist/models/tfjs.d.ts +4 -4
package/dist/models/tfjs.js +2 -1
package/dist/processing.d.ts +35 -0
package/dist/processing.js +89 -0
package/dist/training/disco.d.ts +7 -7
package/dist/training/disco.js +21 -19
package/dist/types.d.ts +3 -0
package/dist/types.js +1 -0
package/dist/validation/validator.d.ts +7 -23
package/dist/validation/validator.js +99 -105
package/package.json +1 -1
package/dist/dataset/data_loader/data_loader.d.ts +0 -13
package/dist/dataset/data_loader/data_loader.js +0 -2
package/dist/dataset/data_loader/image_loader.d.ts +0 -21
package/dist/dataset/data_loader/image_loader.js +0 -101
package/dist/dataset/data_loader/index.d.ts +0 -5
package/dist/dataset/data_loader/index.js +0 -4
package/dist/dataset/data_loader/tabular_loader.d.ts +0 -35
package/dist/dataset/data_loader/tabular_loader.js +0 -76
package/dist/dataset/data_loader/text_loader.d.ts +0 -14
package/dist/dataset/data_loader/text_loader.js +0 -25
package/dist/dataset/dataset_builder.d.ts +0 -51
package/dist/dataset/dataset_builder.js +0 -118

package/dist/models/tfjs.d.ts CHANGED Viewed

@@ -1,9 +1,9 @@
 import * as tf from '@tensorflow/tfjs';
 import { WeightsContainer } from '../index.js';
-import type { Dataset } from '../dataset/index.js';
-import { BatchLogs, EpochLogs } from './index.js';
+import { BatchLogs } from './index.js';
 import { Model } from './index.js';
-import type { Prediction, Sample } from './model.js';
+import { Prediction, Sample } from './model.js';
+import { EpochLogs } from './logs.js';
 /** TensorFlow JavaScript model with standard training */
 export declare class TFJS extends Model {
     #private;
@@ -12,7 +12,7 @@ export declare class TFJS extends Model {
     constructor(model: tf.LayersModel);
     get weights(): WeightsContainer;
     set weights(ws: WeightsContainer);
-    train(trainingData: Dataset, validationData?: Dataset): AsyncGenerator<BatchLogs, EpochLogs>;
+    train(trainingData: tf.data.Dataset<tf.TensorContainer>, validationData?: tf.data.Dataset<tf.TensorContainer>): AsyncGenerator<BatchLogs, EpochLogs>;
     predict(input: Sample): Promise<Prediction>;
     static deserialize(raw: tf.io.ModelArtifacts): Promise<Model>;
     serialize(): Promise<tf.io.ModelArtifacts>;

package/dist/models/tfjs.js CHANGED Viewed

@@ -1,8 +1,8 @@
 import { List, Map } from 'immutable';
 import * as tf from '@tensorflow/tfjs';
 import { WeightsContainer } from '../index.js';
-import { EpochLogs } from './index.js';
 import { Model } from './index.js';
+import { EpochLogs } from './logs.js';
 /** TensorFlow JavaScript model with standard training */
 export class TFJS extends Model {
     model;
@@ -78,6 +78,7 @@ export class TFJS extends Model {
                 throw new Error("more than one metric value");
             return values[0];
         });
+        tf.dispose(evaluation);
         const [accuracy, loss] = [
             metricToValue.get("acc"),
             metricToValue.get("loss"),

package/dist/processing.d.ts ADDED Viewed

@@ -0,0 +1,35 @@
+/** Dataset shapers, convenient to map with */
+import { PreTrainedTokenizer } from "@xenova/transformers";
+import { List } from "immutable";
+import { Image } from "./dataset/image.js";
+/**
+ * Convert a string to a number
+ *
+ * @throws if it isn't written as a number
+ */
+export declare function convertToNumber(raw: string): number;
+/**
+ * Return the named field of an object with string values
+ *
+ * @throws if the named field isn't there
+ */
+export declare function extractColumn(row: Partial<Record<string, string>>, column: string): string;
+/**
+ * Return the index of the element in the given list
+ *
+ * @throws if not found
+ */
+export declare function indexInList(element: string, elements: List<string>): number;
+/**
+ * Tokenize and truncates input strings
+ *
+ * @param length number of tokens
+ * @returns encoded string in an array of token, size of max_length
+ */
+export declare function tokenizeAndLeftPad(line: string, tokenizer: PreTrainedTokenizer, length: number): number[];
+/** Remove the alpha channel of an image */
+export declare function removeAlpha<W extends number, H extends number>(image: Image<4, W, H>): Image<3, W, H>;
+export declare function removeAlpha<D extends 1 | 3, W extends number, H extends number>(image: Image<D | 4, W, H>): Image<D, W, H>;
+/** Convert monochrome images to multicolor */
+export declare function expandToMulticolor<W extends number, H extends number>(image: Image<1, W, H>): Image<3, W, H>;
+export declare function expandToMulticolor<D extends 3 | 4, W extends number, H extends number>(image: Image<1 | D, W, H>): Image<D, W, H>;

package/dist/processing.js ADDED Viewed

@@ -0,0 +1,89 @@
+/** Dataset shapers, convenient to map with */
+import { Repeat, Seq } from "immutable";
+import { Image } from "./dataset/image.js";
+/**
+ * Convert a string to a number
+ *
+ * @throws if it isn't written as a number
+ */
+export function convertToNumber(raw) {
+    const num = Number.parseFloat(raw);
+    if (Number.isNaN(num))
+        throw new Error(`unable to parse "${raw}" as number`);
+    return num;
+}
+/**
+ * Return the named field of an object with string values
+ *
+ * @throws if the named field isn't there
+ */
+export function extractColumn(row, column) {
+    const raw = row[column];
+    if (raw === undefined)
+        throw new Error(`${column} not found in row`);
+    return raw;
+}
+/**
+ * Return the index of the element in the given list
+ *
+ * @throws if not found
+ */
+export function indexInList(element, elements) {
+    const ret = elements.indexOf(element);
+    if (ret === -1)
+        throw new Error(`${element} not found in list`);
+    return ret;
+}
+function isArrayOfNumber(raw) {
+    return Array.isArray(raw) && raw.every((e) => typeof e === "number");
+}
+/**
+ * Tokenize and truncates input strings
+ *
+ * @param length number of tokens
+ * @returns encoded string in an array of token, size of max_length
+ */
+export function tokenizeAndLeftPad(line, tokenizer, length) {
+    if (!Number.isInteger(length))
+        throw new Error("length should be an integer");
+    // Transformers.js currently only supports right padding while we need left for text generation
+    // Right padding should be supported in the future, once it is, we can directly pad while tokenizing
+    // https://github.com/xenova/transformers.js/blob/8804c36591d11d8456788d1bb4b16489121b3be2/src/tokenizers.js#L2517
+    const tokenized = tokenizer(line, {
+        padding: false,
+        truncation: true,
+        return_tensor: false,
+        max_length: length,
+    });
+    if (typeof tokenized !== "object" ||
+        tokenized === null ||
+        !("input_ids" in tokenized) ||
+        !isArrayOfNumber(tokenized.input_ids))
+        throw new Error("tokenizer returns unexcepted type");
+    const tokens = tokenized.input_ids;
+    const paddingSize = length - tokens.length;
+    if (paddingSize < 0)
+        throw new Error("tokenized returned more token than excepted");
+    const padding = new Array(paddingSize);
+    padding.fill(tokenizer.pad_token_id);
+    const padded = padding.concat(tokens);
+    return padded;
+}
+export function removeAlpha(image) {
+    switch (image.depth) {
+        case 1:
+        case 3:
+            return new Image(image.data, image.width, image.height, image.depth);
+        case 4:
+            return new Image(image.data.filter((_, i) => i % 4 !== 3), image.width, image.height, 3);
+    }
+}
+export function expandToMulticolor(image) {
+    switch (image.depth) {
+        case 1:
+            return new Image(Uint8Array.from(Seq(image.data).flatMap((v) => Repeat(v, 3))), image.width, image.height, 3);
+        case 3:
+        case 4:
+            return new Image(image.data, image.width, image.height, image.depth);
+    }
+}

package/dist/training/disco.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
-import { data, BatchLogs, EpochLogs, Logger, Memory, Task, TrainingInformation } from "../index.js";
-import { client as clients } from "../index.js";
+import { client as clients, BatchLogs, EpochLogs, Logger, Memory, Task, TrainingInformation } from "../index.js";
+import type { TypedLabeledDataset } from "../index.js";
 import type { Aggregator } from "../aggregator/index.js";
 import { RoundLogs, Trainer } from "./trainer.js";
 interface Config {
@@ -28,20 +28,20 @@ export declare class Disco {
         url: URL;
     }, config: Partial<Config>): Promise<Disco>;
     /** Train on dataset, yielding logs of every round. */
-    trainByRound(dataTuple: data.DataSplit): AsyncGenerator<RoundLogs>;
+    trainByRound(dataset: TypedLabeledDataset): AsyncGenerator<RoundLogs>;
     /** Train on dataset, yielding logs of every epoch. */
-    trainByEpoch(dataTuple: data.DataSplit): AsyncGenerator<EpochLogs>;
+    trainByEpoch(dataset: TypedLabeledDataset): AsyncGenerator<EpochLogs>;
     /** Train on dataset, yielding logs of every batch. */
-    trainByBatch(dataTuple: data.DataSplit): AsyncGenerator<BatchLogs>;
+    trainByBatch(dataTuple: TypedLabeledDataset): AsyncGenerator<BatchLogs>;
     /** Run whole train on dataset. */
-    trainFully(dataTuple: data.DataSplit): Promise<void>;
+    trainFully(dataTuple: TypedLabeledDataset): Promise<void>;
     /**
      * Train on dataset, yield the nested steps.
      *
      * Don't forget to await the yielded generator otherwise nothing will progress.
      * If you don't care about the whole process, use one of the other train methods.
      **/
-    train(dataTuple: data.DataSplit): AsyncGenerator<AsyncGenerator<AsyncGenerator<BatchLogs, EpochLogs>, RoundLogs>>;
+    train(dataset: TypedLabeledDataset): AsyncGenerator<AsyncGenerator<AsyncGenerator<BatchLogs, EpochLogs>, RoundLogs>>;
     /**
      * Stops the ongoing training instance without disconnecting the client.
      */

package/dist/training/disco.js CHANGED Viewed

@@ -1,8 +1,8 @@
-import { async_iterator, } from "../index.js";
-import { client as clients, ConsoleLogger, EmptyMemory } from "../index.js";
+import { async_iterator, client as clients, ConsoleLogger, EmptyMemory, } from "../index.js";
 import { getAggregator } from "../aggregator/index.js";
 import { enumerate, split } from "../utils/async_iterator.js";
 import { Trainer } from "./trainer.js";
+import { labeledDatasetToDataSplit } from "../dataset/data/helpers.js";
 /**
  * Top-level class handling distributed training from a client's perspective. It is meant to be
  * a convenient object providing a reduced yet complete API that wraps model training,
@@ -12,18 +12,14 @@ export class Disco {
     trainer;
     #client;
     #logger;
-    // small helper to avoid keeping Task & Memory around
-    #updateWorkingModel;
+    #memory;
+    #task;
     constructor(trainer, task, client, memory, logger) {
         this.trainer = trainer;
         this.#client = client;
         this.#logger = logger;
-        this.#updateWorkingModel = () => memory.updateWorkingModel({
-            type: "working",
-            taskID: task.id,
-            name: task.trainingInformation.modelID,
-            tensorBackend: task.trainingInformation.tensorBackend,
-        }, this.trainer.model);
+        this.#memory = memory;
+        this.#task = task;
     }
     /**
      * Connect to the given task and get ready to train.
@@ -70,8 +66,8 @@ export class Disco {
         return new Disco(new Trainer(task, model, client), task, client, memory, logger);
     }
     /** Train on dataset, yielding logs of every round. */
-    async *trainByRound(dataTuple) {
-        for await (const round of this.train(dataTuple)) {
+    async *trainByRound(dataset) {
+        for await (const round of this.train(dataset)) {
             const [roundGen, roundLogs] = async_iterator.split(round);
             for await (const epoch of roundGen)
                 for await (const _ of epoch)
@@ -80,8 +76,8 @@ export class Disco {
         }
     }
     /** Train on dataset, yielding logs of every epoch. */
-    async *trainByEpoch(dataTuple) {
-        for await (const round of this.train(dataTuple)) {
+    async *trainByEpoch(dataset) {
+        for await (const round of this.train(dataset)) {
             for await (const epoch of round) {
                 const [epochGen, epochLogs] = async_iterator.split(epoch);
                 for await (const _ of epochGen)
@@ -109,12 +105,13 @@ export class Disco {
      * Don't forget to await the yielded generator otherwise nothing will progress.
      * If you don't care about the whole process, use one of the other train methods.
      **/
-    async *train(dataTuple) {
+    async *train(dataset) {
         this.#logger.success("Training started.");
-        const trainData = dataTuple.train.preprocess().batch();
-        const validationData = dataTuple.validation?.preprocess().batch() ?? trainData;
+        const data = await labeledDatasetToDataSplit(this.#task, dataset);
+        const trainData = data.train.preprocess().batch().dataset;
+        const validationData = data.validation?.preprocess().batch().dataset ?? trainData;
         await this.#client.connect();
-        for await (const [round, epochs] of enumerate(this.trainer.train(trainData.dataset, validationData.dataset))) {
+        for await (const [round, epochs] of enumerate(this.trainer.train(trainData, validationData))) {
             yield async function* () {
                 const [gen, returnedRoundLogs] = split(epochs);
                 for await (const [epoch, batches] of enumerate(gen)) {
@@ -136,7 +133,12 @@ export class Disco {
                 }
                 return await returnedRoundLogs;
             }.bind(this)();
-            await this.#updateWorkingModel(this.trainer.model);
+            await this.#memory.updateWorkingModel({
+                type: "working",
+                taskID: this.#task.id,
+                name: this.#task.trainingInformation.modelID,
+                tensorBackend: this.#task.trainingInformation.tensorBackend,
+            }, this.trainer.model);
         }
         this.#logger.success("Training finished.");
     }

package/dist/types.d.ts ADDED Viewed

@@ -0,0 +1,3 @@
+import { Dataset, Image, Tabular, Text } from "./dataset/index.js";
+export type TypedDataset = ["image", Dataset<Image>] | ["tabular", Dataset<Tabular>] | ["text", Dataset<Text>];
+export type TypedLabeledDataset = ["image", Dataset<[Image, label: string]>] | ["tabular", Dataset<Tabular>] | ["text", Dataset<Text>];

package/dist/types.js ADDED Viewed

@@ -0,0 +1 @@
+export {};

package/dist/validation/validator.d.ts CHANGED Viewed

@@ -1,26 +1,10 @@
-import type { data, Model, Task, Logger, client as clients, Memory, ModelSource } from '../index.js';
+import type { Model, Task, TypedDataset, TypedLabeledDataset } from "../index.js";
 export declare class Validator {
+    #private;
     readonly task: Task;
-    readonly logger: Logger;
-    private readonly memory;
-    private readonly source?;
-    private readonly client?;
-    private size;
-    private _confusionMatrix;
-    private rollingAccuracy;
-    constructor(task: Task, logger: Logger, memory: Memory, source?: ModelSource | undefined, client?: clients.Client | undefined);
-    private getLabel;
-    test(data: data.Data): AsyncGenerator<Array<{
-        groundTruth: number;
-        pred: number;
-        features: number[];
-    }>, void>;
-    inference(data: data.Data): AsyncGenerator<Array<{
-        features: number[];
-        pred: number;
-    }>, void>;
-    getModel(): Promise<Model>;
-    get accuracy(): number;
-    get visitedSamples(): number;
-    get confusionMatrix(): number[][] | undefined;
+    constructor(task: Task, model: Model);
+    /** infer every line of the dataset and check that it is as labeled */
+    test(dataset: TypedLabeledDataset): AsyncGenerator<boolean>;
+    /** use the model to predict every line of the dataset */
+    infer(dataset: TypedDataset): AsyncGenerator<number, void>;
 }

package/dist/validation/validator.js CHANGED Viewed

@@ -1,119 +1,113 @@
-import { List } from 'immutable';
-import * as tf from '@tensorflow/tfjs';
+import * as tf from "@tensorflow/tfjs";
+import { datasetToData, labeledDatasetToData, } from "../dataset/data/helpers.js";
+function intoTFDataset(iter) {
+    // @ts-expect-error generator
+    return tf.data.generator(async function* () {
+        yield* iter;
+    });
+}
 export class Validator {
     task;
-    logger;
-    memory;
-    source;
-    client;
-    size = 0;
-    _confusionMatrix;
-    rollingAccuracy = 0;
-    constructor(task, logger, memory, source, client) {
+    #model;
+    constructor(task, model) {
         this.task = task;
-        this.logger = logger;
-        this.memory = memory;
-        this.source = source;
-        this.client = client;
-        if (source === undefined && client === undefined) {
-            throw new Error('To initialize a Validator, either or both a source and client need to be specified');
-        }
+        this.#model = model;
     }
-    async getLabel(ys) {
-        // Binary classification
-        if (ys.shape[1] == 1) {
-            const threshold = tf.scalar(0.5);
-            const binaryTensor = ys.greaterEqual(threshold);
-            const binaryArray = await binaryTensor.data();
-            tf.dispose([binaryTensor, threshold]);
-            return binaryArray;
-            // Multi-class classification
-        }
-        else {
-            const yIdxTensor = ys.argMax(-1);
-            const yIdx = await yIdxTensor.data();
-            tf.dispose([yIdxTensor]);
-            return yIdx;
+    /** infer every line of the dataset and check that it is as labeled */
+    async *test(dataset) {
+        const preprocessed = (await labeledDatasetToData(this.task, dataset)).preprocess();
+        const batched = preprocessed.batch().dataset;
+        const iterator = await tf.data
+            .zip([
+            preprocessed.dataset.map((t) => {
+                if (typeof t !== "object" ||
+                    !("ys" in t) ||
+                    !(t.ys instanceof tf.Tensor) ||
+                    !(t.ys.rank === 1 || t.ys.rank === 2))
+                    throw new Error("unexpected preprocessed dataset");
+                if ("xs" in t)
+                    tf.dispose(t.xs);
+                return t.ys;
+            }),
+            intoTFDataset(this.#inferOnBatchedData(batched)),
+        ])
+            .iterator();
+        for (let iter = await iterator.next(); iter.done !== true; iter = await iterator.next()) {
+            const zipped = iter.value;
+            const label = await getLabel(zipped[0]);
+            tf.dispose(zipped[0]);
+            const infered = zipped[1];
+            yield label === infered;
         }
-        // Multi-label classification is not supported
     }
-    // test assumes data comes with labels while predict doesn't
-    async *test(data) {
-        const batchSize = this.task.trainingInformation?.batchSize;
-        if (batchSize === undefined) {
-            throw new TypeError('Batch size is undefined');
-        }
-        const model = await this.getModel();
-        let hits = 0;
-        const iterator = await data.preprocess().dataset.batch(batchSize).iterator();
-        let next = await iterator.next();
-        while (next.done !== true) {
-            const { xs, ys } = next.value;
-            const ysLabel = await this.getLabel(ys);
-            const yPredTensor = await model.predict(xs);
-            const pred = await this.getLabel(yPredTensor);
-            const currentFeatures = await xs.array();
-            this.size += ysLabel.length;
-            hits += List(pred).zip(List(ysLabel)).filter(([p, y]) => p === y).size;
-            this.rollingAccuracy = hits / this.size;
-            tf.dispose([xs, ys, yPredTensor]);
-            yield (List(ysLabel).zip(List(pred), List(currentFeatures)))
-                .map(([gt, p, f]) => ({ groundTruth: gt, pred: p, features: f }))
-                .toArray();
-            next = await iterator.next();
-        }
-        this.logger.success(`Obtained validation accuracy of ${this.accuracy}`);
-        this.logger.success(`Visited ${this.visitedSamples} samples`);
+    /** use the model to predict every line of the dataset */
+    async *infer(dataset) {
+        const data = await datasetToData(this.task, dataset);
+        const batched = data.preprocess().batch().dataset;
+        yield* this.#inferOnBatchedData(batched);
     }
-    async *inference(data) {
-        const batchSize = this.task.trainingInformation?.batchSize;
-        if (batchSize === undefined) {
-            throw new TypeError('Batch size is undefined');
-        }
-        const model = await this.getModel();
-        const iterator = await data.preprocess().dataset.batch(batchSize).iterator();
-        let next = await iterator.next();
-        while (next.done !== true) {
-            let xs;
-            if (next.value instanceof tf.Tensor) {
-                xs = next.value;
-            }
-            else {
-                const tensors = next.value;
-                xs = tensors['xs'];
-                tf.dispose([tensors['ys']]);
+    async *#inferOnBatchedData(batched) {
+        const iterator = await batched.iterator();
+        for (let iter = await iterator.next(); iter.done !== true; iter = await iterator.next()) {
+            const row = iter.value;
+            if (typeof row !== "object" ||
+                !("xs" in row) ||
+                !(row.xs instanceof tf.Tensor))
+                throw new Error("unexpected shape of dataset");
+            const prediction = await this.#model.predict(row.xs);
+            tf.dispose(row);
+            let predictions;
+            switch (prediction.rank) {
+                case 2:
+                case 3:
+                    predictions = await getLabels(
+                    // cast as rank was just checked
+                    prediction);
+                    prediction.dispose();
+                    break;
+                default:
+                    throw new Error("unexpected batched prediction shape");
             }
-            const currentFeatures = await xs.array();
-            const yPredTensor = await model.predict(xs);
-            const pred = await this.getLabel(yPredTensor);
-            this.size += pred.length;
-            if (!Array.isArray(currentFeatures)) {
-                throw new TypeError('Data format is incorrect');
-            }
-            tf.dispose([xs, yPredTensor]);
-            yield List(currentFeatures).zip(List(pred))
-                .map(([f, p]) => ({ features: f, pred: p }))
-                .toArray();
-            next = await iterator.next();
+            prediction.dispose();
+            for (const prediction of predictions)
+                yield prediction;
         }
-        this.logger.success(`Visited ${this.visitedSamples} samples`);
     }
-    async getModel() {
-        if (this.source !== undefined && await this.memory.contains(this.source)) {
-            return await this.memory.getModel(this.source);
+}
+async function getLabels(ys) {
+    // cast as unstack drop a dimension and tfjs doesn't type correctly
+    return Promise.all(tf.unstack(ys).map((y) => {
+        const ret = getLabel(y);
+        y.dispose();
+        return ret;
+    }));
+}
+async function getLabel(ys) {
+    switch (ys.rank) {
+        case 1: {
+            if (ys.shape[0] == 1) {
+                // Binary classification
+                const threshold = tf.scalar(0.5);
+                const binaryTensor = ys.greaterEqual(threshold);
+                const binaryArray = await binaryTensor.data();
+                tf.dispose([binaryTensor, threshold]);
+                return binaryArray[0];
+            }
+            // Multi-class classification
+            const indexTensor = ys.argMax();
+            const indexArray = await indexTensor.data();
+            tf.dispose([indexTensor]);
+            return indexArray[0];
+            // Multi-label classification is not supported
         }
-        if (this.client !== undefined) {
-            return await this.client.getLatestModel();
+        case 2: {
+            // it's LLM, we only extract the next token
+            const firstToken = tf.tidy(() => ys.gather([0]).squeeze().argMax());
+            const raw = await firstToken.data();
+            firstToken.dispose();
+            return raw[0];
         }
-        throw new Error('Could not load the model');
-    }
-    get accuracy() {
-        return this.rollingAccuracy;
-    }
-    get visitedSamples() {
-        return this.size;
-    }
-    get confusionMatrix() {
-        return this._confusionMatrix;
+        default:
+            throw new Error("unexpected tensor rank");
     }
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@epfml/discojs",
-  "version": "3.0.1-p20240821133014.0",
+  "version": "3.0.1-p20240826092658.0",
   "type": "module",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",

package/dist/dataset/data_loader/data_loader.d.ts DELETED Viewed

@@ -1,13 +0,0 @@
-import type { DataSplit, Dataset } from '../index.js';
-export interface DataConfig {
-    features?: string[];
-    labels?: string[];
-    shuffle?: boolean;
-    validationSplit?: number;
-    inference?: boolean;
-    channels?: number;
-}
-export declare abstract class DataLoader<Source> {
-    abstract load(source: Source, config: DataConfig): Promise<Dataset>;
-    abstract loadAll(sources: Source[], config: DataConfig): Promise<DataSplit>;
-}

package/dist/dataset/data_loader/data_loader.js DELETED Viewed

@@ -1,2 +0,0 @@
-export class DataLoader {
-}

package/dist/dataset/data_loader/image_loader.d.ts DELETED Viewed

@@ -1,21 +0,0 @@
-import * as tf from '@tensorflow/tfjs';
-import type { Task } from '../../index.js';
-import type { Dataset, DataSplit } from '../index.js';
-import type { DataConfig } from '../data_loader/index.js';
-import { DataLoader } from '../data_loader/index.js';
-/**
- * Image data loader whose instantiable implementation is delegated by the platform-dependent Disco subprojects, namely,
- * @epfml/discojs-web and @epfml/discojs-node.
- * Load labels and correctly match them with their respective images, with the following constraints:
- * 1. Images are given as 1 image/1 file;
- * 2. Labels are given as multiple labels/1 file, each label file can contain a different amount of labels.
- */
-export declare abstract class ImageLoader<Source> extends DataLoader<Source> {
-    private readonly task;
-    abstract readImageFrom(source: Source, channels?: number): Promise<tf.Tensor3D>;
-    constructor(task: Task);
-    load(image: Source, config?: DataConfig): Promise<Dataset>;
-    private buildDataset;
-    loadAll(images: Source[], config?: DataConfig): Promise<DataSplit>;
-    shuffle(array: number[]): void;
-}