@epfml/discojs 2.1.2-p20240531085945.0 → 2.1.2-p20240617070831.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/dist/dataset/data/preprocessing/text_preprocessing.js +2 -2
  2. package/dist/dataset/data_loader/image_loader.js +1 -1
  3. package/dist/default_tasks/cifar10.js +2 -1
  4. package/dist/default_tasks/lus_covid.js +2 -1
  5. package/dist/default_tasks/mnist.js +2 -1
  6. package/dist/default_tasks/simple_face.js +2 -1
  7. package/dist/default_tasks/skin_condition.js +2 -1
  8. package/dist/default_tasks/titanic.js +2 -1
  9. package/dist/default_tasks/wikitext.js +8 -9
  10. package/dist/index.d.ts +1 -2
  11. package/dist/index.js +1 -2
  12. package/dist/memory/base.d.ts +15 -19
  13. package/dist/memory/empty.d.ts +2 -2
  14. package/dist/memory/empty.js +2 -2
  15. package/dist/memory/index.d.ts +0 -1
  16. package/dist/memory/index.js +0 -1
  17. package/dist/models/gpt/config.d.ts +3 -3
  18. package/dist/models/gpt/index.d.ts +7 -5
  19. package/dist/models/gpt/index.js +12 -9
  20. package/dist/models/gpt/layers.d.ts +1 -3
  21. package/dist/models/gpt/layers.js +9 -16
  22. package/dist/models/gpt/model.d.ts +1 -6
  23. package/dist/models/gpt/model.js +17 -32
  24. package/dist/models/tokenizer.js +5 -1
  25. package/dist/task/training_information.d.ts +1 -0
  26. package/dist/task/training_information.js +8 -2
  27. package/dist/training/trainer/distributed_trainer.js +6 -1
  28. package/dist/training/trainer/local_trainer.js +6 -1
  29. package/dist/training/trainer/trainer_builder.js +6 -2
  30. package/dist/validation/validator.d.ts +5 -7
  31. package/dist/validation/validator.js +53 -67
  32. package/package.json +1 -1
  33. package/dist/informant/graph_informant.d.ts +0 -10
  34. package/dist/informant/graph_informant.js +0 -20
  35. package/dist/informant/index.d.ts +0 -1
  36. package/dist/informant/index.js +0 -1
  37. package/dist/memory/model_type.d.ts +0 -9
  38. package/dist/memory/model_type.js +0 -10
package/dist/dataset/data/preprocessing/text_preprocessing.js CHANGED
@@ -24,7 +24,7 @@ export var TextPreprocessing;
  const leftPadding = {
  type: TextPreprocessing.LeftPadding,
  apply: async (x, task) => {
- if (x === undefined || !Array.isArray(x) || x.length == 0 || typeof (x[0] != 'number')) {
+ if (x === undefined || !Array.isArray(x) || x.length == 0 || typeof (x[0] !== 'number')) {
  new Error("The leftPadding preprocessing expects a non empty 1D array of number");
  }
  const { tokens } = await x;
@@ -58,7 +58,7 @@ const leftPadding = {
  const tokenize = {
  type: TextPreprocessing.Tokenize,
  apply: async (x, task) => {
- if (typeof x != 'string') {
+ if (typeof x !== 'string') {
  new Error("The tokenize preprocessing expects a string as input");
  }
  const xs = await x; // tf.TextLineDataset yields strings
package/dist/dataset/data_loader/image_loader.js CHANGED
@@ -58,7 +58,7 @@ export class ImageLoader extends DataLoader {
  const numberOfClasses = labelList.length;
  // Map label strings to integer
  const label_to_int = new Map(labelList.map((label_name, idx) => [label_name, idx]));
- if (label_to_int.size != numberOfClasses) {
+ if (label_to_int.size !== numberOfClasses) {
  throw new Error("Input labels aren't matching the task LABEL_LIST");
  }
  labels = config.labels.map(label_name => {
package/dist/default_tasks/cifar10.js CHANGED
@@ -32,7 +32,8 @@ export const cifar10 = {
  clippingRadius: 20,
  decentralizedSecure: true,
  minimumReadyPeers: 3,
- maxShareValue: 100
+ maxShareValue: 100,
+ tensorBackend: 'tfjs'
  }
  };
  },
package/dist/default_tasks/lus_covid.js CHANGED
@@ -32,7 +32,8 @@ export const lusCovid = {
  clippingRadius: 20,
  decentralizedSecure: true,
  minimumReadyPeers: 2,
- maxShareValue: 100
+ maxShareValue: 100,
+ tensorBackend: 'tfjs'
  }
  };
  },
package/dist/default_tasks/mnist.js CHANGED
@@ -31,7 +31,8 @@ export const mnist = {
  clippingRadius: 20,
  decentralizedSecure: true,
  minimumReadyPeers: 3,
- maxShareValue: 100
+ maxShareValue: 100,
+ tensorBackend: 'tfjs'
  }
  };
  },
package/dist/default_tasks/simple_face.js CHANGED
@@ -28,7 +28,8 @@ export const simpleFace = {
  LABEL_LIST: ['child', 'adult'],
  scheme: 'federated', // secure aggregation not yet implemented for federated
  noiseScale: undefined,
- clippingRadius: undefined
+ clippingRadius: undefined,
+ tensorBackend: 'tfjs'
  }
  };
  },
package/dist/default_tasks/skin_condition.js CHANGED
@@ -28,7 +28,8 @@ export const skinCondition = {
  LABEL_LIST: LABELS,
  scheme: 'federated',
  noiseScale: undefined,
- clippingRadius: undefined
+ clippingRadius: undefined,
+ tensorBackend: 'tfjs'
  }
  };
  },
package/dist/default_tasks/titanic.js CHANGED
@@ -62,7 +62,8 @@ export const titanic = {
  ],
  scheme: 'federated', // secure aggregation not yet implemented for FeAI
  noiseScale: undefined,
- clippingRadius: undefined
+ clippingRadius: undefined,
+ tensorBackend: 'tfjs'
  }
  };
  },
package/dist/default_tasks/wikitext.js CHANGED
@@ -17,17 +17,16 @@ export const wikitext = {
  dataType: 'text',
  modelID: 'wikitext-103-raw-model',
  preprocessingFunctions: [data.TextPreprocessing.Tokenize, data.TextPreprocessing.LeftPadding],
- validationSplit: 0.2, // TODO: is this used somewhere? because train, eval and test are already split in dataset
- epochs: 5,
  scheme: 'federated',
- noiseScale: undefined,
- decentralizedSecure: true,
- minimumReadyPeers: 3,
- maxShareValue: 100,
- roundDuration: 10,
- batchSize: 16,
+ epochs: 5,
+ // Unused by wikitext because data already comes split
+ // But if set to 0 then the webapp doesn't display the validation metrics
+ validationSplit: 0.1,
+ roundDuration: 2,
+ batchSize: 1, // If set too high (e.g. 16) then firefox raises a WebGL error
  tokenizer: 'Xenova/gpt2',
- maxSequenceLength: 128
+ maxSequenceLength: 128,
+ tensorBackend: 'gpt'
  }
  };
  },
package/dist/index.d.ts CHANGED
@@ -2,13 +2,12 @@ export * as data from './dataset/index.js';
  export * as serialization from './serialization/index.js';
  export * as training from './training/index.js';
  export * as privacy from './privacy.js';
- export { GraphInformant } from './informant/index.js';
  export * as client from './client/index.js';
  export * as aggregator from './aggregator/index.js';
  export { WeightsContainer, aggregation } from './weights/index.js';
  export { AsyncInformant } from './async_informant.js';
  export { Logger, ConsoleLogger } from './logging/index.js';
- export { Memory, ModelType, type ModelInfo, type Path, type ModelSource, Empty as EmptyMemory } from './memory/index.js';
+ export { Memory, type ModelInfo, type Path, type ModelSource, Empty as EmptyMemory } from './memory/index.js';
  export { Disco, RoundLogs } from './training/index.js';
  export { Validator } from './validation/index.js';
  export { Model, EpochLogs } from './models/index.js';
package/dist/index.js CHANGED
@@ -2,13 +2,12 @@ export * as data from './dataset/index.js';
  export * as serialization from './serialization/index.js';
  export * as training from './training/index.js';
  export * as privacy from './privacy.js';
- export { GraphInformant } from './informant/index.js';
  export * as client from './client/index.js';
  export * as aggregator from './aggregator/index.js';
  export { WeightsContainer, aggregation } from './weights/index.js';
  export { AsyncInformant } from './async_informant.js';
  export { ConsoleLogger } from './logging/index.js';
- export { Memory, ModelType, Empty as EmptyMemory } from './memory/index.js';
+ export { Memory, Empty as EmptyMemory } from './memory/index.js';
  export { Disco } from './training/index.js';
  export { Validator } from './validation/index.js';
  export { Model } from './models/index.js';
package/dist/memory/base.d.ts CHANGED
@@ -1,33 +1,28 @@
  import type { Model, TaskID } from '../index.js';
- import type { ModelType } from './model_type.js';
  /**
  * Model path which uniquely identifies a model in memory.
  */
  export type Path = string;
+ /**
+ * Type of models stored in memory. Stored models can either be a model currently
+ * being trained ("working model") or a regular model saved in memory ("saved model").
+ * There can only be a single working model for a given task.
+ */
+ type StoredModelType = 'saved' | 'working';
  /**
  * Model information which uniquely identifies a model in memory.
  */
  export interface ModelInfo {
- /**
- * The model's type: "working" or "saved" model.
- */
- type?: ModelType;
- /**
- * The model's version, to allow for multiple saved models of a same task without
- * causing id conflicts
- */
+ type: StoredModelType;
  version?: number;
- /**
- * The model's corresponding task
- */
  taskID: TaskID;
- /**
- * The model's name
- */
  name: string;
+ tensorBackend: 'gpt' | 'tfjs';
  }
  /**
  * A model source uniquely identifies a model stored in memory.
+ * It can be in the form of either a model info object or a Path string
+ * (one-to-one mapping between the two)
  */
  export type ModelSource = ModelInfo | Path;
  /**
@@ -95,21 +90,21 @@ export declare abstract class Memory {
  /**
  * Computes the path in memory corresponding to the given model source, be it a path or model information.
  * This is used to easily switch between model path and information, which are both unique model identifiers
- * with a one-to-one correspondance. Returns undefined instead if no path could be inferred from the given
+ * with a one-to-one equivalence. Returns undefined instead if no path could be inferred from the given
  * model source.
  * @param source The model source
  * @returns The model path
  */
- abstract pathFor(source: ModelSource): Path | undefined;
+ abstract getModelMemoryPath(source: ModelSource): Path | undefined;
  /**
  * Computes the model information corresponding to the given model source, be it a path or model information.
  * This is used to easily switch between model path and information, which are both unique model identifiers
- * with a one-to-one correspondance. Returns undefined instead if no unique model information could be inferred
+ * with a one-to-one equivalence. Returns undefined instead if no unique model information could be inferred
  * from the given model source.
  * @param source The model source
  * @returns The model information
  */
- abstract infoFor(source: ModelSource): ModelInfo | undefined;
+ abstract getModelInfo(source: ModelSource): ModelInfo | undefined;
  /**
  * Computes the lowest version a model source can have without conflicting with model versions currently in memory.
  * @param source The model source
@@ -117,3 +112,4 @@ export declare abstract class Memory {
  */
  abstract duplicateSource(source: ModelSource): Promise<ModelSource | undefined>;
  }
+ export {};
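
With this change, the ModelType enum is gone, the type field is required, and every ModelInfo must carry a tensorBackend. A minimal sketch of the new shape (the task and model names below are hypothetical):

    import type { ModelInfo } from '@epfml/discojs';

    const info: ModelInfo = {
      type: 'working',       // plain string union replaces the removed ModelType enum
      taskID: 'titanic',     // hypothetical task ID
      name: 'titanic-model', // hypothetical model name
      tensorBackend: 'tfjs', // newly required field
    };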
package/dist/memory/empty.d.ts CHANGED
@@ -14,7 +14,7 @@ export declare class Empty extends Memory {
  saveModel(): Promise<undefined>;
  deleteModel(): Promise<void>;
  downloadModel(): Promise<void>;
- pathFor(): Path;
- infoFor(): ModelInfo;
+ getModelMemoryPath(): Path;
+ getModelInfo(): ModelInfo;
  duplicateSource(): Promise<undefined>;
  }
package/dist/memory/empty.js CHANGED
@@ -31,10 +31,10 @@ export class Empty extends Memory {
  downloadModel() {
  return Promise.reject(new Error('empty'));
  }
- pathFor() {
+ getModelMemoryPath() {
  throw new Error('empty');
  }
- infoFor() {
+ getModelInfo() {
  throw new Error('empty');
  }
  duplicateSource() {
package/dist/memory/index.d.ts CHANGED
@@ -1,3 +1,2 @@
  export { Empty } from './empty.js';
  export { Memory, type ModelInfo, type Path, type ModelSource } from './base.js';
- export { ModelType } from './model_type.js';
package/dist/memory/index.js CHANGED
@@ -1,3 +1,2 @@
  export { Empty } from './empty.js';
  export { Memory } from './base.js';
- export { ModelType } from './model_type.js';
package/dist/models/gpt/config.d.ts CHANGED
@@ -1,9 +1,9 @@
- type ModelType = 'gpt2' | 'gpt2-medium' | 'gpt2-large' | 'gpt2-xl' | 'gpt-mini' | 'gpt-micro' | 'gpt-nano';
+ type GPTModelType = 'gpt2' | 'gpt2-medium' | 'gpt2-large' | 'gpt2-xl' | 'gpt-mini' | 'gpt-micro' | 'gpt-nano';
  export interface GPTConfig {
  lr: number;
  blockSize: number;
  vocabSize: number;
- modelType: ModelType;
+ modelType: GPTModelType;
  name?: string;
  evaluate?: boolean;
  maxEvalBatches?: number;
@@ -28,5 +28,5 @@ export type ModelSize = {
  nHead: number;
  nEmbd: number;
  };
- export declare function getModelSizes(modelType: ModelType): Required<ModelSize>;
+ export declare function getModelSizes(modelType: GPTModelType): Required<ModelSize>;
  export {};
package/dist/models/gpt/index.d.ts CHANGED
@@ -1,15 +1,20 @@
  /**
  * this code is taken from gpt-tfjs with modifications from @peacefulotter and @lukemovement
  **/
+ import * as tf from '@tensorflow/tfjs';
  import { PreTrainedTokenizer } from '@xenova/transformers';
  import { WeightsContainer } from '../../index.js';
  import type { Dataset } from '../../dataset/index.js';
  import { Model } from '../model.js';
  import type { EpochLogs, Prediction, Sample } from '../model.js';
  import type { GPTConfig } from './config.js';
+ export type GPTSerialization = {
+ weights: WeightsContainer;
+ config?: GPTConfig;
+ };
  export declare class GPT extends Model {
  private readonly model;
- constructor(partialConfig?: GPTConfig);
+ constructor(partialConfig?: GPTConfig, layersModel?: tf.LayersModel);
  /**
  * The GPT train methods wraps the model.fitDataset call in a for loop to act as a generator (of logs)
  * This allows for getting logs and stopping training without callbacks.
@@ -27,9 +32,6 @@ export declare class GPT extends Model {
  set weights(ws: WeightsContainer);
  static deserialize(data: GPTSerialization): Model;
  serialize(): GPTSerialization;
+ extract(): tf.LayersModel;
  [Symbol.dispose](): void;
  }
- export type GPTSerialization = {
- weights: WeightsContainer;
- config?: GPTConfig;
- };
package/dist/models/gpt/index.js CHANGED
@@ -6,9 +6,9 @@ import { Model } from '../model.js';
  import { GPTForCausalLM } from './model.js';
  export class GPT extends Model {
  model;
- constructor(partialConfig) {
+ constructor(partialConfig, layersModel) {
  super();
- this.model = new GPTForCausalLM(partialConfig);
+ this.model = new GPTForCausalLM(partialConfig, layersModel);
  }
  /**
  * The GPT train methods wraps the model.fitDataset call in a for loop to act as a generator (of logs)
@@ -40,13 +40,14 @@ export class GPT extends Model {
  epoch,
  peakMemory,
  training: {
- loss: logs.loss
+ loss: logs.loss,
+ accuracy: logs.acc
  }
  };
  if (validationData !== undefined) {
  if (val_loss === undefined || isNaN(val_loss) ||
  val_acc === undefined || isNaN(val_acc)) {
- throw new Error("Invalid validation logs");
+ throw new Error("Validation accuracy or loss is undefined or nan");
  }
  structuredLogs.validation = { accuracy: logs.val_acc, loss: logs.val_loss };
  }
@@ -91,14 +92,16 @@ export class GPT extends Model {
  config: this.config
  };
  }
+ extract() {
+ return this.model;
+ }
  [Symbol.dispose]() {
- console.log("Disposing model");
  if (this.model.optimizer !== undefined) {
  this.model.optimizer.dispose();
  }
- // Some tensors are not cleaned up when model.dispose is called
- // So we dispose them manually
- this.model.disposeRefs();
- this.model.dispose();
+ const disposeResults = this.model.dispose();
+ if (disposeResults.refCountAfterDispose > 0) {
+ console.error("The GPT model was not disposed correctly (refcount > 0)", disposeResults);
+ }
  }
  }
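
The GPT wrapper now exposes its underlying tf.LayersModel through extract() and can be rebuilt around an existing one via the new constructor parameter. A sketch of the round trip; the deep import path of GPT is an assumption here, since the package only re-exports Model from its top level:

    import * as tf from '@tensorflow/tfjs';
    import { GPT } from '@epfml/discojs/dist/models/gpt/index.js'; // assumed path

    const gpt = new GPT();                         // built from the default GPTConfig
    const layers: tf.LayersModel = gpt.extract();  // raw tf.LayersModel underneath
    const rewrapped = new GPT(undefined, layers);  // wrap an existing tf.LayersModel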
package/dist/models/gpt/layers.d.ts CHANGED
@@ -8,6 +8,4 @@ import type { GPTConfig } from './config.js';
  * @param conf GPTConfig
  * @returns model, tf.LayersModel, which supports model(inputs), model.predict and model.apply
  */
- export declare function GPTArchitecture(config: Required<GPTConfig>, disposalRefs: tf.TensorContainer[], peakMemory: {
- value: number;
- }): tf.LayersModel;
+ export declare function GPTArchitecture(config: Required<GPTConfig>): tf.LayersModel;
package/dist/models/gpt/layers.js CHANGED
@@ -40,7 +40,6 @@ class LogLayer extends tf.layers.Layer {
  tf.serialization.registerClass(LogLayer);
  class CausalSelfAttention extends tf.layers.Layer {
  config;
- peakMemory;
  static className = 'CausalSelfAttention';
  nHead;
  nEmbd;
@@ -51,10 +50,9 @@ class CausalSelfAttention extends tf.layers.Layer {
  cAttnBias;
  cProjKernel;
  cProjBias;
- constructor(config, disposalRefs, peakMemory) {
+ constructor(config) {
  super(config);
  this.config = config;
- this.peakMemory = peakMemory;
  this.nEmbd = config.nEmbd;
  this.nHead = config.nHead;
  this.dropout = config.dropout;
@@ -63,7 +61,6 @@ class CausalSelfAttention extends tf.layers.Layer {
  // calling bandPart zero out the upper triangular part of the all-ones matrix
  // from the doc: tf.linalg.band_part(input, -1, 0) ==> Lower triangular part
  this.mask = tf.linalg.bandPart(tf.ones([config.blockSize, config.blockSize]), -1, 0);
- disposalRefs.push(this.mask); // Push a reference to dispose this matrix later
  }
  build() {
  this.cAttnKernel = this.addWeight('c_attn/kernel', [this.nEmbd, 3 * this.nEmbd], 'float32', tf.initializers.glorotNormal({}));
@@ -134,10 +131,6 @@ class CausalSelfAttention extends tf.layers.Layer {
  y = tf.reshape(y, [B, T, C]);
  y = dense(y, this.cProjKernel, this.cProjBias);
  y = kwargs.training === true ? tf.dropout(y, this.dropout) : y;
- const memoryAllocated = tf.memory().numBytes / 1024 / 1024 / 1024; // GB
- if (memoryAllocated > this.peakMemory.value) {
- this.peakMemory.value = memoryAllocated;
- }
  return y;
  });
  }
@@ -167,25 +160,25 @@ tf.serialization.registerClass(GELU);
  function MLP(config) {
  return tf.sequential({ layers: [
  tf.layers.dense({
- name: 'mlp/c_fc',
+ name: config.name + `/mlp/c_fc`,
  units: 4 * config.nEmbd,
  inputDim: config.nEmbd,
  inputShape: [config.blockSize, config.nEmbd]
  }),
  new GELU(),
  tf.layers.dense({
- name: 'mlp/c_proj',
+ name: config.name + '/mlp/c_proj',
  units: config.nEmbd,
  inputDim: 4 * config.nEmbd,
  inputShape: [config.blockSize, 4 * config.nEmbd]
  }),
  tf.layers.dropout({
- name: 'mlp/drop',
+ name: config.name + '/mlp/drop',
  rate: config.residDrop
  }),
  ] });
  }
- function TransformerBlock(conf, disposalRefs, peakMemory) {
+ function TransformerBlock(conf) {
  const config = Object.assign({ name: 'h' }, conf);
  const inputs = tf.input({ shape: [config.blockSize, config.nEmbd] });
  let x1, x2;
@@ -196,7 +189,7 @@ function TransformerBlock(conf, disposalRefs, peakMemory) {
  x1 = new LogLayer({ name: config.name + '/ln_1_log' }).apply(x1);
  }
  // self attention layer
- x1 = new CausalSelfAttention(Object.assign({}, config, { name: config.name + '/attn' }), disposalRefs, peakMemory).apply(x1);
+ x1 = new CausalSelfAttention(Object.assign({}, config, { name: config.name + '/attn' })).apply(x1);
  // Residual connection
  x1 = tf.layers.add().apply([inputs, x1]);
  // normalization
@@ -204,7 +197,7 @@ function TransformerBlock(conf, disposalRefs, peakMemory) {
  .layerNormalization({ name: config.name + '/ln_2', epsilon: 1e-5 })
  .apply(x1);
  // MLP
- x2 = MLP(Object.assign({}, config, { name: config.name + '/mlp' })).apply(x2);
+ x2 = MLP(Object.assign({}, config, { name: config.name })).apply(x2);
  // add attention output to mlp output
  x2 = tf.layers.add().apply([x1, x2]);
  return tf.model({ name: config.name, inputs, outputs: x2 });
@@ -217,7 +210,7 @@ function TransformerBlock(conf, disposalRefs, peakMemory) {
  * @param conf GPTConfig
  * @returns model, tf.LayersModel, which supports model(inputs), model.predict and model.apply
  */
- export function GPTArchitecture(config, disposalRefs, peakMemory) {
+ export function GPTArchitecture(config) {
  const inputs = tf.input({ shape: [null] });
  //Token embedding
  const tokEmb = config.tokEmb
@@ -250,7 +243,7 @@ export function GPTArchitecture(config, disposalRefs, peakMemory) {
  }
  //Apply successively transformer blocks, attention and dense layers
  for (let i = 0; i < config.nLayer; i++) {
- x = TransformerBlock(Object.assign({}, config, { name: config.name + '/h/' + i }), disposalRefs, peakMemory).apply(x);
+ x = TransformerBlock(Object.assign({}, config, { name: config.name + '/h/' + i })).apply(x);
  }
  // Normalization
  x = tf.layers.layerNormalization({ name: config.name + '/ln_f', epsilon: 1e-5 })
package/dist/models/gpt/model.d.ts CHANGED
@@ -16,12 +16,7 @@ export declare abstract class Dataset<T> {
  */
  declare class GPTModel extends tf.LayersModel {
  protected readonly config: Required<GPTConfig>;
- private readonly disposalRefs;
- protected peakMemory: {
- value: number;
- };
- constructor(partialConfig?: GPTConfig);
- disposeRefs(): void;
+ constructor(partialConfig?: GPTConfig, layersModel?: tf.LayersModel);
  get getGPTConfig(): Required<GPTConfig>;
  compile(): void;
  fitDataset<T>(dataset: Dataset<T>, trainingArgs: tf.ModelFitDatasetArgs<T>): Promise<tf.History>;
package/dist/models/gpt/model.js CHANGED
@@ -9,31 +9,20 @@ import { GPTArchitecture } from './layers.js';
  */
  class GPTModel extends tf.LayersModel {
  config;
- disposalRefs; // Array to store tensor to dispose manually
- // Object to pass down to layers to store max memory allocated
- // This is an object rather than a primitive to pass the reference
- peakMemory;
- constructor(partialConfig) {
+ constructor(partialConfig, layersModel) {
  // Fill missing config parameters with default values
  let completeConfig = { ...DEFAULT_CONFIG, ...partialConfig };
  // Add layer sizes depending on which model has been specified
  completeConfig = { ...completeConfig, ...getModelSizes(completeConfig.modelType) };
- // Init the tf.LayersModel and assign it to this
- const disposalRefs = [];
- const peakMemory = { value: 0 };
- const gpt = GPTArchitecture(completeConfig, disposalRefs, peakMemory);
- const { inputs, outputs, name } = gpt;
- super({ inputs, outputs, name });
- this.config = completeConfig;
- this.disposalRefs = disposalRefs;
- this.peakMemory = peakMemory;
- }
- // Some tensors are not cleaned up when model.dispose is called
- // So we dispose them manually
- disposeRefs() {
- for (const tensorContainer of this.disposalRefs) {
- tf.dispose([tensorContainer]);
+ if (layersModel !== undefined) {
+ super({ inputs: layersModel.inputs, outputs: layersModel.outputs, name: layersModel.name });
+ }
+ else {
+ const gpt = GPTArchitecture(completeConfig);
+ const { inputs, outputs, name } = gpt;
+ super({ inputs, outputs, name });
  }
+ this.config = completeConfig;
  }
  get getGPTConfig() {
  return this.config;
@@ -42,7 +31,6 @@ class GPTModel extends tf.LayersModel {
  this.optimizer = this.config.weightDecay !== 0
  ? getCustomAdam(this, this.config.lr, this.config.weightDecay)
  : tf.train.adam(this.config.lr);
- this.peakMemory.value = 0;
  }
  async fitDataset(dataset, trainingArgs) {
  const callbacks = trainingArgs.callbacks;
@@ -50,6 +38,7 @@ class GPTModel extends tf.LayersModel {
  await callbacks.onTrainBegin?.();
  for (let epoch = 1; epoch <= trainingArgs.epochs; epoch++) {
  let averageLoss = 0;
+ let peakMemory = 0;
  let iteration = 1;
  const iterator = await dataset.iterator();
  let preprocessingTime = performance.now();
@@ -69,22 +58,15 @@ class GPTModel extends tf.LayersModel {
  }
  return tf.losses.softmaxCrossEntropy(ys, logits);
  };
- let backwardPassMemory = 0;
  const lossTensor = tf.tidy(() => {
  const { grads, value: lossTensor } = this.optimizer.computeGradients(lossFn);
  const gradsClipped = clipByGlobalNormObj(grads, 1);
  this.optimizer.applyGradients(gradsClipped);
- backwardPassMemory = tf.memory().numBytes / 1024 / 1024 / 1024;
  return lossTensor;
  });
  const loss = await lossTensor.array();
  averageLoss += loss;
  weightUpdateTime = performance.now() - weightUpdateTime;
- // Probably never the case. Empirically the attention mechanism always allocates
- // more memory than the backward pass
- if (backwardPassMemory > this.peakMemory.value) {
- this.peakMemory.value = backwardPassMemory;
- }
  tf.dispose([xs, ys, lossTensor]);
  if (evalDataset !== undefined &&
  this.config.evaluateEvery !== undefined &&
@@ -92,22 +74,25 @@ class GPTModel extends tf.LayersModel {
  const iterationLogs = await evaluate(this, evalDataset, this.config.maxEvalBatches);
  console.log(iterationLogs);
  }
- console.log(`Epoch: ${epoch}`, `\tStep: ${iteration} / ${this.config.maxIter}`, `\tLoss: ${loss.toFixed(3)}`, `\tPeak memory: ${this.peakMemory.value.toFixed(2)} GB`, `\tNumber of tensors allocated: ${tf.memory().numTensors}`, `\tPreprocessing time: ${preprocessingTime.toFixed(0)} ms`, `\tWeight update time: ${weightUpdateTime.toFixed(0)} ms`);
+ const memory = tf.memory().numBytes / 1024 / 1024 / 1024;
+ if (memory > peakMemory) {
+ peakMemory = memory;
+ }
+ console.log(`Epoch: ${epoch}`, `\tStep: ${iteration} / ${this.config.maxIter}`, `\tLoss: ${loss.toFixed(3)}`, `\tMemory: ${memory.toFixed(2)} GB`, `\tNumber of tensors allocated: ${tf.memory().numTensors}`, `\tPreprocessing time: ${preprocessingTime.toFixed(0)} ms`, `\tWeight update time: ${weightUpdateTime.toFixed(0)} ms`);
  iteration++;
  next = await iterator.next();
  }
  // Memory leak: If we reached the last iteration rather than the end of the dataset, cleanup the tensors
- if (next.done != true && iteration > this.config.maxIter) {
+ if (next.done !== true && iteration > this.config.maxIter) {
  const { xs, ys } = next.value;
  tf.dispose([xs, ys]);
  }
  let logs = {
  'loss': averageLoss / iteration,
- 'peakMemory': this.peakMemory.value
+ 'peakMemory': peakMemory
  };
  if (evalDataset !== undefined) {
  logs = { ...logs, ...await evaluate(this, evalDataset, this.config.maxEvalBatches) };
- console.log(logs);
  }
  await callbacks.onEpochEnd?.(epoch, logs);
  }
package/dist/models/tokenizer.js CHANGED
@@ -1,4 +1,4 @@
- import { AutoTokenizer } from '@xenova/transformers';
+ import { AutoTokenizer, env } from '@xenova/transformers';
  /**
  * A task's tokenizer is initially specified as the tokenizer name, e.g., 'Xenova/gpt2'.
  * The first time the tokenizer is needed, this function initializes the actual tokenizer object
@@ -15,6 +15,10 @@ export async function getTaskTokenizer(task) {
  if (tokenizer === undefined)
  throw Error('No tokenizer specified in the task training information');
  if (typeof tokenizer == 'string') {
+ // Needs to be false in order to prevent transformers.js from reading the local cache
+ // and triggering an error when running in the browser
+ // Reference: https://medium.com/@GenerationAI/transformers-js-onnx-runtime-webgpu-46c3e58d547c
+ env.allowLocalModels = false;
  tokenizer = await AutoTokenizer.from_pretrained(tokenizer);
  task.trainingInformation.tokenizer = tokenizer;
  }
package/dist/task/training_information.d.ts CHANGED
@@ -23,5 +23,6 @@ export interface TrainingInformation {
  aggregator?: AggregatorChoice;
  tokenizer?: string | PreTrainedTokenizer;
  maxSequenceLength?: number;
+ tensorBackend: 'tfjs' | 'gpt';
  }
  export declare function isTrainingInformation(raw: unknown): raw is TrainingInformation;
package/dist/task/training_information.js CHANGED
@@ -10,7 +10,7 @@ export function isTrainingInformation(raw) {
  if (typeof raw !== 'object' || raw === null) {
  return false;
  }
- const { IMAGE_H, IMAGE_W, LABEL_LIST, aggregator, batchSize, clippingRadius, dataType, decentralizedSecure, epochs, inputColumns, maxShareValue, minimumReadyPeers, modelID, noiseScale, outputColumns, preprocessingFunctions, roundDuration, scheme, validationSplit, tokenizer, maxSequenceLength, } = raw;
+ const { IMAGE_H, IMAGE_W, LABEL_LIST, aggregator, batchSize, clippingRadius, dataType, decentralizedSecure, epochs, inputColumns, maxShareValue, minimumReadyPeers, modelID, noiseScale, outputColumns, preprocessingFunctions, roundDuration, scheme, validationSplit, tokenizer, maxSequenceLength, tensorBackend } = raw;
  if (typeof dataType !== 'string' ||
  typeof modelID !== 'string' ||
  typeof epochs !== 'number' ||
@@ -53,6 +53,11 @@ export function isTrainingInformation(raw) {
  return false;
  }
  }
+ switch (tensorBackend) {
+ case 'tfjs': break;
+ case 'gpt': break;
+ default: return false;
+ }
  switch (scheme) {
  case 'decentralized': break;
  case 'federated': break;
@@ -80,7 +85,8 @@ export function isTrainingInformation(raw) {
  scheme,
  validationSplit,
  tokenizer,
- maxSequenceLength
+ maxSequenceLength,
+ tensorBackend
  };
  const _correct = repack;
  const _total = repack;
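
With this change, isTrainingInformation only accepts objects whose tensorBackend is exactly 'tfjs' or 'gpt'. A sketch of the new behavior, with all other required fields elided behind a hypothetical validFields object:

    isTrainingInformation({ ...validFields, tensorBackend: 'tfjs' }); // true, assuming the rest is valid
    isTrainingInformation({ ...validFields, tensorBackend: 'onnx' }); // false: not a known backend
    isTrainingInformation({ ...validFields });                        // false: tensorBackend is now required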
package/dist/training/trainer/distributed_trainer.js CHANGED
@@ -31,6 +31,11 @@ export class DistributedTrainer extends Trainer {
  // after it has completed a round of training.
  this.model.weights = this.aggregator.model.weights;
  }
- await this.memory.updateWorkingModel({ taskID: this.task.id, name: this.task.trainingInformation.modelID }, this.model);
+ await this.memory.updateWorkingModel({
+ type: 'working',
+ taskID: this.task.id,
+ name: this.task.trainingInformation.modelID,
+ tensorBackend: this.task.trainingInformation.tensorBackend
+ }, this.model);
  }
  }
package/dist/training/trainer/local_trainer.js CHANGED
@@ -14,6 +14,11 @@ export class LocalTrainer extends Trainer {
  return await Promise.resolve();
  }
  async onRoundEnd() {
- await this.memory.updateWorkingModel({ taskID: this.task.id, name: this.task.trainingInformation.modelID }, this.model);
+ await this.memory.updateWorkingModel({
+ type: 'working',
+ taskID: this.task.id,
+ name: this.task.trainingInformation.modelID,
+ tensorBackend: this.task.trainingInformation.tensorBackend
+ }, this.model);
  }
  }
package/dist/training/trainer/trainer_builder.js CHANGED
@@ -1,4 +1,3 @@
- import { ModelType } from '../../index.js';
  import { DistributedTrainer } from './distributed_trainer.js';
  import { LocalTrainer } from './local_trainer.js';
  /**
@@ -36,7 +35,12 @@ export class TrainerBuilder {
  if (modelID === undefined) {
  throw new TypeError('model ID is undefined');
  }
- const info = { type: ModelType.WORKING, taskID: this.task.id, name: modelID };
+ const info = {
+ type: 'working',
+ taskID: this.task.id,
+ name: modelID,
+ tensorBackend: 'gpt'
+ };
  const model = await (await this.memory.contains(info) ? this.memory.getModel(info) : client.getLatestModel());
  return model;
  }
package/dist/validation/validator.d.ts CHANGED
@@ -1,4 +1,3 @@
- import { List } from 'immutable';
  import type { data, Model, Task, Logger, client as clients, Memory, ModelSource, Features } from '../index.js';
  export declare class Validator {
  readonly task: Task;
@@ -6,22 +5,21 @@ export declare class Validator {
  private readonly memory;
  private readonly source?;
  private readonly client?;
- private readonly graphInformant;
  private size;
  private _confusionMatrix;
+ private rollingAccuracy;
  constructor(task: Task, logger: Logger, memory: Memory, source?: ModelSource | undefined, client?: clients.Client | undefined);
  private getLabel;
- assess(data: data.Data, useConfusionMatrix?: boolean): Promise<Array<{
+ test(data: data.Data): AsyncGenerator<Array<{
  groundTruth: number;
  pred: number;
  features: Features;
- }>>;
- predict(data: data.Data): Promise<Array<{
+ }>, void>;
+ inference(data: data.Data): AsyncGenerator<Array<{
  features: Features;
  pred: number;
- }>>;
+ }>, void>;
  getModel(): Promise<Model>;
- get accuracyData(): List<number>;
  get accuracy(): number;
  get visitedSamples(): number;
  get confusionMatrix(): number[][] | undefined;
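
Since assess() and predict() are replaced by the async generators test() and inference(), callers now stream results batch by batch with for await instead of awaiting one large array. A sketch of consuming test(); the validator and validationData setup is elided and hypothetical:

    for await (const batch of validator.test(validationData)) {
      for (const { groundTruth, pred } of batch) {
        console.log(`expected ${groundTruth}, predicted ${pred}`);
      }
      console.log(`rolling accuracy so far: ${validator.accuracy}`);
    }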
package/dist/validation/validator.js CHANGED
@@ -1,15 +1,14 @@
  import { List } from 'immutable';
  import * as tf from '@tensorflow/tfjs';
- import { GraphInformant } from '../index.js';
  export class Validator {
  task;
  logger;
  memory;
  source;
  client;
- graphInformant = new GraphInformant();
  size = 0;
  _confusionMatrix;
+ rollingAccuracy = 0;
  constructor(task, logger, memory, source, client) {
  this.task = task;
  this.logger = logger;
@@ -23,91 +22,81 @@ export class Validator {
  async getLabel(ys) {
  // Binary classification
  if (ys.shape[1] == 1) {
- return await ys.greaterEqual(tf.scalar(0.5)).data();
+ const threshold = tf.scalar(0.5);
+ const binaryTensor = ys.greaterEqual(threshold);
+ const binaryArray = await binaryTensor.data();
+ tf.dispose([binaryTensor, threshold]);
+ return binaryArray;
  // Multi-class classification
  }
  else {
- return await ys.argMax(-1).data();
+ const yIdxTensor = ys.argMax(-1);
+ const yIdx = await yIdxTensor.data();
+ tf.dispose([yIdxTensor]);
+ return yIdx;
  }
  // Multi-label classification is not supported
  }
- async assess(data, useConfusionMatrix = false) {
+ // test assumes data comes with labels while predict doesn't
+ async *test(data) {
  const batchSize = this.task.trainingInformation?.batchSize;
  if (batchSize === undefined) {
  throw new TypeError('Batch size is undefined');
  }
  const model = await this.getModel();
- let features = [];
- const groundTruth = [];
  let hits = 0;
- // Get model predictions per batch and flatten the result
- // Also build the features and ground truth arrays
- const predictions = (await data.preprocess().dataset.batch(batchSize)
- .mapAsync(async (e) => {
- if (typeof e === 'object' && 'xs' in e && 'ys' in e) {
- const xs = e.xs;
- const ys = await this.getLabel(e.ys);
- const pred = await this.getLabel(await model.predict(xs));
- const currentFeatures = await xs.array();
- if (Array.isArray(currentFeatures)) {
- features = features.concat(currentFeatures);
- }
- else {
- throw new TypeError('Data format is incorrect');
- }
- groundTruth.push(...Array.from(ys));
- this.size += xs.shape[0];
- hits += List(pred).zip(List(ys)).filter(([p, y]) => p === y).size;
- // TODO: Confusion Matrix stats
- const currentAccuracy = hits / this.size;
- this.graphInformant.updateAccuracy(currentAccuracy);
- return Array.from(pred);
- }
- else {
- throw new Error('Input data is missing a feature or the label');
- }
- }).toArray()).flat();
+ const iterator = await data.preprocess().dataset.batch(batchSize).iterator();
+ let next = await iterator.next();
+ while (next.done !== true) {
+ const { xs, ys } = next.value;
+ const ysLabel = await this.getLabel(ys);
+ const yPredTensor = await model.predict(xs);
+ const pred = await this.getLabel(yPredTensor);
+ const currentFeatures = await xs.array();
+ this.size += ysLabel.length;
+ hits += List(pred).zip(List(ysLabel)).filter(([p, y]) => p === y).size;
+ this.rollingAccuracy = hits / this.size;
+ tf.dispose([xs, ys, yPredTensor]);
+ yield List(ysLabel).zip(List(pred), List(currentFeatures))
+ .map(([gt, p, f]) => ({ groundTruth: gt, pred: p, features: f }))
+ .toArray();
+ next = await iterator.next();
+ }
  this.logger.success(`Obtained validation accuracy of ${this.accuracy}`);
  this.logger.success(`Visited ${this.visitedSamples} samples`);
- if (useConfusionMatrix) {
- try {
- this._confusionMatrix = tf.math.confusionMatrix([], [], 0).arraySync();
- }
- catch (e) {
- console.error(e instanceof Error ? e.message : e);
- throw new Error('Failed to compute the confusion matrix');
- }
- }
- return List(groundTruth)
- .zip(List(predictions), List(features))
- .map(([gt, p, f]) => ({ groundTruth: gt, pred: p, features: f }))
- .toArray();
  }
- async predict(data) {
+ async *inference(data) {
  const batchSize = this.task.trainingInformation?.batchSize;
  if (batchSize === undefined) {
  throw new TypeError('Batch size is undefined');
  }
  const model = await this.getModel();
- let features = [];
- // Get model prediction per batch and flatten the result
- // Also incrementally build the features array
- const predictions = (await data.preprocess().dataset.batch(batchSize)
- .mapAsync(async (e) => {
- const xs = e;
- const currentFeatures = await xs.array();
- if (Array.isArray(currentFeatures)) {
- features = features.concat(currentFeatures);
+ const iterator = await data.preprocess().dataset.batch(batchSize).iterator();
+ let next = await iterator.next();
+ while (next.done !== true) {
+ let xs;
+ if (next.value instanceof tf.Tensor) {
+ xs = next.value;
  }
  else {
+ const tensors = next.value;
+ xs = tensors['xs'];
+ tf.dispose([tensors['ys']]);
+ }
+ const currentFeatures = await xs.array();
+ const yPredTensor = await model.predict(xs);
+ const pred = await this.getLabel(yPredTensor);
+ this.size += pred.length;
+ if (!Array.isArray(currentFeatures)) {
  throw new TypeError('Data format is incorrect');
  }
- const pred = await this.getLabel(await model.predict(xs));
- return Array.from(pred);
- }).toArray()).flat();
- return List(features).zip(List(predictions))
- .map(([f, p]) => ({ features: f, pred: p }))
- .toArray();
+ tf.dispose([xs, yPredTensor]);
+ yield List(currentFeatures).zip(List(pred))
+ .map(([f, p]) => ({ features: f, pred: p }))
+ .toArray();
+ next = await iterator.next();
+ }
+ this.logger.success(`Visited ${this.visitedSamples} samples`);
  }
  async getModel() {
  if (this.source !== undefined && await this.memory.contains(this.source)) {
@@ -118,11 +107,8 @@ export class Validator {
  }
  throw new Error('Could not load the model');
  }
- get accuracyData() {
- return this.graphInformant.data();
- }
  get accuracy() {
- return this.graphInformant.accuracy();
+ return this.rollingAccuracy;
  }
  get visitedSamples() {
  return this.size;
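
inference() mirrors test() for unlabeled data, yielding { features, pred } batches and disposing intermediate tensors as it goes. A sketch under the same hypothetical setup:

    for await (const batch of validator.inference(unlabeledData)) {
      for (const { pred } of batch) {
        console.log(`predicted class ${pred}`);
      }
    }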
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@epfml/discojs",
- "version": "2.1.2-p20240531085945.0",
+ "version": "2.1.2-p20240617070831.0",
  "type": "module",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
package/dist/informant/graph_informant.d.ts DELETED
@@ -1,10 +0,0 @@
- import { type List } from 'immutable';
- export declare class GraphInformant {
- static readonly NB_EPOCHS_ON_GRAPH = 10;
- private currentAccuracy;
- private accuracyDataSeries;
- constructor();
- updateAccuracy(accuracy: number): void;
- data(): List<number>;
- accuracy(): number;
- }
package/dist/informant/graph_informant.js DELETED
@@ -1,20 +0,0 @@
- import { Repeat } from 'immutable';
- export class GraphInformant {
- static NB_EPOCHS_ON_GRAPH = 10;
- currentAccuracy;
- accuracyDataSeries;
- constructor() {
- this.currentAccuracy = 0;
- this.accuracyDataSeries = Repeat(0, GraphInformant.NB_EPOCHS_ON_GRAPH).toList();
- }
- updateAccuracy(accuracy) {
- this.accuracyDataSeries = this.accuracyDataSeries.shift().push(accuracy);
- this.currentAccuracy = accuracy;
- }
- data() {
- return this.accuracyDataSeries;
- }
- accuracy() {
- return this.currentAccuracy;
- }
- }
package/dist/informant/index.d.ts DELETED
@@ -1 +0,0 @@
- export { GraphInformant } from './graph_informant.js';
package/dist/informant/index.js DELETED
@@ -1 +0,0 @@
- export { GraphInformant } from './graph_informant.js';
package/dist/memory/model_type.d.ts DELETED
@@ -1,9 +0,0 @@
- /**
- * Type of models stored in memory. Stored models can either be a model currently
- * being trained ("working model") or a regular model saved in memory ("saved model").
- * There can only be a single working model for a given task.
- */
- export declare enum ModelType {
- WORKING = "working",
- SAVED = "saved"
- }
package/dist/memory/model_type.js DELETED
@@ -1,10 +0,0 @@
- /**
- * Type of models stored in memory. Stored models can either be a model currently
- * being trained ("working model") or a regular model saved in memory ("saved model").
- * There can only be a single working model for a given task.
- */
- export var ModelType;
- (function (ModelType) {
- ModelType["WORKING"] = "working";
- ModelType["SAVED"] = "saved";
- })(ModelType || (ModelType = {}));