@epfml/discojs 2.1.2-p20240513140724.0 → 2.1.2-p20240515132210.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -8,7 +8,7 @@ export * as aggregator from './aggregator/index.js';
  export { WeightsContainer, aggregation } from './weights/index.js';
  export { AsyncInformant } from './async_informant.js';
  export { Logger, ConsoleLogger } from './logging/index.js';
- export { Memory, ModelType, type ModelInfo, type Path, type ModelSource, Empty as EmptyMemory } from './memory/index.js';
+ export { Memory, StoredModelType, type ModelInfo, type Path, type ModelSource, Empty as EmptyMemory } from './memory/index.js';
  export { Disco, RoundLogs } from './training/index.js';
  export { Validator } from './validation/index.js';
  export { Model, EpochLogs } from './models/index.js';
package/dist/index.js CHANGED
@@ -8,7 +8,7 @@ export * as aggregator from './aggregator/index.js';
  export { WeightsContainer, aggregation } from './weights/index.js';
  export { AsyncInformant } from './async_informant.js';
  export { ConsoleLogger } from './logging/index.js';
- export { Memory, ModelType, Empty as EmptyMemory } from './memory/index.js';
+ export { Memory, StoredModelType, Empty as EmptyMemory } from './memory/index.js';
  export { Disco } from './training/index.js';
  export { Validator } from './validation/index.js';
  export { Model } from './models/index.js';
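Taken together, the two hunks above rename the ModelType export to StoredModelType at the package root, which is a breaking change for any consumer importing the old name. A minimal migration sketch (assuming code that imported from the package root):

// Before this release:
// import { Memory, ModelType } from '@epfml/discojs';

// After this release:
import { Memory, StoredModelType } from '@epfml/discojs';

const kind = StoredModelType.WORKING; // string value "working", as before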
@@ -1,5 +1,5 @@
  import type { Model, TaskID } from '../index.js';
- import type { ModelType } from './model_type.js';
+ import type { StoredModelType } from './model_type.js';
  /**
   * Model path which uniquely identifies a model in memory.
   */
@@ -8,22 +8,9 @@ export type Path = string;
   * Model information which uniquely identifies a model in memory.
   */
  export interface ModelInfo {
-     /**
-      * The model's type: "working" or "saved" model.
-      */
-     type?: ModelType;
-     /**
-      * The model's version, to allow for multiple saved models of a same task without
-      * causing id conflicts
-      */
+     type?: StoredModelType;
      version?: number;
-     /**
-      * The model's corresponding task
-      */
      taskID: TaskID;
-     /**
-      * The model's name
-      */
      name: string;
  }
  /**
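The ModelInfo interface keeps the same fields; the hunk only switches the type field to the renamed enum and drops the per-field doc comments. A hypothetical instance (the task ID and model name below are illustrative, not from the package):

import { StoredModelType, type ModelInfo } from '@epfml/discojs';

const info: ModelInfo = {
    type: StoredModelType.SAVED, // "working" or "saved"
    version: 1,                  // distinguishes multiple saved models of the same task
    taskID: 'some-task',         // hypothetical TaskID
    name: 'my-model',            // hypothetical model name
};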
@@ -95,21 +82,21 @@ export declare abstract class Memory {
      /**
       * Computes the path in memory corresponding to the given model source, be it a path or model information.
       * This is used to easily switch between model path and information, which are both unique model identifiers
-      * with a one-to-one correspondance. Returns undefined instead if no path could be inferred from the given
+      * with a one-to-one equivalence. Returns undefined instead if no path could be inferred from the given
       * model source.
       * @param source The model source
       * @returns The model path
       */
-     abstract pathFor(source: ModelSource): Path | undefined;
+     abstract getModelMemoryPath(source: ModelSource): Path | undefined;
      /**
       * Computes the model information corresponding to the given model source, be it a path or model information.
       * This is used to easily switch between model path and information, which are both unique model identifiers
-      * with a one-to-one correspondance. Returns undefined instead if no unique model information could be inferred
+      * with a one-to-one equivalence. Returns undefined instead if no unique model information could be inferred
       * from the given model source.
       * @param source The model source
       * @returns The model information
       */
-     abstract infoFor(source: ModelSource): ModelInfo | undefined;
+     abstract getModelInfo(source: ModelSource): ModelInfo | undefined;
      /**
       * Computes the lowest version a model source can have without conflicting with model versions currently in memory.
       * @param source The model source
@@ -14,7 +14,7 @@ export declare class Empty extends Memory {
      saveModel(): Promise<undefined>;
      deleteModel(): Promise<void>;
      downloadModel(): Promise<void>;
-     pathFor(): Path;
-     infoFor(): ModelInfo;
+     getModelMemoryPath(): Path;
+     getModelInfo(): ModelInfo;
      duplicateSource(): Promise<undefined>;
  }
@@ -31,10 +31,10 @@ export class Empty extends Memory {
      downloadModel() {
          return Promise.reject(new Error('empty'));
      }
-     pathFor() {
+     getModelMemoryPath() {
          throw new Error('empty');
      }
-     infoFor() {
+     getModelInfo() {
          throw new Error('empty');
      }
      duplicateSource() {
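Both the abstract Memory class and its Empty stub rename pathFor and infoFor to getModelMemoryPath and getModelInfo without changing signatures, so call sites only need a mechanical rename. A hedged sketch, assuming some concrete Memory instance and a model source:

import type { Memory, ModelSource } from '@epfml/discojs';

declare const memory: Memory;      // any concrete Memory implementation (assumption)
declare const source: ModelSource; // a Path or ModelInfo

// Before: memory.pathFor(source) and memory.infoFor(source)
const path = memory.getModelMemoryPath(source); // Path | undefined
const info = memory.getModelInfo(source);       // ModelInfo | undefined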
@@ -1,3 +1,3 @@
  export { Empty } from './empty.js';
  export { Memory, type ModelInfo, type Path, type ModelSource } from './base.js';
- export { ModelType } from './model_type.js';
+ export { StoredModelType } from './model_type.js';
@@ -1,3 +1,3 @@
  export { Empty } from './empty.js';
  export { Memory } from './base.js';
- export { ModelType } from './model_type.js';
+ export { StoredModelType } from './model_type.js';
@@ -3,7 +3,7 @@
   * being trained ("working model") or a regular model saved in memory ("saved model").
   * There can only be a single working model for a given task.
   */
- export declare enum ModelType {
+ export declare enum StoredModelType {
      WORKING = "working",
      SAVED = "saved"
  }
@@ -3,8 +3,8 @@
   * being trained ("working model") or a regular model saved in memory ("saved model").
   * There can only be a single working model for a given task.
   */
- export var ModelType;
- (function (ModelType) {
-     ModelType["WORKING"] = "working";
-     ModelType["SAVED"] = "saved";
- })(ModelType || (ModelType = {}));
+ export var StoredModelType;
+ (function (StoredModelType) {
+     StoredModelType["WORKING"] = "working";
+     StoredModelType["SAVED"] = "saved";
+ })(StoredModelType || (StoredModelType = {}));
@@ -0,0 +1,32 @@
+ type GPTModelType = 'gpt2' | 'gpt2-medium' | 'gpt2-large' | 'gpt2-xl' | 'gpt-mini' | 'gpt-micro' | 'gpt-nano';
+ export interface GPTConfig {
+     lr: number;
+     blockSize: number;
+     vocabSize: number;
+     modelType: GPTModelType;
+     name?: string;
+     evaluate?: boolean;
+     maxEvalBatches?: number;
+     evaluateEvery?: number;
+     maxIter?: number;
+     weightDecay?: number;
+     verbose?: 0 | 1;
+     bias?: boolean;
+     debug?: boolean;
+     dropout?: number;
+     residDrop?: number;
+     embdDrop?: number;
+     tokEmb?: boolean;
+     lmHead?: boolean;
+     nLayer?: number;
+     nHead?: number;
+     nEmbd?: number;
+ }
+ export declare const DEFAULT_CONFIG: Required<GPTConfig>;
+ export type ModelSize = {
+     nLayer: number;
+     nHead: number;
+     nEmbd: number;
+ };
+ export declare function getModelSizes(modelType: GPTModelType): Required<ModelSize>;
+ export {};
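Only lr, blockSize, vocabSize and modelType are required; every other field is optional and backed by DEFAULT_CONFIG (next hunk). A hypothetical minimal configuration (the module path is an assumption):

import type { GPTConfig } from './models/gpt/config.js'; // assumed path

const config: GPTConfig = {
    modelType: 'gpt-nano',
    lr: 0.001,
    blockSize: 128,   // context window length, in tokens
    vocabSize: 50258, // matches the default below
};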
@@ -0,0 +1,42 @@
+ // for a benchmark of performance, see https://github.com/epfml/disco/pull/659
+ export const DEFAULT_CONFIG = {
+     name: 'transformer',
+     lr: 0.001,
+     weightDecay: 0,
+     maxIter: 5,
+     verbose: 0,
+     modelType: 'gpt-nano',
+     evaluate: true,
+     maxEvalBatches: 12,
+     evaluateEvery: 100,
+     blockSize: 128,
+     vocabSize: 50258,
+     bias: true,
+     debug: false,
+     dropout: 0.2,
+     residDrop: 0.2,
+     embdDrop: 0.2,
+     tokEmb: true,
+     lmHead: true,
+     nLayer: 3,
+     nHead: 3,
+     nEmbd: 48,
+ };
+ export function getModelSizes(modelType) {
+     switch (modelType) {
+         case 'gpt2':
+             return { nLayer: 12, nHead: 12, nEmbd: 768 };
+         case 'gpt2-medium':
+             return { nLayer: 24, nHead: 16, nEmbd: 1024 };
+         case 'gpt2-large':
+             return { nLayer: 36, nHead: 20, nEmbd: 1280 };
+         case 'gpt2-xl':
+             return { nLayer: 48, nHead: 25, nEmbd: 1600 };
+         case 'gpt-mini':
+             return { nLayer: 6, nHead: 6, nEmbd: 192 };
+         case 'gpt-micro':
+             return { nLayer: 4, nHead: 4, nEmbd: 128 };
+         case 'gpt-nano':
+             return { nLayer: 3, nHead: 3, nEmbd: 48 };
+     }
+ }
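getModelSizes maps each preset name to its layer count, attention-head count and embedding width, so a preset size can be spliced into a config without spelling out the numbers. A sketch under the same assumed module path:

import { DEFAULT_CONFIG, getModelSizes } from './models/gpt/config.js'; // assumed path

const sizes = getModelSizes('gpt2-medium'); // { nLayer: 24, nHead: 16, nEmbd: 1024 }
const config = {
    ...DEFAULT_CONFIG,
    modelType: 'gpt-micro' as const,
    ...getModelSizes('gpt-micro'), // nLayer: 4, nHead: 4, nEmbd: 128
};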
@@ -0,0 +1,7 @@
+ import * as tf from '@tensorflow/tfjs';
+ interface DataPoint extends tf.TensorContainerObject {
+     xs: tf.Tensor2D;
+     ys: tf.Tensor3D;
+ }
+ export default function evaluate(model: tf.LayersModel, dataset: tf.data.Dataset<DataPoint>, maxEvalBatches: number): Promise<Record<'acc' | 'val_acc' | 'val_loss' | 'val_perplexity', number>>;
+ export {};
@@ -0,0 +1,44 @@
+ import * as tf from '@tensorflow/tfjs';
+ export default async function evaluate(model, dataset, maxEvalBatches) {
+     let datasetSize = 0;
+     let totalLoss = 0;
+     const acc = [0, 0];
+     await dataset.take(maxEvalBatches).map(({ xs, ys }) => {
+         const logits = model.apply(xs);
+         if (Array.isArray(logits)) {
+             throw new Error('model output too many tensor');
+         }
+         if (logits instanceof tf.SymbolicTensor) {
+             throw new Error('model output symbolic tensor');
+         }
+         xs.dispose();
+         return { logits, ys };
+     }).mapAsync(async ({ logits, ys }) => {
+         const lossTensor = tf.losses.softmaxCrossEntropy(ys, logits);
+         const loss = await lossTensor.array();
+         if (typeof loss !== 'number') {
+             throw new Error('got multiple loss');
+         }
+         const accTensor = tf.metrics.categoricalAccuracy(ys, logits);
+         const accSize = accTensor.shape.reduce((l, r) => l * r, 1);
+         const accSum = accTensor.sum();
+         const accSummed = await accSum.array();
+         if (typeof accSummed !== 'number') {
+             throw new Error('got multiple accuracy sum');
+         }
+         tf.dispose([ys, logits, accTensor, accSum, lossTensor]);
+         return { loss, accSummed, accSize };
+     }).forEachAsync(({ loss, accSummed, accSize }) => {
+         datasetSize += 1;
+         totalLoss += loss;
+         acc[0] += accSummed;
+         acc[1] += accSize;
+     });
+     const loss = totalLoss / datasetSize;
+     return {
+         val_loss: loss,
+         val_perplexity: Math.exp(loss),
+         acc: acc[0] / acc[1],
+         val_acc: acc[0] / acc[1]
+     };
+ }
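Note that val_perplexity is computed directly as Math.exp(val_loss), the exponential of the mean cross-entropy over the evaluated batches, and acc and val_acc are the same micro-averaged token accuracy. A hypothetical call (model and validationSet are placeholders, and the module path is an assumption):

import * as tf from '@tensorflow/tfjs';
import evaluate from './models/gpt/evaluate.js'; // assumed path

declare const model: tf.LayersModel;
declare const validationSet: tf.data.Dataset<{ xs: tf.Tensor2D; ys: tf.Tensor3D }>;

const metrics = await evaluate(model, validationSet, 12); // evaluate at most 12 batches
console.log(metrics.val_loss, metrics.val_perplexity, metrics.val_acc);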
@@ -0,0 +1,37 @@
+ /**
+  * this code is taken from gpt-tfjs with modifications from @peacefulotter and @lukemovement
+  **/
+ import * as tf from '@tensorflow/tfjs';
+ import { PreTrainedTokenizer } from '@xenova/transformers';
+ import { WeightsContainer } from '../../index.js';
+ import type { Dataset } from '../../dataset/index.js';
+ import { Model } from '../model.js';
+ import type { EpochLogs, Prediction, Sample } from '../model.js';
+ import type { GPTConfig } from './config.js';
+ export declare class GPT extends Model {
+     private readonly model;
+     constructor(partialConfig?: GPTConfig);
+     /**
+      * The GPT train methods wraps the model.fitDataset call in a for loop to act as a generator (of logs)
+      * This allows for getting logs and stopping training without callbacks.
+      *
+      * @param trainingData training dataset
+      * @param validationData validation dataset
+      * @param epochs the number of passes of the training dataset
+      * @param tracker
+      */
+     train(trainingData: Dataset, validationData?: Dataset, epochs?: number): AsyncGenerator<EpochLogs, void>;
+     predict(input: Sample): Promise<Prediction>;
+     generate(input: string, tokenizer: PreTrainedTokenizer, newTokens?: number): Promise<string>;
+     get config(): Required<GPTConfig>;
+     get weights(): WeightsContainer;
+     set weights(ws: WeightsContainer);
+     static deserialize(data: GPTSerialization): Model;
+     serialize(): GPTSerialization;
+     extract(): tf.LayersModel;
+     [Symbol.dispose](): void;
+ }
+ export type GPTSerialization = {
+     weights: WeightsContainer;
+     config?: GPTConfig;
+ };
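Because train is an async generator, a caller can consume one epoch's logs at a time and stop training simply by breaking out of the loop, with no callbacks involved. A hedged sketch against the declared API (the dataset values are placeholders, module paths are assumptions, and the log field names follow the implementation hunk below):

import { GPT } from './models/gpt/index.js'; // assumed path
import type { Dataset } from './dataset/index.js'; // assumed path

declare const trainingData: Dataset;
declare const validationData: Dataset;

const model = new GPT({ modelType: 'gpt-nano', lr: 0.001, blockSize: 128, vocabSize: 50258 });
for await (const logs of model.train(trainingData, validationData, 5)) {
    console.log(`epoch ${logs.epoch}: training loss ${logs.training.loss}`);
}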
@@ -0,0 +1,107 @@
+ /**
+  * this code is taken from gpt-tfjs with modifications from @peacefulotter and @lukemovement
+  **/
+ import { WeightsContainer } from '../../index.js';
+ import { Model } from '../model.js';
+ import { GPTForCausalLM } from './model.js';
+ export class GPT extends Model {
+     model;
+     constructor(partialConfig) {
+         super();
+         this.model = new GPTForCausalLM(partialConfig);
+     }
+     /**
+      * The GPT train methods wraps the model.fitDataset call in a for loop to act as a generator (of logs)
+      * This allows for getting logs and stopping training without callbacks.
+      *
+      * @param trainingData training dataset
+      * @param validationData validation dataset
+      * @param epochs the number of passes of the training dataset
+      * @param tracker
+      */
+     async *train(trainingData, validationData, epochs = 1) {
+         this.model.compile();
+         let logs;
+         const trainingArgs = {
+             epochs: 1, // force fitDataset to do only one epoch because it is wrapped in a for loop
+             validationData,
+             callbacks: { onEpochEnd: (_, cur) => { logs = cur; } },
+         };
+         for (let epoch = 0; epoch < epochs; epoch++) {
+             await this.model.fitDataset(trainingData, trainingArgs);
+             if (logs === undefined) {
+                 throw new Error("Epoch didn't gave any logs");
+             }
+             const { loss, val_acc, val_loss, peakMemory } = logs;
+             if (loss === undefined || isNaN(loss)) {
+                 throw new Error("Training loss is undefined or nan");
+             }
+             const structuredLogs = {
+                 epoch,
+                 peakMemory,
+                 training: {
+                     loss: logs.loss
+                 }
+             };
+             if (validationData !== undefined) {
+                 if (val_loss === undefined || isNaN(val_loss) ||
+                     val_acc === undefined || isNaN(val_acc)) {
+                     throw new Error("Invalid validation logs");
+                 }
+                 structuredLogs.validation = { accuracy: logs.val_acc, loss: logs.val_loss };
+             }
+             yield structuredLogs;
+         }
+     }
+     predict(input) {
+         const ret = this.model.predict(input);
+         if (Array.isArray(ret)) {
+             throw new Error('prediction yield many Tensors but should have only returned one');
+         }
+         return Promise.resolve(ret);
+     }
+     async generate(input, tokenizer, newTokens = 10) {
+         const { input_ids: tokens } = await tokenizer(input, { return_tensor: false });
+         const generationConfig = {
+             maxNewTokens: newTokens,
+             temperature: 1.0,
+             doSample: false
+         };
+         const predictedTokens = await this.model.generate(tokens, generationConfig);
+         const generatedWords = tokenizer.decode(predictedTokens[0]);
+         return generatedWords;
+     }
+     get config() {
+         return this.model.getGPTConfig;
+     }
+     get weights() {
+         return new WeightsContainer(this.model.weights.map((w) => w.read()));
+     }
+     set weights(ws) {
+         this.model.setWeights(ws.weights);
+     }
+     static deserialize(data) {
+         const model = new GPT(data.config);
+         model.weights = data.weights;
+         return model;
+     }
+     serialize() {
+         return {
+             weights: this.weights,
+             config: this.config
+         };
+     }
+     extract() {
+         return this.model;
+     }
+     [Symbol.dispose]() {
+         console.log("Disposing model");
+         if (this.model.optimizer !== undefined) {
+             this.model.optimizer.dispose();
+         }
+         // Some tensors are not cleaned up when model.dispose is called
+         // So we dispose them manually
+         this.model.disposeRefs();
+         this.model.dispose();
+     }
+ }
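generate runs greedy decoding (doSample is false) for newTokens steps, and serialize/deserialize round-trip the weights plus config. A hypothetical end-to-end sketch; the module path and the tokenizer checkpoint name are assumptions, not pinned by this package:

import { GPT } from './models/gpt/index.js'; // assumed path
import { AutoTokenizer } from '@xenova/transformers';

const tokenizer = await AutoTokenizer.from_pretrained('Xenova/gpt2'); // assumed checkpoint
const model = new GPT();

const text = await model.generate('The quick brown fox', tokenizer, 10); // 10 greedy tokens
const copy = GPT.deserialize(model.serialize()); // weights + config round-trip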
@@ -0,0 +1,13 @@
+ import * as tf from '@tensorflow/tfjs';
+ import type { GPTConfig } from './config.js';
+ /**
+  * The GPTArchitecture specifically defines a GPT forward pass, i.e.,
+  * what are the inputs, the successive transformer blocks and the outputs. It is then
+  * used to create a GPTModel
+  *
+  * @param conf GPTConfig
+  * @returns model, tf.LayersModel, which supports model(inputs), model.predict and model.apply
+  */
+ export declare function GPTArchitecture(config: Required<GPTConfig>, disposalRefs: tf.TensorContainer[], peakMemory: {
+     value: number;
+ }): tf.LayersModel;
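A hedged sketch of calling this factory with the defaults (module paths are assumptions): disposalRefs collects the tensors that disposeRefs later frees, and peakMemory is, per the signature, a mutable holder presumably updated as the model runs.

import * as tf from '@tensorflow/tfjs';
import { GPTArchitecture } from './models/gpt/model.js'; // assumed path
import { DEFAULT_CONFIG } from './models/gpt/config.js'; // assumed path

const disposalRefs: tf.TensorContainer[] = []; // filled during construction/forward passes
const peakMemory = { value: 0 };               // updated by the model, per the signature
const layersModel: tf.LayersModel = GPTArchitecture(DEFAULT_CONFIG, disposalRefs, peakMemory);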