@epfml/discojs 2.1.2-p20240531085945.0 → 2.1.2-p20240617070831.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/dist/dataset/data/preprocessing/text_preprocessing.js +2 -2
  2. package/dist/dataset/data_loader/image_loader.js +1 -1
  3. package/dist/default_tasks/cifar10.js +2 -1
  4. package/dist/default_tasks/lus_covid.js +2 -1
  5. package/dist/default_tasks/mnist.js +2 -1
  6. package/dist/default_tasks/simple_face.js +2 -1
  7. package/dist/default_tasks/skin_condition.js +2 -1
  8. package/dist/default_tasks/titanic.js +2 -1
  9. package/dist/default_tasks/wikitext.js +8 -9
  10. package/dist/index.d.ts +1 -2
  11. package/dist/index.js +1 -2
  12. package/dist/memory/base.d.ts +15 -19
  13. package/dist/memory/empty.d.ts +2 -2
  14. package/dist/memory/empty.js +2 -2
  15. package/dist/memory/index.d.ts +0 -1
  16. package/dist/memory/index.js +0 -1
  17. package/dist/models/gpt/config.d.ts +3 -3
  18. package/dist/models/gpt/index.d.ts +7 -5
  19. package/dist/models/gpt/index.js +12 -9
  20. package/dist/models/gpt/layers.d.ts +1 -3
  21. package/dist/models/gpt/layers.js +9 -16
  22. package/dist/models/gpt/model.d.ts +1 -6
  23. package/dist/models/gpt/model.js +17 -32
  24. package/dist/models/tokenizer.js +5 -1
  25. package/dist/task/training_information.d.ts +1 -0
  26. package/dist/task/training_information.js +8 -2
  27. package/dist/training/trainer/distributed_trainer.js +6 -1
  28. package/dist/training/trainer/local_trainer.js +6 -1
  29. package/dist/training/trainer/trainer_builder.js +6 -2
  30. package/dist/validation/validator.d.ts +5 -7
  31. package/dist/validation/validator.js +53 -67
  32. package/package.json +1 -1
  33. package/dist/informant/graph_informant.d.ts +0 -10
  34. package/dist/informant/graph_informant.js +0 -20
  35. package/dist/informant/index.d.ts +0 -1
  36. package/dist/informant/index.js +0 -1
  37. package/dist/memory/model_type.d.ts +0 -9
  38. package/dist/memory/model_type.js +0 -10
package/dist/dataset/data/preprocessing/text_preprocessing.js CHANGED
@@ -24,7 +24,7 @@ export var TextPreprocessing;
  const leftPadding = {
  type: TextPreprocessing.LeftPadding,
  apply: async (x, task) => {
- if (x === undefined || !Array.isArray(x) || x.length == 0 || typeof (x[0] != 'number')) {
+ if (x === undefined || !Array.isArray(x) || x.length == 0 || typeof (x[0] !== 'number')) {
  new Error("The leftPadding preprocessing expects a non empty 1D array of number");
  }
  const { tokens } = await x;
@@ -58,7 +58,7 @@ const leftPadding = {
  const tokenize = {
  type: TextPreprocessing.Tokenize,
  apply: async (x, task) => {
- if (typeof x != 'string') {
+ if (typeof x !== 'string') {
  new Error("The tokenize preprocessing expects a string as input");
  }
  const xs = await x; // tf.TextLineDataset yields strings
package/dist/dataset/data_loader/image_loader.js CHANGED
@@ -58,7 +58,7 @@ export class ImageLoader extends DataLoader {
  const numberOfClasses = labelList.length;
  // Map label strings to integer
  const label_to_int = new Map(labelList.map((label_name, idx) => [label_name, idx]));
- if (label_to_int.size != numberOfClasses) {
+ if (label_to_int.size !== numberOfClasses) {
  throw new Error("Input labels aren't matching the task LABEL_LIST");
  }
  labels = config.labels.map(label_name => {
package/dist/default_tasks/cifar10.js CHANGED
@@ -32,7 +32,8 @@ export const cifar10 = {
  clippingRadius: 20,
  decentralizedSecure: true,
  minimumReadyPeers: 3,
- maxShareValue: 100
+ maxShareValue: 100,
+ tensorBackend: 'tfjs'
  }
  };
  },
package/dist/default_tasks/lus_covid.js CHANGED
@@ -32,7 +32,8 @@ export const lusCovid = {
  clippingRadius: 20,
  decentralizedSecure: true,
  minimumReadyPeers: 2,
- maxShareValue: 100
+ maxShareValue: 100,
+ tensorBackend: 'tfjs'
  }
  };
  },
package/dist/default_tasks/mnist.js CHANGED
@@ -31,7 +31,8 @@ export const mnist = {
  clippingRadius: 20,
  decentralizedSecure: true,
  minimumReadyPeers: 3,
- maxShareValue: 100
+ maxShareValue: 100,
+ tensorBackend: 'tfjs'
  }
  };
  },
package/dist/default_tasks/simple_face.js CHANGED
@@ -28,7 +28,8 @@ export const simpleFace = {
  LABEL_LIST: ['child', 'adult'],
  scheme: 'federated', // secure aggregation not yet implemented for federated
  noiseScale: undefined,
- clippingRadius: undefined
+ clippingRadius: undefined,
+ tensorBackend: 'tfjs'
  }
  };
  },
package/dist/default_tasks/skin_condition.js CHANGED
@@ -28,7 +28,8 @@ export const skinCondition = {
  LABEL_LIST: LABELS,
  scheme: 'federated',
  noiseScale: undefined,
- clippingRadius: undefined
+ clippingRadius: undefined,
+ tensorBackend: 'tfjs'
  }
  };
  },
package/dist/default_tasks/titanic.js CHANGED
@@ -62,7 +62,8 @@ export const titanic = {
  ],
  scheme: 'federated', // secure aggregation not yet implemented for FeAI
  noiseScale: undefined,
- clippingRadius: undefined
+ clippingRadius: undefined,
+ tensorBackend: 'tfjs'
  }
  };
  },
package/dist/default_tasks/wikitext.js CHANGED
@@ -17,17 +17,16 @@ export const wikitext = {
  dataType: 'text',
  modelID: 'wikitext-103-raw-model',
  preprocessingFunctions: [data.TextPreprocessing.Tokenize, data.TextPreprocessing.LeftPadding],
- validationSplit: 0.2, // TODO: is this used somewhere? because train, eval and test are already split in dataset
- epochs: 5,
  scheme: 'federated',
- noiseScale: undefined,
- decentralizedSecure: true,
- minimumReadyPeers: 3,
- maxShareValue: 100,
- roundDuration: 10,
- batchSize: 16,
+ epochs: 5,
+ // Unused by wikitext because data already comes split
+ // But if set to 0 then the webapp doesn't display the validation metrics
+ validationSplit: 0.1,
+ roundDuration: 2,
+ batchSize: 1, // If set too high (e.g. 16) then firefox raises a WebGL error
  tokenizer: 'Xenova/gpt2',
- maxSequenceLength: 128
+ maxSequenceLength: 128,
+ tensorBackend: 'gpt'
  }
  };
  },
package/dist/index.d.ts CHANGED
@@ -2,13 +2,12 @@ export * as data from './dataset/index.js';
  export * as serialization from './serialization/index.js';
  export * as training from './training/index.js';
  export * as privacy from './privacy.js';
- export { GraphInformant } from './informant/index.js';
  export * as client from './client/index.js';
  export * as aggregator from './aggregator/index.js';
  export { WeightsContainer, aggregation } from './weights/index.js';
  export { AsyncInformant } from './async_informant.js';
  export { Logger, ConsoleLogger } from './logging/index.js';
- export { Memory, ModelType, type ModelInfo, type Path, type ModelSource, Empty as EmptyMemory } from './memory/index.js';
+ export { Memory, type ModelInfo, type Path, type ModelSource, Empty as EmptyMemory } from './memory/index.js';
  export { Disco, RoundLogs } from './training/index.js';
  export { Validator } from './validation/index.js';
  export { Model, EpochLogs } from './models/index.js';
package/dist/index.js CHANGED
@@ -2,13 +2,12 @@ export * as data from './dataset/index.js';
  export * as serialization from './serialization/index.js';
  export * as training from './training/index.js';
  export * as privacy from './privacy.js';
- export { GraphInformant } from './informant/index.js';
  export * as client from './client/index.js';
  export * as aggregator from './aggregator/index.js';
  export { WeightsContainer, aggregation } from './weights/index.js';
  export { AsyncInformant } from './async_informant.js';
  export { ConsoleLogger } from './logging/index.js';
- export { Memory, ModelType, Empty as EmptyMemory } from './memory/index.js';
+ export { Memory, Empty as EmptyMemory } from './memory/index.js';
  export { Disco } from './training/index.js';
  export { Validator } from './validation/index.js';
  export { Model } from './models/index.js';
package/dist/memory/base.d.ts CHANGED
@@ -1,33 +1,28 @@
  import type { Model, TaskID } from '../index.js';
- import type { ModelType } from './model_type.js';
  /**
  * Model path which uniquely identifies a model in memory.
  */
  export type Path = string;
+ /**
+ * Type of models stored in memory. Stored models can either be a model currently
+ * being trained ("working model") or a regular model saved in memory ("saved model").
+ * There can only be a single working model for a given task.
+ */
+ type StoredModelType = 'saved' | 'working';
  /**
  * Model information which uniquely identifies a model in memory.
  */
  export interface ModelInfo {
- /**
- * The model's type: "working" or "saved" model.
- */
- type?: ModelType;
- /**
- * The model's version, to allow for multiple saved models of a same task without
- * causing id conflicts
- */
+ type: StoredModelType;
  version?: number;
- /**
- * The model's corresponding task
- */
  taskID: TaskID;
- /**
- * The model's name
- */
  name: string;
+ tensorBackend: 'gpt' | 'tfjs';
  }
  /**
  * A model source uniquely identifies a model stored in memory.
+ * It can be in the form of either a model info object or a Path string
+ * (one-to-one mapping between the two)
  */
  export type ModelSource = ModelInfo | Path;
  /**
@@ -95,21 +90,21 @@ export declare abstract class Memory {
  /**
  * Computes the path in memory corresponding to the given model source, be it a path or model information.
  * This is used to easily switch between model path and information, which are both unique model identifiers
- * with a one-to-one correspondance. Returns undefined instead if no path could be inferred from the given
+ * with a one-to-one equivalence. Returns undefined instead if no path could be inferred from the given
  * model source.
  * @param source The model source
  * @returns The model path
  */
- abstract pathFor(source: ModelSource): Path | undefined;
+ abstract getModelMemoryPath(source: ModelSource): Path | undefined;
  /**
  * Computes the model information corresponding to the given model source, be it a path or model information.
  * This is used to easily switch between model path and information, which are both unique model identifiers
- * with a one-to-one correspondance. Returns undefined instead if no unique model information could be inferred
+ * with a one-to-one equivalence. Returns undefined instead if no unique model information could be inferred
  * from the given model source.
  * @param source The model source
  * @returns The model information
  */
- abstract infoFor(source: ModelSource): ModelInfo | undefined;
+ abstract getModelInfo(source: ModelSource): ModelInfo | undefined;
  /**
  * Computes the lowest version a model source can have without conflicting with model versions currently in memory.
  * @param source The model source
@@ -117,3 +112,4 @@ export declare abstract class Memory {
  */
  abstract duplicateSource(source: ModelSource): Promise<ModelSource | undefined>;
  }
+ export {};
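
With this change, the ModelType enum is gone, the type field is required, and every ModelInfo must carry a tensorBackend. A minimal sketch of the new shape (the task and model names below are hypothetical):

    import type { ModelInfo } from '@epfml/discojs';

    const info: ModelInfo = {
      type: 'working',       // plain string union replaces the removed ModelType enum
      taskID: 'titanic',     // hypothetical task ID
      name: 'titanic-model', // hypothetical model name
      tensorBackend: 'tfjs', // newly required field
    };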
package/dist/memory/empty.d.ts CHANGED
@@ -14,7 +14,7 @@ export declare class Empty extends Memory {
  saveModel(): Promise<undefined>;
  deleteModel(): Promise<void>;
  downloadModel(): Promise<void>;
- pathFor(): Path;
- infoFor(): ModelInfo;
+ getModelMemoryPath(): Path;
+ getModelInfo(): ModelInfo;
  duplicateSource(): Promise<undefined>;
  }
package/dist/memory/empty.js CHANGED
@@ -31,10 +31,10 @@ export class Empty extends Memory {
  downloadModel() {
  return Promise.reject(new Error('empty'));
  }
- pathFor() {
+ getModelMemoryPath() {
  throw new Error('empty');
  }
- infoFor() {
+ getModelInfo() {
  throw new Error('empty');
  }
  duplicateSource() {
package/dist/memory/index.d.ts CHANGED
@@ -1,3 +1,2 @@
  export { Empty } from './empty.js';
  export { Memory, type ModelInfo, type Path, type ModelSource } from './base.js';
- export { ModelType } from './model_type.js';
package/dist/memory/index.js CHANGED
@@ -1,3 +1,2 @@
  export { Empty } from './empty.js';
  export { Memory } from './base.js';
- export { ModelType } from './model_type.js';
package/dist/models/gpt/config.d.ts CHANGED
@@ -1,9 +1,9 @@
- type ModelType = 'gpt2' | 'gpt2-medium' | 'gpt2-large' | 'gpt2-xl' | 'gpt-mini' | 'gpt-micro' | 'gpt-nano';
+ type GPTModelType = 'gpt2' | 'gpt2-medium' | 'gpt2-large' | 'gpt2-xl' | 'gpt-mini' | 'gpt-micro' | 'gpt-nano';
  export interface GPTConfig {
  lr: number;
  blockSize: number;
  vocabSize: number;
- modelType: ModelType;
+ modelType: GPTModelType;
  name?: string;
  evaluate?: boolean;
  maxEvalBatches?: number;
@@ -28,5 +28,5 @@ export type ModelSize = {
  nHead: number;
  nEmbd: number;
  };
- export declare function getModelSizes(modelType: ModelType): Required<ModelSize>;
+ export declare function getModelSizes(modelType: GPTModelType): Required<ModelSize>;
  export {};
package/dist/models/gpt/index.d.ts CHANGED
@@ -1,15 +1,20 @@
  /**
  * this code is taken from gpt-tfjs with modifications from @peacefulotter and @lukemovement
  **/
+ import * as tf from '@tensorflow/tfjs';
  import { PreTrainedTokenizer } from '@xenova/transformers';
  import { WeightsContainer } from '../../index.js';
  import type { Dataset } from '../../dataset/index.js';
  import { Model } from '../model.js';
  import type { EpochLogs, Prediction, Sample } from '../model.js';
  import type { GPTConfig } from './config.js';
+ export type GPTSerialization = {
+ weights: WeightsContainer;
+ config?: GPTConfig;
+ };
  export declare class GPT extends Model {
  private readonly model;
- constructor(partialConfig?: GPTConfig);
+ constructor(partialConfig?: GPTConfig, layersModel?: tf.LayersModel);
  /**
  * The GPT train methods wraps the model.fitDataset call in a for loop to act as a generator (of logs)
  * This allows for getting logs and stopping training without callbacks.
@@ -27,9 +32,6 @@ export declare class GPT extends Model {
  set weights(ws: WeightsContainer);
  static deserialize(data: GPTSerialization): Model;
  serialize(): GPTSerialization;
+ extract(): tf.LayersModel;
  [Symbol.dispose](): void;
  }
- export type GPTSerialization = {
- weights: WeightsContainer;
- config?: GPTConfig;
- };
package/dist/models/gpt/index.js CHANGED
@@ -6,9 +6,9 @@ import { Model } from '../model.js';
  import { GPTForCausalLM } from './model.js';
  export class GPT extends Model {
  model;
- constructor(partialConfig) {
+ constructor(partialConfig, layersModel) {
  super();
- this.model = new GPTForCausalLM(partialConfig);
+ this.model = new GPTForCausalLM(partialConfig, layersModel);
  }
  /**
  * The GPT train methods wraps the model.fitDataset call in a for loop to act as a generator (of logs)
@@ -40,13 +40,14 @@ export class GPT extends Model {
  epoch,
  peakMemory,
  training: {
- loss: logs.loss
+ loss: logs.loss,
+ accuracy: logs.acc
  }
  };
  if (validationData !== undefined) {
  if (val_loss === undefined || isNaN(val_loss) ||
  val_acc === undefined || isNaN(val_acc)) {
- throw new Error("Invalid validation logs");
+ throw new Error("Validation accuracy or loss is undefined or nan");
  }
  structuredLogs.validation = { accuracy: logs.val_acc, loss: logs.val_loss };
  }
@@ -91,14 +92,16 @@ export class GPT extends Model {
  config: this.config
  };
  }
+ extract() {
+ return this.model;
+ }
  [Symbol.dispose]() {
- console.log("Disposing model");
  if (this.model.optimizer !== undefined) {
  this.model.optimizer.dispose();
  }
- // Some tensors are not cleaned up when model.dispose is called
- // So we dispose them manually
- this.model.disposeRefs();
- this.model.dispose();
+ const disposeResults = this.model.dispose();
+ if (disposeResults.refCountAfterDispose > 0) {
+ console.error("The GPT model was not disposed correctly (refcount > 0)", disposeResults);
+ }
  }
  }
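
The GPT wrapper now exposes its underlying tf.LayersModel through extract() and can be rebuilt around an existing one via the new constructor parameter. A sketch of the round trip; the deep import path of GPT is an assumption here, since the package only re-exports Model from its top level:

    import * as tf from '@tensorflow/tfjs';
    import { GPT } from '@epfml/discojs/dist/models/gpt/index.js'; // assumed path

    const gpt = new GPT();                         // built from the default GPTConfig
    const layers: tf.LayersModel = gpt.extract();  // raw tf.LayersModel underneath
    const rewrapped = new GPT(undefined, layers);  // wrap an existing tf.LayersModel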
package/dist/models/gpt/layers.d.ts CHANGED
@@ -8,6 +8,4 @@ import type { GPTConfig } from './config.js';
  * @param conf GPTConfig
  * @returns model, tf.LayersModel, which supports model(inputs), model.predict and model.apply
  */
- export declare function GPTArchitecture(config: Required<GPTConfig>, disposalRefs: tf.TensorContainer[], peakMemory: {
- value: number;
- }): tf.LayersModel;
+ export declare function GPTArchitecture(config: Required<GPTConfig>): tf.LayersModel;
package/dist/models/gpt/layers.js CHANGED
@@ -40,7 +40,6 @@ class LogLayer extends tf.layers.Layer {
  tf.serialization.registerClass(LogLayer);
  class CausalSelfAttention extends tf.layers.Layer {
  config;
- peakMemory;
  static className = 'CausalSelfAttention';
  nHead;
  nEmbd;
@@ -51,10 +50,9 @@ class CausalSelfAttention extends tf.layers.Layer {
  cAttnBias;
  cProjKernel;
  cProjBias;
- constructor(config, disposalRefs, peakMemory) {
+ constructor(config) {
  super(config);
  this.config = config;
- this.peakMemory = peakMemory;
  this.nEmbd = config.nEmbd;
  this.nHead = config.nHead;
  this.dropout = config.dropout;
@@ -63,7 +61,6 @@ class CausalSelfAttention extends tf.layers.Layer {
  // calling bandPart zero out the upper triangular part of the all-ones matrix
  // from the doc: tf.linalg.band_part(input, -1, 0) ==> Lower triangular part
  this.mask = tf.linalg.bandPart(tf.ones([config.blockSize, config.blockSize]), -1, 0);
- disposalRefs.push(this.mask); // Push a reference to dispose this matrix later
  }
  build() {
  this.cAttnKernel = this.addWeight('c_attn/kernel', [this.nEmbd, 3 * this.nEmbd], 'float32', tf.initializers.glorotNormal({}));
@@ -134,10 +131,6 @@ class CausalSelfAttention extends tf.layers.Layer {
  y = tf.reshape(y, [B, T, C]);
  y = dense(y, this.cProjKernel, this.cProjBias);
  y = kwargs.training === true ? tf.dropout(y, this.dropout) : y;
- const memoryAllocated = tf.memory().numBytes / 1024 / 1024 / 1024; // GB
- if (memoryAllocated > this.peakMemory.value) {
- this.peakMemory.value = memoryAllocated;
- }
  return y;
  });
  }
@@ -167,25 +160,25 @@ tf.serialization.registerClass(GELU);
  function MLP(config) {
  return tf.sequential({ layers: [
  tf.layers.dense({
- name: 'mlp/c_fc',
+ name: config.name + `/mlp/c_fc`,
  units: 4 * config.nEmbd,
  inputDim: config.nEmbd,
  inputShape: [config.blockSize, config.nEmbd]
  }),
  new GELU(),
  tf.layers.dense({
- name: 'mlp/c_proj',
+ name: config.name + '/mlp/c_proj',
  units: config.nEmbd,
  inputDim: 4 * config.nEmbd,
  inputShape: [config.blockSize, 4 * config.nEmbd]
  }),
  tf.layers.dropout({
- name: 'mlp/drop',
+ name: config.name + '/mlp/drop',
  rate: config.residDrop
  }),
  ] });
  }
- function TransformerBlock(conf, disposalRefs, peakMemory) {
+ function TransformerBlock(conf) {
  const config = Object.assign({ name: 'h' }, conf);
  const inputs = tf.input({ shape: [config.blockSize, config.nEmbd] });
  let x1, x2;
@@ -196,7 +189,7 @@ function TransformerBlock(conf, disposalRefs, peakMemory) {
  x1 = new LogLayer({ name: config.name + '/ln_1_log' }).apply(x1);
  }
  // self attention layer
- x1 = new CausalSelfAttention(Object.assign({}, config, { name: config.name + '/attn' }), disposalRefs, peakMemory).apply(x1);
+ x1 = new CausalSelfAttention(Object.assign({}, config, { name: config.name + '/attn' })).apply(x1);
  // Residual connection
  x1 = tf.layers.add().apply([inputs, x1]);
  // normalization
@@ -204,7 +197,7 @@ function TransformerBlock(conf, disposalRefs, peakMemory) {
  .layerNormalization({ name: config.name + '/ln_2', epsilon: 1e-5 })
  .apply(x1);
  // MLP
- x2 = MLP(Object.assign({}, config, { name: config.name + '/mlp' })).apply(x2);
+ x2 = MLP(Object.assign({}, config, { name: config.name })).apply(x2);
  // add attention output to mlp output
  x2 = tf.layers.add().apply([x1, x2]);
  return tf.model({ name: config.name, inputs, outputs: x2 });
@@ -217,7 +210,7 @@ function TransformerBlock(conf, disposalRefs, peakMemory) {
  * @param conf GPTConfig
  * @returns model, tf.LayersModel, which supports model(inputs), model.predict and model.apply
  */
- export function GPTArchitecture(config, disposalRefs, peakMemory) {
+ export function GPTArchitecture(config) {
  const inputs = tf.input({ shape: [null] });
  //Token embedding
  const tokEmb = config.tokEmb
@@ -250,7 +243,7 @@ export function GPTArchitecture(config, disposalRefs, peakMemory) {
  }
  //Apply successively transformer blocks, attention and dense layers
  for (let i = 0; i < config.nLayer; i++) {
- x = TransformerBlock(Object.assign({}, config, { name: config.name + '/h/' + i }), disposalRefs, peakMemory).apply(x);
+ x = TransformerBlock(Object.assign({}, config, { name: config.name + '/h/' + i })).apply(x);
  }
  // Normalization
  x = tf.layers.layerNormalization({ name: config.name + '/ln_f', epsilon: 1e-5 })
package/dist/models/gpt/model.d.ts CHANGED
@@ -16,12 +16,7 @@ export declare abstract class Dataset<T> {
  */
  declare class GPTModel extends tf.LayersModel {
  protected readonly config: Required<GPTConfig>;
- private readonly disposalRefs;
- protected peakMemory: {
- value: number;
- };
- constructor(partialConfig?: GPTConfig);
- disposeRefs(): void;
+ constructor(partialConfig?: GPTConfig, layersModel?: tf.LayersModel);
  get getGPTConfig(): Required<GPTConfig>;
  compile(): void;
  fitDataset<T>(dataset: Dataset<T>, trainingArgs: tf.ModelFitDatasetArgs<T>): Promise<tf.History>;
package/dist/models/gpt/model.js CHANGED
@@ -9,31 +9,20 @@ import { GPTArchitecture } from './layers.js';
  */
  class GPTModel extends tf.LayersModel {
  config;
- disposalRefs; // Array to store tensor to dispose manually
- // Object to pass down to layers to store max memory allocated
- // This is an object rather than a primitive to pass the reference
- peakMemory;
- constructor(partialConfig) {
+ constructor(partialConfig, layersModel) {
  // Fill missing config parameters with default values
  let completeConfig = { ...DEFAULT_CONFIG, ...partialConfig };
  // Add layer sizes depending on which model has been specified
  completeConfig = { ...completeConfig, ...getModelSizes(completeConfig.modelType) };
- // Init the tf.LayersModel and assign it to this
- const disposalRefs = [];
- const peakMemory = { value: 0 };
- const gpt = GPTArchitecture(completeConfig, disposalRefs, peakMemory);
- const { inputs, outputs, name } = gpt;
- super({ inputs, outputs, name });
- this.config = completeConfig;
- this.disposalRefs = disposalRefs;
- this.peakMemory = peakMemory;
- }
- // Some tensors are not cleaned up when model.dispose is called
- // So we dispose them manually
- disposeRefs() {
- for (const tensorContainer of this.disposalRefs) {
- tf.dispose([tensorContainer]);
+ if (layersModel !== undefined) {
+ super({ inputs: layersModel.inputs, outputs: layersModel.outputs, name: layersModel.name });
+ }
+ else {
+ const gpt = GPTArchitecture(completeConfig);
+ const { inputs, outputs, name } = gpt;
+ super({ inputs, outputs, name });
  }
+ this.config = completeConfig;
  }
  get getGPTConfig() {
  return this.config;
@@ -42,7 +31,6 @@ class GPTModel extends tf.LayersModel {
  this.optimizer = this.config.weightDecay !== 0
  ? getCustomAdam(this, this.config.lr, this.config.weightDecay)
  : tf.train.adam(this.config.lr);
- this.peakMemory.value = 0;
  }
  async fitDataset(dataset, trainingArgs) {
  const callbacks = trainingArgs.callbacks;
@@ -50,6 +38,7 @@ class GPTModel extends tf.LayersModel {
  await callbacks.onTrainBegin?.();
  for (let epoch = 1; epoch <= trainingArgs.epochs; epoch++) {
  let averageLoss = 0;
+ let peakMemory = 0;
  let iteration = 1;
  const iterator = await dataset.iterator();
  let preprocessingTime = performance.now();
@@ -69,22 +58,15 @@ class GPTModel extends tf.LayersModel {
  }
  return tf.losses.softmaxCrossEntropy(ys, logits);
  };
- let backwardPassMemory = 0;
  const lossTensor = tf.tidy(() => {
  const { grads, value: lossTensor } = this.optimizer.computeGradients(lossFn);
  const gradsClipped = clipByGlobalNormObj(grads, 1);
  this.optimizer.applyGradients(gradsClipped);
- backwardPassMemory = tf.memory().numBytes / 1024 / 1024 / 1024;
  return lossTensor;
  });
  const loss = await lossTensor.array();
  averageLoss += loss;
  weightUpdateTime = performance.now() - weightUpdateTime;
- // Probably never the case. Empirically the attention mechanism always allocates
- // more memory than the backward pass
- if (backwardPassMemory > this.peakMemory.value) {
- this.peakMemory.value = backwardPassMemory;
- }
  tf.dispose([xs, ys, lossTensor]);
  if (evalDataset !== undefined &&
  this.config.evaluateEvery !== undefined &&
@@ -92,22 +74,25 @@ class GPTModel extends tf.LayersModel {
  const iterationLogs = await evaluate(this, evalDataset, this.config.maxEvalBatches);
  console.log(iterationLogs);
  }
- console.log(`Epoch: ${epoch}`, `\tStep: ${iteration} / ${this.config.maxIter}`, `\tLoss: ${loss.toFixed(3)}`, `\tPeak memory: ${this.peakMemory.value.toFixed(2)} GB`, `\tNumber of tensors allocated: ${tf.memory().numTensors}`, `\tPreprocessing time: ${preprocessingTime.toFixed(0)} ms`, `\tWeight update time: ${weightUpdateTime.toFixed(0)} ms`);
+ const memory = tf.memory().numBytes / 1024 / 1024 / 1024;
+ if (memory > peakMemory) {
+ peakMemory = memory;
+ }
+ console.log(`Epoch: ${epoch}`, `\tStep: ${iteration} / ${this.config.maxIter}`, `\tLoss: ${loss.toFixed(3)}`, `\tMemory: ${memory.toFixed(2)} GB`, `\tNumber of tensors allocated: ${tf.memory().numTensors}`, `\tPreprocessing time: ${preprocessingTime.toFixed(0)} ms`, `\tWeight update time: ${weightUpdateTime.toFixed(0)} ms`);
  iteration++;
  next = await iterator.next();
  }
  // Memory leak: If we reached the last iteration rather than the end of the dataset, cleanup the tensors
- if (next.done != true && iteration > this.config.maxIter) {
+ if (next.done !== true && iteration > this.config.maxIter) {
  const { xs, ys } = next.value;
  tf.dispose([xs, ys]);
  }
  let logs = {
  'loss': averageLoss / iteration,
- 'peakMemory': this.peakMemory.value
+ 'peakMemory': peakMemory
  };
  if (evalDataset !== undefined) {
  logs = { ...logs, ...await evaluate(this, evalDataset, this.config.maxEvalBatches) };
- console.log(logs);
  }
  await callbacks.onEpochEnd?.(epoch, logs);
  }
package/dist/models/tokenizer.js CHANGED
@@ -1,4 +1,4 @@
- import { AutoTokenizer } from '@xenova/transformers';
+ import { AutoTokenizer, env } from '@xenova/transformers';
  /**
  * A task's tokenizer is initially specified as the tokenizer name, e.g., 'Xenova/gpt2'.
  * The first time the tokenizer is needed, this function initializes the actual tokenizer object
@@ -15,6 +15,10 @@ export async function getTaskTokenizer(task) {
  if (tokenizer === undefined)
  throw Error('No tokenizer specified in the task training information');
  if (typeof tokenizer == 'string') {
+ // Needs to be false in order to prevent transformers.js from reading the local cache
+ // and triggering an error when running in the browser
+ // Reference: https://medium.com/@GenerationAI/transformers-js-onnx-runtime-webgpu-46c3e58d547c
+ env.allowLocalModels = false;
  tokenizer = await AutoTokenizer.from_pretrained(tokenizer);
  task.trainingInformation.tokenizer = tokenizer;
  }
package/dist/task/training_information.d.ts CHANGED
@@ -23,5 +23,6 @@ export interface TrainingInformation {
  aggregator?: AggregatorChoice;
  tokenizer?: string | PreTrainedTokenizer;
  maxSequenceLength?: number;
+ tensorBackend: 'tfjs' | 'gpt';
  }
  export declare function isTrainingInformation(raw: unknown): raw is TrainingInformation;
package/dist/task/training_information.js CHANGED
@@ -10,7 +10,7 @@ export function isTrainingInformation(raw) {
  if (typeof raw !== 'object' || raw === null) {
  return false;
  }
- const { IMAGE_H, IMAGE_W, LABEL_LIST, aggregator, batchSize, clippingRadius, dataType, decentralizedSecure, epochs, inputColumns, maxShareValue, minimumReadyPeers, modelID, noiseScale, outputColumns, preprocessingFunctions, roundDuration, scheme, validationSplit, tokenizer, maxSequenceLength, } = raw;
+ const { IMAGE_H, IMAGE_W, LABEL_LIST, aggregator, batchSize, clippingRadius, dataType, decentralizedSecure, epochs, inputColumns, maxShareValue, minimumReadyPeers, modelID, noiseScale, outputColumns, preprocessingFunctions, roundDuration, scheme, validationSplit, tokenizer, maxSequenceLength, tensorBackend } = raw;
  if (typeof dataType !== 'string' ||
  typeof modelID !== 'string' ||
  typeof epochs !== 'number' ||
@@ -53,6 +53,11 @@ export function isTrainingInformation(raw) {
  return false;
  }
  }
+ switch (tensorBackend) {
+ case 'tfjs': break;
+ case 'gpt': break;
+ default: return false;
+ }
  switch (scheme) {
  case 'decentralized': break;
  case 'federated': break;
@@ -80,7 +85,8 @@ export function isTrainingInformation(raw) {
  scheme,
  validationSplit,
  tokenizer,
- maxSequenceLength
+ maxSequenceLength,
+ tensorBackend
  };
  const _correct = repack;
  const _total = repack;
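
With this change, isTrainingInformation only accepts objects whose tensorBackend is exactly 'tfjs' or 'gpt'. A sketch of the new behavior, with all other required fields elided behind a hypothetical validFields object:

    isTrainingInformation({ ...validFields, tensorBackend: 'tfjs' }); // true, assuming the rest is valid
    isTrainingInformation({ ...validFields, tensorBackend: 'onnx' }); // false: not a known backend
    isTrainingInformation({ ...validFields });                        // false: tensorBackend is now required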
package/dist/training/trainer/distributed_trainer.js CHANGED
@@ -31,6 +31,11 @@ export class DistributedTrainer extends Trainer {
  // after it has completed a round of training.
  this.model.weights = this.aggregator.model.weights;
  }
- await this.memory.updateWorkingModel({ taskID: this.task.id, name: this.task.trainingInformation.modelID }, this.model);
+ await this.memory.updateWorkingModel({
+ type: 'working',
+ taskID: this.task.id,
+ name: this.task.trainingInformation.modelID,
+ tensorBackend: this.task.trainingInformation.tensorBackend
+ }, this.model);
  }
  }
package/dist/training/trainer/local_trainer.js CHANGED
@@ -14,6 +14,11 @@ export class LocalTrainer extends Trainer {
  return await Promise.resolve();
  }
  async onRoundEnd() {
- await this.memory.updateWorkingModel({ taskID: this.task.id, name: this.task.trainingInformation.modelID }, this.model);
+ await this.memory.updateWorkingModel({
+ type: 'working',
+ taskID: this.task.id,
+ name: this.task.trainingInformation.modelID,
+ tensorBackend: this.task.trainingInformation.tensorBackend
+ }, this.model);
  }
  }
package/dist/training/trainer/trainer_builder.js CHANGED
@@ -1,4 +1,3 @@
- import { ModelType } from '../../index.js';
  import { DistributedTrainer } from './distributed_trainer.js';
  import { LocalTrainer } from './local_trainer.js';
  /**
@@ -36,7 +35,12 @@ export class TrainerBuilder {
  if (modelID === undefined) {
  throw new TypeError('model ID is undefined');
  }
- const info = { type: ModelType.WORKING, taskID: this.task.id, name: modelID };
+ const info = {
+ type: 'working',
+ taskID: this.task.id,
+ name: modelID,
+ tensorBackend: 'gpt'
+ };
  const model = await (await this.memory.contains(info) ? this.memory.getModel(info) : client.getLatestModel());
  return model;
  }
package/dist/validation/validator.d.ts CHANGED
@@ -1,4 +1,3 @@
- import { List } from 'immutable';
  import type { data, Model, Task, Logger, client as clients, Memory, ModelSource, Features } from '../index.js';
  export declare class Validator {
  readonly task: Task;
@@ -6,22 +5,21 @@ export declare class Validator {
  private readonly memory;
  private readonly source?;
  private readonly client?;
- private readonly graphInformant;
  private size;
  private _confusionMatrix;
+ private rollingAccuracy;
  constructor(task: Task, logger: Logger, memory: Memory, source?: ModelSource | undefined, client?: clients.Client | undefined);
  private getLabel;
- assess(data: data.Data, useConfusionMatrix?: boolean): Promise<Array<{
+ test(data: data.Data): AsyncGenerator<Array<{
  groundTruth: number;
  pred: number;
  features: Features;
- }>>;
- predict(data: data.Data): Promise<Array<{
+ }>, void>;
+ inference(data: data.Data): AsyncGenerator<Array<{
  features: Features;
  pred: number;
- }>>;
+ }>, void>;
  getModel(): Promise<Model>;
- get accuracyData(): List<number>;
  get accuracy(): number;
  get visitedSamples(): number;
  get confusionMatrix(): number[][] | undefined;
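
Since assess() and predict() are replaced by the async generators test() and inference(), callers now stream results batch by batch with for await instead of awaiting one large array. A sketch of consuming test(); the validator and validationData setup is elided and hypothetical:

    for await (const batch of validator.test(validationData)) {
      for (const { groundTruth, pred } of batch) {
        console.log(`expected ${groundTruth}, predicted ${pred}`);
      }
      console.log(`rolling accuracy so far: ${validator.accuracy}`);
    }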
package/dist/validation/validator.js CHANGED
@@ -1,15 +1,14 @@
  import { List } from 'immutable';
  import * as tf from '@tensorflow/tfjs';
- import { GraphInformant } from '../index.js';
  export class Validator {
  task;
  logger;
  memory;
  source;
  client;
- graphInformant = new GraphInformant();
  size = 0;
  _confusionMatrix;
+ rollingAccuracy = 0;
  constructor(task, logger, memory, source, client) {
  this.task = task;
  this.logger = logger;
@@ -23,91 +22,81 @@ export class Validator {
  async getLabel(ys) {
  // Binary classification
  if (ys.shape[1] == 1) {
- return await ys.greaterEqual(tf.scalar(0.5)).data();
+ const threshold = tf.scalar(0.5);
+ const binaryTensor = ys.greaterEqual(threshold);
+ const binaryArray = await binaryTensor.data();
+ tf.dispose([binaryTensor, threshold]);
+ return binaryArray;
  // Multi-class classification
  }
  else {
- return await ys.argMax(-1).data();
+ const yIdxTensor = ys.argMax(-1);
+ const yIdx = await yIdxTensor.data();
+ tf.dispose([yIdxTensor]);
+ return yIdx;
  }
  // Multi-label classification is not supported
  }
- async assess(data, useConfusionMatrix = false) {
+ // test assumes data comes with labels while predict doesn't
+ async *test(data) {
  const batchSize = this.task.trainingInformation?.batchSize;
  if (batchSize === undefined) {
  throw new TypeError('Batch size is undefined');
  }
  const model = await this.getModel();
- let features = [];
- const groundTruth = [];
  let hits = 0;
- // Get model predictions per batch and flatten the result
- // Also build the features and ground truth arrays
- const predictions = (await data.preprocess().dataset.batch(batchSize)
- .mapAsync(async (e) => {
- if (typeof e === 'object' && 'xs' in e && 'ys' in e) {
- const xs = e.xs;
- const ys = await this.getLabel(e.ys);
- const pred = await this.getLabel(await model.predict(xs));
- const currentFeatures = await xs.array();
- if (Array.isArray(currentFeatures)) {
- features = features.concat(currentFeatures);
- }
- else {
- throw new TypeError('Data format is incorrect');
- }
- groundTruth.push(...Array.from(ys));
- this.size += xs.shape[0];
- hits += List(pred).zip(List(ys)).filter(([p, y]) => p === y).size;
- // TODO: Confusion Matrix stats
- const currentAccuracy = hits / this.size;
- this.graphInformant.updateAccuracy(currentAccuracy);
- return Array.from(pred);
- }
- else {
- throw new Error('Input data is missing a feature or the label');
- }
- }).toArray()).flat();
+ const iterator = await data.preprocess().dataset.batch(batchSize).iterator();
+ let next = await iterator.next();
+ while (next.done !== true) {
+ const { xs, ys } = next.value;
+ const ysLabel = await this.getLabel(ys);
+ const yPredTensor = await model.predict(xs);
+ const pred = await this.getLabel(yPredTensor);
+ const currentFeatures = await xs.array();
+ this.size += ysLabel.length;
+ hits += List(pred).zip(List(ysLabel)).filter(([p, y]) => p === y).size;
+ this.rollingAccuracy = hits / this.size;
+ tf.dispose([xs, ys, yPredTensor]);
+ yield List(ysLabel).zip(List(pred), List(currentFeatures))
+ .map(([gt, p, f]) => ({ groundTruth: gt, pred: p, features: f }))
+ .toArray();
+ next = await iterator.next();
+ }
  this.logger.success(`Obtained validation accuracy of ${this.accuracy}`);
  this.logger.success(`Visited ${this.visitedSamples} samples`);
- if (useConfusionMatrix) {
- try {
- this._confusionMatrix = tf.math.confusionMatrix([], [], 0).arraySync();
- }
- catch (e) {
- console.error(e instanceof Error ? e.message : e);
- throw new Error('Failed to compute the confusion matrix');
- }
- }
- return List(groundTruth)
- .zip(List(predictions), List(features))
- .map(([gt, p, f]) => ({ groundTruth: gt, pred: p, features: f }))
- .toArray();
  }
- async predict(data) {
+ async *inference(data) {
  const batchSize = this.task.trainingInformation?.batchSize;
  if (batchSize === undefined) {
  throw new TypeError('Batch size is undefined');
  }
  const model = await this.getModel();
- let features = [];
- // Get model prediction per batch and flatten the result
- // Also incrementally build the features array
- const predictions = (await data.preprocess().dataset.batch(batchSize)
- .mapAsync(async (e) => {
- const xs = e;
- const currentFeatures = await xs.array();
- if (Array.isArray(currentFeatures)) {
- features = features.concat(currentFeatures);
+ const iterator = await data.preprocess().dataset.batch(batchSize).iterator();
+ let next = await iterator.next();
+ while (next.done !== true) {
+ let xs;
+ if (next.value instanceof tf.Tensor) {
+ xs = next.value;
  }
  else {
+ const tensors = next.value;
+ xs = tensors['xs'];
+ tf.dispose([tensors['ys']]);
+ }
+ const currentFeatures = await xs.array();
+ const yPredTensor = await model.predict(xs);
+ const pred = await this.getLabel(yPredTensor);
+ this.size += pred.length;
+ if (!Array.isArray(currentFeatures)) {
  throw new TypeError('Data format is incorrect');
  }
- const pred = await this.getLabel(await model.predict(xs));
- return Array.from(pred);
- }).toArray()).flat();
- return List(features).zip(List(predictions))
- .map(([f, p]) => ({ features: f, pred: p }))
- .toArray();
+ tf.dispose([xs, yPredTensor]);
+ yield List(currentFeatures).zip(List(pred))
+ .map(([f, p]) => ({ features: f, pred: p }))
+ .toArray();
+ next = await iterator.next();
+ }
+ this.logger.success(`Visited ${this.visitedSamples} samples`);
  }
  async getModel() {
  if (this.source !== undefined && await this.memory.contains(this.source)) {
@@ -118,11 +107,8 @@ export class Validator {
  }
  throw new Error('Could not load the model');
  }
- get accuracyData() {
- return this.graphInformant.data();
- }
  get accuracy() {
- return this.graphInformant.accuracy();
+ return this.rollingAccuracy;
  }
  get visitedSamples() {
  return this.size;
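
inference() mirrors test() for unlabeled data, yielding { features, pred } batches and disposing intermediate tensors as it goes. A sketch under the same hypothetical setup:

    for await (const batch of validator.inference(unlabeledData)) {
      for (const { pred } of batch) {
        console.log(`predicted class ${pred}`);
      }
    }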
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@epfml/discojs",
- "version": "2.1.2-p20240531085945.0",
+ "version": "2.1.2-p20240617070831.0",
  "type": "module",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
package/dist/informant/graph_informant.d.ts DELETED
@@ -1,10 +0,0 @@
- import { type List } from 'immutable';
- export declare class GraphInformant {
- static readonly NB_EPOCHS_ON_GRAPH = 10;
- private currentAccuracy;
- private accuracyDataSeries;
- constructor();
- updateAccuracy(accuracy: number): void;
- data(): List<number>;
- accuracy(): number;
- }
package/dist/informant/graph_informant.js DELETED
@@ -1,20 +0,0 @@
- import { Repeat } from 'immutable';
- export class GraphInformant {
- static NB_EPOCHS_ON_GRAPH = 10;
- currentAccuracy;
- accuracyDataSeries;
- constructor() {
- this.currentAccuracy = 0;
- this.accuracyDataSeries = Repeat(0, GraphInformant.NB_EPOCHS_ON_GRAPH).toList();
- }
- updateAccuracy(accuracy) {
- this.accuracyDataSeries = this.accuracyDataSeries.shift().push(accuracy);
- this.currentAccuracy = accuracy;
- }
- data() {
- return this.accuracyDataSeries;
- }
- accuracy() {
- return this.currentAccuracy;
- }
- }
package/dist/informant/index.d.ts DELETED
@@ -1 +0,0 @@
- export { GraphInformant } from './graph_informant.js';
package/dist/informant/index.js DELETED
@@ -1 +0,0 @@
- export { GraphInformant } from './graph_informant.js';
package/dist/memory/model_type.d.ts DELETED
@@ -1,9 +0,0 @@
- /**
- * Type of models stored in memory. Stored models can either be a model currently
- * being trained ("working model") or a regular model saved in memory ("saved model").
- * There can only be a single working model for a given task.
- */
- export declare enum ModelType {
- WORKING = "working",
- SAVED = "saved"
- }
package/dist/memory/model_type.js DELETED
@@ -1,10 +0,0 @@
- /**
- * Type of models stored in memory. Stored models can either be a model currently
- * being trained ("working model") or a regular model saved in memory ("saved model").
- * There can only be a single working model for a given task.
- */
- export var ModelType;
- (function (ModelType) {
- ModelType["WORKING"] = "working";
- ModelType["SAVED"] = "saved";
- })(ModelType || (ModelType = {}));