@epfml/discojs 2.1.2-p20240531085945.0 → 2.1.2-p20240617070831.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/dataset/data/preprocessing/text_preprocessing.js +2 -2
- package/dist/dataset/data_loader/image_loader.js +1 -1
- package/dist/default_tasks/cifar10.js +2 -1
- package/dist/default_tasks/lus_covid.js +2 -1
- package/dist/default_tasks/mnist.js +2 -1
- package/dist/default_tasks/simple_face.js +2 -1
- package/dist/default_tasks/skin_condition.js +2 -1
- package/dist/default_tasks/titanic.js +2 -1
- package/dist/default_tasks/wikitext.js +8 -9
- package/dist/index.d.ts +1 -2
- package/dist/index.js +1 -2
- package/dist/memory/base.d.ts +15 -19
- package/dist/memory/empty.d.ts +2 -2
- package/dist/memory/empty.js +2 -2
- package/dist/memory/index.d.ts +0 -1
- package/dist/memory/index.js +0 -1
- package/dist/models/gpt/config.d.ts +3 -3
- package/dist/models/gpt/index.d.ts +7 -5
- package/dist/models/gpt/index.js +12 -9
- package/dist/models/gpt/layers.d.ts +1 -3
- package/dist/models/gpt/layers.js +9 -16
- package/dist/models/gpt/model.d.ts +1 -6
- package/dist/models/gpt/model.js +17 -32
- package/dist/models/tokenizer.js +5 -1
- package/dist/task/training_information.d.ts +1 -0
- package/dist/task/training_information.js +8 -2
- package/dist/training/trainer/distributed_trainer.js +6 -1
- package/dist/training/trainer/local_trainer.js +6 -1
- package/dist/training/trainer/trainer_builder.js +6 -2
- package/dist/validation/validator.d.ts +5 -7
- package/dist/validation/validator.js +53 -67
- package/package.json +1 -1
- package/dist/informant/graph_informant.d.ts +0 -10
- package/dist/informant/graph_informant.js +0 -20
- package/dist/informant/index.d.ts +0 -1
- package/dist/informant/index.js +0 -1
- package/dist/memory/model_type.d.ts +0 -9
- package/dist/memory/model_type.js +0 -10
package/dist/dataset/data/preprocessing/text_preprocessing.js
CHANGED

@@ -24,7 +24,7 @@ export var TextPreprocessing;
 const leftPadding = {
     type: TextPreprocessing.LeftPadding,
     apply: async (x, task) => {
-        if (x === undefined || !Array.isArray(x) || x.length == 0 || typeof (x[0]
+        if (x === undefined || !Array.isArray(x) || x.length == 0 || typeof (x[0] !== 'number')) {
             new Error("The leftPadding preprocessing expects a non empty 1D array of number");
         }
         const { tokens } = await x;
@@ -58,7 +58,7 @@ const leftPadding = {
 const tokenize = {
     type: TextPreprocessing.Tokenize,
     apply: async (x, task) => {
-        if (typeof x
+        if (typeof x !== 'string') {
             new Error("The tokenize preprocessing expects a string as input");
         }
         const xs = await x; // tf.TextLineDataset yields strings
package/dist/dataset/data_loader/image_loader.js
CHANGED

@@ -58,7 +58,7 @@ export class ImageLoader extends DataLoader {
         const numberOfClasses = labelList.length;
         // Map label strings to integer
         const label_to_int = new Map(labelList.map((label_name, idx) => [label_name, idx]));
-        if (label_to_int.size
+        if (label_to_int.size !== numberOfClasses) {
             throw new Error("Input labels aren't matching the task LABEL_LIST");
         }
         labels = config.labels.map(label_name => {
package/dist/default_tasks/simple_face.js
CHANGED

@@ -28,7 +28,8 @@ export const simpleFace = {
         LABEL_LIST: ['child', 'adult'],
         scheme: 'federated', // secure aggregation not yet implemented for federated
         noiseScale: undefined,
-        clippingRadius: undefined
+        clippingRadius: undefined,
+        tensorBackend: 'tfjs'
     }
   };
 },
package/dist/default_tasks/wikitext.js
CHANGED

@@ -17,17 +17,16 @@ export const wikitext = {
     dataType: 'text',
     modelID: 'wikitext-103-raw-model',
     preprocessingFunctions: [data.TextPreprocessing.Tokenize, data.TextPreprocessing.LeftPadding],
-    validationSplit: 0.2, // TODO: is this used somewhere? because train, eval and test are already split in dataset
-    epochs: 5,
     scheme: 'federated',
-
-
-
-
-    roundDuration:
-    batchSize: 16
+    epochs: 5,
+    // Unused by wikitext because data already comes split
+    // But if set to 0 then the webapp doesn't display the validation metrics
+    validationSplit: 0.1,
+    roundDuration: 2,
+    batchSize: 1, // If set too high (e.g. 16) then firefox raises a WebGL error
     tokenizer: 'Xenova/gpt2',
-    maxSequenceLength: 128
+    maxSequenceLength: 128,
+    tensorBackend: 'gpt'
 }
 };
 },
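Every default task's training information now declares its tensor backend (the other default tasks listed above receive the same +2/-1 change). As a hedged sketch of mine, not code from the package, a downstream task definition would carry the new field alongside the existing options; the runtime validator shown further below rejects any value other than 'tfjs' or 'gpt':

    // Hypothetical custom task; field names follow the diffs in this release.
    const trainingInformation = {
        modelID: 'my-model',           // placeholder ID
        dataType: 'text',
        epochs: 5,
        batchSize: 1,
        roundDuration: 2,
        validationSplit: 0.1,
        scheme: 'federated',
        tokenizer: 'Xenova/gpt2',
        maxSequenceLength: 128,
        tensorBackend: 'gpt',          // new required field: 'tfjs' | 'gpt'
    };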
package/dist/index.d.ts
CHANGED

@@ -2,13 +2,12 @@ export * as data from './dataset/index.js';
 export * as serialization from './serialization/index.js';
 export * as training from './training/index.js';
 export * as privacy from './privacy.js';
-export { GraphInformant } from './informant/index.js';
 export * as client from './client/index.js';
 export * as aggregator from './aggregator/index.js';
 export { WeightsContainer, aggregation } from './weights/index.js';
 export { AsyncInformant } from './async_informant.js';
 export { Logger, ConsoleLogger } from './logging/index.js';
-export { Memory,
+export { Memory, type ModelInfo, type Path, type ModelSource, Empty as EmptyMemory } from './memory/index.js';
 export { Disco, RoundLogs } from './training/index.js';
 export { Validator } from './validation/index.js';
 export { Model, EpochLogs } from './models/index.js';
package/dist/index.js
CHANGED

@@ -2,13 +2,12 @@ export * as data from './dataset/index.js';
 export * as serialization from './serialization/index.js';
 export * as training from './training/index.js';
 export * as privacy from './privacy.js';
-export { GraphInformant } from './informant/index.js';
 export * as client from './client/index.js';
 export * as aggregator from './aggregator/index.js';
 export { WeightsContainer, aggregation } from './weights/index.js';
 export { AsyncInformant } from './async_informant.js';
 export { ConsoleLogger } from './logging/index.js';
-export { Memory,
+export { Memory, Empty as EmptyMemory } from './memory/index.js';
 export { Disco } from './training/index.js';
 export { Validator } from './validation/index.js';
 export { Model } from './models/index.js';
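With GraphInformant removed from the public surface, the memory helpers take its place among the root exports. A minimal import sketch (mine), assuming a standard ESM consumer of the package:

    import { EmptyMemory, type ModelInfo, type ModelSource } from '@epfml/discojs';

    // EmptyMemory is the no-op Memory implementation (see memory/empty.d.ts below);
    // constructor assumed parameterless, as no constructor is declared in the diff.
    const memory = new EmptyMemory();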
package/dist/memory/base.d.ts
CHANGED

@@ -1,33 +1,28 @@
 import type { Model, TaskID } from '../index.js';
-import type { ModelType } from './model_type.js';
 /**
  * Model path which uniquely identifies a model in memory.
  */
 export type Path = string;
+/**
+ * Type of models stored in memory. Stored models can either be a model currently
+ * being trained ("working model") or a regular model saved in memory ("saved model").
+ * There can only be a single working model for a given task.
+ */
+type StoredModelType = 'saved' | 'working';
 /**
  * Model information which uniquely identifies a model in memory.
  */
 export interface ModelInfo {
-
-     * The model's type: "working" or "saved" model.
-     */
-    type?: ModelType;
-    /**
-     * The model's version, to allow for multiple saved models of a same task without
-     * causing id conflicts
-     */
+    type: StoredModelType;
     version?: number;
-    /**
-     * The model's corresponding task
-     */
     taskID: TaskID;
-    /**
-     * The model's name
-     */
     name: string;
+    tensorBackend: 'gpt' | 'tfjs';
 }
 /**
  * A model source uniquely identifies a model stored in memory.
+ * It can be in the form of either a model info object or a Path string
+ * (one-to-one mapping between the two)
  */
 export type ModelSource = ModelInfo | Path;
 /**
@@ -95,21 +90,21 @@ export declare abstract class Memory {
     /**
      * Computes the path in memory corresponding to the given model source, be it a path or model information.
      * This is used to easily switch between model path and information, which are both unique model identifiers
-     * with a one-to-one
+     * with a one-to-one equivalence. Returns undefined instead if no path could be inferred from the given
      * model source.
      * @param source The model source
      * @returns The model path
      */
-    abstract
+    abstract getModelMemoryPath(source: ModelSource): Path | undefined;
     /**
      * Computes the model information corresponding to the given model source, be it a path or model information.
      * This is used to easily switch between model path and information, which are both unique model identifiers
-     * with a one-to-one
+     * with a one-to-one equivalence. Returns undefined instead if no unique model information could be inferred
      * from the given model source.
      * @param source The model source
      * @returns The model information
      */
-    abstract
+    abstract getModelInfo(source: ModelSource): ModelInfo | undefined;
     /**
      * Computes the lowest version a model source can have without conflicting with model versions currently in memory.
      * @param source The model source
@@ -117,3 +112,4 @@ export declare abstract class Memory {
      */
     abstract duplicateSource(source: ModelSource): Promise<ModelSource | undefined>;
 }
+export {};
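ModelInfo now requires a type (plain string literals replacing the deleted ModelType enum) and the new tensorBackend field. A sketch of a conforming literal with hypothetical values (ModelInfo is importable from the package root per the index.d.ts change above):

    import type { ModelInfo } from '@epfml/discojs';

    const info: ModelInfo = {
        type: 'working',                // 'saved' | 'working'; at most one working model per task
        taskID: 'wikitext-103',         // hypothetical task ID
        name: 'wikitext-103-raw-model',
        tensorBackend: 'gpt',
    };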
package/dist/memory/empty.d.ts
CHANGED

@@ -14,7 +14,7 @@ export declare class Empty extends Memory {
     saveModel(): Promise<undefined>;
     deleteModel(): Promise<void>;
     downloadModel(): Promise<void>;
-
-
+    getModelMemoryPath(): Path;
+    getModelInfo(): ModelInfo;
     duplicateSource(): Promise<undefined>;
 }
package/dist/memory/empty.js
CHANGED

@@ -31,10 +31,10 @@ export class Empty extends Memory {
     downloadModel() {
         return Promise.reject(new Error('empty'));
     }
-
+    getModelMemoryPath() {
         throw new Error('empty');
     }
-
+    getModelInfo() {
         throw new Error('empty');
     }
     duplicateSource() {
package/dist/memory/index.d.ts
CHANGED
package/dist/memory/index.js
CHANGED
package/dist/models/gpt/config.d.ts
CHANGED

@@ -1,9 +1,9 @@
-type
+type GPTModelType = 'gpt2' | 'gpt2-medium' | 'gpt2-large' | 'gpt2-xl' | 'gpt-mini' | 'gpt-micro' | 'gpt-nano';
 export interface GPTConfig {
     lr: number;
     blockSize: number;
     vocabSize: number;
-    modelType:
+    modelType: GPTModelType;
     name?: string;
     evaluate?: boolean;
     maxEvalBatches?: number;
@@ -28,5 +28,5 @@ export type ModelSize = {
     nHead: number;
     nEmbd: number;
 };
-export declare function getModelSizes(modelType:
+export declare function getModelSizes(modelType: GPTModelType): Required<ModelSize>;
 export {};
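The model-size presets now live behind the named GPTModelType union. A config sketch using it (values are illustrative, not defaults taken from the package):

    const config = {
        lr: 1e-3,
        blockSize: 128,
        vocabSize: 50257,               // assumption: GPT-2 tokenizer vocabulary size
        modelType: 'gpt-nano' as const, // one of the seven GPTModelType literals
    };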
package/dist/models/gpt/index.d.ts
CHANGED

@@ -1,15 +1,20 @@
 /**
  * this code is taken from gpt-tfjs with modifications from @peacefulotter and @lukemovement
 **/
+import * as tf from '@tensorflow/tfjs';
 import { PreTrainedTokenizer } from '@xenova/transformers';
 import { WeightsContainer } from '../../index.js';
 import type { Dataset } from '../../dataset/index.js';
 import { Model } from '../model.js';
 import type { EpochLogs, Prediction, Sample } from '../model.js';
 import type { GPTConfig } from './config.js';
+export type GPTSerialization = {
+    weights: WeightsContainer;
+    config?: GPTConfig;
+};
 export declare class GPT extends Model {
     private readonly model;
-    constructor(partialConfig?: GPTConfig);
+    constructor(partialConfig?: GPTConfig, layersModel?: tf.LayersModel);
     /**
      * The GPT train methods wraps the model.fitDataset call in a for loop to act as a generator (of logs)
      * This allows for getting logs and stopping training without callbacks.
@@ -27,9 +32,6 @@ export declare class GPT extends Model {
     set weights(ws: WeightsContainer);
     static deserialize(data: GPTSerialization): Model;
     serialize(): GPTSerialization;
+    extract(): tf.LayersModel;
     [Symbol.dispose](): void;
 }
-export type GPTSerialization = {
-    weights: WeightsContainer;
-    config?: GPTConfig;
-};
package/dist/models/gpt/index.js
CHANGED

@@ -6,9 +6,9 @@ import { Model } from '../model.js';
 import { GPTForCausalLM } from './model.js';
 export class GPT extends Model {
     model;
-    constructor(partialConfig) {
+    constructor(partialConfig, layersModel) {
         super();
-        this.model = new GPTForCausalLM(partialConfig);
+        this.model = new GPTForCausalLM(partialConfig, layersModel);
     }
     /**
      * The GPT train methods wraps the model.fitDataset call in a for loop to act as a generator (of logs)
@@ -40,13 +40,14 @@
             epoch,
             peakMemory,
             training: {
-                loss: logs.loss
+                loss: logs.loss,
+                accuracy: logs.acc
             }
         };
         if (validationData !== undefined) {
             if (val_loss === undefined || isNaN(val_loss) ||
                 val_acc === undefined || isNaN(val_acc)) {
-                throw new Error("
+                throw new Error("Validation accuracy or loss is undefined or nan");
             }
             structuredLogs.validation = { accuracy: logs.val_acc, loss: logs.val_loss };
         }
@@ -91,14 +92,16 @@
             config: this.config
         };
     }
+    extract() {
+        return this.model;
+    }
     [Symbol.dispose]() {
-        console.log("Disposing model");
         if (this.model.optimizer !== undefined) {
             this.model.optimizer.dispose();
         }
-
-
-
-
+        const disposeResults = this.model.dispose();
+        if (disposeResults.refCountAfterDispose > 0) {
+            console.error("The GPT model was not disposed correctly (refcount > 0)", disposeResults);
+        }
     }
 }
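The new optional layersModel constructor argument, together with extract(), allows round-tripping the wrapped tf.LayersModel without rebuilding the architecture. A hedged sketch (the GPT class's public import path is not shown in this diff, so the instance is only declared here):

    import * as tf from '@tensorflow/tfjs';

    declare const gpt: { extract(): tf.LayersModel };  // a GPT instance obtained elsewhere
    const layers: tf.LayersModel = gpt.extract();      // the raw underlying model
    // new GPT(undefined, layers) would re-wrap it, per the constructor change above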
package/dist/models/gpt/layers.d.ts
CHANGED

@@ -8,6 +8,4 @@ import type { GPTConfig } from './config.js';
  * @param conf GPTConfig
  * @returns model, tf.LayersModel, which supports model(inputs), model.predict and model.apply
  */
-export declare function GPTArchitecture(config: Required<GPTConfig
-    value: number;
-}): tf.LayersModel;
+export declare function GPTArchitecture(config: Required<GPTConfig>): tf.LayersModel;
package/dist/models/gpt/layers.js
CHANGED

@@ -40,7 +40,6 @@ class LogLayer extends tf.layers.Layer {
 tf.serialization.registerClass(LogLayer);
 class CausalSelfAttention extends tf.layers.Layer {
     config;
-    peakMemory;
     static className = 'CausalSelfAttention';
     nHead;
     nEmbd;
@@ -51,10 +50,9 @@ class CausalSelfAttention extends tf.layers.Layer {
     cAttnBias;
     cProjKernel;
     cProjBias;
-    constructor(config
+    constructor(config) {
         super(config);
         this.config = config;
-        this.peakMemory = peakMemory;
         this.nEmbd = config.nEmbd;
         this.nHead = config.nHead;
         this.dropout = config.dropout;
@@ -63,7 +61,6 @@ class CausalSelfAttention extends tf.layers.Layer {
         // calling bandPart zero out the upper triangular part of the all-ones matrix
         // from the doc: tf.linalg.band_part(input, -1, 0) ==> Lower triangular part
         this.mask = tf.linalg.bandPart(tf.ones([config.blockSize, config.blockSize]), -1, 0);
-        disposalRefs.push(this.mask); // Push a reference to dispose this matrix later
     }
     build() {
         this.cAttnKernel = this.addWeight('c_attn/kernel', [this.nEmbd, 3 * this.nEmbd], 'float32', tf.initializers.glorotNormal({}));
@@ -134,10 +131,6 @@ class CausalSelfAttention extends tf.layers.Layer {
             y = tf.reshape(y, [B, T, C]);
             y = dense(y, this.cProjKernel, this.cProjBias);
             y = kwargs.training === true ? tf.dropout(y, this.dropout) : y;
-            const memoryAllocated = tf.memory().numBytes / 1024 / 1024 / 1024; // GB
-            if (memoryAllocated > this.peakMemory.value) {
-                this.peakMemory.value = memoryAllocated;
-            }
             return y;
         });
     }
@@ -167,25 +160,25 @@ tf.serialization.registerClass(GELU);
 function MLP(config) {
     return tf.sequential({ layers: [
         tf.layers.dense({
-            name:
+            name: config.name + `/mlp/c_fc`,
             units: 4 * config.nEmbd,
             inputDim: config.nEmbd,
             inputShape: [config.blockSize, config.nEmbd]
         }),
         new GELU(),
         tf.layers.dense({
-            name: 'mlp/c_proj',
+            name: config.name + '/mlp/c_proj',
             units: config.nEmbd,
             inputDim: 4 * config.nEmbd,
             inputShape: [config.blockSize, 4 * config.nEmbd]
         }),
         tf.layers.dropout({
-            name: 'mlp/drop',
+            name: config.name + '/mlp/drop',
             rate: config.residDrop
         }),
     ] });
 }
-function TransformerBlock(conf
+function TransformerBlock(conf) {
     const config = Object.assign({ name: 'h' }, conf);
     const inputs = tf.input({ shape: [config.blockSize, config.nEmbd] });
     let x1, x2;
@@ -196,7 +189,7 @@ function TransformerBlock(conf, disposalRefs, peakMemory) {
         x1 = new LogLayer({ name: config.name + '/ln_1_log' }).apply(x1);
     }
     // self attention layer
-    x1 = new CausalSelfAttention(Object.assign({}, config, { name: config.name + '/attn' })
+    x1 = new CausalSelfAttention(Object.assign({}, config, { name: config.name + '/attn' })).apply(x1);
     // Residual connection
     x1 = tf.layers.add().apply([inputs, x1]);
     // normalization
@@ -204,7 +197,7 @@ function TransformerBlock(conf, disposalRefs, peakMemory) {
         .layerNormalization({ name: config.name + '/ln_2', epsilon: 1e-5 })
         .apply(x1);
     // MLP
-    x2 = MLP(Object.assign({}, config, { name: config.name
+    x2 = MLP(Object.assign({}, config, { name: config.name })).apply(x2);
     // add attention output to mlp output
     x2 = tf.layers.add().apply([x1, x2]);
     return tf.model({ name: config.name, inputs, outputs: x2 });
@@ -217,7 +210,7 @@ function TransformerBlock(conf, disposalRefs, peakMemory) {
  * @param conf GPTConfig
  * @returns model, tf.LayersModel, which supports model(inputs), model.predict and model.apply
  */
-export function GPTArchitecture(config
+export function GPTArchitecture(config) {
     const inputs = tf.input({ shape: [null] });
     //Token embedding
     const tokEmb = config.tokEmb
@@ -250,7 +243,7 @@ export function GPTArchitecture(config, disposalRefs, peakMemory) {
     }
     //Apply successively transformer blocks, attention and dense layers
     for (let i = 0; i < config.nLayer; i++) {
-        x = TransformerBlock(Object.assign({}, config, { name: config.name + '/h/' + i })
+        x = TransformerBlock(Object.assign({}, config, { name: config.name + '/h/' + i })).apply(x);
     }
     // Normalization
     x = tf.layers.layerNormalization({ name: config.name + '/ln_f', epsilon: 1e-5 })
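A side note on the MLP renames (my own gloss, not from the package): layer names feed into weight variable names, so prefixing them with config.name, e.g. 'transformer/h/0/mlp/c_fc', keeps names unique across the stacked transformer blocks. A small sketch of the effect:

    import * as tf from '@tensorflow/tfjs';

    const block = 'transformer/h/0'; // per-block prefix, as assembled in GPTArchitecture
    const fc = tf.layers.dense({ name: block + '/mlp/c_fc', units: 256, inputShape: [128, 64] });
    console.log(fc.name); // 'transformer/h/0/mlp/c_fc'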
package/dist/models/gpt/model.d.ts
CHANGED

@@ -16,12 +16,7 @@ export declare abstract class Dataset<T> {
  */
 declare class GPTModel extends tf.LayersModel {
     protected readonly config: Required<GPTConfig>;
-
-    protected peakMemory: {
-        value: number;
-    };
-    constructor(partialConfig?: GPTConfig);
-    disposeRefs(): void;
+    constructor(partialConfig?: GPTConfig, layersModel?: tf.LayersModel);
     get getGPTConfig(): Required<GPTConfig>;
     compile(): void;
     fitDataset<T>(dataset: Dataset<T>, trainingArgs: tf.ModelFitDatasetArgs<T>): Promise<tf.History>;
package/dist/models/gpt/model.js
CHANGED

@@ -9,31 +9,20 @@ import { GPTArchitecture } from './layers.js';
  */
 class GPTModel extends tf.LayersModel {
     config;
-
-    // Object to pass down to layers to store max memory allocated
-    // This is an object rather than a primitive to pass the reference
-    peakMemory;
-    constructor(partialConfig) {
+    constructor(partialConfig, layersModel) {
         // Fill missing config parameters with default values
         let completeConfig = { ...DEFAULT_CONFIG, ...partialConfig };
         // Add layer sizes depending on which model has been specified
         completeConfig = { ...completeConfig, ...getModelSizes(completeConfig.modelType) };
-
-
-
-
-
-
-
-        this.disposalRefs = disposalRefs;
-        this.peakMemory = peakMemory;
-    }
-    // Some tensors are not cleaned up when model.dispose is called
-    // So we dispose them manually
-    disposeRefs() {
-        for (const tensorContainer of this.disposalRefs) {
-            tf.dispose([tensorContainer]);
+        if (layersModel !== undefined) {
+            super({ inputs: layersModel.inputs, outputs: layersModel.outputs, name: layersModel.name });
+        }
+        else {
+            const gpt = GPTArchitecture(completeConfig);
+            const { inputs, outputs, name } = gpt;
+            super({ inputs, outputs, name });
         }
+        this.config = completeConfig;
     }
     get getGPTConfig() {
         return this.config;
@@ -42,7 +31,6 @@ class GPTModel extends tf.LayersModel {
         this.optimizer = this.config.weightDecay !== 0
             ? getCustomAdam(this, this.config.lr, this.config.weightDecay)
             : tf.train.adam(this.config.lr);
-        this.peakMemory.value = 0;
     }
     async fitDataset(dataset, trainingArgs) {
         const callbacks = trainingArgs.callbacks;
@@ -50,6 +38,7 @@ class GPTModel extends tf.LayersModel {
         await callbacks.onTrainBegin?.();
         for (let epoch = 1; epoch <= trainingArgs.epochs; epoch++) {
             let averageLoss = 0;
+            let peakMemory = 0;
             let iteration = 1;
             const iterator = await dataset.iterator();
             let preprocessingTime = performance.now();
@@ -69,22 +58,15 @@ class GPTModel extends tf.LayersModel {
                 }
                 return tf.losses.softmaxCrossEntropy(ys, logits);
             };
-            let backwardPassMemory = 0;
             const lossTensor = tf.tidy(() => {
                 const { grads, value: lossTensor } = this.optimizer.computeGradients(lossFn);
                 const gradsClipped = clipByGlobalNormObj(grads, 1);
                 this.optimizer.applyGradients(gradsClipped);
-                backwardPassMemory = tf.memory().numBytes / 1024 / 1024 / 1024;
                 return lossTensor;
             });
             const loss = await lossTensor.array();
             averageLoss += loss;
             weightUpdateTime = performance.now() - weightUpdateTime;
-            // Probably never the case. Empirically the attention mechanism always allocates
-            // more memory than the backward pass
-            if (backwardPassMemory > this.peakMemory.value) {
-                this.peakMemory.value = backwardPassMemory;
-            }
             tf.dispose([xs, ys, lossTensor]);
             if (evalDataset !== undefined &&
                 this.config.evaluateEvery !== undefined &&
@@ -92,22 +74,25 @@ class GPTModel extends tf.LayersModel {
                 const iterationLogs = await evaluate(this, evalDataset, this.config.maxEvalBatches);
                 console.log(iterationLogs);
             }
-
+            const memory = tf.memory().numBytes / 1024 / 1024 / 1024;
+            if (memory > peakMemory) {
+                peakMemory = memory;
+            }
+            console.log(`Epoch: ${epoch}`, `\tStep: ${iteration} / ${this.config.maxIter}`, `\tLoss: ${loss.toFixed(3)}`, `\tMemory: ${memory.toFixed(2)} GB`, `\tNumber of tensors allocated: ${tf.memory().numTensors}`, `\tPreprocessing time: ${preprocessingTime.toFixed(0)} ms`, `\tWeight update time: ${weightUpdateTime.toFixed(0)} ms`);
             iteration++;
             next = await iterator.next();
         }
         // Memory leak: If we reached the last iteration rather than the end of the dataset, cleanup the tensors
-        if (next.done
+        if (next.done !== true && iteration > this.config.maxIter) {
             const { xs, ys } = next.value;
             tf.dispose([xs, ys]);
         }
         let logs = {
             'loss': averageLoss / iteration,
-            'peakMemory':
+            'peakMemory': peakMemory
         };
         if (evalDataset !== undefined) {
             logs = { ...logs, ...await evaluate(this, evalDataset, this.config.maxEvalBatches) };
-            console.log(logs);
         }
         await callbacks.onEpochEnd?.(epoch, logs);
     }
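Peak-memory tracking moves out of the layers (the shared peakMemory object threaded through them is gone) and into the training loop, which samples tf.memory() once per iteration and keeps the per-epoch maximum. The pattern in isolation, as a sketch of mine:

    import * as tf from '@tensorflow/tfjs';

    let peakMemory = 0; // GB, reset at the start of each epoch
    function sampleMemory(): void {
        const gb = tf.memory().numBytes / 1024 / 1024 / 1024;
        if (gb > peakMemory) peakMemory = gb; // keep the running maximum
    }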
package/dist/models/tokenizer.js
CHANGED

@@ -1,4 +1,4 @@
-import { AutoTokenizer } from '@xenova/transformers';
+import { AutoTokenizer, env } from '@xenova/transformers';
 /**
  * A task's tokenizer is initially specified as the tokenizer name, e.g., 'Xenova/gpt2'.
  * The first time the tokenizer is needed, this function initializes the actual tokenizer object
@@ -15,6 +15,10 @@ export async function getTaskTokenizer(task) {
     if (tokenizer === undefined)
         throw Error('No tokenizer specified in the task training information');
     if (typeof tokenizer == 'string') {
+        // Needs to be false in order to prevent transformers.js from reading the local cache
+        // and triggering an error when running in the browser
+        // Reference: https://medium.com/@GenerationAI/transformers-js-onnx-runtime-webgpu-46c3e58d547c
+        env.allowLocalModels = false;
         tokenizer = await AutoTokenizer.from_pretrained(tokenizer);
         task.trainingInformation.tokenizer = tokenizer;
     }
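The flag is global to transformers.js, so the same setting applies to any tokenizer loaded outside of discojs. A standalone sketch:

    import { AutoTokenizer, env } from '@xenova/transformers';

    env.allowLocalModels = false; // skip local-cache lookups, which fail in the browser
    const tokenizer = await AutoTokenizer.from_pretrained('Xenova/gpt2');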
package/dist/task/training_information.d.ts
CHANGED

@@ -23,5 +23,6 @@ export interface TrainingInformation {
     aggregator?: AggregatorChoice;
     tokenizer?: string | PreTrainedTokenizer;
     maxSequenceLength?: number;
+    tensorBackend: 'tfjs' | 'gpt';
 }
 export declare function isTrainingInformation(raw: unknown): raw is TrainingInformation;

package/dist/task/training_information.js
CHANGED

@@ -10,7 +10,7 @@ export function isTrainingInformation(raw) {
     if (typeof raw !== 'object' || raw === null) {
         return false;
     }
-    const { IMAGE_H, IMAGE_W, LABEL_LIST, aggregator, batchSize, clippingRadius, dataType, decentralizedSecure, epochs, inputColumns, maxShareValue, minimumReadyPeers, modelID, noiseScale, outputColumns, preprocessingFunctions, roundDuration, scheme, validationSplit, tokenizer, maxSequenceLength, } = raw;
+    const { IMAGE_H, IMAGE_W, LABEL_LIST, aggregator, batchSize, clippingRadius, dataType, decentralizedSecure, epochs, inputColumns, maxShareValue, minimumReadyPeers, modelID, noiseScale, outputColumns, preprocessingFunctions, roundDuration, scheme, validationSplit, tokenizer, maxSequenceLength, tensorBackend } = raw;
     if (typeof dataType !== 'string' ||
         typeof modelID !== 'string' ||
         typeof epochs !== 'number' ||
@@ -53,6 +53,11 @@ export function isTrainingInformation(raw) {
             return false;
         }
     }
+    switch (tensorBackend) {
+        case 'tfjs': break;
+        case 'gpt': break;
+        default: return false;
+    }
     switch (scheme) {
         case 'decentralized': break;
         case 'federated': break;
@@ -80,7 +85,8 @@ export function isTrainingInformation(raw) {
         scheme,
         validationSplit,
         tokenizer,
-        maxSequenceLength
+        maxSequenceLength,
+        tensorBackend
     };
     const _correct = repack;
     const _total = repack;
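The added switch doubles as a runtime guard over the new string union. The same idiom, extracted into a self-contained sketch (mine):

    type TensorBackend = 'tfjs' | 'gpt';

    function isTensorBackend(raw: unknown): raw is TensorBackend {
        switch (raw) {
            case 'tfjs':
            case 'gpt':
                return true;
            default:
                return false;
        }
    }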
package/dist/training/trainer/distributed_trainer.js
CHANGED

@@ -31,6 +31,11 @@ export class DistributedTrainer extends Trainer {
         // after it has completed a round of training.
         this.model.weights = this.aggregator.model.weights;
     }
-    await this.memory.updateWorkingModel({
+    await this.memory.updateWorkingModel({
+        type: 'working',
+        taskID: this.task.id,
+        name: this.task.trainingInformation.modelID,
+        tensorBackend: this.task.trainingInformation.tensorBackend
+    }, this.model);
     }
 }

package/dist/training/trainer/local_trainer.js
CHANGED

@@ -14,6 +14,11 @@ export class LocalTrainer extends Trainer {
         return await Promise.resolve();
     }
     async onRoundEnd() {
-        await this.memory.updateWorkingModel({
+        await this.memory.updateWorkingModel({
+            type: 'working',
+            taskID: this.task.id,
+            name: this.task.trainingInformation.modelID,
+            tensorBackend: this.task.trainingInformation.tensorBackend
+        }, this.model);
     }
 }
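Both trainers now checkpoint with a complete ModelInfo instead of a partial one. The call shape as a hedged standalone sketch (memory, task, and model stand in for the trainers' own fields):

    import type { Memory, Model, Task } from '@epfml/discojs';

    async function checkpoint(memory: Memory, task: Task, model: Model): Promise<void> {
        await memory.updateWorkingModel({
            type: 'working',
            taskID: task.id,
            name: task.trainingInformation.modelID,
            tensorBackend: task.trainingInformation.tensorBackend,
        }, model);
    }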
package/dist/training/trainer/trainer_builder.js
CHANGED

@@ -1,4 +1,3 @@
-import { ModelType } from '../../index.js';
 import { DistributedTrainer } from './distributed_trainer.js';
 import { LocalTrainer } from './local_trainer.js';
 /**
@@ -36,7 +35,12 @@ export class TrainerBuilder {
         if (modelID === undefined) {
             throw new TypeError('model ID is undefined');
         }
-        const info = {
+        const info = {
+            type: 'working',
+            taskID: this.task.id,
+            name: modelID,
+            tensorBackend: 'gpt'
+        };
         const model = await (await this.memory.contains(info) ? this.memory.getModel(info) : client.getLatestModel());
         return model;
     }
package/dist/validation/validator.d.ts
CHANGED

@@ -1,4 +1,3 @@
-import { List } from 'immutable';
 import type { data, Model, Task, Logger, client as clients, Memory, ModelSource, Features } from '../index.js';
 export declare class Validator {
     readonly task: Task;
@@ -6,22 +5,21 @@ export declare class Validator {
     private readonly memory;
     private readonly source?;
     private readonly client?;
-    private readonly graphInformant;
     private size;
     private _confusionMatrix;
+    private rollingAccuracy;
     constructor(task: Task, logger: Logger, memory: Memory, source?: ModelSource | undefined, client?: clients.Client | undefined);
     private getLabel;
-
+    test(data: data.Data): AsyncGenerator<Array<{
         groundTruth: number;
         pred: number;
         features: Features;
-    }
-
+    }>, void>;
+    inference(data: data.Data): AsyncGenerator<Array<{
         features: Features;
         pred: number;
-    }
+    }>, void>;
     getModel(): Promise<Model>;
-    get accuracyData(): List<number>;
     get accuracy(): number;
     get visitedSamples(): number;
     get confusionMatrix(): number[][] | undefined;
package/dist/validation/validator.js
CHANGED

@@ -1,15 +1,14 @@
 import { List } from 'immutable';
 import * as tf from '@tensorflow/tfjs';
-import { GraphInformant } from '../index.js';
 export class Validator {
     task;
     logger;
     memory;
     source;
     client;
-    graphInformant = new GraphInformant();
     size = 0;
     _confusionMatrix;
+    rollingAccuracy = 0;
     constructor(task, logger, memory, source, client) {
         this.task = task;
         this.logger = logger;
@@ -23,91 +22,81 @@ export class Validator {
     async getLabel(ys) {
         // Binary classification
         if (ys.shape[1] == 1) {
-
+            const threshold = tf.scalar(0.5);
+            const binaryTensor = ys.greaterEqual(threshold);
+            const binaryArray = await binaryTensor.data();
+            tf.dispose([binaryTensor, threshold]);
+            return binaryArray;
         // Multi-class classification
         }
         else {
-
+            const yIdxTensor = ys.argMax(-1);
+            const yIdx = await yIdxTensor.data();
+            tf.dispose([yIdxTensor]);
+            return yIdx;
         }
         // Multi-label classification is not supported
     }
-
+    // test assumes data comes with labels while predict doesn't
+    async *test(data) {
         const batchSize = this.task.trainingInformation?.batchSize;
         if (batchSize === undefined) {
             throw new TypeError('Batch size is undefined');
         }
         const model = await this.getModel();
-        let features = [];
-        const groundTruth = [];
         let hits = 0;
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            hits += List(pred).zip(List(ys)).filter(([p, y]) => p === y).size;
-            // TODO: Confusion Matrix stats
-            const currentAccuracy = hits / this.size;
-            this.graphInformant.updateAccuracy(currentAccuracy);
-            return Array.from(pred);
-        }
-        else {
-            throw new Error('Input data is missing a feature or the label');
-        }
-        }).toArray()).flat();
+        const iterator = await data.preprocess().dataset.batch(batchSize).iterator();
+        let next = await iterator.next();
+        while (next.done !== true) {
+            const { xs, ys } = next.value;
+            const ysLabel = await this.getLabel(ys);
+            const yPredTensor = await model.predict(xs);
+            const pred = await this.getLabel(yPredTensor);
+            const currentFeatures = await xs.array();
+            this.size += ysLabel.length;
+            hits += List(pred).zip(List(ysLabel)).filter(([p, y]) => p === y).size;
+            this.rollingAccuracy = hits / this.size;
+            tf.dispose([xs, ys, yPredTensor]);
+            yield List(ysLabel).zip(List(pred), List(currentFeatures))
+                .map(([gt, p, f]) => ({ groundTruth: gt, pred: p, features: f }))
+                .toArray();
+            next = await iterator.next();
+        }
         this.logger.success(`Obtained validation accuracy of ${this.accuracy}`);
         this.logger.success(`Visited ${this.visitedSamples} samples`);
-        if (useConfusionMatrix) {
-            try {
-                this._confusionMatrix = tf.math.confusionMatrix([], [], 0).arraySync();
-            }
-            catch (e) {
-                console.error(e instanceof Error ? e.message : e);
-                throw new Error('Failed to compute the confusion matrix');
-            }
-        }
-        return List(groundTruth)
-            .zip(List(predictions), List(features))
-            .map(([gt, p, f]) => ({ groundTruth: gt, pred: p, features: f }))
-            .toArray();
     }
-    async
+    async *inference(data) {
         const batchSize = this.task.trainingInformation?.batchSize;
         if (batchSize === undefined) {
             throw new TypeError('Batch size is undefined');
         }
         const model = await this.getModel();
-
-
-
-
-        .
-
-        const currentFeatures = await xs.array();
-        if (Array.isArray(currentFeatures)) {
-            features = features.concat(currentFeatures);
+        const iterator = await data.preprocess().dataset.batch(batchSize).iterator();
+        let next = await iterator.next();
+        while (next.done !== true) {
+            let xs;
+            if (next.value instanceof tf.Tensor) {
+                xs = next.value;
             }
             else {
+                const tensors = next.value;
+                xs = tensors['xs'];
+                tf.dispose([tensors['ys']]);
+            }
+            const currentFeatures = await xs.array();
+            const yPredTensor = await model.predict(xs);
+            const pred = await this.getLabel(yPredTensor);
+            this.size += pred.length;
+            if (!Array.isArray(currentFeatures)) {
                 throw new TypeError('Data format is incorrect');
             }
-
-
-
-
-
-
+            tf.dispose([xs, yPredTensor]);
+            yield List(currentFeatures).zip(List(pred))
+                .map(([f, p]) => ({ features: f, pred: p }))
+                .toArray();
+            next = await iterator.next();
+        }
+        this.logger.success(`Visited ${this.visitedSamples} samples`);
     }
     async getModel() {
         if (this.source !== undefined && await this.memory.contains(this.source)) {
@@ -118,11 +107,8 @@ export class Validator {
         }
         throw new Error('Could not load the model');
     }
-    get accuracyData() {
-        return this.graphInformant.data();
-    }
     get accuracy() {
-        return this.
+        return this.rollingAccuracy;
     }
     get visitedSamples() {
         return this.size;
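test() and inference() are now async generators that yield one batch of results at a time, with accuracy tracked in the new rollingAccuracy field as batches stream in. A consumption sketch (mine; the instance and data are only declared here):

    import { Validator } from '@epfml/discojs';
    import type { data } from '@epfml/discojs';

    declare const validator: Validator; // constructed elsewhere (task, logger, memory, ...)
    declare const valData: data.Data;   // the validation data

    for await (const batch of validator.test(valData)) {
        for (const { groundTruth, pred } of batch) {
            console.log(`expected ${groundTruth}, predicted ${pred}`);
        }
        console.log(`accuracy so far: ${validator.accuracy}`);
    }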
package/package.json
CHANGED

package/dist/informant/graph_informant.d.ts
DELETED

@@ -1,10 +0,0 @@
-import { type List } from 'immutable';
-export declare class GraphInformant {
-    static readonly NB_EPOCHS_ON_GRAPH = 10;
-    private currentAccuracy;
-    private accuracyDataSeries;
-    constructor();
-    updateAccuracy(accuracy: number): void;
-    data(): List<number>;
-    accuracy(): number;
-}
package/dist/informant/graph_informant.js
DELETED

@@ -1,20 +0,0 @@
-import { Repeat } from 'immutable';
-export class GraphInformant {
-    static NB_EPOCHS_ON_GRAPH = 10;
-    currentAccuracy;
-    accuracyDataSeries;
-    constructor() {
-        this.currentAccuracy = 0;
-        this.accuracyDataSeries = Repeat(0, GraphInformant.NB_EPOCHS_ON_GRAPH).toList();
-    }
-    updateAccuracy(accuracy) {
-        this.accuracyDataSeries = this.accuracyDataSeries.shift().push(accuracy);
-        this.currentAccuracy = accuracy;
-    }
-    data() {
-        return this.accuracyDataSeries;
-    }
-    accuracy() {
-        return this.currentAccuracy;
-    }
-}
package/dist/informant/index.d.ts
DELETED

@@ -1 +0,0 @@
-export { GraphInformant } from './graph_informant.js';

package/dist/informant/index.js
DELETED

@@ -1 +0,0 @@
-export { GraphInformant } from './graph_informant.js';

package/dist/memory/model_type.d.ts
DELETED

@@ -1,9 +0,0 @@
-/**
- * Type of models stored in memory. Stored models can either be a model currently
- * being trained ("working model") or a regular model saved in memory ("saved model").
- * There can only be a single working model for a given task.
- */
-export declare enum ModelType {
-    WORKING = "working",
-    SAVED = "saved"
-}

package/dist/memory/model_type.js
DELETED

@@ -1,10 +0,0 @@
-/**
- * Type of models stored in memory. Stored models can either be a model currently
- * being trained ("working model") or a regular model saved in memory ("saved model").
- * There can only be a single working model for a given task.
- */
-export var ModelType;
-(function (ModelType) {
-    ModelType["WORKING"] = "working";
-    ModelType["SAVED"] = "saved";
-})(ModelType || (ModelType = {}));