@epfml/discojs 3.0.1-p20250729132444.0 → 3.0.1-p20250924113522.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/aggregator/get.d.ts +3 -3
- package/dist/aggregator/get.js +1 -2
- package/dist/client/client.d.ts +6 -6
- package/dist/client/decentralized/decentralized_client.d.ts +1 -1
- package/dist/client/decentralized/peer_pool.d.ts +1 -1
- package/dist/client/federated/federated_client.d.ts +1 -1
- package/dist/client/local_client.d.ts +1 -1
- package/dist/client/utils.d.ts +2 -2
- package/dist/client/utils.js +19 -10
- package/dist/default_tasks/cifar10.d.ts +2 -2
- package/dist/default_tasks/cifar10.js +9 -8
- package/dist/default_tasks/lus_covid.d.ts +2 -2
- package/dist/default_tasks/lus_covid.js +9 -8
- package/dist/default_tasks/mnist.d.ts +2 -2
- package/dist/default_tasks/mnist.js +9 -8
- package/dist/default_tasks/simple_face.d.ts +2 -2
- package/dist/default_tasks/simple_face.js +9 -8
- package/dist/default_tasks/tinder_dog.d.ts +1 -1
- package/dist/default_tasks/tinder_dog.js +12 -10
- package/dist/default_tasks/titanic.d.ts +2 -2
- package/dist/default_tasks/titanic.js +20 -33
- package/dist/default_tasks/wikitext.d.ts +2 -2
- package/dist/default_tasks/wikitext.js +16 -13
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/dist/models/gpt/config.d.ts +2 -2
- package/dist/models/hellaswag.d.ts +2 -3
- package/dist/models/hellaswag.js +3 -4
- package/dist/models/index.d.ts +2 -3
- package/dist/models/index.js +2 -3
- package/dist/models/tokenizer.d.ts +24 -14
- package/dist/models/tokenizer.js +42 -21
- package/dist/processing/index.d.ts +4 -5
- package/dist/processing/index.js +16 -21
- package/dist/serialization/coder.d.ts +5 -1
- package/dist/serialization/coder.js +4 -1
- package/dist/serialization/index.d.ts +4 -0
- package/dist/serialization/index.js +1 -0
- package/dist/serialization/task.d.ts +5 -0
- package/dist/serialization/task.js +34 -0
- package/dist/task/display_information.d.ts +91 -14
- package/dist/task/display_information.js +34 -58
- package/dist/task/index.d.ts +5 -5
- package/dist/task/index.js +4 -3
- package/dist/task/task.d.ts +837 -10
- package/dist/task/task.js +49 -21
- package/dist/task/task_handler.d.ts +4 -4
- package/dist/task/task_handler.js +14 -18
- package/dist/task/task_provider.d.ts +3 -3
- package/dist/task/training_information.d.ts +157 -35
- package/dist/task/training_information.js +85 -110
- package/dist/training/disco.d.ts +8 -8
- package/dist/training/disco.js +2 -1
- package/dist/training/trainer.d.ts +3 -3
- package/dist/training/trainer.js +2 -1
- package/dist/types/index.d.ts +1 -0
- package/dist/validator.d.ts +4 -4
- package/dist/validator.js +7 -6
- package/package.json +4 -7
- package/dist/processing/text.d.ts +0 -21
- package/dist/processing/text.js +0 -36
- package/dist/task/data_example.d.ts +0 -5
- package/dist/task/data_example.js +0 -14
- package/dist/task/summary.d.ts +0 -5
- package/dist/task/summary.js +0 -13
package/dist/validator.js
CHANGED
|
@@ -7,20 +7,21 @@ export class Validator {
|
|
|
7
7
|
this.#model = model;
|
|
8
8
|
}
|
|
9
9
|
/** infer every line of the dataset and check that it is as labelled */
|
|
10
|
-
|
|
11
|
-
const preprocessed =
|
|
10
|
+
test(dataset) {
|
|
11
|
+
const preprocessed = processing.preprocess(this.task, dataset);
|
|
12
12
|
const batched = preprocessed.batch(this.task.trainingInformation.batchSize);
|
|
13
13
|
const predictionWithTruth = batched
|
|
14
14
|
.map(async (batch) => (await this.#model.predict(batch.map(([inputs, _]) => inputs))).zip(batch.map(([_, outputs]) => outputs)))
|
|
15
15
|
.flatten();
|
|
16
|
-
return predictionWithTruth.map(
|
|
17
|
-
predicted:
|
|
18
|
-
truth:
|
|
16
|
+
return predictionWithTruth.map(([predicted, truth]) => ({
|
|
17
|
+
predicted: processing.postprocess(this.task, predicted),
|
|
18
|
+
truth: processing.postprocess(this.task, truth),
|
|
19
19
|
}));
|
|
20
20
|
}
|
|
21
21
|
/** use the model to predict every line of the dataset */
|
|
22
22
|
async *infer(dataset) {
|
|
23
|
-
const modelPredictions =
|
|
23
|
+
const modelPredictions = processing
|
|
24
|
+
.preprocessWithoutLabel(this.task, dataset)
|
|
24
25
|
.batch(this.task.trainingInformation.batchSize)
|
|
25
26
|
.map((batch) => this.#model.predict(batch))
|
|
26
27
|
.flatten();
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@epfml/discojs",
|
|
3
|
-
"version": "3.0.1-p20250729132444.0",
|
|
3
|
+
"version": "3.0.1-p20250924113522.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
"watch": "nodemon --ext ts --ignore dist --exec npm run",
|
|
9
9
|
"build": "tsc",
|
|
10
10
|
"lint": "npx eslint .",
|
|
11
|
-
"test": "
|
|
11
|
+
"test": "cd .. && vitest --run --project=discojs"
|
|
12
12
|
},
|
|
13
13
|
"repository": {
|
|
14
14
|
"type": "git",
|
|
@@ -28,15 +28,12 @@
|
|
|
28
28
|
"isomorphic-ws": "5",
|
|
29
29
|
"simple-peer": "9",
|
|
30
30
|
"tslib": "2",
|
|
31
|
-
"ws": "8"
|
|
31
|
+
"ws": "8",
|
|
32
|
+
"zod": "3"
|
|
32
33
|
},
|
|
33
34
|
"devDependencies": {
|
|
34
35
|
"@tensorflow/tfjs-node": "4",
|
|
35
|
-
"@types/chai": "5",
|
|
36
|
-
"@types/mocha": "10",
|
|
37
36
|
"@types/simple-peer": "9",
|
|
38
|
-
"chai": "5",
|
|
39
|
-
"mocha": "10",
|
|
40
37
|
"nodemon": "3",
|
|
41
38
|
"ts-node": "10"
|
|
42
39
|
}
|
|
package/dist/processing/text.d.ts
DELETED
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
import { PreTrainedTokenizer } from "@xenova/transformers";
|
|
2
|
-
import type { Text, TokenizedText } from '../index.js';
|
|
3
|
-
interface TokenizingConfig {
|
|
4
|
-
padding?: boolean;
|
|
5
|
-
padding_side?: 'left' | 'right';
|
|
6
|
-
truncation?: boolean;
|
|
7
|
-
max_length?: number;
|
|
8
|
-
}
|
|
9
|
-
/**
|
|
10
|
-
* Tokenize one line of text.
|
|
11
|
-
* Wrapper around Transformers.js tokenizer to handle type checking and format the output.
|
|
12
|
-
* Note that Transformers.js's tokenizer can tokenize multiple lines of text at once
|
|
13
|
-
* but we are currently not making use of it. Can be useful when padding a batch
|
|
14
|
-
*
|
|
15
|
-
* @param tokenizer the tokenizer object
|
|
16
|
-
* @param text the text to tokenize
|
|
17
|
-
* @param config TokenizingConfig, the tokenizing parameters when using `tokenizer`
|
|
18
|
-
* @returns List<number> the tokenized text
|
|
19
|
-
*/
|
|
20
|
-
export declare function tokenize(tokenizer: PreTrainedTokenizer, text: Text, config?: TokenizingConfig): TokenizedText;
|
|
21
|
-
export {};
|
package/dist/processing/text.js
DELETED
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
import { List } from "immutable";
|
|
2
|
-
function isArrayOfNumber(raw) {
|
|
3
|
-
return Array.isArray(raw) && raw.every((e) => typeof e === "number");
|
|
4
|
-
}
|
|
5
|
-
/**
|
|
6
|
-
* Tokenize one line of text.
|
|
7
|
-
* Wrapper around Transformers.js tokenizer to handle type checking and format the output.
|
|
8
|
-
* Note that Transformers.js's tokenizer can tokenize multiple lines of text at once
|
|
9
|
-
* but we are currently not making use of it. Can be useful when padding a batch
|
|
10
|
-
*
|
|
11
|
-
* @param tokenizer the tokenizer object
|
|
12
|
-
* @param text the text to tokenize
|
|
13
|
-
* @param config TokenizingConfig, the tokenizing parameters when using `tokenizer`
|
|
14
|
-
* @returns List<number> the tokenized text
|
|
15
|
-
*/
|
|
16
|
-
export function tokenize(tokenizer, text, config) {
|
|
17
|
-
config = { ...config }; // create a config if undefined
|
|
18
|
-
if (config.padding || config.truncation) {
|
|
19
|
-
if (config.max_length === undefined)
|
|
20
|
-
throw new Error("max_length needs to be specified to use padding or truncation");
|
|
21
|
-
if (!Number.isInteger(config.max_length))
|
|
22
|
-
throw new Error("max_length should be an integer");
|
|
23
|
-
}
|
|
24
|
-
if (config.padding) {
|
|
25
|
-
// The padding side is set as an attribute, not in the config
|
|
26
|
-
tokenizer.padding_side = config.padding_side ?? 'left';
|
|
27
|
-
config.truncation = true; // for a single sequence, padding implies truncation to max_length
|
|
28
|
-
}
|
|
29
|
-
const tokenizerResult = tokenizer(text, { ...config, return_tensor: false });
|
|
30
|
-
if (typeof tokenizerResult !== "object" ||
|
|
31
|
-
tokenizerResult === null ||
|
|
32
|
-
!("input_ids" in tokenizerResult) ||
|
|
33
|
-
!isArrayOfNumber(tokenizerResult.input_ids))
|
|
34
|
-
throw new Error("tokenizer returned unexpected type");
|
|
35
|
-
return List(tokenizerResult.input_ids);
|
|
36
|
-
}
|
|
package/dist/task/data_example.js
DELETED
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
export function isDataExample(raw) {
|
|
2
|
-
if (typeof raw !== 'object' || raw === null) {
|
|
3
|
-
return false;
|
|
4
|
-
}
|
|
5
|
-
const { columnName, columnData } = raw;
|
|
6
|
-
if (typeof columnName !== 'string' ||
|
|
7
|
-
(typeof columnData !== 'string' && typeof columnData !== 'number')) {
|
|
8
|
-
return false;
|
|
9
|
-
}
|
|
10
|
-
const repack = { columnName, columnData };
|
|
11
|
-
const _correct = repack;
|
|
12
|
-
const _total = repack;
|
|
13
|
-
return true;
|
|
14
|
-
}
|
package/dist/task/summary.d.ts
DELETED
package/dist/task/summary.js
DELETED
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
export function isSummary(raw) {
|
|
2
|
-
if (typeof raw !== 'object' || raw === null) {
|
|
3
|
-
return false;
|
|
4
|
-
}
|
|
5
|
-
const { preview, overview } = raw;
|
|
6
|
-
if (!(typeof preview === 'string' && typeof overview === 'string')) {
|
|
7
|
-
return false;
|
|
8
|
-
}
|
|
9
|
-
const repack = { preview, overview };
|
|
10
|
-
const _correct = repack;
|
|
11
|
-
const _total = repack;
|
|
12
|
-
return true;
|
|
13
|
-
}
|