@epfml/discojs 3.0.1-p20241025115642.0 → 3.0.1-p20241028120035.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. package/dist/aggregator/get.d.ts +3 -3
  2. package/dist/client/client.d.ts +5 -5
  3. package/dist/client/decentralized/decentralized_client.d.ts +2 -2
  4. package/dist/client/federated/federated_client.d.ts +2 -2
  5. package/dist/client/utils.d.ts +2 -2
  6. package/dist/dataset/dataset.d.ts +9 -2
  7. package/dist/dataset/dataset.js +83 -36
  8. package/dist/dataset/image.d.ts +5 -0
  9. package/dist/dataset/image.js +6 -1
  10. package/dist/dataset/index.d.ts +0 -1
  11. package/dist/dataset/index.js +0 -1
  12. package/dist/dataset/types.d.ts +2 -0
  13. package/dist/default_tasks/cifar10.d.ts +1 -1
  14. package/dist/default_tasks/cifar10.js +2 -3
  15. package/dist/default_tasks/lus_covid.d.ts +1 -1
  16. package/dist/default_tasks/lus_covid.js +2 -3
  17. package/dist/default_tasks/mnist.d.ts +1 -1
  18. package/dist/default_tasks/mnist.js +2 -4
  19. package/dist/default_tasks/simple_face.d.ts +1 -1
  20. package/dist/default_tasks/simple_face.js +2 -3
  21. package/dist/default_tasks/titanic.d.ts +1 -1
  22. package/dist/default_tasks/titanic.js +3 -6
  23. package/dist/default_tasks/wikitext.d.ts +1 -1
  24. package/dist/default_tasks/wikitext.js +1 -2
  25. package/dist/index.d.ts +4 -5
  26. package/dist/index.js +4 -5
  27. package/dist/models/gpt/index.d.ts +13 -16
  28. package/dist/models/gpt/index.js +62 -43
  29. package/dist/models/gpt/model.d.ts +1 -15
  30. package/dist/models/gpt/model.js +1 -75
  31. package/dist/models/model.d.ts +7 -12
  32. package/dist/models/tfjs.d.ts +10 -8
  33. package/dist/models/tfjs.js +106 -44
  34. package/dist/models/tokenizer.d.ts +1 -1
  35. package/dist/privacy.js +1 -1
  36. package/dist/processing/image.d.ts +18 -0
  37. package/dist/processing/image.js +75 -0
  38. package/dist/processing/index.d.ts +8 -0
  39. package/dist/processing/index.js +106 -0
  40. package/dist/processing/tabular.d.ts +19 -0
  41. package/dist/processing/tabular.js +33 -0
  42. package/dist/processing/text.d.ts +11 -0
  43. package/dist/processing/text.js +33 -0
  44. package/dist/serialization/model.d.ts +3 -3
  45. package/dist/serialization/model.js +19 -6
  46. package/dist/task/task.d.ts +4 -3
  47. package/dist/task/task.js +5 -3
  48. package/dist/task/task_handler.d.ts +3 -3
  49. package/dist/task/task_provider.d.ts +4 -4
  50. package/dist/task/training_information.d.ts +25 -16
  51. package/dist/task/training_information.js +76 -72
  52. package/dist/training/disco.d.ts +20 -12
  53. package/dist/training/disco.js +32 -13
  54. package/dist/training/trainer.d.ts +6 -7
  55. package/dist/training/trainer.js +6 -6
  56. package/dist/types/data_format.d.ts +40 -0
  57. package/dist/types/index.d.ts +2 -0
  58. package/dist/types/index.js +1 -0
  59. package/dist/validator.d.ts +10 -0
  60. package/dist/validator.js +30 -0
  61. package/package.json +4 -2
  62. package/dist/dataset/data/data.d.ts +0 -47
  63. package/dist/dataset/data/data.js +0 -88
  64. package/dist/dataset/data/data_split.d.ts +0 -8
  65. package/dist/dataset/data/helpers.d.ts +0 -10
  66. package/dist/dataset/data/helpers.js +0 -97
  67. package/dist/dataset/data/image_data.d.ts +0 -11
  68. package/dist/dataset/data/image_data.js +0 -43
  69. package/dist/dataset/data/index.d.ts +0 -5
  70. package/dist/dataset/data/index.js +0 -5
  71. package/dist/dataset/data/preprocessing/base.d.ts +0 -16
  72. package/dist/dataset/data/preprocessing/base.js +0 -1
  73. package/dist/dataset/data/preprocessing/image_preprocessing.d.ts +0 -13
  74. package/dist/dataset/data/preprocessing/image_preprocessing.js +0 -42
  75. package/dist/dataset/data/preprocessing/index.d.ts +0 -4
  76. package/dist/dataset/data/preprocessing/index.js +0 -3
  77. package/dist/dataset/data/preprocessing/tabular_preprocessing.d.ts +0 -13
  78. package/dist/dataset/data/preprocessing/tabular_preprocessing.js +0 -45
  79. package/dist/dataset/data/preprocessing/text_preprocessing.d.ts +0 -13
  80. package/dist/dataset/data/preprocessing/text_preprocessing.js +0 -100
  81. package/dist/dataset/data/tabular_data.d.ts +0 -11
  82. package/dist/dataset/data/tabular_data.js +0 -24
  83. package/dist/dataset/data/text_data.d.ts +0 -11
  84. package/dist/dataset/data/text_data.js +0 -14
  85. package/dist/processing.d.ts +0 -35
  86. package/dist/processing.js +0 -89
  87. package/dist/types.d.ts +0 -3
  88. package/dist/types.js +0 -1
  89. package/dist/validation/index.d.ts +0 -1
  90. package/dist/validation/index.js +0 -1
  91. package/dist/validation/validator.d.ts +0 -10
  92. package/dist/validation/validator.js +0 -113
  93. /package/dist/{dataset/data/data_split.js → types/data_format.js} +0 -0
package/dist/aggregator/get.d.ts CHANGED
@@ -1,7 +1,7 @@
- import type { Task } from '../index.js';
+ import type { DataType, Task } from '../index.js';
  import { aggregator } from '../index.js';
  type AggregatorOptions = Partial<{
-     scheme: Task['trainingInformation']['scheme'];
+     scheme: Task<DataType>["trainingInformation"]["scheme"];
      roundCutOff: number;
      threshold: number;
      thresholdType: 'relative' | 'absolute';
@@ -24,5 +24,5 @@ type AggregatorOptions = Partial<{
   * @param options Options passed down to the aggregator's constructor
   * @returns The aggregator
   */
- export declare function getAggregator(task: Task, options?: AggregatorOptions): aggregator.Aggregator;
+ export declare function getAggregator(task: Task<DataType>, options?: AggregatorOptions): aggregator.Aggregator;
  export {};
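The change above is the theme of this release: `Task`, `Model`, and `TaskProvider` are now generic over a `DataType` tag. A minimal sketch of a call site after the upgrade, assuming `getAggregator` is reachable through the re-exported `aggregator` namespace (this diff does not confirm that re-export):

```ts
import { defaultTasks, aggregator } from "@epfml/discojs";

// cifar10 is now typed TaskProvider<'image'> (see its .d.ts below),
// so the returned Task carries 'image' at the type level.
const task = defaultTasks.cifar10.getTask();

// hypothetical call site; assumes getAggregator is exposed
// under the aggregator namespace
const agg = aggregator.getAggregator(task, { scheme: "federated" });
```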
package/dist/client/client.d.ts CHANGED
@@ -1,4 +1,4 @@
- import type { Model, Task, WeightsContainer, RoundStatus } from '../index.js';
+ import type { DataType, Model, RoundStatus, Task, WeightsContainer } from "../index.js";
  import type { NodeID } from './types.js';
  import type { EventConnection } from './event_connection.js';
  import type { Aggregator } from '../aggregator/index.js';
@@ -11,7 +11,7 @@ export declare abstract class Client extends EventEmitter<{
      'status': RoundStatus;
  }> {
      readonly url: URL;
-     readonly task: Task;
+     readonly task: Task<DataType>;
      readonly aggregator: Aggregator;
      protected _ownId?: NodeID;
      protected _server?: EventConnection;
@@ -30,7 +30,7 @@ export declare abstract class Client extends EventEmitter<{
       */
      private previousStatus;
      constructor(url: URL, // The network server's URL to connect to
-     task: Task, // The client's corresponding task
+     task: Task<DataType>, // The client's corresponding task
      aggregator: Aggregator);
      /**
       * Communication callback called at the beginning of every training round.
@@ -47,7 +47,7 @@ export declare abstract class Client extends EventEmitter<{
       * This method is overriden by the federated and decentralized clients
       * By default, it fetches and returns the server's base model
       */
-     connect(): Promise<Model>;
+     connect(): Promise<Model<DataType>>;
      /**
       * Handles the disconnection process of the client from any sort of network server.
       */
@@ -94,7 +94,7 @@ export declare abstract class Client extends EventEmitter<{
       * Fetches the latest model available on the network's server, for the adequate task.
       * @returns The latest model
       */
-     getLatestModel(): Promise<Model>;
+     getLatestModel(): Promise<Model<DataType>>;
      /**
       * Number of contributors to a collaborative session
       * If decentralized, it should be the number of peers
package/dist/client/decentralized/decentralized_client.d.ts CHANGED
@@ -1,4 +1,4 @@
- import type { Model, WeightsContainer } from "../../index.js";
+ import type { DataType, Model, WeightsContainer } from "../../index.js";
  import { Client } from '../client.js';
  /**
   * Represents a decentralized client in a network of peers. Peers coordinate each other with the
@@ -18,7 +18,7 @@ export declare class DecentralizedClient extends Client {
       * create peer-to-peer WebRTC connections with peers. The server is used to exchange
       * peers network information.
       */
-     connect(): Promise<Model>;
+     connect(): Promise<Model<DataType>>;
      disconnect(): Promise<void>;
      /**
       * At the beginning of a round, each peer tells the server it is ready to proceed
package/dist/client/federated/federated_client.d.ts CHANGED
@@ -1,4 +1,4 @@
- import type { Model, WeightsContainer } from "../../index.js";
+ import type { DataType, Model, WeightsContainer } from "../../index.js";
  import { Client } from "../client.js";
  /**
   * Client class that communicates with a centralized, federated server, when training
@@ -12,7 +12,7 @@ export declare class FederatedClient extends Client {
       * as well as the latest training information: latest global model, current round and
       * whether we are waiting for more participants.
       */
-     connect(): Promise<Model>;
+     connect(): Promise<Model<DataType>>;
      /**
       * Disconnection process when user quits the task.
       */
package/dist/client/utils.d.ts CHANGED
@@ -1,4 +1,4 @@
- import type { Task } from '../index.js';
+ import type { DataType, Task } from '../index.js';
  import { client as clients, type aggregator } from '../index.js';
  export declare function timeout(ms?: number, errorMsg?: string): Promise<never>;
- export declare function getClient(trainingScheme: Required<Task['trainingInformation']['scheme']>, serverURL: URL, task: Task, aggregator: aggregator.Aggregator): clients.Client;
+ export declare function getClient(trainingScheme: Task<DataType>["trainingInformation"]["scheme"], serverURL: URL, task: Task<DataType>, aggregator: aggregator.Aggregator): clients.Client;
package/dist/dataset/dataset.d.ts CHANGED
@@ -1,4 +1,4 @@
- import { List } from "immutable";
+ import { Batched } from "./types.js";
  type DatasetLike<T> = AsyncIterable<T> | Iterable<T> | (() => AsyncIterator<T, void>) | (() => Iterator<T, void>);
  /** Immutable series of data */
  export declare class Dataset<T> implements AsyncIterable<T> {
@@ -31,7 +31,9 @@ export declare class Dataset<T> implements AsyncIterable<T> {
       *
       * @param size count of element per chunk
       */
-     batch(size: number): Dataset<List<T>>;
+     batch(size: number): Dataset<Batched<T>>;
+     /** Flatten chunks */
+     unbatch<U>(this: Dataset<Batched<U>>): Dataset<U>;
      /** Join side-by-side
       *
       * Stops as soon as one runs out
@@ -44,5 +46,10 @@ export declare class Dataset<T> implements AsyncIterable<T> {
       * This is a costly operation as we need to go through the whole Dataset.
       */
      size(): Promise<number>;
+     /** Try to keep generated elements to avoid recomputing
+      *
+      * Drops everything when memory pressure is applied.
+      */
+     cached(): Dataset<T>;
  }
  export {};
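For orientation, a sketch of the new surface: `Batched<T>` is an immutable `List<T>` (see `dataset/types.d.ts` below), and the element values here are made up:

```ts
import { Dataset } from "@epfml/discojs";

async function demo(): Promise<void> {
  // the constructor accepts a DatasetLike, e.g. a plain iterable
  const numbers = new Dataset([1, 2, 3, 4, 5]);

  // batch/unbatch round-trip: chunks of at most 2, then flattened back
  for await (const chunk of numbers.batch(2))
    console.log(chunk.toArray()); // [1, 2], [3, 4], [5]
  console.log(await numbers.batch(2).unbatch().size()); // 5

  // cached() may serve the second pass from a weakly-held cache
  const squares = numbers.map((n) => n * n).cached();
  console.log(await squares.size()); // first pass fills the cache
  console.log(await squares.size()); // second pass can replay it
}
```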
package/dist/dataset/dataset.js CHANGED
@@ -1,4 +1,6 @@
- import { List } from "immutable";
+ import createDebug from "debug";
+ import { List, Range } from "immutable";
+ const debug = createDebug("discojs:dataset");
  /** Immutable series of data */
  export class Dataset {
      #content;
@@ -32,13 +34,10 @@ export class Dataset {
       * @param mapper how to change each element
       */
      map(mapper) {
-         const content = {
-             [Symbol.asyncIterator]: () => this.#content(),
-         };
          return new Dataset(async function* () {
-             for await (const e of content)
+             for await (const e of this)
                  yield await mapper(e);
-         });
+         }.bind(this));
      }
      /** Combine with another Dataset.
       *
@@ -47,13 +46,10 @@
      chain(other) {
          if (!(other instanceof Dataset))
              other = new Dataset(other);
-         const self = {
-             [Symbol.asyncIterator]: () => this.#content(),
-         };
          return new Dataset(async function* () {
-             yield* self;
+             yield* this;
              yield* other;
-         });
+         }.bind(this));
      }
      /** Divide into two based on given ratio
       *
@@ -62,16 +58,13 @@
      split(ratio) {
          if (ratio < 0 || ratio > 1)
              throw new Error("ratio out of range");
-         const content = {
-             [Symbol.asyncIterator]: () => this.#content(),
-         };
          // to avoid using random sampling or knowing the size beforehand,
          // we compute the actual ratio and make it converge towards the wanted one
          return [
              new Dataset(async function* () {
                  let yielded_by_other = 0;
                  let total_size = 0;
-                 for await (const e of content) {
+                 for await (const e of this) {
                      total_size++;
                      if (yielded_by_other / total_size >= ratio) {
                          yield e;
@@ -80,18 +73,18 @@
                          yielded_by_other++;
                      }
                  }
-             }),
+             }.bind(this)),
              new Dataset(async function* () {
                  let yielded = 0;
                  let total_size = 0;
-                 for await (const e of content) {
+                 for await (const e of this) {
                      total_size++;
                      if (yielded / total_size < ratio) {
                          yielded++;
                          yield e;
                      }
                  }
-             }),
+             }.bind(this)),
          ];
      }
      /** Slice into chunks
@@ -103,21 +96,30 @@
      batch(size) {
          if (size <= 0 || !Number.isInteger(size))
              throw new Error("invalid size");
-         const content = {
-             [Symbol.asyncIterator]: () => this.#content(),
-         };
          return new Dataset(async function* () {
-             let batch = List();
-             for await (const e of content) {
-                 batch = batch.push(e);
-                 if (batch.size === size) {
-                     yield batch;
-                     batch = List();
-                 }
-             }
-             if (!batch.isEmpty())
+             const iter = this[Symbol.asyncIterator]();
+             for (;;) {
+                 const batch = List(await Promise.all(Range(0, size).map(() => iter.next()))).flatMap((res) => {
+                     if (res.done)
+                         return [];
+                     else
+                         return [res.value];
+                 });
+                 if (batch.isEmpty())
+                     break;
                  yield batch;
-         });
+                 // iterator couldn't generate more
+                 if (batch.size < size)
+                     break;
+             }
+         }.bind(this));
+     }
+     /** Flatten chunks */
+     unbatch() {
+         return new Dataset(async function* () {
+             for await (const batch of this)
+                 yield* batch;
+         }.bind(this));
      }
      /** Join side-by-side
       *
@@ -128,11 +130,8 @@
      zip(other) {
          if (!(other instanceof Dataset))
              other = new Dataset(other);
-         const content = {
-             [Symbol.asyncIterator]: () => this.#content(),
-         };
          return new Dataset(async function* () {
-             const left = content[Symbol.asyncIterator]();
+             const left = this[Symbol.asyncIterator]();
              const right = other[Symbol.asyncIterator]();
              while (true) {
                  const [l, r] = await Promise.all([left.next(), right.next()]);
@@ -140,7 +139,7 @@
                      return;
                  yield [l.value, r.value];
              }
-         });
+         }.bind(this));
      }
      /** Compute size
       *
@@ -152,4 +151,52 @@
              ret++;
          return ret;
      }
+     /** Try to keep generated elements to avoid recomputing
+      *
+      * Drops everything when memory pressure is applied.
+      */
+     cached() {
+         return new CachingDataset(this.#content);
+     }
  }
+ /**
+  * Avoid recomputing the parent dataset, without hogging memory
+  *
+  * As dataset operations can be time-consuming, this keeps a weak reference to
+  * the generated elements so that a second iteration might yield theses directly.
+  **/
+ class CachingDataset extends Dataset {
+     // potential reference to all elements
+     // tristate: undefined == empty, [false, _] == filling, [true, _] == filled
+     #cache = new WeakRef([false, List()]);
+     [Symbol.asyncIterator]() {
+         const cached = this.#cache.deref();
+         if (cached !== undefined && cached[0]) {
+             debug("valid cache, reading from it");
+             // eslint-disable-next-line @typescript-eslint/require-await
+             return (async function* () {
+                 yield* cached[1];
+             })();
+         }
+         debug("cache invalid, reading from dataset");
+         this.#cache = new WeakRef([false, List()]);
+         const parentContent = {
+             [Symbol.asyncIterator]: () => super[Symbol.asyncIterator](),
+         };
+         return async function* () {
+             for await (const e of parentContent) {
+                 yield e;
+                 const caching = this.#cache.deref();
+                 if (caching !== undefined)
+                     caching[1] = caching[1].push(e);
+             }
+             const caching = this.#cache.deref();
+             if (caching === undefined) {
+                 debug("cache evicted while filling");
+                 return;
+             }
+             debug("cache filled");
+             caching[0] = true;
+         }.bind(this)();
+     }
  }
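The `CachingDataset` added above leans on `WeakRef` so the whole cache can be garbage-collected under memory pressure, plus a filling/filled flag so a partially-filled cache is never replayed. The same pattern in isolation, as a synchronous sketch (the names here are ours, not the package's):

```ts
// Standalone illustration of the pattern: hold the cache only weakly,
// and mark it complete only after one full pass over the source.
class WeaklyCached<T> implements Iterable<T> {
  // tristate as in CachingDataset: collected / [false, items] = filling / [true, items] = filled
  #cache = new WeakRef<[boolean, T[]]>([false, []]);

  constructor(private readonly source: () => Iterable<T>) {}

  *[Symbol.iterator](): Iterator<T> {
    const cached = this.#cache.deref();
    if (cached?.[0]) {
      yield* cached[1]; // replay without recomputing
      return;
    }
    this.#cache = new WeakRef<[boolean, T[]]>([false, []]);
    for (const e of this.source()) {
      yield e;
      this.#cache.deref()?.[1].push(e); // keep filling unless evicted
    }
    const filling = this.#cache.deref();
    if (filling !== undefined) filling[0] = true; // survived a full pass
  }
}
```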
package/dist/dataset/image.d.ts CHANGED
@@ -1,6 +1,11 @@
  /**
   * Raw image with type level dimensions.
   *
+  * Per convention, `data` layout is as follow
+  * `height` chunk each containing
+  * `width` chunk each containing
+  * a chunk of `depth` bytes
+  *
   * @typeParam D depth of the image
   * @typeParam W width, positive and integral
   * @typeParam H height, positive and integral
package/dist/dataset/image.js CHANGED
@@ -1,6 +1,11 @@
  /**
   * Raw image with type level dimensions.
   *
+  * Per convention, `data` layout is as follow
+  * `height` chunk each containing
+  * `width` chunk each containing
+  * a chunk of `depth` bytes
+  *
   * @typeParam D depth of the image
   * @typeParam W width, positive and integral
   * @typeParam H height, positive and integral
@@ -16,6 +21,6 @@ export class Image {
          this.height = height;
          this.depth = depth;
          if (data.length != width * height * depth)
-             throw new Error("data isn't of excepted size");
+             throw new Error("data isn't of expected size");
      }
  }
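Given that layout (row-major: `height` rows, each of `width` pixels, each of `depth` bytes), a single channel value is addressed as below. This helper is purely illustrative, not part of the package, and assumes `data` is a byte array:

```ts
// Hypothetical helper following the documented layout of Image#data:
// index = (y * width + x) * depth + c
function channelAt(
  data: Uint8Array,
  width: number,
  depth: number,
  x: number, // column, 0 <= x < width
  y: number, // row, 0 <= y < height
  c: number, // channel, 0 <= c < depth
): number {
  return data[(y * width + x) * depth + c];
}
```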
package/dist/dataset/index.d.ts CHANGED
@@ -1,3 +1,2 @@
  export { Dataset } from "./dataset.js";
  export * from "./types.js";
- export { Data, TabularData, ImageData, TextData, ImagePreprocessing, TabularPreprocessing, TextPreprocessing, IMAGE_PREPROCESSING, TABULAR_PREPROCESSING, TEXT_PREPROCESSING, } from "./data/index.js";
package/dist/dataset/index.js CHANGED
@@ -1,3 +1,2 @@
  export { Dataset } from "./dataset.js";
  export * from "./types.js";
- export { Data, TabularData, ImageData, TextData, ImagePreprocessing, TabularPreprocessing, TextPreprocessing, IMAGE_PREPROCESSING, TABULAR_PREPROCESSING, TEXT_PREPROCESSING, } from "./data/index.js";
package/dist/dataset/types.d.ts CHANGED
@@ -1,4 +1,6 @@
+ import { List } from "immutable";
  import { Image } from "./image.js";
+ export type Batched<T> = List<T>;
  export { Image };
  export type Tabular = Partial<Record<string, string>>;
  export type Text = string;
package/dist/default_tasks/cifar10.d.ts CHANGED
@@ -1,2 +1,2 @@
  import type { TaskProvider } from '../index.js';
- export declare const cifar10: TaskProvider;
+ export declare const cifar10: TaskProvider<'image'>;
package/dist/default_tasks/cifar10.js CHANGED
@@ -1,5 +1,5 @@
  import * as tf from '@tensorflow/tfjs';
- import { data, models } from '../index.js';
+ import { models } from '../index.js';
  import baseModel from '../models/mobileNet_v1_025_224.js';
  export const cifar10 = {
      getTask() {
@@ -24,7 +24,6 @@ export const cifar10 = {
              validationSplit: 0.2,
              batchSize: 10,
              dataType: 'image',
-             preprocessingFunctions: [data.ImagePreprocessing.Resize, data.ImagePreprocessing.Normalize],
              IMAGE_H: 224,
              IMAGE_W: 224,
              LABEL_LIST: ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'],
@@ -55,6 +54,6 @@ export const cifar10 = {
          loss: 'categoricalCrossentropy',
          metrics: ['accuracy']
      });
-     return new models.TFJS(model);
+     return new models.TFJS('image', model);
  }
  };
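As the last hunk shows, `models.TFJS` now takes the task's `DataType` tag as its first constructor argument; the same edit recurs in every default task below. A sketch with a throwaway layers model:

```ts
import * as tf from "@tensorflow/tfjs";
import { models } from "@epfml/discojs";

// placeholder single-layer model, just to give the wrapper something to hold
const layersModel = tf.sequential({
  layers: [tf.layers.dense({ inputShape: [4], units: 1 })],
});

// before: new models.TFJS(layersModel)
// after: the DataType tag ('image' | 'tabular' | 'text') comes first
const model = new models.TFJS("tabular", layersModel);
```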
package/dist/default_tasks/lus_covid.d.ts CHANGED
@@ -1,2 +1,2 @@
  import type { TaskProvider } from '../index.js';
- export declare const lusCovid: TaskProvider;
+ export declare const lusCovid: TaskProvider<'image'>;
package/dist/default_tasks/lus_covid.js CHANGED
@@ -1,5 +1,5 @@
  import * as tf from '@tensorflow/tfjs';
- import { data, models } from '../index.js';
+ import { models } from '../index.js';
  export const lusCovid = {
      getTask() {
          return {
@@ -24,7 +24,6 @@ export const lusCovid = {
              batchSize: 5,
              IMAGE_H: 100,
              IMAGE_W: 100,
-             preprocessingFunctions: [data.ImagePreprocessing.Resize, data.ImagePreprocessing.Normalize],
              LABEL_LIST: ['COVID-Positive', 'COVID-Negative'],
              dataType: 'image',
              scheme: 'federated',
@@ -82,6 +81,6 @@ export const lusCovid = {
          loss: 'binaryCrossentropy',
          metrics: ['accuracy']
      });
-     return Promise.resolve(new models.TFJS(model));
+     return Promise.resolve(new models.TFJS('image', model));
  }
  };
package/dist/default_tasks/mnist.d.ts CHANGED
@@ -1,2 +1,2 @@
  import type { TaskProvider } from '../index.js';
- export declare const mnist: TaskProvider;
+ export declare const mnist: TaskProvider<'image'>;
package/dist/default_tasks/mnist.js CHANGED
@@ -1,5 +1,5 @@
  import * as tf from '@tensorflow/tfjs';
- import { data, models } from '../index.js';
+ import { models } from '../index.js';
  export const mnist = {
      getTask() {
          return {
@@ -25,8 +25,6 @@ export const mnist = {
              dataType: 'image',
              IMAGE_H: 28,
              IMAGE_W: 28,
-             // Images should already be at the right size but resizing just in case
-             preprocessingFunctions: [data.ImagePreprocessing.Resize, data.ImagePreprocessing.Normalize],
              LABEL_LIST: ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'],
              scheme: 'decentralized',
              aggregationStrategy: 'secure',
@@ -58,6 +56,6 @@ export const mnist = {
          loss: 'categoricalCrossentropy',
          metrics: ['accuracy']
      });
-     return Promise.resolve(new models.TFJS(model));
+     return Promise.resolve(new models.TFJS('image', model));
  }
  };
package/dist/default_tasks/simple_face.d.ts CHANGED
@@ -1,2 +1,2 @@
  import type { TaskProvider } from '../index.js';
- export declare const simpleFace: TaskProvider;
+ export declare const simpleFace: TaskProvider<'image'>;
package/dist/default_tasks/simple_face.js CHANGED
@@ -1,5 +1,5 @@
  import * as tf from '@tensorflow/tfjs';
- import { data, models } from '../index.js';
+ import { models } from '../index.js';
  import baseModel from '../models/mobileNetV2_35_alpha_2_classes.js';
  export const simpleFace = {
      getTask() {
@@ -22,7 +22,6 @@ export const simpleFace = {
              roundDuration: 1,
              validationSplit: 0.2,
              batchSize: 10,
-             preprocessingFunctions: [data.ImagePreprocessing.Normalize],
              dataType: 'image',
              IMAGE_H: 200,
              IMAGE_W: 200,
@@ -43,6 +42,6 @@ export const simpleFace = {
          loss: 'categoricalCrossentropy',
          metrics: ['accuracy']
      });
-     return new models.TFJS(model);
+     return new models.TFJS('image', model);
  }
  };
package/dist/default_tasks/titanic.d.ts CHANGED
@@ -1,2 +1,2 @@
  import type { TaskProvider } from '../index.js';
- export declare const titanic: TaskProvider;
+ export declare const titanic: TaskProvider<'tabular'>;
package/dist/default_tasks/titanic.js CHANGED
@@ -1,5 +1,5 @@
  import * as tf from '@tensorflow/tfjs';
- import { data, models } from '../index.js';
+ import { models } from '../index.js';
  export const titanic = {
      getTask() {
          return {
@@ -49,7 +49,6 @@ export const titanic = {
              roundDuration: 2,
              validationSplit: 0.2,
              batchSize: 30,
-             preprocessingFunctions: [data.TabularPreprocessing.Sanitize],
              dataType: 'tabular',
              inputColumns: [
                  'Age',
@@ -58,9 +57,7 @@ export const titanic = {
                  'Fare',
                  'Pclass'
              ],
-             outputColumns: [
-                 'Survived'
-             ],
+             outputColumn: 'Survived',
              scheme: 'federated',
              aggregationStrategy: 'mean',
              minNbOfParticipants: 2,
@@ -84,6 +81,6 @@ export const titanic = {
          loss: 'binaryCrossentropy',
          metrics: ['accuracy']
      });
-     return Promise.resolve(new models.TFJS(model));
+     return Promise.resolve(new models.TFJS('tabular', model));
  }
  };
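Beyond the shared `models.TFJS` and `preprocessingFunctions` edits, titanic shows a schema change for tabular tasks: the `outputColumns` array becomes a single `outputColumn`. For a custom tabular task, the migration looks like this (abridged; the other required `trainingInformation` fields are omitted):

```ts
// abridged trainingInformation for a hypothetical custom tabular task
const trainingInformation = {
  dataType: "tabular",
  inputColumns: ["Age", "SibSp", "Parch", "Fare", "Pclass"],
  outputColumn: "Survived", // was: outputColumns: ["Survived"]
} as const;
```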
package/dist/default_tasks/wikitext.d.ts CHANGED
@@ -1,2 +1,2 @@
  import type { TaskProvider } from '../index.js';
- export declare const wikitext: TaskProvider;
+ export declare const wikitext: TaskProvider<'text'>;
package/dist/default_tasks/wikitext.js CHANGED
@@ -1,4 +1,4 @@
- import { data, models } from '../index.js';
+ import { models } from '../index.js';
  export const wikitext = {
      getTask() {
          return {
@@ -23,7 +23,6 @@ export const wikitext = {
          },
          trainingInformation: {
              dataType: 'text',
-             preprocessingFunctions: [data.TextPreprocessing.Tokenize, data.TextPreprocessing.LeftPadding],
              scheme: 'federated',
              aggregationStrategy: 'mean',
              minNbOfParticipants: 2,
package/dist/index.d.ts CHANGED
@@ -7,14 +7,13 @@ export * as aggregator from './aggregator/index.js';
  export { WeightsContainer, aggregation } from './weights/index.js';
  export { Logger, ConsoleLogger } from './logging/index.js';
  export { Disco, RoundLogs, RoundStatus } from './training/index.js';
- export { Validator } from './validation/index.js';
+ export { Validator } from './validator.js';
  export { Model, BatchLogs, EpochLogs, ValidationMetrics } from './models/index.js';
  export * as models from './models/index.js';
  export * from './task/index.js';
  export * as defaultTasks from './default_tasks/index.js';
  export * as async_iterator from "./utils/async_iterator.js";
  export { EventEmitter } from "./utils/event_emitter.js";
- export { Dataset } from "./dataset/index.js";
- export * from "./dataset/types.js";
- export * from "./types.js";
- export * as processing from "./processing.js";
+ export * from "./dataset/index.js";
+ export * from "./types/index.js";
+ export * as processing from "./processing/index.js";
package/dist/index.js CHANGED
@@ -7,14 +7,13 @@ export * as aggregator from './aggregator/index.js';
  export { WeightsContainer, aggregation } from './weights/index.js';
  export { ConsoleLogger } from './logging/index.js';
  export { Disco } from './training/index.js';
- export { Validator } from './validation/index.js';
+ export { Validator } from './validator.js';
  export { Model, EpochLogs } from './models/index.js';
  export * as models from './models/index.js';
  export * from './task/index.js';
  export * as defaultTasks from './default_tasks/index.js';
  export * as async_iterator from "./utils/async_iterator.js";
  export { EventEmitter } from "./utils/event_emitter.js";
- export { Dataset } from "./dataset/index.js";
- export * from "./dataset/types.js"; // TODO merge with above
- export * from "./types.js";
- export * as processing from "./processing.js";
+ export * from "./dataset/index.js";
+ export * from "./types/index.js";
+ export * as processing from "./processing/index.js";
package/dist/models/gpt/index.d.ts CHANGED
@@ -1,17 +1,20 @@
  /**
   * this code is taken from gpt-tfjs with modifications from @peacefulotter and @lukemovement
   **/
- import * as tf from '@tensorflow/tfjs';
- import { PreTrainedTokenizer } from '@xenova/transformers';
- import { WeightsContainer } from '../../index.js';
+ import * as tf from "@tensorflow/tfjs";
+ import type { Batched, Dataset, DataFormat } from "../../index.js";
+ import { WeightsContainer } from "../../index.js";
  import { BatchLogs, Model, EpochLogs } from "../index.js";
- import type { Prediction, Sample } from '../model.js';
- import { type GPTConfig } from './config.js';
+ import { type GPTConfig } from "./config.js";
  export type GPTSerialization = {
      weights: WeightsContainer;
      config?: GPTConfig;
  };
- export declare class GPT extends Model {
+ interface PredictConfig {
+     temperature: number;
+     doSample: boolean;
+ }
+ export declare class GPT extends Model<"text"> {
      #private;
      private readonly model;
      constructor(partialConfig?: GPTConfig, layersModel?: tf.LayersModel);
@@ -24,20 +27,14 @@ export declare class GPT extends Model {
       * @param epochs the number of passes of the training dataset
       * @param tracker
       */
-     train(trainingData: tf.data.Dataset<{
-         xs: tf.Tensor2D;
-         ys: tf.Tensor3D;
-     }>, validationData?: tf.data.Dataset<{
-         xs: tf.Tensor2D;
-         ys: tf.Tensor3D;
-     }>): AsyncGenerator<BatchLogs, EpochLogs>;
-     predict(input: Sample): Promise<Prediction>;
-     generate(input: string, tokenizer: PreTrainedTokenizer, newTokens?: number): Promise<string>;
+     train(trainingDataset: Dataset<Batched<DataFormat.ModelEncoded["text"]>>, validationDataset?: Dataset<Batched<DataFormat.ModelEncoded["text"]>>): AsyncGenerator<BatchLogs, EpochLogs>;
+     predict(batch: Batched<DataFormat.ModelEncoded["text"][0]>, options?: Partial<PredictConfig>): Promise<Batched<DataFormat.ModelEncoded["text"][1]>>;
      get config(): Required<GPTConfig>;
      get weights(): WeightsContainer;
      set weights(ws: WeightsContainer);
-     static deserialize(data: GPTSerialization): Model;
+     static deserialize(data: GPTSerialization): Model<"text">;
      serialize(): GPTSerialization;
      extract(): tf.LayersModel;
      [Symbol.dispose](): void;
  }
+ export {};
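Note that `generate(input, tokenizer, newTokens)` is gone from the public type: inference now goes through `predict` on a batch of model-encoded text, with optional sampling controls (`temperature`, `doSample`). A hedged sketch, assuming `GPT` is reachable via the `models` namespace and that `DataFormat.ModelEncoded['text'][0]` is a token-id sequence (neither is spelled out by this diff):

```ts
import { List } from "immutable";
import { models } from "@epfml/discojs";

async function complete(): Promise<void> {
  const gpt = new models.GPT(); // default GPTConfig

  // one made-up, already-tokenized prompt; real code would run the
  // task's tokenizer through the processing helpers first
  const batch = List.of(List.of(101, 2023, 2003));

  const next = await gpt.predict(batch, { doSample: true, temperature: 0.8 });
  console.log(next.toArray()); // one predicted continuation per batch entry
}
```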