wasm-bhtsne 1.0.0 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE CHANGED
File without changes
package/README.md CHANGED
@@ -10,8 +10,17 @@ This is the wasm version of the [bhtsne](https://github.com/frjnn/bhtsne) crate.
10
10
 
11
11
  ## Features
12
12
  - Harnesses multi-threading capabilities through [wasm-bindgen-rayon](https://github.com/RReverser/wasm-bindgen-rayon).
13
+ - Allows passing t-SNE hyperparameters through a JavaScript object, where you only need to include the parameters you want to change from the defaults. If you don't specify any, default values are used.
14
+ - Supports running the algorithm in iterations, enabling progressive refinement of the embedding.
15
+ - Supports both Float32Array and Float64Array for data input.
16
+
17
+ ## Requirements
18
+ To use the multithreading feature, you need to enable `SharedArrayBuffer` on the Web. As stated in the [wasm-bindgen-rayon readme](https://github.com/RReverser/wasm-bindgen-rayon/blob/main/README.md):
19
+
20
+ In order to use `SharedArrayBuffer` on the Web, you need to enable [cross-origin isolation policies](https://web.dev/coop-coep/). Check out the linked article for details.
13
21
 
14
22
  ## Installation
23
+ Install the [wasm-bhtsne npm package](https://www.npmjs.com/package/wasm-bhtsne):
15
24
  ```shell
16
25
  npm i wasm-bhtsne
17
26
  ```
@@ -41,16 +50,43 @@ function createRandomMatrix(rows, columns) {
41
50
  async onclick() {
42
51
 
43
52
  // create random points and dimensions
44
- const data = createRandomMatrix(500, 7);
53
+ const data = createRandomMatrix(5000, 512);
54
+
55
+ // Example of setting hyperparameters
56
+ const opt = {
57
+ learning_rate: 150.0,
58
+ perplexity: 30.0,
59
+ theta: 0.6
60
+ };
61
+
62
+ // let tsne_encoder = new multiThread.bhtSNEf64(data, opt);
63
+ // or
64
+ let tsne_encoder = new multiThread.bhtSNEf32(data, opt);
65
+ let compressed_vectors;
45
66
 
46
- let tsne_encoder = new multiThread.bhtSNE(data); // create a tSNE instance
47
- tsne_encoder.perplexity = 25.0; // change hyperparameters
67
+ for (let i = 0; i < 1000; i++) {
68
+ compressed_vectors = tsne_encoder.step(1)
69
+ /* …do something with `compressed_vectors`… */
70
+ }
48
71
 
49
- // run the algorithm with 1000 iterations
50
- let compressed_vectors = tsne_encoder.step(1000);
51
72
  console.log("Compressed Vectors:", compressed_vectors);
52
73
  },
53
74
  disabled: false
54
75
  });
55
76
  })();
56
77
  ```
78
+
79
+ ## Hyperparameters
80
+ Here is a list of hyperparameters that can be set in the JavaScript object, along with their default values and descriptions:
81
+
82
+ - **`learning_rate`** (default: `200.0`): controls the step size during the optimization.
83
+ - **`momentum`** (default: `0.5`): helps accelerate gradient vectors in the right direction, leading to faster convergence.
84
+ - **`final_momentum`** (default: `0.8`): momentum value used after a certain number of iterations.
85
+ - **`momentum_switch_epoch`** (default: `250`): the epoch after which the algorithm switches to `final_momentum` for the map update.
86
+ - **`stop_lying_epoch`** (default: `250`): the epoch after which the P distribution values become true. For epochs < `stop_lying_epoch`, the values of the P distribution are multiplied by a factor equal to `12.0`.
87
+ - **`theta`** (default: `0.5`): determines the accuracy of the approximation. Larger values increase the speed but decrease accuracy. Must be strictly greater than 0.0.
88
+ - **`embedding_dim`** (default: `2`): the dimensionality of the embedding space.
89
+ - **`perplexity`** (default: `20.0`): the perplexity value. It determines the balance between local and global aspects of the data. A good value lies between 5.0 and 50.0.
90
+
91
+
92
+
package/package.json CHANGED
@@ -1,10 +1,11 @@
1
1
  {
2
2
  "name": "wasm-bhtsne",
3
+ "type": "module",
3
4
  "collaborators": [
4
5
  "lv291 <baiunco291@proton.me>"
5
6
  ],
6
7
  "description": "Barnes-Hut implementations of t-SNE in wasm",
7
- "version": "1.0.0",
8
+ "version": "1.2.2",
8
9
  "license": "MIT",
9
10
  "repository": {
10
11
  "type": "git",
@@ -13,9 +14,10 @@
13
14
  "files": [
14
15
  "wasm_bhtsne_bg.wasm",
15
16
  "wasm_bhtsne.js",
16
- "wasm_bhtsne.d.ts"
17
+ "wasm_bhtsne.d.ts",
18
+ "snippets"
17
19
  ],
18
- "module": "wasm_bhtsne.js",
20
+ "main": "wasm_bhtsne.js",
19
21
  "types": "wasm_bhtsne.d.ts",
20
22
  "sideEffects": [
21
23
  "./snippets/*"
@@ -0,0 +1,107 @@
1
+ /*
2
+ * Copyright 2022 Google Inc. All Rights Reserved.
3
+ * Licensed under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License.
5
+ * You may obtain a copy of the License at
6
+ * http://www.apache.org/licenses/LICENSE-2.0
7
+ * Unless required by applicable law or agreed to in writing, software
8
+ * distributed under the License is distributed on an "AS IS" BASIS,
9
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ * See the License for the specific language governing permissions and
11
+ * limitations under the License.
12
+ */
13
+
14
+ // Note: we use `wasm_bindgen_worker_`-prefixed message types to make sure
15
+ // we can handle bundling into other files, which might happen to have their
16
+ // own `postMessage`/`onmessage` communication channels.
17
+ //
18
+ // If we didn't take that into account, we could send much simpler signals
19
+ // like just `0` or whatever, but the code would be less resilient.
20
+
21
+ function waitForMsgType(target, type) {
22
+ return new Promise(resolve => {
23
+ target.addEventListener('message', function onMsg({ data }) {
24
+ if (data?.type !== type) return;
25
+ target.removeEventListener('message', onMsg);
26
+ resolve(data);
27
+ });
28
+ });
29
+ }
30
+
31
+ waitForMsgType(self, 'wasm_bindgen_worker_init').then(async ({ init, receiver }) => {
32
+ // # Note 1
33
+ // Our JS should have been generated in
34
+ // `[out-dir]/snippets/wasm-bindgen-rayon-[hash]/workerHelpers.js`,
35
+ // resolve the main module via `../../..`.
36
+ //
37
+ // This might need updating if the generated structure changes on wasm-bindgen
38
+ // side ever in the future, but works well with bundlers today. The whole
39
+ // point of this crate, after all, is to abstract away unstable features
40
+ // and temporary bugs so that you don't need to deal with them in your code.
41
+ //
42
+ // # Note 2
43
+ // This could be a regular import, but then some bundlers complain about
44
+ // circular deps.
45
+ //
46
+ // Dynamic import could be cheap if this file was inlined into the parent,
47
+ // which would require us just using `../../..` in `new Worker` below,
48
+ // but that doesn't work because wasm-pack unconditionally adds
49
+ // "sideEffects":false (see below).
50
+ //
51
+ // OTOH, even though it can't be inlined, it should be still reasonably
52
+ // cheap since the requested file is already in cache (it was loaded by
53
+ // the main thread).
54
+ const pkg = await import('../../..');
55
+ await pkg.default(init);
56
+ postMessage({ type: 'wasm_bindgen_worker_ready' });
57
+ pkg.wbg_rayon_start_worker(receiver);
58
+ });
59
+
60
+ // Note: this is never used, but necessary to prevent a bug in Firefox
61
+ // (https://bugzilla.mozilla.org/show_bug.cgi?id=1702191) where it collects
62
+ // Web Workers that have a shared WebAssembly memory with the main thread,
63
+ // but are not explicitly rooted via a `Worker` instance.
64
+ //
65
+ // By storing them in a variable, we can keep `Worker` objects around and
66
+ // prevent them from getting GC-d.
67
+ let _workers;
68
+
69
+ export async function startWorkers(module, memory, builder) {
70
+ if (builder.numThreads() === 0) {
71
+ throw new Error(`num_threads must be > 0.`);
72
+ }
73
+
74
+ const workerInit = {
75
+ type: 'wasm_bindgen_worker_init',
76
+ init: { module_or_path: module, memory },
77
+ receiver: builder.receiver()
78
+ };
79
+
80
+ _workers = await Promise.all(
81
+ Array.from({ length: builder.numThreads() }, async () => {
82
+ // Self-spawn into a new Worker.
83
+ //
84
+ // TODO: while `new URL('...', import.meta.url)` becomes a semi-standard
85
+ // way to get asset URLs relative to the module across various bundlers
86
+ // and browser, ideally we should switch to `import.meta.resolve`
87
+ // once it becomes a standard.
88
+ //
89
+ // Note: we could use `../../..` as the URL here to inline workerHelpers.js
90
+ // into the parent entry instead of creating another split point -
91
+ // this would be preferable from optimization perspective -
92
+ // however, Webpack then eliminates all message handler code
93
+ // because wasm-pack produces "sideEffects":false in package.json
94
+ // unconditionally.
95
+ //
96
+ // The only way to work around that is to have side effect code
97
+ // in an entry point such as Worker file itself.
98
+ const worker = new Worker(new URL('./workerHelpers.js', import.meta.url), {
99
+ type: 'module'
100
+ });
101
+ worker.postMessage(workerInit);
102
+ await waitForMsgType(worker, 'wasm_bindgen_worker_ready');
103
+ return worker;
104
+ })
105
+ );
106
+ builder.build();
107
+ }
package/wasm_bhtsne.d.ts CHANGED
@@ -1,193 +1,93 @@
1
1
  /* tslint:disable */
2
2
  /* eslint-disable */
3
- /**
4
- * @param {number} num_threads
5
- * @returns {Promise<any>}
6
- */
7
- export function initThreadPool(num_threads: number): Promise<any>;
8
- /**
9
- * @param {number} receiver
10
- */
11
- export function wbg_rayon_start_worker(receiver: number): void;
12
- /**
13
- * t-distributed stochastic neighbor embedding. Provides a parallel implementation of both the
14
- * exact version of the algorithm and the tree accelerated one leveraging space partitioning trees.
15
- */
16
- export class bhtSNE {
3
+
4
+ export class bhtSNEf32 {
17
5
  free(): void;
18
- /**
19
- * @param {any} data
20
- */
21
- constructor(data: any);
22
- /**
23
- * Performs a parallel Barnes-Hut approximation of the t-SNE algorithm.
24
- *
25
- * # Arguments
26
- *
27
- * `epochs` - Sets epochs, the maximum number of fitting iterations.
28
- * @param {number} epochs
29
- * @returns {any}
30
- */
6
+ [Symbol.dispose](): void;
7
+ constructor(data: any, opt: any);
8
+ /**
9
+ * Performs a parallel Barnes-Hut approximation of the t-SNE algorithm.
10
+ *
11
+ * # Arguments
12
+ *
13
+ * `epochs` - the maximum number of fitting iterations. Must be positive
14
+ */
31
15
  step(epochs: number): any;
32
- /**
33
- * Sets a new learning rate.
34
- *
35
- * # Arguments
36
- *
37
- * `learning_rate` - new value for the learning rate.
38
- * @param {number} learning_rate
39
- */
40
- learning_rate(learning_rate: number): void;
41
- /**
42
- * Sets new epochs, i.e the maximum number of fitting iterations.
43
- *
44
- * # Arguments
45
- *
46
- * `epochs` - new value for the epochs.
47
- * @param {number} epochs
48
- */
49
- epochs(epochs: number): void;
50
- /**
51
- * Sets a new momentum.
52
- *
53
- * # Arguments
54
- *
55
- * `momentum` - new value for the momentum.
56
- * @param {number} momentum
57
- */
58
- momentum(momentum: number): void;
59
- /**
60
- * Sets a new final momentum.
61
- *
62
- * # Arguments
63
- *
64
- * `final_momentum` - new value for the final momentum.
65
- * @param {number} final_momentum
66
- */
67
- final_momentum(final_momentum: number): void;
68
- /**
69
- * Sets a new momentum switch epoch, i.e. the epoch after which the algorithm switches to
70
- * `final_momentum` for the map update.
71
- *
72
- * # Arguments
73
- *
74
- * `momentum_switch_epoch` - new value for the momentum switch epoch.
75
- * @param {number} momentum_switch_epoch
76
- */
77
- momentum_switch_epoch(momentum_switch_epoch: number): void;
78
- /**
79
- * Sets a new stop lying epoch, i.e. the epoch after which the P distribution values become
80
- * true, as defined in the original implementation. For epochs < `stop_lying_epoch` the values
81
- * of the P distribution are multiplied by a factor equal to `12.0`.
82
- *
83
- * # Arguments
84
- *
85
- * `stop_lying_epoch` - new value for the stop lying epoch.
86
- * @param {number} stop_lying_epoch
87
- */
88
- stop_lying_epoch(stop_lying_epoch: number): void;
89
- /**
90
- * Sets a new theta, which determines the accuracy of the approximation. Must be **strictly greater than
91
- * 0.0**. Large values for θ increase the speed of the algorithm but decrease its accuracy.
92
- * For small values of θ it is less probable that a cell in the space partitioning tree will
93
- * be treated as a single point. For θ equal to 0.0 the method degenerates in the exact
94
- * version.
95
- *
96
- * # Arguments
97
- *
98
- * * `theta` - new value for the theta.
99
- * @param {number} theta
100
- */
101
- theta(theta: number): void;
102
- /**
103
- * Sets a new value for the embedding dimension.
104
- *
105
- * # Arguments
106
- *
107
- * `embedding_dim` - new value for the embedding space dimensionality.
108
- * @param {number} embedding_dim
109
- */
110
- embedding_dim(embedding_dim: number): void;
111
- /**
112
- * Sets a new perplexity value.
113
- *
114
- * # Arguments
115
- *
116
- * `perplexity` - new value for the perplexity. It's used so that the bandwidth of the Gaussian
117
- * kernels, is set in such a way that the perplexity of each the conditional distribution *Pi*
118
- * equals a predefined perplexity *u*.
119
- *
120
- * A good value for perplexity lies between 5.0 and 50.0.
121
- * @param {number} perplexity
122
- */
123
- perplexity(perplexity: number): void;
124
16
  }
125
- /**
126
- */
17
+
18
+ export class bhtSNEf64 {
19
+ free(): void;
20
+ [Symbol.dispose](): void;
21
+ constructor(data: any, opt: any);
22
+ /**
23
+ * Performs a parallel Barnes-Hut approximation of the t-SNE algorithm.
24
+ *
25
+ * # Arguments
26
+ *
27
+ * `epochs` - the maximum number of fitting iterations.
28
+ */
29
+ step(epochs: number): any;
30
+ }
31
+
32
+ export function initThreadPool(num_threads: number): Promise<any>;
33
+
127
34
  export class wbg_rayon_PoolBuilder {
35
+ private constructor();
128
36
  free(): void;
129
- /**
130
- * @returns {number}
131
- */
37
+ [Symbol.dispose](): void;
132
38
  numThreads(): number;
133
- /**
134
- * @returns {number}
135
- */
136
- receiver(): number;
137
- /**
138
- */
139
39
  build(): void;
40
+ receiver(): number;
140
41
  }
141
42
 
43
+ export function wbg_rayon_start_worker(receiver: number): void;
44
+
142
45
  export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembly.Module;
143
46
 
144
47
  export interface InitOutput {
145
- readonly __wbg_bhtsne_free: (a: number) => void;
146
- readonly bhtsne_new: (a: number) => number;
147
- readonly bhtsne_step: (a: number, b: number, c: number) => void;
148
- readonly bhtsne_learning_rate: (a: number, b: number) => void;
149
- readonly bhtsne_epochs: (a: number, b: number) => void;
150
- readonly bhtsne_momentum: (a: number, b: number) => void;
151
- readonly bhtsne_final_momentum: (a: number, b: number) => void;
152
- readonly bhtsne_momentum_switch_epoch: (a: number, b: number) => void;
153
- readonly bhtsne_stop_lying_epoch: (a: number, b: number) => void;
154
- readonly bhtsne_theta: (a: number, b: number) => void;
155
- readonly bhtsne_embedding_dim: (a: number, b: number) => void;
156
- readonly bhtsne_perplexity: (a: number, b: number) => void;
157
- readonly __wbg_wbg_rayon_poolbuilder_free: (a: number) => void;
48
+ readonly __wbg_bhtsnef32_free: (a: number, b: number) => void;
49
+ readonly __wbg_bhtsnef64_free: (a: number, b: number) => void;
50
+ readonly bhtsnef32_new: (a: any, b: any) => number;
51
+ readonly bhtsnef32_step: (a: number, b: number) => [number, number, number];
52
+ readonly bhtsnef64_new: (a: any, b: any) => number;
53
+ readonly bhtsnef64_step: (a: number, b: number) => [number, number, number];
54
+ readonly __wbg_wbg_rayon_poolbuilder_free: (a: number, b: number) => void;
55
+ readonly initThreadPool: (a: number) => any;
56
+ readonly wbg_rayon_poolbuilder_build: (a: number) => void;
158
57
  readonly wbg_rayon_poolbuilder_numThreads: (a: number) => number;
159
58
  readonly wbg_rayon_poolbuilder_receiver: (a: number) => number;
160
- readonly wbg_rayon_poolbuilder_build: (a: number) => void;
161
- readonly initThreadPool: (a: number) => number;
162
59
  readonly wbg_rayon_start_worker: (a: number) => void;
163
60
  readonly memory: WebAssembly.Memory;
164
61
  readonly __wbindgen_malloc: (a: number, b: number) => number;
165
62
  readonly __wbindgen_realloc: (a: number, b: number, c: number, d: number) => number;
166
- readonly __wbindgen_add_to_stack_pointer: (a: number) => number;
167
63
  readonly __wbindgen_exn_store: (a: number) => void;
168
- readonly __wbindgen_thread_destroy: (a?: number, b?: number) => void;
169
- readonly __wbindgen_start: () => void;
64
+ readonly __externref_table_alloc: () => number;
65
+ readonly __wbindgen_externrefs: WebAssembly.Table;
66
+ readonly __externref_table_dealloc: (a: number) => void;
67
+ readonly __wbindgen_thread_destroy: (a?: number, b?: number, c?: number) => void;
68
+ readonly __wbindgen_start: (a: number) => void;
170
69
  }
171
70
 
172
71
  export type SyncInitInput = BufferSource | WebAssembly.Module;
72
+
173
73
  /**
174
74
  * Instantiates the given `module`, which can either be bytes or
175
75
  * a precompiled `WebAssembly.Module`.
176
76
  *
177
- * @param {SyncInitInput} module
178
- * @param {WebAssembly.Memory} maybe_memory
77
+ * @param {{ module: SyncInitInput, memory?: WebAssembly.Memory, thread_stack_size?: number }} module - Passing `SyncInitInput` directly is deprecated.
78
+ * @param {WebAssembly.Memory} memory - Deprecated.
179
79
  *
180
80
  * @returns {InitOutput}
181
81
  */
182
- export function initSync(module: SyncInitInput, maybe_memory?: WebAssembly.Memory): InitOutput;
82
+ export function initSync(module: { module: SyncInitInput, memory?: WebAssembly.Memory, thread_stack_size?: number } | SyncInitInput, memory?: WebAssembly.Memory): InitOutput;
183
83
 
184
84
  /**
185
85
  * If `module_or_path` is {RequestInfo} or {URL}, makes a request and
186
86
  * for everything else, calls `WebAssembly.instantiate` directly.
187
87
  *
188
- * @param {InitInput | Promise<InitInput>} module_or_path
189
- * @param {WebAssembly.Memory} maybe_memory
88
+ * @param {{ module_or_path: InitInput | Promise<InitInput>, memory?: WebAssembly.Memory, thread_stack_size?: number }} module_or_path - Passing `InitInput` directly is deprecated.
89
+ * @param {WebAssembly.Memory} memory - Deprecated.
190
90
  *
191
91
  * @returns {Promise<InitOutput>}
192
92
  */
193
- export default function __wbg_init (module_or_path?: InitInput | Promise<InitInput>, maybe_memory?: WebAssembly.Memory): Promise<InitOutput>;
93
+ export default function __wbg_init (module_or_path?: { module_or_path: InitInput | Promise<InitInput>, memory?: WebAssembly.Memory, thread_stack_size?: number } | InitInput | Promise<InitInput>, memory?: WebAssembly.Memory): Promise<InitOutput>;