npm - wasm-bhtsne - Versions diffs - 1.0.0 → 1.2.2 - Mend

wasm-bhtsne 1.0.0 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/LICENSE +0 -0
package/README.md +41 -5
package/package.json +5 -3
package/snippets/wasm-bindgen-rayon-38edf6e439f6d70d/src/workerHelpers.js +107 -0
package/wasm_bhtsne.d.ts +54 -154
package/wasm_bhtsne.js +451 -493
package/wasm_bhtsne_bg.wasm +0 -0

package/LICENSE CHANGED Viewed

File without changes

package/README.md CHANGED Viewed

@@ -10,8 +10,17 @@ This is the wasm version of the [bhtsne](https://github.com/frjnn/bhtsne) crate.
 ## Features
 - Harnesses multi-threading capabilities through [wasm-bindgen-rayon](https://github.com/RReverser/wasm-bindgen-rayon).
+- Allows passing t-SNE hyperparameters through a JavaScript object, where you only need to include the parameters you want to change from the defaults. If you don't specify any, default values are used.
+- Supports running the algorithm in iterations, enabling progressive refinement of the embedding.
+- Supports both Float32Array and Float64Array for data input
+## Requirements
+To use the multithreading feature, you need to enable `SharedArrayBuffer` on the Web. As stated in the [wasm-bindgen-rayon readme](https://github.com/RReverser/wasm-bindgen-rayon/blob/main/README.md):
+In order to use `SharedArrayBuffer` on the Web, you need to enable [cross-origin isolation policies](https://web.dev/coop-coep/). Check out the linked article for details.
 ## Installation
+Install the [wasm-bhtsne npm package](https://www.npmjs.com/package/wasm-bhtsne):
 ```shell
 npm i wasm-bhtsne
 ```
@@ -41,16 +50,43 @@ function createRandomMatrix(rows, columns) {
         async onclick() {
             // create random points and dimensions
-            const data = createRandomMatrix(500, 7);
+            const data = createRandomMatrix(5000, 512);
+            // Example of setting hyperparameters
+            const opt = {
+                learning_rate: 150.0,
+                perplexity: 30.0,
+                theta: 0.6
+            };
+            // let tsne_encoder = new multiThread.bhtSNEf64(data, opt);
+            // or
+            let tsne_encoder = new multiThread.bhtSNEf32(data, opt);
+            let compressed_vectors;
-            let tsne_encoder = new multiThread.bhtSNE(data); // create a tSNE instance
-            tsne_encoder.perplexity = 25.0;  // change hyperparameters
+            for (let i = 0; i < 1000; i++) {
+                compressed_vectors = tsne_encoder.step(1)
+                /* …do something with `compressed_vectors`… */
+            }
-            // run the algorithm with 1000 iterations
-            let compressed_vectors = tsne_encoder.step(1000);
             console.log("Compressed Vectors:", compressed_vectors);
         },
         disabled: false
     });
 })();
 ```
+## Hyperparameters
+Here is a list of hyperparameters that can be set in the JavaScript object, along with their default values and descriptions:
+- **`learning_rate`** (default: `200.0`): controls the step size during the optimization.
+- **`momentum`** (default: `0.5`): helps accelerate gradient vectors in the right directions, thus leading to faster convergence.
+- **`final_momentum`** (default: `0.8`): momentum value used after a certain number of iterations.
+- **`momentum_switch_epoch`** (default: `250`): the epoch after which the algorithm switches to `final_momentum` for the map update.
+- **`stop_lying_epoch`** (default: `250`): the epoch after which the P distribution values become true. For epochs < `stop_lying_epoch`, the values of the P distribution are multiplied by a factor equal to `12.0`.
+- **`theta`** (default: `0.5`): Determines the accuracy of the approximation. Larger values increase the speed but decrease accuracy. Must be strictly greater than 0.0.
+- **`embedding_dim`** (default: `2`): the dimensionality of the embedding space.
+- **`perplexity`** (default: `20.0`): the perplexity value. It determines the balance between local and global aspects of the data. A good value lies between 5.0 and 50.0.

package/package.json CHANGED Viewed

@@ -1,10 +1,11 @@
 {
   "name": "wasm-bhtsne",
+  "type": "module",
   "collaborators": [
     "lv291 <baiunco291@proton.me>"
   ],
   "description": "Barnes-Hut implementations of t-SNE in wasm",
-  "version": "1.0.0",
+  "version": "1.2.2",
   "license": "MIT",
   "repository": {
     "type": "git",
@@ -13,9 +14,10 @@
   "files": [
     "wasm_bhtsne_bg.wasm",
     "wasm_bhtsne.js",
-    "wasm_bhtsne.d.ts"
+    "wasm_bhtsne.d.ts",
+    "snippets"
   ],
-  "module": "wasm_bhtsne.js",
+  "main": "wasm_bhtsne.js",
   "types": "wasm_bhtsne.d.ts",
   "sideEffects": [
     "./snippets/*"

package/snippets/wasm-bindgen-rayon-38edf6e439f6d70d/src/workerHelpers.js ADDED Viewed

@@ -0,0 +1,107 @@
+/*
+ * Copyright 2022 Google Inc. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// Note: we use `wasm_bindgen_worker_`-prefixed message types to make sure
+// we can handle bundling into other files, which might happen to have their
+// own `postMessage`/`onmessage` communication channels.
+//
+// If we didn't take that into account, we could send much simpler signals
+// like just `0` or whatever, but the code would be less resilient.
+function waitForMsgType(target, type) {
+  return new Promise(resolve => {
+    target.addEventListener('message', function onMsg({ data }) {
+      if (data?.type !== type) return;
+      target.removeEventListener('message', onMsg);
+      resolve(data);
+    });
+  });
+}
+waitForMsgType(self, 'wasm_bindgen_worker_init').then(async ({ init, receiver }) => {
+  // # Note 1
+  // Our JS should have been generated in
+  // `[out-dir]/snippets/wasm-bindgen-rayon-[hash]/workerHelpers.js`,
+  // resolve the main module via `../../..`.
+  //
+  // This might need updating if the generated structure changes on wasm-bindgen
+  // side ever in the future, but works well with bundlers today. The whole
+  // point of this crate, after all, is to abstract away unstable features
+  // and temporary bugs so that you don't need to deal with them in your code.
+  //
+  // # Note 2
+  // This could be a regular import, but then some bundlers complain about
+  // circular deps.
+  //
+  // Dynamic import could be cheap if this file was inlined into the parent,
+  // which would require us just using `../../..` in `new Worker` below,
+  // but that doesn't work because wasm-pack unconditionally adds
+  // "sideEffects":false (see below).
+  //
+  // OTOH, even though it can't be inlined, it should be still reasonably
+  // cheap since the requested file is already in cache (it was loaded by
+  // the main thread).
+  const pkg = await import('../../..');
+  await pkg.default(init);
+  postMessage({ type: 'wasm_bindgen_worker_ready' });
+  pkg.wbg_rayon_start_worker(receiver);
+});
+// Note: this is never used, but necessary to prevent a bug in Firefox
+// (https://bugzilla.mozilla.org/show_bug.cgi?id=1702191) where it collects
+// Web Workers that have a shared WebAssembly memory with the main thread,
+// but are not explicitly rooted via a `Worker` instance.
+//
+// By storing them in a variable, we can keep `Worker` objects around and
+// prevent them from getting GC-d.
+let _workers;
+export async function startWorkers(module, memory, builder) {
+  if (builder.numThreads() === 0) {
+    throw new Error(`num_threads must be > 0.`);
+  }
+  const workerInit = {
+    type: 'wasm_bindgen_worker_init',
+    init: { module_or_path: module, memory },
+    receiver: builder.receiver()
+  };
+  _workers = await Promise.all(
+    Array.from({ length: builder.numThreads() }, async () => {
+      // Self-spawn into a new Worker.
+      //
+      // TODO: while `new URL('...', import.meta.url)` becomes a semi-standard
+      // way to get asset URLs relative to the module across various bundlers
+      // and browsers, ideally we should switch to `import.meta.resolve`
+      // once it becomes a standard.
+      //
+      // Note: we could use `../../..` as the URL here to inline workerHelpers.js
+      // into the parent entry instead of creating another split point -
+      // this would be preferable from optimization perspective -
+      // however, Webpack then eliminates all message handler code
+      // because wasm-pack produces "sideEffects":false in package.json
+      // unconditionally.
+      //
+      // The only way to work around that is to have side effect code
+      // in an entry point such as Worker file itself.
+      const worker = new Worker(new URL('./workerHelpers.js', import.meta.url), {
+        type: 'module'
+      });
+      worker.postMessage(workerInit);
+      await waitForMsgType(worker, 'wasm_bindgen_worker_ready');
+      return worker;
+    })
+  );
+  builder.build();
+}

package/wasm_bhtsne.d.ts CHANGED Viewed

@@ -1,193 +1,93 @@
 /* tslint:disable */
 /* eslint-disable */
-/**
-* @param {number} num_threads
-* @returns {Promise<any>}
-*/
-export function initThreadPool(num_threads: number): Promise<any>;
-/**
-* @param {number} receiver
-*/
-export function wbg_rayon_start_worker(receiver: number): void;
-/**
-* t-distributed stochastic neighbor embedding. Provides a parallel implementation of both the
-* exact version of the algorithm and the tree accelerated one leveraging space partitioning trees.
-*/
-export class bhtSNE {
+export class bhtSNEf32 {
   free(): void;
-/**
-* @param {any} data
-*/
-  constructor(data: any);
-/**
-* Performs a parallel Barnes-Hut approximation of the t-SNE algorithm.
-*
-* # Arguments
-*
-* `epochs` - Sets epochs, the maximum number of fitting iterations.
-* @param {number} epochs
-* @returns {any}
-*/
+  [Symbol.dispose](): void;
+  constructor(data: any, opt: any);
+  /**
+   * Performs a parallel Barnes-Hut approximation of the t-SNE algorithm.
+   *
+   * # Arguments
+   *
+   * `epochs` - the maximum number of fitting iterations. Must be positive
+   */
   step(epochs: number): any;
-/**
-* Sets a new learning rate.
-*
-* # Arguments
-*
-* `learning_rate` - new value for the learning rate.
-* @param {number} learning_rate
-*/
-  learning_rate(learning_rate: number): void;
-/**
-* Sets new epochs, i.e the maximum number of fitting iterations.
-*
-* # Arguments
-*
-* `epochs` - new value for the epochs.
-* @param {number} epochs
-*/
-  epochs(epochs: number): void;
-/**
-* Sets a new momentum.
-*
-* # Arguments
-*
-* `momentum` - new value for the momentum.
-* @param {number} momentum
-*/
-  momentum(momentum: number): void;
-/**
-* Sets a new final momentum.
-*
-* # Arguments
-*
-* `final_momentum` - new value for the final momentum.
-* @param {number} final_momentum
-*/
-  final_momentum(final_momentum: number): void;
-/**
-* Sets a new momentum switch epoch, i.e. the epoch after which the algorithm switches to
-* `final_momentum` for the map update.
-*
-* # Arguments
-*
-* `momentum_switch_epoch` - new value for the momentum switch epoch.
-* @param {number} momentum_switch_epoch
-*/
-  momentum_switch_epoch(momentum_switch_epoch: number): void;
-/**
-* Sets a new stop lying epoch, i.e. the epoch after which the P distribution values become
-* true, as defined in the original implementation. For epochs < `stop_lying_epoch` the values
-* of the P distribution are multiplied by a factor equal to `12.0`.
-*
-* # Arguments
-*
-* `stop_lying_epoch` - new value for the stop lying epoch.
-* @param {number} stop_lying_epoch
-*/
-  stop_lying_epoch(stop_lying_epoch: number): void;
-/**
-* Sets a new theta, which determines the accuracy of the approximation. Must be **strictly greater than
-* 0.0**. Large values for θ increase the speed of the algorithm but decrease its accuracy.
-* For small values of θ it is less probable that a cell in the space partitioning tree will
-* be treated as a single point. For θ equal to 0.0 the method degenerates in the exact
-* version.
-*
-* # Arguments
-*
-* * `theta`  - new value for the theta.
-* @param {number} theta
-*/
-  theta(theta: number): void;
-/**
-* Sets a new value for the embedding dimension.
-*
-* # Arguments
-*
-* `embedding_dim` - new value for the embedding space dimensionality.
-* @param {number} embedding_dim
-*/
-  embedding_dim(embedding_dim: number): void;
-/**
-* Sets a new perplexity value.
-*
-* # Arguments
-*
-* `perplexity` - new value for the perplexity. It's used so that the bandwidth of the Gaussian
-*  kernels, is set in such a way that the perplexity of each the conditional distribution *Pi*
-*  equals a predefined perplexity *u*.
-*
-* A good value for perplexity lies between 5.0 and 50.0.
-* @param {number} perplexity
-*/
-  perplexity(perplexity: number): void;
 }
-/**
-*/
+export class bhtSNEf64 {
+  free(): void;
+  [Symbol.dispose](): void;
+  constructor(data: any, opt: any);
+  /**
+   * Performs a parallel Barnes-Hut approximation of the t-SNE algorithm.
+   *
+   * # Arguments
+   *
+   * `epochs` - Sets epochs, the maximum number of fitting iterations.
+   */
+  step(epochs: number): any;
+}
+export function initThreadPool(num_threads: number): Promise<any>;
 export class wbg_rayon_PoolBuilder {
+  private constructor();
   free(): void;
-/**
-* @returns {number}
-*/
+  [Symbol.dispose](): void;
   numThreads(): number;
-/**
-* @returns {number}
-*/
-  receiver(): number;
-/**
-*/
   build(): void;
+  receiver(): number;
 }
+export function wbg_rayon_start_worker(receiver: number): void;
 export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembly.Module;
 export interface InitOutput {
-  readonly __wbg_bhtsne_free: (a: number) => void;
-  readonly bhtsne_new: (a: number) => number;
-  readonly bhtsne_step: (a: number, b: number, c: number) => void;
-  readonly bhtsne_learning_rate: (a: number, b: number) => void;
-  readonly bhtsne_epochs: (a: number, b: number) => void;
-  readonly bhtsne_momentum: (a: number, b: number) => void;
-  readonly bhtsne_final_momentum: (a: number, b: number) => void;
-  readonly bhtsne_momentum_switch_epoch: (a: number, b: number) => void;
-  readonly bhtsne_stop_lying_epoch: (a: number, b: number) => void;
-  readonly bhtsne_theta: (a: number, b: number) => void;
-  readonly bhtsne_embedding_dim: (a: number, b: number) => void;
-  readonly bhtsne_perplexity: (a: number, b: number) => void;
-  readonly __wbg_wbg_rayon_poolbuilder_free: (a: number) => void;
+  readonly __wbg_bhtsnef32_free: (a: number, b: number) => void;
+  readonly __wbg_bhtsnef64_free: (a: number, b: number) => void;
+  readonly bhtsnef32_new: (a: any, b: any) => number;
+  readonly bhtsnef32_step: (a: number, b: number) => [number, number, number];
+  readonly bhtsnef64_new: (a: any, b: any) => number;
+  readonly bhtsnef64_step: (a: number, b: number) => [number, number, number];
+  readonly __wbg_wbg_rayon_poolbuilder_free: (a: number, b: number) => void;
+  readonly initThreadPool: (a: number) => any;
+  readonly wbg_rayon_poolbuilder_build: (a: number) => void;
   readonly wbg_rayon_poolbuilder_numThreads: (a: number) => number;
   readonly wbg_rayon_poolbuilder_receiver: (a: number) => number;
-  readonly wbg_rayon_poolbuilder_build: (a: number) => void;
-  readonly initThreadPool: (a: number) => number;
   readonly wbg_rayon_start_worker: (a: number) => void;
   readonly memory: WebAssembly.Memory;
   readonly __wbindgen_malloc: (a: number, b: number) => number;
   readonly __wbindgen_realloc: (a: number, b: number, c: number, d: number) => number;
-  readonly __wbindgen_add_to_stack_pointer: (a: number) => number;
   readonly __wbindgen_exn_store: (a: number) => void;
-  readonly __wbindgen_thread_destroy: (a?: number, b?: number) => void;
-  readonly __wbindgen_start: () => void;
+  readonly __externref_table_alloc: () => number;
+  readonly __wbindgen_externrefs: WebAssembly.Table;
+  readonly __externref_table_dealloc: (a: number) => void;
+  readonly __wbindgen_thread_destroy: (a?: number, b?: number, c?: number) => void;
+  readonly __wbindgen_start: (a: number) => void;
 }
 export type SyncInitInput = BufferSource | WebAssembly.Module;
 /**
 * Instantiates the given `module`, which can either be bytes or
 * a precompiled `WebAssembly.Module`.
 *
-* @param {SyncInitInput} module
-* @param {WebAssembly.Memory} maybe_memory
+* @param {{ module: SyncInitInput, memory?: WebAssembly.Memory, thread_stack_size?: number }} module - Passing `SyncInitInput` directly is deprecated.
+* @param {WebAssembly.Memory} memory - Deprecated.
 *
 * @returns {InitOutput}
 */
-export function initSync(module: SyncInitInput, maybe_memory?: WebAssembly.Memory): InitOutput;
+export function initSync(module: { module: SyncInitInput, memory?: WebAssembly.Memory, thread_stack_size?: number } | SyncInitInput, memory?: WebAssembly.Memory): InitOutput;
 /**
 * If `module_or_path` is {RequestInfo} or {URL}, makes a request and
 * for everything else, calls `WebAssembly.instantiate` directly.
 *
-* @param {InitInput | Promise<InitInput>} module_or_path
-* @param {WebAssembly.Memory} maybe_memory
+* @param {{ module_or_path: InitInput | Promise<InitInput>, memory?: WebAssembly.Memory, thread_stack_size?: number }} module_or_path - Passing `InitInput` directly is deprecated.
+* @param {WebAssembly.Memory} memory - Deprecated.
 *
 * @returns {Promise<InitOutput>}
 */
-export default function __wbg_init (module_or_path?: InitInput | Promise<InitInput>, maybe_memory?: WebAssembly.Memory): Promise<InitOutput>;
+export default function __wbg_init (module_or_path?: { module_or_path: InitInput | Promise<InitInput>, memory?: WebAssembly.Memory, thread_stack_size?: number } | InitInput | Promise<InitInput>, memory?: WebAssembly.Memory): Promise<InitOutput>;