@sauravpanda/flare 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2790 @@
1
+ /* @ts-self-types="./flare_web.d.ts" */
2
+
3
+ /**
4
+ * Flare LLM inference engine, exported to JS.
5
+ *
6
+ * Holds a loaded model and runs greedy/sampled token generation.
7
+ * The detected chat template is available via `chat_template_name` and
8
+ * `apply_chat_template` so the browser demo can format prompts correctly
9
+ * for instruction-tuned models.
10
+ */
11
+ export class FlareEngine {
12
+ static __wrap(ptr) {
13
+ ptr = ptr >>> 0;
14
+ const obj = Object.create(FlareEngine.prototype);
15
+ obj.__wbg_ptr = ptr;
16
+ FlareEngineFinalization.register(obj, obj.__wbg_ptr, obj);
17
+ return obj;
18
+ }
19
+ __destroy_into_raw() {
20
+ const ptr = this.__wbg_ptr;
21
+ this.__wbg_ptr = 0;
22
+ FlareEngineFinalization.unregister(this);
23
+ return ptr;
24
+ }
25
+ free() {
26
+ const ptr = this.__destroy_into_raw();
27
+ wasm.__wbg_flareengine_free(ptr, 0);
28
+ }
29
+ /**
30
+ * Whether the model requests automatic BOS token prepending.
31
+ *
32
+ * Sourced from `tokenizer.ggml.add_bos_token` in the GGUF metadata.
33
+ * When `true`, all generation methods (`generate_tokens`, `begin_stream`,
34
+ * `generate_text`, `generate_stream`) automatically prepend the BOS token
35
+ * to the input token sequence unless it is already the first token.
36
+ * @returns {boolean}
37
+ */
38
+ get add_bos_token() {
39
+ const ret = wasm.flareengine_add_bos_token(this.__wbg_ptr);
40
+ return ret !== 0;
41
+ }
42
+ /**
43
+ * Register a stop sequence.
44
+ *
45
+ * Generation halts (without emitting the matched tokens) as soon as the
46
+ * decoded output ends with `sequence`. Call once per stop string before
47
+ * `begin_stream` or `generate_with_params`.
48
+ *
49
+ * Stop sequences are cleared by `reset()` or `clear_stop_sequences()`.
50
+ *
51
+ * ```javascript
52
+ * engine.add_stop_sequence("<|im_end|>");
53
+ * engine.add_stop_sequence("</s>");
54
+ * engine.begin_stream_with_params(promptIds, 200, 0.8, 0.95, 40, 1.1);
55
+ * ```
56
+ * @param {string} sequence
57
+ */
58
+ add_stop_sequence(sequence) {
59
+ const ptr0 = passStringToWasm0(sequence, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
60
+ const len0 = WASM_VECTOR_LEN;
61
+ wasm.flareengine_add_stop_sequence(this.__wbg_ptr, ptr0, len0);
62
+ }
63
+ /**
64
+ * Format a user message (and optional system prompt) using the model's
65
+ * auto-detected chat template. Returns the formatted prompt string ready
66
+ * to be passed to `FlareTokenizer.encode()`.
67
+ *
68
+ * Pass an empty string for `system_message` to omit the system turn.
69
+ *
70
+ * # JS example
71
+ * ```javascript
72
+ * const prompt = engine.apply_chat_template(
73
+ * 'Explain quantum computing in simple terms.',
74
+ * 'You are a helpful assistant.'
75
+ * );
76
+ * const ids = tokenizer.encode(prompt);
77
+ * const output = engine.generate_tokens(ids, 128);
78
+ * ```
79
+ * @param {string} user_message
80
+ * @param {string} system_message
81
+ * @returns {string}
82
+ */
83
+ apply_chat_template(user_message, system_message) {
84
+ let deferred3_0;
85
+ let deferred3_1;
86
+ try {
87
+ const ptr0 = passStringToWasm0(user_message, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
88
+ const len0 = WASM_VECTOR_LEN;
89
+ const ptr1 = passStringToWasm0(system_message, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
90
+ const len1 = WASM_VECTOR_LEN;
91
+ const ret = wasm.flareengine_apply_chat_template(this.__wbg_ptr, ptr0, len0, ptr1, len1);
92
+ deferred3_0 = ret[0];
93
+ deferred3_1 = ret[1];
94
+ return getStringFromWasm0(ret[0], ret[1]);
95
+ } finally {
96
+ wasm.__wbindgen_free(deferred3_0, deferred3_1, 1);
97
+ }
98
+ }
99
+ /**
100
+ * Model architecture name from `general.architecture` in the GGUF metadata.
101
+ *
102
+ * Returns a lowercase string such as `"llama"`, `"mistral"`, `"gemma2"`,
103
+ * `"phi3"`, or `"qwen2"`. Returns `"unknown"` if the field is absent.
104
+ * @returns {string}
105
+ */
106
+ get architecture() {
107
+ let deferred1_0;
108
+ let deferred1_1;
109
+ try {
110
+ const ret = wasm.flareengine_architecture(this.__wbg_ptr);
111
+ deferred1_0 = ret[0];
112
+ deferred1_1 = ret[1];
113
+ return getStringFromWasm0(ret[0], ret[1]);
114
+ } finally {
115
+ wasm.__wbindgen_free(deferred1_0, deferred1_1, 1);
116
+ }
117
+ }
118
+ /**
119
+ * Prepare for token-by-token streaming.
120
+ *
121
+ * Runs the prefill pass on `prompt_tokens`, then initialises internal
122
+ * state so that subsequent calls to `next_token()` each produce one
123
+ * output token. Call `engine.reset()` before `begin_stream()` to start
124
+ * a fresh conversation.
125
+ *
126
+ * # JS example
127
+ * ```javascript
128
+ * engine.reset();
129
+ * engine.begin_stream(promptIds, 128);
130
+ * function tick() {
131
+ * const id = engine.next_token();
132
+ * if (id === undefined) { /* done */ return; }
133
+ * output.textContent += tokenizer.decode_one(id);
134
+ * requestAnimationFrame(tick); // yield to browser, then continue
135
+ * }
136
+ * requestAnimationFrame(tick);
137
+ * ```
138
+ * @param {Uint32Array} prompt_tokens
139
+ * @param {number} max_tokens
140
+ */
141
+ begin_stream(prompt_tokens, max_tokens) {
142
+ const ptr0 = passArray32ToWasm0(prompt_tokens, wasm.__wbindgen_malloc);
143
+ const len0 = WASM_VECTOR_LEN;
144
+ wasm.flareengine_begin_stream(this.__wbg_ptr, ptr0, len0, max_tokens);
145
+ }
146
+ /**
147
+ * Begin a token-by-token stream, healing the last prompt token.
148
+ *
149
+ * Identical to `begin_stream` but avoids double-processing the final prompt
150
+ * token: the prefill runs only tokens `[0 .. n-2]`, then the first
151
+ * `next_token()` call processes the last prompt token at its correct
152
+ * position `n-1` and produces the first output token. This keeps RoPE
153
+ * positional embeddings consistent and is recommended when the prompt
154
+ * ends at a natural token boundary (e.g. when encoding a user turn in a
155
+ * chat template).
156
+ *
157
+ * Falls back to `begin_stream` for prompts shorter than 2 tokens.
158
+ *
159
+ * # JS example
160
+ * ```javascript
161
+ * engine.reset();
162
+ * const ids = engine.encode_text(engine.apply_chat_template(userMsg, sysMsg));
163
+ * engine.begin_stream_healed(ids, 256);
164
+ * requestAnimationFrame(function tick() {
165
+ * const id = engine.next_token();
166
+ * if (id !== undefined) output.textContent += tokenizer.decode_one(id);
167
+ * if (!engine.stream_done) requestAnimationFrame(tick);
168
+ * });
169
+ * ```
170
+ * @param {Uint32Array} prompt_tokens
171
+ * @param {number} max_tokens
172
+ */
173
+ begin_stream_healed(prompt_tokens, max_tokens) {
174
+ const ptr0 = passArray32ToWasm0(prompt_tokens, wasm.__wbindgen_malloc);
175
+ const len0 = WASM_VECTOR_LEN;
176
+ wasm.flareengine_begin_stream_healed(this.__wbg_ptr, ptr0, len0, max_tokens);
177
+ }
178
+ /**
179
+ * Like `begin_stream_healed` but with full sampling parameters.
180
+ *
181
+ * Combines position-consistent prefill (see `begin_stream_healed`) with
182
+ * the same temperature / top-p / top-k / repeat-penalty / min-p controls
183
+ * available in `begin_stream_with_params`.
184
+ *
185
+ * # JS example
186
+ * ```javascript
187
+ * engine.reset();
188
+ * const ids = engine.encode_text(engine.apply_chat_template(userMsg, sysMsg));
189
+ * engine.begin_stream_healed_with_params(ids, 256, 0.8, 0.95, 40, 1.1, 0.0);
190
+ * requestAnimationFrame(function tick() {
191
+ * const id = engine.next_token();
192
+ * if (id !== undefined) output.textContent += tokenizer.decode_one(id);
193
+ * if (!engine.stream_done) requestAnimationFrame(tick);
194
+ * });
195
+ * ```
196
+ * @param {Uint32Array} prompt_tokens
197
+ * @param {number} max_tokens
198
+ * @param {number} temperature
199
+ * @param {number} top_p
200
+ * @param {number} top_k
201
+ * @param {number} repeat_penalty
202
+ * @param {number} min_p
203
+ */
204
+ begin_stream_healed_with_params(prompt_tokens, max_tokens, temperature, top_p, top_k, repeat_penalty, min_p) {
205
+ const ptr0 = passArray32ToWasm0(prompt_tokens, wasm.__wbindgen_malloc);
206
+ const len0 = WASM_VECTOR_LEN;
207
+ wasm.flareengine_begin_stream_healed_with_params(this.__wbg_ptr, ptr0, len0, max_tokens, temperature, top_p, top_k, repeat_penalty, min_p);
208
+ }
209
+ /**
210
+ * Like `begin_stream` but with temperature / top-p sampling.
211
+ *
212
+ * `temperature`: 0.0 = greedy, 0.7–1.0 = typical creative range.
213
+ * `top_p`: nucleus sampling threshold (0.0–1.0); 0.9 is a good default.
214
+ *
215
+ * # JS example
216
+ * ```javascript
217
+ * engine.reset();
218
+ * engine.begin_stream_with_params(promptIds, 128, 0.8, 0.9);
219
+ * function tick() {
220
+ * const id = engine.next_token();
221
+ * if (id === undefined) return;
222
+ * output.textContent += tokenizer.decode_one(id);
223
+ * requestAnimationFrame(tick);
224
+ * }
225
+ * requestAnimationFrame(tick);
226
+ * ```
227
+ * Begin a token-by-token stream with sampling parameters including top-k.
228
+ *
229
+ * - `temperature`: controls randomness (0 = greedy, higher = more random)
230
+ * - `top_p`: nucleus sampling — keep the smallest token set whose cumulative
231
+ * probability ≥ `top_p` (1.0 = disabled; applied when < 1.0)
232
+ * - `top_k`: keep only the `top_k` highest-probability tokens before sampling
233
+ * (0 = disabled; applied when `top_p` is 1.0 and `top_k` > 0)
234
+ * - `repeat_penalty`: penalty applied to logits of recently-seen tokens to
235
+ * reduce repetition (1.0 = disabled, 1.1–1.3 = typical range)
236
+ *
237
+ * ```javascript
238
+ * engine.begin_stream_with_params(promptIds, 200, 0.8, 0.95, 40, 1.1, 0.0);
239
+ * ```
240
+ * @param {Uint32Array} prompt_tokens
241
+ * @param {number} max_tokens
242
+ * @param {number} temperature
243
+ * @param {number} top_p
244
+ * @param {number} top_k
245
+ * @param {number} repeat_penalty
246
+ * @param {number} min_p
247
+ */
248
+ begin_stream_with_params(prompt_tokens, max_tokens, temperature, top_p, top_k, repeat_penalty, min_p) {
249
+ const ptr0 = passArray32ToWasm0(prompt_tokens, wasm.__wbindgen_malloc);
250
+ const len0 = WASM_VECTOR_LEN;
251
+ wasm.flareengine_begin_stream_with_params(this.__wbg_ptr, ptr0, len0, max_tokens, temperature, top_p, top_k, repeat_penalty, min_p);
252
+ }
253
+ /**
254
+ * BOS (beginning of sequence) token ID from the GGUF model metadata, if present.
255
+ * Some models require this to be prepended to the input token sequence.
256
+ * @returns {number | undefined}
257
+ */
258
+ get bos_token_id() {
259
+ const ret = wasm.flareengine_bos_token_id(this.__wbg_ptr);
260
+ return ret === 0x100000001 ? undefined : ret;
261
+ }
262
+ /**
263
+ * Name of the auto-detected chat template (e.g. `"ChatML"`, `"Llama3"`,
264
+ * `"Alpaca"`, `"Raw"`). Use this to display the template in the UI and
265
+ * decide whether to call `apply_chat_template` before encoding.
266
+ * @returns {string}
267
+ */
268
+ get chat_template_name() {
269
+ let deferred1_0;
270
+ let deferred1_1;
271
+ try {
272
+ const ret = wasm.flareengine_chat_template_name(this.__wbg_ptr);
273
+ deferred1_0 = ret[0];
274
+ deferred1_1 = ret[1];
275
+ return getStringFromWasm0(ret[0], ret[1]);
276
+ } finally {
277
+ wasm.__wbindgen_free(deferred1_0, deferred1_1, 1);
278
+ }
279
+ }
280
+ /**
281
+ * Clear any previously loaded raw quantized weights.
282
+ *
283
+ * After calling this the engine uses the f32 dequantized path for all
284
+ * matrix operations until `load_raw_weights` is called again.
285
+ */
286
+ clear_raw_weights() {
287
+ wasm.flareengine_clear_raw_weights(this.__wbg_ptr);
288
+ }
289
+ /**
290
+ * Remove all registered stop sequences.
291
+ */
292
+ clear_stop_sequences() {
293
+ wasm.flareengine_clear_stop_sequences(this.__wbg_ptr);
294
+ }
295
+ /**
296
+ * Compute the perplexity of `text` under the loaded model.
297
+ *
298
+ * Encodes `text` with the embedded GGUF vocabulary, runs one forward pass
299
+ * per token, and measures the log-probability of each correct next-token
300
+ * prediction. Perplexity = exp(−mean(log_probs)).
301
+ *
302
+ * The KV cache is reset **before and after** the evaluation so the engine
303
+ * returns to a clean state.
304
+ *
305
+ * Returns `f32::INFINITY` if the text encodes to fewer than 2 tokens or if
306
+ * no GGUF vocabulary is available.
307
+ *
308
+ * # JS example
309
+ * ```javascript
310
+ * const ppl = engine.compute_perplexity("The quick brown fox");
311
+ * console.log("Perplexity:", ppl);
312
+ * ```
313
+ * @param {string} text
314
+ * @returns {number}
315
+ */
316
+ compute_perplexity(text) {
317
+ const ptr0 = passStringToWasm0(text, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
318
+ const len0 = WASM_VECTOR_LEN;
319
+ const ret = wasm.flareengine_compute_perplexity(this.__wbg_ptr, ptr0, len0);
320
+ return ret;
321
+ }
322
+ /**
323
+ * Fraction of the context window consumed (0.0 = empty, 1.0 = full).
324
+ *
325
+ * Equivalent to `tokens_used / max_seq_len`. Returns 0.0 if `max_seq_len` is 0.
326
+ * @returns {number}
327
+ */
328
+ get context_window_pct() {
329
+ const ret = wasm.flareengine_context_window_pct(this.__wbg_ptr);
330
+ return ret;
331
+ }
332
+ /**
333
+ * Count the number of tokens in `text` using the model's embedded GGUF vocabulary.
334
+ *
335
+ * Returns 0 if the model was not loaded from a GGUF file (e.g. SafeTensors only).
336
+ *
337
+ * # JS example
338
+ * ```javascript
339
+ * const n = engine.count_tokens(textarea.value);
340
+ * counter.textContent = `${n} / ${engine.max_seq_len} tokens`;
341
+ * ```
342
+ * @param {string} text
343
+ * @returns {number}
344
+ */
345
+ count_tokens(text) {
346
+ const ptr0 = passStringToWasm0(text, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
347
+ const len0 = WASM_VECTOR_LEN;
348
+ const ret = wasm.flareengine_count_tokens(this.__wbg_ptr, ptr0, len0);
349
+ return ret >>> 0;
350
+ }
351
+ /**
352
+ * Decode token IDs to text using the embedded GGUF vocabulary.
353
+ *
354
+ * Returns an empty string if no GGUF vocab is available.
355
+ *
356
+ * # JS example
357
+ * ```javascript
358
+ * const text = engine.decode_ids(generatedIds);
359
+ * ```
360
+ * @param {Uint32Array} ids
361
+ * @returns {string}
362
+ */
363
+ decode_ids(ids) {
364
+ let deferred2_0;
365
+ let deferred2_1;
366
+ try {
367
+ const ptr0 = passArray32ToWasm0(ids, wasm.__wbindgen_malloc);
368
+ const len0 = WASM_VECTOR_LEN;
369
+ const ret = wasm.flareengine_decode_ids(this.__wbg_ptr, ptr0, len0);
370
+ deferred2_0 = ret[0];
371
+ deferred2_1 = ret[1];
372
+ return getStringFromWasm0(ret[0], ret[1]);
373
+ } finally {
374
+ wasm.__wbindgen_free(deferred2_0, deferred2_1, 1);
375
+ }
376
+ }
377
+ /**
378
+ * Decode a single token ID to its text piece.
379
+ *
380
+ * Convenience wrapper around `decode_ids` for use directly inside a
381
+ * `next_token()` loop so callers don't need a separate `FlareTokenizer`.
382
+ *
383
+ * Returns an empty string if no GGUF vocab is loaded.
384
+ *
385
+ * # JS example
386
+ * ```javascript
387
+ * engine.begin_stream(promptIds, 128);
388
+ * requestAnimationFrame(function tick() {
389
+ * const id = engine.next_token();
390
+ * if (id !== undefined) output.textContent += engine.decode_token(id);
391
+ * if (!engine.stream_done) requestAnimationFrame(tick);
392
+ * });
393
+ * ```
394
+ * @param {number} id
395
+ * @returns {string}
396
+ */
397
+ decode_token(id) {
398
+ let deferred1_0;
399
+ let deferred1_1;
400
+ try {
401
+ const ret = wasm.flareengine_decode_token(this.__wbg_ptr, id);
402
+ deferred1_0 = ret[0];
403
+ deferred1_1 = ret[1];
404
+ return getStringFromWasm0(ret[0], ret[1]);
405
+ } finally {
406
+ wasm.__wbindgen_free(deferred1_0, deferred1_1, 1);
407
+ }
408
+ }
409
+ /**
410
+ * Decode a single token ID, correctly handling multi-byte UTF-8 sequences.
411
+ *
412
+ * SentencePiece tokenizers encode non-ASCII characters as consecutive
413
+ * byte-level tokens such as `<0xE4>`, `<0xB8>`, `<0xAD>` (the UTF-8
414
+ * encoding of `中`). The basic `decode_token` function returns incorrect
415
+ * Latin-1 characters in these cases because it treats each byte as an
416
+ * independent Unicode scalar.
417
+ *
418
+ * `decode_token_chunk` accumulates bytes in an internal buffer until a
419
+ * complete, valid UTF-8 sequence is assembled, then returns it as a
420
+ * `String`. While the sequence is incomplete it returns an empty string,
421
+ * and when a regular (non-byte) token is encountered it flushes any
422
+ * buffered bytes (replacing invalid sequences with U+FFFD) before
423
+ * returning the decoded text.
424
+ *
425
+ * **Use this instead of `decode_token` whenever you are streaming tokens
426
+ * that may include non-Latin characters.**
427
+ *
428
+ * ```javascript
429
+ * engine.begin_stream(prompt, 256);
430
+ * function tick() {
431
+ * const id = engine.next_token();
432
+ * if (id !== undefined) output.textContent += engine.decode_token_chunk(id);
433
+ * if (!engine.stream_done) requestAnimationFrame(tick);
434
+ * }
435
+ * requestAnimationFrame(tick);
436
+ * ```
437
+ * @param {number} id
438
+ * @returns {string}
439
+ */
440
+ decode_token_chunk(id) {
441
+ let deferred1_0;
442
+ let deferred1_1;
443
+ try {
444
+ const ret = wasm.flareengine_decode_token_chunk(this.__wbg_ptr, id);
445
+ deferred1_0 = ret[0];
446
+ deferred1_1 = ret[1];
447
+ return getStringFromWasm0(ret[0], ret[1]);
448
+ } finally {
449
+ wasm.__wbindgen_free(deferred1_0, deferred1_1, 1);
450
+ }
451
+ }
452
+ /**
453
+ * Look up the token embedding row for `token_id` as a flat `Float32Array`.
454
+ *
455
+ * The length of the returned vector is `hidden_dim`. See also
456
+ * [`FlareEngine::output_projection`] for the inverse tail step.
457
+ * @param {number} token_id
458
+ * @returns {Float32Array}
459
+ */
460
+ embed_token(token_id) {
461
+ const ret = wasm.flareengine_embed_token(this.__wbg_ptr, token_id);
462
+ var v1 = getArrayF32FromWasm0(ret[0], ret[1]).slice();
463
+ wasm.__wbindgen_free(ret[0], ret[1] * 4, 4);
464
+ return v1;
465
+ }
466
+ /**
467
+ * # JS example
468
+ * ```javascript
469
+ * const ids = engine.encode_text("Hello, world!");
470
+ * const output = engine.generate_tokens(ids, 64);
471
+ * ```
472
+ * @param {string} text
473
+ * @returns {Uint32Array}
474
+ */
475
+ encode_text(text) {
476
+ const ptr0 = passStringToWasm0(text, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
477
+ const len0 = WASM_VECTOR_LEN;
478
+ const ret = wasm.flareengine_encode_text(this.__wbg_ptr, ptr0, len0);
479
+ var v2 = getArrayU32FromWasm0(ret[0], ret[1]).slice();
480
+ wasm.__wbindgen_free(ret[0], ret[1] * 4, 4);
481
+ return v2;
482
+ }
483
+ /**
484
+ * EOS (end of sequence) token ID from the GGUF model metadata, if present.
485
+ * Generation stops automatically when this token is produced.
486
+ * @returns {number | undefined}
487
+ */
488
+ get eos_token_id() {
489
+ const ret = wasm.flareengine_eos_token_id(this.__wbg_ptr);
490
+ return ret === 0x100000001 ? undefined : ret;
491
+ }
492
+ /**
493
+ * Streaming text-in / text-out generation with a per-token JS callback.
494
+ *
495
+ * Encodes `prompt` with the embedded GGUF vocabulary, generates up to
496
+ * `max_tokens` tokens, and calls `on_token(token_str)` with the decoded
497
+ * text for each token as it is produced. Returns the number of tokens
498
+ * generated (excluding any EOS token).
499
+ *
500
+ * Returns 0 if no GGUF vocab is available.
501
+ *
502
+ * # Note on browser streaming
503
+ * `on_token` is called synchronously inside WASM, so the browser will
504
+ * not visually update between tokens. For visible character-by-character
505
+ * output, use `begin_stream` + `next_token` with `requestAnimationFrame`.
506
+ *
507
+ * # JS example
508
+ * ```javascript
509
+ * engine.reset();
510
+ * let out = '';
511
+ * const count = engine.generate_stream("What is Rust?", 128, (token) => {
512
+ * out += token;
513
+ * });
514
+ * output.textContent = out;
515
+ * ```
516
+ * @param {string} prompt
517
+ * @param {number} max_tokens
518
+ * @param {Function} on_token
519
+ * @returns {number}
520
+ */
521
+ generate_stream(prompt, max_tokens, on_token) {
522
+ const ptr0 = passStringToWasm0(prompt, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
523
+ const len0 = WASM_VECTOR_LEN;
524
+ const ret = wasm.flareengine_generate_stream(this.__wbg_ptr, ptr0, len0, max_tokens, on_token);
525
+ return ret >>> 0;
526
+ }
527
+ /**
528
+ * Streaming text-in / text-out with explicit sampling parameters.
529
+ *
530
+ * Like `generate_stream` but with the full set of sampling controls:
531
+ *
532
+ * - `temperature`: 0 = greedy, higher = more diverse
533
+ * - `top_p`: nucleus sampling (1.0 = disabled)
534
+ * - `top_k`: top-k sampling, applied when `top_p` is 1.0 and `min_p` is 0.0 (0 = disabled)
535
+ * - `repeat_penalty`: repetition penalty (1.0 = disabled, 1.1–1.3 = typical)
536
+ * - `min_p`: min-p threshold (0.0 = disabled)
537
+ *
538
+ * Encodes `prompt` with the embedded GGUF vocabulary, generates up to
539
+ * `max_tokens` tokens, and calls `on_token(token_str)` with the decoded
540
+ * text for each token. Respects stop sequences registered via
541
+ * `add_stop_sequence`. Returns the number of tokens generated.
542
+ *
543
+ * Returns 0 if no GGUF vocab is available.
544
+ *
545
+ * # JS example
546
+ * ```javascript
547
+ * engine.add_stop_sequence("<|im_end|>");
548
+ * engine.reset();
549
+ * let out = '';
550
+ * const count = engine.generate_stream_with_params(
551
+ * prompt, 200, 0.8, 0.95, 40, 1.1, 0.0,
552
+ * (token) => { out += token; }
553
+ * );
554
+ * ```
555
+ * @param {string} prompt
556
+ * @param {number} max_tokens
557
+ * @param {number} temperature
558
+ * @param {number} top_p
559
+ * @param {number} top_k
560
+ * @param {number} repeat_penalty
561
+ * @param {number} min_p
562
+ * @param {Function} on_token
563
+ * @returns {number}
564
+ */
565
+ generate_stream_with_params(prompt, max_tokens, temperature, top_p, top_k, repeat_penalty, min_p, on_token) {
566
+ const ptr0 = passStringToWasm0(prompt, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
567
+ const len0 = WASM_VECTOR_LEN;
568
+ const ret = wasm.flareengine_generate_stream_with_params(this.__wbg_ptr, ptr0, len0, max_tokens, temperature, top_p, top_k, repeat_penalty, min_p, on_token);
569
+ return ret >>> 0;
570
+ }
571
+ /**
572
+ * Full text-in / text-out generation using the embedded GGUF vocabulary.
573
+ *
574
+ * Encodes `prompt` with the embedded vocab, runs greedy generation for up
575
+ * to `max_tokens` steps, then decodes the output back to text. Stops
576
+ * automatically at EOS.
577
+ *
578
+ * Returns an empty string if no GGUF vocab is available.
579
+ *
580
+ * # JS example
581
+ * ```javascript
582
+ * engine.reset();
583
+ * const response = engine.generate_text("What is Rust?", 128);
584
+ * output.textContent = response;
585
+ * ```
586
+ * @param {string} prompt
587
+ * @param {number} max_tokens
588
+ * @returns {string}
589
+ */
590
+ generate_text(prompt, max_tokens) {
591
+ let deferred2_0;
592
+ let deferred2_1;
593
+ try {
594
+ const ptr0 = passStringToWasm0(prompt, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
595
+ const len0 = WASM_VECTOR_LEN;
596
+ const ret = wasm.flareengine_generate_text(this.__wbg_ptr, ptr0, len0, max_tokens);
597
+ deferred2_0 = ret[0];
598
+ deferred2_1 = ret[1];
599
+ return getStringFromWasm0(ret[0], ret[1]);
600
+ } finally {
601
+ wasm.__wbindgen_free(deferred2_0, deferred2_1, 1);
602
+ }
603
+ }
604
+ /**
605
+ * Full text-in / text-out generation with explicit sampling parameters.
606
+ *
607
+ * Like `generate_text` but with the full set of sampling controls:
608
+ *
609
+ * - `temperature`: 0 = greedy, higher = more diverse
610
+ * - `top_p`: nucleus sampling (1.0 = disabled)
611
+ * - `top_k`: top-k sampling, applied when `top_p` is 1.0 and `min_p` is 0.0 (0 = disabled)
612
+ * - `repeat_penalty`: repetition penalty (1.0 = disabled)
613
+ * - `min_p`: min-p threshold (0.0 = disabled)
614
+ *
615
+ * Returns the decoded generated text. Returns an empty string if no GGUF vocab is available.
616
+ * Respects stop sequences registered via `add_stop_sequence`.
617
+ *
618
+ * # JS example
619
+ * ```javascript
620
+ * engine.reset();
621
+ * const response = engine.generate_text_with_params(
622
+ * "What is Rust?", 128, 0.8, 0.95, 40, 1.1, 0.0
623
+ * );
624
+ * output.textContent = response;
625
+ * ```
626
+ * @param {string} prompt
627
+ * @param {number} max_tokens
628
+ * @param {number} temperature
629
+ * @param {number} top_p
630
+ * @param {number} top_k
631
+ * @param {number} repeat_penalty
632
+ * @param {number} min_p
633
+ * @returns {string}
634
+ */
635
+ generate_text_with_params(prompt, max_tokens, temperature, top_p, top_k, repeat_penalty, min_p) {
636
+ let deferred2_0;
637
+ let deferred2_1;
638
+ try {
639
+ const ptr0 = passStringToWasm0(prompt, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
640
+ const len0 = WASM_VECTOR_LEN;
641
+ const ret = wasm.flareengine_generate_text_with_params(this.__wbg_ptr, ptr0, len0, max_tokens, temperature, top_p, top_k, repeat_penalty, min_p);
642
+ deferred2_0 = ret[0];
643
+ deferred2_1 = ret[1];
644
+ return getStringFromWasm0(ret[0], ret[1]);
645
+ } finally {
646
+ wasm.__wbindgen_free(deferred2_0, deferred2_1, 1);
647
+ }
648
+ }
649
+ /**
650
+ * Generate `max_tokens` tokens starting from `prompt_tokens` (greedy).
651
+ * Stops early at EOS. Returns a Uint32Array of generated token IDs.
652
+ * @param {Uint32Array} prompt_tokens
653
+ * @param {number} max_tokens
654
+ * @returns {Uint32Array}
655
+ */
656
+ generate_tokens(prompt_tokens, max_tokens) {
657
+ const ptr0 = passArray32ToWasm0(prompt_tokens, wasm.__wbindgen_malloc);
658
+ const len0 = WASM_VECTOR_LEN;
659
+ const ret = wasm.flareengine_generate_tokens(this.__wbg_ptr, ptr0, len0, max_tokens);
660
+ var v2 = getArrayU32FromWasm0(ret[0], ret[1]).slice();
661
+ wasm.__wbindgen_free(ret[0], ret[1] * 4, 4);
662
+ return v2;
663
+ }
664
+ /**
665
+ * Generate a batch of tokens with explicit sampling parameters.
666
+ *
667
+ * - `temperature`: 0 = greedy, higher = more diverse
668
+ * - `top_p`: nucleus sampling (1.0 = disabled)
669
+ * - `top_k`: top-k sampling, applied when `top_p` is 1.0 and `min_p` is 0.0 (0 = disabled)
670
+ * - `repeat_penalty`: repetition penalty applied to recently-seen tokens (1.0 = disabled)
671
+ * - `min_p`: min-p threshold (0.0 = disabled); applied after `top_p`, before `top_k`
672
+ *
673
+ * Stops early at EOS. Uses a fixed LCG RNG seed for reproducibility.
674
+ * @param {Uint32Array} prompt_tokens
675
+ * @param {number} max_tokens
676
+ * @param {number} temperature
677
+ * @param {number} top_p
678
+ * @param {number} top_k
679
+ * @param {number} repeat_penalty
680
+ * @param {number} min_p
681
+ * @returns {Uint32Array}
682
+ */
683
+ generate_with_params(prompt_tokens, max_tokens, temperature, top_p, top_k, repeat_penalty, min_p) {
684
+ const ptr0 = passArray32ToWasm0(prompt_tokens, wasm.__wbindgen_malloc);
685
+ const len0 = WASM_VECTOR_LEN;
686
+ const ret = wasm.flareengine_generate_with_params(this.__wbg_ptr, ptr0, len0, max_tokens, temperature, top_p, top_k, repeat_penalty, min_p);
687
+ var v2 = getArrayU32FromWasm0(ret[0], ret[1]).slice();
688
+ wasm.__wbindgen_free(ret[0], ret[1] * 4, 4);
689
+ return v2;
690
+ }
691
+ /**
692
+ * Returns `true` if raw quantized weights are currently loaded.
693
+ * @returns {boolean}
694
+ */
695
+ get has_raw_weights() {
696
+ const ret = wasm.flareengine_has_raw_weights(this.__wbg_ptr);
697
+ return ret !== 0;
698
+ }
699
+ /**
700
+ * Get the hidden dimension.
701
+ * @returns {number}
702
+ */
703
+ get hidden_dim() {
704
+ const ret = wasm.flareengine_hidden_dim(this.__wbg_ptr);
705
+ return ret >>> 0;
706
+ }
707
+ /**
708
+ * Try to initialise the WebGPU compute backend.
709
+ *
710
+ * Call this after `load()` to enable GPU-accelerated matrix operations
711
+ * (matvec, matmul, silu_mul). Falls back silently to CPU if WebGPU is
712
+ * unavailable or adapter request fails.
713
+ *
714
+ * Returns `true` if a GPU backend was successfully initialised.
715
+ *
716
+ * ```javascript
717
+ * const engine = FlareEngine.load(bytes);
718
+ * const gpuEnabled = await engine.init_gpu();
719
+ * console.log('GPU:', gpuEnabled);
720
+ * ```
721
+ * @returns {Promise<boolean>}
722
+ */
723
+ init_gpu() {
724
+ const ret = wasm.flareengine_init_gpu(this.__wbg_ptr);
725
+ return ret;
726
+ }
727
+ /**
728
+ * Initialise the WebGPU backend using previously serialised pipeline cache
729
+ * bytes (from `engine.pipeline_cache_data()`).
730
+ *
731
+ * On backends that support driver-managed pipeline caches (Vulkan native),
732
+ * this allows the driver to reuse compiled GPU machine code from a previous
733
+ * run, eliminating cold-start shader recompilation (typically 100ms–2s).
734
+ *
735
+ * On unsupported backends (WebGPU, Metal, DX12) this behaves identically to
736
+ * `init_gpu()` — the cache bytes are silently ignored.
737
+ *
738
+ * ```javascript
739
+ * const cached = localStorage.getItem('flare-pipeline-cache');
740
+ * const cacheBytes = cached ? new Uint8Array(JSON.parse(cached)) : new Uint8Array();
741
+ * await engine.init_gpu_with_cache(cacheBytes);
742
+ * // After inference, persist the cache:
743
+ * const data = engine.pipeline_cache_data();
744
+ * if (data.length > 0) {
745
+ * localStorage.setItem('flare-pipeline-cache', JSON.stringify(Array.from(data)));
746
+ * }
747
+ * ```
748
+ * @param {Uint8Array} cache_data
749
+ * @returns {Promise<boolean>}
750
+ */
751
+ init_gpu_with_cache(cache_data) {
752
+ const ptr0 = passArray8ToWasm0(cache_data, wasm.__wbindgen_malloc);
753
+ const len0 = WASM_VECTOR_LEN;
754
+ const ret = wasm.flareengine_init_gpu_with_cache(this.__wbg_ptr, ptr0, len0);
755
+ return ret;
756
+ }
757
+ /**
758
+ * Milliseconds spent in decode steps of the last generation call.
759
+ *
760
+ * For batch generation (`generate_tokens` etc.) this is always 0 — see
761
+ * `last_prefill_ms` for the total time. For the streaming API this
762
+ * accumulates across all `next_token()` calls since the last
763
+ * `begin_stream()`.
764
+ * @returns {number}
765
+ */
766
+ get last_decode_ms() {
767
+ const ret = wasm.flareengine_last_decode_ms(this.__wbg_ptr);
768
+ return ret;
769
+ }
770
+ /**
771
+ * Raw pre-temperature logits from the most recent forward pass.
772
+ *
773
+ * Returns the full vocabulary logit vector as a `Float32Array`. These
774
+ * are the raw values *before* temperature scaling, repetition penalty,
775
+ * or any sampling filter — equivalent to the model's raw next-token
776
+ * distribution.
777
+ *
778
+ * Useful for:
779
+ * - Scoring candidate continuations (classification, ranking)
780
+ * - Computing perplexity / cross-entropy
781
+ * - Inspecting the model's "confidence" about the next token
782
+ *
783
+ * Returns an empty array before any inference has been run, and is
784
+ * cleared by `reset()`.
785
+ *
786
+ * ```javascript
787
+ * engine.begin_stream(promptIds, 1); // one token prefill+decode
788
+ * engine.next_token();
789
+ * const logits = engine.last_logits; // Float32Array of vocab_size
790
+ * const topTokenId = logits.indexOf(Math.max(...logits));
791
+ * ```
792
+ * @returns {Float32Array}
793
+ */
794
+ get last_logits() {
795
+ const ret = wasm.flareengine_last_logits(this.__wbg_ptr);
796
+ var v1 = getArrayF32FromWasm0(ret[0], ret[1]).slice();
797
+ wasm.__wbindgen_free(ret[0], ret[1] * 4, 4);
798
+ return v1;
799
+ }
800
+ /**
801
+ * Milliseconds spent in the last prefill (prompt processing) phase.
802
+ *
803
+ * For `generate_tokens` / `generate_text` / `generate_with_params` this
804
+ * covers the entire call (prefill + decode are not separated internally).
805
+ * For the streaming API (`begin_stream` + `next_token`) this covers only
806
+ * the `begin_stream()` call.
807
+ * @returns {number}
808
+ */
809
+ get last_prefill_ms() {
810
+ const ret = wasm.flareengine_last_prefill_ms(this.__wbg_ptr);
811
+ return ret;
812
+ }
813
+ /**
814
+ * Number of tokens generated by the last generation call (excludes prompt
815
+ * tokens and the EOS token itself).
816
+ * @returns {number}
817
+ */
818
+ get last_tokens_generated() {
819
+ const ret = wasm.flareengine_last_tokens_generated(this.__wbg_ptr);
820
+ return ret >>> 0;
821
+ }
822
+ /**
823
+ * Load a GGUF model from a Uint8Array of bytes (e.g. from `fetch`).
824
+ * @param {Uint8Array} gguf_bytes
825
+ * @returns {FlareEngine}
826
+ */
827
+ static load(gguf_bytes) {
828
+ const ptr0 = passArray8ToWasm0(gguf_bytes, wasm.__wbindgen_malloc);
829
+ const len0 = WASM_VECTOR_LEN;
830
+ const ret = wasm.flareengine_load(ptr0, len0);
831
+ if (ret[2]) {
832
+ throw takeFromExternrefTable0(ret[1]);
833
+ }
834
+ return FlareEngine.__wrap(ret[0]);
835
+ }
836
+ /**
837
+ * Load raw quantized weights from GGUF bytes so the GPU fused
838
+ * dequant+matvec kernels can be used during inference.
839
+ *
840
+ * Call this **after** `init_gpu()` so the backend is set before the raw
841
+ * weights are attached. The method is a no-op (returns `false`) if a
842
+ * layer's weights are in an unsupported quantization format — the engine
843
+ * continues to work using the f32 path loaded at `FlareEngine.load()`.
844
+ *
845
+ * Returns `true` if all layers were loaded successfully, `false` if any
846
+ * layer fell back to the f32 path.
847
+ *
848
+ * ```javascript
849
+ * const engine = FlareEngine.load(bytes);
850
+ * await engine.init_gpu();
851
+ * const ok = engine.load_raw_weights(bytes);
852
+ * console.log('Raw weights loaded:', ok);
853
+ * ```
854
+ * @param {Uint8Array} gguf_bytes
855
+ * @returns {boolean}
856
+ */
857
+ load_raw_weights(gguf_bytes) {
858
+ const ptr0 = passArray8ToWasm0(gguf_bytes, wasm.__wbindgen_malloc);
859
+ const len0 = WASM_VECTOR_LEN;
860
+ const ret = wasm.flareengine_load_raw_weights(this.__wbg_ptr, ptr0, len0);
861
+ return ret !== 0;
862
+ }
863
+ /**
864
+ * Maximum sequence length (context window size) of the loaded model.
865
+ *
866
+ * Use this to warn users when their prompt is approaching the limit.
867
+ * @returns {number}
868
+ */
869
+ get max_seq_len() {
870
+ const ret = wasm.flareengine_max_seq_len(this.__wbg_ptr);
871
+ return ret >>> 0;
872
+ }
873
+ /**
874
+ * Merge a LoRA adapter (SafeTensors format) into the model weights.
875
+ *
876
+ * Pass the raw bytes of a `.safetensors` file containing LoRA A/B matrices.
877
+ * After merging, the adapter's effect is permanent for this engine instance;
878
+ * call `FlareEngine.load()` again to restore the base model.
879
+ *
880
+ * ```javascript
881
+ * const resp = await fetch('lora-adapter.safetensors');
882
+ * const bytes = new Uint8Array(await resp.arrayBuffer());
883
+ * engine.merge_lora(bytes);
884
+ * ```
885
+ * @param {Uint8Array} adapter_bytes
886
+ */
887
+ merge_lora(adapter_bytes) {
888
+ const ptr0 = passArray8ToWasm0(adapter_bytes, wasm.__wbindgen_malloc);
889
+ const len0 = WASM_VECTOR_LEN;
890
+ const ret = wasm.flareengine_merge_lora(this.__wbg_ptr, ptr0, len0);
891
+ if (ret[1]) {
892
+ throw takeFromExternrefTable0(ret[0]);
893
+ }
894
+ }
895
+ /**
896
+ * Merge a LoRA adapter with a custom alpha scaling factor.
897
+ *
898
+ * Same as `merge_lora` but overrides the alpha value embedded in the
899
+ * adapter file. The effective scaling is `alpha / rank`.
900
+ * @param {Uint8Array} adapter_bytes
901
+ * @param {number} alpha
902
+ */
903
+ merge_lora_with_alpha(adapter_bytes, alpha) {
904
+ const ptr0 = passArray8ToWasm0(adapter_bytes, wasm.__wbindgen_malloc);
905
+ const len0 = WASM_VECTOR_LEN;
906
+ const ret = wasm.flareengine_merge_lora_with_alpha(this.__wbg_ptr, ptr0, len0, alpha);
907
+ if (ret[1]) {
908
+ throw takeFromExternrefTable0(ret[0]);
909
+ }
910
+ }
911
+ /**
912
+ * All GGUF model metadata as a JSON string.
913
+ *
914
+ * Returns a JSON object mapping each metadata key to its value.
915
+ * Large vocabulary arrays (`tokenizer.ggml.tokens`, `.merges`, `.scores`,
916
+ * `.added_tokens`) are omitted to keep the payload practical.
917
+ * Small arrays (≤ 64 entries) are included as JSON arrays.
918
+ *
919
+ * Returns `"{}"` if the model was not loaded from a GGUF file.
920
+ *
921
+ * ```javascript
922
+ * const meta = JSON.parse(engine.metadata_json);
923
+ * console.log(meta["llama.context_length"]); // e.g. 4096
924
+ * ```
925
+ * @returns {string}
926
+ */
927
+ get metadata_json() {
928
+ let deferred1_0;
929
+ let deferred1_1;
930
+ try {
931
+ const ret = wasm.flareengine_metadata_json(this.__wbg_ptr);
932
+ deferred1_0 = ret[0];
933
+ deferred1_1 = ret[1];
934
+ return getStringFromWasm0(ret[0], ret[1]);
935
+ } finally {
936
+ wasm.__wbindgen_free(deferred1_0, deferred1_1, 1);
937
+ }
938
+ }
939
+ /**
940
+ * Model display name from `general.name` in the GGUF metadata.
941
+ *
942
+ * Returns the human-readable name embedded by the model author (e.g.
943
+ * `"Llama 3.2 1B Instruct"`). Returns an empty string if the field is absent.
944
+ * @returns {string}
945
+ */
946
+ get model_name() {
947
+ let deferred1_0;
948
+ let deferred1_1;
949
+ try {
950
+ const ret = wasm.flareengine_model_name(this.__wbg_ptr);
951
+ deferred1_0 = ret[0];
952
+ deferred1_1 = ret[1];
953
+ return getStringFromWasm0(ret[0], ret[1]);
954
+ } finally {
955
+ wasm.__wbindgen_free(deferred1_0, deferred1_1, 1);
956
+ }
957
+ }
958
+ /**
959
+ * Generate and return the next token ID, or `undefined` when the stream
960
+ * is complete (EOS reached, `max_tokens` exhausted, or `stop_stream()`
961
+ * was called).
962
+ *
963
+ * Sampling parameters are those set by the most recent `begin_stream` or
964
+ * `begin_stream_with_params` call. Call this inside
965
+ * `requestAnimationFrame` so the browser can update the DOM between
966
+ * tokens and the page remains responsive.
967
+ * @returns {number | undefined}
968
+ */
969
+ next_token() {
970
+ const ret = wasm.flareengine_next_token(this.__wbg_ptr);
971
+ return ret === 0x100000001 ? undefined : ret;
972
+ }
973
+ /**
974
+ * Get the number of attention heads.
975
+ * @returns {number}
976
+ */
977
+ get num_heads() {
978
+ const ret = wasm.flareengine_num_heads(this.__wbg_ptr);
979
+ return ret >>> 0;
980
+ }
981
+ /**
982
+ * Get the number of layers.
983
+ * @returns {number}
984
+ */
985
+ get num_layers() {
986
+ const ret = wasm.flareengine_num_layers(this.__wbg_ptr);
987
+ return ret >>> 0;
988
+ }
989
+ /**
990
+ * Apply final RMSNorm + output projection to a hidden state and
991
+ * return logits over the vocabulary.
992
+ *
993
+ * `hidden` must have length `hidden_dim`. The returned vector has
994
+ * length `vocab_size`.
995
+ * @param {Float32Array} hidden
996
+ * @returns {Float32Array}
997
+ */
998
+ output_projection(hidden) {
999
+ const ptr0 = passArrayF32ToWasm0(hidden, wasm.__wbindgen_malloc);
1000
+ const len0 = WASM_VECTOR_LEN;
1001
+ const ret = wasm.flareengine_output_projection(this.__wbg_ptr, ptr0, len0);
1002
+ var v2 = getArrayF32FromWasm0(ret[0], ret[1]).slice();
1003
+ wasm.__wbindgen_free(ret[0], ret[1] * 4, 4);
1004
+ return v2;
1005
+ }
1006
+ /**
1007
+ * Return a JSON string summarising the performance metrics from the last
1008
+ * generation call.
1009
+ *
1010
+ * ```javascript
1011
+ * const perf = JSON.parse(engine.performance_summary());
1012
+ * console.log(`TTFT: ${perf.prefill_ms.toFixed(1)} ms`);
1013
+ * console.log(`Decode: ${perf.tokens_per_second.toFixed(1)} tok/s`);
1014
+ * ```
1015
+ * @returns {string}
1016
+ */
1017
+ performance_summary() {
1018
+ let deferred1_0;
1019
+ let deferred1_1;
1020
+ try {
1021
+ const ret = wasm.flareengine_performance_summary(this.__wbg_ptr);
1022
+ deferred1_0 = ret[0];
1023
+ deferred1_1 = ret[1];
1024
+ return getStringFromWasm0(ret[0], ret[1]);
1025
+ } finally {
1026
+ wasm.__wbindgen_free(deferred1_0, deferred1_1, 1);
1027
+ }
1028
+ }
1029
+ /**
1030
+ * Serialise the driver-managed GPU pipeline cache to bytes.
1031
+ *
1032
+ * Returns an opaque blob that can be passed to `init_gpu_with_cache()` on
1033
+ * the next startup to skip shader recompilation. Store it in
1034
+ * `localStorage` or `IndexedDB` between page loads.
1035
+ *
1036
+ * Returns an empty `Uint8Array` if no GPU is active, or if the current
1037
+ * backend does not support pipeline caching (WebGPU, Metal, DX12).
1038
+ * @returns {Uint8Array}
1039
+ */
1040
+ get pipeline_cache_data() {
1041
+ const ret = wasm.flareengine_pipeline_cache_data(this.__wbg_ptr);
1042
+ var v1 = getArrayU8FromWasm0(ret[0], ret[1]).slice();
1043
+ wasm.__wbindgen_free(ret[0], ret[1] * 1, 1);
1044
+ return v1;
1045
+ }
1046
+ /**
1047
+ * Raw Jinja2 chat template string from the GGUF model metadata, if present.
1048
+ *
1049
+ * This is the `tokenizer.chat_template` field embedded by the model author.
1050
+ * Use this with a JavaScript Jinja2 renderer (e.g. `nunjucks`) for accurate
1051
+ * prompt formatting across all model families, rather than relying on the
1052
+ * simplified built-in `apply_chat_template`.
1053
+ *
1054
+ * Returns `undefined` if the GGUF file did not include a chat template.
1055
+ * @returns {string | undefined}
1056
+ */
1057
+ get raw_chat_template() {
1058
+ const ret = wasm.flareengine_raw_chat_template(this.__wbg_ptr);
1059
+ let v1;
1060
+ if (ret[0] !== 0) {
1061
+ v1 = getStringFromWasm0(ret[0], ret[1]).slice();
1062
+ wasm.__wbindgen_free(ret[0], ret[1] * 1, 1);
1063
+ }
1064
+ return v1;
1065
+ }
1066
+ /**
1067
+ * Current repetition-penalty window size (0 = disabled).
1068
+ * @returns {number}
1069
+ */
1070
+ get repeat_last_n() {
1071
+ const ret = wasm.flareengine_repeat_last_n(this.__wbg_ptr);
1072
+ return ret >>> 0;
1073
+ }
1074
+ /**
1075
+ * Reset the KV cache (start a new conversation).
1076
+ *
1077
+ * Also clears stop sequences, the internal text accumulator, and
1078
+ * restores the RNG seed to the default `0x12345678`.
1079
+ */
1080
+ reset() {
1081
+ wasm.flareengine_reset(this.__wbg_ptr);
1082
+ }
1083
+ /**
1084
+ * Set the repetition-penalty look-back window (number of recent tokens to
1085
+ * penalise). Use `0` to disable repetition penalty entirely. Default: 64.
1086
+ *
1087
+ * Takes effect on the next `begin_stream*` call.
1088
+ *
1089
+ * # JS example
1090
+ * ```javascript
1091
+ * engine.set_repeat_last_n(128); // wider window for creative writing
1092
+ * engine.set_repeat_last_n(0); // disable repeat penalty
1093
+ * ```
1094
+ * @param {number} n
1095
+ */
1096
+ set_repeat_last_n(n) {
1097
+ wasm.flareengine_set_repeat_last_n(this.__wbg_ptr, n);
1098
+ }
1099
+ /**
1100
+ * Set the LCG RNG seed used for the next sampled generation call.
1101
+ *
1102
+ * Controls the random state passed to `begin_stream_with_params` and
1103
+ * `generate_with_params`, enabling reproducible outputs. The seed is
1104
+ * applied on the next call and then *not* automatically reset, so the
1105
+ * same seed will be reused on subsequent calls unless `set_rng_seed` or
1106
+ * `reset()` is called again.
1107
+ *
1108
+ * `reset()` restores the seed to the default `0x12345678`.
1109
+ *
1110
+ * ```javascript
1111
+ * engine.set_rng_seed(42);
1112
+ * const out1 = engine.generate_text("Hello", 50);
1113
+ * engine.set_rng_seed(42);
1114
+ * const out2 = engine.generate_text("Hello", 50);
1115
+ * // out1 === out2
1116
+ * ```
1117
+ * @param {number} seed
1118
+ */
1119
+ set_rng_seed(seed) {
1120
+ wasm.flareengine_set_rng_seed(this.__wbg_ptr, seed);
1121
+ }
1122
+ /**
1123
+ * Set how many top log-probability entries to capture after each forward
1124
+ * pass. Pass `0` (the default) to disable and save the computation.
1125
+ *
1126
+ * When enabled, `top_logprobs` is populated after every `next_token()`
1127
+ * call and after every token in `generate_stream_with_params`.
1128
+ *
1129
+ * # JS example
1130
+ * ```javascript
1131
+ * engine.set_top_logprobs(5);
1132
+ * engine.begin_stream(promptIds, 64);
1133
+ * while (!engine.stream_done) {
1134
+ * engine.next_token();
1135
+ * const lp = engine.top_logprobs; // Float32Array [id0, lp0, id1, lp1, ...]
1136
+ * }
1137
+ * ```
1138
+ * @param {number} n
1139
+ */
1140
+ set_top_logprobs(n) {
1141
+ wasm.flareengine_set_top_logprobs(this.__wbg_ptr, n);
1142
+ }
1143
+ /**
1144
+ * Signal the current stream to stop after the next `next_token()` call.
1145
+ * The JS Stop button should call this, then wait for `next_token()` to
1146
+ * return `undefined` before updating the UI.
1147
+ */
1148
+ stop_stream() {
1149
+ wasm.flareengine_stop_stream(this.__wbg_ptr);
1150
+ }
1151
+ /**
1152
+ * Whether the current stream has finished.
1153
+ * @returns {boolean}
1154
+ */
1155
+ get stream_done() {
1156
+ const ret = wasm.flareengine_stream_done(this.__wbg_ptr);
1157
+ return ret !== 0;
1158
+ }
1159
+ /**
1160
+ * Why the most-recent stream stopped.
1161
+ *
1162
+ * Returns one of:
1163
+ * - `"eos"` — the model emitted the EOS token
1164
+ * - `"length"` — `max_tokens` budget was exhausted
1165
+ * - `"stop_sequence"` — a registered stop sequence was matched
1166
+ * - `"user"` — `stop_stream()` was called
1167
+ * - `""` (empty) — stream not yet started or still running
1168
+ *
1169
+ * # JS example
1170
+ * ```javascript
1171
+ * while (!engine.stream_done) engine.next_token();
1172
+ * console.log("Stopped because:", engine.stream_stop_reason);
1173
+ * ```
1174
+ * @returns {string}
1175
+ */
1176
+ get stream_stop_reason() {
1177
+ let deferred1_0;
1178
+ let deferred1_1;
1179
+ try {
1180
+ const ret = wasm.flareengine_stream_stop_reason(this.__wbg_ptr);
1181
+ deferred1_0 = ret[0];
1182
+ deferred1_1 = ret[1];
1183
+ return getStringFromWasm0(ret[0], ret[1]);
1184
+ } finally {
1185
+ wasm.__wbindgen_free(deferred1_0, deferred1_1, 1);
1186
+ }
1187
+ }
1188
+ /**
1189
+ * Decode throughput in tokens per second for the last generation call.
1190
+ *
1191
+ * For the streaming API this is calculated from `last_decode_ms`.
1192
+ * For batch generation this is calculated from `last_prefill_ms`
1193
+ * (the total call duration).
1194
+ *
1195
+ * Returns 0.0 if no generation has been run or if timing data is
1196
+ * unavailable.
1197
+ * @returns {number}
1198
+ */
1199
+ get tokens_per_second() {
1200
+ const ret = wasm.flareengine_tokens_per_second(this.__wbg_ptr);
1201
+ return ret;
1202
+ }
1203
+ /**
1204
+ * How many tokens of context space remain before the window is full.
1205
+ *
1206
+ * Equivalent to `max_seq_len - tokens_used`. Returns 0 when the context is
1207
+ * already full or `max_seq_len` is 0.
1208
+ *
1209
+ * # JS example
1210
+ * ```javascript
1211
+ * if (engine.tokens_remaining < 64) {
1212
+ * console.warn("Context window almost full — consider resetting.");
1213
+ * }
1214
+ * ```
1215
+ * @returns {number}
1216
+ */
1217
+ get tokens_remaining() {
1218
+ const ret = wasm.flareengine_tokens_remaining(this.__wbg_ptr);
1219
+ return ret >>> 0;
1220
+ }
1221
+ /**
1222
+ * Number of tokens currently consumed in the KV-cache session (prompt + generated).
1223
+ *
1224
+ * Updated after every generation call; reset to 0 by `engine.reset()`.
1225
+ * Use with `max_seq_len` to build a context-usage progress bar.
1226
+ * @returns {number}
1227
+ */
1228
+ get tokens_used() {
1229
+ const ret = wasm.flareengine_tokens_used(this.__wbg_ptr);
1230
+ return ret >>> 0;
1231
+ }
1232
+ /**
1233
+ * Interleaved top-N log-probabilities from the last forward pass.
1234
+ *
1235
+ * Layout: `[token_id_0 as f32, log_prob_0, token_id_1 as f32, log_prob_1, ...]`
1236
+ * sorted by descending log-probability. Length is `top_logprobs_n * 2`.
1237
+ *
1238
+ * Returns an empty array if `set_top_logprobs(0)` (default) or before
1239
+ * any inference has been run.
1240
+ * @returns {Float32Array}
1241
+ */
1242
+ get top_logprobs() {
1243
+ const ret = wasm.flareengine_top_logprobs(this.__wbg_ptr);
1244
+ var v1 = getArrayF32FromWasm0(ret[0], ret[1]).slice();
1245
+ wasm.__wbindgen_free(ret[0], ret[1] * 4, 4);
1246
+ return v1;
1247
+ }
1248
+ /**
1249
+ * Truncate `text` so that it fits within `budget` tokens when encoded.
1250
+ *
1251
+ * Encodes `text` with the embedded GGUF vocabulary, keeps the **last**
1252
+ * `budget` tokens (tail of the text is preferred, so recent context is
1253
+ * preserved), and decodes them back to a string. Returns `text` unchanged
1254
+ * if it already fits or if no vocab is available.
1255
+ *
1256
+ * A typical call reserves space for the system prompt + generated output:
1257
+ *
1258
+ * ```javascript
1259
+ * // Keep only the tail of the conversation that fits in the context
1260
+ * const budget = engine.max_seq_len - 256; // leave 256 tokens for output
1261
+ * const trimmed = engine.truncate_to_context(conversationText, budget);
1262
+ * ```
1263
+ * @param {string} text
1264
+ * @param {number} budget
1265
+ * @returns {string}
1266
+ */
1267
+ truncate_to_context(text, budget) {
1268
+ let deferred2_0;
1269
+ let deferred2_1;
1270
+ try {
1271
+ const ptr0 = passStringToWasm0(text, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
1272
+ const len0 = WASM_VECTOR_LEN;
1273
+ const ret = wasm.flareengine_truncate_to_context(this.__wbg_ptr, ptr0, len0, budget);
1274
+ deferred2_0 = ret[0];
1275
+ deferred2_1 = ret[1];
1276
+ return getStringFromWasm0(ret[0], ret[1]);
1277
+ } finally {
1278
+ wasm.__wbindgen_free(deferred2_0, deferred2_1, 1);
1279
+ }
1280
+ }
1281
+ /**
1282
+ * Get the vocabulary size of the loaded model.
1283
+ * @returns {number}
1284
+ */
1285
+ get vocab_size() {
1286
+ const ret = wasm.flareengine_vocab_size(this.__wbg_ptr);
1287
+ return ret >>> 0;
1288
+ }
1289
+ /**
1290
+ * Run a single dummy forward pass to pre-compile WebGPU shader pipelines.
1291
+ *
1292
+ * WebGPU (and wgpu on native) compiles shader pipelines lazily on the
1293
+ * first dispatch. This causes a noticeable latency spike — often 100ms
1294
+ * to several seconds — when the user makes their first inference request.
1295
+ *
1296
+ * Call `warmup()` once after `init_gpu()` completes to trigger all shader
1297
+ * compilations in the background so the first real request feels fast.
1298
+ * The KV cache is reset after the warmup so the engine is in a clean state.
1299
+ *
1300
+ * Returns `true` if the warmup forward pass ran without error, `false` if
1301
+ * the model has not been loaded.
1302
+ *
1303
+ * # JS example
1304
+ * ```javascript
1305
+ * const engine = FlareEngine.load(bytes);
1306
+ * await engine.init_gpu();
1307
+ * engine.warmup(); // trigger shader compilation
1308
+ * // First real inference is now fast
1309
+ * engine.begin_stream(promptIds, 128);
1310
+ * ```
1311
+ * @returns {boolean}
1312
+ */
1313
+ warmup() {
1314
+ const ret = wasm.flareengine_warmup(this.__wbg_ptr);
1315
+ return ret !== 0;
1316
+ }
1317
+ }
1318
// Wire `using` support (explicit resource management) to `free()` when
// the runtime exposes Symbol.dispose.
if (Symbol.dispose) {
    FlareEngine.prototype[Symbol.dispose] = FlareEngine.prototype.free;
}
1319
+
1320
/**
 * Progressive loader that fetches a GGUF model from a URL with streaming
 * download progress.
 *
 * Lets the browser demo show download progress as the model arrives over
 * the network, then layer-loading progress as it is parsed. For a 500MB
 * Q4 model the download phase dominates; visible progress keeps the page
 * from appearing frozen.
 *
 * # JS example
 *
 * ```javascript
 * const loader = new FlareProgressiveLoader('https://example.com/model.gguf');
 * const engine = await loader.load((loaded, total) => {
 *   const pct = total > 0 ? Math.round(loaded / total * 100) : 0;
 *   progressBar.value = pct / 100;
 *   statusText.textContent = `Downloading… ${pct}%`;
 * });
 * ```
 */
export class FlareProgressiveLoader {
    /**
     * Create a loader for the given model URL.
     * @param {string} url
     */
    constructor(url) {
        const ptr = passStringToWasm0(url, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
        const len = WASM_VECTOR_LEN;
        this.__wbg_ptr = wasm.flareprogressiveloader_new(ptr, len) >>> 0;
        FlareProgressiveLoaderFinalization.register(this, this.__wbg_ptr, this);
    }
    /** Detach the wasm pointer so the finalizer will not double-free it. */
    __destroy_into_raw() {
        const ptr = this.__wbg_ptr;
        this.__wbg_ptr = 0;
        FlareProgressiveLoaderFinalization.unregister(this);
        return ptr;
    }
    /** Release the underlying wasm allocation immediately. */
    free() {
        wasm.__wbg_flareprogressiveloader_free(this.__destroy_into_raw(), 0);
    }
    /**
     * Fetch the model from the URL, calling
     * `on_progress(loaded_bytes, total_bytes)` as each chunk arrives, then
     * parse and return a `FlareEngine`.
     *
     * `total_bytes` is 0 when the server sends no `Content-Length` header
     * (e.g. a gzip-compressed or chunked response).
     * @param {Function} on_progress
     * @returns {Promise<FlareEngine>}
     */
    load(on_progress) {
        return wasm.flareprogressiveloader_load(this.__wbg_ptr, on_progress);
    }
}
1377
// Wire `using` support (explicit resource management) to `free()` when
// the runtime exposes Symbol.dispose.
if (Symbol.dispose) {
    FlareProgressiveLoader.prototype[Symbol.dispose] = FlareProgressiveLoader.prototype.free;
}
1378
+
1379
/**
 * BPE tokenizer exported to JS for encoding prompts and decoding
 * generated tokens.
 *
 * Load from a HuggingFace `tokenizer.json` string, then use `encode` /
 * `decode` with `FlareEngine` for full text-in / text-out inference.
 *
 * # JS example
 *
 * ```javascript
 * const resp = await fetch('tokenizer.json');
 * const json = await resp.text();
 * const tok = FlareTokenizer.from_json(json);
 *
 * const ids = tok.encode("Hello, world!");
 * const engine = FlareEngine.load(modelBytes);
 * const out = engine.generate_tokens(ids, 64);
 * console.log(tok.decode(out));
 * ```
 */
export class FlareTokenizer {
    /** Wrap a raw wasm pointer in a JS object with a registered finalizer. */
    static __wrap(ptr) {
        ptr = ptr >>> 0;
        const obj = Object.create(FlareTokenizer.prototype);
        obj.__wbg_ptr = ptr;
        FlareTokenizerFinalization.register(obj, obj.__wbg_ptr, obj);
        return obj;
    }
    /** Detach the wasm pointer so the finalizer will not double-free it. */
    __destroy_into_raw() {
        const ptr = this.__wbg_ptr;
        this.__wbg_ptr = 0;
        FlareTokenizerFinalization.unregister(this);
        return ptr;
    }
    /** Release the underlying wasm allocation immediately. */
    free() {
        wasm.__wbg_flaretokenizer_free(this.__destroy_into_raw(), 0);
    }
    /**
     * Load a tokenizer from the text of a HuggingFace `tokenizer.json` file.
     * @param {string} json
     * @returns {FlareTokenizer}
     * @throws {Error} if the JSON is not a valid tokenizer definition
     */
    static from_json(json) {
        const ptr = passStringToWasm0(json, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
        const len = WASM_VECTOR_LEN;
        const ret = wasm.flaretokenizer_from_json(ptr, len);
        if (ret[2]) {
            throw takeFromExternrefTable0(ret[1]);
        }
        return FlareTokenizer.__wrap(ret[0]);
    }
    /**
     * BOS (beginning of sequence) token ID, if defined.
     * @returns {number | undefined}
     */
    get bos_token_id() {
        const ret = wasm.flaretokenizer_bos_token_id(this.__wbg_ptr);
        // 0x100000001 is the FFI sentinel for "no token defined".
        return ret === 0x100000001 ? undefined : ret;
    }
    /**
     * EOS (end of sequence) token ID, if defined.
     * @returns {number | undefined}
     */
    get eos_token_id() {
        const ret = wasm.flaretokenizer_eos_token_id(this.__wbg_ptr);
        return ret === 0x100000001 ? undefined : ret;
    }
    /**
     * Vocabulary size.
     * @returns {number}
     */
    get vocab_size() {
        return wasm.flaretokenizer_vocab_size(this.__wbg_ptr) >>> 0;
    }
    /**
     * Encode text to a sequence of token IDs.
     * @param {string} text
     * @returns {Uint32Array}
     * @throws {Error} on tokenization failure
     */
    encode(text) {
        const ptr = passStringToWasm0(text, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
        const len = WASM_VECTOR_LEN;
        const ret = wasm.flaretokenizer_encode(this.__wbg_ptr, ptr, len);
        if (ret[3]) {
            throw takeFromExternrefTable0(ret[2]);
        }
        const ids = getArrayU32FromWasm0(ret[0], ret[1]).slice();
        wasm.__wbindgen_free(ret[0], ret[1] * 4, 4);
        return ids;
    }
    /**
     * Decode a sequence of token IDs to text.
     * @param {Uint32Array} tokens
     * @returns {string}
     * @throws {Error} on decode failure
     */
    decode(tokens) {
        // Zero-initialised so the finally-block free is a harmless no-op
        // when the wasm call reports an error.
        let strPtr = 0;
        let strLen = 0;
        try {
            const ptr = passArray32ToWasm0(tokens, wasm.__wbindgen_malloc);
            const len = WASM_VECTOR_LEN;
            const ret = wasm.flaretokenizer_decode(this.__wbg_ptr, ptr, len);
            if (ret[3]) {
                throw takeFromExternrefTable0(ret[2]);
            }
            strPtr = ret[0];
            strLen = ret[1];
            return getStringFromWasm0(strPtr, strLen);
        } finally {
            wasm.__wbindgen_free(strPtr, strLen, 1);
        }
    }
    /**
     * Decode a single token ID to text (useful for streaming output).
     * @param {number} token_id
     * @returns {string}
     * @throws {Error} on decode failure
     */
    decode_one(token_id) {
        let strPtr = 0;
        let strLen = 0;
        try {
            const ret = wasm.flaretokenizer_decode_one(this.__wbg_ptr, token_id);
            if (ret[3]) {
                throw takeFromExternrefTable0(ret[2]);
            }
            strPtr = ret[0];
            strLen = ret[1];
            return getStringFromWasm0(strPtr, strLen);
        } finally {
            wasm.__wbindgen_free(strPtr, strLen, 1);
        }
    }
}
1519
// Wire `using` support (explicit resource management) to `free()` when
// the runtime exposes Symbol.dispose.
if (Symbol.dispose) {
    FlareTokenizer.prototype[Symbol.dispose] = FlareTokenizer.prototype.free;
}
1520
+
1521
/**
 * Save model bytes to OPFS.
 *
 * Creates the `flare-models` directory if it does not exist and
 * overwrites any existing file with the same name.
 * @param {string} model_name
 * @param {Uint8Array} data
 * @returns {Promise<void>}
 */
export function cache_model(model_name, data) {
    const namePtr = passStringToWasm0(model_name, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const nameLen = WASM_VECTOR_LEN;
    const dataPtr = passArray8ToWasm0(data, wasm.__wbindgen_malloc);
    const dataLen = WASM_VECTOR_LEN;
    return wasm.cache_model(namePtr, nameLen, dataPtr, dataLen);
}
1538
+
1539
/**
 * Delete a cached model from OPFS.
 * @param {string} model_name
 * @returns {Promise<void>}
 */
export function delete_cached_model(model_name) {
    const namePtr = passStringToWasm0(model_name, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const nameLen = WASM_VECTOR_LEN;
    return wasm.delete_cached_model(namePtr, nameLen);
}
1550
+
1551
/**
 * Get basic device info as a JSON string.
 * @returns {string}
 */
export function device_info() {
    let strPtr;
    let strLen;
    try {
        const ret = wasm.device_info();
        strPtr = ret[0];
        strLen = ret[1];
        return getStringFromWasm0(strPtr, strLen);
    } finally {
        wasm.__wbindgen_free(strPtr, strLen, 1);
    }
}
1567
+
1568
/**
 * Check if a model is cached in OPFS by name.
 *
 * Resolves to `false` if OPFS is unavailable or the model is not found.
 * @param {string} model_name
 * @returns {Promise<boolean>}
 */
export function is_model_cached(model_name) {
    const namePtr = passStringToWasm0(model_name, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const nameLen = WASM_VECTOR_LEN;
    return wasm.is_model_cached(namePtr, nameLen);
}
1581
+
1582
/**
 * List all cached models with their sizes (in bytes).
 *
 * Resolves to a JSON-serialised array of objects:
 * `[{name: string, size: number}, ...]`. Resolves to `"[]"` if OPFS is
 * unavailable or the models directory does not exist.
 * @returns {Promise<any>}
 */
export function list_cached_models() {
    return wasm.list_cached_models();
}
1593
+
1594
/**
 * Load model bytes from OPFS.
 *
 * Resolves to `null` (JS) / `None` (Rust) if the model is not cached or
 * OPFS is unavailable.
 * @param {string} model_name
 * @returns {Promise<any>}
 */
export function load_cached_model(model_name) {
    const namePtr = passStringToWasm0(model_name, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const nameLen = WASM_VECTOR_LEN;
    return wasm.load_cached_model(namePtr, nameLen);
}
1608
+
1609
/**
 * Set up better panic messages in the browser console.
 * @returns {void}
 */
export function start() {
    wasm.start();
}
1615
+
1616
/**
 * Get storage usage and quota estimate.
 *
 * Resolves to a JSON string: `{usage: number, quota: number}`, or `"{}"`
 * if the Storage API is unavailable.
 * @returns {Promise<any>}
 */
export function storage_estimate() {
    return wasm.storage_estimate();
}
1627
+
1628
/**
 * Check if this WASM build was compiled with relaxed SIMD support.
 *
 * Relaxed SIMD provides hardware-specific fast operations like fused
 * multiply-add (`f32x4_relaxed_madd`) that map directly to ARM NEON and
 * x86 SSE/AVX FMA instructions; when enabled, matvec operations use FMA
 * for ~15-30% speedup.
 *
 * This is a compile-time feature: the WASM binary either includes the
 * relaxed SIMD instructions or it does not. The browser validates them
 * at module load time, so if this module loaded successfully and this
 * returns `true`, relaxed SIMD is active.
 * @returns {boolean}
 */
export function supports_relaxed_simd() {
    return wasm.supports_relaxed_simd() !== 0;
}
1646
+
1647
/**
 * Check if the browser exposes the Web Speech API for speech recognition.
 *
 * Probes `window.SpeechRecognition` and the WebKit-prefixed
 * `window.webkitSpeechRecognition`. `true` means the demo voice mode can
 * capture microphone input and produce transcripts through the platform
 * speech engine. Foundation for the voice pipeline (issue #395); a fully
 * offline path will eventually run Whisper in WASM.
 * @returns {boolean}
 */
export function supports_speech_recognition() {
    return wasm.supports_speech_recognition() !== 0;
}
1661
+
1662
/**
 * Check if the browser exposes the Web Speech API for speech synthesis.
 *
 * Returns `true` when `window.speechSynthesis` is available, enabling
 * the demo voice mode to speak model responses. A fully offline path
 * will eventually run a neural TTS model in WASM.
 * @returns {boolean}
 */
export function supports_speech_synthesis() {
    return wasm.supports_speech_synthesis() !== 0;
}
1674
+
1675
/**
 * Check if WebNN is available in the current browser.
 *
 * WebNN (`navigator.ml`) exposes neural-network acceleration through
 * platform NPUs/DSPs. This is a foundation check so JS code can decide
 * whether to build a WebNN graph from exported weights.
 * @returns {boolean}
 */
export function supports_webnn() {
    return wasm.supports_webnn() !== 0;
}
1687
+
1688
/**
 * Report whether WebTransport is available in the current browser.
 *
 * WebTransport (`window.WebTransport`) is a modern transport API built on
 * HTTP/3 QUIC streams; it can open multiple parallel bidirectional streams
 * to one origin with less head-of-line blocking than `fetch()`. That makes
 * it attractive for progressive model loading, where different byte ranges
 * of the GGUF file are downloaded concurrently.
 *
 * Note: actually using WebTransport for parallel range downloads needs
 * server-side support (an HTTP/3 endpoint accepting byte-range requests on
 * streams). This check only reports browser capability — the JS loader
 * falls back to `fetch()` when the server does not cooperate.
 * @returns {boolean}
 */
export function supports_webtransport() {
    // The wasm export reports a C-style boolean (0/1).
    return wasm.supports_webtransport() !== 0;
}
1707
+
1708
/**
 * Report whether WebGPU is available in the current browser.
 * @returns {boolean}
 */
export function webgpu_available() {
    // The wasm export reports a C-style boolean (0/1).
    return wasm.webgpu_available() !== 0;
}
1716
+
1717
/**
 * Build the import object used to instantiate the wasm module (presumably
 * passed to `WebAssembly.instantiate` by the loader outside this excerpt).
 *
 * Every `__wbg_*` entry is a wasm-bindgen shim bridging one JS operation
 * (DOM/WebGPU/OPFS/fetch calls, property reads and writes, constructor
 * invocations, `instanceof` checks) into the wasm module. The
 * `__wbindgen_cast_*` entries are type-cast intrinsics, and
 * `__wbindgen_init_externref_table` seeds the shared externref table.
 * The returned object is keyed by the module specifier the wasm binary
 * expects ("./flare_web_bg.js").
 *
 * NOTE(review): this function is machine-generated (wasm-bindgen style —
 * the hash suffixes must match the wasm binary's import names); do not
 * edit entries by hand, regenerate instead.
 */
function __wbg_get_imports() {
    const import0 = {
        // Null prototype so shim lookup never hits Object.prototype.
        __proto__: null,
        __wbg_Error_2e59b1b37a9a34c3: function(arg0, arg1) {
            const ret = Error(getStringFromWasm0(arg0, arg1));
            return ret;
        },
        __wbg_Window_412fe051c1aa1519: function(arg0) {
            const ret = arg0.Window;
            return ret;
        },
        __wbg_WorkerGlobalScope_349300f9b277afe1: function(arg0) {
            const ret = arg0.WorkerGlobalScope;
            return ret;
        },
        __wbg___wbindgen_boolean_get_a86c216575a75c30: function(arg0) {
            const v = arg0;
            const ret = typeof(v) === 'boolean' ? v : undefined;
            // 0xFFFFFF is the "none" sentinel for optional booleans at the FFI boundary.
            return isLikeNone(ret) ? 0xFFFFFF : ret ? 1 : 0;
        },
        __wbg___wbindgen_debug_string_dd5d2d07ce9e6c57: function(arg0, arg1) {
            const ret = debugString(arg1);
            const ptr1 = passStringToWasm0(ret, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
            const len1 = WASM_VECTOR_LEN;
            // Write (ptr, len) of the encoded string into the wasm out-param struct.
            getDataViewMemory0().setInt32(arg0 + 4 * 1, len1, true);
            getDataViewMemory0().setInt32(arg0 + 4 * 0, ptr1, true);
        },
        __wbg___wbindgen_is_function_49868bde5eb1e745: function(arg0) {
            const ret = typeof(arg0) === 'function';
            return ret;
        },
        __wbg___wbindgen_is_null_344c8750a8525473: function(arg0) {
            const ret = arg0 === null;
            return ret;
        },
        __wbg___wbindgen_is_undefined_c0cca72b82b86f4d: function(arg0) {
            const ret = arg0 === undefined;
            return ret;
        },
        __wbg___wbindgen_string_get_914df97fcfa788f2: function(arg0, arg1) {
            const obj = arg1;
            const ret = typeof(obj) === 'string' ? obj : undefined;
            var ptr1 = isLikeNone(ret) ? 0 : passStringToWasm0(ret, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
            var len1 = WASM_VECTOR_LEN;
            getDataViewMemory0().setInt32(arg0 + 4 * 1, len1, true);
            getDataViewMemory0().setInt32(arg0 + 4 * 0, ptr1, true);
        },
        __wbg___wbindgen_throw_81fc77679af83bc6: function(arg0, arg1) {
            throw new Error(getStringFromWasm0(arg0, arg1));
        },
        __wbg__wbg_cb_unref_3c3b4f651835fbcb: function(arg0) {
            arg0._wbg_cb_unref();
        },
        __wbg_arrayBuffer_7bba74066875530e: function(arg0) {
            const ret = arg0.arrayBuffer();
            return ret;
        },
        __wbg_beginComputePass_097033d61ef8af0f: function(arg0, arg1) {
            const ret = arg0.beginComputePass(arg1);
            return ret;
        },
        __wbg_body_9a25d64338506fbe: function(arg0) {
            const ret = arg0.body;
            return isLikeNone(ret) ? 0 : addToExternrefTable0(ret);
        },
        __wbg_buffer_a77cc90da4bdb503: function(arg0) {
            const ret = arg0.buffer;
            return ret;
        },
        __wbg_call_368fa9c372d473ba: function() { return handleError(function (arg0, arg1, arg2, arg3) {
            const ret = arg0.call(arg1, arg2, arg3);
            return ret;
        }, arguments); },
        __wbg_call_d578befcc3145dee: function() { return handleError(function (arg0, arg1, arg2) {
            const ret = arg0.call(arg1, arg2);
            return ret;
        }, arguments); },
        __wbg_close_37e34297940956fd: function(arg0) {
            const ret = arg0.close();
            return ret;
        },
        __wbg_copyBufferToBuffer_99ba10ae51f20b8a: function() { return handleError(function (arg0, arg1, arg2, arg3, arg4, arg5) {
            arg0.copyBufferToBuffer(arg1, arg2, arg3, arg4, arg5);
        }, arguments); },
        __wbg_createBindGroupLayout_1d37ac0dabfbed28: function() { return handleError(function (arg0, arg1) {
            const ret = arg0.createBindGroupLayout(arg1);
            return ret;
        }, arguments); },
        __wbg_createBindGroup_3bccbd7517f0708e: function(arg0, arg1) {
            const ret = arg0.createBindGroup(arg1);
            return ret;
        },
        __wbg_createBuffer_24b346170c9f54c8: function() { return handleError(function (arg0, arg1) {
            const ret = arg0.createBuffer(arg1);
            return ret;
        }, arguments); },
        __wbg_createCommandEncoder_48a406baaa084912: function(arg0, arg1) {
            const ret = arg0.createCommandEncoder(arg1);
            return ret;
        },
        __wbg_createComputePipeline_4efb4ca205a4b557: function(arg0, arg1) {
            const ret = arg0.createComputePipeline(arg1);
            return ret;
        },
        __wbg_createPipelineLayout_f668b6fbdf877ab3: function(arg0, arg1) {
            const ret = arg0.createPipelineLayout(arg1);
            return ret;
        },
        __wbg_createShaderModule_1b0812f3a4503221: function(arg0, arg1) {
            const ret = arg0.createShaderModule(arg1);
            return ret;
        },
        __wbg_createWritable_d5314165379c13be: function(arg0) {
            const ret = arg0.createWritable();
            return ret;
        },
        __wbg_dispatchWorkgroups_1b750cb68e2eb693: function(arg0, arg1, arg2, arg3) {
            arg0.dispatchWorkgroups(arg1 >>> 0, arg2 >>> 0, arg3 >>> 0);
        },
        __wbg_end_fd65a01a19361ec7: function(arg0) {
            arg0.end();
        },
        __wbg_entries_69b67e91e74ba327: function(arg0) {
            const ret = arg0.entries();
            return ret;
        },
        __wbg_error_a6fa202b58aa1cd3: function(arg0, arg1) {
            let deferred0_0;
            let deferred0_1;
            try {
                deferred0_0 = arg0;
                deferred0_1 = arg1;
                console.error(getStringFromWasm0(arg0, arg1));
            } finally {
                // The string buffer is owned by wasm; free it even if console.error throws.
                wasm.__wbindgen_free(deferred0_0, deferred0_1, 1);
            }
        },
        __wbg_estimate_790345f187f17044: function() { return handleError(function (arg0) {
            const ret = arg0.estimate();
            return ret;
        }, arguments); },
        __wbg_eval_db8671e4e6469929: function() { return handleError(function (arg0, arg1) {
            // NOTE(review): eval of a wasm-supplied string — generated shim; the
            // strings originate from the Rust side, not from page input.
            const ret = eval(getStringFromWasm0(arg0, arg1));
            return ret;
        }, arguments); },
        __wbg_features_205df3dd891b74bf: function(arg0) {
            const ret = arg0.features;
            return ret;
        },
        __wbg_features_efadd23951712b29: function(arg0) {
            const ret = arg0.features;
            return ret;
        },
        __wbg_fetch_ca19a9480623b9a8: function(arg0, arg1, arg2) {
            const ret = arg0.fetch(getStringFromWasm0(arg1, arg2));
            return ret;
        },
        __wbg_finish_2440fb64e53f7d5a: function(arg0, arg1) {
            const ret = arg0.finish(arg1);
            return ret;
        },
        __wbg_finish_4b40810f0b577bc2: function(arg0) {
            const ret = arg0.finish();
            return ret;
        },
        __wbg_flareengine_new: function(arg0) {
            const ret = FlareEngine.__wrap(arg0);
            return ret;
        },
        __wbg_from_741da0f916ab74aa: function(arg0) {
            const ret = Array.from(arg0);
            return ret;
        },
        __wbg_getDirectoryHandle_a38f7b2c1aa52af4: function(arg0, arg1, arg2, arg3) {
            const ret = arg0.getDirectoryHandle(getStringFromWasm0(arg1, arg2), arg3);
            return ret;
        },
        __wbg_getDirectory_3af764c18446017f: function(arg0) {
            const ret = arg0.getDirectory();
            return ret;
        },
        __wbg_getFileHandle_029e7a3c6dee72cb: function(arg0, arg1, arg2) {
            const ret = arg0.getFileHandle(getStringFromWasm0(arg1, arg2));
            return ret;
        },
        __wbg_getFileHandle_326ca47811ae37a1: function(arg0, arg1, arg2, arg3) {
            const ret = arg0.getFileHandle(getStringFromWasm0(arg1, arg2), arg3);
            return ret;
        },
        __wbg_getFile_0e25dfe508c6bd0a: function(arg0) {
            const ret = arg0.getFile();
            return ret;
        },
        __wbg_getMappedRange_55878eb97535ca19: function() { return handleError(function (arg0, arg1, arg2) {
            const ret = arg0.getMappedRange(arg1, arg2);
            return ret;
        }, arguments); },
        __wbg_getReader_3bcb712b2f3b80aa: function(arg0) {
            const ret = arg0.getReader();
            return ret;
        },
        __wbg_get_4848e350b40afc16: function(arg0, arg1) {
            const ret = arg0[arg1 >>> 0];
            return ret;
        },
        __wbg_get_5caaa5a9aae7e0b1: function() { return handleError(function (arg0, arg1, arg2, arg3) {
            const ret = arg1.get(getStringFromWasm0(arg2, arg3));
            var ptr1 = isLikeNone(ret) ? 0 : passStringToWasm0(ret, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
            var len1 = WASM_VECTOR_LEN;
            getDataViewMemory0().setInt32(arg0 + 4 * 1, len1, true);
            getDataViewMemory0().setInt32(arg0 + 4 * 0, ptr1, true);
        }, arguments); },
        __wbg_get_f96702c6245e4ef9: function() { return handleError(function (arg0, arg1) {
            const ret = Reflect.get(arg0, arg1);
            return ret;
        }, arguments); },
        __wbg_get_quota_945897ba0f160371: function(arg0, arg1) {
            const ret = arg1.quota;
            // Optional f64 is returned as a (present-flag, value) pair in wasm memory.
            getDataViewMemory0().setFloat64(arg0 + 8 * 1, isLikeNone(ret) ? 0 : ret, true);
            getDataViewMemory0().setInt32(arg0 + 4 * 0, !isLikeNone(ret), true);
        },
        __wbg_get_usage_2ea0330cfaeab2c1: function(arg0, arg1) {
            const ret = arg1.usage;
            getDataViewMemory0().setFloat64(arg0 + 8 * 1, isLikeNone(ret) ? 0 : ret, true);
            getDataViewMemory0().setInt32(arg0 + 4 * 0, !isLikeNone(ret), true);
        },
        __wbg_gpu_bafbc1407fe850fb: function(arg0) {
            const ret = arg0.gpu;
            return ret;
        },
        __wbg_has_dc80aa6186153231: function(arg0, arg1, arg2) {
            const ret = arg0.has(getStringFromWasm0(arg1, arg2));
            return ret;
        },
        __wbg_headers_e08dcb5aa09b9a63: function(arg0) {
            const ret = arg0.headers;
            return ret;
        },
        // instanceof shims wrap the check in try/catch because the global
        // constructor may not exist in this environment (e.g. no WebGPU).
        __wbg_instanceof_GpuAdapter_aff4b0f95a6c1c3e: function(arg0) {
            let result;
            try {
                result = arg0 instanceof GPUAdapter;
            } catch (_) {
                result = false;
            }
            const ret = result;
            return ret;
        },
        __wbg_instanceof_ReadableStreamDefaultReader_10d3f15a012b70d7: function(arg0) {
            let result;
            try {
                result = arg0 instanceof ReadableStreamDefaultReader;
            } catch (_) {
                result = false;
            }
            const ret = result;
            return ret;
        },
        __wbg_instanceof_Response_06795eab66cc4036: function(arg0) {
            let result;
            try {
                result = arg0 instanceof Response;
            } catch (_) {
                result = false;
            }
            const ret = result;
            return ret;
        },
        __wbg_instanceof_Window_c0fee4c064502536: function(arg0) {
            let result;
            try {
                result = arg0 instanceof Window;
            } catch (_) {
                result = false;
            }
            const ret = result;
            return ret;
        },
        __wbg_label_4b6427d9045e3926: function(arg0, arg1) {
            const ret = arg1.label;
            const ptr1 = passStringToWasm0(ret, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
            const len1 = WASM_VECTOR_LEN;
            getDataViewMemory0().setInt32(arg0 + 4 * 1, len1, true);
            getDataViewMemory0().setInt32(arg0 + 4 * 0, ptr1, true);
        },
        __wbg_length_0c32cb8543c8e4c8: function(arg0) {
            const ret = arg0.length;
            return ret;
        },
        __wbg_mapAsync_f7fe2e4825742580: function(arg0, arg1, arg2, arg3) {
            const ret = arg0.mapAsync(arg1 >>> 0, arg2, arg3);
            return ret;
        },
        __wbg_navigator_9b09ea705d03d227: function(arg0) {
            const ret = arg0.navigator;
            return ret;
        },
        __wbg_navigator_af52153252bdf29d: function(arg0) {
            const ret = arg0.navigator;
            return ret;
        },
        __wbg_new_227d7c05414eb861: function() {
            const ret = new Error();
            return ret;
        },
        __wbg_new_4f9fafbb3909af72: function() {
            const ret = new Object();
            return ret;
        },
        __wbg_new_a560378ea1240b14: function(arg0) {
            const ret = new Uint8Array(arg0);
            return ret;
        },
        __wbg_new_f3c9df4f38f3f798: function() {
            const ret = new Array();
            return ret;
        },
        __wbg_new_from_slice_2580ff33d0d10520: function(arg0, arg1) {
            const ret = new Uint8Array(getArrayU8FromWasm0(arg0, arg1));
            return ret;
        },
        __wbg_new_typed_14d7cc391ce53d2c: function(arg0, arg1) {
            try {
                var state0 = {a: arg0, b: arg1};
                var cb0 = (arg0, arg1) => {
                    // Temporarily clear state0.a to guard against reentrant invocation.
                    const a = state0.a;
                    state0.a = 0;
                    try {
                        return wasm_bindgen__convert__closures_____invoke__hcdfd434894ba1863(a, state0.b, arg0, arg1);
                    } finally {
                        state0.a = a;
                    }
                };
                const ret = new Promise(cb0);
                return ret;
            } finally {
                state0.a = 0;
            }
        },
        __wbg_new_with_byte_offset_and_length_6bfc75833d6170c8: function(arg0, arg1, arg2) {
            const ret = new Uint8Array(arg0, arg1 >>> 0, arg2 >>> 0);
            return ret;
        },
        __wbg_next_072e78dcf497124d: function() { return handleError(function (arg0) {
            const ret = arg0.next();
            return ret;
        }, arguments); },
        __wbg_now_2c44418ca0623664: function(arg0) {
            const ret = arg0.now();
            return ret;
        },
        __wbg_ok_36f7b13b74596c24: function(arg0) {
            const ret = arg0.ok;
            return ret;
        },
        __wbg_performance_5ed3f6a3bbe36d0d: function(arg0) {
            const ret = arg0.performance;
            return isLikeNone(ret) ? 0 : addToExternrefTable0(ret);
        },
        __wbg_prototypesetcall_3e05eb9545565046: function(arg0, arg1, arg2) {
            Uint8Array.prototype.set.call(getArrayU8FromWasm0(arg0, arg1), arg2);
        },
        __wbg_push_6bdbc990be5ac37b: function(arg0, arg1) {
            const ret = arg0.push(arg1);
            return ret;
        },
        __wbg_queueMicrotask_abaf92f0bd4e80a4: function(arg0) {
            const ret = arg0.queueMicrotask;
            return ret;
        },
        __wbg_queueMicrotask_df5a6dac26d818f3: function(arg0) {
            queueMicrotask(arg0);
        },
        __wbg_queue_3e40156d83b9183e: function(arg0) {
            const ret = arg0.queue;
            return ret;
        },
        __wbg_read_316bf844c93a6ccc: function(arg0) {
            const ret = arg0.read();
            return ret;
        },
        __wbg_removeEntry_f038ab74448d1824: function(arg0, arg1, arg2) {
            const ret = arg0.removeEntry(getStringFromWasm0(arg1, arg2));
            return ret;
        },
        __wbg_requestAdapter_245da40985c2fdc5: function(arg0, arg1) {
            const ret = arg0.requestAdapter(arg1);
            return ret;
        },
        __wbg_requestDevice_28434913a23418c4: function(arg0, arg1) {
            const ret = arg0.requestDevice(arg1);
            return ret;
        },
        __wbg_resolve_0a79de24e9d2267b: function(arg0) {
            const ret = Promise.resolve(arg0);
            return ret;
        },
        __wbg_setBindGroup_98f0303f15c3cfb4: function() { return handleError(function (arg0, arg1, arg2, arg3, arg4, arg5, arg6) {
            arg0.setBindGroup(arg1 >>> 0, arg2, getArrayU32FromWasm0(arg3, arg4), arg5, arg6 >>> 0);
        }, arguments); },
        __wbg_setBindGroup_bc67abae8c962082: function(arg0, arg1, arg2) {
            arg0.setBindGroup(arg1 >>> 0, arg2);
        },
        __wbg_setPipeline_0c34cc40ab8d6499: function(arg0, arg1) {
            arg0.setPipeline(arg1);
        },
        __wbg_set_62f340d5d135b4db: function(arg0, arg1, arg2) {
            arg0.set(arg1, arg2 >>> 0);
        },
        __wbg_set_8ee2d34facb8466e: function() { return handleError(function (arg0, arg1, arg2) {
            const ret = Reflect.set(arg0, arg1, arg2);
            return ret;
        }, arguments); },
        // The __wbg_set_* shims below populate WebGPU descriptor dictionaries;
        // enum arguments are translated through the __wbindgen_enum_* tables.
        __wbg_set_access_1cc7ab8607a9643c: function(arg0, arg1) {
            arg0.access = __wbindgen_enum_GpuStorageTextureAccess[arg1];
        },
        __wbg_set_beginning_of_pass_write_index_ac45c363336c24c7: function(arg0, arg1) {
            arg0.beginningOfPassWriteIndex = arg1 >>> 0;
        },
        __wbg_set_bind_group_layouts_b4667372bdcee99f: function(arg0, arg1) {
            arg0.bindGroupLayouts = arg1;
        },
        __wbg_set_binding_0a48264269982c5e: function(arg0, arg1) {
            arg0.binding = arg1 >>> 0;
        },
        __wbg_set_binding_15ab1e2c74990b25: function(arg0, arg1) {
            arg0.binding = arg1 >>> 0;
        },
        __wbg_set_buffer_3b3e4c4a884d1610: function(arg0, arg1) {
            arg0.buffer = arg1;
        },
        __wbg_set_buffer_ff433f6fc0bcc260: function(arg0, arg1) {
            arg0.buffer = arg1;
        },
        __wbg_set_code_c616b86ce504e24a: function(arg0, arg1, arg2) {
            arg0.code = getStringFromWasm0(arg1, arg2);
        },
        __wbg_set_compute_7c274f1347709d07: function(arg0, arg1) {
            arg0.compute = arg1;
        },
        __wbg_set_create_0654e513e8ccb2be: function(arg0, arg1) {
            arg0.create = arg1 !== 0;
        },
        __wbg_set_create_4b5cddb7e7c14744: function(arg0, arg1) {
            arg0.create = arg1 !== 0;
        },
        __wbg_set_end_of_pass_write_index_c60088bc589e6882: function(arg0, arg1) {
            arg0.endOfPassWriteIndex = arg1 >>> 0;
        },
        __wbg_set_entries_bfc700c1f97eec0b: function(arg0, arg1) {
            arg0.entries = arg1;
        },
        __wbg_set_entries_f07df780e3613292: function(arg0, arg1) {
            arg0.entries = arg1;
        },
        __wbg_set_entry_point_aa503b3bb9fed987: function(arg0, arg1, arg2) {
            arg0.entryPoint = getStringFromWasm0(arg1, arg2);
        },
        __wbg_set_format_b8158198b657d617: function(arg0, arg1) {
            arg0.format = __wbindgen_enum_GpuTextureFormat[arg1];
        },
        __wbg_set_has_dynamic_offset_4d5601049080763e: function(arg0, arg1) {
            arg0.hasDynamicOffset = arg1 !== 0;
        },
        __wbg_set_label_392dc66ad76d942d: function(arg0, arg1, arg2) {
            arg0.label = getStringFromWasm0(arg1, arg2);
        },
        __wbg_set_label_3e06143ad04772ae: function(arg0, arg1, arg2) {
            arg0.label = getStringFromWasm0(arg1, arg2);
        },
        __wbg_set_label_50f397060b5b5610: function(arg0, arg1, arg2) {
            arg0.label = getStringFromWasm0(arg1, arg2);
        },
        __wbg_set_label_68e2953cfd33a5a5: function(arg0, arg1, arg2) {
            arg0.label = getStringFromWasm0(arg1, arg2);
        },
        __wbg_set_label_76c4f74a38ff9bcd: function(arg0, arg1, arg2) {
            arg0.label = getStringFromWasm0(arg1, arg2);
        },
        __wbg_set_label_79484ec4d6d85bbf: function(arg0, arg1, arg2) {
            arg0.label = getStringFromWasm0(arg1, arg2);
        },
        __wbg_set_label_861c8e348e26599d: function(arg0, arg1, arg2) {
            arg0.label = getStringFromWasm0(arg1, arg2);
        },
        __wbg_set_label_d1b6a326332d0520: function(arg0, arg1, arg2) {
            arg0.label = getStringFromWasm0(arg1, arg2);
        },
        __wbg_set_label_d687cfb9a30329c8: function(arg0, arg1, arg2) {
            arg0.label = getStringFromWasm0(arg1, arg2);
        },
        __wbg_set_label_e345704005fb385b: function(arg0, arg1, arg2) {
            arg0.label = getStringFromWasm0(arg1, arg2);
        },
        __wbg_set_layout_b9b36c291ee7f2e1: function(arg0, arg1) {
            arg0.layout = arg1;
        },
        __wbg_set_layout_cccbb8f794df887c: function(arg0, arg1) {
            arg0.layout = arg1;
        },
        __wbg_set_mapped_at_creation_34da9d6bf64b78d6: function(arg0, arg1) {
            arg0.mappedAtCreation = arg1 !== 0;
        },
        __wbg_set_min_binding_size_9389ad67218af140: function(arg0, arg1) {
            arg0.minBindingSize = arg1;
        },
        __wbg_set_module_5f33a55198ad797f: function(arg0, arg1) {
            arg0.module = arg1;
        },
        __wbg_set_multisampled_b526741755338725: function(arg0, arg1) {
            arg0.multisampled = arg1 !== 0;
        },
        __wbg_set_offset_1a0f95ffb7dd6f40: function(arg0, arg1) {
            arg0.offset = arg1;
        },
        __wbg_set_power_preference_915480f4b9565dc2: function(arg0, arg1) {
            arg0.powerPreference = __wbindgen_enum_GpuPowerPreference[arg1];
        },
        __wbg_set_query_set_0a78c3dcb3650b2b: function(arg0, arg1) {
            arg0.querySet = arg1;
        },
        __wbg_set_required_features_42347bf311233eb6: function(arg0, arg1) {
            arg0.requiredFeatures = arg1;
        },
        __wbg_set_resource_f2d72f59cc9308fc: function(arg0, arg1) {
            arg0.resource = arg1;
        },
        __wbg_set_sample_type_6d1e240a417bdf44: function(arg0, arg1) {
            arg0.sampleType = __wbindgen_enum_GpuTextureSampleType[arg1];
        },
        __wbg_set_sampler_f864a162bad4f66f: function(arg0, arg1) {
            arg0.sampler = arg1;
        },
        __wbg_set_size_6b2fc4a0e39e4d07: function(arg0, arg1) {
            arg0.size = arg1;
        },
        __wbg_set_size_c78ae8d2e2181815: function(arg0, arg1) {
            arg0.size = arg1;
        },
        __wbg_set_storage_texture_c3919f22b211c542: function(arg0, arg1) {
            arg0.storageTexture = arg1;
        },
        __wbg_set_texture_bf820de044f0d291: function(arg0, arg1) {
            arg0.texture = arg1;
        },
        __wbg_set_timestamp_writes_b9e1d87e2f057bd1: function(arg0, arg1) {
            arg0.timestampWrites = arg1;
        },
        __wbg_set_type_40f4ae4fa32946cd: function(arg0, arg1) {
            arg0.type = __wbindgen_enum_GpuBufferBindingType[arg1];
        },
        __wbg_set_type_4f1cd48d79f4d6dc: function(arg0, arg1) {
            arg0.type = __wbindgen_enum_GpuSamplerBindingType[arg1];
        },
        __wbg_set_usage_9aa23fa1e13799a8: function(arg0, arg1) {
            arg0.usage = arg1 >>> 0;
        },
        __wbg_set_view_dimension_36c0bf530395d014: function(arg0, arg1) {
            arg0.viewDimension = __wbindgen_enum_GpuTextureViewDimension[arg1];
        },
        __wbg_set_view_dimension_553cd9fa176d06ca: function(arg0, arg1) {
            arg0.viewDimension = __wbindgen_enum_GpuTextureViewDimension[arg1];
        },
        __wbg_set_visibility_eef2d8e9608a8981: function(arg0, arg1) {
            arg0.visibility = arg1 >>> 0;
        },
        __wbg_size_7306c9406e13bf29: function(arg0) {
            const ret = arg0.size;
            return ret;
        },
        __wbg_stack_3b0d974bbf31e44f: function(arg0, arg1) {
            const ret = arg1.stack;
            const ptr1 = passStringToWasm0(ret, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
            const len1 = WASM_VECTOR_LEN;
            getDataViewMemory0().setInt32(arg0 + 4 * 1, len1, true);
            getDataViewMemory0().setInt32(arg0 + 4 * 0, ptr1, true);
        },
        // Global-object probes: each returns 0 when the candidate global is absent.
        __wbg_static_accessor_GLOBAL_THIS_a1248013d790bf5f: function() {
            const ret = typeof globalThis === 'undefined' ? null : globalThis;
            return isLikeNone(ret) ? 0 : addToExternrefTable0(ret);
        },
        __wbg_static_accessor_GLOBAL_f2e0f995a21329ff: function() {
            const ret = typeof global === 'undefined' ? null : global;
            return isLikeNone(ret) ? 0 : addToExternrefTable0(ret);
        },
        __wbg_static_accessor_SELF_24f78b6d23f286ea: function() {
            const ret = typeof self === 'undefined' ? null : self;
            return isLikeNone(ret) ? 0 : addToExternrefTable0(ret);
        },
        __wbg_static_accessor_WINDOW_59fd959c540fe405: function() {
            const ret = typeof window === 'undefined' ? null : window;
            return isLikeNone(ret) ? 0 : addToExternrefTable0(ret);
        },
        __wbg_status_44ecb0ac1da253f4: function(arg0) {
            const ret = arg0.status;
            return ret;
        },
        __wbg_storage_8f8e63186ec77353: function(arg0) {
            const ret = arg0.storage;
            return ret;
        },
        __wbg_submit_2521bdd9a232bca7: function(arg0, arg1) {
            arg0.submit(arg1);
        },
        __wbg_then_00eed3ac0b8e82cb: function(arg0, arg1, arg2) {
            const ret = arg0.then(arg1, arg2);
            return ret;
        },
        __wbg_then_479d77cb064907ee: function(arg0, arg1, arg2) {
            const ret = arg0.then(arg1, arg2);
            return ret;
        },
        __wbg_then_a0c8db0381c8994c: function(arg0, arg1) {
            const ret = arg0.then(arg1);
            return ret;
        },
        __wbg_unmap_815a075fd850cb73: function(arg0) {
            arg0.unmap();
        },
        __wbg_userAgent_d58193cc32293b16: function() { return handleError(function (arg0, arg1) {
            const ret = arg1.userAgent;
            const ptr1 = passStringToWasm0(ret, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
            const len1 = WASM_VECTOR_LEN;
            getDataViewMemory0().setInt32(arg0 + 4 * 1, len1, true);
            getDataViewMemory0().setInt32(arg0 + 4 * 0, ptr1, true);
        }, arguments); },
        __wbg_writeBuffer_e8b792fb0962f30d: function() { return handleError(function (arg0, arg1, arg2, arg3, arg4, arg5) {
            arg0.writeBuffer(arg1, arg2, arg3, arg4, arg5);
        }, arguments); },
        __wbg_write_fc53b37dcc29642e: function() { return handleError(function (arg0, arg1, arg2) {
            const ret = arg0.write(getArrayU8FromWasm0(arg1, arg2));
            return ret;
        }, arguments); },
        __wbindgen_cast_0000000000000001: function(arg0, arg1) {
            // Cast intrinsic for `Closure(Closure { owned: true, function: Function { arguments: [Externref], shim_idx: 155, ret: Unit, inner_ret: Some(Unit) }, mutable: true }) -> Externref`.
            const ret = makeMutClosure(arg0, arg1, wasm_bindgen__convert__closures_____invoke__h235e00bf230ad8a4);
            return ret;
        },
        __wbindgen_cast_0000000000000002: function(arg0, arg1) {
            // Cast intrinsic for `Closure(Closure { owned: true, function: Function { arguments: [Externref], shim_idx: 177, ret: Result(Unit), inner_ret: Some(Result(Unit)) }, mutable: true }) -> Externref`.
            const ret = makeMutClosure(arg0, arg1, wasm_bindgen__convert__closures_____invoke__h7ed8ea06cc0c8ca5);
            return ret;
        },
        __wbindgen_cast_0000000000000003: function(arg0) {
            // Cast intrinsic for `F64 -> Externref`.
            const ret = arg0;
            return ret;
        },
        __wbindgen_cast_0000000000000004: function(arg0, arg1) {
            // Cast intrinsic for `Ref(Slice(U8)) -> NamedExternref("Uint8Array")`.
            const ret = getArrayU8FromWasm0(arg0, arg1);
            return ret;
        },
        __wbindgen_cast_0000000000000005: function(arg0, arg1) {
            // Cast intrinsic for `Ref(String) -> Externref`.
            const ret = getStringFromWasm0(arg0, arg1);
            return ret;
        },
        __wbindgen_init_externref_table: function() {
            // Seed the externref table with the canonical singletons
            // (undefined, null, true, false) at fixed slots.
            const table = wasm.__wbindgen_externrefs;
            const offset = table.grow(4);
            table.set(0, undefined);
            table.set(offset + 0, undefined);
            table.set(offset + 1, null);
            table.set(offset + 2, true);
            table.set(offset + 3, false);
        },
    };
    return {
        __proto__: null,
        "./flare_web_bg.js": import0,
    };
}
2390
+
2391
// Trampoline into the wasm closure shim: (closure data ptr a, data ptr b, externref arg).
// No return value; the Rust closure returns Unit.
function wasm_bindgen__convert__closures_____invoke__h235e00bf230ad8a4(a, b, value) {
    wasm.wasm_bindgen__convert__closures_____invoke__h235e00bf230ad8a4(a, b, value);
}
2394
+
2395
// Trampoline into a fallible wasm closure shim. The wasm call returns a
// (slot, is_error) pair; on error the externref stored at `slot` is taken
// from the table and rethrown into JS.
function wasm_bindgen__convert__closures_____invoke__h7ed8ea06cc0c8ca5(a, b, value) {
    const [slot, failed] = wasm.wasm_bindgen__convert__closures_____invoke__h7ed8ea06cc0c8ca5(a, b, value);
    if (failed) {
        throw takeFromExternrefTable0(slot);
    }
}
2401
+
2402
// Trampoline into the two-argument wasm closure shim (used by the Promise
// executor in `__wbg_new_typed_*`). No return value.
function wasm_bindgen__convert__closures_____invoke__hcdfd434894ba1863(a, b, arg2, arg3) {
    wasm.wasm_bindgen__convert__closures_____invoke__hcdfd434894ba1863(a, b, arg2, arg3);
}
2405
+
2406
+
2407
// Lookup tables translating integer enum discriminants received from the
// wasm side into the WebGPU IDL string values the browser expects.
// Array order is significant: the index is the wasm-side discriminant.
const __wbindgen_enum_GpuBufferBindingType = ["uniform", "storage", "read-only-storage"];

const __wbindgen_enum_GpuPowerPreference = ["low-power", "high-performance"];

const __wbindgen_enum_GpuSamplerBindingType = ["filtering", "non-filtering", "comparison"];

const __wbindgen_enum_GpuStorageTextureAccess = ["write-only", "read-only", "read-write"];

// Full GPUTextureFormat list, including compressed (BC/ETC2/ASTC) families.
const __wbindgen_enum_GpuTextureFormat = ["r8unorm", "r8snorm", "r8uint", "r8sint", "r16uint", "r16sint", "r16float", "rg8unorm", "rg8snorm", "rg8uint", "rg8sint", "r32uint", "r32sint", "r32float", "rg16uint", "rg16sint", "rg16float", "rgba8unorm", "rgba8unorm-srgb", "rgba8snorm", "rgba8uint", "rgba8sint", "bgra8unorm", "bgra8unorm-srgb", "rgb9e5ufloat", "rgb10a2uint", "rgb10a2unorm", "rg11b10ufloat", "rg32uint", "rg32sint", "rg32float", "rgba16uint", "rgba16sint", "rgba16float", "rgba32uint", "rgba32sint", "rgba32float", "stencil8", "depth16unorm", "depth24plus", "depth24plus-stencil8", "depth32float", "depth32float-stencil8", "bc1-rgba-unorm", "bc1-rgba-unorm-srgb", "bc2-rgba-unorm", "bc2-rgba-unorm-srgb", "bc3-rgba-unorm", "bc3-rgba-unorm-srgb", "bc4-r-unorm", "bc4-r-snorm", "bc5-rg-unorm", "bc5-rg-snorm", "bc6h-rgb-ufloat", "bc6h-rgb-float", "bc7-rgba-unorm", "bc7-rgba-unorm-srgb", "etc2-rgb8unorm", "etc2-rgb8unorm-srgb", "etc2-rgb8a1unorm", "etc2-rgb8a1unorm-srgb", "etc2-rgba8unorm", "etc2-rgba8unorm-srgb", "eac-r11unorm", "eac-r11snorm", "eac-rg11unorm", "eac-rg11snorm", "astc-4x4-unorm", "astc-4x4-unorm-srgb", "astc-5x4-unorm", "astc-5x4-unorm-srgb", "astc-5x5-unorm", "astc-5x5-unorm-srgb", "astc-6x5-unorm", "astc-6x5-unorm-srgb", "astc-6x6-unorm", "astc-6x6-unorm-srgb", "astc-8x5-unorm", "astc-8x5-unorm-srgb", "astc-8x6-unorm", "astc-8x6-unorm-srgb", "astc-8x8-unorm", "astc-8x8-unorm-srgb", "astc-10x5-unorm", "astc-10x5-unorm-srgb", "astc-10x6-unorm", "astc-10x6-unorm-srgb", "astc-10x8-unorm", "astc-10x8-unorm-srgb", "astc-10x10-unorm", "astc-10x10-unorm-srgb", "astc-12x10-unorm", "astc-12x10-unorm-srgb", "astc-12x12-unorm", "astc-12x12-unorm-srgb"];

const __wbindgen_enum_GpuTextureSampleType = ["float", "unfilterable-float", "depth", "sint", "uint"];

const __wbindgen_enum_GpuTextureViewDimension = ["1d", "2d", "2d-array", "cube", "cube-array", "3d"];
2426
// Finalization registries free the wasm-side allocation backing a JS wrapper
// when the wrapper is garbage-collected without an explicit `free()` call.
// Runtimes lacking FinalizationRegistry get no-op stand-ins, so wrappers
// there must be freed manually or the wasm heap memory is never reclaimed.
const FlareEngineFinalization = (typeof FinalizationRegistry === 'undefined')
    ? { register: () => {}, unregister: () => {} }
    : new FinalizationRegistry(ptr => wasm.__wbg_flareengine_free(ptr >>> 0, 1));
const FlareProgressiveLoaderFinalization = (typeof FinalizationRegistry === 'undefined')
    ? { register: () => {}, unregister: () => {} }
    : new FinalizationRegistry(ptr => wasm.__wbg_flareprogressiveloader_free(ptr >>> 0, 1));
const FlareTokenizerFinalization = (typeof FinalizationRegistry === 'undefined')
    ? { register: () => {}, unregister: () => {} }
    : new FinalizationRegistry(ptr => wasm.__wbg_flaretokenizer_free(ptr >>> 0, 1));
2435
+
2436
// Store `obj` in the wasm-owned externref table and return its slot index,
// which is how JS values are handed across the FFI boundary by reference.
function addToExternrefTable0(obj) {
    const slot = wasm.__externref_table_alloc();
    wasm.__wbindgen_externrefs.set(slot, obj);
    return slot;
}
2441
+
2442
// Destroys the wasm side of a leaked JS closure when the JS function object
// is garbage-collected; `state.a`/`state.b` are the closure's data pointers.
// A no-op stand-in is used where FinalizationRegistry is unavailable.
const CLOSURE_DTORS = (typeof FinalizationRegistry === 'undefined')
    ? { register: () => {}, unregister: () => {} }
    : new FinalizationRegistry(state => wasm.__wbindgen_destroy_closure(state.a, state.b));
2445
+
2446
/**
 * Render an arbitrary JS value as a short, human-readable string for
 * diagnostics (used by wasm-bindgen's Debug formatting).
 *
 * @param {*} val - value to describe
 * @returns {string} e.g. `5`, `"hi"`, `[1, 2]`, `Object({"a":1})`, `Map`
 */
function debugString(val) {
    // Numbers, booleans, null and undefined all render via interpolation.
    const type = typeof val;
    if (type == 'number' || type == 'boolean' || val == null) {
        return `${val}`;
    }
    if (type == 'string') {
        return `"${val}"`;
    }
    if (type == 'symbol') {
        const description = val.description;
        return description == null ? 'Symbol' : `Symbol(${description})`;
    }
    if (type == 'function') {
        const name = val.name;
        return (typeof name == 'string' && name.length > 0) ? `Function(${name})` : 'Function';
    }
    // Arrays: recurse element-wise and join like `[a, b, c]`.
    if (Array.isArray(val)) {
        return `[${val.map(debugString).join(', ')}]`;
    }
    // Everything else: classify via the built-in `[object ClassName]` tag.
    const builtInMatches = /\[object ([^\]]+)\]/.exec(toString.call(val));
    if (!builtInMatches || builtInMatches.length <= 1) {
        // Tag didn't follow the standard shape; return it verbatim.
        return toString.call(val);
    }
    const className = builtInMatches[1];
    if (className == 'Object') {
        // Plain objects / user classes: JSON is concise and cycle-safe via try.
        try {
            return 'Object(' + JSON.stringify(val) + ')';
        } catch (_) {
            return 'Object';
        }
    }
    // Errors get name, message and stack for maximum context.
    if (val instanceof Error) {
        return `${val.name}: ${val.message}\n${val.stack}`;
    }
    // TODO we could test for more things here, like `Set`s and `Map`s.
    return className;
}
2510
+
2511
/**
 * View `len` f32 values of wasm memory starting at byte offset `ptr`.
 * Returns a live subarray over the wasm heap; it is invalidated if the
 * wasm memory grows.
 */
function getArrayF32FromWasm0(ptr, len) {
    const start = (ptr >>> 0) / 4; // byte offset -> f32 element index
    return getFloat32ArrayMemory0().subarray(start, start + len);
}
2515
+
2516
/**
 * View `len` u32 values of wasm memory starting at byte offset `ptr`.
 * Returns a live subarray over the wasm heap; it is invalidated if the
 * wasm memory grows.
 */
function getArrayU32FromWasm0(ptr, len) {
    const start = (ptr >>> 0) / 4; // byte offset -> u32 element index
    return getUint32ArrayMemory0().subarray(start, start + len);
}
2520
+
2521
/**
 * View `len` bytes of wasm memory starting at offset `ptr`.
 * Returns a live subarray over the wasm heap; it is invalidated if the
 * wasm memory grows.
 */
function getArrayU8FromWasm0(ptr, len) {
    const start = ptr >>> 0;
    return getUint8ArrayMemory0().subarray(start, start + len);
}
2525
+
2526
// Cached DataView over the wasm linear memory. Recreated lazily whenever the
// underlying ArrayBuffer changes (wasm memory growth detaches the old buffer).
let cachedDataViewMemory0 = null;
// Return a DataView over the current wasm memory buffer.
// The staleness check handles both engines that expose `buffer.detached`
// (spec'd detection) and older engines where we instead compare the cached
// buffer identity against `wasm.memory.buffer`.
function getDataViewMemory0() {
    if (cachedDataViewMemory0 === null || cachedDataViewMemory0.buffer.detached === true || (cachedDataViewMemory0.buffer.detached === undefined && cachedDataViewMemory0.buffer !== wasm.memory.buffer)) {
        cachedDataViewMemory0 = new DataView(wasm.memory.buffer);
    }
    return cachedDataViewMemory0;
}
2533
+
2534
// Cached Float32Array view over wasm linear memory; rebuilt on growth.
let cachedFloat32ArrayMemory0 = null;
// Return a Float32Array over the current wasm memory buffer. A byteLength of
// zero means the cached view's buffer was detached by a memory growth, so a
// fresh view must be created.
function getFloat32ArrayMemory0() {
    const stale = cachedFloat32ArrayMemory0 === null
        || cachedFloat32ArrayMemory0.byteLength === 0;
    if (stale) {
        cachedFloat32ArrayMemory0 = new Float32Array(wasm.memory.buffer);
    }
    return cachedFloat32ArrayMemory0;
}
2541
+
2542
/**
 * Decode `len` UTF-8 bytes at wasm memory offset `ptr` into a JS string.
 */
function getStringFromWasm0(ptr, len) {
    return decodeText(ptr >>> 0, len);
}
2546
+
2547
// Cached Uint32Array view over wasm linear memory; rebuilt on growth.
let cachedUint32ArrayMemory0 = null;
// Return a Uint32Array over the current wasm memory buffer. A byteLength of
// zero means the cached view's buffer was detached by a memory growth, so a
// fresh view must be created.
function getUint32ArrayMemory0() {
    const stale = cachedUint32ArrayMemory0 === null
        || cachedUint32ArrayMemory0.byteLength === 0;
    if (stale) {
        cachedUint32ArrayMemory0 = new Uint32Array(wasm.memory.buffer);
    }
    return cachedUint32ArrayMemory0;
}
2554
+
2555
// Cached Uint8Array view over wasm linear memory; rebuilt on growth.
let cachedUint8ArrayMemory0 = null;
// Return a Uint8Array over the current wasm memory buffer. A byteLength of
// zero means the cached view's buffer was detached by a memory growth, so a
// fresh view must be created.
function getUint8ArrayMemory0() {
    const stale = cachedUint8ArrayMemory0 === null
        || cachedUint8ArrayMemory0.byteLength === 0;
    if (stale) {
        cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer);
    }
    return cachedUint8ArrayMemory0;
}
2562
+
2563
// Invoke an imported JS function on behalf of wasm, converting a thrown JS
// exception into the wasm-bindgen error ABI: the exception is parked in the
// externref table and its index recorded via `__wbindgen_exn_store`, and the
// call returns `undefined` instead of propagating. The wasm side then picks
// the exception up from there.
function handleError(f, args) {
    try {
        return f.apply(this, args);
    } catch (e) {
        const idx = addToExternrefTable0(e);
        wasm.__wbindgen_exn_store(idx);
    }
}
2571
+
2572
/**
 * True when `x` is `null` or `undefined` — i.e. maps to Rust's `Option::None`.
 * Uses strict comparisons deliberately (not `x == null`) so exotic values
 * keep their exact semantics.
 * @param {*} x
 * @returns {boolean}
 */
function isLikeNone(x) {
    return x === null || x === undefined;
}
2575
+
2576
// Wrap a Rust `FnMut` closure (identified by the pair `arg0`/`arg1`) in a JS
// function. The wrapper reference-counts invocations so the Rust environment
// is only destroyed once no call is in flight and the wasm side has released
// its handle; `CLOSURE_DTORS` additionally destroys it if the JS wrapper is
// garbage-collected.
function makeMutClosure(arg0, arg1, f) {
    // cnt starts at 1 for the wasm side's ownership of the closure.
    const state = { a: arg0, b: arg1, cnt: 1 };
    const real = (...args) => {

        // First up with a closure we increment the internal reference
        // count. This ensures that the Rust closure environment won't
        // be deallocated while we're invoking it.
        state.cnt++;
        const a = state.a;
        // Zero out `state.a` for the duration of the call so a reentrant
        // invocation cannot observe (or double-free) the environment pointer.
        state.a = 0;
        try {
            return f(a, state.b, ...args);
        } finally {
            // Restore the pointer, then drop this call's reference; if the
            // count hits zero the environment is destroyed below.
            state.a = a;
            real._wbg_cb_unref();
        }
    };
    // Called both from `finally` above and by wasm when it releases the
    // closure; the last reference destroys the Rust environment.
    real._wbg_cb_unref = () => {
        if (--state.cnt === 0) {
            wasm.__wbindgen_destroy_closure(state.a, state.b);
            state.a = 0;
            CLOSURE_DTORS.unregister(state);
        }
    };
    CLOSURE_DTORS.register(real, state, state);
    return real;
}
2603
+
2604
/**
 * Copy a u32 array into freshly malloc'd wasm memory.
 * The element count is reported out-of-band via `WASM_VECTOR_LEN`.
 * @returns {number} byte offset of the copied data in wasm memory
 */
function passArray32ToWasm0(arg, malloc) {
    const byteLength = arg.length * 4;
    const ptr = malloc(byteLength, 4) >>> 0;
    getUint32ArrayMemory0().set(arg, ptr / 4);
    WASM_VECTOR_LEN = arg.length;
    return ptr;
}
2610
+
2611
/**
 * Copy a byte array into freshly malloc'd wasm memory.
 * The element count is reported out-of-band via `WASM_VECTOR_LEN`.
 * @returns {number} byte offset of the copied data in wasm memory
 */
function passArray8ToWasm0(arg, malloc) {
    const ptr = malloc(arg.length, 1) >>> 0;
    getUint8ArrayMemory0().set(arg, ptr);
    WASM_VECTOR_LEN = arg.length;
    return ptr;
}
2617
+
2618
/**
 * Copy an f32 array into freshly malloc'd wasm memory.
 * The element count is reported out-of-band via `WASM_VECTOR_LEN`.
 * @returns {number} byte offset of the copied data in wasm memory
 */
function passArrayF32ToWasm0(arg, malloc) {
    const byteLength = arg.length * 4;
    const ptr = malloc(byteLength, 4) >>> 0;
    getFloat32ArrayMemory0().set(arg, ptr / 4);
    WASM_VECTOR_LEN = arg.length;
    return ptr;
}
2624
+
2625
+ function passStringToWasm0(arg, malloc, realloc) {
2626
+ if (realloc === undefined) {
2627
+ const buf = cachedTextEncoder.encode(arg);
2628
+ const ptr = malloc(buf.length, 1) >>> 0;
2629
+ getUint8ArrayMemory0().subarray(ptr, ptr + buf.length).set(buf);
2630
+ WASM_VECTOR_LEN = buf.length;
2631
+ return ptr;
2632
+ }
2633
+
2634
+ let len = arg.length;
2635
+ let ptr = malloc(len, 1) >>> 0;
2636
+
2637
+ const mem = getUint8ArrayMemory0();
2638
+
2639
+ let offset = 0;
2640
+
2641
+ for (; offset < len; offset++) {
2642
+ const code = arg.charCodeAt(offset);
2643
+ if (code > 0x7F) break;
2644
+ mem[ptr + offset] = code;
2645
+ }
2646
+ if (offset !== len) {
2647
+ if (offset !== 0) {
2648
+ arg = arg.slice(offset);
2649
+ }
2650
+ ptr = realloc(ptr, len, len = offset + arg.length * 3, 1) >>> 0;
2651
+ const view = getUint8ArrayMemory0().subarray(ptr + offset, ptr + len);
2652
+ const ret = cachedTextEncoder.encodeInto(arg, view);
2653
+
2654
+ offset += ret.written;
2655
+ ptr = realloc(ptr, len, offset, 1) >>> 0;
2656
+ }
2657
+
2658
+ WASM_VECTOR_LEN = offset;
2659
+ return ptr;
2660
+ }
2661
+
2662
/**
 * Remove and return the JS value stored at externref-table slot `idx`,
 * freeing the slot for reuse.
 * @param {number} idx - table index previously produced by the wasm side
 * @returns {*} the stored value
 */
function takeFromExternrefTable0(idx) {
    const taken = wasm.__wbindgen_externrefs.get(idx);
    wasm.__externref_table_dealloc(idx);
    return taken;
}
2667
+
2668
// Shared strict UTF-8 decoder (throws on invalid sequences, ignores BOM).
let cachedTextDecoder = new TextDecoder('utf-8', { ignoreBOM: true, fatal: true });
// Warm-up call; primes the decoder so the first real decode is cheap.
cachedTextDecoder.decode();
// NOTE(review): the constant name and the reset logic below suggest this is
// the upstream wasm-bindgen workaround for a Safari TextDecoder issue after
// ~2 GiB of cumulative decoded bytes — the decoder is recreated once that
// running total is reached. Threshold is just under 2^31.
const MAX_SAFARI_DECODE_BYTES = 2146435072;
let numBytesDecoded = 0;
// Decode `len` bytes of wasm memory at offset `ptr` as UTF-8.
function decodeText(ptr, len) {
    numBytesDecoded += len;
    if (numBytesDecoded >= MAX_SAFARI_DECODE_BYTES) {
        // Replace the decoder and restart the byte count at this call's size.
        cachedTextDecoder = new TextDecoder('utf-8', { ignoreBOM: true, fatal: true });
        cachedTextDecoder.decode();
        numBytesDecoded = len;
    }
    return cachedTextDecoder.decode(getUint8ArrayMemory0().subarray(ptr, ptr + len));
}
2681
+
2682
// Shared UTF-8 encoder for passing JS strings into wasm memory.
const cachedTextEncoder = new TextEncoder();

// Some engines ship TextEncoder without `encodeInto`; emulate it with a
// plain encode followed by a copy into the caller's view.
if (!('encodeInto' in cachedTextEncoder)) {
    cachedTextEncoder.encodeInto = function (arg, view) {
        const encoded = cachedTextEncoder.encode(arg);
        view.set(encoded);
        return {
            read: arg.length,       // UTF-16 code units consumed
            written: encoded.length // UTF-8 bytes produced
        };
    };
}
2694
+
2695
// Byte/element length of the most recent pass*ToWasm0 copy; read by callers
// immediately after obtaining the returned pointer (out-of-band second
// return value).
let WASM_VECTOR_LEN = 0;

// The compiled WebAssembly.Module and its instantiated export object.
// `wasm` doubles as the "already initialized" sentinel in initSync/__wbg_init.
let wasmModule, wasm;
// Record the instantiated module, drop all stale typed-array caches (they
// reference the pre-instantiation buffer, if any), and run the module's
// start hook. Returns the export object.
function __wbg_finalize_init(instance, module) {
    wasm = instance.exports;
    wasmModule = module;
    cachedDataViewMemory0 = null;
    cachedFloat32ArrayMemory0 = null;
    cachedUint32ArrayMemory0 = null;
    cachedUint8ArrayMemory0 = null;
    wasm.__wbindgen_start();
    return wasm;
}
2708
+
2709
/**
 * Instantiate the wasm module from whatever the caller provided.
 *
 * Accepts a fetch `Response` (preferring `instantiateStreaming`, with a
 * fallback to buffering when the server's MIME type is wrong), a compiled
 * `WebAssembly.Module`, or raw bytes.
 *
 * @returns {Promise<{instance: WebAssembly.Instance, module: WebAssembly.Module}|WebAssembly.WebAssemblyInstantiatedSource>}
 */
async function __wbg_load(module, imports) {
    // Response types for which a streaming failure is plausibly a MIME
    // problem rather than a hard error.
    const expectedResponseType = (type) =>
        type === 'basic' || type === 'cors' || type === 'default';

    if (!(typeof Response === 'function' && module instanceof Response)) {
        // Compiled module or raw bytes: instantiate directly. A Module input
        // yields a bare Instance, which we pair back up with its module.
        const instance = await WebAssembly.instantiate(module, imports);
        return (instance instanceof WebAssembly.Instance)
            ? { instance, module }
            : instance;
    }

    if (typeof WebAssembly.instantiateStreaming === 'function') {
        try {
            return await WebAssembly.instantiateStreaming(module, imports);
        } catch (e) {
            const validResponse = module.ok && expectedResponseType(module.type);
            // Only swallow the error when it looks like a MIME-type issue;
            // anything else propagates unchanged.
            if (!validResponse || module.headers.get('Content-Type') === 'application/wasm') {
                throw e;
            }
            console.warn("`WebAssembly.instantiateStreaming` failed because your server does not serve Wasm with `application/wasm` MIME type. Falling back to `WebAssembly.instantiate` which is slower. Original error:\n", e);
        }
    }

    // Buffered fallback: download fully, then instantiate from bytes.
    const bytes = await module.arrayBuffer();
    return await WebAssembly.instantiate(bytes, imports);
}
2743
+
2744
/**
 * Synchronously initialize the wasm module. Idempotent: returns the existing
 * export object if initialization already happened.
 *
 * @param {{module: WebAssembly.Module|BufferSource}|WebAssembly.Module|BufferSource} module
 *   either an options object `{ module }` (preferred) or the module/bytes
 *   directly (deprecated calling convention, warns).
 * @returns {object} the wasm export object
 */
function initSync(module) {
    if (wasm !== undefined) return wasm;

    if (module !== undefined) {
        if (Object.getPrototypeOf(module) === Object.prototype) {
            // New-style options bag: unwrap the `module` property.
            ({module} = module);
        } else {
            console.warn('using deprecated parameters for `initSync()`; pass a single object instead');
        }
    }

    const imports = __wbg_get_imports();
    // Compile raw bytes if needed; pass a ready Module straight through.
    const compiled = (module instanceof WebAssembly.Module)
        ? module
        : new WebAssembly.Module(module);
    const instance = new WebAssembly.Instance(compiled, imports);
    return __wbg_finalize_init(instance, compiled);
}
2763
+
2764
// Asynchronously initialize the wasm module (the package's default export).
// Idempotent: returns the existing export object if already initialized.
//
// `module_or_path` may be an options bag `{ module_or_path }` (preferred), a
// URL/Request/string to fetch, a Response, a compiled Module, or raw bytes.
// When omitted, `flare_web_bg.wasm` is loaded from next to this script.
async function __wbg_init(module_or_path) {
    if (wasm !== undefined) return wasm;


    if (module_or_path !== undefined) {
        if (Object.getPrototypeOf(module_or_path) === Object.prototype) {
            // New-style options bag: unwrap the `module_or_path` property.
            ({module_or_path} = module_or_path)
        } else {
            console.warn('using deprecated parameters for the initialization function; pass a single object instead')
        }
    }

    if (module_or_path === undefined) {
        // Default: sibling .wasm file resolved relative to this module.
        module_or_path = new URL('flare_web_bg.wasm', import.meta.url);
    }
    const imports = __wbg_get_imports();

    // Strings, Requests and URLs are fetched; everything else is passed to
    // __wbg_load as-is (Response, Module, or bytes).
    if (typeof module_or_path === 'string' || (typeof Request === 'function' && module_or_path instanceof Request) || (typeof URL === 'function' && module_or_path instanceof URL)) {
        module_or_path = fetch(module_or_path);
    }

    const { instance, module } = await __wbg_load(await module_or_path, imports);

    return __wbg_finalize_init(instance, module);
}
2789
+
2790
+ export { initSync, __wbg_init as default };