@huggingface/transformers 4.0.0-next.5 → 4.0.0-next.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -4
- package/dist/ort-wasm-simd-threaded.jsep.mjs +24 -24
- package/dist/transformers.js +2189 -1015
- package/dist/transformers.min.js +16 -16
- package/dist/transformers.node.cjs +2234 -1029
- package/dist/transformers.node.min.cjs +20 -20
- package/dist/transformers.node.min.mjs +20 -20
- package/dist/transformers.node.mjs +2194 -1017
- package/dist/transformers.web.js +2175 -1001
- package/dist/transformers.web.min.js +18 -18
- package/package.json +4 -4
- package/src/backends/onnx.js +77 -58
- package/src/backends/utils/cacheWasm.js +22 -43
- package/src/cache_utils.js +62 -0
- package/src/configs.js +32 -5
- package/src/env.js +36 -6
- package/src/image_processors_utils.js +3 -3
- package/src/models/auto/modeling_auto.js +14 -1
- package/src/models/chatterbox/modeling_chatterbox.js +1 -1
- package/src/models/detr/image_processing_detr.js +1 -1
- package/src/models/feature_extractors.js +2 -0
- package/src/models/gemma3n/modeling_gemma3n.js +2 -0
- package/src/models/granite_speech/feature_extraction_granite_speech.js +58 -0
- package/src/models/granite_speech/modeling_granite_speech.js +5 -0
- package/src/models/granite_speech/processing_granite_speech.js +62 -0
- package/src/models/grounding_dino/image_processing_grounding_dino.js +1 -1
- package/src/models/idefics3/modeling_idefics3.js +5 -32
- package/src/models/image_processors.js +1 -0
- package/src/models/lfm2_vl/image_processing_lfm2_vl.js +305 -0
- package/src/models/lfm2_vl/modeling_lfm2_vl.js +13 -0
- package/src/models/lfm2_vl/processing_lfm2_vl.js +77 -0
- package/src/models/llava/modeling_llava.js +1 -1
- package/src/models/mistral3/modeling_mistral3.js +2 -2
- package/src/models/modeling_utils.js +234 -292
- package/src/models/models.js +9 -0
- package/src/models/olmo_hybrid/modeling_olmo_hybrid.js +5 -0
- package/src/models/paligemma/modeling_paligemma.js +2 -25
- package/src/models/processors.js +3 -0
- package/src/models/qwen2_5_vl/modeling_qwen2_5_vl.js +5 -1
- package/src/models/qwen2_moe/modeling_qwen2_moe.js +5 -0
- package/src/models/qwen2_vl/image_processing_qwen2_vl.js +1 -41
- package/src/models/qwen2_vl/modeling_qwen2_vl.js +36 -3
- package/src/models/qwen3_5/modeling_qwen3_5.js +1 -0
- package/src/models/qwen3_5_moe/modeling_qwen3_5_moe.js +2 -1
- package/src/models/qwen3_moe/modeling_qwen3_moe.js +5 -0
- package/src/models/qwen3_next/modeling_qwen3_next.js +5 -0
- package/src/models/qwen3_vl/modeling_qwen3_vl.js +2 -1
- package/src/models/qwen3_vl_moe/modeling_qwen3_vl_moe.js +4 -0
- package/src/models/registry.js +39 -4
- package/src/models/sam/image_processing_sam.js +1 -1
- package/src/models/session.js +17 -6
- package/src/models/smolvlm/modeling_smolvlm.js +7 -0
- package/src/models/ultravox/modeling_ultravox.js +1 -3
- package/src/models/voxtral/modeling_voxtral.js +3 -0
- package/src/models/voxtral_realtime/feature_extraction_voxtral_realtime.js +71 -0
- package/src/models/voxtral_realtime/modeling_voxtral_realtime.js +239 -0
- package/src/models/voxtral_realtime/processing_voxtral_realtime.js +113 -0
- package/src/models/whisper/feature_extraction_whisper.js +2 -12
- package/src/pipelines/index.js +2 -84
- package/src/pipelines.js +40 -77
- package/src/transformers.js +2 -0
- package/src/utils/audio.js +18 -2
- package/src/utils/cache/CrossOriginStorageCache.js +251 -0
- package/src/utils/cache/FileCache.js +128 -0
- package/src/utils/cache/cross-origin-storage.d.ts +38 -0
- package/src/utils/cache.js +8 -3
- package/src/utils/hub/{files.js → FileResponse.js} +0 -105
- package/src/utils/hub/utils.js +35 -1
- package/src/utils/hub.js +6 -5
- package/src/utils/image.js +12 -13
- package/src/utils/lru_cache.js +67 -0
- package/src/utils/memoize_promise.js +45 -0
- package/src/utils/model_registry/ModelRegistry.js +70 -23
- package/src/utils/model_registry/get_file_metadata.js +14 -2
- package/src/utils/model_registry/get_model_files.js +63 -78
- package/src/utils/model_registry/get_pipeline_files.js +15 -24
- package/src/utils/model_registry/is_cached.js +81 -4
- package/src/utils/tensor.js +18 -2
- package/types/backends/onnx.d.ts.map +1 -1
- package/types/backends/utils/cacheWasm.d.ts +3 -17
- package/types/backends/utils/cacheWasm.d.ts.map +1 -1
- package/types/cache_utils.d.ts +29 -0
- package/types/cache_utils.d.ts.map +1 -0
- package/types/configs.d.ts.map +1 -1
- package/types/env.d.ts +18 -3
- package/types/env.d.ts.map +1 -1
- package/types/image_processors_utils.d.ts +17 -1
- package/types/image_processors_utils.d.ts.map +1 -1
- package/types/models/auto/modeling_auto.d.ts +6 -0
- package/types/models/auto/modeling_auto.d.ts.map +1 -1
- package/types/models/detr/image_processing_detr.d.ts +1 -1
- package/types/models/feature_extractors.d.ts +2 -0
- package/types/models/gemma3n/modeling_gemma3n.d.ts +2 -0
- package/types/models/gemma3n/modeling_gemma3n.d.ts.map +1 -1
- package/types/models/granite_speech/feature_extraction_granite_speech.d.ts +16 -0
- package/types/models/granite_speech/feature_extraction_granite_speech.d.ts.map +1 -0
- package/types/models/granite_speech/modeling_granite_speech.d.ts +4 -0
- package/types/models/granite_speech/modeling_granite_speech.d.ts.map +1 -0
- package/types/models/granite_speech/processing_granite_speech.d.ts +19 -0
- package/types/models/granite_speech/processing_granite_speech.d.ts.map +1 -0
- package/types/models/grounding_dino/image_processing_grounding_dino.d.ts +1 -1
- package/types/models/idefics3/modeling_idefics3.d.ts +2 -18
- package/types/models/idefics3/modeling_idefics3.d.ts.map +1 -1
- package/types/models/image_processors.d.ts +1 -0
- package/types/models/lfm2_vl/image_processing_lfm2_vl.d.ts +41 -0
- package/types/models/lfm2_vl/image_processing_lfm2_vl.d.ts.map +1 -0
- package/types/models/lfm2_vl/modeling_lfm2_vl.d.ts +4 -0
- package/types/models/lfm2_vl/modeling_lfm2_vl.d.ts.map +1 -0
- package/types/models/lfm2_vl/processing_lfm2_vl.d.ts +18 -0
- package/types/models/lfm2_vl/processing_lfm2_vl.d.ts.map +1 -0
- package/types/models/mistral3/modeling_mistral3.d.ts +2 -2
- package/types/models/mistral3/modeling_mistral3.d.ts.map +1 -1
- package/types/models/modeling_utils.d.ts +44 -24
- package/types/models/modeling_utils.d.ts.map +1 -1
- package/types/models/models.d.ts +9 -0
- package/types/models/olmo_hybrid/modeling_olmo_hybrid.d.ts +8 -0
- package/types/models/olmo_hybrid/modeling_olmo_hybrid.d.ts.map +1 -0
- package/types/models/paligemma/modeling_paligemma.d.ts +2 -8
- package/types/models/paligemma/modeling_paligemma.d.ts.map +1 -1
- package/types/models/processors.d.ts +3 -0
- package/types/models/qwen2_5_vl/modeling_qwen2_5_vl.d.ts +3 -0
- package/types/models/qwen2_5_vl/modeling_qwen2_5_vl.d.ts.map +1 -1
- package/types/models/qwen2_moe/modeling_qwen2_moe.d.ts +8 -0
- package/types/models/qwen2_moe/modeling_qwen2_moe.d.ts.map +1 -0
- package/types/models/qwen2_vl/image_processing_qwen2_vl.d.ts.map +1 -1
- package/types/models/qwen2_vl/modeling_qwen2_vl.d.ts +2 -0
- package/types/models/qwen2_vl/modeling_qwen2_vl.d.ts.map +1 -1
- package/types/models/qwen3_5/modeling_qwen3_5.d.ts +2 -0
- package/types/models/qwen3_5/modeling_qwen3_5.d.ts.map +1 -1
- package/types/models/qwen3_5_moe/modeling_qwen3_5_moe.d.ts +3 -0
- package/types/models/qwen3_5_moe/modeling_qwen3_5_moe.d.ts.map +1 -1
- package/types/models/qwen3_moe/modeling_qwen3_moe.d.ts +8 -0
- package/types/models/qwen3_moe/modeling_qwen3_moe.d.ts.map +1 -0
- package/types/models/qwen3_next/modeling_qwen3_next.d.ts +8 -0
- package/types/models/qwen3_next/modeling_qwen3_next.d.ts.map +1 -0
- package/types/models/qwen3_vl/modeling_qwen3_vl.d.ts +3 -0
- package/types/models/qwen3_vl/modeling_qwen3_vl.d.ts.map +1 -1
- package/types/models/qwen3_vl_moe/modeling_qwen3_vl_moe.d.ts +7 -0
- package/types/models/qwen3_vl_moe/modeling_qwen3_vl_moe.d.ts.map +1 -0
- package/types/models/registry.d.ts +2 -1
- package/types/models/registry.d.ts.map +1 -1
- package/types/models/sam/image_processing_sam.d.ts +1 -1
- package/types/models/session.d.ts +3 -2
- package/types/models/session.d.ts.map +1 -1
- package/types/models/smolvlm/modeling_smolvlm.d.ts +8 -0
- package/types/models/smolvlm/modeling_smolvlm.d.ts.map +1 -0
- package/types/models/ultravox/modeling_ultravox.d.ts +0 -2
- package/types/models/ultravox/modeling_ultravox.d.ts.map +1 -1
- package/types/models/voxtral/modeling_voxtral.d.ts +4 -0
- package/types/models/voxtral/modeling_voxtral.d.ts.map +1 -0
- package/types/models/voxtral_realtime/feature_extraction_voxtral_realtime.d.ts +28 -0
- package/types/models/voxtral_realtime/feature_extraction_voxtral_realtime.d.ts.map +1 -0
- package/types/models/voxtral_realtime/modeling_voxtral_realtime.d.ts +17 -0
- package/types/models/voxtral_realtime/modeling_voxtral_realtime.d.ts.map +1 -0
- package/types/models/voxtral_realtime/processing_voxtral_realtime.d.ts +44 -0
- package/types/models/voxtral_realtime/processing_voxtral_realtime.d.ts.map +1 -0
- package/types/models/whisper/feature_extraction_whisper.d.ts.map +1 -1
- package/types/pipelines/index.d.ts +0 -34
- package/types/pipelines/index.d.ts.map +1 -1
- package/types/pipelines.d.ts.map +1 -1
- package/types/transformers.d.ts +1 -0
- package/types/transformers.d.ts.map +1 -1
- package/types/utils/audio.d.ts +5 -2
- package/types/utils/audio.d.ts.map +1 -1
- package/types/utils/cache/CrossOriginStorageCache.d.ts +120 -0
- package/types/utils/cache/CrossOriginStorageCache.d.ts.map +1 -0
- package/types/utils/cache/FileCache.d.ts +39 -0
- package/types/utils/cache/FileCache.d.ts.map +1 -0
- package/types/utils/cache.d.ts +4 -4
- package/types/utils/cache.d.ts.map +1 -1
- package/types/utils/dtypes.d.ts +1 -1
- package/types/utils/hub/{files.d.ts → FileResponse.d.ts} +1 -38
- package/types/utils/hub/FileResponse.d.ts.map +1 -0
- package/types/utils/hub/utils.d.ts +17 -2
- package/types/utils/hub/utils.d.ts.map +1 -1
- package/types/utils/hub.d.ts +7 -7
- package/types/utils/hub.d.ts.map +1 -1
- package/types/utils/image.d.ts +1 -1
- package/types/utils/image.d.ts.map +1 -1
- package/types/utils/lru_cache.d.ts +38 -0
- package/types/utils/lru_cache.d.ts.map +1 -0
- package/types/utils/memoize_promise.d.ts +14 -0
- package/types/utils/memoize_promise.d.ts.map +1 -0
- package/types/utils/model_registry/ModelRegistry.d.ts +66 -6
- package/types/utils/model_registry/ModelRegistry.d.ts.map +1 -1
- package/types/utils/model_registry/get_file_metadata.d.ts.map +1 -1
- package/types/utils/model_registry/get_model_files.d.ts +1 -0
- package/types/utils/model_registry/get_model_files.d.ts.map +1 -1
- package/types/utils/model_registry/get_pipeline_files.d.ts +2 -1
- package/types/utils/model_registry/get_pipeline_files.d.ts.map +1 -1
- package/types/utils/model_registry/is_cached.d.ts +47 -4
- package/types/utils/model_registry/is_cached.d.ts.map +1 -1
- package/types/utils/tensor.d.ts.map +1 -1
- package/src/utils/data-structures.js +0 -572
- package/types/utils/data-structures.d.ts +0 -294
- package/types/utils/data-structures.d.ts.map +0 -1
- package/types/utils/hub/files.d.ts.map +0 -1
|
@@ -14,22 +14,32 @@ var __export = (target, all) => {
|
|
|
14
14
|
import fs from "fs";
|
|
15
15
|
import path from "path";
|
|
16
16
|
import url from "url";
|
|
17
|
-
var VERSION = "4.0.0-next.
|
|
17
|
+
var VERSION = "4.0.0-next.7";
|
|
18
|
+
var HAS_SELF = typeof self !== "undefined";
|
|
18
19
|
var IS_FS_AVAILABLE = !isEmpty(fs);
|
|
19
20
|
var IS_PATH_AVAILABLE = !isEmpty(path);
|
|
20
|
-
var IS_WEB_CACHE_AVAILABLE =
|
|
21
|
+
var IS_WEB_CACHE_AVAILABLE = HAS_SELF && "caches" in self;
|
|
21
22
|
var IS_DENO_RUNTIME = typeof globalThis.Deno !== "undefined";
|
|
22
23
|
var IS_BUN_RUNTIME = typeof globalThis.Bun !== "undefined";
|
|
23
24
|
var IS_DENO_WEB_RUNTIME = IS_DENO_RUNTIME && IS_WEB_CACHE_AVAILABLE && !IS_FS_AVAILABLE;
|
|
24
25
|
var IS_PROCESS_AVAILABLE = typeof process !== "undefined";
|
|
25
26
|
var IS_NODE_ENV = IS_PROCESS_AVAILABLE && process?.release?.name === "node" && !IS_DENO_WEB_RUNTIME;
|
|
26
27
|
var IS_BROWSER_ENV = typeof window !== "undefined" && typeof window.document !== "undefined";
|
|
27
|
-
var IS_WEBWORKER_ENV =
|
|
28
|
+
var IS_WEBWORKER_ENV = HAS_SELF && ["DedicatedWorkerGlobalScope", "ServiceWorkerGlobalScope", "SharedWorkerGlobalScope"].includes(
|
|
28
29
|
self.constructor?.name
|
|
29
30
|
);
|
|
31
|
+
var IS_WEB_ENV = IS_BROWSER_ENV || IS_WEBWORKER_ENV || IS_DENO_WEB_RUNTIME;
|
|
30
32
|
var IS_WEBGPU_AVAILABLE = IS_NODE_ENV || typeof navigator !== "undefined" && "gpu" in navigator;
|
|
31
33
|
var IS_WEBNN_AVAILABLE = typeof navigator !== "undefined" && "ml" in navigator;
|
|
32
34
|
var IS_CRYPTO_AVAILABLE = typeof crypto !== "undefined" && typeof crypto.getRandomValues === "function";
|
|
35
|
+
var IS_CHROME_AVAILABLE = (
|
|
36
|
+
// @ts-ignore - chrome may not exist in all environments
|
|
37
|
+
typeof chrome !== "undefined" && typeof chrome.runtime !== "undefined" && typeof chrome.runtime.id === "string"
|
|
38
|
+
);
|
|
39
|
+
var IS_SERVICE_WORKER_ENV = (
|
|
40
|
+
// @ts-ignore - ServiceWorkerGlobalScope may not exist in all environments
|
|
41
|
+
typeof ServiceWorkerGlobalScope !== "undefined" && HAS_SELF && self instanceof ServiceWorkerGlobalScope
|
|
42
|
+
);
|
|
33
43
|
var isSafari = () => {
|
|
34
44
|
if (typeof navigator === "undefined") {
|
|
35
45
|
return false;
|
|
@@ -46,6 +56,12 @@ var apis = Object.freeze({
|
|
|
46
56
|
IS_BROWSER_ENV,
|
|
47
57
|
/** Whether we are running in a web worker environment */
|
|
48
58
|
IS_WEBWORKER_ENV,
|
|
59
|
+
/** Whether we are running in a web-like environment (browser, web worker, or Deno web runtime) */
|
|
60
|
+
IS_WEB_ENV,
|
|
61
|
+
/** Whether we are running in a service worker environment */
|
|
62
|
+
IS_SERVICE_WORKER_ENV,
|
|
63
|
+
/** Whether we are running in Deno's web runtime (CDN imports, Cache API available, no filesystem) */
|
|
64
|
+
IS_DENO_WEB_RUNTIME,
|
|
49
65
|
/** Whether the Cache API is available */
|
|
50
66
|
IS_WEB_CACHE_AVAILABLE,
|
|
51
67
|
/** Whether the WebGPU API is available */
|
|
@@ -63,7 +79,9 @@ var apis = Object.freeze({
|
|
|
63
79
|
/** Whether the path API is available */
|
|
64
80
|
IS_PATH_AVAILABLE,
|
|
65
81
|
/** Whether the crypto API is available */
|
|
66
|
-
IS_CRYPTO_AVAILABLE
|
|
82
|
+
IS_CRYPTO_AVAILABLE,
|
|
83
|
+
/** Whether the Chrome runtime API is available */
|
|
84
|
+
IS_CHROME_AVAILABLE
|
|
67
85
|
});
|
|
68
86
|
var RUNNING_LOCALLY = IS_FS_AVAILABLE && IS_PATH_AVAILABLE;
|
|
69
87
|
var dirname__ = "./";
|
|
@@ -124,6 +142,7 @@ var env = {
|
|
|
124
142
|
customCache: null,
|
|
125
143
|
useWasmCache: IS_WEB_CACHE_AVAILABLE || IS_FS_AVAILABLE,
|
|
126
144
|
cacheKey: "transformers-cache",
|
|
145
|
+
experimental_useCrossOriginStorage: false,
|
|
127
146
|
/////////////////// Custom fetch /////////////////////
|
|
128
147
|
fetch: DEFAULT_FETCH
|
|
129
148
|
//////////////////////////////////////////////////////
|
|
@@ -2674,7 +2693,7 @@ var Tokenizer = class {
|
|
|
2674
2693
|
};
|
|
2675
2694
|
var Tokenizer_default = Tokenizer;
|
|
2676
2695
|
|
|
2677
|
-
// ../../node_modules/.pnpm/@huggingface+jinja@0.5.
|
|
2696
|
+
// ../../node_modules/.pnpm/@huggingface+jinja@0.5.6/node_modules/@huggingface/jinja/dist/index.js
|
|
2678
2697
|
var TOKEN_TYPES = Object.freeze({
|
|
2679
2698
|
Text: "Text",
|
|
2680
2699
|
// The text between Jinja statements or expressions
|
|
@@ -4193,7 +4212,11 @@ var Environment = class {
|
|
|
4193
4212
|
["number", (operand) => operand instanceof IntegerValue || operand instanceof FloatValue],
|
|
4194
4213
|
["integer", (operand) => operand instanceof IntegerValue],
|
|
4195
4214
|
["iterable", (operand) => operand.type === "ArrayValue" || operand.type === "StringValue"],
|
|
4196
|
-
["mapping", (operand) => operand
|
|
4215
|
+
["mapping", (operand) => operand instanceof ObjectValue],
|
|
4216
|
+
[
|
|
4217
|
+
"sequence",
|
|
4218
|
+
(operand) => operand instanceof ArrayValue || operand instanceof ObjectValue || operand instanceof StringValue
|
|
4219
|
+
],
|
|
4197
4220
|
[
|
|
4198
4221
|
"lower",
|
|
4199
4222
|
(operand) => {
|
|
@@ -4466,6 +4489,9 @@ var Interpreter = class {
|
|
|
4466
4489
|
applyFilter(operand, filterNode, environment) {
|
|
4467
4490
|
if (filterNode.type === "Identifier") {
|
|
4468
4491
|
const filter = filterNode;
|
|
4492
|
+
if (filter.value === "safe") {
|
|
4493
|
+
return operand;
|
|
4494
|
+
}
|
|
4469
4495
|
if (filter.value === "tojson") {
|
|
4470
4496
|
return new StringValue(toJSON(operand, {}));
|
|
4471
4497
|
}
|
|
@@ -4555,6 +4581,8 @@ var Interpreter = class {
|
|
|
4555
4581
|
return new IntegerValue(Math.floor(operand.value));
|
|
4556
4582
|
case "float":
|
|
4557
4583
|
return new FloatValue(operand.value);
|
|
4584
|
+
case "string":
|
|
4585
|
+
return new StringValue(operand.toString());
|
|
4558
4586
|
default:
|
|
4559
4587
|
throw new Error(`Unknown NumericValue filter: ${filter.value}`);
|
|
4560
4588
|
}
|
|
@@ -5508,9 +5536,8 @@ var Callable2 = (
|
|
|
5508
5536
|
}
|
|
5509
5537
|
);
|
|
5510
5538
|
|
|
5511
|
-
// src/utils/hub/
|
|
5539
|
+
// src/utils/hub/FileResponse.js
|
|
5512
5540
|
import fs2 from "fs";
|
|
5513
|
-
import path2 from "path";
|
|
5514
5541
|
var CONTENT_TYPE_MAP = {
|
|
5515
5542
|
txt: "text/plain",
|
|
5516
5543
|
html: "text/html",
|
|
@@ -5621,6 +5648,174 @@ var FileResponse = class _FileResponse {
|
|
|
5621
5648
|
return JSON.parse(await this.text());
|
|
5622
5649
|
}
|
|
5623
5650
|
};
|
|
5651
|
+
|
|
5652
|
+
// src/utils/cache/FileCache.js
|
|
5653
|
+
import fs3 from "fs";
|
|
5654
|
+
import path2 from "path";
|
|
5655
|
+
|
|
5656
|
+
// src/utils/random.js
|
|
5657
|
+
var Random = class {
|
|
5658
|
+
constructor(seed) {
|
|
5659
|
+
this._mt = new Uint32Array(624);
|
|
5660
|
+
this._idx = 625;
|
|
5661
|
+
this._gauss_next = null;
|
|
5662
|
+
this._random_fn = this.random.bind(this);
|
|
5663
|
+
this.seed(seed);
|
|
5664
|
+
}
|
|
5665
|
+
/**
|
|
5666
|
+
* Seeds this instance's PRNG.
|
|
5667
|
+
*
|
|
5668
|
+
* When called with a number, initializes the state deterministically from that value.
|
|
5669
|
+
* When called with no arguments (or `undefined`/`null`), seeds from OS entropy
|
|
5670
|
+
* via `crypto.getRandomValues`, matching Python's `random.seed()` behaviour.
|
|
5671
|
+
*
|
|
5672
|
+
* @param {number} [n] The seed value. Omit to seed from OS entropy.
|
|
5673
|
+
*/
|
|
5674
|
+
seed(n) {
|
|
5675
|
+
if (n === void 0 || n === null) {
|
|
5676
|
+
if (apis.IS_CRYPTO_AVAILABLE) {
|
|
5677
|
+
const buf = new Uint32Array(1);
|
|
5678
|
+
crypto.getRandomValues(buf);
|
|
5679
|
+
n = buf[0];
|
|
5680
|
+
} else {
|
|
5681
|
+
n = Date.now() >>> 0;
|
|
5682
|
+
}
|
|
5683
|
+
}
|
|
5684
|
+
const mt2 = this._mt;
|
|
5685
|
+
const u = (a, b) => Math.imul(a, b) >>> 0, key = [];
|
|
5686
|
+
for (let v = n || 0; v > 0; v = Math.floor(v / 4294967296)) key.push(v & 4294967295);
|
|
5687
|
+
if (!key.length) key.push(0);
|
|
5688
|
+
mt2[0] = 19650218;
|
|
5689
|
+
for (let k2 = 1; k2 < 624; ++k2) mt2[k2] = u(1812433253, mt2[k2 - 1] ^ mt2[k2 - 1] >>> 30) + k2 >>> 0;
|
|
5690
|
+
let i = 1, j = 0;
|
|
5691
|
+
for (let k2 = Math.max(624, key.length); k2 > 0; --k2, ++i, ++j) {
|
|
5692
|
+
if (i >= 624) {
|
|
5693
|
+
mt2[0] = mt2[623];
|
|
5694
|
+
i = 1;
|
|
5695
|
+
}
|
|
5696
|
+
if (j >= key.length) j = 0;
|
|
5697
|
+
mt2[i] = (mt2[i] ^ u(mt2[i - 1] ^ mt2[i - 1] >>> 30, 1664525)) + key[j] + j >>> 0;
|
|
5698
|
+
}
|
|
5699
|
+
for (let k2 = 623; k2 > 0; --k2, ++i) {
|
|
5700
|
+
if (i >= 624) {
|
|
5701
|
+
mt2[0] = mt2[623];
|
|
5702
|
+
i = 1;
|
|
5703
|
+
}
|
|
5704
|
+
mt2[i] = (mt2[i] ^ u(mt2[i - 1] ^ mt2[i - 1] >>> 30, 1566083941)) - i >>> 0;
|
|
5705
|
+
}
|
|
5706
|
+
mt2[0] = 2147483648;
|
|
5707
|
+
this._idx = 624;
|
|
5708
|
+
this._gauss_next = null;
|
|
5709
|
+
}
|
|
5710
|
+
/**
|
|
5711
|
+
* Generates a random unsigned 32-bit integer.
|
|
5712
|
+
*
|
|
5713
|
+
* Performs the "twist" step when the state buffer is exhausted,
|
|
5714
|
+
* then applies the standard MT19937 tempering transform.
|
|
5715
|
+
*
|
|
5716
|
+
* @returns {number} A random integer in the range [0, 2^32 - 1].
|
|
5717
|
+
*/
|
|
5718
|
+
_int32() {
|
|
5719
|
+
const mt2 = this._mt;
|
|
5720
|
+
if (this._idx >= 624) {
|
|
5721
|
+
for (let k2 = 0; k2 < 624; ++k2) {
|
|
5722
|
+
const y2 = mt2[k2] & 2147483648 | mt2[(k2 + 1) % 624] & 2147483647;
|
|
5723
|
+
mt2[k2] = (mt2[(k2 + 397) % 624] ^ y2 >>> 1 ^ (y2 & 1 ? 2567483615 : 0)) >>> 0;
|
|
5724
|
+
}
|
|
5725
|
+
this._idx = 0;
|
|
5726
|
+
}
|
|
5727
|
+
let y = mt2[this._idx++];
|
|
5728
|
+
y ^= y >>> 11;
|
|
5729
|
+
y ^= y << 7 & 2636928640;
|
|
5730
|
+
y ^= y << 15 & 4022730752;
|
|
5731
|
+
y ^= y >>> 18;
|
|
5732
|
+
return y >>> 0;
|
|
5733
|
+
}
|
|
5734
|
+
/**
|
|
5735
|
+
* Generates a random floating-point number in the half-open interval [0, 1).
|
|
5736
|
+
*
|
|
5737
|
+
* Combines two 32-bit integers (using 53 bits of precision) to produce
|
|
5738
|
+
* a uniformly distributed double, matching Python's `random.random()`.
|
|
5739
|
+
*
|
|
5740
|
+
* @returns {number} A random float in [0, 1).
|
|
5741
|
+
*/
|
|
5742
|
+
random() {
|
|
5743
|
+
return ((this._int32() >>> 5) * 67108864 + (this._int32() >>> 6)) / 9007199254740992;
|
|
5744
|
+
}
|
|
5745
|
+
/**
|
|
5746
|
+
* Generates a random number from a Gaussian (normal) distribution.
|
|
5747
|
+
*
|
|
5748
|
+
* Uses the Box-Muller transform with a cached spare value,
|
|
5749
|
+
* matching Python's `random.gauss()` output for the same seed.
|
|
5750
|
+
*
|
|
5751
|
+
* @param {number} [mu=0] The mean of the distribution.
|
|
5752
|
+
* @param {number} [sigma=1] The standard deviation of the distribution.
|
|
5753
|
+
* @returns {number} A normally distributed random value.
|
|
5754
|
+
*/
|
|
5755
|
+
gauss(mu = 0, sigma = 1) {
|
|
5756
|
+
let z = this._gauss_next;
|
|
5757
|
+
this._gauss_next = null;
|
|
5758
|
+
if (z === null) {
|
|
5759
|
+
const x2pi = this.random() * 2 * Math.PI, g2rad = Math.sqrt(-2 * Math.log(1 - this.random()));
|
|
5760
|
+
z = Math.cos(x2pi) * g2rad;
|
|
5761
|
+
this._gauss_next = Math.sin(x2pi) * g2rad;
|
|
5762
|
+
}
|
|
5763
|
+
return mu + z * sigma;
|
|
5764
|
+
}
|
|
5765
|
+
/**
|
|
5766
|
+
* Shuffles an array in-place using the Fisher-Yates algorithm.
|
|
5767
|
+
*
|
|
5768
|
+
* Uses rejection sampling via `getrandbits`-style bit masking to ensure
|
|
5769
|
+
* a uniform distribution, matching Python's `random.shuffle()`.
|
|
5770
|
+
*
|
|
5771
|
+
* @param {any[]} arr The array to shuffle in-place.
|
|
5772
|
+
*/
|
|
5773
|
+
shuffle(arr) {
|
|
5774
|
+
for (let i = arr.length - 1; i > 0; --i) {
|
|
5775
|
+
const k2 = 32 - Math.clz32(i + 1);
|
|
5776
|
+
let r = this._int32() >>> 32 - k2;
|
|
5777
|
+
while (r > i) r = this._int32() >>> 32 - k2;
|
|
5778
|
+
const t = arr[i];
|
|
5779
|
+
arr[i] = arr[r];
|
|
5780
|
+
arr[r] = t;
|
|
5781
|
+
}
|
|
5782
|
+
}
|
|
5783
|
+
/**
|
|
5784
|
+
* Selects a single element from a weighted population.
|
|
5785
|
+
*
|
|
5786
|
+
* Matches Python's `random.choices(population, weights=weights, k=1)[0]`
|
|
5787
|
+
*
|
|
5788
|
+
* @param {any[]} population The array of items to choose from.
|
|
5789
|
+
* @param {number[]} weights An array of non-negative weights, one per population element.
|
|
5790
|
+
* @returns {*} A single randomly selected element from the population.
|
|
5791
|
+
*/
|
|
5792
|
+
choices(population, weights) {
|
|
5793
|
+
return population[_weightedIndexWith(this._random_fn, weights)];
|
|
5794
|
+
}
|
|
5795
|
+
};
|
|
5796
|
+
function _weightedIndexWith(randomFn, weights) {
|
|
5797
|
+
let sum = 0;
|
|
5798
|
+
for (let i = 0; i < weights.length; ++i) sum += weights[i];
|
|
5799
|
+
let x = randomFn() * sum;
|
|
5800
|
+
for (let i = 0; i < weights.length; ++i) {
|
|
5801
|
+
x -= weights[i];
|
|
5802
|
+
if (x < 0) return i;
|
|
5803
|
+
}
|
|
5804
|
+
return weights.length - 1;
|
|
5805
|
+
}
|
|
5806
|
+
var _default = new Random();
|
|
5807
|
+
var random = Object.freeze({
|
|
5808
|
+
Random,
|
|
5809
|
+
seed: _default.seed.bind(_default),
|
|
5810
|
+
random: _default.random.bind(_default),
|
|
5811
|
+
gauss: _default.gauss.bind(_default),
|
|
5812
|
+
shuffle: _default.shuffle.bind(_default),
|
|
5813
|
+
choices: _default.choices.bind(_default)
|
|
5814
|
+
});
|
|
5815
|
+
var _weightedIndex = (weights) => _weightedIndexWith(random.random, weights);
|
|
5816
|
+
|
|
5817
|
+
// src/utils/cache/FileCache.js
|
|
5818
|
+
var rng = new Random();
|
|
5624
5819
|
var FileCache = class {
|
|
5625
5820
|
/**
|
|
5626
5821
|
* Instantiate a `FileCache` object.
|
|
@@ -5652,13 +5847,16 @@ var FileCache = class {
|
|
|
5652
5847
|
* @returns {Promise<void>}
|
|
5653
5848
|
*/
|
|
5654
5849
|
async put(request, response, progress_callback = void 0) {
|
|
5655
|
-
|
|
5850
|
+
const filePath = path2.join(this.path, request);
|
|
5851
|
+
const id = apis.IS_PROCESS_AVAILABLE ? process.pid : Date.now();
|
|
5852
|
+
const randomSuffix = rng._int32().toString(36);
|
|
5853
|
+
const tmpPath = filePath + `.tmp.${id}.${randomSuffix}`;
|
|
5656
5854
|
try {
|
|
5657
5855
|
const contentLength = response.headers.get("Content-Length");
|
|
5658
5856
|
const total = parseInt(contentLength ?? "0");
|
|
5659
5857
|
let loaded = 0;
|
|
5660
|
-
await
|
|
5661
|
-
const fileStream =
|
|
5858
|
+
await fs3.promises.mkdir(path2.dirname(filePath), { recursive: true });
|
|
5859
|
+
const fileStream = fs3.createWriteStream(tmpPath);
|
|
5662
5860
|
const reader = response.body.getReader();
|
|
5663
5861
|
while (true) {
|
|
5664
5862
|
const { done, value } = await reader.read();
|
|
@@ -5678,10 +5876,13 @@ var FileCache = class {
|
|
|
5678
5876
|
const progress = total ? loaded / total * 100 : 0;
|
|
5679
5877
|
progress_callback?.({ progress, loaded, total });
|
|
5680
5878
|
}
|
|
5681
|
-
|
|
5879
|
+
await new Promise((resolve, reject) => {
|
|
5880
|
+
fileStream.close((err) => err ? reject(err) : resolve());
|
|
5881
|
+
});
|
|
5882
|
+
await fs3.promises.rename(tmpPath, filePath);
|
|
5682
5883
|
} catch (error) {
|
|
5683
5884
|
try {
|
|
5684
|
-
await
|
|
5885
|
+
await fs3.promises.unlink(tmpPath);
|
|
5685
5886
|
} catch {
|
|
5686
5887
|
}
|
|
5687
5888
|
throw error;
|
|
@@ -5695,7 +5896,7 @@ var FileCache = class {
|
|
|
5695
5896
|
async delete(request) {
|
|
5696
5897
|
let filePath = path2.join(this.path, request);
|
|
5697
5898
|
try {
|
|
5698
|
-
await
|
|
5899
|
+
await fs3.promises.unlink(filePath);
|
|
5699
5900
|
return true;
|
|
5700
5901
|
} catch (error) {
|
|
5701
5902
|
return false;
|
|
@@ -5704,6 +5905,7 @@ var FileCache = class {
|
|
|
5704
5905
|
// TODO add the rest?
|
|
5705
5906
|
// addAll(requests: RequestInfo[]): Promise<void>;
|
|
5706
5907
|
// keys(request?: RequestInfo | URL, options?: CacheQueryOptions): Promise<ReadonlyArray<Request>>;
|
|
5908
|
+
// match(request: RequestInfo | URL, options?: CacheQueryOptions): Promise<Response | undefined>;
|
|
5707
5909
|
// matchAll(request?: RequestInfo | URL, options?: CacheQueryOptions): Promise<ReadonlyArray<Response>>;
|
|
5708
5910
|
};
|
|
5709
5911
|
|
|
@@ -5793,50 +5995,351 @@ async function readResponse(response, progress_callback, expectedSize) {
|
|
|
5793
5995
|
await read();
|
|
5794
5996
|
return buffer;
|
|
5795
5997
|
}
|
|
5796
|
-
|
|
5797
|
-
|
|
5798
|
-
|
|
5799
|
-
|
|
5800
|
-
|
|
5801
|
-
|
|
5802
|
-
|
|
5803
|
-
|
|
5804
|
-
|
|
5805
|
-
|
|
5806
|
-
|
|
5807
|
-
);
|
|
5808
|
-
}
|
|
5809
|
-
cache = env.customCache;
|
|
5998
|
+
function isBlobURL(url2) {
|
|
5999
|
+
return isValidUrl(url2, ["blob:"]);
|
|
6000
|
+
}
|
|
6001
|
+
function toAbsoluteURL(url2) {
|
|
6002
|
+
let baseURL;
|
|
6003
|
+
if (typeof location !== "undefined" && location.href) {
|
|
6004
|
+
baseURL = location.href;
|
|
6005
|
+
} else if (typeof import.meta !== "undefined" && import.meta.url) {
|
|
6006
|
+
baseURL = import.meta.url;
|
|
6007
|
+
} else {
|
|
6008
|
+
return url2;
|
|
5810
6009
|
}
|
|
5811
|
-
|
|
5812
|
-
|
|
5813
|
-
|
|
6010
|
+
return new URL(url2, baseURL).href;
|
|
6011
|
+
}
|
|
6012
|
+
|
|
6013
|
+
// src/utils/cache/CrossOriginStorageCache.js
|
|
6014
|
+
var HASH_ALGORITHM = "SHA-256";
|
|
6015
|
+
var HASH_CACHE_NAME = "experimental_transformers-hash-cache";
|
|
6016
|
+
var makeHashDescriptor = (value) => ({ algorithm: HASH_ALGORITHM, value });
|
|
6017
|
+
var CrossOriginStorage = class {
|
|
6018
|
+
/** @type {Promise<Cache> | null} */
|
|
6019
|
+
#hashCache = null;
|
|
6020
|
+
/**
|
|
6021
|
+
* Returns (and lazily opens) the hash cache, reusing the same promise across concurrent callers.
|
|
6022
|
+
* @returns {Promise<Cache>}
|
|
6023
|
+
*/
|
|
6024
|
+
_getHashCache = () => {
|
|
6025
|
+
this.#hashCache ??= caches.open(HASH_CACHE_NAME);
|
|
6026
|
+
return this.#hashCache;
|
|
6027
|
+
};
|
|
6028
|
+
/**
|
|
6029
|
+
* Returns whether the `navigator.crossOriginStorage` API is available in the current environment.
|
|
6030
|
+
* @returns {boolean}
|
|
6031
|
+
*/
|
|
6032
|
+
static isAvailable = () => typeof navigator !== "undefined" && "crossOriginStorage" in navigator;
|
|
6033
|
+
/**
|
|
6034
|
+
* Looks up a cached response for the given URL by resolving its SHA-256 hash and requesting
|
|
6035
|
+
* the corresponding file handle from cross-origin storage.
|
|
6036
|
+
*
|
|
6037
|
+
* Implements `CacheInterface.match`.
|
|
6038
|
+
*
|
|
6039
|
+
* @param {string} request The URL of the resource to look up.
|
|
6040
|
+
* @returns {Promise<Response|undefined>} The cached `Response`, or `undefined` if not found.
|
|
6041
|
+
*/
|
|
6042
|
+
match = async (request) => {
|
|
6043
|
+
const hashValue = await this._getFileHash(request);
|
|
6044
|
+
if (!hashValue) {
|
|
6045
|
+
return void 0;
|
|
5814
6046
|
}
|
|
5815
6047
|
try {
|
|
5816
|
-
|
|
5817
|
-
|
|
5818
|
-
|
|
6048
|
+
const [handle] = await navigator.crossOriginStorage.requestFileHandles([makeHashDescriptor(hashValue)]);
|
|
6049
|
+
const blob = await handle.getFile();
|
|
6050
|
+
return new Response(blob, {
|
|
6051
|
+
headers: {
|
|
6052
|
+
"Content-Length": String(blob.size)
|
|
6053
|
+
}
|
|
6054
|
+
});
|
|
6055
|
+
} catch {
|
|
6056
|
+
return void 0;
|
|
5819
6057
|
}
|
|
5820
|
-
}
|
|
5821
|
-
|
|
5822
|
-
|
|
5823
|
-
|
|
6058
|
+
};
|
|
6059
|
+
/**
|
|
6060
|
+
* Stores a response in cross-origin storage, keyed by its SHA-256 hash.
|
|
6061
|
+
*
|
|
6062
|
+
* For LFS-backed URLs the hash is resolved cheaply via `_getFileHash` (which checks
|
|
6063
|
+
* `HASH_CACHE_NAME` first, then falls back to fetching the Git LFS pointer file)
|
|
6064
|
+
* without reading the response body a second time.
|
|
6065
|
+
*
|
|
6066
|
+
* For non-LFS resources the hash is unknown upfront. In that case the body is consumed
|
|
6067
|
+
* in the background: the stream is read to compute the content hash, the file is written
|
|
6068
|
+
* into cross-origin storage, and the computed hash is persisted to `HASH_CACHE_NAME`
|
|
6069
|
+
* so that future `match` calls can resolve the file without a network round-trip.
|
|
6070
|
+
*
|
|
6071
|
+
* Implements `CacheInterface.put`.
|
|
6072
|
+
*
|
|
6073
|
+
* @param {string} request The URL of the resource (used as the hash-cache key).
|
|
6074
|
+
* @param {Response} response The response whose body will be written to the cache.
|
|
6075
|
+
* @returns {Promise<void>}
|
|
6076
|
+
*/
|
|
6077
|
+
put = async (request, response) => {
|
|
6078
|
+
const hashValue = await this._getFileHash(request);
|
|
6079
|
+
if (hashValue) {
|
|
6080
|
+
const blob = await response.blob();
|
|
6081
|
+
await this._storeBlobInCOS(blob, hashValue);
|
|
6082
|
+
} else {
|
|
6083
|
+
this._processAndStore(request, response.body);
|
|
5824
6084
|
}
|
|
5825
|
-
|
|
5826
|
-
|
|
5827
|
-
|
|
5828
|
-
|
|
5829
|
-
|
|
5830
|
-
|
|
6085
|
+
};
|
|
6086
|
+
/**
|
|
6087
|
+
* Writes a blob into cross-origin storage using the given pre-computed hex hash string.
|
|
6088
|
+
*
|
|
6089
|
+
* @param {Blob} blob
|
|
6090
|
+
* @param {string} hashHex Hex-encoded SHA-256 hash of `blob`.
|
|
6091
|
+
* @returns {Promise<void>}
|
|
6092
|
+
*/
|
|
6093
|
+
_storeBlobInCOS = async (blob, hashHex) => {
|
|
6094
|
+
const [handle] = await navigator.crossOriginStorage.requestFileHandles([makeHashDescriptor(hashHex)], {
|
|
6095
|
+
create: true
|
|
6096
|
+
});
|
|
6097
|
+
const writableStream = await handle.createWritable();
|
|
6098
|
+
await writableStream.write(blob);
|
|
6099
|
+
await writableStream.close();
|
|
6100
|
+
};
|
|
6101
|
+
/**
|
|
6102
|
+
* Background task for non-LFS resources: consumes `stream`, computes the SHA-256 hash
|
|
6103
|
+
* of the resulting blob, stores it in cross-origin storage, and persists the computed
|
|
6104
|
+
* hash to `HASH_CACHE_NAME` keyed by `request` so future `match` calls can resolve the
|
|
6105
|
+
* file without a network round-trip.
|
|
6106
|
+
*
|
|
6107
|
+
* Called fire-and-forget from `put` — errors are swallowed so failures never surface to
|
|
6108
|
+
* the caller.
|
|
6109
|
+
*
|
|
6110
|
+
* @param {string} request The original resource URL.
|
|
6111
|
+
* @param {ReadableStream} stream The response body stream to consume.
|
|
6112
|
+
* @returns {Promise<void>}
|
|
6113
|
+
*/
|
|
6114
|
+
_processAndStore = async (request, stream) => {
|
|
5831
6115
|
try {
|
|
5832
|
-
|
|
5833
|
-
|
|
5834
|
-
|
|
5835
|
-
|
|
6116
|
+
const chunks = [];
|
|
6117
|
+
for await (const chunk2 of stream) {
|
|
6118
|
+
chunks.push(chunk2);
|
|
6119
|
+
}
|
|
6120
|
+
const blob = new Blob(chunks);
|
|
6121
|
+
const hashHex = await this._getBlobHash(blob);
|
|
6122
|
+
await this._storeBlobInCOS(blob, hashHex);
|
|
6123
|
+
try {
|
|
6124
|
+
const hashCache = await this._getHashCache();
|
|
6125
|
+
await hashCache.put(request, new Response(hashHex));
|
|
6126
|
+
} catch {
|
|
6127
|
+
}
|
|
6128
|
+
} catch {
|
|
5836
6129
|
}
|
|
5837
|
-
}
|
|
5838
|
-
|
|
5839
|
-
|
|
6130
|
+
};
|
|
6131
|
+
/**
|
|
6132
|
+
* Deletes the cache entry for the given request.
|
|
6133
|
+
*
|
|
6134
|
+
* Removes the hash entry from `HASH_CACHE_NAME`. Note: cross-origin storage itself does not
|
|
6135
|
+
* expose a delete API, so only the local hash mapping is removed. For non-LFS URLs this
|
|
6136
|
+
* permanently prevents `match` from resolving the file. For LFS-backed URLs, `match` will
|
|
6137
|
+
* re-fetch the LFS pointer file on the next call and repopulate the hash cache automatically.
|
|
6138
|
+
*
|
|
6139
|
+
* Implements `CacheInterface.delete`.
|
|
6140
|
+
*
|
|
6141
|
+
* @param {string} request
|
|
6142
|
+
* @returns {Promise<boolean>} Resolves to `true` if the hash entry was deleted, `false` otherwise.
|
|
6143
|
+
*/
|
|
6144
|
+
delete = async (request) => {
|
|
6145
|
+
try {
|
|
6146
|
+
const hashCache = await this._getHashCache();
|
|
6147
|
+
return await hashCache.delete(request);
|
|
6148
|
+
} catch {
|
|
6149
|
+
return false;
|
|
6150
|
+
}
|
|
6151
|
+
};
|
|
6152
|
+
/**
|
|
6153
|
+
* Resolves the SHA-256 hash for a given URL.
|
|
6154
|
+
*
|
|
6155
|
+
* Returns the cached hash immediately if one has been persisted to `HASH_CACHE_NAME`.
|
|
6156
|
+
* Otherwise falls back to `_getLfsFileHash` to retrieve the hash from the Hugging Face
|
|
6157
|
+
* LFS pointer file, persisting the result to `HASH_CACHE_NAME` for future lookups.
|
|
6158
|
+
*
|
|
6159
|
+
* Returns `null` if the hash cannot be determined (e.g. non-LFS URL with no cached entry).
|
|
6160
|
+
*
|
|
6161
|
+
* @param {string} url The resource URL to resolve a hash for.
|
|
6162
|
+
* @returns {Promise<string|null>} The hex-encoded SHA-256 hash, or `null` if unavailable.
|
|
6163
|
+
*/
|
|
6164
|
+
_getFileHash = async (url2) => {
|
|
6165
|
+
try {
|
|
6166
|
+
const hashCache = await this._getHashCache();
|
|
6167
|
+
const cached = await hashCache.match(url2);
|
|
6168
|
+
if (cached) {
|
|
6169
|
+
return cached.text();
|
|
6170
|
+
}
|
|
6171
|
+
const hash = await this._getLfsFileHash(url2);
|
|
6172
|
+
if (hash) {
|
|
6173
|
+
await hashCache.put(url2, new Response(hash));
|
|
6174
|
+
return hash;
|
|
6175
|
+
}
|
|
6176
|
+
return null;
|
|
6177
|
+
} catch {
|
|
6178
|
+
return null;
|
|
6179
|
+
}
|
|
6180
|
+
};
|
|
6181
|
+
/**
|
|
6182
|
+
* Attempts to retrieve the SHA-256 hash for a Hugging Face resource URL from its raw
|
|
6183
|
+
* Git LFS pointer file.
|
|
6184
|
+
*
|
|
6185
|
+
* Only applicable to URLs containing `/resolve/` (i.e. Hugging Face resolved file URLs).
|
|
6186
|
+
* The `/resolve/` segment is rewritten to `/raw/` to fetch the LFS pointer directly.
|
|
6187
|
+
* Returns `null` for non-LFS URLs or when the network request fails.
|
|
6188
|
+
*
|
|
6189
|
+
* @see https://huggingface.co/docs/hub/en/storage-backends#xet
|
|
6190
|
+
* @param {string} url The resolved Hugging Face URL of the resource.
|
|
6191
|
+
* @returns {Promise<string|null>} The hex-encoded SHA-256 hash, or `null` if unavailable.
|
|
6192
|
+
*/
|
|
6193
|
+
_getLfsFileHash = async (url2) => {
|
|
6194
|
+
if (!url2.includes("/resolve/")) {
|
|
6195
|
+
return null;
|
|
6196
|
+
}
|
|
6197
|
+
const rawUrl = url2.replace("/resolve/", "/raw/");
|
|
6198
|
+
try {
|
|
6199
|
+
const text = await fetch(rawUrl).then((r) => r.text());
|
|
6200
|
+
const match = text.match(/^oid sha256:([0-9a-f]+)$/m);
|
|
6201
|
+
return match ? match[1] : null;
|
|
6202
|
+
} catch {
|
|
6203
|
+
return null;
|
|
6204
|
+
}
|
|
6205
|
+
};
|
|
6206
|
+
/**
|
|
6207
|
+
* Computes the SHA-256 hash of a `Blob`'s contents.
|
|
6208
|
+
*
|
|
6209
|
+
* @param {Blob} blob The blob to hash.
|
|
6210
|
+
* @returns {Promise<string>} The lowercase hex-encoded SHA-256 hash.
|
|
6211
|
+
*/
|
|
6212
|
+
_getBlobHash = async (blob) => {
|
|
6213
|
+
const arrayBuffer = await blob.arrayBuffer();
|
|
6214
|
+
const hashBuffer = await crypto.subtle.digest(HASH_ALGORITHM, arrayBuffer);
|
|
6215
|
+
const hashArray = Array.from(new Uint8Array(hashBuffer));
|
|
6216
|
+
return hashArray.map((byte) => byte.toString(16).padStart(2, "0")).join("");
|
|
6217
|
+
};
|
|
6218
|
+
};
|
|
6219
|
+
|
|
6220
|
+
// src/utils/cache.js
|
|
6221
|
+
async function getCache(file_cache_dir = null) {
|
|
6222
|
+
let cache2 = null;
|
|
6223
|
+
if (env.useCustomCache) {
|
|
6224
|
+
if (!env.customCache) {
|
|
6225
|
+
throw Error("`env.useCustomCache=true`, but `env.customCache` is not defined.");
|
|
6226
|
+
}
|
|
6227
|
+
if (!env.customCache.match || !env.customCache.put) {
|
|
6228
|
+
throw new Error(
|
|
6229
|
+
"`env.customCache` must be an object which implements the `match` and `put` functions of the Web Cache API. For more information, see https://developer.mozilla.org/en-US/docs/Web/API/Cache"
|
|
6230
|
+
);
|
|
6231
|
+
}
|
|
6232
|
+
cache2 = env.customCache;
|
|
6233
|
+
}
|
|
6234
|
+
if (!cache2 && env.experimental_useCrossOriginStorage && CrossOriginStorage.isAvailable()) {
|
|
6235
|
+
cache2 = new CrossOriginStorage();
|
|
6236
|
+
}
|
|
6237
|
+
if (!cache2 && env.useBrowserCache) {
|
|
6238
|
+
if (typeof caches === "undefined") {
|
|
6239
|
+
throw Error("Browser cache is not available in this environment.");
|
|
6240
|
+
}
|
|
6241
|
+
try {
|
|
6242
|
+
cache2 = await caches.open(env.cacheKey);
|
|
6243
|
+
} catch (e) {
|
|
6244
|
+
logger.warn("An error occurred while opening the browser cache:", e);
|
|
6245
|
+
}
|
|
6246
|
+
}
|
|
6247
|
+
if (!cache2 && env.useFSCache) {
|
|
6248
|
+
if (!apis.IS_FS_AVAILABLE) {
|
|
6249
|
+
throw Error("File System Cache is not available in this environment.");
|
|
6250
|
+
}
|
|
6251
|
+
cache2 = new FileCache(file_cache_dir ?? env.cacheDir);
|
|
6252
|
+
}
|
|
6253
|
+
return cache2;
|
|
6254
|
+
}
|
|
6255
|
+
async function tryCache(cache2, ...names) {
|
|
6256
|
+
for (let name of names) {
|
|
6257
|
+
try {
|
|
6258
|
+
let result = await cache2.match(name);
|
|
6259
|
+
if (result) return result;
|
|
6260
|
+
} catch (e) {
|
|
6261
|
+
continue;
|
|
6262
|
+
}
|
|
6263
|
+
}
|
|
6264
|
+
return void 0;
|
|
6265
|
+
}
|
|
6266
|
+
|
|
6267
|
+
// src/utils/lru_cache.js
|
|
6268
|
+
var LRUCache2 = class {
|
|
6269
|
+
/** @type {number} */
|
|
6270
|
+
#capacity;
|
|
6271
|
+
/** @type {Map<any, any>} */
|
|
6272
|
+
#cache;
|
|
6273
|
+
/**
|
|
6274
|
+
* Creates an LRUCache instance.
|
|
6275
|
+
* @param {number} capacity The maximum number of items the cache can hold.
|
|
6276
|
+
*/
|
|
6277
|
+
constructor(capacity) {
|
|
6278
|
+
this.#capacity = capacity;
|
|
6279
|
+
this.#cache = /* @__PURE__ */ new Map();
|
|
6280
|
+
}
|
|
6281
|
+
/**
|
|
6282
|
+
* Retrieves the value associated with the given key and marks the key as recently used.
|
|
6283
|
+
* @param {any} key The key to retrieve.
|
|
6284
|
+
* @returns {any} The value associated with the key, or undefined if the key does not exist.
|
|
6285
|
+
*/
|
|
6286
|
+
get(key) {
|
|
6287
|
+
if (!this.#cache.has(key)) return void 0;
|
|
6288
|
+
const value = this.#cache.get(key);
|
|
6289
|
+
this.#cache.delete(key);
|
|
6290
|
+
this.#cache.set(key, value);
|
|
6291
|
+
return value;
|
|
6292
|
+
}
|
|
6293
|
+
/**
|
|
6294
|
+
* Inserts or updates the key-value pair in the cache.
|
|
6295
|
+
* If the key already exists, it is updated and marked as recently used.
|
|
6296
|
+
* If the cache exceeds its capacity, the least recently used item is evicted.
|
|
6297
|
+
* @param {any} key The key to add or update.
|
|
6298
|
+
* @param {any} value The value to associate with the key.
|
|
6299
|
+
*/
|
|
6300
|
+
put(key, value) {
|
|
6301
|
+
if (this.#cache.has(key)) {
|
|
6302
|
+
this.#cache.delete(key);
|
|
6303
|
+
}
|
|
6304
|
+
this.#cache.set(key, value);
|
|
6305
|
+
if (this.#cache.size > this.#capacity) {
|
|
6306
|
+
this.#cache.delete(this.#cache.keys().next().value);
|
|
6307
|
+
}
|
|
6308
|
+
}
|
|
6309
|
+
/**
|
|
6310
|
+
* Removes the entry for the given key from the cache.
|
|
6311
|
+
* @param {any} key The key to delete.
|
|
6312
|
+
* @returns {boolean} `true` if the entry existed and was removed, `false` otherwise.
|
|
6313
|
+
*/
|
|
6314
|
+
delete(key) {
|
|
6315
|
+
return this.#cache.delete(key);
|
|
6316
|
+
}
|
|
6317
|
+
/**
|
|
6318
|
+
* Clears the cache.
|
|
6319
|
+
*/
|
|
6320
|
+
clear() {
|
|
6321
|
+
this.#cache.clear();
|
|
6322
|
+
}
|
|
6323
|
+
};
|
|
6324
|
+
|
|
6325
|
+
// src/utils/memoize_promise.js
|
|
6326
|
+
var MAX_CACHE_SIZE = 100;
|
|
6327
|
+
var cache = new LRUCache2(MAX_CACHE_SIZE);
|
|
6328
|
+
function memoizePromise(key, factory) {
|
|
6329
|
+
const cached = cache.get(key);
|
|
6330
|
+
if (cached !== void 0) {
|
|
6331
|
+
return cached;
|
|
6332
|
+
}
|
|
6333
|
+
const promise = factory().then(
|
|
6334
|
+
(value) => value,
|
|
6335
|
+
(err) => {
|
|
6336
|
+
cache.delete(key);
|
|
6337
|
+
return Promise.reject(err);
|
|
6338
|
+
}
|
|
6339
|
+
);
|
|
6340
|
+
cache.put(key, promise);
|
|
6341
|
+
return promise;
|
|
6342
|
+
}
|
|
5840
6343
|
|
|
5841
6344
|
// src/utils/model_registry/get_file_metadata.js
|
|
5842
6345
|
async function fetch_file_head(urlOrPath) {
|
|
@@ -5845,17 +6348,27 @@ async function fetch_file_head(urlOrPath) {
|
|
|
5845
6348
|
}
|
|
5846
6349
|
const headers = getFetchHeaders(urlOrPath);
|
|
5847
6350
|
headers.set("Range", "bytes=0-0");
|
|
5848
|
-
return env.fetch(urlOrPath, { method: "GET", headers });
|
|
6351
|
+
return env.fetch(urlOrPath, { method: "GET", headers, cache: "no-store" });
|
|
6352
|
+
}
|
|
6353
|
+
function get_file_metadata(path_or_repo_id, filename, options = {}) {
|
|
6354
|
+
const key = JSON.stringify([
|
|
6355
|
+
path_or_repo_id,
|
|
6356
|
+
filename,
|
|
6357
|
+
options?.revision,
|
|
6358
|
+
options?.cache_dir,
|
|
6359
|
+
options?.local_files_only
|
|
6360
|
+
]);
|
|
6361
|
+
return memoizePromise(key, () => _get_file_metadata(path_or_repo_id, filename, options));
|
|
5849
6362
|
}
|
|
5850
|
-
async function
|
|
5851
|
-
const
|
|
6363
|
+
async function _get_file_metadata(path_or_repo_id, filename, options) {
|
|
6364
|
+
const cache2 = await getCache(options?.cache_dir);
|
|
5852
6365
|
const { localPath, remoteURL, proposedCacheKey, validModelId } = buildResourcePaths(
|
|
5853
6366
|
path_or_repo_id,
|
|
5854
6367
|
filename,
|
|
5855
6368
|
options,
|
|
5856
|
-
|
|
6369
|
+
cache2
|
|
5857
6370
|
);
|
|
5858
|
-
const cachedResponse = await checkCachedResource(
|
|
6371
|
+
const cachedResponse = await checkCachedResource(cache2, localPath, proposedCacheKey);
|
|
5859
6372
|
if (cachedResponse !== void 0 && typeof cachedResponse !== "string") {
|
|
5860
6373
|
const size = cachedResponse.headers.get("content-length");
|
|
5861
6374
|
const contentType = cachedResponse.headers.get("content-type");
|
|
@@ -5953,7 +6466,7 @@ function getFetchHeaders(urlOrPath) {
|
|
|
5953
6466
|
}
|
|
5954
6467
|
return headers;
|
|
5955
6468
|
}
|
|
5956
|
-
function buildResourcePaths(path_or_repo_id, filename, options = {},
|
|
6469
|
+
function buildResourcePaths(path_or_repo_id, filename, options = {}, cache2 = null) {
|
|
5957
6470
|
const revision = options.revision ?? "main";
|
|
5958
6471
|
const requestURL = pathJoin(path_or_repo_id, filename);
|
|
5959
6472
|
const validModelId = isValidHfModelId(path_or_repo_id);
|
|
@@ -5963,7 +6476,7 @@ function buildResourcePaths(path_or_repo_id, filename, options = {}, cache = nul
|
|
|
5963
6476
|
env.remotePathTemplate.replaceAll("{model}", path_or_repo_id).replaceAll("{revision}", encodeURIComponent(revision)),
|
|
5964
6477
|
filename
|
|
5965
6478
|
);
|
|
5966
|
-
const proposedCacheKey =
|
|
6479
|
+
const proposedCacheKey = cache2 instanceof FileCache ? (
|
|
5967
6480
|
// Choose cache key for filesystem cache
|
|
5968
6481
|
// When using the main revision (default), we use the request URL as the cache key.
|
|
5969
6482
|
// If a specific revision is requested, we account for this in the cache key.
|
|
@@ -5977,14 +6490,14 @@ function buildResourcePaths(path_or_repo_id, filename, options = {}, cache = nul
|
|
|
5977
6490
|
validModelId
|
|
5978
6491
|
};
|
|
5979
6492
|
}
|
|
5980
|
-
async function checkCachedResource(
|
|
5981
|
-
if (!
|
|
6493
|
+
async function checkCachedResource(cache2, localPath, proposedCacheKey) {
|
|
6494
|
+
if (!cache2) {
|
|
5982
6495
|
return void 0;
|
|
5983
6496
|
}
|
|
5984
|
-
return await tryCache(
|
|
6497
|
+
return await tryCache(cache2, localPath, proposedCacheKey);
|
|
5985
6498
|
}
|
|
5986
|
-
async function storeCachedResource(path_or_repo_id, filename,
|
|
5987
|
-
if (await
|
|
6499
|
+
async function storeCachedResource(path_or_repo_id, filename, cache2, cacheKey, response, result, options = {}) {
|
|
6500
|
+
if (await cache2.match(cacheKey) !== void 0) {
|
|
5988
6501
|
return;
|
|
5989
6502
|
}
|
|
5990
6503
|
if (!result) {
|
|
@@ -5994,14 +6507,14 @@ async function storeCachedResource(path_or_repo_id, filename, cache, cacheKey, r
|
|
|
5994
6507
|
file: filename,
|
|
5995
6508
|
...data
|
|
5996
6509
|
}) : void 0;
|
|
5997
|
-
await
|
|
6510
|
+
await cache2.put(
|
|
5998
6511
|
cacheKey,
|
|
5999
6512
|
/** @type {Response} */
|
|
6000
6513
|
response,
|
|
6001
6514
|
wrapped_progress
|
|
6002
6515
|
);
|
|
6003
6516
|
} else if (typeof response !== "string") {
|
|
6004
|
-
await
|
|
6517
|
+
await cache2.put(
|
|
6005
6518
|
cacheKey,
|
|
6006
6519
|
new Response(
|
|
6007
6520
|
/** @type {any} */
|
|
@@ -6015,17 +6528,17 @@ async function storeCachedResource(path_or_repo_id, filename, cache, cacheKey, r
|
|
|
6015
6528
|
});
|
|
6016
6529
|
}
|
|
6017
6530
|
}
|
|
6018
|
-
async function loadResourceFile(path_or_repo_id, filename, fatal = true, options = {}, return_path = false,
|
|
6531
|
+
async function loadResourceFile(path_or_repo_id, filename, fatal = true, options = {}, return_path = false, cache2 = null) {
|
|
6019
6532
|
const { requestURL, localPath, remoteURL, proposedCacheKey, validModelId } = buildResourcePaths(
|
|
6020
6533
|
path_or_repo_id,
|
|
6021
6534
|
filename,
|
|
6022
6535
|
options,
|
|
6023
|
-
|
|
6536
|
+
cache2
|
|
6024
6537
|
);
|
|
6025
6538
|
let cacheKey;
|
|
6026
6539
|
let toCacheResponse = false;
|
|
6027
6540
|
let response;
|
|
6028
|
-
response = await checkCachedResource(
|
|
6541
|
+
response = await checkCachedResource(cache2, localPath, proposedCacheKey);
|
|
6029
6542
|
const cacheHit = response !== void 0;
|
|
6030
6543
|
if (!cacheHit) {
|
|
6031
6544
|
if (env.allowLocalModels) {
|
|
@@ -6066,7 +6579,7 @@ async function loadResourceFile(path_or_repo_id, filename, fatal = true, options
|
|
|
6066
6579
|
}
|
|
6067
6580
|
cacheKey = proposedCacheKey;
|
|
6068
6581
|
}
|
|
6069
|
-
toCacheResponse =
|
|
6582
|
+
toCacheResponse = cache2 && // 1. A caching system is available
|
|
6070
6583
|
typeof Response !== "undefined" && // 2. `Response` is defined (i.e., we are in a browser-like environment)
|
|
6071
6584
|
response instanceof Response && // 3. result is a `Response` object (i.e., not a `FileResponse`)
|
|
6072
6585
|
response.status === 200;
|
|
@@ -6128,7 +6641,7 @@ async function loadResourceFile(path_or_repo_id, filename, fatal = true, options
|
|
|
6128
6641
|
// i.e., do not cache FileResponses (prevents duplication)
|
|
6129
6642
|
toCacheResponse && cacheKey && typeof response !== "string"
|
|
6130
6643
|
) {
|
|
6131
|
-
await storeCachedResource(path_or_repo_id, filename,
|
|
6644
|
+
await storeCachedResource(path_or_repo_id, filename, cache2, cacheKey, response, result, options);
|
|
6132
6645
|
}
|
|
6133
6646
|
dispatchCallback(options.progress_callback, {
|
|
6134
6647
|
status: "done",
|
|
@@ -6144,7 +6657,7 @@ async function loadResourceFile(path_or_repo_id, filename, fatal = true, options
|
|
|
6144
6657
|
if (response instanceof FileResponse) {
|
|
6145
6658
|
return response.filePath;
|
|
6146
6659
|
}
|
|
6147
|
-
const cachedResponse = await
|
|
6660
|
+
const cachedResponse = await cache2?.match(cacheKey);
|
|
6148
6661
|
if (cachedResponse instanceof FileResponse) {
|
|
6149
6662
|
return cachedResponse.filePath;
|
|
6150
6663
|
} else if (cachedResponse instanceof Response) {
|
|
@@ -6171,8 +6684,8 @@ async function getModelFile(path_or_repo_id, filename, fatal = true, options = {
|
|
|
6171
6684
|
name: path_or_repo_id,
|
|
6172
6685
|
file: filename
|
|
6173
6686
|
});
|
|
6174
|
-
const
|
|
6175
|
-
return await loadResourceFile(path_or_repo_id, filename, fatal, options, return_path,
|
|
6687
|
+
const cache2 = await getCache(options?.cache_dir);
|
|
6688
|
+
return await loadResourceFile(path_or_repo_id, filename, fatal, options, return_path, cache2);
|
|
6176
6689
|
}
|
|
6177
6690
|
async function getModelText(modelPath, fileName, fatal = true, options = {}) {
|
|
6178
6691
|
const buffer = await getModelFile(modelPath, fileName, fatal, options, false);
|
|
@@ -6965,7 +7478,7 @@ var uint16_to_float32 = /* @__PURE__ */ (function() {
|
|
|
6965
7478
|
// src/backends/onnx.js
|
|
6966
7479
|
import * as ONNX_NODE from "onnxruntime-node";
|
|
6967
7480
|
|
|
6968
|
-
// ../../node_modules/.pnpm/onnxruntime-web@1.25.0-dev.
|
|
7481
|
+
// ../../node_modules/.pnpm/onnxruntime-web@1.25.0-dev.20260307-d626b568e0/node_modules/onnxruntime-web/dist/ort.webgpu.bundle.min.mjs
|
|
6969
7482
|
var ort_webgpu_bundle_min_exports = {};
|
|
6970
7483
|
__export(ort_webgpu_bundle_min_exports, {
|
|
6971
7484
|
InferenceSession: () => Jf,
|
|
@@ -7733,7 +8246,7 @@ async function ts(a = {}) {
|
|
|
7733
8246
|
throw L(e = "Aborted(" + e + ")"), W = true, e = new WebAssembly.RuntimeError(e + ". Build with -sASSERTIONS for more info."), R?.(e), e;
|
|
7734
8247
|
}
|
|
7735
8248
|
function Ye() {
|
|
7736
|
-
return { a: { f: Vs, J: js, k: Hs, p: Ys, l: qs, ta: Js, b: Xs, ca: Qs, Ka: Sn, s: Zs, da: Ln, _a: On, Ga: Bn, Ia: Mn, $a: Cn, Ya: Un, Ra: Dn, Xa: Pn, pa: _n, Ha: Rn, Yb: Nn, Za: kn, Fa: Wn, eb: Ks, Da: ti, Tb: ri, Rb: oi, Ca: si, M: ii,
|
|
8249
|
+
return { a: { f: Vs, J: js, k: Hs, p: Ys, l: qs, ta: Js, b: Xs, ca: Qs, Ka: Sn, s: Zs, da: Ln, _a: On, Ga: Bn, Ia: Mn, $a: Cn, Ya: Un, Ra: Dn, Xa: Pn, pa: _n, Ha: Rn, Yb: Nn, Za: kn, Fa: Wn, eb: Ks, Da: ti, Tb: ri, Rb: oi, Ca: si, M: ii, I: ui, Sb: fi, ka: yi, Ub: bi, Ua: wi, Wb: Ti, La: vi, Pb: Ei, la: Si, Ta: Ar, bb: Ai, U: Oi, n: Di, c: Er, sb: Pi, w: _i, L: Ri, z: Ni, j: ki, o: Yn, tb: Wi, G: Fi, T: Gi, h: $i, u: zi, m: Vi, i: ji, Oa: Hi, Pa: Yi, Qa: qi, Ma: Qn, Na: Zn, Qb: Kn, fb: Xi, db: Ki, Y: eu, rb: tu, ma: ru, cb: Qi, gb: nu, ab: ou, Xb: au, N: Ji, hb: su, X: iu, Vb: uu, ob: bu, C: wu, sa: gu, ra: Tu, qb: vu, W: Eu, v: Su, nb: Au, mb: Iu, lb: xu, pb: Lu, kb: Ou, jb: Bu, ib: Mu, Va: ao, Wa: so, Ja: br, ea: io, oa: uo, Sa: fo, na: co, Db: Gf, xa: Df, Eb: Ff, ya: Uf, F: Ef, e: ff, r: sf, x: af, D: gf, Ib: Bf, ba: Lf, B: df, za: Mf, $: Pf, ha: Of, Fb: kf, Gb: Nf, Ba: Sf, Aa: xf, Jb: Af, wa: Wf, aa: Cf, d: cf, A: lf, q: uf, Cb: $f, t: mf, y: Tf, H: pf, E: hf, K: vf, S: _f, ja: wf, _: Rf, Kb: bf, Lb: yf, P: If2, g: Uu, a: Fe, Ob: qe, Hb: Du, ia: Pu, O: _u, qa: Ru, Mb: Nu, Q: ku, zb: Wu, Ab: Fu, ua: Gu, fa: $u, R: zu, Ea: Vu, va: ju, Z: Hu, xb: Yu, Zb: qu, V: Ju, Bb: Xu, ub: Qu, vb: Ku, wb: ef, ga: tf, yb: rf, Nb: nf } };
|
|
7737
8250
|
}
|
|
7738
8251
|
async function bt() {
|
|
7739
8252
|
function e(o, u) {
|
|
@@ -8920,7 +9433,7 @@ async function ts(a = {}) {
|
|
|
8920
9433
|
Te(`invalid type for getValue: ${t}`);
|
|
8921
9434
|
}
|
|
8922
9435
|
}, r.UTF8ToString = ct, r.stringToUTF8 = Pe, r.lengthBytesUTF8 = _e;
|
|
8923
|
-
var lo, po, Dr, Wt, xe, pt, mo, ho, yo, bo, wo, go, To, vo, Eo, So, Ao, Pr, _r, Rr, Nr, Et, kr, Io, Wr, xo, Lo, Oo, Fr, Bo, Mo, Gr, N, St, Co, D, Ft, P, Uo, $r, Do, Po, _o, zr, Ro, No, ko, Wo, Fo, Go, $o, zo, Vo, jo, Ho, Yo, qo, Jo, Xo, Qo, Zo, Ko, ea, ta, ra, na, oa, aa, sa, ia, ua, fa, ca, da, la, pa, ma, ha, ya, ba, wa, ga, Ta, ke, of = [qe, yr, En, Ln, On, Bn, Mn, Cn, Un, Dn, Pn, _n, Rn, Nn, kn, Wn, Qn, Zn, Kn, ao, so, io, uo, fo, co], Vr = {
|
|
9436
|
+
var lo, po, Dr, Wt, xe, pt, mo, ho, yo, bo, wo, go, To, vo, Eo, So, Ao, Pr, _r, Rr, Nr, Et, kr, Io, Wr, xo, Lo, Oo, Fr, Bo, Mo, Gr, N, St, Co, D, Ft, P, Uo, $r, Do, Po, _o, zr, Ro, No, ko, Wo, Fo, Go, $o, zo, Vo, jo, Ho, Yo, qo, Jo, Xo, Qo, Zo, Ko, ea, ta, ra, na, oa, aa, sa, ia, ua, fa, ca, da, la, pa, ma, ha, ya, ba, wa, ga, Ta, ke, of = [qe, yr, En, Ln, On, Bn, Mn, Cn, Un, Dn, Pn, _n, Rn, Nn, kn, Wn, Qn, Zn, Kn, ao, so, io, uo, fo, co], Vr = { 925676: (e, t, n, o, u) => {
|
|
8924
9437
|
if (r === void 0 || !r.Uc) return 1;
|
|
8925
9438
|
if ((e = ct(Number(e >>> 0))).startsWith("./") && (e = e.substring(2)), !(e = r.Uc.get(e))) return 2;
|
|
8926
9439
|
if (t = Number(t >>> 0), n = Number(n >>> 0), o = Number(o >>> 0), t + n > e.byteLength) return 3;
|
|
@@ -8940,11 +9453,11 @@ async function ts(a = {}) {
|
|
|
8940
9453
|
} catch {
|
|
8941
9454
|
return 4;
|
|
8942
9455
|
}
|
|
8943
|
-
},
|
|
9456
|
+
}, 926500: (e, t, n) => {
|
|
8944
9457
|
r.Sd(e, (p(), J).subarray(t >>> 0, t + n >>> 0));
|
|
8945
|
-
},
|
|
9458
|
+
}, 926564: () => r.me(), 926606: (e) => {
|
|
8946
9459
|
r.jd(e);
|
|
8947
|
-
},
|
|
9460
|
+
}, 926643: () => typeof wasmOffsetConverter < "u" };
|
|
8948
9461
|
function af(e, t, n, o) {
|
|
8949
9462
|
var u = P();
|
|
8950
9463
|
try {
|
|
@@ -9375,7 +9888,7 @@ var tc;
|
|
|
9375
9888
|
var us;
|
|
9376
9889
|
var rc;
|
|
9377
9890
|
var os;
|
|
9378
|
-
var
|
|
9891
|
+
var fs4;
|
|
9379
9892
|
var as;
|
|
9380
9893
|
var cs;
|
|
9381
9894
|
var Yt = k(() => {
|
|
@@ -9412,7 +9925,7 @@ var Yt = k(() => {
|
|
|
9412
9925
|
/*webpackIgnore:true*/
|
|
9413
9926
|
/*@vite-ignore*/
|
|
9414
9927
|
a
|
|
9415
|
-
)).default, os = (es(), $t(Ka)).default,
|
|
9928
|
+
)).default, os = (es(), $t(Ka)).default, fs4 = async () => {
|
|
9416
9929
|
if (!ge) throw new Error("Failed to load proxy worker: cannot determine the script source URL.");
|
|
9417
9930
|
if (en(ge)) return [void 0, os()];
|
|
9418
9931
|
let a = await us(ge);
|
|
@@ -10688,7 +11201,7 @@ var mn = k(() => {
|
|
|
10688
11201
|
if (Mt) throw new Error("multiple calls to 'initWasm()' detected.");
|
|
10689
11202
|
if (lr) throw new Error("previous call to 'initWasm()' failed.");
|
|
10690
11203
|
if (Mt = true, ut()) return new Promise((a, r) => {
|
|
10691
|
-
Ee?.terminate(),
|
|
11204
|
+
Ee?.terminate(), fs4().then(([s, f]) => {
|
|
10692
11205
|
try {
|
|
10693
11206
|
Ee = f, Ee.onerror = (d) => r(d), Ee.onmessage = gc, ln = [a, r];
|
|
10694
11207
|
let i = { type: "init-wasm", in: K };
|
|
@@ -10860,7 +11373,7 @@ var $s = k(() => {
|
|
|
10860
11373
|
Ve();
|
|
10861
11374
|
Ve();
|
|
10862
11375
|
Ve();
|
|
10863
|
-
var Xa = "1.25.0-dev.
|
|
11376
|
+
var Xa = "1.25.0-dev.20260307-d626b568e0";
|
|
10864
11377
|
var Tl = Zr;
|
|
10865
11378
|
{
|
|
10866
11379
|
let a = ($s(), $t(Gs)).wasmBackend;
|
|
@@ -10871,11 +11384,11 @@ Object.defineProperty(K.versions, "web", { value: Xa, enumerable: true });
|
|
|
10871
11384
|
// src/backends/utils/cacheWasm.js
|
|
10872
11385
|
async function loadAndCacheFile(url2) {
|
|
10873
11386
|
const fileName = url2.split("/").pop();
|
|
10874
|
-
let
|
|
11387
|
+
let cache2;
|
|
10875
11388
|
try {
|
|
10876
|
-
|
|
10877
|
-
if (
|
|
10878
|
-
const result = await
|
|
11389
|
+
cache2 = await getCache();
|
|
11390
|
+
if (cache2) {
|
|
11391
|
+
const result = await cache2.match(url2);
|
|
10879
11392
|
if (result) {
|
|
10880
11393
|
return result;
|
|
10881
11394
|
}
|
|
@@ -10887,9 +11400,9 @@ async function loadAndCacheFile(url2) {
|
|
|
10887
11400
|
if (!response.ok) {
|
|
10888
11401
|
throw new Error(`Failed to fetch ${fileName}: ${response.status} ${response.statusText}`);
|
|
10889
11402
|
}
|
|
10890
|
-
if (
|
|
11403
|
+
if (cache2) {
|
|
10891
11404
|
try {
|
|
10892
|
-
await
|
|
11405
|
+
await cache2.put(url2, response.clone());
|
|
10893
11406
|
} catch (e) {
|
|
10894
11407
|
logger.warn(`Failed to cache ${fileName}:`, e);
|
|
10895
11408
|
}
|
|
@@ -10907,34 +11420,21 @@ async function loadWasmBinary(wasmURL) {
|
|
|
10907
11420
|
}
|
|
10908
11421
|
}
|
|
10909
11422
|
async function loadWasmFactory(libURL) {
|
|
11423
|
+
if (apis.IS_SERVICE_WORKER_ENV || apis.IS_CHROME_AVAILABLE) {
|
|
11424
|
+
return libURL;
|
|
11425
|
+
}
|
|
10910
11426
|
const response = await loadAndCacheFile(libURL);
|
|
10911
11427
|
if (!response || typeof response === "string") return null;
|
|
10912
11428
|
try {
|
|
10913
11429
|
let code = await response.text();
|
|
10914
|
-
const baseUrl = libURL.split("/").slice(0, -1).join("/");
|
|
10915
|
-
code = code.replaceAll("import.meta.url", `"${baseUrl}"`);
|
|
10916
11430
|
code = code.replaceAll("globalThis.process?.versions?.node", "false");
|
|
10917
11431
|
const blob = new Blob([code], { type: "text/javascript" });
|
|
10918
11432
|
return URL.createObjectURL(blob);
|
|
10919
11433
|
} catch (error) {
|
|
10920
|
-
logger.warn("Failed to read WASM
|
|
11434
|
+
logger.warn("Failed to read WASM factory:", error);
|
|
10921
11435
|
return null;
|
|
10922
11436
|
}
|
|
10923
11437
|
}
|
|
10924
|
-
function isBlobURL(url2) {
|
|
10925
|
-
return isValidUrl(url2, ["blob:"]);
|
|
10926
|
-
}
|
|
10927
|
-
function toAbsoluteURL(url2) {
|
|
10928
|
-
let baseURL;
|
|
10929
|
-
if (typeof location !== "undefined" && location.href) {
|
|
10930
|
-
baseURL = location.href;
|
|
10931
|
-
} else if (typeof import.meta !== "undefined" && import.meta.url) {
|
|
10932
|
-
baseURL = import.meta.url;
|
|
10933
|
-
} else {
|
|
10934
|
-
return url2;
|
|
10935
|
-
}
|
|
10936
|
-
return new URL(url2, baseURL).href;
|
|
10937
|
-
}
|
|
10938
11438
|
|
|
10939
11439
|
// src/backends/onnx.js
|
|
10940
11440
|
import { Tensor } from "onnxruntime-common";
|
|
@@ -11033,7 +11533,6 @@ function deviceToExecutionProviders(device = null) {
|
|
|
11033
11533
|
}
|
|
11034
11534
|
throw new Error(`Unsupported device: "${device}". Should be one of: ${supportedDevices.join(", ")}.`);
|
|
11035
11535
|
}
|
|
11036
|
-
var IS_WEB_ENV = apis.IS_BROWSER_ENV || apis.IS_WEBWORKER_ENV;
|
|
11037
11536
|
var webInitChain = Promise.resolve();
|
|
11038
11537
|
var wasmLoadPromise = null;
|
|
11039
11538
|
async function ensureWasmLoaded() {
|
|
@@ -11042,6 +11541,11 @@ async function ensureWasmLoaded() {
|
|
|
11042
11541
|
}
|
|
11043
11542
|
const shouldUseWasmCache = env.useWasmCache && typeof ONNX_ENV?.wasm?.wasmPaths === "object" && ONNX_ENV?.wasm?.wasmPaths?.wasm && ONNX_ENV?.wasm?.wasmPaths?.mjs;
|
|
11044
11543
|
if (!shouldUseWasmCache) {
|
|
11544
|
+
if (apis.IS_DENO_WEB_RUNTIME) {
|
|
11545
|
+
throw new Error(
|
|
11546
|
+
"env.useWasmCache=false is not supported in Deno's web runtime. Remove the useWasmCache override."
|
|
11547
|
+
);
|
|
11548
|
+
}
|
|
11045
11549
|
wasmLoadPromise = Promise.resolve();
|
|
11046
11550
|
return wasmLoadPromise;
|
|
11047
11551
|
}
|
|
@@ -11050,6 +11554,7 @@ async function ensureWasmLoaded() {
|
|
|
11050
11554
|
/** @type {{ wasm: string, mjs: string }} */
|
|
11051
11555
|
ONNX_ENV.wasm.wasmPaths
|
|
11052
11556
|
);
|
|
11557
|
+
let wasmBinaryLoaded = false;
|
|
11053
11558
|
await Promise.all([
|
|
11054
11559
|
// Load and cache the WASM binary
|
|
11055
11560
|
urls.wasm && !isBlobURL(urls.wasm) ? (async () => {
|
|
@@ -11057,12 +11562,13 @@ async function ensureWasmLoaded() {
|
|
|
11057
11562
|
const wasmBinary = await loadWasmBinary(toAbsoluteURL(urls.wasm));
|
|
11058
11563
|
if (wasmBinary) {
|
|
11059
11564
|
ONNX_ENV.wasm.wasmBinary = wasmBinary;
|
|
11565
|
+
wasmBinaryLoaded = true;
|
|
11060
11566
|
}
|
|
11061
11567
|
} catch (err) {
|
|
11062
11568
|
logger.warn("Failed to pre-load WASM binary:", err);
|
|
11063
11569
|
}
|
|
11064
11570
|
})() : Promise.resolve(),
|
|
11065
|
-
// Load and cache the WASM factory
|
|
11571
|
+
// Load and cache the WASM factory as a blob URL
|
|
11066
11572
|
urls.mjs && !isBlobURL(urls.mjs) ? (async () => {
|
|
11067
11573
|
try {
|
|
11068
11574
|
const wasmFactoryBlob = await loadWasmFactory(toAbsoluteURL(urls.mjs));
|
|
@@ -11074,6 +11580,9 @@ async function ensureWasmLoaded() {
|
|
|
11074
11580
|
}
|
|
11075
11581
|
})() : Promise.resolve()
|
|
11076
11582
|
]);
|
|
11583
|
+
if (!wasmBinaryLoaded) {
|
|
11584
|
+
ONNX_ENV.wasm.wasmPaths.mjs = urls.mjs;
|
|
11585
|
+
}
|
|
11077
11586
|
})();
|
|
11078
11587
|
return wasmLoadPromise;
|
|
11079
11588
|
}
|
|
@@ -11085,51 +11594,52 @@ async function createInferenceSession(buffer_or_path, session_options, session_c
|
|
|
11085
11594
|
logSeverityLevel,
|
|
11086
11595
|
...session_options
|
|
11087
11596
|
});
|
|
11088
|
-
const session = await (IS_WEB_ENV ? webInitChain = webInitChain.then(load) : load());
|
|
11597
|
+
const session = await (apis.IS_WEB_ENV ? webInitChain = webInitChain.then(load) : load());
|
|
11089
11598
|
session.config = session_config;
|
|
11090
11599
|
return session;
|
|
11091
11600
|
}
|
|
11092
11601
|
var webInferenceChain = Promise.resolve();
|
|
11093
11602
|
async function runInferenceSession(session, ortFeed) {
|
|
11094
11603
|
const run = () => session.run(ortFeed);
|
|
11095
|
-
|
|
11096
|
-
return output;
|
|
11604
|
+
return apis.IS_WEB_ENV ? webInferenceChain = webInferenceChain.then(run) : run();
|
|
11097
11605
|
}
|
|
11098
11606
|
function isONNXTensor(x) {
|
|
11099
11607
|
return x instanceof ONNX.Tensor;
|
|
11100
11608
|
}
|
|
11101
11609
|
var ONNX_ENV = ONNX?.env;
|
|
11102
|
-
if (ONNX_ENV?.wasm) {
|
|
11103
|
-
if (
|
|
11104
|
-
// @ts-ignore Cannot find name 'ServiceWorkerGlobalScope'.ts(2304)
|
|
11105
|
-
!(typeof ServiceWorkerGlobalScope !== "undefined" && self instanceof ServiceWorkerGlobalScope) && ONNX_ENV.versions?.web && !ONNX_ENV.wasm.wasmPaths
|
|
11106
|
-
) {
|
|
11107
|
-
const wasmPathPrefix = `https://cdn.jsdelivr.net/npm/onnxruntime-web@${ONNX_ENV.versions.web}/dist/`;
|
|
11108
|
-
ONNX_ENV.wasm.wasmPaths = apis.IS_SAFARI ? {
|
|
11109
|
-
mjs: `${wasmPathPrefix}ort-wasm-simd-threaded.mjs`,
|
|
11110
|
-
wasm: `${wasmPathPrefix}ort-wasm-simd-threaded.wasm`
|
|
11111
|
-
} : {
|
|
11112
|
-
mjs: `${wasmPathPrefix}ort-wasm-simd-threaded.asyncify.mjs`,
|
|
11113
|
-
wasm: `${wasmPathPrefix}ort-wasm-simd-threaded.asyncify.wasm`
|
|
11114
|
-
};
|
|
11115
|
-
}
|
|
11116
|
-
ONNX_ENV.wasm.proxy = false;
|
|
11117
|
-
}
|
|
11118
|
-
if (ONNX_ENV?.webgpu) {
|
|
11119
|
-
ONNX_ENV.webgpu.powerPreference = "high-performance";
|
|
11120
|
-
}
|
|
11121
11610
|
function isONNXProxy() {
|
|
11122
11611
|
return ONNX_ENV?.wasm?.proxy;
|
|
11123
11612
|
}
|
|
11124
|
-
|
|
11125
|
-
|
|
11126
|
-
|
|
11613
|
+
if (ONNX_ENV) {
|
|
11614
|
+
let setLogLevel = function(logLevel2) {
|
|
11615
|
+
const severityLevel = getOnnxLogSeverityLevel(logLevel2);
|
|
11616
|
+
ONNX_ENV.logLevel = ONNX_LOG_LEVEL_NAMES[severityLevel];
|
|
11617
|
+
};
|
|
11618
|
+
if (ONNX_ENV.wasm) {
|
|
11619
|
+
if (
|
|
11620
|
+
// @ts-ignore Cannot find name 'ServiceWorkerGlobalScope'.ts(2304)
|
|
11621
|
+
!(typeof ServiceWorkerGlobalScope !== "undefined" && self instanceof ServiceWorkerGlobalScope) && ONNX_ENV.versions?.web && !ONNX_ENV.wasm.wasmPaths
|
|
11622
|
+
) {
|
|
11623
|
+
const wasmPathPrefix = `https://cdn.jsdelivr.net/npm/onnxruntime-web@${ONNX_ENV.versions.web}/dist/`;
|
|
11624
|
+
ONNX_ENV.wasm.wasmPaths = apis.IS_SAFARI ? {
|
|
11625
|
+
mjs: `${wasmPathPrefix}ort-wasm-simd-threaded.mjs`,
|
|
11626
|
+
wasm: `${wasmPathPrefix}ort-wasm-simd-threaded.wasm`
|
|
11627
|
+
} : {
|
|
11628
|
+
mjs: `${wasmPathPrefix}ort-wasm-simd-threaded.asyncify.mjs`,
|
|
11629
|
+
wasm: `${wasmPathPrefix}ort-wasm-simd-threaded.asyncify.wasm`
|
|
11630
|
+
};
|
|
11631
|
+
}
|
|
11632
|
+
ONNX_ENV.wasm.proxy = false;
|
|
11633
|
+
}
|
|
11634
|
+
if (ONNX_ENV.webgpu) {
|
|
11635
|
+
ONNX_ENV.webgpu.powerPreference = "high-performance";
|
|
11636
|
+
}
|
|
11637
|
+
setLogLevel(env.logLevel ?? LogLevel.WARNING);
|
|
11638
|
+
env.backends.onnx = {
|
|
11639
|
+
...ONNX_ENV,
|
|
11640
|
+
setLogLevel
|
|
11641
|
+
};
|
|
11127
11642
|
}
|
|
11128
|
-
setLogLevel(env.logLevel ?? LogLevel.WARNING);
|
|
11129
|
-
env.backends.onnx = {
|
|
11130
|
-
...ONNX_ENV,
|
|
11131
|
-
setLogLevel
|
|
11132
|
-
};
|
|
11133
11643
|
|
|
11134
11644
|
// src/ops/registry.js
|
|
11135
11645
|
var wrap = async (session_bytes, session_options, names) => {
|
|
@@ -12334,199 +12844,38 @@ var DataTypeMap = Object.freeze({
|
|
|
12334
12844
|
int4: Int8Array
|
|
12335
12845
|
});
|
|
12336
12846
|
|
|
12337
|
-
// src/utils/
|
|
12338
|
-
var
|
|
12339
|
-
|
|
12340
|
-
|
|
12341
|
-
|
|
12342
|
-
|
|
12343
|
-
|
|
12344
|
-
this.
|
|
12847
|
+
// src/utils/tensor.js
|
|
12848
|
+
var Tensor2 = class _Tensor {
|
|
12849
|
+
/**
|
|
12850
|
+
* Dimensions of the tensor.
|
|
12851
|
+
* @type {number[]}
|
|
12852
|
+
*/
|
|
12853
|
+
get dims() {
|
|
12854
|
+
return this.ort_tensor.dims;
|
|
12855
|
+
}
|
|
12856
|
+
set dims(value) {
|
|
12857
|
+
this.ort_tensor.dims = value;
|
|
12345
12858
|
}
|
|
12346
12859
|
/**
|
|
12347
|
-
*
|
|
12348
|
-
*
|
|
12349
|
-
* When called with a number, initializes the state deterministically from that value.
|
|
12350
|
-
* When called with no arguments (or `undefined`/`null`), seeds from OS entropy
|
|
12351
|
-
* via `crypto.getRandomValues`, matching Python's `random.seed()` behaviour.
|
|
12352
|
-
*
|
|
12353
|
-
* @param {number} [n] The seed value. Omit to seed from OS entropy.
|
|
12860
|
+
* Type of the tensor.
|
|
12861
|
+
* @type {DataType}
|
|
12354
12862
|
*/
|
|
12355
|
-
|
|
12356
|
-
|
|
12357
|
-
if (apis.IS_CRYPTO_AVAILABLE) {
|
|
12358
|
-
const buf = new Uint32Array(1);
|
|
12359
|
-
crypto.getRandomValues(buf);
|
|
12360
|
-
n = buf[0];
|
|
12361
|
-
} else {
|
|
12362
|
-
n = Date.now() >>> 0;
|
|
12363
|
-
}
|
|
12364
|
-
}
|
|
12365
|
-
const mt2 = this._mt;
|
|
12366
|
-
const u = (a, b) => Math.imul(a, b) >>> 0, key = [];
|
|
12367
|
-
for (let v = n || 0; v > 0; v = Math.floor(v / 4294967296)) key.push(v & 4294967295);
|
|
12368
|
-
if (!key.length) key.push(0);
|
|
12369
|
-
mt2[0] = 19650218;
|
|
12370
|
-
for (let k2 = 1; k2 < 624; ++k2) mt2[k2] = u(1812433253, mt2[k2 - 1] ^ mt2[k2 - 1] >>> 30) + k2 >>> 0;
|
|
12371
|
-
let i = 1, j = 0;
|
|
12372
|
-
for (let k2 = Math.max(624, key.length); k2 > 0; --k2, ++i, ++j) {
|
|
12373
|
-
if (i >= 624) {
|
|
12374
|
-
mt2[0] = mt2[623];
|
|
12375
|
-
i = 1;
|
|
12376
|
-
}
|
|
12377
|
-
if (j >= key.length) j = 0;
|
|
12378
|
-
mt2[i] = (mt2[i] ^ u(mt2[i - 1] ^ mt2[i - 1] >>> 30, 1664525)) + key[j] + j >>> 0;
|
|
12379
|
-
}
|
|
12380
|
-
for (let k2 = 623; k2 > 0; --k2, ++i) {
|
|
12381
|
-
if (i >= 624) {
|
|
12382
|
-
mt2[0] = mt2[623];
|
|
12383
|
-
i = 1;
|
|
12384
|
-
}
|
|
12385
|
-
mt2[i] = (mt2[i] ^ u(mt2[i - 1] ^ mt2[i - 1] >>> 30, 1566083941)) - i >>> 0;
|
|
12386
|
-
}
|
|
12387
|
-
mt2[0] = 2147483648;
|
|
12388
|
-
this._idx = 624;
|
|
12389
|
-
this._gauss_next = null;
|
|
12863
|
+
get type() {
|
|
12864
|
+
return this.ort_tensor.type;
|
|
12390
12865
|
}
|
|
12391
12866
|
/**
|
|
12392
|
-
*
|
|
12393
|
-
*
|
|
12394
|
-
* Performs the "twist" step when the state buffer is exhausted,
|
|
12395
|
-
* then applies the standard MT19937 tempering transform.
|
|
12396
|
-
*
|
|
12397
|
-
* @returns {number} A random integer in the range [0, 2^32 - 1].
|
|
12867
|
+
* The data stored in the tensor.
|
|
12868
|
+
* @type {DataArray}
|
|
12398
12869
|
*/
|
|
12399
|
-
|
|
12400
|
-
|
|
12401
|
-
if (this._idx >= 624) {
|
|
12402
|
-
for (let k2 = 0; k2 < 624; ++k2) {
|
|
12403
|
-
const y2 = mt2[k2] & 2147483648 | mt2[(k2 + 1) % 624] & 2147483647;
|
|
12404
|
-
mt2[k2] = (mt2[(k2 + 397) % 624] ^ y2 >>> 1 ^ (y2 & 1 ? 2567483615 : 0)) >>> 0;
|
|
12405
|
-
}
|
|
12406
|
-
this._idx = 0;
|
|
12407
|
-
}
|
|
12408
|
-
let y = mt2[this._idx++];
|
|
12409
|
-
y ^= y >>> 11;
|
|
12410
|
-
y ^= y << 7 & 2636928640;
|
|
12411
|
-
y ^= y << 15 & 4022730752;
|
|
12412
|
-
y ^= y >>> 18;
|
|
12413
|
-
return y >>> 0;
|
|
12870
|
+
get data() {
|
|
12871
|
+
return this.ort_tensor.data;
|
|
12414
12872
|
}
|
|
12415
12873
|
/**
|
|
12416
|
-
*
|
|
12417
|
-
*
|
|
12418
|
-
* Combines two 32-bit integers (using 53 bits of precision) to produce
|
|
12419
|
-
* a uniformly distributed double, matching Python's `random.random()`.
|
|
12420
|
-
*
|
|
12421
|
-
* @returns {number} A random float in [0, 1).
|
|
12874
|
+
* The number of elements in the tensor.
|
|
12875
|
+
* @type {number}
|
|
12422
12876
|
*/
|
|
12423
|
-
|
|
12424
|
-
return
|
|
12425
|
-
}
|
|
12426
|
-
/**
|
|
12427
|
-
* Generates a random number from a Gaussian (normal) distribution.
|
|
12428
|
-
*
|
|
12429
|
-
* Uses the Box-Muller transform with a cached spare value,
|
|
12430
|
-
* matching Python's `random.gauss()` output for the same seed.
|
|
12431
|
-
*
|
|
12432
|
-
* @param {number} [mu=0] The mean of the distribution.
|
|
12433
|
-
* @param {number} [sigma=1] The standard deviation of the distribution.
|
|
12434
|
-
* @returns {number} A normally distributed random value.
|
|
12435
|
-
*/
|
|
12436
|
-
gauss(mu = 0, sigma = 1) {
|
|
12437
|
-
let z = this._gauss_next;
|
|
12438
|
-
this._gauss_next = null;
|
|
12439
|
-
if (z === null) {
|
|
12440
|
-
const x2pi = this.random() * 2 * Math.PI, g2rad = Math.sqrt(-2 * Math.log(1 - this.random()));
|
|
12441
|
-
z = Math.cos(x2pi) * g2rad;
|
|
12442
|
-
this._gauss_next = Math.sin(x2pi) * g2rad;
|
|
12443
|
-
}
|
|
12444
|
-
return mu + z * sigma;
|
|
12445
|
-
}
|
|
12446
|
-
/**
|
|
12447
|
-
* Shuffles an array in-place using the Fisher-Yates algorithm.
|
|
12448
|
-
*
|
|
12449
|
-
* Uses rejection sampling via `getrandbits`-style bit masking to ensure
|
|
12450
|
-
* a uniform distribution, matching Python's `random.shuffle()`.
|
|
12451
|
-
*
|
|
12452
|
-
* @param {any[]} arr The array to shuffle in-place.
|
|
12453
|
-
*/
|
|
12454
|
-
shuffle(arr) {
|
|
12455
|
-
for (let i = arr.length - 1; i > 0; --i) {
|
|
12456
|
-
const k2 = 32 - Math.clz32(i + 1);
|
|
12457
|
-
let r = this._int32() >>> 32 - k2;
|
|
12458
|
-
while (r > i) r = this._int32() >>> 32 - k2;
|
|
12459
|
-
const t = arr[i];
|
|
12460
|
-
arr[i] = arr[r];
|
|
12461
|
-
arr[r] = t;
|
|
12462
|
-
}
|
|
12463
|
-
}
|
|
12464
|
-
/**
|
|
12465
|
-
* Selects a single element from a weighted population.
|
|
12466
|
-
*
|
|
12467
|
-
* Matches Python's `random.choices(population, weights=weights, k=1)[0]`
|
|
12468
|
-
*
|
|
12469
|
-
* @param {any[]} population The array of items to choose from.
|
|
12470
|
-
* @param {number[]} weights An array of non-negative weights, one per population element.
|
|
12471
|
-
* @returns {*} A single randomly selected element from the population.
|
|
12472
|
-
*/
|
|
12473
|
-
choices(population, weights) {
|
|
12474
|
-
return population[_weightedIndexWith(this._random_fn, weights)];
|
|
12475
|
-
}
|
|
12476
|
-
};
|
|
12477
|
-
function _weightedIndexWith(randomFn, weights) {
|
|
12478
|
-
let sum = 0;
|
|
12479
|
-
for (let i = 0; i < weights.length; ++i) sum += weights[i];
|
|
12480
|
-
let x = randomFn() * sum;
|
|
12481
|
-
for (let i = 0; i < weights.length; ++i) {
|
|
12482
|
-
x -= weights[i];
|
|
12483
|
-
if (x < 0) return i;
|
|
12484
|
-
}
|
|
12485
|
-
return weights.length - 1;
|
|
12486
|
-
}
|
|
12487
|
-
var _default = new Random();
|
|
12488
|
-
var random = Object.freeze({
|
|
12489
|
-
Random,
|
|
12490
|
-
seed: _default.seed.bind(_default),
|
|
12491
|
-
random: _default.random.bind(_default),
|
|
12492
|
-
gauss: _default.gauss.bind(_default),
|
|
12493
|
-
shuffle: _default.shuffle.bind(_default),
|
|
12494
|
-
choices: _default.choices.bind(_default)
|
|
12495
|
-
});
|
|
12496
|
-
var _weightedIndex = (weights) => _weightedIndexWith(random.random, weights);
|
|
12497
|
-
|
|
12498
|
-
// src/utils/tensor.js
|
|
12499
|
-
var Tensor2 = class _Tensor {
|
|
12500
|
-
/**
|
|
12501
|
-
* Dimensions of the tensor.
|
|
12502
|
-
* @type {number[]}
|
|
12503
|
-
*/
|
|
12504
|
-
get dims() {
|
|
12505
|
-
return this.ort_tensor.dims;
|
|
12506
|
-
}
|
|
12507
|
-
set dims(value) {
|
|
12508
|
-
this.ort_tensor.dims = value;
|
|
12509
|
-
}
|
|
12510
|
-
/**
|
|
12511
|
-
* Type of the tensor.
|
|
12512
|
-
* @type {DataType}
|
|
12513
|
-
*/
|
|
12514
|
-
get type() {
|
|
12515
|
-
return this.ort_tensor.type;
|
|
12516
|
-
}
|
|
12517
|
-
/**
|
|
12518
|
-
* The data stored in the tensor.
|
|
12519
|
-
* @type {DataArray}
|
|
12520
|
-
*/
|
|
12521
|
-
get data() {
|
|
12522
|
-
return this.ort_tensor.data;
|
|
12523
|
-
}
|
|
12524
|
-
/**
|
|
12525
|
-
* The number of elements in the tensor.
|
|
12526
|
-
* @type {number}
|
|
12527
|
-
*/
|
|
12528
|
-
get size() {
|
|
12529
|
-
return this.ort_tensor.size;
|
|
12877
|
+
get size() {
|
|
12878
|
+
return this.ort_tensor.size;
|
|
12530
12879
|
}
|
|
12531
12880
|
/**
|
|
12532
12881
|
* The location of the tensor data.
|
|
@@ -12905,9 +13254,23 @@ var Tensor2 = class _Tensor {
|
|
|
12905
13254
|
throw Error(`Unsupported norm: ${p}`);
|
|
12906
13255
|
}
|
|
12907
13256
|
const this_data = this.data;
|
|
12908
|
-
const
|
|
13257
|
+
const is_bigint = this_data instanceof BigInt64Array || this_data instanceof BigUint64Array;
|
|
13258
|
+
if (is_bigint && p !== 1) {
|
|
13259
|
+
throw Error(`Expected a floating point tensor as input. Got ${this.type}`);
|
|
13260
|
+
}
|
|
13261
|
+
let fn2, zero;
|
|
13262
|
+
if (is_bigint) {
|
|
13263
|
+
fn2 = (a, b) => a + b;
|
|
13264
|
+
zero = 0n;
|
|
13265
|
+
} else {
|
|
13266
|
+
fn2 = (a, b) => a + b ** p;
|
|
13267
|
+
zero = 0;
|
|
13268
|
+
}
|
|
12909
13269
|
if (dim === null) {
|
|
12910
|
-
|
|
13270
|
+
let val = this_data.reduce(fn2, zero);
|
|
13271
|
+
if (p !== 1) {
|
|
13272
|
+
val = val ** (1 / p);
|
|
13273
|
+
}
|
|
12911
13274
|
return new _Tensor(this.type, [val], []);
|
|
12912
13275
|
}
|
|
12913
13276
|
const [type, result, resultDims] = reduce_helper(fn2, this, dim, keepdim);
|
|
@@ -15367,9 +15730,11 @@ __export(processors_exports, {
|
|
|
15367
15730
|
ChatterboxProcessor: () => ChatterboxProcessor,
|
|
15368
15731
|
Florence2Processor: () => Florence2Processor,
|
|
15369
15732
|
Gemma3nProcessor: () => Gemma3nProcessor,
|
|
15733
|
+
GraniteSpeechProcessor: () => GraniteSpeechProcessor,
|
|
15370
15734
|
GroundingDinoProcessor: () => GroundingDinoProcessor,
|
|
15371
15735
|
Idefics3Processor: () => Idefics3Processor,
|
|
15372
15736
|
JinaCLIPProcessor: () => JinaCLIPProcessor,
|
|
15737
|
+
Lfm2VlProcessor: () => Lfm2VlProcessor,
|
|
15373
15738
|
LlavaProcessor: () => LlavaProcessor,
|
|
15374
15739
|
MgpstrProcessor: () => MgpstrProcessor,
|
|
15375
15740
|
MoonshineProcessor: () => MoonshineProcessor,
|
|
@@ -15390,6 +15755,7 @@ __export(processors_exports, {
|
|
|
15390
15755
|
UltravoxProcessor: () => UltravoxProcessor,
|
|
15391
15756
|
VLChatProcessor: () => VLChatProcessor,
|
|
15392
15757
|
VoxtralProcessor: () => VoxtralProcessor,
|
|
15758
|
+
VoxtralRealtimeProcessor: () => VoxtralRealtimeProcessor,
|
|
15393
15759
|
Wav2Vec2Processor: () => Wav2Vec2Processor,
|
|
15394
15760
|
Wav2Vec2ProcessorWithLM: () => Wav2Vec2ProcessorWithLM,
|
|
15395
15761
|
WhisperProcessor: () => WhisperProcessor
|
|
@@ -15444,19 +15810,21 @@ __export(feature_extractors_exports, {
|
|
|
15444
15810
|
EncodecFeatureExtractor: () => EncodecFeatureExtractor,
|
|
15445
15811
|
FeatureExtractor: () => FeatureExtractor,
|
|
15446
15812
|
Gemma3nAudioFeatureExtractor: () => Gemma3nAudioFeatureExtractor,
|
|
15813
|
+
GraniteSpeechFeatureExtractor: () => GraniteSpeechFeatureExtractor,
|
|
15447
15814
|
MoonshineFeatureExtractor: () => MoonshineFeatureExtractor,
|
|
15448
15815
|
ParakeetFeatureExtractor: () => ParakeetFeatureExtractor,
|
|
15449
15816
|
PyAnnoteFeatureExtractor: () => PyAnnoteFeatureExtractor,
|
|
15450
15817
|
SeamlessM4TFeatureExtractor: () => SeamlessM4TFeatureExtractor,
|
|
15451
15818
|
SnacFeatureExtractor: () => SnacFeatureExtractor,
|
|
15452
15819
|
SpeechT5FeatureExtractor: () => SpeechT5FeatureExtractor,
|
|
15820
|
+
VoxtralRealtimeFeatureExtractor: () => VoxtralRealtimeFeatureExtractor,
|
|
15453
15821
|
Wav2Vec2FeatureExtractor: () => Wav2Vec2FeatureExtractor,
|
|
15454
15822
|
WeSpeakerFeatureExtractor: () => WeSpeakerFeatureExtractor,
|
|
15455
15823
|
WhisperFeatureExtractor: () => WhisperFeatureExtractor
|
|
15456
15824
|
});
|
|
15457
15825
|
|
|
15458
15826
|
// src/utils/io.js
|
|
15459
|
-
import
|
|
15827
|
+
import fs5 from "fs";
|
|
15460
15828
|
import { Readable } from "stream";
|
|
15461
15829
|
import { pipeline as pipe } from "stream/promises";
|
|
15462
15830
|
async function saveBlob(path3, blob) {
|
|
@@ -15474,7 +15842,7 @@ async function saveBlob(path3, blob) {
|
|
|
15474
15842
|
} else if (apis.IS_FS_AVAILABLE) {
|
|
15475
15843
|
const webStream = blob.stream();
|
|
15476
15844
|
const nodeStream = Readable.fromWeb(webStream);
|
|
15477
|
-
const fileStream =
|
|
15845
|
+
const fileStream = fs5.createWriteStream(path3);
|
|
15478
15846
|
await pipe(nodeStream, fileStream);
|
|
15479
15847
|
} else {
|
|
15480
15848
|
throw new Error("Unable to save because filesystem is disabled in this environment.");
|
|
@@ -15677,6 +16045,7 @@ async function spectrogram(waveform, window2, frame_length, hop_length, {
|
|
|
15677
16045
|
mel_filters = null,
|
|
15678
16046
|
mel_floor = 1e-10,
|
|
15679
16047
|
log_mel = null,
|
|
16048
|
+
max_log_mel = null,
|
|
15680
16049
|
reference = 1,
|
|
15681
16050
|
min_value = 1e-10,
|
|
15682
16051
|
db_range = null,
|
|
@@ -15816,6 +16185,17 @@ async function spectrogram(waveform, window2, frame_length, hop_length, {
|
|
|
15816
16185
|
mel_spec_data[i] = Math.log10(mel_spec_data[i]);
|
|
15817
16186
|
}
|
|
15818
16187
|
break;
|
|
16188
|
+
case "log10_max_norm": {
|
|
16189
|
+
for (let i = 0; i < o; ++i) {
|
|
16190
|
+
mel_spec_data[i] = Math.log10(mel_spec_data[i]);
|
|
16191
|
+
}
|
|
16192
|
+
const logMax = max_log_mel ?? max(mel_spec_data)[0];
|
|
16193
|
+
const threshold = logMax - 8;
|
|
16194
|
+
for (let i = 0; i < o; ++i) {
|
|
16195
|
+
mel_spec_data[i] = (Math.max(mel_spec_data[i], threshold) + 4) / 4;
|
|
16196
|
+
}
|
|
16197
|
+
break;
|
|
16198
|
+
}
|
|
15819
16199
|
case "dB":
|
|
15820
16200
|
if (power === 1) {
|
|
15821
16201
|
amplitude_to_db(mel_spec_data, reference, min_value, db_range);
|
|
@@ -15826,7 +16206,9 @@ async function spectrogram(waveform, window2, frame_length, hop_length, {
|
|
|
15826
16206
|
}
|
|
15827
16207
|
break;
|
|
15828
16208
|
default:
|
|
15829
|
-
throw new Error(
|
|
16209
|
+
throw new Error(
|
|
16210
|
+
`log_mel must be one of null, 'log', 'log10', 'log10_max_norm', or 'dB'. Got '${log_mel}'`
|
|
16211
|
+
);
|
|
15830
16212
|
}
|
|
15831
16213
|
}
|
|
15832
16214
|
return mel_spec;
|
|
@@ -16331,6 +16713,56 @@ var Gemma3nAudioFeatureExtractor = class extends FeatureExtractor {
|
|
|
16331
16713
|
}
|
|
16332
16714
|
};
|
|
16333
16715
|
|
|
16716
|
+
// src/models/granite_speech/feature_extraction_granite_speech.js
|
|
16717
|
+
var GraniteSpeechFeatureExtractor = class extends FeatureExtractor {
|
|
16718
|
+
constructor(config) {
|
|
16719
|
+
super(config);
|
|
16720
|
+
const { n_fft, win_length, n_mels, sample_rate } = config.melspec_kwargs;
|
|
16721
|
+
this.mel_filters = mel_filter_bank(
|
|
16722
|
+
Math.floor(1 + n_fft / 2),
|
|
16723
|
+
// num_frequency_bins = 257
|
|
16724
|
+
n_mels,
|
|
16725
|
+
// 80
|
|
16726
|
+
0,
|
|
16727
|
+
// min_frequency
|
|
16728
|
+
sample_rate / 2,
|
|
16729
|
+
// max_frequency = 8000
|
|
16730
|
+
sample_rate,
|
|
16731
|
+
// 16000
|
|
16732
|
+
null,
|
|
16733
|
+
// norm (torchaudio default: no norm)
|
|
16734
|
+
"htk"
|
|
16735
|
+
// mel_scale (torchaudio default)
|
|
16736
|
+
);
|
|
16737
|
+
const raw_window = window_function(win_length, "hann");
|
|
16738
|
+
this.window = new Float64Array(n_fft);
|
|
16739
|
+
const pad = Math.floor((n_fft - win_length) / 2);
|
|
16740
|
+
this.window.set(raw_window, pad);
|
|
16741
|
+
}
|
|
16742
|
+
/**
|
|
16743
|
+
* Extract mel spectrogram features from audio, matching the Python GraniteSpeechFeatureExtractor.
|
|
16744
|
+
* @param {Float32Array|Float64Array} audio The audio waveform.
|
|
16745
|
+
* @returns {Promise<{input_features: Tensor}>}
|
|
16746
|
+
*/
|
|
16747
|
+
async _call(audio) {
|
|
16748
|
+
validate_audio_inputs(audio, "GraniteSpeechFeatureExtractor");
|
|
16749
|
+
const { n_fft, hop_length, n_mels } = this.config.melspec_kwargs;
|
|
16750
|
+
const num_frames = 1 + Math.floor((audio.length - 1) / hop_length);
|
|
16751
|
+
const max_num_frames = num_frames - num_frames % 2;
|
|
16752
|
+
const mel = await spectrogram(audio, this.window, n_fft, hop_length, {
|
|
16753
|
+
power: 2,
|
|
16754
|
+
mel_filters: this.mel_filters,
|
|
16755
|
+
log_mel: "log10_max_norm",
|
|
16756
|
+
transpose: true,
|
|
16757
|
+
// [time, n_mels]
|
|
16758
|
+
max_num_frames,
|
|
16759
|
+
do_pad: false
|
|
16760
|
+
});
|
|
16761
|
+
const input_features = mel.view(-1, 2 * n_mels).unsqueeze_(0);
|
|
16762
|
+
return { input_features };
|
|
16763
|
+
}
|
|
16764
|
+
};
|
|
16765
|
+
|
|
16334
16766
|
// src/models/moonshine/feature_extraction_moonshine.js
|
|
16335
16767
|
var MoonshineFeatureExtractor = class extends FeatureExtractor {
|
|
16336
16768
|
/**
|
|
@@ -16811,6 +17243,71 @@ var WeSpeakerFeatureExtractor = class extends FeatureExtractor {
|
|
|
16811
17243
|
}
|
|
16812
17244
|
};
|
|
16813
17245
|
|
|
17246
|
+
// src/models/voxtral_realtime/feature_extraction_voxtral_realtime.js
|
|
17247
|
+
var VoxtralRealtimeFeatureExtractor = class extends FeatureExtractor {
|
|
17248
|
+
constructor(config) {
|
|
17249
|
+
super(config);
|
|
17250
|
+
this.config.mel_filters ??= mel_filter_bank(
|
|
17251
|
+
Math.floor(1 + this.config.n_fft / 2),
|
|
17252
|
+
// num_frequency_bins
|
|
17253
|
+
this.config.feature_size,
|
|
17254
|
+
// num_mel_filters
|
|
17255
|
+
0,
|
|
17256
|
+
// min_frequency
|
|
17257
|
+
8e3,
|
|
17258
|
+
// max_frequency
|
|
17259
|
+
this.config.sampling_rate,
|
|
17260
|
+
// sampling_rate
|
|
17261
|
+
"slaney",
|
|
17262
|
+
// norm
|
|
17263
|
+
"slaney"
|
|
17264
|
+
// mel_scale
|
|
17265
|
+
);
|
|
17266
|
+
this.window = window_function(this.config.n_fft, "hann");
|
|
17267
|
+
}
|
|
17268
|
+
/**
|
|
17269
|
+
* Computes the log-Mel spectrogram of the provided audio waveform.
|
|
17270
|
+
* @param {Float32Array|Float64Array} waveform The audio waveform to process.
|
|
17271
|
+
* @param {Object} [options]
|
|
17272
|
+
* @param {boolean} [options.center=true] Whether to center-pad the waveform for STFT.
|
|
17273
|
+
* @returns {Promise<import('../../utils/tensor.js').Tensor>} The log-Mel spectrogram tensor of shape [num_mel_bins, num_frames].
|
|
17274
|
+
*/
|
|
17275
|
+
async _extract_fbank_features(waveform, { center = true } = {}) {
|
|
17276
|
+
const { n_fft, hop_length, mel_filters, global_log_mel_max } = this.config;
|
|
17277
|
+
const max_num_frames = center ? Math.floor(waveform.length / hop_length) : Math.floor((waveform.length - n_fft) / hop_length);
|
|
17278
|
+
return await spectrogram(
|
|
17279
|
+
waveform,
|
|
17280
|
+
this.window,
|
|
17281
|
+
n_fft,
|
|
17282
|
+
// frame_length
|
|
17283
|
+
hop_length,
|
|
17284
|
+
{
|
|
17285
|
+
power: 2,
|
|
17286
|
+
mel_filters,
|
|
17287
|
+
log_mel: "log10_max_norm",
|
|
17288
|
+
max_log_mel: global_log_mel_max,
|
|
17289
|
+
center,
|
|
17290
|
+
max_num_frames,
|
|
17291
|
+
do_pad: false
|
|
17292
|
+
}
|
|
17293
|
+
);
|
|
17294
|
+
}
|
|
17295
|
+
/**
|
|
17296
|
+
* Extract mel spectrogram features from audio.
|
|
17297
|
+
* @param {Float32Array|Float64Array} audio The audio data.
|
|
17298
|
+
* @param {Object} [options]
|
|
17299
|
+
* @param {boolean} [options.center=true] Whether to center-pad the waveform.
|
|
17300
|
+
* @returns {Promise<{ input_features: import('../../utils/tensor.js').Tensor }>}
|
|
17301
|
+
*/
|
|
17302
|
+
async _call(audio, { center = true } = {}) {
|
|
17303
|
+
validate_audio_inputs(audio, "VoxtralRealtimeFeatureExtractor");
|
|
17304
|
+
const features = await this._extract_fbank_features(audio, { center });
|
|
17305
|
+
return {
|
|
17306
|
+
input_features: features.unsqueeze_(0)
|
|
17307
|
+
};
|
|
17308
|
+
}
|
|
17309
|
+
};
|
|
17310
|
+
|
|
16814
17311
|
// src/models/whisper/feature_extraction_whisper.js
|
|
16815
17312
|
var WhisperFeatureExtractor = class extends FeatureExtractor {
|
|
16816
17313
|
constructor(config) {
|
|
@@ -16839,7 +17336,7 @@ var WhisperFeatureExtractor = class extends FeatureExtractor {
|
|
|
16839
17336
|
* @returns {Promise<Tensor>} An object containing the log-Mel spectrogram data as a Float32Array and its dimensions as an array of numbers.
|
|
16840
17337
|
*/
|
|
16841
17338
|
async _extract_fbank_features(waveform) {
|
|
16842
|
-
|
|
17339
|
+
return await spectrogram(
|
|
16843
17340
|
waveform,
|
|
16844
17341
|
this.window,
|
|
16845
17342
|
// window
|
|
@@ -16850,7 +17347,7 @@ var WhisperFeatureExtractor = class extends FeatureExtractor {
|
|
|
16850
17347
|
{
|
|
16851
17348
|
power: 2,
|
|
16852
17349
|
mel_filters: this.config.mel_filters,
|
|
16853
|
-
log_mel: "
|
|
17350
|
+
log_mel: "log10_max_norm",
|
|
16854
17351
|
// Custom
|
|
16855
17352
|
max_num_frames: Math.min(
|
|
16856
17353
|
Math.floor(waveform.length / this.config.hop_length),
|
|
@@ -16859,15 +17356,6 @@ var WhisperFeatureExtractor = class extends FeatureExtractor {
|
|
|
16859
17356
|
)
|
|
16860
17357
|
}
|
|
16861
17358
|
);
|
|
16862
|
-
const data = features.data;
|
|
16863
|
-
const maxValue = max(
|
|
16864
|
-
/** @type {Float32Array} */
|
|
16865
|
-
data
|
|
16866
|
-
)[0];
|
|
16867
|
-
for (let i = 0; i < data.length; ++i) {
|
|
16868
|
-
data[i] = (Math.max(data[i], maxValue - 8) + 4) / 4;
|
|
16869
|
-
}
|
|
16870
|
-
return features;
|
|
16871
17359
|
}
|
|
16872
17360
|
/**
|
|
16873
17361
|
* Asynchronously extracts features from a given audio using the provided configuration.
|
|
@@ -16931,11 +17419,10 @@ import sharp from "sharp";
|
|
|
16931
17419
|
var createCanvasFunction;
|
|
16932
17420
|
var ImageDataClass;
|
|
16933
17421
|
var loadImageFunction;
|
|
16934
|
-
|
|
16935
|
-
if (IS_BROWSER_OR_WEBWORKER) {
|
|
17422
|
+
if (apis.IS_WEB_ENV) {
|
|
16936
17423
|
createCanvasFunction = (width, height) => {
|
|
16937
17424
|
if (!self.OffscreenCanvas) {
|
|
16938
|
-
throw new Error("OffscreenCanvas not supported by this
|
|
17425
|
+
throw new Error("OffscreenCanvas not supported by this environment.");
|
|
16939
17426
|
}
|
|
16940
17427
|
return new self.OffscreenCanvas(width, height);
|
|
16941
17428
|
};
|
|
@@ -17025,7 +17512,7 @@ var RawImage = class _RawImage {
|
|
|
17025
17512
|
* @returns {RawImage} The image object.
|
|
17026
17513
|
*/
|
|
17027
17514
|
static fromCanvas(canvas) {
|
|
17028
|
-
if (!
|
|
17515
|
+
if (!apis.IS_WEB_ENV) {
|
|
17029
17516
|
throw new Error("fromCanvas() is only supported in browser environments.");
|
|
17030
17517
|
}
|
|
17031
17518
|
const ctx = (
|
|
@@ -17054,7 +17541,7 @@ var RawImage = class _RawImage {
|
|
|
17054
17541
|
* @returns {Promise<RawImage>} The image object.
|
|
17055
17542
|
*/
|
|
17056
17543
|
static async fromBlob(blob) {
|
|
17057
|
-
if (
|
|
17544
|
+
if (apis.IS_WEB_ENV) {
|
|
17058
17545
|
const img = await loadImageFunction(blob);
|
|
17059
17546
|
const ctx = createCanvasFunction(img.width, img.height).getContext("2d");
|
|
17060
17547
|
ctx.drawImage(img, 0, 0);
|
|
@@ -17235,7 +17722,7 @@ var RawImage = class _RawImage {
|
|
|
17235
17722
|
} else if (nullish_height) {
|
|
17236
17723
|
height = width / this.width * this.height;
|
|
17237
17724
|
}
|
|
17238
|
-
if (
|
|
17725
|
+
if (apis.IS_WEB_ENV) {
|
|
17239
17726
|
const numChannels = this.channels;
|
|
17240
17727
|
const canvas = this.toCanvas();
|
|
17241
17728
|
const ctx = createCanvasFunction(width, height).getContext("2d");
|
|
@@ -17283,7 +17770,7 @@ var RawImage = class _RawImage {
|
|
|
17283
17770
|
if (left === 0 && right === 0 && top === 0 && bottom === 0) {
|
|
17284
17771
|
return this;
|
|
17285
17772
|
}
|
|
17286
|
-
if (
|
|
17773
|
+
if (apis.IS_WEB_ENV) {
|
|
17287
17774
|
const numChannels = this.channels;
|
|
17288
17775
|
const canvas = this.toCanvas();
|
|
17289
17776
|
const newWidth = this.width + left + right;
|
|
@@ -17307,7 +17794,7 @@ var RawImage = class _RawImage {
|
|
|
17307
17794
|
}
|
|
17308
17795
|
const crop_width = x_max - x_min + 1;
|
|
17309
17796
|
const crop_height = y_max - y_min + 1;
|
|
17310
|
-
if (
|
|
17797
|
+
if (apis.IS_WEB_ENV) {
|
|
17311
17798
|
const numChannels = this.channels;
|
|
17312
17799
|
const canvas = this.toCanvas();
|
|
17313
17800
|
const ctx = createCanvasFunction(crop_width, crop_height).getContext("2d");
|
|
@@ -17335,7 +17822,7 @@ var RawImage = class _RawImage {
|
|
|
17335
17822
|
}
|
|
17336
17823
|
const width_offset = (this.width - crop_width) / 2;
|
|
17337
17824
|
const height_offset = (this.height - crop_height) / 2;
|
|
17338
|
-
if (
|
|
17825
|
+
if (apis.IS_WEB_ENV) {
|
|
17339
17826
|
const numChannels = this.channels;
|
|
17340
17827
|
const canvas = this.toCanvas();
|
|
17341
17828
|
const ctx = createCanvasFunction(crop_width, crop_height).getContext("2d");
|
|
@@ -17413,7 +17900,7 @@ var RawImage = class _RawImage {
|
|
|
17413
17900
|
}
|
|
17414
17901
|
}
|
|
17415
17902
|
async toBlob(type = "image/png", quality = 1) {
|
|
17416
|
-
if (!
|
|
17903
|
+
if (!apis.IS_WEB_ENV) {
|
|
17417
17904
|
throw new Error("toBlob() is only supported in browser environments.");
|
|
17418
17905
|
}
|
|
17419
17906
|
const canvas = this.toCanvas();
|
|
@@ -17430,7 +17917,7 @@ var RawImage = class _RawImage {
|
|
|
17430
17917
|
return tensor;
|
|
17431
17918
|
}
|
|
17432
17919
|
toCanvas() {
|
|
17433
|
-
if (!
|
|
17920
|
+
if (!apis.IS_WEB_ENV) {
|
|
17434
17921
|
throw new Error("toCanvas() is only supported in browser environments.");
|
|
17435
17922
|
}
|
|
17436
17923
|
const cloned = this.clone().rgba();
|
|
@@ -17514,7 +18001,7 @@ var RawImage = class _RawImage {
|
|
|
17514
18001
|
* @returns {Promise<void>}
|
|
17515
18002
|
*/
|
|
17516
18003
|
async save(path3) {
|
|
17517
|
-
if (
|
|
18004
|
+
if (apis.IS_WEB_ENV) {
|
|
17518
18005
|
if (apis.IS_WEBWORKER_ENV) {
|
|
17519
18006
|
throw new Error("Unable to save an image from a Web Worker.");
|
|
17520
18007
|
}
|
|
@@ -17534,7 +18021,7 @@ var RawImage = class _RawImage {
|
|
|
17534
18021
|
* @returns {import('sharp').Sharp} The Sharp instance.
|
|
17535
18022
|
*/
|
|
17536
18023
|
toSharp() {
|
|
17537
|
-
if (
|
|
18024
|
+
if (apis.IS_WEB_ENV) {
|
|
17538
18025
|
throw new Error("toSharp() is only supported in server-side environments.");
|
|
17539
18026
|
}
|
|
17540
18027
|
return sharp(this.data, {
|
|
@@ -17747,6 +18234,27 @@ function compute_segments(mask_probs, pred_scores, pred_labels, mask_threshold,
|
|
|
17747
18234
|
}
|
|
17748
18235
|
return [segmentation, segments];
|
|
17749
18236
|
}
|
|
18237
|
+
function smart_resize(height, width, factor = 28, min_pixels = 56 * 56, max_pixels = 14 * 14 * 4 * 1280) {
|
|
18238
|
+
if (height < factor || width < factor) {
|
|
18239
|
+
throw new Error(`height:${height} or width:${width} must be larger than factor:${factor}`);
|
|
18240
|
+
} else if (Math.max(height, width) / Math.min(height, width) > 200) {
|
|
18241
|
+
throw new Error(
|
|
18242
|
+
`absolute aspect ratio must be smaller than 200, got ${Math.max(height, width) / Math.min(height, width)}`
|
|
18243
|
+
);
|
|
18244
|
+
}
|
|
18245
|
+
let h_bar = Math.round(height / factor) * factor;
|
|
18246
|
+
let w_bar = Math.round(width / factor) * factor;
|
|
18247
|
+
if (h_bar * w_bar > max_pixels) {
|
|
18248
|
+
const beta = Math.sqrt(height * width / max_pixels);
|
|
18249
|
+
h_bar = Math.floor(height / beta / factor) * factor;
|
|
18250
|
+
w_bar = Math.floor(width / beta / factor) * factor;
|
|
18251
|
+
} else if (h_bar * w_bar < min_pixels) {
|
|
18252
|
+
const beta = Math.sqrt(min_pixels / (height * width));
|
|
18253
|
+
h_bar = Math.ceil(height * beta / factor) * factor;
|
|
18254
|
+
w_bar = Math.ceil(width * beta / factor) * factor;
|
|
18255
|
+
}
|
|
18256
|
+
return [h_bar, w_bar];
|
|
18257
|
+
}
|
|
17750
18258
|
function post_process_panoptic_segmentation(outputs, threshold = 0.5, mask_threshold = 0.5, overlap_mask_area_threshold = 0.8, label_ids_to_fuse = null, target_sizes = null) {
|
|
17751
18259
|
if (label_ids_to_fuse === null) {
|
|
17752
18260
|
logger.warn("`label_ids_to_fuse` unset. No instance will be fused.");
|
|
@@ -18035,7 +18543,7 @@ var ImageProcessor = class extends Callable2 {
|
|
|
18035
18543
|
});
|
|
18036
18544
|
}
|
|
18037
18545
|
/**
|
|
18038
|
-
* @typedef {
|
|
18546
|
+
* @typedef {Object} PreprocessedImage
|
|
18039
18547
|
* @property {HeightWidth} original_size The original size of the image.
|
|
18040
18548
|
* @property {HeightWidth} reshaped_input_size The reshaped input size of the image.
|
|
18041
18549
|
* @property {Tensor} pixel_values The pixel values of the preprocessed image.
|
|
@@ -18213,6 +18721,7 @@ __export(image_processors_exports, {
|
|
|
18213
18721
|
ImageFeatureExtractor: () => ImageProcessor,
|
|
18214
18722
|
ImageProcessor: () => ImageProcessor,
|
|
18215
18723
|
JinaCLIPImageProcessor: () => JinaCLIPImageProcessor,
|
|
18724
|
+
Lfm2VlImageProcessor: () => Lfm2VlImageProcessor,
|
|
18216
18725
|
LlavaOnevisionImageProcessor: () => LlavaOnevisionImageProcessor,
|
|
18217
18726
|
Mask2FormerImageProcessor: () => Mask2FormerImageProcessor,
|
|
18218
18727
|
MaskFormerFeatureExtractor: () => MaskFormerFeatureExtractor,
|
|
@@ -18616,6 +19125,237 @@ var JinaCLIPImageProcessor = class extends ImageProcessor {
|
|
|
18616
19125
|
}
|
|
18617
19126
|
};
|
|
18618
19127
|
|
|
19128
|
+
// src/models/lfm2_vl/image_processing_lfm2_vl.js
|
|
19129
|
+
/**
 * Round `number` to the nearest integer multiple of `factor`.
 * @param {number} number Value to round.
 * @param {number} factor Rounding step (multiple to snap to).
 * @returns {number} The multiple of `factor` closest to `number`.
 */
function round_by_factor(number, factor) {
  const steps = Math.round(number / factor);
  return steps * factor;
}
|
|
19132
|
+
/**
 * Pick the candidate grid ratio whose w/h quotient is closest to `aspect_ratio`.
 * On an exact tie, a later candidate wins only when the image area exceeds half
 * the pixel area implied by that candidate grid (image_size² · w · h / 2).
 * @param {number} aspect_ratio The image aspect ratio (width / height).
 * @param {number[][]} target_ratios Candidate `[w, h]` grid ratios.
 * @param {number} width Image width in pixels.
 * @param {number} height Image height in pixels.
 * @param {number} image_size Tile side length in pixels.
 * @returns {number[]} The selected `[w, h]` candidate.
 */
function find_closest_aspect_ratio(aspect_ratio, target_ratios, width, height, image_size) {
  const image_area = width * height;
  let closest = [1, 1];
  let smallest_diff = Infinity;
  for (const candidate of target_ratios) {
    const [rw, rh] = candidate;
    const diff = Math.abs(aspect_ratio - rw / rh);
    if (diff < smallest_diff) {
      smallest_diff = diff;
      closest = candidate;
    } else if (diff === smallest_diff && image_area > 0.5 * image_size * image_size * rw * rh) {
      // Tie: prefer the larger/later grid only if the image is big enough to fill it.
      closest = candidate;
    }
  }
  return closest;
}
|
|
19147
|
+
/**
 * Enumerate all `[w, h]` tile grids whose tile count (w · h) lies in
 * [min_tiles, max_tiles], sorted ascending by tile count.
 *
 * Equivalent to the naive formulation that rescans every (w, h) pair for each
 * n = min_tiles..max_tiles and deduplicates with a Set: here each pair is
 * generated exactly once, at the first n that could produce it
 * (n === max(w, h, min_tiles)), which removes the Set and the repeated scans
 * while preserving the exact generation order — and therefore, because
 * Array.prototype.sort is stable (ES2019+), the exact tie order of the output.
 * @param {number} min_tiles Minimum number of tiles (inclusive).
 * @param {number} max_tiles Maximum number of tiles (inclusive).
 * @returns {number[][]} Unique `[w, h]` ratios sorted by w · h (ties keep generation order).
 */
function get_target_ratios(min_tiles, max_tiles) {
  const ratios = [];
  for (let n = min_tiles; n <= max_tiles; ++n) {
    for (let w = 1; w <= n; ++w) {
      for (let h = 1; h <= n; ++h) {
        // Emit (w, h) only at the first n where it becomes reachable;
        // this replaces the dedup Set of the original formulation.
        if (Math.max(w, h, min_tiles) !== n) continue;
        const num_tiles = w * h;
        if (num_tiles >= min_tiles && num_tiles <= max_tiles) {
          ratios.push([w, h]);
        }
      }
    }
  }
  // Stable sort: pairs with equal tile counts stay in generation order.
  return ratios.sort((a, b) => a[0] * a[1] - b[0] * b[1]);
}
|
|
19166
|
+
/**
 * Rearrange a batched NCHW image tensor into flattened per-patch vectors.
 * Patches are extracted in row-major order over the spatial grid, and each
 * patch vector is laid out as [dy, dx, channel] (channels-last within a
 * patch). Trailing rows/columns that do not fill a whole patch are dropped.
 * @param {Tensor} images Float32 input of shape [B, C, H, W].
 * @param {number} patch_size Side length of each square patch.
 * @returns {Tensor} Float32 tensor of shape [B, num_patches, patch_size² · C].
 */
function convert_image_to_patches(images, patch_size) {
  const [batch, channels, height, width] = images.dims;
  const rows = Math.floor(height / patch_size);
  const cols = Math.floor(width / patch_size);
  const values_per_patch = patch_size * patch_size * channels;
  const src = /** @type {Float32Array} */ (images.data);
  const dst = new Float32Array(batch * rows * cols * values_per_patch);
  const plane = height * width; // elements per channel plane
  for (let b = 0; b < batch; ++b) {
    const src_base = b * channels * plane;
    const dst_base = b * rows * cols * values_per_patch;
    for (let pr = 0; pr < rows; ++pr) {
      for (let pc = 0; pc < cols; ++pc) {
        let out = dst_base + (pr * cols + pc) * values_per_patch;
        for (let dy = 0; dy < patch_size; ++dy) {
          const row_start = (pr * patch_size + dy) * width + pc * patch_size;
          for (let dx = 0; dx < patch_size; ++dx) {
            const pixel_index = row_start + dx;
            // Interleave channels last within the patch vector.
            for (let ch = 0; ch < channels; ++ch) {
              dst[out++] = src[src_base + ch * plane + pixel_index];
            }
          }
        }
      }
    }
  }
  return new Tensor2("float32", dst, [batch, rows * cols, values_per_patch]);
}
|
|
19196
|
+
/**
 * Zero-pad the patch dimension (dim 1) of a [1, len, dim] tensor up to
 * `target_length`, and build the matching int64 validity mask (1n for real
 * patches, 0n for padding). When no padding is required, the input tensor is
 * returned as-is.
 * @param {Tensor} patches Float32 input of shape [1, len, dim].
 * @param {number} target_length Desired length of the patch dimension.
 * @returns {{padded: Tensor, mask: Tensor}} Padded tensor and mask of shape [target_length].
 */
function pad_along_first_dim(patches, target_length) {
  const [, num_patches, dim] = patches.dims;
  // Mask marks the first `num_patches` slots as valid; the tail stays 0n.
  const mask_values = new BigInt64Array(target_length);
  mask_values.fill(1n, 0, num_patches);
  const mask = new Tensor2("int64", mask_values, [target_length]);
  if (num_patches >= target_length) {
    return { padded: patches, mask };
  }
  const buffer = new Float32Array(target_length * dim); // zero-initialized tail = padding
  buffer.set(/** @type {Float32Array} */(patches.data));
  const padded = new Tensor2("float32", buffer, [1, target_length, dim]);
  return { padded, mask };
}
|
|
19211
|
+
/**
 * Image processor for LFM2-VL.
 *
 * Small images are resized (via `smart_resize`) to a size whose token count
 * lies within [min_image_tokens, max_image_tokens]; images deemed too large
 * are split into a grid of `tile_size` tiles (optionally followed by a
 * resized thumbnail of the whole image). Every tile is then converted to
 * flattened encoder patches and padded to a fixed patch count.
 */
var Lfm2VlImageProcessor = class extends ImageProcessor {
  constructor(config) {
    super(config);
    // Spatial downsampling applied by the projector after the vision encoder
    // (token count scales with 1 / downsample_factor²).
    this.downsample_factor = config.downsample_factor ?? 2;
    // Whether large images may be split into a grid of tiles.
    this.do_image_splitting = config.do_image_splitting ?? true;
    // Bounds on the number of tiles in the splitting grid.
    this.min_tiles = config.min_tiles ?? 2;
    this.max_tiles = config.max_tiles ?? 10;
    // Append a resized whole-image thumbnail after the tiles when splitting.
    this.use_thumbnail = config.use_thumbnail ?? true;
    // Bounds on image tokens produced for a non-split (or thumbnail) image.
    this.min_image_tokens = config.min_image_tokens ?? 64;
    this.max_image_tokens = config.max_image_tokens ?? 256;
    // Patch side length expected by the vision encoder.
    this.encoder_patch_size = config.encoder_patch_size ?? config.patch_size ?? 16;
    // Side length (pixels) of each tile when splitting.
    this.tile_size = config.tile_size ?? 512;
    // Multiplier on the max pixel budget before an image counts as "too large".
    this.max_pixels_tolerance = config.max_pixels_tolerance ?? 2;
    // Whether `_call` includes image_rows / image_cols / image_sizes by default.
    this.return_row_col_info = config.return_row_col_info ?? false;
    // Fixed patch count every tile is padded to: the max of what a thumbnail
    // can produce and what a full tile produces (0 if splitting is disabled).
    const max_thumbnail_patches = this.max_image_tokens * this.downsample_factor ** 2;
    const tile_size_patches = this.do_image_splitting ? (this.tile_size / this.encoder_patch_size) ** 2 : 0;
    this.max_num_patches = Math.max(max_thumbnail_patches, tile_size_patches);
  }
  /**
   * Check if the image is too large to be processed as a single tile.
   * Rounds each side to a multiple of (encoder_patch_size · downsample_factor)
   * and compares the resulting area against the max token budget scaled by
   * `max_pixels_tolerance`.
   * @param {number} height
   * @param {number} width
   * @returns {boolean}
   */
  _is_image_too_large(height, width) {
    const total_factor = this.encoder_patch_size * this.downsample_factor;
    const h_bar = Math.max(this.encoder_patch_size, round_by_factor(height, total_factor));
    const w_bar = Math.max(this.encoder_patch_size, round_by_factor(width, total_factor));
    return h_bar * w_bar > this.max_image_tokens * (this.encoder_patch_size * this.downsample_factor) ** 2 * this.max_pixels_tolerance;
  }
  /**
   * Get the grid layout for tiling a large image.
   * Chooses the candidate grid whose aspect ratio best matches the image.
   * @param {number} height
   * @param {number} width
   * @returns {{ grid_width: number, grid_height: number, target_width: number, target_height: number }}
   */
  _get_grid_layout(height, width) {
    const target_ratios = get_target_ratios(this.min_tiles, this.max_tiles);
    const [grid_width, grid_height] = find_closest_aspect_ratio(
      width / height,
      target_ratios,
      width,
      height,
      this.tile_size
    );
    return {
      grid_width,
      grid_height,
      // Resize target: the image is scaled so the grid divides it exactly
      // into tile_size × tile_size tiles.
      target_width: this.tile_size * grid_width,
      target_height: this.tile_size * grid_height
    };
  }
  /** @param {RawImage|RawImage[]|RawImage[][]} images */
  // @ts-expect-error
  async _call(images, { return_row_col_info = null } = {}) {
    // Normalize the input to a doubly-nested list: batches of image lists.
    let batched_images;
    if (!Array.isArray(images)) {
      batched_images = [[images]];
    } else if (!Array.isArray(images[0])) {
      batched_images = [
        /** @type {RawImage[]} */
        images
      ];
    } else {
      batched_images = /** @type {RawImage[][]} */
      images;
    }
    const all_pixel_values = [];
    const all_pixel_masks = [];
    const all_spatial_shapes = [];
    const all_rows = [];
    const all_cols = [];
    const all_image_sizes = [];
    for (const image_batch of batched_images) {
      // Base preprocessing (rescale/normalize) without padding; padding is
      // handled per-tile by pad_along_first_dim below.
      const preprocessed = await Promise.all(image_batch.map((x) => this.preprocess(x, { do_pad: false })));
      for (const { pixel_values } of preprocessed) {
        const [, height, width] = pixel_values.dims;
        // In-place unsqueeze to NCHW [1, C, H, W] for interpolate_4d.
        const img = pixel_values.unsqueeze_(0);
        const total_factor = this.encoder_patch_size * this.downsample_factor;
        const f2 = total_factor ** 2;
        // Target size for the single-tile / thumbnail path, clamped so both
        // sides are at least one downsampled patch.
        const [new_height, new_width] = smart_resize(
          Math.max(total_factor, height),
          Math.max(total_factor, width),
          total_factor,
          this.min_image_tokens * f2,
          this.max_image_tokens * f2
        ).map((x) => Math.max(total_factor, x));
        let tiles;
        let num_rows = 1, num_cols = 1;
        const is_large = this._is_image_too_large(height, width);
        // A 1x1-only tile configuration is equivalent to no splitting.
        const do_splitting = this.do_image_splitting && !(this.min_tiles === 1 && this.max_tiles === 1);
        if (is_large && do_splitting) {
          const { grid_width, grid_height, target_width, target_height } = this._get_grid_layout(
            height,
            width
          );
          num_rows = grid_height;
          num_cols = grid_width;
          const resized = await interpolate_4d(img, {
            size: [target_height, target_width]
          });
          // Slice the resized image into tile_size × tile_size tiles,
          // row-major.
          tiles = [];
          for (let r = 0; r < grid_height; ++r) {
            for (let c = 0; c < grid_width; ++c) {
              const y = r * this.tile_size;
              const x = c * this.tile_size;
              tiles.push(resized.slice(null, null, [y, y + this.tile_size], [x, x + this.tile_size]));
            }
          }
          // Append a global thumbnail after the tiles (skipped for a 1x1 grid,
          // where the single tile already is the whole image).
          if (this.use_thumbnail && grid_width * grid_height !== 1) {
            tiles.push(await interpolate_4d(img, { size: [new_height, new_width] }));
          }
        } else {
          // Single-tile path: just resize to the smart_resize target.
          tiles = [await interpolate_4d(img, { size: [new_height, new_width] })];
        }
        for (const tile of tiles) {
          const [, , th, tw] = tile.dims;
          const patches = convert_image_to_patches(tile, this.encoder_patch_size);
          // Pad every tile to the same fixed patch count so tiles can be
          // concatenated along dim 0.
          const { padded, mask } = pad_along_first_dim(patches, this.max_num_patches);
          all_pixel_values.push(padded);
          all_pixel_masks.push(mask);
          // Unpadded patch-grid shape of this tile.
          all_spatial_shapes.push([
            Math.floor(th / this.encoder_patch_size),
            Math.floor(tw / this.encoder_patch_size)
          ]);
        }
        all_rows.push(num_rows);
        all_cols.push(num_cols);
        // Size of the resized (thumbnail/single-tile) image, used by the
        // processor to compute per-image token counts.
        all_image_sizes.push([new_height, new_width]);
      }
    }
    const result = {
      pixel_values: cat(all_pixel_values, 0),
      pixel_attention_mask: stack(all_pixel_masks, 0),
      spatial_shapes: new Tensor2("int64", BigInt64Array.from(all_spatial_shapes.flat(), BigInt), [
        all_spatial_shapes.length,
        2
      ])
    };
    // Explicit call-site option wins over the configured default.
    if (return_row_col_info ?? this.return_row_col_info) {
      result.image_rows = all_rows;
      result.image_cols = all_cols;
      result.image_sizes = all_image_sizes;
    }
    return result;
  }
};
|
|
19358
|
+
|
|
18619
19359
|
// src/models/llava_onevision/image_processing_llava_onevision.js
|
|
18620
19360
|
var LlavaOnevisionImageProcessor = class extends ImageProcessor {
|
|
18621
19361
|
};
|
|
@@ -18839,27 +19579,6 @@ var PvtImageProcessor = class extends ImageProcessor {
|
|
|
18839
19579
|
};
|
|
18840
19580
|
|
|
18841
19581
|
// src/models/qwen2_vl/image_processing_qwen2_vl.js
|
|
18842
|
-
/**
 * Compute an output size whose sides are multiples of `factor` and whose
 * total pixel count lies within [min_pixels, max_pixels], staying as close
 * as possible to the original aspect ratio.
 * @param {number} height Input height in pixels.
 * @param {number} width Input width in pixels.
 * @param {number} [factor] Both output sides must be multiples of this.
 * @param {number} [min_pixels] Minimum output area.
 * @param {number} [max_pixels] Maximum output area.
 * @returns {number[]} `[h_bar, w_bar]` — the adjusted height and width.
 * @throws {Error} If either side is smaller than `factor`, or the aspect ratio exceeds 200.
 */
function smart_resize(height, width, factor = 28, min_pixels = 56 * 56, max_pixels = 14 * 14 * 4 * 1280) {
  if (height < factor || width < factor) {
    throw new Error(`height:${height} or width:${width} must be larger than factor:${factor}`);
  }
  const aspect = Math.max(height, width) / Math.min(height, width);
  if (aspect > 200) {
    throw new Error(
      `absolute aspect ratio must be smaller than 200, got ${aspect}`
    );
  }
  // Snap both sides to the nearest multiple of `factor`.
  let h_bar = Math.round(height / factor) * factor;
  let w_bar = Math.round(width / factor) * factor;
  const rounded_area = h_bar * w_bar;
  if (rounded_area > max_pixels) {
    // Shrink both sides by one common scale, rounding down to factor multiples.
    const scale = Math.sqrt(height * width / max_pixels);
    h_bar = Math.floor(height / scale / factor) * factor;
    w_bar = Math.floor(width / scale / factor) * factor;
  } else if (rounded_area < min_pixels) {
    // Grow both sides by one common scale, rounding up to factor multiples.
    const scale = Math.sqrt(min_pixels / (height * width));
    h_bar = Math.ceil(height * scale / factor) * factor;
    w_bar = Math.ceil(width * scale / factor) * factor;
  }
  return [h_bar, w_bar];
}
|
|
18863
19582
|
var Qwen2VLImageProcessor = class extends ImageProcessor {
|
|
18864
19583
|
constructor(config) {
|
|
18865
19584
|
super(config);
|
|
@@ -19461,6 +20180,57 @@ ${boi_token}${image_tokens_expanded}${eoi_token}
|
|
|
19461
20180
|
}
|
|
19462
20181
|
};
|
|
19463
20182
|
|
|
20183
|
+
// src/models/granite_speech/processing_granite_speech.js
|
|
20184
|
+
/**
 * Processor for Granite Speech: extracts mel features from raw audio,
 * builds the matching feature mask, expands the audio placeholder token in
 * the text to one token per projected audio frame, then tokenizes the text.
 */
var GraniteSpeechProcessor = class extends Processor {
  static tokenizer_class = AutoTokenizer;
  static feature_extractor_class = AutoFeatureExtractor;
  static uses_processor_config = true;
  /**
   * Compute the number of audio tokens for a given raw audio length.
   * @param {number} audioLength Raw audio sample count.
   * @returns {number} Number of projector output tokens.
   */
  _get_num_audio_features(audioLength) {
    const { hop_length } = this.feature_extractor.config.melspec_kwargs;
    const { projector_window_size, projector_downsample_rate } = this.feature_extractor.config;
    // Tokens the projector emits per window of encoder frames.
    const effective_window_size = Math.floor(projector_window_size / projector_downsample_rate);
    // Mel frames produced by the STFT (one frame per hop, plus one).
    const mel_length = Math.floor(audioLength / hop_length) + 1;
    // The encoder halves the mel frame count (presumably stride-2 — see model).
    const encoder_length = Math.floor(mel_length / 2);
    // Number of (ceil-padded) projector windows covering the encoder output.
    const nblocks = Math.ceil(encoder_length / projector_window_size);
    return nblocks * effective_window_size;
  }
  /**
   * @param {string} text The text input to process.
   * @param {Float32Array} audio The audio input to process.
   */
  async _call(text, audio = null, kwargs = {}) {
    if (Array.isArray(text)) {
      throw new Error("Batched inputs are not supported yet.");
    }
    let audio_inputs = {};
    if (audio) {
      const { input_features } = await this.feature_extractor(audio);
      audio_inputs["input_features"] = input_features;
      const audio_embed_size = this._get_num_audio_features(audio.length);
      // All projected frames are valid (single, unpadded audio input).
      const mask_data = new Uint8Array(audio_embed_size).fill(1);
      audio_inputs["input_features_mask"] = new Tensor2("bool", mask_data, [1, audio_embed_size]);
      const audio_token = this.config.audio_token ?? "<|audio|>";
      if (!text.includes(audio_token)) {
        throw new Error(`The input text does not contain the audio token ${audio_token}.`);
      }
      // Expand each placeholder into one token per projected audio frame so
      // the tokenized length matches the audio embedding length.
      text = text.replaceAll(audio_token, audio_token.repeat(audio_embed_size));
    }
    const text_inputs = this.tokenizer(text, {
      add_special_tokens: false,
      ...kwargs
    });
    return {
      ...text_inputs,
      ...audio_inputs
    };
  }
};
|
|
20233
|
+
|
|
19464
20234
|
// src/models/grounding_dino/processing_grounding_dino.js
|
|
19465
20235
|
function get_phrases_from_posmap(posmaps, input_ids) {
|
|
19466
20236
|
const left_idx = 0;
|
|
@@ -19737,6 +20507,66 @@ var JinaCLIPProcessor = class extends Processor {
|
|
|
19737
20507
|
}
|
|
19738
20508
|
};
|
|
19739
20509
|
|
|
20510
|
+
// src/models/lfm2_vl/processing_lfm2_vl.js
|
|
20511
|
+
/**
 * Processor for LFM2-VL: runs the image processor, then expands each
 * `<image>` placeholder in the text into the exact sequence of special
 * tokens (tile markers, image tokens, optional thumbnail marker) that
 * matches the tiles/thumbnail the image processor produced.
 */
var Lfm2VlProcessor = class extends Processor {
  static tokenizer_class = AutoTokenizer;
  static image_processor_class = AutoImageProcessor;
  /**
   * @param {RawImage|RawImage[]} images
   * @param {string|string[]|null} [text]
   * @param {Record<string, any>} [kwargs]
   */
  async _call(images, text = null, kwargs = {}) {
    // Always request row/col/size info — needed below to expand placeholders —
    // but strip it from the returned image inputs.
    const { image_rows, image_cols, image_sizes, ...image_inputs } = await this.image_processor(images, {
      ...kwargs,
      return_row_col_info: true
    });
    if (text) {
      const image_token = this.config.image_token ?? "<image>";
      const {
        tile_size = 512,
        downsample_factor = 2,
        encoder_patch_size = 16,
        use_thumbnail = true
      } = (
        /** @type {Record<string, any>} */
        this.image_processor.config
      );
      // Tokens along one side after patching + projector downsampling.
      const ds2 = (s) => Math.ceil(Math.floor(s / encoder_patch_size) / downsample_factor);
      const tokens_per_tile = ds2(tile_size) ** 2;
      const image_start = this.config.image_start_token ?? "<|image_start|>";
      const image_end = this.config.image_end_token ?? "<|image_end|>";
      const thumbnail_token = this.config.image_thumbnail ?? "<|img_thumbnail|>";
      if (!Array.isArray(text)) text = [text];
      // Global image index: placeholders across all samples consume images in order.
      let image_idx = 0;
      text = text.map((sample) => {
        const parts = sample.split(image_token);
        // parts[0] precedes the first placeholder; each later part follows one.
        return parts[0] + parts.slice(1).map((part) => {
          const idx = image_idx++;
          const [h, w] = image_sizes[idx];
          const rows = image_rows[idx], cols = image_cols[idx];
          // Token count for the resized whole image (thumbnail / single tile).
          const tokens_for_image = ds2(h) * ds2(w);
          let expanded = image_start;
          if (rows > 1 || cols > 1) {
            // Tiled image: one 1-indexed row/col marker plus a full tile of
            // image tokens per tile, row-major.
            const tile_str = image_token.repeat(tokens_per_tile);
            for (let r = 0; r < rows; ++r)
              for (let c = 0; c < cols; ++c)
                expanded += `<|img_row_${r + 1}_col_${c + 1}|>` + tile_str;
            if (use_thumbnail) expanded += thumbnail_token + image_token.repeat(tokens_for_image);
          } else {
            expanded += image_token.repeat(tokens_for_image);
          }
          return expanded + image_end + part;
        }).join("");
      });
    }
    return {
      ...image_inputs,
      ...text ? this.tokenizer(text, kwargs) : {}
    };
  }
};
|
|
20569
|
+
|
|
19740
20570
|
// src/models/llava/processing_llava.js
|
|
19741
20571
|
var LlavaProcessor = class extends Processor {
|
|
19742
20572
|
static tokenizer_class = AutoTokenizer;
|
|
@@ -20269,6 +21099,94 @@ var VoxtralProcessor = class extends Processor {
|
|
|
20269
21099
|
}
|
|
20270
21100
|
};
|
|
20271
21101
|
|
|
21102
|
+
// src/models/voxtral_realtime/processing_voxtral_realtime.js
|
|
21103
|
+
// src/models/voxtral_realtime/processing_voxtral_realtime.js
// Number of silence tokens prepended before the first streaming chunk.
var NUM_LEFT_PAD_TOKENS = 32;
// Token delay between audio and text streams.
var NUM_DELAY_TOKENS = 6;
// Mel frames consumed per text token.
var AUDIO_LENGTH_PER_TOK = 8;
// Extra buffer tokens appended in non-streaming (offline) mode.
var OFFLINE_STREAMING_BUFFER_TOKENS = 10;
// Token id used to pad the streaming prompt.
var STREAMING_PAD_TOKEN_ID = 32;
/**
 * Processor for Voxtral Realtime: converts raw audio into mel features,
 * handling the padding/framing differences between streaming first chunks,
 * streaming follow-up chunks, and offline (non-streaming) audio.
 */
var VoxtralRealtimeProcessor = class extends Processor {
  static tokenizer_class = AutoTokenizer;
  static feature_extractor_class = AutoFeatureExtractor;
  static uses_processor_config = false;
  /** Number of mel frames in the first audio chunk. */
  get num_mel_frames_first_audio_chunk() {
    return (NUM_DELAY_TOKENS + 1) * AUDIO_LENGTH_PER_TOK;
  }
  /** Number of raw audio samples in the first audio chunk. */
  get num_samples_first_audio_chunk() {
    const { hop_length, n_fft } = this.feature_extractor.config;
    // Centered STFT framing: (frames - 1) hops plus half an FFT window.
    return (this.num_mel_frames_first_audio_chunk - 1) * hop_length + Math.floor(n_fft / 2);
  }
  /** Number of raw audio samples per subsequent audio chunk. */
  get num_samples_per_audio_chunk() {
    const { hop_length, n_fft } = this.feature_extractor.config;
    return AUDIO_LENGTH_PER_TOK * hop_length + n_fft;
  }
  /** Number of right-pad tokens for non-streaming mode. */
  get num_right_pad_tokens() {
    return NUM_DELAY_TOKENS + 1 + OFFLINE_STREAMING_BUFFER_TOKENS;
  }
  /** Number of mel frames per text token. */
  get audio_length_per_tok() {
    return AUDIO_LENGTH_PER_TOK;
  }
  /** Number of raw audio samples per token. */
  get raw_audio_length_per_tok() {
    return AUDIO_LENGTH_PER_TOK * this.feature_extractor.config.hop_length;
  }
  /**
   * Process audio input for VoxtralRealtime.
   *
   * In streaming mode with `is_first_audio_chunk=true`, the audio is left-padded
   * with silence and mel features are extracted with `center=true`.
   * Returns `{ input_ids, input_features }`.
   *
   * In streaming mode with `is_first_audio_chunk=false`, the audio chunk is
   * processed with `center=false` and only `{ input_features }` is returned.
   *
   * In non-streaming mode, the audio is right-padded to ensure the model
   * transcribes the full audio, then processed with `center=true`.
   * Returns `{ input_features }`.
   *
   * @param {Float32Array|Float64Array} audio The audio waveform.
   * @param {Object} [options]
   * @param {boolean} [options.is_streaming=false] Whether processing in streaming mode.
   * @param {boolean} [options.is_first_audio_chunk=true] Whether this is the first audio chunk.
   * @returns {Promise<Object>}
   */
  async _call(audio, { is_streaming = false, is_first_audio_chunk = true } = {}) {
    validate_audio_inputs(audio, "VoxtralRealtimeProcessor");
    if (!is_streaming && !is_first_audio_chunk) {
      throw new Error("In non-streaming mode (`is_streaming=false`), `is_first_audio_chunk` must be `true`.");
    }
    if (is_first_audio_chunk) {
      if (is_streaming) {
        // Prepend NUM_LEFT_PAD_TOKENS tokens worth of silence (Float32Array
        // is zero-initialized, so only the audio needs to be copied in).
        const num_left_pad_samples = NUM_LEFT_PAD_TOKENS * this.raw_audio_length_per_tok;
        const padded_audio = new Float32Array(num_left_pad_samples + audio.length);
        padded_audio.set(audio, num_left_pad_samples);
        const audio_encoding = await this.feature_extractor(padded_audio, { center: true });
        // Prompt = one BOS-like token (id 1) followed by pad tokens covering
        // the left padding plus the audio/text delay.
        const num_pad_tokens = NUM_LEFT_PAD_TOKENS + NUM_DELAY_TOKENS;
        const num_input_tokens = 1 + num_pad_tokens;
        const input_ids_data = new BigInt64Array(num_input_tokens).fill(BigInt(STREAMING_PAD_TOKEN_ID));
        input_ids_data[0] = 1n;
        const input_ids = new Tensor2("int64", input_ids_data, [1, num_input_tokens]);
        return {
          input_ids,
          ...audio_encoding
        };
      } else {
        // Offline: right-pad with silence so the model flushes the full audio.
        const right_pad_samples = this.num_right_pad_tokens * this.raw_audio_length_per_tok;
        const padded_audio = new Float32Array(audio.length + right_pad_samples);
        padded_audio.set(audio);
        return await this.feature_extractor(padded_audio, { center: true });
      }
    } else {
      // Follow-up streaming chunk: no padding, uncentered framing so frames
      // line up with the previous chunk's tail.
      return await this.feature_extractor(audio, { center: false });
    }
  }
};
|
|
21189
|
+
|
|
20272
21190
|
// src/models/wav2vec2/processing_wav2vec2.js
|
|
20273
21191
|
var Wav2Vec2Processor = class extends Processor {
|
|
20274
21192
|
static tokenizer_class = AutoTokenizer;
|
|
@@ -20368,14 +21286,18 @@ function getNormalizedConfig(config) {
|
|
|
20368
21286
|
case "florence2":
|
|
20369
21287
|
case "llava_onevision":
|
|
20370
21288
|
case "idefics3":
|
|
21289
|
+
case "granite_speech":
|
|
20371
21290
|
case "ultravox":
|
|
20372
21291
|
case "voxtral":
|
|
21292
|
+
case "voxtral_realtime":
|
|
20373
21293
|
case "smolvlm":
|
|
20374
21294
|
case "gemma3n":
|
|
21295
|
+
case "lfm2_vl":
|
|
20375
21296
|
case "chatterbox":
|
|
20376
21297
|
case "mistral3":
|
|
20377
21298
|
case "qwen2_5_vl":
|
|
20378
21299
|
case "qwen3_vl":
|
|
21300
|
+
case "qwen3_vl_moe":
|
|
20379
21301
|
init_normalized_config = getNormalizedConfig(config.text_config);
|
|
20380
21302
|
break;
|
|
20381
21303
|
case "moondream1":
|
|
@@ -20425,11 +21347,17 @@ function getNormalizedConfig(config) {
|
|
|
20425
21347
|
case "cohere":
|
|
20426
21348
|
case "cohere2":
|
|
20427
21349
|
case "mistral":
|
|
21350
|
+
case "voxtral_realtime_text":
|
|
21351
|
+
case "voxtral_realtime_encoder":
|
|
20428
21352
|
case "starcoder2":
|
|
20429
21353
|
case "qwen2":
|
|
21354
|
+
case "qwen2_moe":
|
|
20430
21355
|
case "qwen2_vl":
|
|
21356
|
+
case "qwen2_vl_text":
|
|
20431
21357
|
case "qwen2_5_vl_text":
|
|
21358
|
+
case "qwen3_moe":
|
|
20432
21359
|
case "qwen3_vl_text":
|
|
21360
|
+
case "qwen3_vl_moe_text":
|
|
20433
21361
|
case "phi":
|
|
20434
21362
|
case "phi3":
|
|
20435
21363
|
case "phi3_v":
|
|
@@ -20570,6 +21498,9 @@ function getNormalizedConfig(config) {
|
|
|
20570
21498
|
return normalized_config;
|
|
20571
21499
|
}
|
|
20572
21500
|
function getCacheShapes(config, options) {
|
|
21501
|
+
if (!(config instanceof PretrainedConfig)) {
|
|
21502
|
+
config = new PretrainedConfig(config);
|
|
21503
|
+
}
|
|
20573
21504
|
if (["lfm2", "lfm2_moe"].includes(config.model_type)) {
|
|
20574
21505
|
const pkv_prefix = options?.prefix ?? "past_key_values";
|
|
20575
21506
|
const conv_prefix = pkv_prefix === "present" ? "present" : "past";
|
|
@@ -20628,7 +21559,7 @@ function getCacheShapes(config, options) {
|
|
|
20628
21559
|
}
|
|
20629
21560
|
}
|
|
20630
21561
|
return cache_values;
|
|
20631
|
-
} else if (["
|
|
21562
|
+
} else if (["qwen3_next", "qwen3_5_text", "qwen3_5_moe_text", "olmo_hybrid"].includes(config.model_type)) {
|
|
20632
21563
|
const pkv_prefix = options?.prefix ?? "past_key_values";
|
|
20633
21564
|
const conv_prefix = pkv_prefix === "present" ? "present" : "past";
|
|
20634
21565
|
const cache_values = {};
|
|
@@ -20645,11 +21576,10 @@ function getCacheShapes(config, options) {
|
|
|
20645
21576
|
linear_conv_kernel_dim
|
|
20646
21577
|
} = (
|
|
20647
21578
|
/** @type {any} */
|
|
20648
|
-
config
|
|
21579
|
+
config
|
|
20649
21580
|
);
|
|
20650
21581
|
const key_dim = linear_key_head_dim * linear_num_key_heads;
|
|
20651
21582
|
const value_dim = linear_value_head_dim * linear_num_value_heads;
|
|
20652
|
-
const conv_dim = key_dim * 2 + value_dim;
|
|
20653
21583
|
const final_head_dim = head_dim ?? hidden_size / num_attention_heads;
|
|
20654
21584
|
const batch_size = options?.batch_size ?? 1;
|
|
20655
21585
|
for (let i = 0; i < layer_types.length; ++i) {
|
|
@@ -20658,7 +21588,14 @@ function getCacheShapes(config, options) {
|
|
|
20658
21588
|
cache_values[`${pkv_prefix}.${i}.${kv}`] = [batch_size, num_key_value_heads, 0, final_head_dim];
|
|
20659
21589
|
}
|
|
20660
21590
|
} else if (layer_types[i] === "linear_attention") {
|
|
20661
|
-
|
|
21591
|
+
if (config.model_type === "olmo_hybrid") {
|
|
21592
|
+
cache_values[`${conv_prefix}_conv.${i}.key`] = [batch_size, key_dim, linear_conv_kernel_dim];
|
|
21593
|
+
cache_values[`${conv_prefix}_conv.${i}.value`] = [batch_size, value_dim, linear_conv_kernel_dim];
|
|
21594
|
+
cache_values[`${conv_prefix}_conv.${i}.query`] = [batch_size, key_dim, linear_conv_kernel_dim];
|
|
21595
|
+
} else {
|
|
21596
|
+
const conv_dim = key_dim * 2 + value_dim;
|
|
21597
|
+
cache_values[`${conv_prefix}_conv.${i}`] = [batch_size, conv_dim, linear_conv_kernel_dim];
|
|
21598
|
+
}
|
|
20662
21599
|
cache_values[`${conv_prefix}_recurrent.${i}`] = [
|
|
20663
21600
|
batch_size,
|
|
20664
21601
|
linear_num_value_heads,
|
|
@@ -20670,6 +21607,16 @@ function getCacheShapes(config, options) {
|
|
|
20670
21607
|
}
|
|
20671
21608
|
}
|
|
20672
21609
|
return cache_values;
|
|
21610
|
+
} else if (["lfm2_vl", "qwen3_5", "qwen3_5_moe", "voxtral_realtime"].includes(config.model_type)) {
|
|
21611
|
+
let subConfig;
|
|
21612
|
+
if (config.model_type === "voxtral_realtime" && options?.session_name === "audio_encoder") {
|
|
21613
|
+
subConfig = /** @type {any} */
|
|
21614
|
+
config.audio_config;
|
|
21615
|
+
} else {
|
|
21616
|
+
subConfig = /** @type {any} */
|
|
21617
|
+
config.text_config;
|
|
21618
|
+
}
|
|
21619
|
+
return getCacheShapes(subConfig, options);
|
|
20673
21620
|
}
|
|
20674
21621
|
return getKeyValueShapes(config, options);
|
|
20675
21622
|
}
|
|
@@ -20835,7 +21782,7 @@ async function getModelDataFiles(pretrained_model_name_or_path, fileName, suffix
|
|
|
20835
21782
|
}
|
|
20836
21783
|
|
|
20837
21784
|
// src/models/session.js
|
|
20838
|
-
async function getSession(pretrained_model_name_or_path, fileName, options,
|
|
21785
|
+
async function getSession(pretrained_model_name_or_path, fileName, options, cache_config = false, session_name = void 0) {
|
|
20839
21786
|
let custom_config = options.config?.["transformers.js_config"] ?? {};
|
|
20840
21787
|
const selectedDevice = (
|
|
20841
21788
|
/** @type {import("../utils/devices.js").DeviceType} */
|
|
@@ -20893,9 +21840,10 @@ async function getSession(pretrained_model_name_or_path, fileName, options, is_d
|
|
|
20893
21840
|
if (externalData.length > 0 && !apis.IS_NODE_ENV) {
|
|
20894
21841
|
session_options.externalData = externalData;
|
|
20895
21842
|
}
|
|
20896
|
-
if (
|
|
21843
|
+
if (cache_config && selectedDevice === "webgpu" && kv_cache_dtype_config !== false) {
|
|
20897
21844
|
const shapes = getCacheShapes(options.config, {
|
|
20898
|
-
prefix: "present"
|
|
21845
|
+
prefix: "present",
|
|
21846
|
+
session_name
|
|
20899
21847
|
});
|
|
20900
21848
|
if (Object.keys(shapes).length > 0 && !isONNXProxy()) {
|
|
20901
21849
|
const preferredOutputLocation = {};
|
|
@@ -20913,15 +21861,17 @@ async function getSession(pretrained_model_name_or_path, fileName, options, is_d
|
|
|
20913
21861
|
};
|
|
20914
21862
|
return { buffer_or_path, session_options, session_config };
|
|
20915
21863
|
}
|
|
20916
|
-
async function constructSessions(pretrained_model_name_or_path, names, options,
|
|
21864
|
+
async function constructSessions(pretrained_model_name_or_path, names, options, cache_sessions = void 0) {
|
|
20917
21865
|
return Object.fromEntries(
|
|
20918
21866
|
await Promise.all(
|
|
20919
21867
|
Object.keys(names).map(async (name) => {
|
|
21868
|
+
const cache_config = cache_sessions?.[name] ?? false;
|
|
20920
21869
|
const { buffer_or_path, session_options, session_config } = await getSession(
|
|
20921
21870
|
pretrained_model_name_or_path,
|
|
20922
21871
|
names[name],
|
|
20923
21872
|
options,
|
|
20924
|
-
|
|
21873
|
+
cache_config,
|
|
21874
|
+
name
|
|
20925
21875
|
);
|
|
20926
21876
|
const session = await createInferenceSession(buffer_or_path, session_options, session_config);
|
|
20927
21877
|
return [name, session];
|
|
@@ -22221,6 +23171,66 @@ var BeamSearchSampler = class extends LogitsSampler {
|
|
|
22221
23171
|
}
|
|
22222
23172
|
};
|
|
22223
23173
|
|
|
23174
|
+
// src/cache_utils.js
|
|
23175
|
+
/**
 * A dynamic key/value cache: a plain bag of named Tensors (e.g.
 * "past_key_values.0.key") with helpers to read the cached sequence length
 * and to release GPU-resident entries.
 */
var _DynamicCache = class {
  /**
   * Create a DynamicCache, optionally pre-populated with entries.
   * @param {Record<string, Tensor>} [entries] Initial name→Tensor mappings.
   * @throws {TypeError} If a key shadows an existing property (e.g. a method
   *   name) or a value is not a Tensor.
   */
  constructor(entries) {
    if (!entries) return;
    for (const key in entries) {
      // Refuse keys that would shadow class members such as `dispose`.
      if (key in this) {
        throw new TypeError(`Key "${key}" conflicts with an existing property on DynamicCache`);
      }
      const value = entries[key];
      if (!(value instanceof Tensor2)) {
        throw new TypeError(`Expected a Tensor for key "${key}", got ${typeof value}`);
      }
      this[key] = value;
    }
  }
  /**
   * Get the cached sequence length. This requires at least one attention cache entry to be present.
   * @returns {number} The past sequence length.
   * @throws {Error} If no "past_key_values." entry exists in the cache.
   */
  get_seq_length() {
    const self2 = (
      /** @type {any} */
      this
    );
    for (const name in self2) {
      if (name.startsWith("past_key_values.")) {
        // Attention caches are [..., seq_len, head_dim]; dim -2 is the length.
        return self2[name].dims.at(-2);
      }
    }
    throw new Error("Unable to determine sequence length from the cache.");
  }
  /**
   * Dispose all contained tensors whose data resides on the GPU.
   * Returns a promise that resolves when all disposals are complete.
   * @returns {Promise<void>} Promise that resolves when all GPU tensors are disposed.
   */
  async dispose() {
    const promises = [];
    for (
      const t of
      /** @type {Tensor[]} */
      Object.values(this)
    ) {
      // Only GPU-backed tensors need explicit disposal.
      if (t.location === "gpu-buffer") {
        promises.push(t.dispose());
      }
    }
    await Promise.all(promises);
  }
};
// Re-typed alias so callers get a constructor signature accepting entries.
var DynamicCache = (
  /** @type {new (entries?: Record<string, Tensor>) => DynamicCache} */
  /** @type {unknown} */
  _DynamicCache
);
|
|
23233
|
+
|
|
22224
23234
|
// src/models/modeling_utils.js
|
|
22225
23235
|
var MODEL_MAPPING_NAMES = null;
|
|
22226
23236
|
function registerTaskMappings(mappings) {
|
|
@@ -22266,71 +23276,181 @@ var MODEL_TYPES = {
|
|
|
22266
23276
|
AutoEncoder: 12,
|
|
22267
23277
|
ImageAudioTextToText: 13,
|
|
22268
23278
|
Supertonic: 14,
|
|
22269
|
-
Chatterbox: 15
|
|
23279
|
+
Chatterbox: 15,
|
|
23280
|
+
MultimodalLanguageModelOnly: 16,
|
|
23281
|
+
VoxtralRealtime: 17
|
|
22270
23282
|
};
|
|
22271
23283
|
var MODEL_TYPE_CONFIG = {
|
|
22272
23284
|
[MODEL_TYPES.DecoderOnly]: {
|
|
22273
23285
|
can_generate: true,
|
|
22274
23286
|
forward: decoder_forward,
|
|
22275
|
-
prepare_inputs: decoder_prepare_inputs_for_generation
|
|
23287
|
+
prepare_inputs: decoder_prepare_inputs_for_generation,
|
|
23288
|
+
sessions: (config, options) => ({ model: options.model_file_name ?? "model" }),
|
|
23289
|
+
cache_sessions: { model: true },
|
|
23290
|
+
optional_configs: { generation_config: "generation_config.json" }
|
|
22276
23291
|
},
|
|
22277
23292
|
[MODEL_TYPES.DecoderOnlyWithoutHead]: {
|
|
22278
23293
|
can_generate: false,
|
|
22279
23294
|
forward: decoder_forward,
|
|
22280
|
-
prepare_inputs: decoder_prepare_inputs_for_generation
|
|
23295
|
+
prepare_inputs: decoder_prepare_inputs_for_generation,
|
|
23296
|
+
sessions: (config, options) => ({ model: options.model_file_name ?? "model" })
|
|
22281
23297
|
},
|
|
22282
23298
|
[MODEL_TYPES.Seq2Seq]: {
|
|
22283
23299
|
can_generate: true,
|
|
22284
23300
|
forward: seq2seq_forward,
|
|
22285
|
-
prepare_inputs: encoder_decoder_prepare_inputs_for_generation
|
|
23301
|
+
prepare_inputs: encoder_decoder_prepare_inputs_for_generation,
|
|
23302
|
+
sessions: () => ({ model: "encoder_model", decoder_model_merged: "decoder_model_merged" }),
|
|
23303
|
+
cache_sessions: { decoder_model_merged: true },
|
|
23304
|
+
optional_configs: { generation_config: "generation_config.json" }
|
|
22286
23305
|
},
|
|
22287
23306
|
[MODEL_TYPES.Vision2Seq]: {
|
|
22288
23307
|
can_generate: true,
|
|
22289
23308
|
forward: seq2seq_forward,
|
|
22290
|
-
prepare_inputs: encoder_decoder_prepare_inputs_for_generation
|
|
23309
|
+
prepare_inputs: encoder_decoder_prepare_inputs_for_generation,
|
|
23310
|
+
sessions: () => ({ model: "encoder_model", decoder_model_merged: "decoder_model_merged" }),
|
|
23311
|
+
cache_sessions: { decoder_model_merged: true },
|
|
23312
|
+
optional_configs: { generation_config: "generation_config.json" }
|
|
22291
23313
|
},
|
|
22292
23314
|
[MODEL_TYPES.Musicgen]: {
|
|
22293
23315
|
can_generate: true,
|
|
22294
|
-
forward: seq2seq_forward
|
|
23316
|
+
forward: seq2seq_forward,
|
|
23317
|
+
sessions: () => ({
|
|
23318
|
+
model: "text_encoder",
|
|
23319
|
+
decoder_model_merged: "decoder_model_merged",
|
|
23320
|
+
encodec_decode: "encodec_decode"
|
|
23321
|
+
}),
|
|
23322
|
+
cache_sessions: { decoder_model_merged: true },
|
|
23323
|
+
optional_configs: { generation_config: "generation_config.json" }
|
|
22295
23324
|
},
|
|
22296
23325
|
[MODEL_TYPES.EncoderDecoder]: {
|
|
22297
23326
|
can_generate: false,
|
|
22298
|
-
forward: seq2seq_forward
|
|
23327
|
+
forward: seq2seq_forward,
|
|
23328
|
+
sessions: () => ({ model: "encoder_model", decoder_model_merged: "decoder_model_merged" }),
|
|
23329
|
+
cache_sessions: { decoder_model_merged: true }
|
|
23330
|
+
},
|
|
23331
|
+
[MODEL_TYPES.MaskGeneration]: {
|
|
23332
|
+
sessions: () => ({ model: "vision_encoder", prompt_encoder_mask_decoder: "prompt_encoder_mask_decoder" })
|
|
22299
23333
|
},
|
|
22300
23334
|
[MODEL_TYPES.ImageTextToText]: {
|
|
22301
23335
|
can_generate: true,
|
|
22302
23336
|
forward: image_text_to_text_forward,
|
|
22303
|
-
prepare_inputs: multimodal_text_to_text_prepare_inputs_for_generation
|
|
23337
|
+
prepare_inputs: multimodal_text_to_text_prepare_inputs_for_generation,
|
|
23338
|
+
sessions: (config) => {
|
|
23339
|
+
const s = {
|
|
23340
|
+
embed_tokens: "embed_tokens",
|
|
23341
|
+
vision_encoder: "vision_encoder",
|
|
23342
|
+
decoder_model_merged: "decoder_model_merged"
|
|
23343
|
+
};
|
|
23344
|
+
if (config.is_encoder_decoder) s["model"] = "encoder_model";
|
|
23345
|
+
return s;
|
|
23346
|
+
},
|
|
23347
|
+
cache_sessions: { decoder_model_merged: true },
|
|
23348
|
+
optional_configs: { generation_config: "generation_config.json" }
|
|
22304
23349
|
},
|
|
22305
23350
|
[MODEL_TYPES.AudioTextToText]: {
|
|
22306
23351
|
can_generate: true,
|
|
22307
23352
|
forward: audio_text_to_text_forward,
|
|
22308
|
-
prepare_inputs: multimodal_text_to_text_prepare_inputs_for_generation
|
|
23353
|
+
prepare_inputs: multimodal_text_to_text_prepare_inputs_for_generation,
|
|
23354
|
+
sessions: () => ({
|
|
23355
|
+
embed_tokens: "embed_tokens",
|
|
23356
|
+
audio_encoder: "audio_encoder",
|
|
23357
|
+
decoder_model_merged: "decoder_model_merged"
|
|
23358
|
+
}),
|
|
23359
|
+
cache_sessions: { decoder_model_merged: true },
|
|
23360
|
+
optional_configs: { generation_config: "generation_config.json" }
|
|
22309
23361
|
},
|
|
22310
|
-
[MODEL_TYPES.
|
|
23362
|
+
[MODEL_TYPES.ImageAudioTextToText]: {
|
|
22311
23363
|
can_generate: true,
|
|
22312
|
-
prepare_inputs: multimodal_text_to_text_prepare_inputs_for_generation
|
|
23364
|
+
prepare_inputs: multimodal_text_to_text_prepare_inputs_for_generation,
|
|
23365
|
+
sessions: () => ({
|
|
23366
|
+
embed_tokens: "embed_tokens",
|
|
23367
|
+
audio_encoder: "audio_encoder",
|
|
23368
|
+
vision_encoder: "vision_encoder",
|
|
23369
|
+
decoder_model_merged: "decoder_model_merged"
|
|
23370
|
+
}),
|
|
23371
|
+
optional_configs: { generation_config: "generation_config.json" }
|
|
22313
23372
|
},
|
|
22314
|
-
[MODEL_TYPES.
|
|
23373
|
+
[MODEL_TYPES.Phi3V]: {
|
|
22315
23374
|
can_generate: true,
|
|
22316
|
-
prepare_inputs: multimodal_text_to_text_prepare_inputs_for_generation
|
|
23375
|
+
prepare_inputs: multimodal_text_to_text_prepare_inputs_for_generation,
|
|
23376
|
+
sessions: () => ({
|
|
23377
|
+
prepare_inputs_embeds: "prepare_inputs_embeds",
|
|
23378
|
+
model: "model",
|
|
23379
|
+
vision_encoder: "vision_encoder"
|
|
23380
|
+
}),
|
|
23381
|
+
cache_sessions: { model: true },
|
|
23382
|
+
optional_configs: { generation_config: "generation_config.json" }
|
|
22317
23383
|
},
|
|
22318
23384
|
[MODEL_TYPES.MultiModality]: {
|
|
22319
|
-
can_generate: true
|
|
23385
|
+
can_generate: true,
|
|
23386
|
+
sessions: () => ({
|
|
23387
|
+
prepare_inputs_embeds: "prepare_inputs_embeds",
|
|
23388
|
+
model: "language_model",
|
|
23389
|
+
lm_head: "lm_head",
|
|
23390
|
+
gen_head: "gen_head",
|
|
23391
|
+
gen_img_embeds: "gen_img_embeds",
|
|
23392
|
+
image_decode: "image_decode"
|
|
23393
|
+
}),
|
|
23394
|
+
cache_sessions: { model: true },
|
|
23395
|
+
optional_configs: { generation_config: "generation_config.json" }
|
|
22320
23396
|
},
|
|
22321
23397
|
[MODEL_TYPES.AutoEncoder]: {
|
|
22322
23398
|
can_generate: false,
|
|
22323
|
-
forward: auto_encoder_forward
|
|
23399
|
+
forward: auto_encoder_forward,
|
|
23400
|
+
sessions: () => ({ encoder_model: "encoder_model", decoder_model: "decoder_model" })
|
|
23401
|
+
},
|
|
23402
|
+
[MODEL_TYPES.Supertonic]: {
|
|
23403
|
+
sessions: () => ({
|
|
23404
|
+
text_encoder: "text_encoder",
|
|
23405
|
+
latent_denoiser: "latent_denoiser",
|
|
23406
|
+
voice_decoder: "voice_decoder"
|
|
23407
|
+
})
|
|
22324
23408
|
},
|
|
22325
23409
|
[MODEL_TYPES.Chatterbox]: {
|
|
22326
23410
|
can_generate: true,
|
|
22327
|
-
forward: encoder_forward
|
|
23411
|
+
forward: encoder_forward,
|
|
23412
|
+
sessions: () => ({
|
|
23413
|
+
embed_tokens: "embed_tokens",
|
|
23414
|
+
speech_encoder: "speech_encoder",
|
|
23415
|
+
model: "language_model",
|
|
23416
|
+
conditional_decoder: "conditional_decoder"
|
|
23417
|
+
}),
|
|
23418
|
+
cache_sessions: { model: true },
|
|
23419
|
+
optional_configs: { generation_config: "generation_config.json" }
|
|
23420
|
+
},
|
|
23421
|
+
[MODEL_TYPES.MultimodalLanguageModelOnly]: {
|
|
23422
|
+
can_generate: true,
|
|
23423
|
+
forward: image_text_to_text_forward,
|
|
23424
|
+
prepare_inputs: multimodal_text_to_text_prepare_inputs_for_generation,
|
|
23425
|
+
sessions: () => ({ embed_tokens: "embed_tokens", decoder_model_merged: "decoder_model_merged" }),
|
|
23426
|
+
cache_sessions: { decoder_model_merged: true },
|
|
23427
|
+
optional_configs: { generation_config: "generation_config.json" }
|
|
23428
|
+
},
|
|
23429
|
+
[MODEL_TYPES.VoxtralRealtime]: {
|
|
23430
|
+
can_generate: true,
|
|
23431
|
+
prepare_inputs: decoder_prepare_inputs_for_generation,
|
|
23432
|
+
sessions: () => ({
|
|
23433
|
+
embed_tokens: "embed_tokens",
|
|
23434
|
+
audio_encoder: "audio_encoder",
|
|
23435
|
+
decoder_model_merged: "decoder_model_merged"
|
|
23436
|
+
}),
|
|
23437
|
+
cache_sessions: { decoder_model_merged: true, audio_encoder: true },
|
|
23438
|
+
optional_configs: { generation_config: "generation_config.json" }
|
|
22328
23439
|
},
|
|
22329
23440
|
default: {
|
|
22330
23441
|
can_generate: false,
|
|
22331
|
-
forward: encoder_forward
|
|
23442
|
+
forward: encoder_forward,
|
|
23443
|
+
sessions: (config, options) => ({ model: options.model_file_name ?? "model" })
|
|
22332
23444
|
}
|
|
22333
23445
|
};
|
|
23446
|
+
function getSessionsConfig(modelType, config, options = {}) {
|
|
23447
|
+
const typeConfig = MODEL_TYPE_CONFIG[modelType] ?? MODEL_TYPE_CONFIG.default;
|
|
23448
|
+
return {
|
|
23449
|
+
sessions: typeConfig.sessions(config, options),
|
|
23450
|
+
cache_sessions: typeConfig.cache_sessions,
|
|
23451
|
+
optional_configs: typeConfig.optional_configs
|
|
23452
|
+
};
|
|
23453
|
+
}
|
|
22334
23454
|
var MODEL_TYPE_MAPPING = /* @__PURE__ */ new Map();
|
|
22335
23455
|
var MODEL_NAME_TO_CLASS_MAPPING = /* @__PURE__ */ new Map();
|
|
22336
23456
|
var MODEL_CLASS_TO_NAME_MAPPING = /* @__PURE__ */ new Map();
|
|
@@ -22416,245 +23536,23 @@ var PreTrainedModel = class extends Callable2 {
|
|
|
22416
23536
|
const modelName = MODEL_CLASS_TO_NAME_MAPPING.get(this);
|
|
22417
23537
|
const modelType = MODEL_TYPE_MAPPING.get(modelName);
|
|
22418
23538
|
config = options.config = await AutoConfig.from_pretrained(pretrained_model_name_or_path, options);
|
|
22419
|
-
|
|
22420
|
-
if (modelType ===
|
|
22421
|
-
|
|
22422
|
-
|
|
22423
|
-
|
|
22424
|
-
{
|
|
22425
|
-
|
|
22426
|
-
},
|
|
22427
|
-
options,
|
|
22428
|
-
"model"
|
|
22429
|
-
),
|
|
22430
|
-
get_optional_configs(
|
|
22431
|
-
pretrained_model_name_or_path,
|
|
22432
|
-
{
|
|
22433
|
-
generation_config: "generation_config.json"
|
|
22434
|
-
},
|
|
22435
|
-
options
|
|
22436
|
-
)
|
|
22437
|
-
]);
|
|
22438
|
-
} else if (modelType === MODEL_TYPES.Seq2Seq || modelType === MODEL_TYPES.Vision2Seq) {
|
|
22439
|
-
info = await Promise.all([
|
|
22440
|
-
constructSessions(
|
|
22441
|
-
pretrained_model_name_or_path,
|
|
22442
|
-
{
|
|
22443
|
-
model: "encoder_model",
|
|
22444
|
-
decoder_model_merged: "decoder_model_merged"
|
|
22445
|
-
},
|
|
22446
|
-
options,
|
|
22447
|
-
"decoder_model_merged"
|
|
22448
|
-
),
|
|
22449
|
-
get_optional_configs(
|
|
22450
|
-
pretrained_model_name_or_path,
|
|
22451
|
-
{
|
|
22452
|
-
generation_config: "generation_config.json"
|
|
22453
|
-
},
|
|
22454
|
-
options
|
|
22455
|
-
)
|
|
22456
|
-
]);
|
|
22457
|
-
} else if (modelType === MODEL_TYPES.MaskGeneration) {
|
|
22458
|
-
info = await Promise.all([
|
|
22459
|
-
constructSessions(
|
|
22460
|
-
pretrained_model_name_or_path,
|
|
22461
|
-
{
|
|
22462
|
-
model: "vision_encoder",
|
|
22463
|
-
prompt_encoder_mask_decoder: "prompt_encoder_mask_decoder"
|
|
22464
|
-
},
|
|
22465
|
-
options
|
|
22466
|
-
)
|
|
22467
|
-
]);
|
|
22468
|
-
} else if (modelType === MODEL_TYPES.EncoderDecoder) {
|
|
22469
|
-
info = await Promise.all([
|
|
22470
|
-
constructSessions(
|
|
22471
|
-
pretrained_model_name_or_path,
|
|
22472
|
-
{
|
|
22473
|
-
model: "encoder_model",
|
|
22474
|
-
decoder_model_merged: "decoder_model_merged"
|
|
22475
|
-
},
|
|
22476
|
-
options,
|
|
22477
|
-
"decoder_model_merged"
|
|
22478
|
-
)
|
|
22479
|
-
]);
|
|
22480
|
-
} else if (modelType === MODEL_TYPES.ImageTextToText) {
|
|
22481
|
-
const sessions = {
|
|
22482
|
-
embed_tokens: "embed_tokens",
|
|
22483
|
-
vision_encoder: "vision_encoder",
|
|
22484
|
-
decoder_model_merged: "decoder_model_merged"
|
|
22485
|
-
};
|
|
22486
|
-
if (config.is_encoder_decoder) {
|
|
22487
|
-
sessions["model"] = "encoder_model";
|
|
22488
|
-
}
|
|
22489
|
-
info = await Promise.all([
|
|
22490
|
-
constructSessions(pretrained_model_name_or_path, sessions, options, "decoder_model_merged"),
|
|
22491
|
-
get_optional_configs(
|
|
22492
|
-
pretrained_model_name_or_path,
|
|
22493
|
-
{
|
|
22494
|
-
generation_config: "generation_config.json"
|
|
22495
|
-
},
|
|
22496
|
-
options
|
|
22497
|
-
)
|
|
22498
|
-
]);
|
|
22499
|
-
} else if (modelType === MODEL_TYPES.AudioTextToText) {
|
|
22500
|
-
const sessions = {
|
|
22501
|
-
embed_tokens: "embed_tokens",
|
|
22502
|
-
audio_encoder: "audio_encoder",
|
|
22503
|
-
decoder_model_merged: "decoder_model_merged"
|
|
22504
|
-
};
|
|
22505
|
-
info = await Promise.all([
|
|
22506
|
-
constructSessions(pretrained_model_name_or_path, sessions, options, "decoder_model_merged"),
|
|
22507
|
-
get_optional_configs(
|
|
22508
|
-
pretrained_model_name_or_path,
|
|
22509
|
-
{
|
|
22510
|
-
generation_config: "generation_config.json"
|
|
22511
|
-
},
|
|
22512
|
-
options
|
|
22513
|
-
)
|
|
22514
|
-
]);
|
|
22515
|
-
} else if (modelType === MODEL_TYPES.ImageAudioTextToText) {
|
|
22516
|
-
const sessions = {
|
|
22517
|
-
embed_tokens: "embed_tokens",
|
|
22518
|
-
audio_encoder: "audio_encoder",
|
|
22519
|
-
vision_encoder: "vision_encoder",
|
|
22520
|
-
decoder_model_merged: "decoder_model_merged"
|
|
22521
|
-
};
|
|
22522
|
-
info = await Promise.all([
|
|
22523
|
-
constructSessions(pretrained_model_name_or_path, sessions, options),
|
|
22524
|
-
get_optional_configs(
|
|
22525
|
-
pretrained_model_name_or_path,
|
|
22526
|
-
{
|
|
22527
|
-
generation_config: "generation_config.json"
|
|
22528
|
-
},
|
|
22529
|
-
options
|
|
22530
|
-
)
|
|
22531
|
-
]);
|
|
22532
|
-
} else if (modelType === MODEL_TYPES.Musicgen) {
|
|
22533
|
-
info = await Promise.all([
|
|
22534
|
-
constructSessions(
|
|
22535
|
-
pretrained_model_name_or_path,
|
|
22536
|
-
{
|
|
22537
|
-
model: "text_encoder",
|
|
22538
|
-
decoder_model_merged: "decoder_model_merged",
|
|
22539
|
-
encodec_decode: "encodec_decode"
|
|
22540
|
-
},
|
|
22541
|
-
options,
|
|
22542
|
-
"decoder_model_merged"
|
|
22543
|
-
),
|
|
22544
|
-
get_optional_configs(
|
|
22545
|
-
pretrained_model_name_or_path,
|
|
22546
|
-
{
|
|
22547
|
-
generation_config: "generation_config.json"
|
|
22548
|
-
},
|
|
22549
|
-
options
|
|
22550
|
-
)
|
|
22551
|
-
]);
|
|
22552
|
-
} else if (modelType === MODEL_TYPES.MultiModality) {
|
|
22553
|
-
info = await Promise.all([
|
|
22554
|
-
constructSessions(
|
|
22555
|
-
pretrained_model_name_or_path,
|
|
22556
|
-
{
|
|
22557
|
-
prepare_inputs_embeds: "prepare_inputs_embeds",
|
|
22558
|
-
model: "language_model",
|
|
22559
|
-
lm_head: "lm_head",
|
|
22560
|
-
gen_head: "gen_head",
|
|
22561
|
-
gen_img_embeds: "gen_img_embeds",
|
|
22562
|
-
image_decode: "image_decode"
|
|
22563
|
-
},
|
|
22564
|
-
options,
|
|
22565
|
-
"model"
|
|
22566
|
-
),
|
|
22567
|
-
get_optional_configs(
|
|
22568
|
-
pretrained_model_name_or_path,
|
|
22569
|
-
{
|
|
22570
|
-
generation_config: "generation_config.json"
|
|
22571
|
-
},
|
|
22572
|
-
options
|
|
22573
|
-
)
|
|
22574
|
-
]);
|
|
22575
|
-
} else if (modelType === MODEL_TYPES.Phi3V) {
|
|
22576
|
-
info = await Promise.all([
|
|
22577
|
-
constructSessions(
|
|
22578
|
-
pretrained_model_name_or_path,
|
|
22579
|
-
{
|
|
22580
|
-
prepare_inputs_embeds: "prepare_inputs_embeds",
|
|
22581
|
-
model: "model",
|
|
22582
|
-
vision_encoder: "vision_encoder"
|
|
22583
|
-
},
|
|
22584
|
-
options,
|
|
22585
|
-
"model"
|
|
22586
|
-
),
|
|
22587
|
-
get_optional_configs(
|
|
22588
|
-
pretrained_model_name_or_path,
|
|
22589
|
-
{
|
|
22590
|
-
generation_config: "generation_config.json"
|
|
22591
|
-
},
|
|
22592
|
-
options
|
|
22593
|
-
)
|
|
22594
|
-
]);
|
|
22595
|
-
} else if (modelType === MODEL_TYPES.Chatterbox) {
|
|
22596
|
-
info = await Promise.all([
|
|
22597
|
-
constructSessions(
|
|
22598
|
-
pretrained_model_name_or_path,
|
|
22599
|
-
{
|
|
22600
|
-
embed_tokens: "embed_tokens",
|
|
22601
|
-
speech_encoder: "speech_encoder",
|
|
22602
|
-
model: "language_model",
|
|
22603
|
-
conditional_decoder: "conditional_decoder"
|
|
22604
|
-
},
|
|
22605
|
-
options,
|
|
22606
|
-
"model"
|
|
22607
|
-
),
|
|
22608
|
-
get_optional_configs(
|
|
22609
|
-
pretrained_model_name_or_path,
|
|
22610
|
-
{
|
|
22611
|
-
generation_config: "generation_config.json"
|
|
22612
|
-
},
|
|
22613
|
-
options
|
|
22614
|
-
)
|
|
22615
|
-
]);
|
|
22616
|
-
} else if (modelType === MODEL_TYPES.AutoEncoder) {
|
|
22617
|
-
info = await Promise.all([
|
|
22618
|
-
constructSessions(
|
|
22619
|
-
pretrained_model_name_or_path,
|
|
22620
|
-
{
|
|
22621
|
-
encoder_model: "encoder_model",
|
|
22622
|
-
decoder_model: "decoder_model"
|
|
22623
|
-
},
|
|
22624
|
-
options
|
|
22625
|
-
)
|
|
22626
|
-
]);
|
|
22627
|
-
} else if (modelType === MODEL_TYPES.Supertonic) {
|
|
22628
|
-
info = await Promise.all([
|
|
22629
|
-
constructSessions(
|
|
22630
|
-
pretrained_model_name_or_path,
|
|
22631
|
-
{
|
|
22632
|
-
text_encoder: "text_encoder",
|
|
22633
|
-
latent_denoiser: "latent_denoiser",
|
|
22634
|
-
voice_decoder: "voice_decoder"
|
|
22635
|
-
},
|
|
22636
|
-
options
|
|
22637
|
-
)
|
|
22638
|
-
]);
|
|
22639
|
-
} else {
|
|
22640
|
-
if (modelType === void 0) {
|
|
22641
|
-
const type = modelName ?? config?.model_type;
|
|
22642
|
-
if (type !== "custom") {
|
|
22643
|
-
logger.warn(
|
|
22644
|
-
`Model type for '${type}' not found, assuming encoder-only architecture. Please report this at ${GITHUB_ISSUE_URL}.`
|
|
22645
|
-
);
|
|
22646
|
-
}
|
|
23539
|
+
const typeConfig = MODEL_TYPE_CONFIG[modelType] ?? MODEL_TYPE_CONFIG.default;
|
|
23540
|
+
if (modelType === void 0) {
|
|
23541
|
+
const type = modelName ?? config?.model_type;
|
|
23542
|
+
if (type !== "custom") {
|
|
23543
|
+
logger.warn(
|
|
23544
|
+
`Model type for '${type}' not found, assuming encoder-only architecture. Please report this at ${GITHUB_ISSUE_URL}.`
|
|
23545
|
+
);
|
|
22647
23546
|
}
|
|
22648
|
-
info = await Promise.all([
|
|
22649
|
-
constructSessions(
|
|
22650
|
-
pretrained_model_name_or_path,
|
|
22651
|
-
{
|
|
22652
|
-
model: options.model_file_name ?? "model"
|
|
22653
|
-
},
|
|
22654
|
-
options
|
|
22655
|
-
)
|
|
22656
|
-
]);
|
|
22657
23547
|
}
|
|
23548
|
+
const sessions = typeConfig.sessions(config, options);
|
|
23549
|
+
const promises = [
|
|
23550
|
+
constructSessions(pretrained_model_name_or_path, sessions, options, typeConfig.cache_sessions)
|
|
23551
|
+
];
|
|
23552
|
+
if (typeConfig.optional_configs) {
|
|
23553
|
+
promises.push(get_optional_configs(pretrained_model_name_or_path, typeConfig.optional_configs, options));
|
|
23554
|
+
}
|
|
23555
|
+
const info = await Promise.all(promises);
|
|
22658
23556
|
return new this(config, ...info);
|
|
22659
23557
|
}
|
|
22660
23558
|
/**
|
|
@@ -22853,7 +23751,7 @@ var PreTrainedModel = class extends Callable2 {
|
|
|
22853
23751
|
* @param {Tensor} [params.inputs=null]
|
|
22854
23752
|
* @param {number} [params.bos_token_id=null]
|
|
22855
23753
|
* @param {Record<string, Tensor|number[]>} [params.model_kwargs]
|
|
22856
|
-
* @returns {{inputs_tensor: Tensor, model_inputs: Record<string, Tensor
|
|
23754
|
+
* @returns {{inputs_tensor: Tensor, model_inputs: Record<string, Tensor> & {past_key_values?: DynamicCache}, model_input_name: string}} The model-specific inputs for generation.
|
|
22857
23755
|
*/
|
|
22858
23756
|
_prepare_model_inputs({ inputs, bos_token_id, model_kwargs }) {
|
|
22859
23757
|
const model_inputs = pick(model_kwargs, this.forward_params);
|
|
@@ -23094,11 +23992,12 @@ var PreTrainedModel = class extends Callable2 {
|
|
|
23094
23992
|
}
|
|
23095
23993
|
}
|
|
23096
23994
|
/**
|
|
23097
|
-
* Returns
|
|
23995
|
+
* Returns a DynamicCache containing past key values from the given decoder results object.
|
|
23098
23996
|
*
|
|
23099
23997
|
* @param {Object} decoderResults The decoder results object.
|
|
23100
|
-
* @param {
|
|
23101
|
-
* @
|
|
23998
|
+
* @param {DynamicCache} pastKeyValues The previous past key values.
|
|
23999
|
+
* @param {boolean} [disposeEncoderPKVs=false] Whether to dispose encoder past key values.
|
|
24000
|
+
* @returns {DynamicCache} A new DynamicCache containing the updated past key values.
|
|
23102
24001
|
*/
|
|
23103
24002
|
getPastKeyValues(decoderResults, pastKeyValues, disposeEncoderPKVs = false) {
|
|
23104
24003
|
const pkvs = /* @__PURE__ */ Object.create(null);
|
|
@@ -23119,7 +24018,7 @@ var PreTrainedModel = class extends Callable2 {
|
|
|
23119
24018
|
}
|
|
23120
24019
|
}
|
|
23121
24020
|
}
|
|
23122
|
-
return pkvs;
|
|
24021
|
+
return new DynamicCache(pkvs);
|
|
23123
24022
|
}
|
|
23124
24023
|
/**
|
|
23125
24024
|
* Returns an object containing attentions from the given model output object.
|
|
@@ -23144,8 +24043,8 @@ var PreTrainedModel = class extends Callable2 {
|
|
|
23144
24043
|
/**
|
|
23145
24044
|
* Adds past key values to the decoder feeds object. If pastKeyValues is null, creates new tensors for past key values.
|
|
23146
24045
|
*
|
|
23147
|
-
* @param {
|
|
23148
|
-
* @param {
|
|
24046
|
+
* @param {Record<string, any>} decoderFeeds The decoder feeds object to add past key values to.
|
|
24047
|
+
* @param {DynamicCache|null} pastKeyValues The cache containing past key values.
|
|
23149
24048
|
*/
|
|
23150
24049
|
addPastKeyValues(decoderFeeds, pastKeyValues) {
|
|
23151
24050
|
if (pastKeyValues) {
|
|
@@ -23162,14 +24061,29 @@ var PreTrainedModel = class extends Callable2 {
|
|
|
23162
24061
|
}
|
|
23163
24062
|
}
|
|
23164
24063
|
}
|
|
23165
|
-
|
|
23166
|
-
|
|
24064
|
+
/**
|
|
24065
|
+
* Helper function to select valid inputs and run through the appropriate encoder (vision, text, audio) based on the input type.
|
|
24066
|
+
* @param {string} sessionName
|
|
24067
|
+
* @param {Record<string, Tensor>} inputs
|
|
24068
|
+
* @param {string} outputName
|
|
24069
|
+
* @private
|
|
24070
|
+
*/
|
|
24071
|
+
async _encode_input(sessionName, inputs, outputName) {
|
|
24072
|
+
if (!Object.hasOwn(this.sessions, sessionName)) {
|
|
24073
|
+
throw new Error(`Model does not have a ${sessionName} session.`);
|
|
24074
|
+
}
|
|
24075
|
+
const session = this.sessions[sessionName];
|
|
24076
|
+
const output = await sessionRun(session, pick(inputs, session.inputNames));
|
|
24077
|
+
return output[outputName];
|
|
24078
|
+
}
|
|
24079
|
+
async encode_image(inputs) {
|
|
24080
|
+
return this._encode_input("vision_encoder", inputs, "image_features");
|
|
23167
24081
|
}
|
|
23168
|
-
async encode_text(
|
|
23169
|
-
return
|
|
24082
|
+
async encode_text(inputs) {
|
|
24083
|
+
return this._encode_input("embed_tokens", inputs, "inputs_embeds");
|
|
23170
24084
|
}
|
|
23171
|
-
async encode_audio(
|
|
23172
|
-
return
|
|
24085
|
+
async encode_audio(inputs) {
|
|
24086
|
+
return this._encode_input("audio_encoder", inputs, "audio_features");
|
|
23173
24087
|
}
|
|
23174
24088
|
};
|
|
23175
24089
|
async function seq2seq_forward(self2, model_inputs) {
|
|
@@ -23224,6 +24138,9 @@ async function decoder_forward(self2, model_inputs, is_encoder_decoder = false)
|
|
|
23224
24138
|
const start_index = ["paligemma", "gemma3_text", "gemma3"].includes(self2.config.model_type) ? 1 : 0;
|
|
23225
24139
|
new_model_inputs.position_ids = create_position_ids(new_model_inputs, past_key_values, start_index);
|
|
23226
24140
|
}
|
|
24141
|
+
if (session.inputNames.includes("num_logits_to_keep") && !new_model_inputs.num_logits_to_keep) {
|
|
24142
|
+
new_model_inputs.num_logits_to_keep = new Tensor2("int64", [0n], []);
|
|
24143
|
+
}
|
|
23227
24144
|
self2.addPastKeyValues(new_model_inputs, past_key_values);
|
|
23228
24145
|
const fixed = pick(new_model_inputs, session.inputNames);
|
|
23229
24146
|
return await sessionRun(session, fixed);
|
|
@@ -23232,7 +24149,7 @@ async function generic_text_to_text_forward(self2, {
|
|
|
23232
24149
|
// Generic parameters:
|
|
23233
24150
|
encode_function,
|
|
23234
24151
|
merge_function,
|
|
23235
|
-
|
|
24152
|
+
modality_input_names,
|
|
23236
24153
|
modality_output_name,
|
|
23237
24154
|
// Produced by the tokenizer/processor:
|
|
23238
24155
|
input_ids = null,
|
|
@@ -23247,38 +24164,54 @@ async function generic_text_to_text_forward(self2, {
|
|
|
23247
24164
|
// Additional parameters
|
|
23248
24165
|
...kwargs
|
|
23249
24166
|
}) {
|
|
23250
|
-
const modality_values = kwargs[modality_input_name];
|
|
23251
24167
|
if (!inputs_embeds) {
|
|
23252
24168
|
inputs_embeds = await self2.encode_text({ input_ids, ...kwargs });
|
|
23253
|
-
|
|
23254
|
-
|
|
23255
|
-
|
|
23256
|
-
|
|
23257
|
-
|
|
23258
|
-
|
|
23259
|
-
|
|
23260
|
-
|
|
23261
|
-
|
|
23262
|
-
inputs_embeds,
|
|
23263
|
-
|
|
23264
|
-
|
|
23265
|
-
|
|
23266
|
-
|
|
23267
|
-
|
|
23268
|
-
|
|
23269
|
-
|
|
23270
|
-
|
|
23271
|
-
|
|
23272
|
-
|
|
23273
|
-
|
|
23274
|
-
|
|
23275
|
-
|
|
24169
|
+
const modality_values = pick(kwargs, modality_input_names);
|
|
24170
|
+
if (Object.keys(modality_values).length > 0) {
|
|
24171
|
+
if (input_ids.dims[1] !== 1) {
|
|
24172
|
+
const modality_features = await encode_function({
|
|
24173
|
+
// Pass the modality values under its expected key.
|
|
24174
|
+
// The caller knows whether this is audio or image.
|
|
24175
|
+
...modality_values,
|
|
24176
|
+
...kwargs
|
|
24177
|
+
});
|
|
24178
|
+
({ inputs_embeds, attention_mask } = merge_function({
|
|
24179
|
+
[modality_output_name]: modality_features,
|
|
24180
|
+
inputs_embeds,
|
|
24181
|
+
input_ids,
|
|
24182
|
+
attention_mask
|
|
24183
|
+
}));
|
|
24184
|
+
} else if (past_key_values && input_ids.dims[1] === 1) {
|
|
24185
|
+
const target_length = input_ids.dims[1];
|
|
24186
|
+
const past_length = past_key_values.get_seq_length();
|
|
24187
|
+
attention_mask = cat(
|
|
24188
|
+
[
|
|
24189
|
+
ones([input_ids.dims[0], past_length]),
|
|
24190
|
+
attention_mask.slice(null, [attention_mask.dims[1] - target_length, attention_mask.dims[1]])
|
|
24191
|
+
],
|
|
24192
|
+
1
|
|
24193
|
+
);
|
|
24194
|
+
}
|
|
23276
24195
|
}
|
|
23277
24196
|
}
|
|
23278
24197
|
if (!position_ids) {
|
|
23279
|
-
if (
|
|
23280
|
-
|
|
23281
|
-
|
|
24198
|
+
if (
|
|
24199
|
+
// Handle special case for qwen vl models
|
|
24200
|
+
[
|
|
24201
|
+
"qwen2_vl",
|
|
24202
|
+
"qwen2_vl_text",
|
|
24203
|
+
"qwen2_5_vl",
|
|
24204
|
+
"qwen2_5_vl_text",
|
|
24205
|
+
"qwen3_vl",
|
|
24206
|
+
"qwen3_vl_text",
|
|
24207
|
+
"qwen3_vl_moe",
|
|
24208
|
+
"qwen3_vl_moe_text",
|
|
24209
|
+
"qwen3_5",
|
|
24210
|
+
"qwen3_5_text",
|
|
24211
|
+
"qwen3_5_moe",
|
|
24212
|
+
"qwen3_5_moe_text"
|
|
24213
|
+
].includes(self2.config.model_type)
|
|
24214
|
+
) {
|
|
23282
24215
|
const { image_grid_thw, video_grid_thw } = kwargs;
|
|
23283
24216
|
[position_ids] = self2.get_rope_index(input_ids, image_grid_thw, video_grid_thw, attention_mask);
|
|
23284
24217
|
}
|
|
@@ -23300,7 +24233,7 @@ async function generic_text_to_text_forward(self2, {
|
|
|
23300
24233
|
async function audio_text_to_text_forward(self2, params) {
|
|
23301
24234
|
return await generic_text_to_text_forward(self2, {
|
|
23302
24235
|
...params,
|
|
23303
|
-
|
|
24236
|
+
modality_input_names: ["audio_values", "input_features"],
|
|
23304
24237
|
modality_output_name: "audio_features",
|
|
23305
24238
|
encode_function: self2.encode_audio.bind(self2),
|
|
23306
24239
|
merge_function: self2._merge_input_ids_with_audio_features.bind(self2)
|
|
@@ -23309,7 +24242,7 @@ async function audio_text_to_text_forward(self2, params) {
|
|
|
23309
24242
|
async function image_text_to_text_forward(self2, params) {
|
|
23310
24243
|
return await generic_text_to_text_forward(self2, {
|
|
23311
24244
|
...params,
|
|
23312
|
-
|
|
24245
|
+
modality_input_names: ["pixel_values"],
|
|
23313
24246
|
modality_output_name: "image_features",
|
|
23314
24247
|
encode_function: self2.encode_image.bind(self2),
|
|
23315
24248
|
merge_function: self2._merge_input_ids_with_image_features.bind(self2)
|
|
@@ -23345,7 +24278,11 @@ function create_position_ids(model_inputs, past_key_values = null, start_index =
|
|
|
23345
24278
|
return position_ids;
|
|
23346
24279
|
}
|
|
23347
24280
|
function decoder_prepare_inputs_for_generation(self2, input_ids, model_inputs, generation_config) {
|
|
23348
|
-
const past_length = model_inputs.past_key_values ?
|
|
24281
|
+
const past_length = model_inputs.past_key_values ? model_inputs.past_key_values.get_seq_length() : 0;
|
|
24282
|
+
const session = self2.sessions["decoder_model_merged"] ?? self2.sessions["model"];
|
|
24283
|
+
if (session?.inputNames.includes("num_logits_to_keep") && !model_inputs.num_logits_to_keep) {
|
|
24284
|
+
model_inputs.num_logits_to_keep = new Tensor2("int64", [1n], []);
|
|
24285
|
+
}
|
|
23349
24286
|
if (!model_inputs.attention_mask) {
|
|
23350
24287
|
let dims;
|
|
23351
24288
|
for (const key of ["input_ids", "inputs_embeds", "position_ids"]) {
|
|
@@ -23653,6 +24590,7 @@ __export(models_exports, {
|
|
|
23653
24590
|
Gemma3ForCausalLM: () => Gemma3ForCausalLM,
|
|
23654
24591
|
Gemma3Model: () => Gemma3Model,
|
|
23655
24592
|
Gemma3PreTrainedModel: () => Gemma3PreTrainedModel,
|
|
24593
|
+
Gemma3nForCausalLM: () => Gemma3nForCausalLM,
|
|
23656
24594
|
Gemma3nForConditionalGeneration: () => Gemma3nForConditionalGeneration,
|
|
23657
24595
|
Gemma3nPreTrainedModel: () => Gemma3nPreTrainedModel,
|
|
23658
24596
|
GemmaForCausalLM: () => GemmaForCausalLM,
|
|
@@ -23670,6 +24608,7 @@ __export(models_exports, {
|
|
|
23670
24608
|
GraniteMoeHybridModel: () => GraniteMoeHybridModel,
|
|
23671
24609
|
GraniteMoeHybridPreTrainedModel: () => GraniteMoeHybridPreTrainedModel,
|
|
23672
24610
|
GranitePreTrainedModel: () => GranitePreTrainedModel,
|
|
24611
|
+
GraniteSpeechForConditionalGeneration: () => GraniteSpeechForConditionalGeneration,
|
|
23673
24612
|
GroundingDinoForObjectDetection: () => GroundingDinoForObjectDetection,
|
|
23674
24613
|
GroundingDinoPreTrainedModel: () => GroundingDinoPreTrainedModel,
|
|
23675
24614
|
GroupViTModel: () => GroupViTModel,
|
|
@@ -23691,7 +24630,6 @@ __export(models_exports, {
|
|
|
23691
24630
|
IJepaModel: () => IJepaModel,
|
|
23692
24631
|
IJepaPreTrainedModel: () => IJepaPreTrainedModel,
|
|
23693
24632
|
Idefics3ForConditionalGeneration: () => Idefics3ForConditionalGeneration,
|
|
23694
|
-
Idefics3PreTrainedModel: () => Idefics3PreTrainedModel,
|
|
23695
24633
|
JAISLMHeadModel: () => JAISLMHeadModel,
|
|
23696
24634
|
JAISModel: () => JAISModel,
|
|
23697
24635
|
JAISPreTrainedModel: () => JAISPreTrainedModel,
|
|
@@ -23705,6 +24643,7 @@ __export(models_exports, {
|
|
|
23705
24643
|
Lfm2MoeModel: () => Lfm2MoeModel,
|
|
23706
24644
|
Lfm2MoePreTrainedModel: () => Lfm2MoePreTrainedModel,
|
|
23707
24645
|
Lfm2PreTrainedModel: () => Lfm2PreTrainedModel,
|
|
24646
|
+
Lfm2VlForConditionalGeneration: () => Lfm2VlForConditionalGeneration,
|
|
23708
24647
|
LiteWhisperForConditionalGeneration: () => LiteWhisperForConditionalGeneration,
|
|
23709
24648
|
Llama4ForCausalLM: () => Llama4ForCausalLM,
|
|
23710
24649
|
Llama4PreTrainedModel: () => Llama4PreTrainedModel,
|
|
@@ -23829,6 +24768,9 @@ __export(models_exports, {
|
|
|
23829
24768
|
Olmo3Model: () => Olmo3Model,
|
|
23830
24769
|
Olmo3PreTrainedModel: () => Olmo3PreTrainedModel,
|
|
23831
24770
|
OlmoForCausalLM: () => OlmoForCausalLM,
|
|
24771
|
+
OlmoHybridForCausalLM: () => OlmoHybridForCausalLM,
|
|
24772
|
+
OlmoHybridModel: () => OlmoHybridModel,
|
|
24773
|
+
OlmoHybridPreTrainedModel: () => OlmoHybridPreTrainedModel,
|
|
23832
24774
|
OlmoModel: () => OlmoModel,
|
|
23833
24775
|
OlmoPreTrainedModel: () => OlmoPreTrainedModel,
|
|
23834
24776
|
OpenELMForCausalLM: () => OpenELMForCausalLM,
|
|
@@ -23841,7 +24783,6 @@ __export(models_exports, {
|
|
|
23841
24783
|
Owlv2Model: () => Owlv2Model,
|
|
23842
24784
|
Owlv2PreTrainedModel: () => Owlv2PreTrainedModel,
|
|
23843
24785
|
PaliGemmaForConditionalGeneration: () => PaliGemmaForConditionalGeneration,
|
|
23844
|
-
PaliGemmaPreTrainedModel: () => PaliGemmaPreTrainedModel,
|
|
23845
24786
|
ParakeetForCTC: () => ParakeetForCTC,
|
|
23846
24787
|
ParakeetPreTrainedModel: () => ParakeetPreTrainedModel,
|
|
23847
24788
|
PatchTSMixerForPrediction: () => PatchTSMixerForPrediction,
|
|
@@ -23867,15 +24808,31 @@ __export(models_exports, {
|
|
|
23867
24808
|
PyAnnotePreTrainedModel: () => PyAnnotePreTrainedModel,
|
|
23868
24809
|
Qwen2ForCausalLM: () => Qwen2ForCausalLM,
|
|
23869
24810
|
Qwen2Model: () => Qwen2Model,
|
|
24811
|
+
Qwen2MoeForCausalLM: () => Qwen2MoeForCausalLM,
|
|
24812
|
+
Qwen2MoeModel: () => Qwen2MoeModel,
|
|
24813
|
+
Qwen2MoePreTrainedModel: () => Qwen2MoePreTrainedModel,
|
|
23870
24814
|
Qwen2PreTrainedModel: () => Qwen2PreTrainedModel,
|
|
24815
|
+
Qwen2VLForCausalLM: () => Qwen2VLForCausalLM,
|
|
23871
24816
|
Qwen2VLForConditionalGeneration: () => Qwen2VLForConditionalGeneration,
|
|
23872
24817
|
Qwen2VLPreTrainedModel: () => Qwen2VLPreTrainedModel,
|
|
24818
|
+
Qwen2_5_VLForCausalLM: () => Qwen2_5_VLForCausalLM,
|
|
23873
24819
|
Qwen2_5_VLForConditionalGeneration: () => Qwen2_5_VLForConditionalGeneration,
|
|
23874
24820
|
Qwen3ForCausalLM: () => Qwen3ForCausalLM,
|
|
23875
24821
|
Qwen3Model: () => Qwen3Model,
|
|
24822
|
+
Qwen3MoeForCausalLM: () => Qwen3MoeForCausalLM,
|
|
24823
|
+
Qwen3MoeModel: () => Qwen3MoeModel,
|
|
24824
|
+
Qwen3MoePreTrainedModel: () => Qwen3MoePreTrainedModel,
|
|
24825
|
+
Qwen3NextForCausalLM: () => Qwen3NextForCausalLM,
|
|
24826
|
+
Qwen3NextModel: () => Qwen3NextModel,
|
|
24827
|
+
Qwen3NextPreTrainedModel: () => Qwen3NextPreTrainedModel,
|
|
23876
24828
|
Qwen3PreTrainedModel: () => Qwen3PreTrainedModel,
|
|
24829
|
+
Qwen3VLForCausalLM: () => Qwen3VLForCausalLM,
|
|
23877
24830
|
Qwen3VLForConditionalGeneration: () => Qwen3VLForConditionalGeneration,
|
|
24831
|
+
Qwen3VLMoeForCausalLM: () => Qwen3VLMoeForCausalLM,
|
|
24832
|
+
Qwen3VLMoeForConditionalGeneration: () => Qwen3VLMoeForConditionalGeneration,
|
|
24833
|
+
Qwen3_5ForCausalLM: () => Qwen3_5ForCausalLM,
|
|
23878
24834
|
Qwen3_5ForConditionalGeneration: () => Qwen3_5ForConditionalGeneration,
|
|
24835
|
+
Qwen3_5MoeForCausalLM: () => Qwen3_5MoeForCausalLM,
|
|
23879
24836
|
Qwen3_5MoeForConditionalGeneration: () => Qwen3_5MoeForConditionalGeneration,
|
|
23880
24837
|
RFDetrForObjectDetection: () => RFDetrForObjectDetection,
|
|
23881
24838
|
RFDetrModel: () => RFDetrModel,
|
|
@@ -23926,7 +24883,6 @@ __export(models_exports, {
|
|
|
23926
24883
|
SmolLM3ForCausalLM: () => SmolLM3ForCausalLM,
|
|
23927
24884
|
SmolLM3Model: () => SmolLM3Model,
|
|
23928
24885
|
SmolLM3PreTrainedModel: () => SmolLM3PreTrainedModel,
|
|
23929
|
-
SmolVLMForConditionalGeneration: () => SmolVLMForConditionalGeneration,
|
|
23930
24886
|
SnacDecoderModel: () => SnacDecoderModel,
|
|
23931
24887
|
SnacEncoderModel: () => SnacEncoderModel,
|
|
23932
24888
|
SnacModel: () => SnacModel,
|
|
@@ -23998,6 +24954,8 @@ __export(models_exports, {
|
|
|
23998
24954
|
VitsModelOutput: () => VitsModelOutput,
|
|
23999
24955
|
VitsPreTrainedModel: () => VitsPreTrainedModel,
|
|
24000
24956
|
VoxtralForConditionalGeneration: () => VoxtralForConditionalGeneration,
|
|
24957
|
+
VoxtralRealtimeForConditionalGeneration: () => VoxtralRealtimeForConditionalGeneration,
|
|
24958
|
+
VoxtralRealtimePreTrainedModel: () => VoxtralRealtimePreTrainedModel,
|
|
24001
24959
|
Wav2Vec2BertForCTC: () => Wav2Vec2BertForCTC,
|
|
24002
24960
|
Wav2Vec2BertForSequenceClassification: () => Wav2Vec2BertForSequenceClassification,
|
|
24003
24961
|
Wav2Vec2BertModel: () => Wav2Vec2BertModel,
|
|
@@ -24358,7 +25316,7 @@ var ChatterboxModel = class extends ChatterboxPreTrainedModel {
|
|
|
24358
25316
|
if (!past_key_values || target_length !== 1) {
|
|
24359
25317
|
throw new Error("Incorrect state encountered during generation.");
|
|
24360
25318
|
}
|
|
24361
|
-
const past_length =
|
|
25319
|
+
const past_length = past_key_values.get_seq_length();
|
|
24362
25320
|
attention_mask = ones([inputs_embeds.dims[0], past_length + target_length]);
|
|
24363
25321
|
}
|
|
24364
25322
|
}
|
|
@@ -25388,6 +26346,8 @@ var Gemma3nForConditionalGeneration = class extends Gemma3nPreTrainedModel {
|
|
|
25388
26346
|
});
|
|
25389
26347
|
}
|
|
25390
26348
|
};
|
|
26349
|
+
var Gemma3nForCausalLM = class extends Gemma3nForConditionalGeneration {
|
|
26350
|
+
};
|
|
25391
26351
|
|
|
25392
26352
|
// src/models/glm/modeling_glm.js
|
|
25393
26353
|
var GlmPreTrainedModel = class extends PreTrainedModel {
|
|
@@ -25469,6 +26429,28 @@ var GraniteMoeHybridModel = class extends GraniteMoeHybridPreTrainedModel {
|
|
|
25469
26429
|
var GraniteMoeHybridForCausalLM = class extends GraniteMoeHybridPreTrainedModel {
|
|
25470
26430
|
};
|
|
25471
26431
|
|
|
26432
|
+
// src/models/ultravox/modeling_ultravox.js
|
|
26433
|
+
var UltravoxPreTrainedModel = class extends PreTrainedModel {
|
|
26434
|
+
forward_params = ["input_ids", "attention_mask", "position_ids", "audio_values", "past_key_values"];
|
|
26435
|
+
};
|
|
26436
|
+
var UltravoxModel = class extends UltravoxPreTrainedModel {
|
|
26437
|
+
_merge_input_ids_with_audio_features(kwargs) {
|
|
26438
|
+
const audio_hidden_size = kwargs.audio_features.dims.at(-1);
|
|
26439
|
+
const reshaped_audio_features = kwargs.audio_features.view(-1, audio_hidden_size);
|
|
26440
|
+
return default_merge_input_ids_with_audio_features({
|
|
26441
|
+
// @ts-ignore
|
|
26442
|
+
audio_token_id: this.config.ignore_index ?? this.config.audio_token_id ?? this.config.audio_token_index,
|
|
26443
|
+
...kwargs,
|
|
26444
|
+
audio_features: reshaped_audio_features
|
|
26445
|
+
});
|
|
26446
|
+
}
|
|
26447
|
+
};
|
|
26448
|
+
|
|
26449
|
+
// src/models/granite_speech/modeling_granite_speech.js
|
|
26450
|
+
var GraniteSpeechForConditionalGeneration = class extends UltravoxModel {
|
|
26451
|
+
forward_params = ["input_ids", "attention_mask", "input_features", "past_key_values"];
|
|
26452
|
+
};
|
|
26453
|
+
|
|
25472
26454
|
// src/models/grounding_dino/modeling_grounding_dino.js
|
|
25473
26455
|
var GroundingDinoPreTrainedModel = class extends PreTrainedModel {
|
|
25474
26456
|
};
|
|
@@ -25564,17 +26546,38 @@ var HubertForSequenceClassification = class extends Wav2Vec2PreTrainedModel {
|
|
|
25564
26546
|
return new SequenceClassifierOutput(await super._call(model_inputs));
|
|
25565
26547
|
}
|
|
25566
26548
|
};
|
|
25567
|
-
|
|
25568
|
-
// src/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.js
|
|
25569
|
-
var HunYuanDenseV1PreTrainedModel = class extends PreTrainedModel {
|
|
25570
|
-
};
|
|
25571
|
-
var HunYuanDenseV1Model = class extends HunYuanDenseV1PreTrainedModel {
|
|
26549
|
+
|
|
26550
|
+
// src/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.js
|
|
26551
|
+
var HunYuanDenseV1PreTrainedModel = class extends PreTrainedModel {
|
|
26552
|
+
};
|
|
26553
|
+
var HunYuanDenseV1Model = class extends HunYuanDenseV1PreTrainedModel {
|
|
26554
|
+
};
|
|
26555
|
+
var HunYuanDenseV1ForCausalLM = class extends HunYuanDenseV1PreTrainedModel {
|
|
26556
|
+
};
|
|
26557
|
+
|
|
26558
|
+
// src/models/llava/modeling_llava.js
|
|
26559
|
+
var LlavaPreTrainedModel = class extends PreTrainedModel {
|
|
26560
|
+
forward_params = ["input_ids", "attention_mask", "pixel_values", "position_ids", "past_key_values"];
|
|
26561
|
+
};
|
|
26562
|
+
var LlavaForConditionalGeneration = class extends LlavaPreTrainedModel {
|
|
26563
|
+
_merge_input_ids_with_image_features(kwargs) {
|
|
26564
|
+
const vision_hidden_size = kwargs.image_features.dims.at(-1);
|
|
26565
|
+
const reshaped_image_hidden_states = kwargs.image_features.view(-1, vision_hidden_size);
|
|
26566
|
+
return default_merge_input_ids_with_image_features({
|
|
26567
|
+
// @ts-ignore
|
|
26568
|
+
image_token_id: this.config.image_token_index ?? this.config.image_token_id,
|
|
26569
|
+
...kwargs,
|
|
26570
|
+
image_features: reshaped_image_hidden_states
|
|
26571
|
+
});
|
|
26572
|
+
}
|
|
26573
|
+
};
|
|
26574
|
+
var Moondream1ForConditionalGeneration = class extends LlavaForConditionalGeneration {
|
|
25572
26575
|
};
|
|
25573
|
-
var
|
|
26576
|
+
var LlavaQwen2ForCausalLM = class extends LlavaForConditionalGeneration {
|
|
25574
26577
|
};
|
|
25575
26578
|
|
|
25576
26579
|
// src/models/idefics3/modeling_idefics3.js
|
|
25577
|
-
var
|
|
26580
|
+
var Idefics3ForConditionalGeneration = class extends LlavaForConditionalGeneration {
|
|
25578
26581
|
forward_params = [
|
|
25579
26582
|
"input_ids",
|
|
25580
26583
|
"attention_mask",
|
|
@@ -25584,24 +26587,6 @@ var Idefics3PreTrainedModel = class extends PreTrainedModel {
|
|
|
25584
26587
|
"past_key_values"
|
|
25585
26588
|
];
|
|
25586
26589
|
};
|
|
25587
|
-
var Idefics3ForConditionalGeneration = class extends Idefics3PreTrainedModel {
|
|
25588
|
-
async encode_image({ pixel_values, pixel_attention_mask }) {
|
|
25589
|
-
const features = (await sessionRun(this.sessions["vision_encoder"], { pixel_values, pixel_attention_mask })).image_features;
|
|
25590
|
-
return features;
|
|
25591
|
-
}
|
|
25592
|
-
_merge_input_ids_with_image_features(kwargs) {
|
|
25593
|
-
const vision_hidden_size = kwargs.image_features.dims.at(-1);
|
|
25594
|
-
const reshaped_image_hidden_states = kwargs.image_features.view(-1, vision_hidden_size);
|
|
25595
|
-
return default_merge_input_ids_with_image_features({
|
|
25596
|
-
// @ts-ignore
|
|
25597
|
-
image_token_id: this.config.image_token_id,
|
|
25598
|
-
...kwargs,
|
|
25599
|
-
image_features: reshaped_image_hidden_states
|
|
25600
|
-
});
|
|
25601
|
-
}
|
|
25602
|
-
};
|
|
25603
|
-
var SmolVLMForConditionalGeneration = class extends Idefics3ForConditionalGeneration {
|
|
25604
|
-
};
|
|
25605
26590
|
|
|
25606
26591
|
// src/models/ijepa/modeling_ijepa.js
|
|
25607
26592
|
var IJepaPreTrainedModel = class extends PreTrainedModel {
|
|
@@ -25692,6 +26677,19 @@ var Lfm2MoeModel = class extends Lfm2MoePreTrainedModel {
|
|
|
25692
26677
|
var Lfm2MoeForCausalLM = class extends Lfm2MoePreTrainedModel {
|
|
25693
26678
|
};
|
|
25694
26679
|
|
|
26680
|
+
// src/models/lfm2_vl/modeling_lfm2_vl.js
|
|
26681
|
+
var Lfm2VlForConditionalGeneration = class extends LlavaForConditionalGeneration {
|
|
26682
|
+
forward_params = [
|
|
26683
|
+
"input_ids",
|
|
26684
|
+
"attention_mask",
|
|
26685
|
+
"pixel_values",
|
|
26686
|
+
"pixel_attention_mask",
|
|
26687
|
+
"spatial_shapes",
|
|
26688
|
+
"position_ids",
|
|
26689
|
+
"past_key_values"
|
|
26690
|
+
];
|
|
26691
|
+
};
|
|
26692
|
+
|
|
25695
26693
|
// src/models/llama/modeling_llama.js
|
|
25696
26694
|
var LlamaPreTrainedModel = class extends PreTrainedModel {
|
|
25697
26695
|
};
|
|
@@ -25706,27 +26704,6 @@ var Llama4PreTrainedModel = class extends PreTrainedModel {
|
|
|
25706
26704
|
var Llama4ForCausalLM = class extends Llama4PreTrainedModel {
|
|
25707
26705
|
};
|
|
25708
26706
|
|
|
25709
|
-
// src/models/llava/modeling_llava.js
|
|
25710
|
-
var LlavaPreTrainedModel = class extends PreTrainedModel {
|
|
25711
|
-
forward_params = ["input_ids", "attention_mask", "pixel_values", "position_ids", "past_key_values"];
|
|
25712
|
-
};
|
|
25713
|
-
var LlavaForConditionalGeneration = class extends LlavaPreTrainedModel {
|
|
25714
|
-
_merge_input_ids_with_image_features(kwargs) {
|
|
25715
|
-
const vision_hidden_size = kwargs.image_features.dims.at(-1);
|
|
25716
|
-
const reshaped_image_hidden_states = kwargs.image_features.view(-1, vision_hidden_size);
|
|
25717
|
-
return default_merge_input_ids_with_image_features({
|
|
25718
|
-
// @ts-ignore
|
|
25719
|
-
image_token_id: this.config.image_token_index,
|
|
25720
|
-
...kwargs,
|
|
25721
|
-
image_features: reshaped_image_hidden_states
|
|
25722
|
-
});
|
|
25723
|
-
}
|
|
25724
|
-
};
|
|
25725
|
-
var Moondream1ForConditionalGeneration = class extends LlavaForConditionalGeneration {
|
|
25726
|
-
};
|
|
25727
|
-
var LlavaQwen2ForCausalLM = class extends LlavaForConditionalGeneration {
|
|
25728
|
-
};
|
|
25729
|
-
|
|
25730
26707
|
// src/models/longt5/modeling_longt5.js
|
|
25731
26708
|
var LongT5PreTrainedModel = class extends PreTrainedModel {
|
|
25732
26709
|
};
|
|
@@ -26436,6 +27413,14 @@ var Olmo3Model = class extends Olmo3PreTrainedModel {
|
|
|
26436
27413
|
var Olmo3ForCausalLM = class extends Olmo3PreTrainedModel {
|
|
26437
27414
|
};
|
|
26438
27415
|
|
|
27416
|
+
// src/models/olmo_hybrid/modeling_olmo_hybrid.js
|
|
27417
|
+
var OlmoHybridPreTrainedModel = class extends PreTrainedModel {
|
|
27418
|
+
};
|
|
27419
|
+
var OlmoHybridModel = class extends OlmoHybridPreTrainedModel {
|
|
27420
|
+
};
|
|
27421
|
+
var OlmoHybridForCausalLM = class extends OlmoHybridPreTrainedModel {
|
|
27422
|
+
};
|
|
27423
|
+
|
|
26439
27424
|
// src/models/openelm/modeling_openelm.js
|
|
26440
27425
|
var OpenELMPreTrainedModel = class extends PreTrainedModel {
|
|
26441
27426
|
};
|
|
@@ -26469,27 +27454,7 @@ var OwlViTForObjectDetection = class extends OwlViTPreTrainedModel {
|
|
|
26469
27454
|
};
|
|
26470
27455
|
|
|
26471
27456
|
// src/models/paligemma/modeling_paligemma.js
|
|
26472
|
-
var
|
|
26473
|
-
forward_params = [
|
|
26474
|
-
"input_ids",
|
|
26475
|
-
// 'inputs_embeds',
|
|
26476
|
-
"attention_mask",
|
|
26477
|
-
"pixel_values",
|
|
26478
|
-
"position_ids",
|
|
26479
|
-
"past_key_values"
|
|
26480
|
-
];
|
|
26481
|
-
};
|
|
26482
|
-
var PaliGemmaForConditionalGeneration = class extends PaliGemmaPreTrainedModel {
|
|
26483
|
-
_merge_input_ids_with_image_features(kwargs) {
|
|
26484
|
-
const vision_hidden_size = kwargs.image_features.dims.at(-1);
|
|
26485
|
-
const reshaped_image_hidden_states = kwargs.image_features.view(-1, vision_hidden_size);
|
|
26486
|
-
return default_merge_input_ids_with_image_features({
|
|
26487
|
-
// @ts-ignore
|
|
26488
|
-
image_token_id: this.config.image_token_index,
|
|
26489
|
-
...kwargs,
|
|
26490
|
-
image_features: reshaped_image_hidden_states
|
|
26491
|
-
});
|
|
26492
|
-
}
|
|
27457
|
+
var PaliGemmaForConditionalGeneration = class extends LlavaForConditionalGeneration {
|
|
26493
27458
|
};
|
|
26494
27459
|
|
|
26495
27460
|
// src/models/parakeet/modeling_parakeet.js
|
|
@@ -26640,6 +27605,14 @@ var Qwen2Model = class extends Qwen2PreTrainedModel {
|
|
|
26640
27605
|
var Qwen2ForCausalLM = class extends Qwen2PreTrainedModel {
|
|
26641
27606
|
};
|
|
26642
27607
|
|
|
27608
|
+
// src/models/qwen2_moe/modeling_qwen2_moe.js
|
|
27609
|
+
var Qwen2MoePreTrainedModel = class extends PreTrainedModel {
|
|
27610
|
+
};
|
|
27611
|
+
var Qwen2MoeModel = class extends Qwen2MoePreTrainedModel {
|
|
27612
|
+
};
|
|
27613
|
+
var Qwen2MoeForCausalLM = class extends Qwen2MoePreTrainedModel {
|
|
27614
|
+
};
|
|
27615
|
+
|
|
26643
27616
|
// src/models/qwen2_vl/modeling_qwen2_vl.js
|
|
26644
27617
|
var Qwen2VLPreTrainedModel = class extends PreTrainedModel {
|
|
26645
27618
|
forward_params = [
|
|
@@ -26654,6 +27627,9 @@ var Qwen2VLPreTrainedModel = class extends PreTrainedModel {
|
|
|
26654
27627
|
];
|
|
26655
27628
|
};
|
|
26656
27629
|
var Qwen2VLForConditionalGeneration = class extends Qwen2VLPreTrainedModel {
|
|
27630
|
+
// NOTE: This is used as the base class for all Qwen VL models and their CausalLM variants.
|
|
27631
|
+
// CausalLM variants (e.g., Qwen2VLForCausalLM) extend this class but load only
|
|
27632
|
+
// embed_tokens + decoder_model_merged (no vision_encoder) via MultimodalLanguageModelOnly type.
|
|
26657
27633
|
image_grid_thw_name = "grid_thw";
|
|
26658
27634
|
/**
|
|
26659
27635
|
* Calculate the 3D rope index based on image and video's temporal, height and width in LLM.
|
|
@@ -26843,19 +27819,45 @@ var Qwen2VLForConditionalGeneration = class extends Qwen2VLPreTrainedModel {
|
|
|
26843
27819
|
);
|
|
26844
27820
|
} else {
|
|
26845
27821
|
model_inputs.pixel_values = null;
|
|
26846
|
-
const
|
|
26847
|
-
|
|
26848
|
-
|
|
27822
|
+
const past_length = model_inputs.past_key_values.get_seq_length();
|
|
27823
|
+
if (past_length < model_inputs.input_ids.dims[1]) {
|
|
27824
|
+
const [full_position_ids, rope_deltas] = this.get_rope_index(
|
|
27825
|
+
model_inputs.input_ids,
|
|
27826
|
+
model_inputs.image_grid_thw,
|
|
27827
|
+
model_inputs.video_grid_thw,
|
|
27828
|
+
model_inputs.attention_mask
|
|
27829
|
+
);
|
|
27830
|
+
model_inputs.rope_deltas = rope_deltas;
|
|
27831
|
+
model_inputs.position_ids = full_position_ids.slice(null, null, [past_length, null]);
|
|
27832
|
+
model_inputs.input_ids = model_inputs.input_ids.slice(null, [past_length, null]);
|
|
27833
|
+
} else {
|
|
27834
|
+
if (!model_inputs.rope_deltas) {
|
|
27835
|
+
[, model_inputs.rope_deltas] = this.get_rope_index(
|
|
27836
|
+
model_inputs.input_ids,
|
|
27837
|
+
model_inputs.image_grid_thw,
|
|
27838
|
+
model_inputs.video_grid_thw,
|
|
27839
|
+
model_inputs.attention_mask
|
|
27840
|
+
);
|
|
27841
|
+
}
|
|
27842
|
+
const delta = BigInt(past_length);
|
|
27843
|
+
const rope_deltas_list = model_inputs.rope_deltas.map((x) => delta + x);
|
|
27844
|
+
model_inputs.position_ids = stack([rope_deltas_list, rope_deltas_list, rope_deltas_list], 0);
|
|
27845
|
+
}
|
|
26849
27846
|
}
|
|
26850
27847
|
}
|
|
26851
27848
|
return model_inputs;
|
|
26852
27849
|
}
|
|
26853
27850
|
};
|
|
27851
|
+
var Qwen2VLForCausalLM = class extends Qwen2VLForConditionalGeneration {
|
|
27852
|
+
};
|
|
26854
27853
|
|
|
26855
27854
|
// src/models/qwen2_5_vl/modeling_qwen2_5_vl.js
|
|
26856
27855
|
var Qwen2_5_VLForConditionalGeneration = class extends Qwen2VLForConditionalGeneration {
|
|
26857
27856
|
image_grid_thw_name = "image_grid_thw";
|
|
26858
27857
|
};
|
|
27858
|
+
var Qwen2_5_VLForCausalLM = class extends Qwen2VLForCausalLM {
|
|
27859
|
+
image_grid_thw_name = "image_grid_thw";
|
|
27860
|
+
};
|
|
26859
27861
|
|
|
26860
27862
|
// src/models/qwen3/modeling_qwen3.js
|
|
26861
27863
|
var Qwen3PreTrainedModel = class extends PreTrainedModel {
|
|
@@ -26865,17 +27867,45 @@ var Qwen3Model = class extends Qwen3PreTrainedModel {
|
|
|
26865
27867
|
var Qwen3ForCausalLM = class extends Qwen3PreTrainedModel {
|
|
26866
27868
|
};
|
|
26867
27869
|
|
|
27870
|
+
// src/models/qwen3_moe/modeling_qwen3_moe.js
|
|
27871
|
+
var Qwen3MoePreTrainedModel = class extends PreTrainedModel {
|
|
27872
|
+
};
|
|
27873
|
+
var Qwen3MoeModel = class extends Qwen3MoePreTrainedModel {
|
|
27874
|
+
};
|
|
27875
|
+
var Qwen3MoeForCausalLM = class extends Qwen3MoePreTrainedModel {
|
|
27876
|
+
};
|
|
27877
|
+
|
|
27878
|
+
// src/models/qwen3_next/modeling_qwen3_next.js
|
|
27879
|
+
var Qwen3NextPreTrainedModel = class extends PreTrainedModel {
|
|
27880
|
+
};
|
|
27881
|
+
var Qwen3NextModel = class extends Qwen3NextPreTrainedModel {
|
|
27882
|
+
};
|
|
27883
|
+
var Qwen3NextForCausalLM = class extends Qwen3NextPreTrainedModel {
|
|
27884
|
+
};
|
|
27885
|
+
|
|
26868
27886
|
// src/models/qwen3_vl/modeling_qwen3_vl.js
|
|
26869
27887
|
var Qwen3VLForConditionalGeneration = class extends Qwen2_5_VLForConditionalGeneration {
|
|
26870
27888
|
};
|
|
27889
|
+
var Qwen3VLForCausalLM = class extends Qwen2_5_VLForCausalLM {
|
|
27890
|
+
};
|
|
27891
|
+
|
|
27892
|
+
// src/models/qwen3_vl_moe/modeling_qwen3_vl_moe.js
|
|
27893
|
+
var Qwen3VLMoeForConditionalGeneration = class extends Qwen3VLForConditionalGeneration {
|
|
27894
|
+
};
|
|
27895
|
+
var Qwen3VLMoeForCausalLM = class extends Qwen3VLForCausalLM {
|
|
27896
|
+
};
|
|
26871
27897
|
|
|
26872
27898
|
// src/models/qwen3_5/modeling_qwen3_5.js
|
|
26873
27899
|
var Qwen3_5ForConditionalGeneration = class extends Qwen3VLForConditionalGeneration {
|
|
26874
27900
|
};
|
|
27901
|
+
var Qwen3_5ForCausalLM = class extends Qwen3_5ForConditionalGeneration {
|
|
27902
|
+
};
|
|
26875
27903
|
|
|
26876
27904
|
// src/models/qwen3_5_moe/modeling_qwen3_5_moe.js
|
|
26877
27905
|
var Qwen3_5MoeForConditionalGeneration = class extends Qwen3_5ForConditionalGeneration {
|
|
26878
27906
|
};
|
|
27907
|
+
var Qwen3_5MoeForCausalLM = class extends Qwen3_5ForCausalLM {
|
|
27908
|
+
};
|
|
26879
27909
|
|
|
26880
27910
|
// src/models/resnet/modeling_resnet.js
|
|
26881
27911
|
var ResNetPreTrainedModel = class extends PreTrainedModel {
|
|
@@ -27556,25 +28586,6 @@ var TrOCRPreTrainedModel = class extends PreTrainedModel {
|
|
|
27556
28586
|
var TrOCRForCausalLM = class extends TrOCRPreTrainedModel {
|
|
27557
28587
|
};
|
|
27558
28588
|
|
|
27559
|
-
// src/models/ultravox/modeling_ultravox.js
|
|
27560
|
-
var UltravoxPreTrainedModel = class extends PreTrainedModel {
|
|
27561
|
-
forward_params = ["input_ids", "attention_mask", "position_ids", "audio_values", "past_key_values"];
|
|
27562
|
-
};
|
|
27563
|
-
var UltravoxModel = class extends UltravoxPreTrainedModel {
|
|
27564
|
-
_merge_input_ids_with_audio_features(kwargs) {
|
|
27565
|
-
const audio_hidden_size = kwargs.audio_features.dims.at(-1);
|
|
27566
|
-
const reshaped_audio_features = kwargs.audio_features.view(-1, audio_hidden_size);
|
|
27567
|
-
return default_merge_input_ids_with_audio_features({
|
|
27568
|
-
// @ts-ignore
|
|
27569
|
-
audio_token_id: this.config.ignore_index ?? this.config.audio_token_id,
|
|
27570
|
-
...kwargs,
|
|
27571
|
-
audio_features: reshaped_audio_features
|
|
27572
|
-
});
|
|
27573
|
-
}
|
|
27574
|
-
};
|
|
27575
|
-
var VoxtralForConditionalGeneration = class extends UltravoxModel {
|
|
27576
|
-
};
|
|
27577
|
-
|
|
27578
28589
|
// src/models/unispeech/modeling_unispeech.js
|
|
27579
28590
|
var UniSpeechPreTrainedModel = class extends PreTrainedModel {
|
|
27580
28591
|
};
|
|
@@ -27740,6 +28751,170 @@ var VitsModel = class extends VitsPreTrainedModel {
|
|
|
27740
28751
|
}
|
|
27741
28752
|
};
|
|
27742
28753
|
|
|
28754
|
+
// src/models/voxtral/modeling_voxtral.js
|
|
28755
|
+
var VoxtralForConditionalGeneration = class extends UltravoxModel {
|
|
28756
|
+
};
|
|
28757
|
+
|
|
28758
|
+
// src/models/voxtral_realtime/modeling_voxtral_realtime.js
|
|
28759
|
+
var CONV1_LEFT_PAD = 2;
|
|
28760
|
+
var CONV2_LEFT_PAD = 1;
|
|
28761
|
+
var states = /* @__PURE__ */ new WeakMap();
|
|
28762
|
+
function createEncoderState(model, input_features) {
|
|
28763
|
+
const { text_config, audio_config } = (
|
|
28764
|
+
/** @type {any} */
|
|
28765
|
+
model.config
|
|
28766
|
+
);
|
|
28767
|
+
const encoder_session = model.sessions["audio_encoder"];
|
|
28768
|
+
const { num_mel_bins, hidden_size: enc_hidden_size } = audio_config;
|
|
28769
|
+
const PADDING_CACHE_CHANNELS = num_mel_bins + enc_hidden_size;
|
|
28770
|
+
const enc_kv_cache = new DynamicCache();
|
|
28771
|
+
const enc_dtype = encoder_session?.config?.kv_cache_dtype ?? "float32";
|
|
28772
|
+
const enc_cls = enc_dtype === "float16" ? DataTypeMap.float16 : DataTypeMap.float32;
|
|
28773
|
+
const enc_shapes = getCacheShapes(audio_config, { batch_size: 1 });
|
|
28774
|
+
for (const name in enc_shapes) {
|
|
28775
|
+
const size = enc_shapes[name].reduce((a, b) => a * b, 1);
|
|
28776
|
+
enc_kv_cache[name] = new Tensor2(enc_dtype, new enc_cls(size), enc_shapes[name]);
|
|
28777
|
+
}
|
|
28778
|
+
const enc_padding_cache = new Tensor2(enc_dtype, new enc_cls(PADDING_CACHE_CHANNELS * CONV1_LEFT_PAD), [
|
|
28779
|
+
1,
|
|
28780
|
+
PADDING_CACHE_CHANNELS,
|
|
28781
|
+
CONV1_LEFT_PAD
|
|
28782
|
+
]);
|
|
28783
|
+
const chunks_iter = input_features[Symbol.asyncIterator]?.() ?? input_features[Symbol.iterator]?.();
|
|
28784
|
+
if (!chunks_iter) {
|
|
28785
|
+
throw new Error("input_features must be iterable or async iterable");
|
|
28786
|
+
}
|
|
28787
|
+
return {
|
|
28788
|
+
encoder_session,
|
|
28789
|
+
enc_kv_cache,
|
|
28790
|
+
enc_padding_cache,
|
|
28791
|
+
enc_past_seq_len: 0,
|
|
28792
|
+
audio_embed_queue: [],
|
|
28793
|
+
audio_embed_total_tokens: 0,
|
|
28794
|
+
audio_queue_offset: 0,
|
|
28795
|
+
audio_consumed: 0,
|
|
28796
|
+
stream_exhausted: false,
|
|
28797
|
+
chunks_iter,
|
|
28798
|
+
text_hidden_size: text_config.hidden_size
|
|
28799
|
+
};
|
|
28800
|
+
}
|
|
28801
|
+
async function encodeChunk(s, chunk_features) {
|
|
28802
|
+
const audio_seq_len = chunk_features.dims[2];
|
|
28803
|
+
const conv2_output_len = Math.floor((CONV2_LEFT_PAD + audio_seq_len - 3) / 2) + 1;
|
|
28804
|
+
const position_ids = new Tensor2(
|
|
28805
|
+
"int64",
|
|
28806
|
+
BigInt64Array.from({ length: conv2_output_len }, (_, i) => BigInt(s.enc_past_seq_len + i)),
|
|
28807
|
+
[1, conv2_output_len]
|
|
28808
|
+
);
|
|
28809
|
+
const total_seq_len = s.enc_past_seq_len + conv2_output_len;
|
|
28810
|
+
const attention_mask = ones([1, total_seq_len]);
|
|
28811
|
+
const { audio_embeds, present_padding_cache, ...present_cache } = await sessionRun(s.encoder_session, {
|
|
28812
|
+
input_features: chunk_features,
|
|
28813
|
+
attention_mask,
|
|
28814
|
+
position_ids,
|
|
28815
|
+
past_padding_cache: s.enc_padding_cache,
|
|
28816
|
+
...s.enc_kv_cache
|
|
28817
|
+
});
|
|
28818
|
+
if (s.enc_padding_cache.location === "gpu-buffer") {
|
|
28819
|
+
s.enc_padding_cache.dispose();
|
|
28820
|
+
}
|
|
28821
|
+
s.enc_padding_cache = present_padding_cache;
|
|
28822
|
+
for (const name in present_cache) {
|
|
28823
|
+
if (name.startsWith("present.")) {
|
|
28824
|
+
const pastName = name.replace("present", "past_key_values");
|
|
28825
|
+
const prev = s.enc_kv_cache[pastName];
|
|
28826
|
+
if (prev?.location === "gpu-buffer") {
|
|
28827
|
+
prev.dispose();
|
|
28828
|
+
}
|
|
28829
|
+
s.enc_kv_cache[pastName] = present_cache[name];
|
|
28830
|
+
}
|
|
28831
|
+
}
|
|
28832
|
+
s.enc_past_seq_len = total_seq_len;
|
|
28833
|
+
return audio_embeds;
|
|
28834
|
+
}
|
|
28835
|
+
async function fillAudioBuffer(s, needed) {
|
|
28836
|
+
while (s.audio_embed_total_tokens < needed && !s.stream_exhausted) {
|
|
28837
|
+
const result = await s.chunks_iter.next();
|
|
28838
|
+
if (result.done) {
|
|
28839
|
+
s.stream_exhausted = true;
|
|
28840
|
+
break;
|
|
28841
|
+
}
|
|
28842
|
+
const new_embeds = await encodeChunk(s, result.value);
|
|
28843
|
+
s.audio_embed_queue.push({ data: new_embeds.data, tokens: new_embeds.dims[1] });
|
|
28844
|
+
s.audio_embed_total_tokens += new_embeds.dims[1];
|
|
28845
|
+
}
|
|
28846
|
+
}
|
|
28847
|
+
function addAudioEmbeddings(s, inputs_embeds, current_len) {
|
|
28848
|
+
if (s.audio_embed_queue.length === 0) return;
|
|
28849
|
+
const embed_data = inputs_embeds.data;
|
|
28850
|
+
let embed_write_pos = 0;
|
|
28851
|
+
let remaining = current_len;
|
|
28852
|
+
while (remaining > 0 && s.audio_embed_queue.length > 0) {
|
|
28853
|
+
const front = s.audio_embed_queue[0];
|
|
28854
|
+
const available = front.tokens - s.audio_queue_offset;
|
|
28855
|
+
const n = Math.min(remaining, available);
|
|
28856
|
+
const src_offset = s.audio_queue_offset * s.text_hidden_size;
|
|
28857
|
+
for (let i = 0; i < n * s.text_hidden_size; ++i) {
|
|
28858
|
+
embed_data[embed_write_pos * s.text_hidden_size + i] += front.data[src_offset + i];
|
|
28859
|
+
}
|
|
28860
|
+
embed_write_pos += n;
|
|
28861
|
+
remaining -= n;
|
|
28862
|
+
s.audio_queue_offset += n;
|
|
28863
|
+
if (s.audio_queue_offset >= front.tokens) {
|
|
28864
|
+
s.audio_embed_queue.shift();
|
|
28865
|
+
s.audio_queue_offset = 0;
|
|
28866
|
+
}
|
|
28867
|
+
}
|
|
28868
|
+
s.audio_consumed += current_len - remaining;
|
|
28869
|
+
}
|
|
28870
|
+
var AudioExhaustedCriteria = class extends StoppingCriteria {
|
|
28871
|
+
constructor(enc_state) {
|
|
28872
|
+
super();
|
|
28873
|
+
this._s = enc_state;
|
|
28874
|
+
}
|
|
28875
|
+
_call(input_ids) {
|
|
28876
|
+
const done = this._s.stream_exhausted && this._s.audio_embed_queue.length === 0;
|
|
28877
|
+
return input_ids.map(() => done);
|
|
28878
|
+
}
|
|
28879
|
+
};
|
|
28880
|
+
var VoxtralRealtimePreTrainedModel = class extends PreTrainedModel {
|
|
28881
|
+
forward_params = ["input_ids", "attention_mask", "position_ids", "past_key_values"];
|
|
28882
|
+
};
|
|
28883
|
+
var VoxtralRealtimeForConditionalGeneration = class extends VoxtralRealtimePreTrainedModel {
|
|
28884
|
+
async forward({ input_ids, past_key_values, ...kwargs }) {
|
|
28885
|
+
const current_len = input_ids.dims[1];
|
|
28886
|
+
const enc = states.get(this);
|
|
28887
|
+
if (enc) {
|
|
28888
|
+
await fillAudioBuffer(enc, enc.audio_consumed + current_len);
|
|
28889
|
+
}
|
|
28890
|
+
const { inputs_embeds } = await sessionRun(this.sessions["embed_tokens"], { input_ids });
|
|
28891
|
+
if (enc) {
|
|
28892
|
+
addAudioEmbeddings(enc, inputs_embeds, current_len);
|
|
28893
|
+
}
|
|
28894
|
+
const decoder_feeds = { inputs_embeds, ...kwargs };
|
|
28895
|
+
this.addPastKeyValues(decoder_feeds, past_key_values);
|
|
28896
|
+
const session = this.sessions["decoder_model_merged"];
|
|
28897
|
+
const fixed = pick(decoder_feeds, session.inputNames);
|
|
28898
|
+
return await sessionRun(session, fixed);
|
|
28899
|
+
}
|
|
28900
|
+
async generate({ input_features, stopping_criteria: userStoppingCriteria, ...kwargs }) {
|
|
28901
|
+
if (!input_features) {
|
|
28902
|
+
throw new Error("input_features (generator/iterable) must be provided");
|
|
28903
|
+
}
|
|
28904
|
+
const enc_state = createEncoderState(this, input_features);
|
|
28905
|
+
states.set(this, enc_state);
|
|
28906
|
+
const stopping_criteria = new StoppingCriteriaList();
|
|
28907
|
+
stopping_criteria.push(new AudioExhaustedCriteria(enc_state));
|
|
28908
|
+
if (userStoppingCriteria) stopping_criteria.extend(userStoppingCriteria);
|
|
28909
|
+
try {
|
|
28910
|
+
return await super.generate({ ...kwargs, stopping_criteria });
|
|
28911
|
+
} finally {
|
|
28912
|
+
enc_state.enc_kv_cache.dispose();
|
|
28913
|
+
states.delete(this);
|
|
28914
|
+
}
|
|
28915
|
+
}
|
|
28916
|
+
};
|
|
28917
|
+
|
|
27743
28918
|
// src/models/wav2vec2_bert/modeling_wav2vec2_bert.js
|
|
27744
28919
|
var Wav2Vec2BertPreTrainedModel = class extends PreTrainedModel {
|
|
27745
28920
|
};
|
|
@@ -28364,6 +29539,7 @@ var MODEL_MAPPING_NAMES_DECODER_ONLY = /* @__PURE__ */ new Map([
|
|
|
28364
29539
|
["olmo", "OlmoModel"],
|
|
28365
29540
|
["olmo2", "Olmo2Model"],
|
|
28366
29541
|
["olmo3", "Olmo3Model"],
|
|
29542
|
+
["olmo_hybrid", "OlmoHybridModel"],
|
|
28367
29543
|
["mobilellm", "MobileLLMModel"],
|
|
28368
29544
|
["granite", "GraniteModel"],
|
|
28369
29545
|
["granitemoehybrid", "GraniteMoeHybridModel"],
|
|
@@ -28377,7 +29553,10 @@ var MODEL_MAPPING_NAMES_DECODER_ONLY = /* @__PURE__ */ new Map([
|
|
|
28377
29553
|
["glm", "GlmModel"],
|
|
28378
29554
|
["openelm", "OpenELMModel"],
|
|
28379
29555
|
["qwen2", "Qwen2Model"],
|
|
29556
|
+
["qwen2_moe", "Qwen2MoeModel"],
|
|
28380
29557
|
["qwen3", "Qwen3Model"],
|
|
29558
|
+
["qwen3_moe", "Qwen3MoeModel"],
|
|
29559
|
+
["qwen3_next", "Qwen3NextModel"],
|
|
28381
29560
|
["phi", "PhiModel"],
|
|
28382
29561
|
["phi3", "Phi3Model"],
|
|
28383
29562
|
["mpt", "MptModel"],
|
|
@@ -28385,7 +29564,7 @@ var MODEL_MAPPING_NAMES_DECODER_ONLY = /* @__PURE__ */ new Map([
|
|
|
28385
29564
|
["mistral", "MistralModel"],
|
|
28386
29565
|
["ministral", "MinistralModel"],
|
|
28387
29566
|
["ministral3", "Ministral3Model"],
|
|
28388
|
-
["ernie4_5", "
|
|
29567
|
+
["ernie4_5", "Ernie4_5ForCausalLM"],
|
|
28389
29568
|
["starcoder2", "Starcoder2Model"],
|
|
28390
29569
|
["falcon", "FalconModel"],
|
|
28391
29570
|
["falcon_h1", "FalconH1Model"],
|
|
@@ -28479,6 +29658,7 @@ var MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = /* @__PURE__ */ new Map([
|
|
|
28479
29658
|
["olmo", "OlmoForCausalLM"],
|
|
28480
29659
|
["olmo2", "Olmo2ForCausalLM"],
|
|
28481
29660
|
["olmo3", "Olmo3ForCausalLM"],
|
|
29661
|
+
["olmo_hybrid", "OlmoHybridForCausalLM"],
|
|
28482
29662
|
["mobilellm", "MobileLLMForCausalLM"],
|
|
28483
29663
|
["granite", "GraniteForCausalLM"],
|
|
28484
29664
|
["granitemoehybrid", "GraniteMoeHybridForCausalLM"],
|
|
@@ -28488,11 +29668,22 @@ var MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = /* @__PURE__ */ new Map([
|
|
|
28488
29668
|
["gemma2", "Gemma2ForCausalLM"],
|
|
28489
29669
|
["vaultgemma", "VaultGemmaForCausalLM"],
|
|
28490
29670
|
["gemma3_text", "Gemma3ForCausalLM"],
|
|
29671
|
+
["gemma3", "Gemma3ForCausalLM"],
|
|
28491
29672
|
["helium", "HeliumForCausalLM"],
|
|
28492
29673
|
["glm", "GlmForCausalLM"],
|
|
28493
29674
|
["openelm", "OpenELMForCausalLM"],
|
|
28494
29675
|
["qwen2", "Qwen2ForCausalLM"],
|
|
29676
|
+
["qwen2_moe", "Qwen2MoeForCausalLM"],
|
|
28495
29677
|
["qwen3", "Qwen3ForCausalLM"],
|
|
29678
|
+
["qwen3_moe", "Qwen3MoeForCausalLM"],
|
|
29679
|
+
["qwen3_next", "Qwen3NextForCausalLM"],
|
|
29680
|
+
["qwen2_vl", "Qwen2VLForCausalLM"],
|
|
29681
|
+
["qwen2_5_vl", "Qwen2_5_VLForCausalLM"],
|
|
29682
|
+
["qwen3_vl", "Qwen3VLForCausalLM"],
|
|
29683
|
+
["qwen3_vl_moe", "Qwen3VLMoeForCausalLM"],
|
|
29684
|
+
["qwen3_5", "Qwen3_5ForCausalLM"],
|
|
29685
|
+
["qwen3_5_moe", "Qwen3_5MoeForCausalLM"],
|
|
29686
|
+
["gemma3n", "Gemma3nForCausalLM"],
|
|
28496
29687
|
["phi", "PhiForCausalLM"],
|
|
28497
29688
|
["phi3", "Phi3ForCausalLM"],
|
|
28498
29689
|
["mpt", "MptForCausalLM"],
|
|
@@ -28501,7 +29692,7 @@ var MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = /* @__PURE__ */ new Map([
|
|
|
28501
29692
|
["mistral", "MistralForCausalLM"],
|
|
28502
29693
|
["ministral", "MinistralForCausalLM"],
|
|
28503
29694
|
["ministral3", "Ministral3ForCausalLM"],
|
|
28504
|
-
["ernie4_5", "
|
|
29695
|
+
["ernie4_5", "Ernie4_5ForCausalLM"],
|
|
28505
29696
|
["starcoder2", "Starcoder2ForCausalLM"],
|
|
28506
29697
|
["falcon", "FalconForCausalLM"],
|
|
28507
29698
|
["falcon_h1", "FalconH1ForCausalLM"],
|
|
@@ -28565,8 +29756,10 @@ var MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES = /* @__PURE__ */ new Map([
|
|
|
28565
29756
|
["qwen2_vl", "Qwen2VLForConditionalGeneration"],
|
|
28566
29757
|
["qwen2_5_vl", "Qwen2_5_VLForConditionalGeneration"],
|
|
28567
29758
|
["qwen3_vl", "Qwen3VLForConditionalGeneration"],
|
|
29759
|
+
["qwen3_vl_moe", "Qwen3VLMoeForConditionalGeneration"],
|
|
28568
29760
|
["qwen3_5", "Qwen3_5ForConditionalGeneration"],
|
|
28569
29761
|
["qwen3_5_moe", "Qwen3_5MoeForConditionalGeneration"],
|
|
29762
|
+
["lfm2_vl", "Lfm2VlForConditionalGeneration"],
|
|
28570
29763
|
["idefics3", "Idefics3ForConditionalGeneration"],
|
|
28571
29764
|
["smolvlm", "SmolVLMForConditionalGeneration"],
|
|
28572
29765
|
["paligemma", "PaliGemmaForConditionalGeneration"],
|
|
@@ -28575,8 +29768,10 @@ var MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES = /* @__PURE__ */ new Map([
|
|
|
28575
29768
|
["mistral3", "Mistral3ForConditionalGeneration"]
|
|
28576
29769
|
]);
|
|
28577
29770
|
var MODEL_FOR_AUDIO_TEXT_TO_TEXT_MAPPING_NAMES = /* @__PURE__ */ new Map([
|
|
29771
|
+
["granite_speech", "GraniteSpeechForConditionalGeneration"],
|
|
28578
29772
|
["ultravox", "UltravoxModel"],
|
|
28579
|
-
["voxtral", "VoxtralForConditionalGeneration"]
|
|
29773
|
+
["voxtral", "VoxtralForConditionalGeneration"],
|
|
29774
|
+
["voxtral_realtime", "VoxtralRealtimeForConditionalGeneration"]
|
|
28580
29775
|
]);
|
|
28581
29776
|
var MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES = /* @__PURE__ */ new Map([
|
|
28582
29777
|
["vision-encoder-decoder", "VisionEncoderDecoderModel"]
|
|
@@ -28759,24 +29954,37 @@ var CUSTOM_MAPPING = [
|
|
|
28759
29954
|
MODEL_TYPES.ImageAudioTextToText
|
|
28760
29955
|
],
|
|
28761
29956
|
["SupertonicForConditionalGeneration", SupertonicForConditionalGeneration, MODEL_TYPES.Supertonic],
|
|
28762
|
-
["ChatterboxModel", ChatterboxModel, MODEL_TYPES.Chatterbox]
|
|
29957
|
+
["ChatterboxModel", ChatterboxModel, MODEL_TYPES.Chatterbox],
|
|
29958
|
+
["Qwen2VLForCausalLM", Qwen2VLForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
|
|
29959
|
+
["Qwen2_5_VLForCausalLM", Qwen2_5_VLForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
|
|
29960
|
+
["Qwen3VLForCausalLM", Qwen3VLForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
|
|
29961
|
+
["Qwen3VLMoeForCausalLM", Qwen3VLMoeForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
|
|
29962
|
+
["Qwen3_5ForCausalLM", Qwen3_5ForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
|
|
29963
|
+
["Qwen3_5MoeForCausalLM", Qwen3_5MoeForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
|
|
29964
|
+
["Gemma3nForCausalLM", Gemma3nForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
|
|
29965
|
+
[
|
|
29966
|
+
"VoxtralRealtimeForConditionalGeneration",
|
|
29967
|
+
VoxtralRealtimeForConditionalGeneration,
|
|
29968
|
+
MODEL_TYPES.VoxtralRealtime
|
|
29969
|
+
]
|
|
28763
29970
|
];
|
|
28764
29971
|
for (const [name, model, type] of CUSTOM_MAPPING) {
|
|
28765
29972
|
MODEL_TYPE_MAPPING.set(name, type);
|
|
28766
29973
|
MODEL_CLASS_TO_NAME_MAPPING.set(model, name);
|
|
28767
29974
|
MODEL_NAME_TO_CLASS_MAPPING.set(name, model);
|
|
28768
29975
|
}
|
|
28769
|
-
var
|
|
29976
|
+
var CUSTOM_ARCHITECTURES_MAPPING = /* @__PURE__ */ new Map([
|
|
28770
29977
|
["modnet", MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES],
|
|
28771
29978
|
["birefnet", MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES],
|
|
28772
29979
|
["isnet", MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES],
|
|
28773
29980
|
["ben", MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES]
|
|
28774
29981
|
]);
|
|
28775
|
-
for (const [name, mapping] of
|
|
29982
|
+
for (const [name, mapping] of CUSTOM_ARCHITECTURES_MAPPING.entries()) {
|
|
28776
29983
|
mapping.set(name, "PreTrainedModel");
|
|
28777
29984
|
MODEL_TYPE_MAPPING.set(name, MODEL_TYPES.EncoderOnly);
|
|
28778
29985
|
MODEL_NAME_TO_CLASS_MAPPING.set(name, PreTrainedModel);
|
|
28779
29986
|
}
|
|
29987
|
+
var CUSTOM_ARCHITECTURES = new Set(CUSTOM_ARCHITECTURES_MAPPING.keys());
|
|
28780
29988
|
MODEL_TYPE_MAPPING.set("PreTrainedModel", MODEL_TYPES.EncoderOnly);
|
|
28781
29989
|
MODEL_CLASS_TO_NAME_MAPPING.set(PreTrainedModel, "PreTrainedModel");
|
|
28782
29990
|
var MODEL_MAPPINGS = {
|
|
@@ -28825,6 +30033,18 @@ var PretrainedMixin = class {
|
|
|
28825
30033
|
* the model type is not found in the mapping.
|
|
28826
30034
|
*/
|
|
28827
30035
|
static BASE_IF_FAIL = false;
|
|
30036
|
+
/**
|
|
30037
|
+
* Check whether this AutoModel class supports a given model type.
|
|
30038
|
+
* @param {string} model_type The model type from config (e.g., 'bert', 'whisper').
|
|
30039
|
+
* @returns {boolean} Whether this class can handle the given model type.
|
|
30040
|
+
*/
|
|
30041
|
+
static supports(model_type) {
|
|
30042
|
+
if (!this.MODEL_CLASS_MAPPINGS) return false;
|
|
30043
|
+
for (const mapping of this.MODEL_CLASS_MAPPINGS) {
|
|
30044
|
+
if (mapping.has(model_type)) return true;
|
|
30045
|
+
}
|
|
30046
|
+
return this.BASE_IF_FAIL;
|
|
30047
|
+
}
|
|
28828
30048
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
28829
30049
|
static async from_pretrained(pretrained_model_name_or_path, {
|
|
28830
30050
|
progress_callback = null,
|
|
@@ -28856,7 +30076,7 @@ var PretrainedMixin = class {
|
|
|
28856
30076
|
if (!this.MODEL_CLASS_MAPPINGS) {
|
|
28857
30077
|
throw new Error("`MODEL_CLASS_MAPPINGS` not implemented for this type of `AutoClass`: " + this.name);
|
|
28858
30078
|
}
|
|
28859
|
-
const model_type = options.config
|
|
30079
|
+
const { model_type } = options.config;
|
|
28860
30080
|
for (const MODEL_CLASS_MAPPING of this.MODEL_CLASS_MAPPINGS) {
|
|
28861
30081
|
let modelInfo = MODEL_CLASS_MAPPING.get(model_type);
|
|
28862
30082
|
if (!modelInfo) {
|
|
@@ -30208,40 +31428,30 @@ Pipeline {
|
|
|
30208
31428
|
// src/pipelines/index.js
|
|
30209
31429
|
var SUPPORTED_TASKS = Object.freeze({
|
|
30210
31430
|
"text-classification": {
|
|
30211
|
-
tokenizer: AutoTokenizer,
|
|
30212
31431
|
pipeline: TextClassificationPipeline,
|
|
30213
31432
|
model: AutoModelForSequenceClassification,
|
|
30214
31433
|
default: {
|
|
30215
|
-
// TODO: replace with original
|
|
30216
|
-
// "model": "distilbert-base-uncased-finetuned-sst-2-english",
|
|
30217
31434
|
model: "Xenova/distilbert-base-uncased-finetuned-sst-2-english"
|
|
30218
31435
|
},
|
|
30219
31436
|
type: "text"
|
|
30220
31437
|
},
|
|
30221
31438
|
"token-classification": {
|
|
30222
|
-
tokenizer: AutoTokenizer,
|
|
30223
31439
|
pipeline: TokenClassificationPipeline,
|
|
30224
31440
|
model: AutoModelForTokenClassification,
|
|
30225
31441
|
default: {
|
|
30226
|
-
// TODO: replace with original
|
|
30227
|
-
// "model": "Davlan/bert-base-multilingual-cased-ner-hrl",
|
|
30228
31442
|
model: "Xenova/bert-base-multilingual-cased-ner-hrl"
|
|
30229
31443
|
},
|
|
30230
31444
|
type: "text"
|
|
30231
31445
|
},
|
|
30232
31446
|
"question-answering": {
|
|
30233
|
-
tokenizer: AutoTokenizer,
|
|
30234
31447
|
pipeline: QuestionAnsweringPipeline,
|
|
30235
31448
|
model: AutoModelForQuestionAnswering,
|
|
30236
31449
|
default: {
|
|
30237
|
-
// TODO: replace with original
|
|
30238
|
-
// "model": "distilbert-base-cased-distilled-squad",
|
|
30239
31450
|
model: "Xenova/distilbert-base-cased-distilled-squad"
|
|
30240
31451
|
},
|
|
30241
31452
|
type: "text"
|
|
30242
31453
|
},
|
|
30243
31454
|
"fill-mask": {
|
|
30244
|
-
tokenizer: AutoTokenizer,
|
|
30245
31455
|
pipeline: FillMaskPipeline,
|
|
30246
31456
|
model: AutoModelForMaskedLM,
|
|
30247
31457
|
default: {
|
|
@@ -30251,40 +31461,30 @@ var SUPPORTED_TASKS = Object.freeze({
|
|
|
30251
31461
|
type: "text"
|
|
30252
31462
|
},
|
|
30253
31463
|
summarization: {
|
|
30254
|
-
tokenizer: AutoTokenizer,
|
|
30255
31464
|
pipeline: SummarizationPipeline,
|
|
30256
31465
|
model: AutoModelForSeq2SeqLM,
|
|
30257
31466
|
default: {
|
|
30258
|
-
// TODO: replace with original
|
|
30259
|
-
// "model": "sshleifer/distilbart-cnn-6-6",
|
|
30260
31467
|
model: "Xenova/distilbart-cnn-6-6"
|
|
30261
31468
|
},
|
|
30262
31469
|
type: "text"
|
|
30263
31470
|
},
|
|
30264
31471
|
translation: {
|
|
30265
|
-
tokenizer: AutoTokenizer,
|
|
30266
31472
|
pipeline: TranslationPipeline,
|
|
30267
31473
|
model: AutoModelForSeq2SeqLM,
|
|
30268
31474
|
default: {
|
|
30269
|
-
// TODO: replace with original
|
|
30270
|
-
// "model": "t5-small",
|
|
30271
31475
|
model: "Xenova/t5-small"
|
|
30272
31476
|
},
|
|
30273
31477
|
type: "text"
|
|
30274
31478
|
},
|
|
30275
31479
|
"text2text-generation": {
|
|
30276
|
-
tokenizer: AutoTokenizer,
|
|
30277
31480
|
pipeline: Text2TextGenerationPipeline,
|
|
30278
31481
|
model: AutoModelForSeq2SeqLM,
|
|
30279
31482
|
default: {
|
|
30280
|
-
// TODO: replace with original
|
|
30281
|
-
// "model": "google/flan-t5-small",
|
|
30282
31483
|
model: "Xenova/flan-t5-small"
|
|
30283
31484
|
},
|
|
30284
31485
|
type: "text"
|
|
30285
31486
|
},
|
|
30286
31487
|
"text-generation": {
|
|
30287
|
-
tokenizer: AutoTokenizer,
|
|
30288
31488
|
pipeline: TextGenerationPipeline,
|
|
30289
31489
|
model: AutoModelForCausalLM,
|
|
30290
31490
|
default: {
|
|
@@ -30294,12 +31494,9 @@ var SUPPORTED_TASKS = Object.freeze({
|
|
|
30294
31494
|
type: "text"
|
|
30295
31495
|
},
|
|
30296
31496
|
"zero-shot-classification": {
|
|
30297
|
-
tokenizer: AutoTokenizer,
|
|
30298
31497
|
pipeline: ZeroShotClassificationPipeline,
|
|
30299
31498
|
model: AutoModelForSequenceClassification,
|
|
30300
31499
|
default: {
|
|
30301
|
-
// TODO: replace with original
|
|
30302
|
-
// "model": "typeform/distilbert-base-uncased-mnli",
|
|
30303
31500
|
model: "Xenova/distilbert-base-uncased-mnli"
|
|
30304
31501
|
},
|
|
30305
31502
|
type: "text"
|
|
@@ -30307,47 +31504,30 @@ var SUPPORTED_TASKS = Object.freeze({
|
|
|
30307
31504
|
"audio-classification": {
|
|
30308
31505
|
pipeline: AudioClassificationPipeline,
|
|
30309
31506
|
model: AutoModelForAudioClassification,
|
|
30310
|
-
processor: AutoProcessor,
|
|
30311
31507
|
default: {
|
|
30312
|
-
// TODO: replace with original
|
|
30313
|
-
// "model": "superb/wav2vec2-base-superb-ks",
|
|
30314
31508
|
model: "Xenova/wav2vec2-base-superb-ks"
|
|
30315
31509
|
},
|
|
30316
31510
|
type: "audio"
|
|
30317
31511
|
},
|
|
30318
31512
|
"zero-shot-audio-classification": {
|
|
30319
|
-
tokenizer: AutoTokenizer,
|
|
30320
31513
|
pipeline: ZeroShotAudioClassificationPipeline,
|
|
30321
31514
|
model: AutoModel,
|
|
30322
|
-
processor: AutoProcessor,
|
|
30323
31515
|
default: {
|
|
30324
|
-
// TODO: replace with original
|
|
30325
|
-
// "model": "laion/clap-htsat-fused",
|
|
30326
31516
|
model: "Xenova/clap-htsat-unfused"
|
|
30327
31517
|
},
|
|
30328
31518
|
type: "multimodal"
|
|
30329
31519
|
},
|
|
30330
31520
|
"automatic-speech-recognition": {
|
|
30331
|
-
tokenizer: AutoTokenizer,
|
|
30332
31521
|
pipeline: AutomaticSpeechRecognitionPipeline,
|
|
30333
31522
|
model: [AutoModelForSpeechSeq2Seq, AutoModelForCTC],
|
|
30334
|
-
processor: AutoProcessor,
|
|
30335
31523
|
default: {
|
|
30336
|
-
// TODO: replace with original
|
|
30337
|
-
// "model": "openai/whisper-tiny.en",
|
|
30338
31524
|
model: "Xenova/whisper-tiny.en"
|
|
30339
31525
|
},
|
|
30340
31526
|
type: "multimodal"
|
|
30341
31527
|
},
|
|
30342
31528
|
"text-to-audio": {
|
|
30343
|
-
tokenizer: AutoTokenizer,
|
|
30344
31529
|
pipeline: TextToAudioPipeline,
|
|
30345
31530
|
model: [AutoModelForTextToWaveform, AutoModelForTextToSpectrogram],
|
|
30346
|
-
processor: [
|
|
30347
|
-
AutoProcessor,
|
|
30348
|
-
/* Some don't use a processor */
|
|
30349
|
-
null
|
|
30350
|
-
],
|
|
30351
31531
|
default: {
|
|
30352
31532
|
model: "onnx-community/Supertonic-TTS-ONNX",
|
|
30353
31533
|
dtype: "fp32"
|
|
@@ -30355,124 +31535,86 @@ var SUPPORTED_TASKS = Object.freeze({
|
|
|
30355
31535
|
type: "text"
|
|
30356
31536
|
},
|
|
30357
31537
|
"image-to-text": {
|
|
30358
|
-
tokenizer: AutoTokenizer,
|
|
30359
31538
|
pipeline: ImageToTextPipeline,
|
|
30360
31539
|
model: AutoModelForVision2Seq,
|
|
30361
|
-
processor: AutoProcessor,
|
|
30362
31540
|
default: {
|
|
30363
|
-
// TODO: replace with original
|
|
30364
|
-
// "model": "nlpconnect/vit-gpt2-image-captioning",
|
|
30365
31541
|
model: "Xenova/vit-gpt2-image-captioning"
|
|
30366
31542
|
},
|
|
30367
31543
|
type: "multimodal"
|
|
30368
31544
|
},
|
|
30369
31545
|
"image-classification": {
|
|
30370
|
-
// no tokenizer
|
|
30371
31546
|
pipeline: ImageClassificationPipeline,
|
|
30372
31547
|
model: AutoModelForImageClassification,
|
|
30373
|
-
processor: AutoProcessor,
|
|
30374
31548
|
default: {
|
|
30375
|
-
// TODO: replace with original
|
|
30376
|
-
// "model": "google/vit-base-patch16-224",
|
|
30377
31549
|
model: "Xenova/vit-base-patch16-224"
|
|
30378
31550
|
},
|
|
30379
31551
|
type: "multimodal"
|
|
30380
31552
|
},
|
|
30381
31553
|
"image-segmentation": {
|
|
30382
|
-
// no tokenizer
|
|
30383
31554
|
pipeline: ImageSegmentationPipeline,
|
|
30384
31555
|
model: [AutoModelForImageSegmentation, AutoModelForSemanticSegmentation, AutoModelForUniversalSegmentation],
|
|
30385
|
-
processor: AutoProcessor,
|
|
30386
31556
|
default: {
|
|
30387
|
-
// TODO: replace with original
|
|
30388
|
-
// "model": "facebook/detr-resnet-50-panoptic",
|
|
30389
31557
|
model: "Xenova/detr-resnet-50-panoptic"
|
|
30390
31558
|
},
|
|
30391
31559
|
type: "multimodal"
|
|
30392
31560
|
},
|
|
30393
31561
|
"background-removal": {
|
|
30394
|
-
// no tokenizer
|
|
30395
31562
|
pipeline: BackgroundRemovalPipeline,
|
|
30396
31563
|
model: [AutoModelForImageSegmentation, AutoModelForSemanticSegmentation, AutoModelForUniversalSegmentation],
|
|
30397
|
-
processor: AutoProcessor,
|
|
30398
31564
|
default: {
|
|
30399
31565
|
model: "Xenova/modnet"
|
|
30400
31566
|
},
|
|
30401
31567
|
type: "image"
|
|
30402
31568
|
},
|
|
30403
31569
|
"zero-shot-image-classification": {
|
|
30404
|
-
tokenizer: AutoTokenizer,
|
|
30405
31570
|
pipeline: ZeroShotImageClassificationPipeline,
|
|
30406
31571
|
model: AutoModel,
|
|
30407
|
-
processor: AutoProcessor,
|
|
30408
31572
|
default: {
|
|
30409
|
-
// TODO: replace with original
|
|
30410
|
-
// "model": "openai/clip-vit-base-patch32",
|
|
30411
31573
|
model: "Xenova/clip-vit-base-patch32"
|
|
30412
31574
|
},
|
|
30413
31575
|
type: "multimodal"
|
|
30414
31576
|
},
|
|
30415
31577
|
"object-detection": {
|
|
30416
|
-
// no tokenizer
|
|
30417
31578
|
pipeline: ObjectDetectionPipeline,
|
|
30418
31579
|
model: AutoModelForObjectDetection,
|
|
30419
|
-
processor: AutoProcessor,
|
|
30420
31580
|
default: {
|
|
30421
|
-
// TODO: replace with original
|
|
30422
|
-
// "model": "facebook/detr-resnet-50",
|
|
30423
31581
|
model: "Xenova/detr-resnet-50"
|
|
30424
31582
|
},
|
|
30425
31583
|
type: "multimodal"
|
|
30426
31584
|
},
|
|
30427
31585
|
"zero-shot-object-detection": {
|
|
30428
|
-
tokenizer: AutoTokenizer,
|
|
30429
31586
|
pipeline: ZeroShotObjectDetectionPipeline,
|
|
30430
31587
|
model: AutoModelForZeroShotObjectDetection,
|
|
30431
|
-
processor: AutoProcessor,
|
|
30432
31588
|
default: {
|
|
30433
|
-
// TODO: replace with original
|
|
30434
|
-
// "model": "google/owlvit-base-patch32",
|
|
30435
31589
|
model: "Xenova/owlvit-base-patch32"
|
|
30436
31590
|
},
|
|
30437
31591
|
type: "multimodal"
|
|
30438
31592
|
},
|
|
30439
31593
|
"document-question-answering": {
|
|
30440
|
-
tokenizer: AutoTokenizer,
|
|
30441
31594
|
pipeline: DocumentQuestionAnsweringPipeline,
|
|
30442
31595
|
model: AutoModelForDocumentQuestionAnswering,
|
|
30443
|
-
processor: AutoProcessor,
|
|
30444
31596
|
default: {
|
|
30445
|
-
// TODO: replace with original
|
|
30446
|
-
// "model": "naver-clova-ix/donut-base-finetuned-docvqa",
|
|
30447
31597
|
model: "Xenova/donut-base-finetuned-docvqa"
|
|
30448
31598
|
},
|
|
30449
31599
|
type: "multimodal"
|
|
30450
31600
|
},
|
|
30451
31601
|
"image-to-image": {
|
|
30452
|
-
// no tokenizer
|
|
30453
31602
|
pipeline: ImageToImagePipeline,
|
|
30454
31603
|
model: AutoModelForImageToImage,
|
|
30455
|
-
processor: AutoProcessor,
|
|
30456
31604
|
default: {
|
|
30457
|
-
// TODO: replace with original
|
|
30458
|
-
// "model": "caidas/swin2SR-classical-sr-x2-64",
|
|
30459
31605
|
model: "Xenova/swin2SR-classical-sr-x2-64"
|
|
30460
31606
|
},
|
|
30461
31607
|
type: "image"
|
|
30462
31608
|
},
|
|
30463
31609
|
"depth-estimation": {
|
|
30464
|
-
// no tokenizer
|
|
30465
31610
|
pipeline: DepthEstimationPipeline,
|
|
30466
31611
|
model: AutoModelForDepthEstimation,
|
|
30467
|
-
processor: AutoProcessor,
|
|
30468
31612
|
default: {
|
|
30469
31613
|
model: "onnx-community/depth-anything-v2-small"
|
|
30470
31614
|
},
|
|
30471
31615
|
type: "image"
|
|
30472
31616
|
},
|
|
30473
|
-
// This task serves as a useful interface for dealing with sentence-transformers (https://huggingface.co/sentence-transformers).
|
|
30474
31617
|
"feature-extraction": {
|
|
30475
|
-
tokenizer: AutoTokenizer,
|
|
30476
31618
|
pipeline: FeatureExtractionPipeline,
|
|
30477
31619
|
model: AutoModel,
|
|
30478
31620
|
default: {
|
|
@@ -30482,7 +31624,6 @@ var SUPPORTED_TASKS = Object.freeze({
|
|
|
30482
31624
|
type: "text"
|
|
30483
31625
|
},
|
|
30484
31626
|
"image-feature-extraction": {
|
|
30485
|
-
processor: AutoProcessor,
|
|
30486
31627
|
pipeline: ImageFeatureExtractionPipeline,
|
|
30487
31628
|
model: [AutoModelForImageFeatureExtraction, AutoModel],
|
|
30488
31629
|
default: {
|
|
@@ -30503,8 +31644,18 @@ var TASK_ALIASES = Object.freeze({
|
|
|
30503
31644
|
});
|
|
30504
31645
|
|
|
30505
31646
|
// src/utils/model_registry/get_model_files.js
|
|
31647
|
+
function get_config(modelId, { config = null, cache_dir = null, local_files_only = false, revision = "main" } = {}) {
|
|
31648
|
+
if (config !== null) {
|
|
31649
|
+
return AutoConfig.from_pretrained(modelId, { config, cache_dir, local_files_only, revision });
|
|
31650
|
+
}
|
|
31651
|
+
const key = JSON.stringify([modelId, cache_dir, local_files_only, revision]);
|
|
31652
|
+
return memoizePromise(
|
|
31653
|
+
key,
|
|
31654
|
+
() => AutoConfig.from_pretrained(modelId, { config, cache_dir, local_files_only, revision })
|
|
31655
|
+
);
|
|
31656
|
+
}
|
|
30506
31657
|
async function get_model_files(modelId, { config = null, dtype: overrideDtype = null, device: overrideDevice = null, model_file_name = null } = {}) {
|
|
30507
|
-
config = await
|
|
31658
|
+
config = await get_config(modelId, { config });
|
|
30508
31659
|
const files = [
|
|
30509
31660
|
// Add config.json (always loaded)
|
|
30510
31661
|
"config.json"
|
|
@@ -30534,6 +31685,15 @@ async function get_model_files(modelId, { config = null, dtype: overrideDtype =
|
|
|
30534
31685
|
modelType = mappedType;
|
|
30535
31686
|
foundInMapping = true;
|
|
30536
31687
|
}
|
|
31688
|
+
if (!foundInMapping) {
|
|
31689
|
+
for (const mapping of Object.values(MODEL_MAPPING_NAMES)) {
|
|
31690
|
+
if (mapping.has(config.model_type)) {
|
|
31691
|
+
modelType = MODEL_TYPE_MAPPING.get(mapping.get(config.model_type));
|
|
31692
|
+
foundInMapping = true;
|
|
31693
|
+
break;
|
|
31694
|
+
}
|
|
31695
|
+
}
|
|
31696
|
+
}
|
|
30537
31697
|
}
|
|
30538
31698
|
if (!foundInMapping) {
|
|
30539
31699
|
const archList = architectures.length > 0 ? architectures.join(", ") : "(none)";
|
|
@@ -30556,74 +31716,14 @@ async function get_model_files(modelId, { config = null, dtype: overrideDtype =
|
|
|
30556
31716
|
files.push(dataFilePath);
|
|
30557
31717
|
}
|
|
30558
31718
|
};
|
|
30559
|
-
const
|
|
30560
|
-
|
|
30561
|
-
add_model_file(
|
|
30562
|
-
|
|
30563
|
-
|
|
30564
|
-
|
|
30565
|
-
|
|
30566
|
-
|
|
30567
|
-
add_model_file("decoder_model_merged");
|
|
30568
|
-
files.push("generation_config.json");
|
|
30569
|
-
} else if (modelType === MODEL_TYPES.MaskGeneration) {
|
|
30570
|
-
add_model_file("model", "vision_encoder");
|
|
30571
|
-
add_model_file("prompt_encoder_mask_decoder");
|
|
30572
|
-
} else if (modelType === MODEL_TYPES.EncoderDecoder) {
|
|
30573
|
-
add_model_file("model", "encoder_model");
|
|
30574
|
-
add_model_file("decoder_model_merged");
|
|
30575
|
-
} else if (modelType === MODEL_TYPES.ImageTextToText) {
|
|
30576
|
-
add_model_file("embed_tokens");
|
|
30577
|
-
add_model_file("vision_encoder");
|
|
30578
|
-
add_model_file("decoder_model_merged");
|
|
30579
|
-
if (config.is_encoder_decoder) {
|
|
30580
|
-
add_model_file("model", "encoder_model");
|
|
30581
|
-
}
|
|
30582
|
-
files.push("generation_config.json");
|
|
30583
|
-
} else if (modelType === MODEL_TYPES.AudioTextToText) {
|
|
30584
|
-
add_model_file("embed_tokens");
|
|
30585
|
-
add_model_file("audio_encoder");
|
|
30586
|
-
add_model_file("decoder_model_merged");
|
|
30587
|
-
files.push("generation_config.json");
|
|
30588
|
-
} else if (modelType === MODEL_TYPES.ImageAudioTextToText) {
|
|
30589
|
-
add_model_file("embed_tokens");
|
|
30590
|
-
add_model_file("audio_encoder");
|
|
30591
|
-
add_model_file("vision_encoder");
|
|
30592
|
-
add_model_file("decoder_model_merged");
|
|
30593
|
-
files.push("generation_config.json");
|
|
30594
|
-
} else if (modelType === MODEL_TYPES.Musicgen) {
|
|
30595
|
-
add_model_file("model", "text_encoder");
|
|
30596
|
-
add_model_file("decoder_model_merged");
|
|
30597
|
-
add_model_file("encodec_decode");
|
|
30598
|
-
files.push("generation_config.json");
|
|
30599
|
-
} else if (modelType === MODEL_TYPES.MultiModality) {
|
|
30600
|
-
add_model_file("prepare_inputs_embeds");
|
|
30601
|
-
add_model_file("model", "language_model");
|
|
30602
|
-
add_model_file("lm_head");
|
|
30603
|
-
add_model_file("gen_head");
|
|
30604
|
-
add_model_file("gen_img_embeds");
|
|
30605
|
-
add_model_file("image_decode");
|
|
30606
|
-
files.push("generation_config.json");
|
|
30607
|
-
} else if (modelType === MODEL_TYPES.Phi3V) {
|
|
30608
|
-
add_model_file("prepare_inputs_embeds");
|
|
30609
|
-
add_model_file("model");
|
|
30610
|
-
add_model_file("vision_encoder");
|
|
30611
|
-
files.push("generation_config.json");
|
|
30612
|
-
} else if (modelType === MODEL_TYPES.Chatterbox) {
|
|
30613
|
-
add_model_file("embed_tokens");
|
|
30614
|
-
add_model_file("speech_encoder");
|
|
30615
|
-
add_model_file("model", "language_model");
|
|
30616
|
-
add_model_file("conditional_decoder");
|
|
30617
|
-
files.push("generation_config.json");
|
|
30618
|
-
} else if (modelType === MODEL_TYPES.AutoEncoder) {
|
|
30619
|
-
add_model_file("encoder_model");
|
|
30620
|
-
add_model_file("decoder_model");
|
|
30621
|
-
} else if (modelType === MODEL_TYPES.Supertonic) {
|
|
30622
|
-
add_model_file("text_encoder");
|
|
30623
|
-
add_model_file("latent_denoiser");
|
|
30624
|
-
add_model_file("voice_decoder");
|
|
30625
|
-
} else {
|
|
30626
|
-
add_model_file("model", singleModelName);
|
|
31719
|
+
const { sessions, optional_configs } = getSessionsConfig(modelType, config, { model_file_name });
|
|
31720
|
+
for (const [sessionKey, baseName] of Object.entries(sessions)) {
|
|
31721
|
+
add_model_file(sessionKey, baseName);
|
|
31722
|
+
}
|
|
31723
|
+
if (optional_configs) {
|
|
31724
|
+
for (const configFile of Object.values(optional_configs)) {
|
|
31725
|
+
files.push(configFile);
|
|
31726
|
+
}
|
|
30627
31727
|
}
|
|
30628
31728
|
return files;
|
|
30629
31729
|
}
|
|
@@ -30659,28 +31759,21 @@ async function get_files(modelId, {
|
|
|
30659
31759
|
}
|
|
30660
31760
|
|
|
30661
31761
|
// src/utils/model_registry/get_pipeline_files.js
|
|
30662
|
-
function get_task_components(task) {
|
|
30663
|
-
const taskConfig = SUPPORTED_TASKS[task];
|
|
30664
|
-
if (!taskConfig) {
|
|
30665
|
-
return null;
|
|
30666
|
-
}
|
|
30667
|
-
return {
|
|
30668
|
-
tokenizer: !!taskConfig.tokenizer,
|
|
30669
|
-
processor: !!taskConfig.processor
|
|
30670
|
-
};
|
|
30671
|
-
}
|
|
30672
31762
|
async function get_pipeline_files(task, modelId, options = {}) {
|
|
30673
31763
|
task = TASK_ALIASES[task] ?? task;
|
|
30674
|
-
const
|
|
30675
|
-
if (!
|
|
31764
|
+
const taskConfig = SUPPORTED_TASKS[task];
|
|
31765
|
+
if (!taskConfig) {
|
|
30676
31766
|
throw new Error(
|
|
30677
31767
|
`Unsupported pipeline task: ${task}. Must be one of [${Object.keys(SUPPORTED_TASKS).join(", ")}]`
|
|
30678
31768
|
);
|
|
30679
31769
|
}
|
|
31770
|
+
const { type } = taskConfig;
|
|
31771
|
+
const include_tokenizer = type !== "audio" && type !== "image";
|
|
31772
|
+
const include_processor = type !== "text";
|
|
30680
31773
|
return get_files(modelId, {
|
|
30681
31774
|
...options,
|
|
30682
|
-
include_tokenizer
|
|
30683
|
-
include_processor
|
|
31775
|
+
include_tokenizer,
|
|
31776
|
+
include_processor
|
|
30684
31777
|
});
|
|
30685
31778
|
}
|
|
30686
31779
|
|
|
@@ -30710,12 +31803,12 @@ async function pipeline(task, model = null, {
|
|
|
30710
31803
|
dtype = pipelineInfo.default.dtype;
|
|
30711
31804
|
}
|
|
30712
31805
|
}
|
|
31806
|
+
const expected_files = await get_pipeline_files(task, model, {
|
|
31807
|
+
device,
|
|
31808
|
+
dtype
|
|
31809
|
+
});
|
|
30713
31810
|
let files_loading = {};
|
|
30714
31811
|
if (progress_callback) {
|
|
30715
|
-
const expected_files = await get_pipeline_files(task, model, {
|
|
30716
|
-
device,
|
|
30717
|
-
dtype
|
|
30718
|
-
});
|
|
30719
31812
|
const metadata = await Promise.all(expected_files.map(async (file) => get_file_metadata(model, file)));
|
|
30720
31813
|
metadata.forEach((m, i) => {
|
|
30721
31814
|
if (m.exists) {
|
|
@@ -30761,13 +31854,31 @@ async function pipeline(task, model = null, {
|
|
|
30761
31854
|
model_file_name,
|
|
30762
31855
|
session_options
|
|
30763
31856
|
};
|
|
30764
|
-
const
|
|
30765
|
-
|
|
30766
|
-
|
|
30767
|
-
|
|
31857
|
+
const hasTokenizer = expected_files.includes("tokenizer.json");
|
|
31858
|
+
const hasProcessor = expected_files.includes("preprocessor_config.json");
|
|
31859
|
+
const modelClasses = pipelineInfo.model;
|
|
31860
|
+
let modelPromise;
|
|
31861
|
+
if (Array.isArray(modelClasses)) {
|
|
31862
|
+
const resolvedConfig = config ?? await AutoConfig.from_pretrained(model, pretrainedOptions);
|
|
31863
|
+
const { model_type } = resolvedConfig;
|
|
31864
|
+
const matchedClass = modelClasses.find((cls) => cls.supports(model_type));
|
|
31865
|
+
if (!matchedClass) {
|
|
31866
|
+
throw Error(
|
|
31867
|
+
`Unsupported model type "${model_type}" for task "${task}". None of the candidate model classes support this type.`
|
|
31868
|
+
);
|
|
31869
|
+
}
|
|
31870
|
+
modelPromise = matchedClass.from_pretrained(model, { ...pretrainedOptions, config: resolvedConfig });
|
|
31871
|
+
} else {
|
|
31872
|
+
modelPromise = modelClasses.from_pretrained(model, pretrainedOptions);
|
|
31873
|
+
}
|
|
31874
|
+
const [tokenizer, processor, model_loaded] = await Promise.all([
|
|
31875
|
+
hasTokenizer ? AutoTokenizer.from_pretrained(model, pretrainedOptions) : null,
|
|
31876
|
+
hasProcessor ? AutoProcessor.from_pretrained(model, pretrainedOptions) : null,
|
|
31877
|
+
modelPromise
|
|
30768
31878
|
]);
|
|
30769
|
-
const results =
|
|
30770
|
-
results.
|
|
31879
|
+
const results = { task, model: model_loaded };
|
|
31880
|
+
if (tokenizer) results.tokenizer = tokenizer;
|
|
31881
|
+
if (processor) results.processor = processor;
|
|
30771
31882
|
dispatchCallback(progress_callback, {
|
|
30772
31883
|
status: "ready",
|
|
30773
31884
|
task,
|
|
@@ -30776,48 +31887,6 @@ async function pipeline(task, model = null, {
|
|
|
30776
31887
|
const pipelineClass = pipelineInfo.pipeline;
|
|
30777
31888
|
return new pipelineClass(results);
|
|
30778
31889
|
}
|
|
30779
|
-
async function loadItems(mapping, model, pretrainedOptions) {
|
|
30780
|
-
const result = /* @__PURE__ */ Object.create(null);
|
|
30781
|
-
const promises = [];
|
|
30782
|
-
for (const [name, cls] of mapping.entries()) {
|
|
30783
|
-
if (!cls) continue;
|
|
30784
|
-
let promise;
|
|
30785
|
-
if (Array.isArray(cls)) {
|
|
30786
|
-
promise = new Promise(async (resolve, reject) => {
|
|
30787
|
-
let e;
|
|
30788
|
-
for (const c of cls) {
|
|
30789
|
-
if (c === null) {
|
|
30790
|
-
resolve(null);
|
|
30791
|
-
return;
|
|
30792
|
-
}
|
|
30793
|
-
try {
|
|
30794
|
-
resolve(await c.from_pretrained(model, pretrainedOptions));
|
|
30795
|
-
return;
|
|
30796
|
-
} catch (err) {
|
|
30797
|
-
if (err.message?.includes("Unsupported model type")) {
|
|
30798
|
-
e = err;
|
|
30799
|
-
} else if (err.message?.includes("Could not locate file")) {
|
|
30800
|
-
e = err;
|
|
30801
|
-
} else {
|
|
30802
|
-
reject(err);
|
|
30803
|
-
return;
|
|
30804
|
-
}
|
|
30805
|
-
}
|
|
30806
|
-
}
|
|
30807
|
-
reject(e);
|
|
30808
|
-
});
|
|
30809
|
-
} else {
|
|
30810
|
-
promise = cls.from_pretrained(model, pretrainedOptions);
|
|
30811
|
-
}
|
|
30812
|
-
result[name] = promise;
|
|
30813
|
-
promises.push(promise);
|
|
30814
|
-
}
|
|
30815
|
-
await Promise.all(promises);
|
|
30816
|
-
for (const [name, promise] of Object.entries(result)) {
|
|
30817
|
-
result[name] = await promise;
|
|
30818
|
-
}
|
|
30819
|
-
return result;
|
|
30820
|
-
}
|
|
30821
31890
|
|
|
30822
31891
|
// src/generation/streamers.js
|
|
30823
31892
|
var is_chinese_char2 = (cp) => cp >= 19968 && cp <= 40959 || cp >= 13312 && cp <= 19903 || cp >= 131072 && cp <= 173791 || cp >= 173824 && cp <= 177983 || cp >= 177984 && cp <= 178207 || cp >= 178208 && cp <= 183983 || cp >= 63744 && cp <= 64255 || cp >= 194560 && cp <= 195103;
|
|
@@ -31105,21 +32174,38 @@ async function load_video(src, { num_frames = null, fps = null } = {}) {
|
|
|
31105
32174
|
|
|
31106
32175
|
// src/utils/model_registry/is_cached.js
|
|
31107
32176
|
async function check_files_cache(modelId, files, options = {}) {
|
|
31108
|
-
const
|
|
31109
|
-
if (!
|
|
32177
|
+
const cache2 = await getCache(options?.cache_dir);
|
|
32178
|
+
if (!cache2) {
|
|
31110
32179
|
const fileStatuses2 = files.map((filename) => ({ file: filename, cached: false }));
|
|
31111
32180
|
return { allCached: false, files: fileStatuses2 };
|
|
31112
32181
|
}
|
|
31113
32182
|
const fileStatuses = await Promise.all(
|
|
31114
32183
|
files.map(async (filename) => {
|
|
31115
|
-
const { localPath, proposedCacheKey } = buildResourcePaths(modelId, filename, options,
|
|
31116
|
-
const cached = await checkCachedResource(
|
|
32184
|
+
const { localPath, proposedCacheKey } = buildResourcePaths(modelId, filename, options, cache2);
|
|
32185
|
+
const cached = await checkCachedResource(cache2, localPath, proposedCacheKey);
|
|
31117
32186
|
return { file: filename, cached: !!cached };
|
|
31118
32187
|
})
|
|
31119
32188
|
);
|
|
31120
32189
|
return { allCached: fileStatuses.every((f) => f.cached), files: fileStatuses };
|
|
31121
32190
|
}
|
|
32191
|
+
async function is_file_cached(modelId, filename, options = {}) {
|
|
32192
|
+
const cache2 = await getCache(options?.cache_dir);
|
|
32193
|
+
if (!cache2) return false;
|
|
32194
|
+
const { localPath, proposedCacheKey } = buildResourcePaths(modelId, filename, options, cache2);
|
|
32195
|
+
return !!await checkCachedResource(cache2, localPath, proposedCacheKey);
|
|
32196
|
+
}
|
|
31122
32197
|
async function is_cached(modelId, options = {}) {
|
|
32198
|
+
if (!modelId) {
|
|
32199
|
+
throw new Error("modelId is required");
|
|
32200
|
+
}
|
|
32201
|
+
if (!await is_file_cached(modelId, "config.json", options)) {
|
|
32202
|
+
return false;
|
|
32203
|
+
}
|
|
32204
|
+
const files = await get_files(modelId, options);
|
|
32205
|
+
const result = await check_files_cache(modelId, files, options);
|
|
32206
|
+
return result.allCached;
|
|
32207
|
+
}
|
|
32208
|
+
async function is_cached_files(modelId, options = {}) {
|
|
31123
32209
|
if (!modelId) {
|
|
31124
32210
|
throw new Error("modelId is required");
|
|
31125
32211
|
}
|
|
@@ -31127,6 +32213,20 @@ async function is_cached(modelId, options = {}) {
|
|
|
31127
32213
|
return await check_files_cache(modelId, files, options);
|
|
31128
32214
|
}
|
|
31129
32215
|
async function is_pipeline_cached(task, modelId, options = {}) {
|
|
32216
|
+
if (!task) {
|
|
32217
|
+
throw new Error("task is required");
|
|
32218
|
+
}
|
|
32219
|
+
if (!modelId) {
|
|
32220
|
+
throw new Error("modelId is required");
|
|
32221
|
+
}
|
|
32222
|
+
if (!await is_file_cached(modelId, "config.json", options)) {
|
|
32223
|
+
return false;
|
|
32224
|
+
}
|
|
32225
|
+
const files = await get_pipeline_files(task, modelId, options);
|
|
32226
|
+
const result = await check_files_cache(modelId, files, options);
|
|
32227
|
+
return result.allCached;
|
|
32228
|
+
}
|
|
32229
|
+
async function is_pipeline_cached_files(task, modelId, options = {}) {
|
|
31130
32230
|
if (!task) {
|
|
31131
32231
|
throw new Error("task is required");
|
|
31132
32232
|
}
|
|
@@ -31139,26 +32239,26 @@ async function is_pipeline_cached(task, modelId, options = {}) {
|
|
|
31139
32239
|
|
|
31140
32240
|
// src/utils/model_registry/clear_cache.js
|
|
31141
32241
|
async function clear_files_from_cache(modelId, files, options = {}) {
|
|
31142
|
-
const
|
|
31143
|
-
if (!
|
|
32242
|
+
const cache2 = await getCache(options?.cache_dir);
|
|
32243
|
+
if (!cache2) {
|
|
31144
32244
|
return {
|
|
31145
32245
|
filesDeleted: 0,
|
|
31146
32246
|
filesCached: 0,
|
|
31147
32247
|
files: files.map((filename) => ({ file: filename, deleted: false, wasCached: false }))
|
|
31148
32248
|
};
|
|
31149
32249
|
}
|
|
31150
|
-
if (!
|
|
32250
|
+
if (!cache2.delete) {
|
|
31151
32251
|
throw new Error("Cache does not support delete operation");
|
|
31152
32252
|
}
|
|
31153
32253
|
const results = await Promise.all(
|
|
31154
32254
|
files.map(async (filename) => {
|
|
31155
|
-
const { localPath, proposedCacheKey } = buildResourcePaths(modelId, filename, options,
|
|
31156
|
-
const cached = await checkCachedResource(
|
|
32255
|
+
const { localPath, proposedCacheKey } = buildResourcePaths(modelId, filename, options, cache2);
|
|
32256
|
+
const cached = await checkCachedResource(cache2, localPath, proposedCacheKey);
|
|
31157
32257
|
const wasCached = !!cached;
|
|
31158
32258
|
let deleted = false;
|
|
31159
32259
|
if (wasCached) {
|
|
31160
|
-
const deletedWithProposed = await
|
|
31161
|
-
const deletedWithLocal = !deletedWithProposed && proposedCacheKey !== localPath ? await
|
|
32260
|
+
const deletedWithProposed = await cache2.delete(proposedCacheKey);
|
|
32261
|
+
const deletedWithLocal = !deletedWithProposed && proposedCacheKey !== localPath ? await cache2.delete(localPath) : false;
|
|
31162
32262
|
deleted = deletedWithProposed || deletedWithLocal;
|
|
31163
32263
|
}
|
|
31164
32264
|
return { file: filename, deleted, wasCached };
|
|
@@ -31275,26 +32375,30 @@ var ModelRegistry = class {
|
|
|
31275
32375
|
return get_processor_files(modelId);
|
|
31276
32376
|
}
|
|
31277
32377
|
/**
|
|
31278
|
-
*
|
|
32378
|
+
* Quickly checks if a model is fully cached by verifying `config.json` is present,
|
|
32379
|
+
* then confirming all required files are cached.
|
|
32380
|
+
* Returns a plain boolean — use `is_cached_files` if you need per-file detail.
|
|
31279
32381
|
*
|
|
31280
32382
|
* @param {string} modelId - The model id
|
|
31281
32383
|
* @param {Object} [options] - Optional parameters
|
|
32384
|
+
* @param {string} [options.cache_dir] - Custom cache directory
|
|
32385
|
+
* @param {string} [options.revision] - Model revision (default: 'main')
|
|
32386
|
+
* @param {import('../../configs.js').PretrainedConfig} [options.config] - Pre-loaded config
|
|
31282
32387
|
* @param {import('../dtypes.js').DataType|Record<string, import('../dtypes.js').DataType>} [options.dtype=null] - Override dtype
|
|
31283
32388
|
* @param {import('../devices.js').DeviceType|Record<string, import('../devices.js').DeviceType>} [options.device=null] - Override device
|
|
31284
|
-
* @returns {Promise<
|
|
32389
|
+
* @returns {Promise<boolean>} Whether all required files are cached
|
|
31285
32390
|
*
|
|
31286
32391
|
* @example
|
|
31287
|
-
* const
|
|
31288
|
-
* console.log(
|
|
32392
|
+
* const cached = await ModelRegistry.is_cached('onnx-community/bert-base-uncased-ONNX');
|
|
32393
|
+
* console.log(cached); // true or false
|
|
31289
32394
|
*/
|
|
31290
32395
|
static async is_cached(modelId, options = {}) {
|
|
31291
32396
|
return is_cached(modelId, options);
|
|
31292
32397
|
}
|
|
31293
32398
|
/**
|
|
31294
|
-
*
|
|
31295
|
-
* Automatically determines which
|
|
32399
|
+
* Checks if all files for a given model are already cached, with per-file detail.
|
|
32400
|
+
* Automatically determines which files are needed using get_files().
|
|
31296
32401
|
*
|
|
31297
|
-
* @param {string} task - The pipeline task (e.g., "text-generation", "background-removal")
|
|
31298
32402
|
* @param {string} modelId - The model id
|
|
31299
32403
|
* @param {Object} [options] - Optional parameters
|
|
31300
32404
|
* @param {string} [options.cache_dir] - Custom cache directory
|
|
@@ -31305,12 +32409,57 @@ var ModelRegistry = class {
|
|
|
31305
32409
|
* @returns {Promise<import('./is_cached.js').CacheCheckResult>} Object with allCached boolean and files array with cache status
|
|
31306
32410
|
*
|
|
31307
32411
|
* @example
|
|
31308
|
-
* const status = await ModelRegistry.
|
|
32412
|
+
* const status = await ModelRegistry.is_cached_files('onnx-community/bert-base-uncased-ONNX');
|
|
31309
32413
|
* console.log(status.allCached); // true or false
|
|
32414
|
+
* console.log(status.files); // [{ file: 'config.json', cached: true }, ...]
|
|
32415
|
+
*/
|
|
32416
|
+
static async is_cached_files(modelId, options = {}) {
|
|
32417
|
+
return is_cached_files(modelId, options);
|
|
32418
|
+
}
|
|
32419
|
+
/**
|
|
32420
|
+
* Quickly checks if all files for a specific pipeline task are cached by verifying
|
|
32421
|
+
* `config.json` is present, then confirming all required files are cached.
|
|
32422
|
+
* Returns a plain boolean — use `is_pipeline_cached_files` if you need per-file detail.
|
|
32423
|
+
*
|
|
32424
|
+
* @param {string} task - The pipeline task (e.g., "text-generation", "background-removal")
|
|
32425
|
+
* @param {string} modelId - The model id
|
|
32426
|
+
* @param {Object} [options] - Optional parameters
|
|
32427
|
+
* @param {string} [options.cache_dir] - Custom cache directory
|
|
32428
|
+
* @param {string} [options.revision] - Model revision (default: 'main')
|
|
32429
|
+
* @param {import('../../configs.js').PretrainedConfig} [options.config] - Pre-loaded config
|
|
32430
|
+
* @param {import('../dtypes.js').DataType|Record<string, import('../dtypes.js').DataType>} [options.dtype=null] - Override dtype
|
|
32431
|
+
* @param {import('../devices.js').DeviceType|Record<string, import('../devices.js').DeviceType>} [options.device=null] - Override device
|
|
32432
|
+
* @returns {Promise<boolean>} Whether all required files are cached
|
|
32433
|
+
*
|
|
32434
|
+
* @example
|
|
32435
|
+
* const cached = await ModelRegistry.is_pipeline_cached('text-generation', 'onnx-community/gpt2-ONNX');
|
|
32436
|
+
* console.log(cached); // true or false
|
|
31310
32437
|
*/
|
|
31311
32438
|
static async is_pipeline_cached(task, modelId, options = {}) {
|
|
31312
32439
|
return is_pipeline_cached(task, modelId, options);
|
|
31313
32440
|
}
|
|
32441
|
+
/**
|
|
32442
|
+
* Checks if all files for a specific pipeline task are already cached, with per-file detail.
|
|
32443
|
+
* Automatically determines which components are needed based on the task.
|
|
32444
|
+
*
|
|
32445
|
+
* @param {string} task - The pipeline task (e.g., "text-generation", "background-removal")
|
|
32446
|
+
* @param {string} modelId - The model id
|
|
32447
|
+
* @param {Object} [options] - Optional parameters
|
|
32448
|
+
* @param {string} [options.cache_dir] - Custom cache directory
|
|
32449
|
+
* @param {string} [options.revision] - Model revision (default: 'main')
|
|
32450
|
+
* @param {import('../../configs.js').PretrainedConfig} [options.config] - Pre-loaded config
|
|
32451
|
+
* @param {import('../dtypes.js').DataType|Record<string, import('../dtypes.js').DataType>} [options.dtype=null] - Override dtype
|
|
32452
|
+
* @param {import('../devices.js').DeviceType|Record<string, import('../devices.js').DeviceType>} [options.device=null] - Override device
|
|
32453
|
+
* @returns {Promise<import('./is_cached.js').CacheCheckResult>} Object with allCached boolean and files array with cache status
|
|
32454
|
+
*
|
|
32455
|
+
* @example
|
|
32456
|
+
* const status = await ModelRegistry.is_pipeline_cached_files('text-generation', 'onnx-community/gpt2-ONNX');
|
|
32457
|
+
* console.log(status.allCached); // true or false
|
|
32458
|
+
* console.log(status.files); // [{ file: 'config.json', cached: true }, ...]
|
|
32459
|
+
*/
|
|
32460
|
+
static async is_pipeline_cached_files(task, modelId, options = {}) {
|
|
32461
|
+
return is_pipeline_cached_files(task, modelId, options);
|
|
32462
|
+
}
|
|
31314
32463
|
/**
|
|
31315
32464
|
* Get metadata for a specific file without downloading it.
|
|
31316
32465
|
*
|
|
@@ -31590,6 +32739,7 @@ export {
|
|
|
31590
32739
|
DonutImageProcessor,
|
|
31591
32740
|
DonutSwinModel,
|
|
31592
32741
|
DonutSwinPreTrainedModel,
|
|
32742
|
+
DynamicCache,
|
|
31593
32743
|
EdgeTamModel,
|
|
31594
32744
|
EfficientNetForImageClassification,
|
|
31595
32745
|
EfficientNetImageProcessor,
|
|
@@ -31662,6 +32812,7 @@ export {
|
|
|
31662
32812
|
Gemma3Model,
|
|
31663
32813
|
Gemma3PreTrainedModel,
|
|
31664
32814
|
Gemma3nAudioFeatureExtractor,
|
|
32815
|
+
Gemma3nForCausalLM,
|
|
31665
32816
|
Gemma3nForConditionalGeneration,
|
|
31666
32817
|
Gemma3nPreTrainedModel,
|
|
31667
32818
|
Gemma3nProcessor,
|
|
@@ -31681,6 +32832,9 @@ export {
|
|
|
31681
32832
|
GraniteMoeHybridModel,
|
|
31682
32833
|
GraniteMoeHybridPreTrainedModel,
|
|
31683
32834
|
GranitePreTrainedModel,
|
|
32835
|
+
GraniteSpeechFeatureExtractor,
|
|
32836
|
+
GraniteSpeechForConditionalGeneration,
|
|
32837
|
+
GraniteSpeechProcessor,
|
|
31684
32838
|
GroundingDinoForObjectDetection,
|
|
31685
32839
|
GroundingDinoImageProcessor,
|
|
31686
32840
|
GroundingDinoPreTrainedModel,
|
|
@@ -31706,7 +32860,6 @@ export {
|
|
|
31706
32860
|
IJepaPreTrainedModel,
|
|
31707
32861
|
Idefics3ForConditionalGeneration,
|
|
31708
32862
|
Idefics3ImageProcessor,
|
|
31709
|
-
Idefics3PreTrainedModel,
|
|
31710
32863
|
Idefics3Processor,
|
|
31711
32864
|
ImageClassificationPipeline,
|
|
31712
32865
|
ImageFeatureExtractionPipeline,
|
|
@@ -31731,6 +32884,9 @@ export {
|
|
|
31731
32884
|
Lfm2MoeModel,
|
|
31732
32885
|
Lfm2MoePreTrainedModel,
|
|
31733
32886
|
Lfm2PreTrainedModel,
|
|
32887
|
+
Lfm2VlForConditionalGeneration,
|
|
32888
|
+
Lfm2VlImageProcessor,
|
|
32889
|
+
Lfm2VlProcessor,
|
|
31734
32890
|
LiteWhisperForConditionalGeneration,
|
|
31735
32891
|
Llama4ForCausalLM,
|
|
31736
32892
|
Llama4PreTrainedModel,
|
|
@@ -31895,6 +33051,9 @@ export {
|
|
|
31895
33051
|
Olmo3Model,
|
|
31896
33052
|
Olmo3PreTrainedModel,
|
|
31897
33053
|
OlmoForCausalLM,
|
|
33054
|
+
OlmoHybridForCausalLM,
|
|
33055
|
+
OlmoHybridModel,
|
|
33056
|
+
OlmoHybridPreTrainedModel,
|
|
31898
33057
|
OlmoModel,
|
|
31899
33058
|
OlmoPreTrainedModel,
|
|
31900
33059
|
OpenELMForCausalLM,
|
|
@@ -31911,7 +33070,6 @@ export {
|
|
|
31911
33070
|
Owlv2Model,
|
|
31912
33071
|
Owlv2PreTrainedModel,
|
|
31913
33072
|
PaliGemmaForConditionalGeneration,
|
|
31914
|
-
PaliGemmaPreTrainedModel,
|
|
31915
33073
|
PaliGemmaProcessor,
|
|
31916
33074
|
ParakeetFeatureExtractor,
|
|
31917
33075
|
ParakeetForCTC,
|
|
@@ -31950,20 +33108,36 @@ export {
|
|
|
31950
33108
|
QuestionAnsweringPipeline,
|
|
31951
33109
|
Qwen2ForCausalLM,
|
|
31952
33110
|
Qwen2Model,
|
|
33111
|
+
Qwen2MoeForCausalLM,
|
|
33112
|
+
Qwen2MoeModel,
|
|
33113
|
+
Qwen2MoePreTrainedModel,
|
|
31953
33114
|
Qwen2PreTrainedModel,
|
|
31954
33115
|
Qwen2Tokenizer,
|
|
33116
|
+
Qwen2VLForCausalLM,
|
|
31955
33117
|
Qwen2VLForConditionalGeneration,
|
|
31956
33118
|
Qwen2VLImageProcessor,
|
|
31957
33119
|
Qwen2VLPreTrainedModel,
|
|
31958
33120
|
Qwen2VLProcessor,
|
|
33121
|
+
Qwen2_5_VLForCausalLM,
|
|
31959
33122
|
Qwen2_5_VLForConditionalGeneration,
|
|
31960
33123
|
Qwen2_5_VLProcessor,
|
|
31961
33124
|
Qwen3ForCausalLM,
|
|
31962
33125
|
Qwen3Model,
|
|
33126
|
+
Qwen3MoeForCausalLM,
|
|
33127
|
+
Qwen3MoeModel,
|
|
33128
|
+
Qwen3MoePreTrainedModel,
|
|
33129
|
+
Qwen3NextForCausalLM,
|
|
33130
|
+
Qwen3NextModel,
|
|
33131
|
+
Qwen3NextPreTrainedModel,
|
|
31963
33132
|
Qwen3PreTrainedModel,
|
|
33133
|
+
Qwen3VLForCausalLM,
|
|
31964
33134
|
Qwen3VLForConditionalGeneration,
|
|
33135
|
+
Qwen3VLMoeForCausalLM,
|
|
33136
|
+
Qwen3VLMoeForConditionalGeneration,
|
|
31965
33137
|
Qwen3VLProcessor,
|
|
33138
|
+
Qwen3_5ForCausalLM,
|
|
31966
33139
|
Qwen3_5ForConditionalGeneration,
|
|
33140
|
+
Qwen3_5MoeForCausalLM,
|
|
31967
33141
|
Qwen3_5MoeForConditionalGeneration,
|
|
31968
33142
|
RFDetrForObjectDetection,
|
|
31969
33143
|
RFDetrModel,
|
|
@@ -32035,7 +33209,6 @@ export {
|
|
|
32035
33209
|
SmolLM3ForCausalLM,
|
|
32036
33210
|
SmolLM3Model,
|
|
32037
33211
|
SmolLM3PreTrainedModel,
|
|
32038
|
-
SmolVLMForConditionalGeneration,
|
|
32039
33212
|
Idefics3ImageProcessor as SmolVLMImageProcessor,
|
|
32040
33213
|
Idefics3Processor as SmolVLMProcessor,
|
|
32041
33214
|
SnacDecoderModel,
|
|
@@ -32141,6 +33314,10 @@ export {
|
|
|
32141
33314
|
VitsTokenizer,
|
|
32142
33315
|
VoxtralForConditionalGeneration,
|
|
32143
33316
|
VoxtralProcessor,
|
|
33317
|
+
VoxtralRealtimeFeatureExtractor,
|
|
33318
|
+
VoxtralRealtimeForConditionalGeneration,
|
|
33319
|
+
VoxtralRealtimePreTrainedModel,
|
|
33320
|
+
VoxtralRealtimeProcessor,
|
|
32144
33321
|
Wav2Vec2BertForCTC,
|
|
32145
33322
|
Wav2Vec2BertForSequenceClassification,
|
|
32146
33323
|
Wav2Vec2BertModel,
|
|
@@ -32236,7 +33413,7 @@ export {
|
|
|
32236
33413
|
|
|
32237
33414
|
onnxruntime-web/dist/ort.webgpu.bundle.min.mjs:
|
|
32238
33415
|
(*!
|
|
32239
|
-
* ONNX Runtime Web v1.25.0-dev.
|
|
33416
|
+
* ONNX Runtime Web v1.25.0-dev.20260307-d626b568e0
|
|
32240
33417
|
* Copyright (c) Microsoft Corporation. All rights reserved.
|
|
32241
33418
|
* Licensed under the MIT License.
|
|
32242
33419
|
*)
|