@huggingface/transformers 4.0.0-next.5 → 4.0.0-next.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -4
- package/dist/ort-wasm-simd-threaded.jsep.mjs +24 -24
- package/dist/transformers.js +2189 -1015
- package/dist/transformers.min.js +16 -16
- package/dist/transformers.node.cjs +2234 -1029
- package/dist/transformers.node.min.cjs +20 -20
- package/dist/transformers.node.min.mjs +20 -20
- package/dist/transformers.node.mjs +2194 -1017
- package/dist/transformers.web.js +2175 -1001
- package/dist/transformers.web.min.js +18 -18
- package/package.json +4 -4
- package/src/backends/onnx.js +77 -58
- package/src/backends/utils/cacheWasm.js +22 -43
- package/src/cache_utils.js +62 -0
- package/src/configs.js +32 -5
- package/src/env.js +36 -6
- package/src/image_processors_utils.js +3 -3
- package/src/models/auto/modeling_auto.js +14 -1
- package/src/models/chatterbox/modeling_chatterbox.js +1 -1
- package/src/models/detr/image_processing_detr.js +1 -1
- package/src/models/feature_extractors.js +2 -0
- package/src/models/gemma3n/modeling_gemma3n.js +2 -0
- package/src/models/granite_speech/feature_extraction_granite_speech.js +58 -0
- package/src/models/granite_speech/modeling_granite_speech.js +5 -0
- package/src/models/granite_speech/processing_granite_speech.js +62 -0
- package/src/models/grounding_dino/image_processing_grounding_dino.js +1 -1
- package/src/models/idefics3/modeling_idefics3.js +5 -32
- package/src/models/image_processors.js +1 -0
- package/src/models/lfm2_vl/image_processing_lfm2_vl.js +305 -0
- package/src/models/lfm2_vl/modeling_lfm2_vl.js +13 -0
- package/src/models/lfm2_vl/processing_lfm2_vl.js +77 -0
- package/src/models/llava/modeling_llava.js +1 -1
- package/src/models/mistral3/modeling_mistral3.js +2 -2
- package/src/models/modeling_utils.js +234 -292
- package/src/models/models.js +9 -0
- package/src/models/olmo_hybrid/modeling_olmo_hybrid.js +5 -0
- package/src/models/paligemma/modeling_paligemma.js +2 -25
- package/src/models/processors.js +3 -0
- package/src/models/qwen2_5_vl/modeling_qwen2_5_vl.js +5 -1
- package/src/models/qwen2_moe/modeling_qwen2_moe.js +5 -0
- package/src/models/qwen2_vl/image_processing_qwen2_vl.js +1 -41
- package/src/models/qwen2_vl/modeling_qwen2_vl.js +36 -3
- package/src/models/qwen3_5/modeling_qwen3_5.js +1 -0
- package/src/models/qwen3_5_moe/modeling_qwen3_5_moe.js +2 -1
- package/src/models/qwen3_moe/modeling_qwen3_moe.js +5 -0
- package/src/models/qwen3_next/modeling_qwen3_next.js +5 -0
- package/src/models/qwen3_vl/modeling_qwen3_vl.js +2 -1
- package/src/models/qwen3_vl_moe/modeling_qwen3_vl_moe.js +4 -0
- package/src/models/registry.js +39 -4
- package/src/models/sam/image_processing_sam.js +1 -1
- package/src/models/session.js +17 -6
- package/src/models/smolvlm/modeling_smolvlm.js +7 -0
- package/src/models/ultravox/modeling_ultravox.js +1 -3
- package/src/models/voxtral/modeling_voxtral.js +3 -0
- package/src/models/voxtral_realtime/feature_extraction_voxtral_realtime.js +71 -0
- package/src/models/voxtral_realtime/modeling_voxtral_realtime.js +239 -0
- package/src/models/voxtral_realtime/processing_voxtral_realtime.js +113 -0
- package/src/models/whisper/feature_extraction_whisper.js +2 -12
- package/src/pipelines/index.js +2 -84
- package/src/pipelines.js +40 -77
- package/src/transformers.js +2 -0
- package/src/utils/audio.js +18 -2
- package/src/utils/cache/CrossOriginStorageCache.js +251 -0
- package/src/utils/cache/FileCache.js +128 -0
- package/src/utils/cache/cross-origin-storage.d.ts +38 -0
- package/src/utils/cache.js +8 -3
- package/src/utils/hub/{files.js → FileResponse.js} +0 -105
- package/src/utils/hub/utils.js +35 -1
- package/src/utils/hub.js +6 -5
- package/src/utils/image.js +12 -13
- package/src/utils/lru_cache.js +67 -0
- package/src/utils/memoize_promise.js +45 -0
- package/src/utils/model_registry/ModelRegistry.js +70 -23
- package/src/utils/model_registry/get_file_metadata.js +14 -2
- package/src/utils/model_registry/get_model_files.js +63 -78
- package/src/utils/model_registry/get_pipeline_files.js +15 -24
- package/src/utils/model_registry/is_cached.js +81 -4
- package/src/utils/tensor.js +18 -2
- package/types/backends/onnx.d.ts.map +1 -1
- package/types/backends/utils/cacheWasm.d.ts +3 -17
- package/types/backends/utils/cacheWasm.d.ts.map +1 -1
- package/types/cache_utils.d.ts +29 -0
- package/types/cache_utils.d.ts.map +1 -0
- package/types/configs.d.ts.map +1 -1
- package/types/env.d.ts +18 -3
- package/types/env.d.ts.map +1 -1
- package/types/image_processors_utils.d.ts +17 -1
- package/types/image_processors_utils.d.ts.map +1 -1
- package/types/models/auto/modeling_auto.d.ts +6 -0
- package/types/models/auto/modeling_auto.d.ts.map +1 -1
- package/types/models/detr/image_processing_detr.d.ts +1 -1
- package/types/models/feature_extractors.d.ts +2 -0
- package/types/models/gemma3n/modeling_gemma3n.d.ts +2 -0
- package/types/models/gemma3n/modeling_gemma3n.d.ts.map +1 -1
- package/types/models/granite_speech/feature_extraction_granite_speech.d.ts +16 -0
- package/types/models/granite_speech/feature_extraction_granite_speech.d.ts.map +1 -0
- package/types/models/granite_speech/modeling_granite_speech.d.ts +4 -0
- package/types/models/granite_speech/modeling_granite_speech.d.ts.map +1 -0
- package/types/models/granite_speech/processing_granite_speech.d.ts +19 -0
- package/types/models/granite_speech/processing_granite_speech.d.ts.map +1 -0
- package/types/models/grounding_dino/image_processing_grounding_dino.d.ts +1 -1
- package/types/models/idefics3/modeling_idefics3.d.ts +2 -18
- package/types/models/idefics3/modeling_idefics3.d.ts.map +1 -1
- package/types/models/image_processors.d.ts +1 -0
- package/types/models/lfm2_vl/image_processing_lfm2_vl.d.ts +41 -0
- package/types/models/lfm2_vl/image_processing_lfm2_vl.d.ts.map +1 -0
- package/types/models/lfm2_vl/modeling_lfm2_vl.d.ts +4 -0
- package/types/models/lfm2_vl/modeling_lfm2_vl.d.ts.map +1 -0
- package/types/models/lfm2_vl/processing_lfm2_vl.d.ts +18 -0
- package/types/models/lfm2_vl/processing_lfm2_vl.d.ts.map +1 -0
- package/types/models/mistral3/modeling_mistral3.d.ts +2 -2
- package/types/models/mistral3/modeling_mistral3.d.ts.map +1 -1
- package/types/models/modeling_utils.d.ts +44 -24
- package/types/models/modeling_utils.d.ts.map +1 -1
- package/types/models/models.d.ts +9 -0
- package/types/models/olmo_hybrid/modeling_olmo_hybrid.d.ts +8 -0
- package/types/models/olmo_hybrid/modeling_olmo_hybrid.d.ts.map +1 -0
- package/types/models/paligemma/modeling_paligemma.d.ts +2 -8
- package/types/models/paligemma/modeling_paligemma.d.ts.map +1 -1
- package/types/models/processors.d.ts +3 -0
- package/types/models/qwen2_5_vl/modeling_qwen2_5_vl.d.ts +3 -0
- package/types/models/qwen2_5_vl/modeling_qwen2_5_vl.d.ts.map +1 -1
- package/types/models/qwen2_moe/modeling_qwen2_moe.d.ts +8 -0
- package/types/models/qwen2_moe/modeling_qwen2_moe.d.ts.map +1 -0
- package/types/models/qwen2_vl/image_processing_qwen2_vl.d.ts.map +1 -1
- package/types/models/qwen2_vl/modeling_qwen2_vl.d.ts +2 -0
- package/types/models/qwen2_vl/modeling_qwen2_vl.d.ts.map +1 -1
- package/types/models/qwen3_5/modeling_qwen3_5.d.ts +2 -0
- package/types/models/qwen3_5/modeling_qwen3_5.d.ts.map +1 -1
- package/types/models/qwen3_5_moe/modeling_qwen3_5_moe.d.ts +3 -0
- package/types/models/qwen3_5_moe/modeling_qwen3_5_moe.d.ts.map +1 -1
- package/types/models/qwen3_moe/modeling_qwen3_moe.d.ts +8 -0
- package/types/models/qwen3_moe/modeling_qwen3_moe.d.ts.map +1 -0
- package/types/models/qwen3_next/modeling_qwen3_next.d.ts +8 -0
- package/types/models/qwen3_next/modeling_qwen3_next.d.ts.map +1 -0
- package/types/models/qwen3_vl/modeling_qwen3_vl.d.ts +3 -0
- package/types/models/qwen3_vl/modeling_qwen3_vl.d.ts.map +1 -1
- package/types/models/qwen3_vl_moe/modeling_qwen3_vl_moe.d.ts +7 -0
- package/types/models/qwen3_vl_moe/modeling_qwen3_vl_moe.d.ts.map +1 -0
- package/types/models/registry.d.ts +2 -1
- package/types/models/registry.d.ts.map +1 -1
- package/types/models/sam/image_processing_sam.d.ts +1 -1
- package/types/models/session.d.ts +3 -2
- package/types/models/session.d.ts.map +1 -1
- package/types/models/smolvlm/modeling_smolvlm.d.ts +8 -0
- package/types/models/smolvlm/modeling_smolvlm.d.ts.map +1 -0
- package/types/models/ultravox/modeling_ultravox.d.ts +0 -2
- package/types/models/ultravox/modeling_ultravox.d.ts.map +1 -1
- package/types/models/voxtral/modeling_voxtral.d.ts +4 -0
- package/types/models/voxtral/modeling_voxtral.d.ts.map +1 -0
- package/types/models/voxtral_realtime/feature_extraction_voxtral_realtime.d.ts +28 -0
- package/types/models/voxtral_realtime/feature_extraction_voxtral_realtime.d.ts.map +1 -0
- package/types/models/voxtral_realtime/modeling_voxtral_realtime.d.ts +17 -0
- package/types/models/voxtral_realtime/modeling_voxtral_realtime.d.ts.map +1 -0
- package/types/models/voxtral_realtime/processing_voxtral_realtime.d.ts +44 -0
- package/types/models/voxtral_realtime/processing_voxtral_realtime.d.ts.map +1 -0
- package/types/models/whisper/feature_extraction_whisper.d.ts.map +1 -1
- package/types/pipelines/index.d.ts +0 -34
- package/types/pipelines/index.d.ts.map +1 -1
- package/types/pipelines.d.ts.map +1 -1
- package/types/transformers.d.ts +1 -0
- package/types/transformers.d.ts.map +1 -1
- package/types/utils/audio.d.ts +5 -2
- package/types/utils/audio.d.ts.map +1 -1
- package/types/utils/cache/CrossOriginStorageCache.d.ts +120 -0
- package/types/utils/cache/CrossOriginStorageCache.d.ts.map +1 -0
- package/types/utils/cache/FileCache.d.ts +39 -0
- package/types/utils/cache/FileCache.d.ts.map +1 -0
- package/types/utils/cache.d.ts +4 -4
- package/types/utils/cache.d.ts.map +1 -1
- package/types/utils/dtypes.d.ts +1 -1
- package/types/utils/hub/{files.d.ts → FileResponse.d.ts} +1 -38
- package/types/utils/hub/FileResponse.d.ts.map +1 -0
- package/types/utils/hub/utils.d.ts +17 -2
- package/types/utils/hub/utils.d.ts.map +1 -1
- package/types/utils/hub.d.ts +7 -7
- package/types/utils/hub.d.ts.map +1 -1
- package/types/utils/image.d.ts +1 -1
- package/types/utils/image.d.ts.map +1 -1
- package/types/utils/lru_cache.d.ts +38 -0
- package/types/utils/lru_cache.d.ts.map +1 -0
- package/types/utils/memoize_promise.d.ts +14 -0
- package/types/utils/memoize_promise.d.ts.map +1 -0
- package/types/utils/model_registry/ModelRegistry.d.ts +66 -6
- package/types/utils/model_registry/ModelRegistry.d.ts.map +1 -1
- package/types/utils/model_registry/get_file_metadata.d.ts.map +1 -1
- package/types/utils/model_registry/get_model_files.d.ts +1 -0
- package/types/utils/model_registry/get_model_files.d.ts.map +1 -1
- package/types/utils/model_registry/get_pipeline_files.d.ts +2 -1
- package/types/utils/model_registry/get_pipeline_files.d.ts.map +1 -1
- package/types/utils/model_registry/is_cached.d.ts +47 -4
- package/types/utils/model_registry/is_cached.d.ts.map +1 -1
- package/types/utils/tensor.d.ts.map +1 -1
- package/src/utils/data-structures.js +0 -572
- package/types/utils/data-structures.d.ts +0 -294
- package/types/utils/data-structures.d.ts.map +0 -1
- package/types/utils/hub/files.d.ts.map +0 -1
package/dist/transformers.js
CHANGED
|
@@ -20,22 +20,32 @@ var node_path_default = {};
|
|
|
20
20
|
var node_url_default = {};
|
|
21
21
|
|
|
22
22
|
// src/env.js
|
|
23
|
-
var VERSION = "4.0.0-next.
|
|
23
|
+
var VERSION = "4.0.0-next.7";
|
|
24
|
+
var HAS_SELF = typeof self !== "undefined";
|
|
24
25
|
var IS_FS_AVAILABLE = !isEmpty(node_fs_default);
|
|
25
26
|
var IS_PATH_AVAILABLE = !isEmpty(node_path_default);
|
|
26
|
-
var IS_WEB_CACHE_AVAILABLE =
|
|
27
|
+
var IS_WEB_CACHE_AVAILABLE = HAS_SELF && "caches" in self;
|
|
27
28
|
var IS_DENO_RUNTIME = typeof globalThis.Deno !== "undefined";
|
|
28
29
|
var IS_BUN_RUNTIME = typeof globalThis.Bun !== "undefined";
|
|
29
30
|
var IS_DENO_WEB_RUNTIME = IS_DENO_RUNTIME && IS_WEB_CACHE_AVAILABLE && !IS_FS_AVAILABLE;
|
|
30
31
|
var IS_PROCESS_AVAILABLE = typeof process !== "undefined";
|
|
31
32
|
var IS_NODE_ENV = IS_PROCESS_AVAILABLE && process?.release?.name === "node" && !IS_DENO_WEB_RUNTIME;
|
|
32
33
|
var IS_BROWSER_ENV = typeof window !== "undefined" && typeof window.document !== "undefined";
|
|
33
|
-
var IS_WEBWORKER_ENV =
|
|
34
|
+
var IS_WEBWORKER_ENV = HAS_SELF && ["DedicatedWorkerGlobalScope", "ServiceWorkerGlobalScope", "SharedWorkerGlobalScope"].includes(
|
|
34
35
|
self.constructor?.name
|
|
35
36
|
);
|
|
37
|
+
var IS_WEB_ENV = IS_BROWSER_ENV || IS_WEBWORKER_ENV || IS_DENO_WEB_RUNTIME;
|
|
36
38
|
var IS_WEBGPU_AVAILABLE = IS_NODE_ENV || typeof navigator !== "undefined" && "gpu" in navigator;
|
|
37
39
|
var IS_WEBNN_AVAILABLE = typeof navigator !== "undefined" && "ml" in navigator;
|
|
38
40
|
var IS_CRYPTO_AVAILABLE = typeof crypto !== "undefined" && typeof crypto.getRandomValues === "function";
|
|
41
|
+
var IS_CHROME_AVAILABLE = (
|
|
42
|
+
// @ts-ignore - chrome may not exist in all environments
|
|
43
|
+
typeof chrome !== "undefined" && typeof chrome.runtime !== "undefined" && typeof chrome.runtime.id === "string"
|
|
44
|
+
);
|
|
45
|
+
var IS_SERVICE_WORKER_ENV = (
|
|
46
|
+
// @ts-ignore - ServiceWorkerGlobalScope may not exist in all environments
|
|
47
|
+
typeof ServiceWorkerGlobalScope !== "undefined" && HAS_SELF && self instanceof ServiceWorkerGlobalScope
|
|
48
|
+
);
|
|
39
49
|
var isSafari = () => {
|
|
40
50
|
if (typeof navigator === "undefined") {
|
|
41
51
|
return false;
|
|
@@ -52,6 +62,12 @@ var apis = Object.freeze({
|
|
|
52
62
|
IS_BROWSER_ENV,
|
|
53
63
|
/** Whether we are running in a web worker environment */
|
|
54
64
|
IS_WEBWORKER_ENV,
|
|
65
|
+
/** Whether we are running in a web-like environment (browser, web worker, or Deno web runtime) */
|
|
66
|
+
IS_WEB_ENV,
|
|
67
|
+
/** Whether we are running in a service worker environment */
|
|
68
|
+
IS_SERVICE_WORKER_ENV,
|
|
69
|
+
/** Whether we are running in Deno's web runtime (CDN imports, Cache API available, no filesystem) */
|
|
70
|
+
IS_DENO_WEB_RUNTIME,
|
|
55
71
|
/** Whether the Cache API is available */
|
|
56
72
|
IS_WEB_CACHE_AVAILABLE,
|
|
57
73
|
/** Whether the WebGPU API is available */
|
|
@@ -69,7 +85,9 @@ var apis = Object.freeze({
|
|
|
69
85
|
/** Whether the path API is available */
|
|
70
86
|
IS_PATH_AVAILABLE,
|
|
71
87
|
/** Whether the crypto API is available */
|
|
72
|
-
IS_CRYPTO_AVAILABLE
|
|
88
|
+
IS_CRYPTO_AVAILABLE,
|
|
89
|
+
/** Whether the Chrome runtime API is available */
|
|
90
|
+
IS_CHROME_AVAILABLE
|
|
73
91
|
});
|
|
74
92
|
var RUNNING_LOCALLY = IS_FS_AVAILABLE && IS_PATH_AVAILABLE;
|
|
75
93
|
var dirname__ = "./";
|
|
@@ -130,6 +148,7 @@ var env = {
|
|
|
130
148
|
customCache: null,
|
|
131
149
|
useWasmCache: IS_WEB_CACHE_AVAILABLE || IS_FS_AVAILABLE,
|
|
132
150
|
cacheKey: "transformers-cache",
|
|
151
|
+
experimental_useCrossOriginStorage: false,
|
|
133
152
|
/////////////////// Custom fetch /////////////////////
|
|
134
153
|
fetch: DEFAULT_FETCH
|
|
135
154
|
//////////////////////////////////////////////////////
|
|
@@ -2680,7 +2699,7 @@ var Tokenizer = class {
|
|
|
2680
2699
|
};
|
|
2681
2700
|
var Tokenizer_default = Tokenizer;
|
|
2682
2701
|
|
|
2683
|
-
// ../../node_modules/.pnpm/@huggingface+jinja@0.5.
|
|
2702
|
+
// ../../node_modules/.pnpm/@huggingface+jinja@0.5.6/node_modules/@huggingface/jinja/dist/index.js
|
|
2684
2703
|
var TOKEN_TYPES = Object.freeze({
|
|
2685
2704
|
Text: "Text",
|
|
2686
2705
|
// The text between Jinja statements or expressions
|
|
@@ -4199,7 +4218,11 @@ var Environment = class {
|
|
|
4199
4218
|
["number", (operand) => operand instanceof IntegerValue || operand instanceof FloatValue],
|
|
4200
4219
|
["integer", (operand) => operand instanceof IntegerValue],
|
|
4201
4220
|
["iterable", (operand) => operand.type === "ArrayValue" || operand.type === "StringValue"],
|
|
4202
|
-
["mapping", (operand) => operand
|
|
4221
|
+
["mapping", (operand) => operand instanceof ObjectValue],
|
|
4222
|
+
[
|
|
4223
|
+
"sequence",
|
|
4224
|
+
(operand) => operand instanceof ArrayValue || operand instanceof ObjectValue || operand instanceof StringValue
|
|
4225
|
+
],
|
|
4203
4226
|
[
|
|
4204
4227
|
"lower",
|
|
4205
4228
|
(operand) => {
|
|
@@ -4472,6 +4495,9 @@ var Interpreter = class {
|
|
|
4472
4495
|
applyFilter(operand, filterNode, environment) {
|
|
4473
4496
|
if (filterNode.type === "Identifier") {
|
|
4474
4497
|
const filter = filterNode;
|
|
4498
|
+
if (filter.value === "safe") {
|
|
4499
|
+
return operand;
|
|
4500
|
+
}
|
|
4475
4501
|
if (filter.value === "tojson") {
|
|
4476
4502
|
return new StringValue(toJSON(operand, {}));
|
|
4477
4503
|
}
|
|
@@ -4561,6 +4587,8 @@ var Interpreter = class {
|
|
|
4561
4587
|
return new IntegerValue(Math.floor(operand.value));
|
|
4562
4588
|
case "float":
|
|
4563
4589
|
return new FloatValue(operand.value);
|
|
4590
|
+
case "string":
|
|
4591
|
+
return new StringValue(operand.toString());
|
|
4564
4592
|
default:
|
|
4565
4593
|
throw new Error(`Unknown NumericValue filter: ${filter.value}`);
|
|
4566
4594
|
}
|
|
@@ -5514,7 +5542,7 @@ var Callable2 = (
|
|
|
5514
5542
|
}
|
|
5515
5543
|
);
|
|
5516
5544
|
|
|
5517
|
-
// src/utils/hub/
|
|
5545
|
+
// src/utils/hub/FileResponse.js
|
|
5518
5546
|
var CONTENT_TYPE_MAP = {
|
|
5519
5547
|
txt: "text/plain",
|
|
5520
5548
|
html: "text/html",
|
|
@@ -5625,6 +5653,170 @@ var FileResponse = class _FileResponse {
|
|
|
5625
5653
|
return JSON.parse(await this.text());
|
|
5626
5654
|
}
|
|
5627
5655
|
};
|
|
5656
|
+
|
|
5657
|
+
// src/utils/random.js
|
|
5658
|
+
var Random = class {
|
|
5659
|
+
constructor(seed) {
|
|
5660
|
+
this._mt = new Uint32Array(624);
|
|
5661
|
+
this._idx = 625;
|
|
5662
|
+
this._gauss_next = null;
|
|
5663
|
+
this._random_fn = this.random.bind(this);
|
|
5664
|
+
this.seed(seed);
|
|
5665
|
+
}
|
|
5666
|
+
/**
|
|
5667
|
+
* Seeds this instance's PRNG.
|
|
5668
|
+
*
|
|
5669
|
+
* When called with a number, initializes the state deterministically from that value.
|
|
5670
|
+
* When called with no arguments (or `undefined`/`null`), seeds from OS entropy
|
|
5671
|
+
* via `crypto.getRandomValues`, matching Python's `random.seed()` behaviour.
|
|
5672
|
+
*
|
|
5673
|
+
* @param {number} [n] The seed value. Omit to seed from OS entropy.
|
|
5674
|
+
*/
|
|
5675
|
+
seed(n) {
|
|
5676
|
+
if (n === void 0 || n === null) {
|
|
5677
|
+
if (apis.IS_CRYPTO_AVAILABLE) {
|
|
5678
|
+
const buf = new Uint32Array(1);
|
|
5679
|
+
crypto.getRandomValues(buf);
|
|
5680
|
+
n = buf[0];
|
|
5681
|
+
} else {
|
|
5682
|
+
n = Date.now() >>> 0;
|
|
5683
|
+
}
|
|
5684
|
+
}
|
|
5685
|
+
const mt2 = this._mt;
|
|
5686
|
+
const u = (a, b) => Math.imul(a, b) >>> 0, key = [];
|
|
5687
|
+
for (let v = n || 0; v > 0; v = Math.floor(v / 4294967296)) key.push(v & 4294967295);
|
|
5688
|
+
if (!key.length) key.push(0);
|
|
5689
|
+
mt2[0] = 19650218;
|
|
5690
|
+
for (let k2 = 1; k2 < 624; ++k2) mt2[k2] = u(1812433253, mt2[k2 - 1] ^ mt2[k2 - 1] >>> 30) + k2 >>> 0;
|
|
5691
|
+
let i = 1, j = 0;
|
|
5692
|
+
for (let k2 = Math.max(624, key.length); k2 > 0; --k2, ++i, ++j) {
|
|
5693
|
+
if (i >= 624) {
|
|
5694
|
+
mt2[0] = mt2[623];
|
|
5695
|
+
i = 1;
|
|
5696
|
+
}
|
|
5697
|
+
if (j >= key.length) j = 0;
|
|
5698
|
+
mt2[i] = (mt2[i] ^ u(mt2[i - 1] ^ mt2[i - 1] >>> 30, 1664525)) + key[j] + j >>> 0;
|
|
5699
|
+
}
|
|
5700
|
+
for (let k2 = 623; k2 > 0; --k2, ++i) {
|
|
5701
|
+
if (i >= 624) {
|
|
5702
|
+
mt2[0] = mt2[623];
|
|
5703
|
+
i = 1;
|
|
5704
|
+
}
|
|
5705
|
+
mt2[i] = (mt2[i] ^ u(mt2[i - 1] ^ mt2[i - 1] >>> 30, 1566083941)) - i >>> 0;
|
|
5706
|
+
}
|
|
5707
|
+
mt2[0] = 2147483648;
|
|
5708
|
+
this._idx = 624;
|
|
5709
|
+
this._gauss_next = null;
|
|
5710
|
+
}
|
|
5711
|
+
/**
|
|
5712
|
+
* Generates a random unsigned 32-bit integer.
|
|
5713
|
+
*
|
|
5714
|
+
* Performs the "twist" step when the state buffer is exhausted,
|
|
5715
|
+
* then applies the standard MT19937 tempering transform.
|
|
5716
|
+
*
|
|
5717
|
+
* @returns {number} A random integer in the range [0, 2^32 - 1].
|
|
5718
|
+
*/
|
|
5719
|
+
_int32() {
|
|
5720
|
+
const mt2 = this._mt;
|
|
5721
|
+
if (this._idx >= 624) {
|
|
5722
|
+
for (let k2 = 0; k2 < 624; ++k2) {
|
|
5723
|
+
const y2 = mt2[k2] & 2147483648 | mt2[(k2 + 1) % 624] & 2147483647;
|
|
5724
|
+
mt2[k2] = (mt2[(k2 + 397) % 624] ^ y2 >>> 1 ^ (y2 & 1 ? 2567483615 : 0)) >>> 0;
|
|
5725
|
+
}
|
|
5726
|
+
this._idx = 0;
|
|
5727
|
+
}
|
|
5728
|
+
let y = mt2[this._idx++];
|
|
5729
|
+
y ^= y >>> 11;
|
|
5730
|
+
y ^= y << 7 & 2636928640;
|
|
5731
|
+
y ^= y << 15 & 4022730752;
|
|
5732
|
+
y ^= y >>> 18;
|
|
5733
|
+
return y >>> 0;
|
|
5734
|
+
}
|
|
5735
|
+
/**
|
|
5736
|
+
* Generates a random floating-point number in the half-open interval [0, 1).
|
|
5737
|
+
*
|
|
5738
|
+
* Combines two 32-bit integers (using 53 bits of precision) to produce
|
|
5739
|
+
* a uniformly distributed double, matching Python's `random.random()`.
|
|
5740
|
+
*
|
|
5741
|
+
* @returns {number} A random float in [0, 1).
|
|
5742
|
+
*/
|
|
5743
|
+
random() {
|
|
5744
|
+
return ((this._int32() >>> 5) * 67108864 + (this._int32() >>> 6)) / 9007199254740992;
|
|
5745
|
+
}
|
|
5746
|
+
/**
|
|
5747
|
+
* Generates a random number from a Gaussian (normal) distribution.
|
|
5748
|
+
*
|
|
5749
|
+
* Uses the Box-Muller transform with a cached spare value,
|
|
5750
|
+
* matching Python's `random.gauss()` output for the same seed.
|
|
5751
|
+
*
|
|
5752
|
+
* @param {number} [mu=0] The mean of the distribution.
|
|
5753
|
+
* @param {number} [sigma=1] The standard deviation of the distribution.
|
|
5754
|
+
* @returns {number} A normally distributed random value.
|
|
5755
|
+
*/
|
|
5756
|
+
gauss(mu = 0, sigma = 1) {
|
|
5757
|
+
let z = this._gauss_next;
|
|
5758
|
+
this._gauss_next = null;
|
|
5759
|
+
if (z === null) {
|
|
5760
|
+
const x2pi = this.random() * 2 * Math.PI, g2rad = Math.sqrt(-2 * Math.log(1 - this.random()));
|
|
5761
|
+
z = Math.cos(x2pi) * g2rad;
|
|
5762
|
+
this._gauss_next = Math.sin(x2pi) * g2rad;
|
|
5763
|
+
}
|
|
5764
|
+
return mu + z * sigma;
|
|
5765
|
+
}
|
|
5766
|
+
/**
|
|
5767
|
+
* Shuffles an array in-place using the Fisher-Yates algorithm.
|
|
5768
|
+
*
|
|
5769
|
+
* Uses rejection sampling via `getrandbits`-style bit masking to ensure
|
|
5770
|
+
* a uniform distribution, matching Python's `random.shuffle()`.
|
|
5771
|
+
*
|
|
5772
|
+
* @param {any[]} arr The array to shuffle in-place.
|
|
5773
|
+
*/
|
|
5774
|
+
shuffle(arr) {
|
|
5775
|
+
for (let i = arr.length - 1; i > 0; --i) {
|
|
5776
|
+
const k2 = 32 - Math.clz32(i + 1);
|
|
5777
|
+
let r = this._int32() >>> 32 - k2;
|
|
5778
|
+
while (r > i) r = this._int32() >>> 32 - k2;
|
|
5779
|
+
const t = arr[i];
|
|
5780
|
+
arr[i] = arr[r];
|
|
5781
|
+
arr[r] = t;
|
|
5782
|
+
}
|
|
5783
|
+
}
|
|
5784
|
+
/**
|
|
5785
|
+
* Selects a single element from a weighted population.
|
|
5786
|
+
*
|
|
5787
|
+
* Matches Python's `random.choices(population, weights=weights, k=1)[0]`
|
|
5788
|
+
*
|
|
5789
|
+
* @param {any[]} population The array of items to choose from.
|
|
5790
|
+
* @param {number[]} weights An array of non-negative weights, one per population element.
|
|
5791
|
+
* @returns {*} A single randomly selected element from the population.
|
|
5792
|
+
*/
|
|
5793
|
+
choices(population, weights) {
|
|
5794
|
+
return population[_weightedIndexWith(this._random_fn, weights)];
|
|
5795
|
+
}
|
|
5796
|
+
};
|
|
5797
|
+
function _weightedIndexWith(randomFn, weights) {
|
|
5798
|
+
let sum = 0;
|
|
5799
|
+
for (let i = 0; i < weights.length; ++i) sum += weights[i];
|
|
5800
|
+
let x = randomFn() * sum;
|
|
5801
|
+
for (let i = 0; i < weights.length; ++i) {
|
|
5802
|
+
x -= weights[i];
|
|
5803
|
+
if (x < 0) return i;
|
|
5804
|
+
}
|
|
5805
|
+
return weights.length - 1;
|
|
5806
|
+
}
|
|
5807
|
+
var _default = new Random();
|
|
5808
|
+
var random = Object.freeze({
|
|
5809
|
+
Random,
|
|
5810
|
+
seed: _default.seed.bind(_default),
|
|
5811
|
+
random: _default.random.bind(_default),
|
|
5812
|
+
gauss: _default.gauss.bind(_default),
|
|
5813
|
+
shuffle: _default.shuffle.bind(_default),
|
|
5814
|
+
choices: _default.choices.bind(_default)
|
|
5815
|
+
});
|
|
5816
|
+
var _weightedIndex = (weights) => _weightedIndexWith(random.random, weights);
|
|
5817
|
+
|
|
5818
|
+
// src/utils/cache/FileCache.js
|
|
5819
|
+
var rng = new Random();
|
|
5628
5820
|
var FileCache = class {
|
|
5629
5821
|
/**
|
|
5630
5822
|
* Instantiate a `FileCache` object.
|
|
@@ -5656,13 +5848,16 @@ var FileCache = class {
|
|
|
5656
5848
|
* @returns {Promise<void>}
|
|
5657
5849
|
*/
|
|
5658
5850
|
async put(request, response, progress_callback = void 0) {
|
|
5659
|
-
|
|
5851
|
+
const filePath = node_path_default.join(this.path, request);
|
|
5852
|
+
const id = apis.IS_PROCESS_AVAILABLE ? process.pid : Date.now();
|
|
5853
|
+
const randomSuffix = rng._int32().toString(36);
|
|
5854
|
+
const tmpPath = filePath + `.tmp.${id}.${randomSuffix}`;
|
|
5660
5855
|
try {
|
|
5661
5856
|
const contentLength = response.headers.get("Content-Length");
|
|
5662
5857
|
const total = parseInt(contentLength ?? "0");
|
|
5663
5858
|
let loaded = 0;
|
|
5664
5859
|
await node_fs_default.promises.mkdir(node_path_default.dirname(filePath), { recursive: true });
|
|
5665
|
-
const fileStream = node_fs_default.createWriteStream(
|
|
5860
|
+
const fileStream = node_fs_default.createWriteStream(tmpPath);
|
|
5666
5861
|
const reader = response.body.getReader();
|
|
5667
5862
|
while (true) {
|
|
5668
5863
|
const { done, value } = await reader.read();
|
|
@@ -5682,10 +5877,13 @@ var FileCache = class {
|
|
|
5682
5877
|
const progress = total ? loaded / total * 100 : 0;
|
|
5683
5878
|
progress_callback?.({ progress, loaded, total });
|
|
5684
5879
|
}
|
|
5685
|
-
|
|
5880
|
+
await new Promise((resolve, reject) => {
|
|
5881
|
+
fileStream.close((err) => err ? reject(err) : resolve());
|
|
5882
|
+
});
|
|
5883
|
+
await node_fs_default.promises.rename(tmpPath, filePath);
|
|
5686
5884
|
} catch (error) {
|
|
5687
5885
|
try {
|
|
5688
|
-
await node_fs_default.promises.unlink(
|
|
5886
|
+
await node_fs_default.promises.unlink(tmpPath);
|
|
5689
5887
|
} catch {
|
|
5690
5888
|
}
|
|
5691
5889
|
throw error;
|
|
@@ -5708,6 +5906,7 @@ var FileCache = class {
|
|
|
5708
5906
|
// TODO add the rest?
|
|
5709
5907
|
// addAll(requests: RequestInfo[]): Promise<void>;
|
|
5710
5908
|
// keys(request?: RequestInfo | URL, options?: CacheQueryOptions): Promise<ReadonlyArray<Request>>;
|
|
5909
|
+
// match(request: RequestInfo | URL, options?: CacheQueryOptions): Promise<Response | undefined>;
|
|
5711
5910
|
// matchAll(request?: RequestInfo | URL, options?: CacheQueryOptions): Promise<ReadonlyArray<Response>>;
|
|
5712
5911
|
};
|
|
5713
5912
|
|
|
@@ -5797,69 +5996,380 @@ async function readResponse(response, progress_callback, expectedSize) {
|
|
|
5797
5996
|
await read();
|
|
5798
5997
|
return buffer;
|
|
5799
5998
|
}
|
|
5800
|
-
|
|
5801
|
-
|
|
5802
|
-
|
|
5803
|
-
|
|
5804
|
-
|
|
5805
|
-
|
|
5806
|
-
|
|
5807
|
-
|
|
5808
|
-
|
|
5809
|
-
|
|
5810
|
-
|
|
5811
|
-
);
|
|
5812
|
-
}
|
|
5813
|
-
cache = env.customCache;
|
|
5999
|
+
function isBlobURL(url) {
|
|
6000
|
+
return isValidUrl(url, ["blob:"]);
|
|
6001
|
+
}
|
|
6002
|
+
function toAbsoluteURL(url) {
|
|
6003
|
+
let baseURL;
|
|
6004
|
+
if (typeof location !== "undefined" && location.href) {
|
|
6005
|
+
baseURL = location.href;
|
|
6006
|
+
} else if (typeof import.meta !== "undefined" && import.meta.url) {
|
|
6007
|
+
baseURL = import.meta.url;
|
|
6008
|
+
} else {
|
|
6009
|
+
return url;
|
|
5814
6010
|
}
|
|
5815
|
-
|
|
5816
|
-
|
|
5817
|
-
|
|
6011
|
+
return new URL(url, baseURL).href;
|
|
6012
|
+
}
|
|
6013
|
+
|
|
6014
|
+
// src/utils/cache/CrossOriginStorageCache.js
|
|
6015
|
+
var HASH_ALGORITHM = "SHA-256";
|
|
6016
|
+
var HASH_CACHE_NAME = "experimental_transformers-hash-cache";
|
|
6017
|
+
var makeHashDescriptor = (value) => ({ algorithm: HASH_ALGORITHM, value });
|
|
6018
|
+
var CrossOriginStorage = class {
|
|
6019
|
+
/** @type {Promise<Cache> | null} */
|
|
6020
|
+
#hashCache = null;
|
|
6021
|
+
/**
|
|
6022
|
+
* Returns (and lazily opens) the hash cache, reusing the same promise across concurrent callers.
|
|
6023
|
+
* @returns {Promise<Cache>}
|
|
6024
|
+
*/
|
|
6025
|
+
_getHashCache = () => {
|
|
6026
|
+
this.#hashCache ??= caches.open(HASH_CACHE_NAME);
|
|
6027
|
+
return this.#hashCache;
|
|
6028
|
+
};
|
|
6029
|
+
/**
|
|
6030
|
+
* Returns whether the `navigator.crossOriginStorage` API is available in the current environment.
|
|
6031
|
+
* @returns {boolean}
|
|
6032
|
+
*/
|
|
6033
|
+
static isAvailable = () => typeof navigator !== "undefined" && "crossOriginStorage" in navigator;
|
|
6034
|
+
/**
|
|
6035
|
+
* Looks up a cached response for the given URL by resolving its SHA-256 hash and requesting
|
|
6036
|
+
* the corresponding file handle from cross-origin storage.
|
|
6037
|
+
*
|
|
6038
|
+
* Implements `CacheInterface.match`.
|
|
6039
|
+
*
|
|
6040
|
+
* @param {string} request The URL of the resource to look up.
|
|
6041
|
+
* @returns {Promise<Response|undefined>} The cached `Response`, or `undefined` if not found.
|
|
6042
|
+
*/
|
|
6043
|
+
match = async (request) => {
|
|
6044
|
+
const hashValue = await this._getFileHash(request);
|
|
6045
|
+
if (!hashValue) {
|
|
6046
|
+
return void 0;
|
|
5818
6047
|
}
|
|
5819
6048
|
try {
|
|
5820
|
-
|
|
5821
|
-
|
|
5822
|
-
|
|
6049
|
+
const [handle] = await navigator.crossOriginStorage.requestFileHandles([makeHashDescriptor(hashValue)]);
|
|
6050
|
+
const blob = await handle.getFile();
|
|
6051
|
+
return new Response(blob, {
|
|
6052
|
+
headers: {
|
|
6053
|
+
"Content-Length": String(blob.size)
|
|
6054
|
+
}
|
|
6055
|
+
});
|
|
6056
|
+
} catch {
|
|
6057
|
+
return void 0;
|
|
5823
6058
|
}
|
|
5824
|
-
}
|
|
5825
|
-
|
|
5826
|
-
|
|
5827
|
-
|
|
6059
|
+
};
|
|
6060
|
+
/**
|
|
6061
|
+
* Stores a response in cross-origin storage, keyed by its SHA-256 hash.
|
|
6062
|
+
*
|
|
6063
|
+
* For LFS-backed URLs the hash is resolved cheaply via `_getFileHash` (which checks
|
|
6064
|
+
* `HASH_CACHE_NAME` first, then falls back to fetching the Git LFS pointer file)
|
|
6065
|
+
* without reading the response body a second time.
|
|
6066
|
+
*
|
|
6067
|
+
* For non-LFS resources the hash is unknown upfront. In that case the body is consumed
|
|
6068
|
+
* in the background: the stream is read to compute the content hash, the file is written
|
|
6069
|
+
* into cross-origin storage, and the computed hash is persisted to `HASH_CACHE_NAME`
|
|
6070
|
+
* so that future `match` calls can resolve the file without a network round-trip.
|
|
6071
|
+
*
|
|
6072
|
+
* Implements `CacheInterface.put`.
|
|
6073
|
+
*
|
|
6074
|
+
* @param {string} request The URL of the resource (used as the hash-cache key).
|
|
6075
|
+
* @param {Response} response The response whose body will be written to the cache.
|
|
6076
|
+
* @returns {Promise<void>}
|
|
6077
|
+
*/
|
|
6078
|
+
put = async (request, response) => {
|
|
6079
|
+
const hashValue = await this._getFileHash(request);
|
|
6080
|
+
if (hashValue) {
|
|
6081
|
+
const blob = await response.blob();
|
|
6082
|
+
await this._storeBlobInCOS(blob, hashValue);
|
|
6083
|
+
} else {
|
|
6084
|
+
this._processAndStore(request, response.body);
|
|
5828
6085
|
}
|
|
5829
|
-
|
|
5830
|
-
|
|
5831
|
-
|
|
5832
|
-
|
|
5833
|
-
|
|
5834
|
-
|
|
6086
|
+
};
|
|
6087
|
+
/**
|
|
6088
|
+
* Writes a blob into cross-origin storage using the given pre-computed hex hash string.
|
|
6089
|
+
*
|
|
6090
|
+
* @param {Blob} blob
|
|
6091
|
+
* @param {string} hashHex Hex-encoded SHA-256 hash of `blob`.
|
|
6092
|
+
* @returns {Promise<void>}
|
|
6093
|
+
*/
|
|
6094
|
+
_storeBlobInCOS = async (blob, hashHex) => {
|
|
6095
|
+
const [handle] = await navigator.crossOriginStorage.requestFileHandles([makeHashDescriptor(hashHex)], {
|
|
6096
|
+
create: true
|
|
6097
|
+
});
|
|
6098
|
+
const writableStream = await handle.createWritable();
|
|
6099
|
+
await writableStream.write(blob);
|
|
6100
|
+
await writableStream.close();
|
|
6101
|
+
};
|
|
6102
|
+
/**
|
|
6103
|
+
* Background task for non-LFS resources: consumes `stream`, computes the SHA-256 hash
|
|
6104
|
+
* of the resulting blob, stores it in cross-origin storage, and persists the computed
|
|
6105
|
+
* hash to `HASH_CACHE_NAME` keyed by `request` so future `match` calls can resolve the
|
|
6106
|
+
* file without a network round-trip.
|
|
6107
|
+
*
|
|
6108
|
+
* Called fire-and-forget from `put` — errors are swallowed so failures never surface to
|
|
6109
|
+
* the caller.
|
|
6110
|
+
*
|
|
6111
|
+
* @param {string} request The original resource URL.
|
|
6112
|
+
* @param {ReadableStream} stream The response body stream to consume.
|
|
6113
|
+
* @returns {Promise<void>}
|
|
6114
|
+
*/
|
|
6115
|
+
_processAndStore = async (request, stream) => {
|
|
5835
6116
|
try {
|
|
5836
|
-
|
|
5837
|
-
|
|
5838
|
-
|
|
5839
|
-
|
|
6117
|
+
const chunks = [];
|
|
6118
|
+
for await (const chunk2 of stream) {
|
|
6119
|
+
chunks.push(chunk2);
|
|
6120
|
+
}
|
|
6121
|
+
const blob = new Blob(chunks);
|
|
6122
|
+
const hashHex = await this._getBlobHash(blob);
|
|
6123
|
+
await this._storeBlobInCOS(blob, hashHex);
|
|
6124
|
+
try {
|
|
6125
|
+
const hashCache = await this._getHashCache();
|
|
6126
|
+
await hashCache.put(request, new Response(hashHex));
|
|
6127
|
+
} catch {
|
|
6128
|
+
}
|
|
6129
|
+
} catch {
|
|
5840
6130
|
}
|
|
5841
|
-
}
|
|
5842
|
-
|
|
5843
|
-
|
|
5844
|
-
|
|
5845
|
-
|
|
5846
|
-
|
|
6131
|
+
};
|
|
6132
|
+
/**
|
|
6133
|
+
* Deletes the cache entry for the given request.
|
|
6134
|
+
*
|
|
6135
|
+
* Removes the hash entry from `HASH_CACHE_NAME`. Note: cross-origin storage itself does not
|
|
6136
|
+
* expose a delete API, so only the local hash mapping is removed. For non-LFS URLs this
|
|
6137
|
+
* permanently prevents `match` from resolving the file. For LFS-backed URLs, `match` will
|
|
6138
|
+
* re-fetch the LFS pointer file on the next call and repopulate the hash cache automatically.
|
|
6139
|
+
*
|
|
6140
|
+
* Implements `CacheInterface.delete`.
|
|
6141
|
+
*
|
|
6142
|
+
* @param {string} request
|
|
6143
|
+
* @returns {Promise<boolean>} Resolves to `true` if the hash entry was deleted, `false` otherwise.
|
|
6144
|
+
*/
|
|
6145
|
+
delete = async (request) => {
|
|
6146
|
+
try {
|
|
6147
|
+
const hashCache = await this._getHashCache();
|
|
6148
|
+
return await hashCache.delete(request);
|
|
6149
|
+
} catch {
|
|
6150
|
+
return false;
|
|
6151
|
+
}
|
|
6152
|
+
};
|
|
6153
|
+
/**
|
|
6154
|
+
* Resolves the SHA-256 hash for a given URL.
|
|
6155
|
+
*
|
|
6156
|
+
* Returns the cached hash immediately if one has been persisted to `HASH_CACHE_NAME`.
|
|
6157
|
+
* Otherwise falls back to `_getLfsFileHash` to retrieve the hash from the Hugging Face
|
|
6158
|
+
* LFS pointer file, persisting the result to `HASH_CACHE_NAME` for future lookups.
|
|
6159
|
+
*
|
|
6160
|
+
* Returns `null` if the hash cannot be determined (e.g. non-LFS URL with no cached entry).
|
|
6161
|
+
*
|
|
6162
|
+
* @param {string} url The resource URL to resolve a hash for.
|
|
6163
|
+
* @returns {Promise<string|null>} The hex-encoded SHA-256 hash, or `null` if unavailable.
|
|
6164
|
+
*/
|
|
6165
|
+
_getFileHash = async (url) => {
|
|
6166
|
+
try {
|
|
6167
|
+
const hashCache = await this._getHashCache();
|
|
6168
|
+
const cached = await hashCache.match(url);
|
|
6169
|
+
if (cached) {
|
|
6170
|
+
return cached.text();
|
|
6171
|
+
}
|
|
6172
|
+
const hash = await this._getLfsFileHash(url);
|
|
6173
|
+
if (hash) {
|
|
6174
|
+
await hashCache.put(url, new Response(hash));
|
|
6175
|
+
return hash;
|
|
6176
|
+
}
|
|
6177
|
+
return null;
|
|
6178
|
+
} catch {
|
|
6179
|
+
return null;
|
|
6180
|
+
}
|
|
6181
|
+
};
|
|
6182
|
+
/**
|
|
6183
|
+
* Attempts to retrieve the SHA-256 hash for a Hugging Face resource URL from its raw
|
|
6184
|
+
* Git LFS pointer file.
|
|
6185
|
+
*
|
|
6186
|
+
* Only applicable to URLs containing `/resolve/` (i.e. Hugging Face resolved file URLs).
|
|
6187
|
+
* The `/resolve/` segment is rewritten to `/raw/` to fetch the LFS pointer directly.
|
|
6188
|
+
* Returns `null` for non-LFS URLs or when the network request fails.
|
|
6189
|
+
*
|
|
6190
|
+
* @see https://huggingface.co/docs/hub/en/storage-backends#xet
|
|
6191
|
+
* @param {string} url The resolved Hugging Face URL of the resource.
|
|
6192
|
+
* @returns {Promise<string|null>} The hex-encoded SHA-256 hash, or `null` if unavailable.
|
|
6193
|
+
*/
|
|
6194
|
+
_getLfsFileHash = async (url) => {
|
|
6195
|
+
if (!url.includes("/resolve/")) {
|
|
6196
|
+
return null;
|
|
6197
|
+
}
|
|
6198
|
+
const rawUrl = url.replace("/resolve/", "/raw/");
|
|
6199
|
+
try {
|
|
6200
|
+
const text = await fetch(rawUrl).then((r) => r.text());
|
|
6201
|
+
const match = text.match(/^oid sha256:([0-9a-f]+)$/m);
|
|
6202
|
+
return match ? match[1] : null;
|
|
6203
|
+
} catch {
|
|
6204
|
+
return null;
|
|
6205
|
+
}
|
|
6206
|
+
};
|
|
6207
|
+
/**
|
|
6208
|
+
* Computes the SHA-256 hash of a `Blob`'s contents.
|
|
6209
|
+
*
|
|
6210
|
+
* @param {Blob} blob The blob to hash.
|
|
6211
|
+
* @returns {Promise<string>} The lowercase hex-encoded SHA-256 hash.
|
|
6212
|
+
*/
|
|
6213
|
+
_getBlobHash = async (blob) => {
|
|
6214
|
+
const arrayBuffer = await blob.arrayBuffer();
|
|
6215
|
+
const hashBuffer = await crypto.subtle.digest(HASH_ALGORITHM, arrayBuffer);
|
|
6216
|
+
const hashArray = Array.from(new Uint8Array(hashBuffer));
|
|
6217
|
+
return hashArray.map((byte) => byte.toString(16).padStart(2, "0")).join("");
|
|
6218
|
+
};
|
|
6219
|
+
};
|
|
6220
|
+
|
|
6221
|
+
// src/utils/cache.js
|
|
6222
|
+
async function getCache(file_cache_dir = null) {
|
|
6223
|
+
let cache2 = null;
|
|
6224
|
+
if (env.useCustomCache) {
|
|
6225
|
+
if (!env.customCache) {
|
|
6226
|
+
throw Error("`env.useCustomCache=true`, but `env.customCache` is not defined.");
|
|
6227
|
+
}
|
|
6228
|
+
if (!env.customCache.match || !env.customCache.put) {
|
|
6229
|
+
throw new Error(
|
|
6230
|
+
"`env.customCache` must be an object which implements the `match` and `put` functions of the Web Cache API. For more information, see https://developer.mozilla.org/en-US/docs/Web/API/Cache"
|
|
6231
|
+
);
|
|
6232
|
+
}
|
|
6233
|
+
cache2 = env.customCache;
|
|
6234
|
+
}
|
|
6235
|
+
if (!cache2 && env.experimental_useCrossOriginStorage && CrossOriginStorage.isAvailable()) {
|
|
6236
|
+
cache2 = new CrossOriginStorage();
|
|
6237
|
+
}
|
|
6238
|
+
if (!cache2 && env.useBrowserCache) {
|
|
6239
|
+
if (typeof caches === "undefined") {
|
|
6240
|
+
throw Error("Browser cache is not available in this environment.");
|
|
6241
|
+
}
|
|
6242
|
+
try {
|
|
6243
|
+
cache2 = await caches.open(env.cacheKey);
|
|
6244
|
+
} catch (e) {
|
|
6245
|
+
logger.warn("An error occurred while opening the browser cache:", e);
|
|
6246
|
+
}
|
|
6247
|
+
}
|
|
6248
|
+
if (!cache2 && env.useFSCache) {
|
|
6249
|
+
if (!apis.IS_FS_AVAILABLE) {
|
|
6250
|
+
throw Error("File System Cache is not available in this environment.");
|
|
6251
|
+
}
|
|
6252
|
+
cache2 = new FileCache(file_cache_dir ?? env.cacheDir);
|
|
6253
|
+
}
|
|
6254
|
+
return cache2;
|
|
6255
|
+
}
|
|
6256
|
+
async function tryCache(cache2, ...names) {
|
|
6257
|
+
for (let name of names) {
|
|
6258
|
+
try {
|
|
6259
|
+
let result = await cache2.match(name);
|
|
6260
|
+
if (result) return result;
|
|
6261
|
+
} catch (e) {
|
|
6262
|
+
continue;
|
|
6263
|
+
}
|
|
6264
|
+
}
|
|
6265
|
+
return void 0;
|
|
6266
|
+
}
|
|
6267
|
+
|
|
6268
|
+
// src/utils/lru_cache.js
|
|
6269
|
+
var LRUCache2 = class {
|
|
6270
|
+
/** @type {number} */
|
|
6271
|
+
#capacity;
|
|
6272
|
+
/** @type {Map<any, any>} */
|
|
6273
|
+
#cache;
|
|
6274
|
+
/**
|
|
6275
|
+
* Creates an LRUCache instance.
|
|
6276
|
+
* @param {number} capacity The maximum number of items the cache can hold.
|
|
6277
|
+
*/
|
|
6278
|
+
constructor(capacity) {
|
|
6279
|
+
this.#capacity = capacity;
|
|
6280
|
+
this.#cache = /* @__PURE__ */ new Map();
|
|
6281
|
+
}
|
|
6282
|
+
/**
|
|
6283
|
+
* Retrieves the value associated with the given key and marks the key as recently used.
|
|
6284
|
+
* @param {any} key The key to retrieve.
|
|
6285
|
+
* @returns {any} The value associated with the key, or undefined if the key does not exist.
|
|
6286
|
+
*/
|
|
6287
|
+
get(key) {
|
|
6288
|
+
if (!this.#cache.has(key)) return void 0;
|
|
6289
|
+
const value = this.#cache.get(key);
|
|
6290
|
+
this.#cache.delete(key);
|
|
6291
|
+
this.#cache.set(key, value);
|
|
6292
|
+
return value;
|
|
6293
|
+
}
|
|
6294
|
+
/**
|
|
6295
|
+
* Inserts or updates the key-value pair in the cache.
|
|
6296
|
+
* If the key already exists, it is updated and marked as recently used.
|
|
6297
|
+
* If the cache exceeds its capacity, the least recently used item is evicted.
|
|
6298
|
+
* @param {any} key The key to add or update.
|
|
6299
|
+
* @param {any} value The value to associate with the key.
|
|
6300
|
+
*/
|
|
6301
|
+
put(key, value) {
|
|
6302
|
+
if (this.#cache.has(key)) {
|
|
6303
|
+
this.#cache.delete(key);
|
|
6304
|
+
}
|
|
6305
|
+
this.#cache.set(key, value);
|
|
6306
|
+
if (this.#cache.size > this.#capacity) {
|
|
6307
|
+
this.#cache.delete(this.#cache.keys().next().value);
|
|
6308
|
+
}
|
|
6309
|
+
}
|
|
6310
|
+
/**
|
|
6311
|
+
* Removes the entry for the given key from the cache.
|
|
6312
|
+
* @param {any} key The key to delete.
|
|
6313
|
+
* @returns {boolean} `true` if the entry existed and was removed, `false` otherwise.
|
|
6314
|
+
*/
|
|
6315
|
+
delete(key) {
|
|
6316
|
+
return this.#cache.delete(key);
|
|
6317
|
+
}
|
|
6318
|
+
/**
|
|
6319
|
+
* Clears the cache.
|
|
6320
|
+
*/
|
|
6321
|
+
clear() {
|
|
6322
|
+
this.#cache.clear();
|
|
6323
|
+
}
|
|
6324
|
+
};
|
|
6325
|
+
|
|
6326
|
+
// src/utils/memoize_promise.js
|
|
6327
|
+
var MAX_CACHE_SIZE = 100;
|
|
6328
|
+
var cache = new LRUCache2(MAX_CACHE_SIZE);
|
|
6329
|
+
function memoizePromise(key, factory) {
|
|
6330
|
+
const cached = cache.get(key);
|
|
6331
|
+
if (cached !== void 0) {
|
|
6332
|
+
return cached;
|
|
6333
|
+
}
|
|
6334
|
+
const promise = factory().then(
|
|
6335
|
+
(value) => value,
|
|
6336
|
+
(err) => {
|
|
6337
|
+
cache.delete(key);
|
|
6338
|
+
return Promise.reject(err);
|
|
6339
|
+
}
|
|
6340
|
+
);
|
|
6341
|
+
cache.put(key, promise);
|
|
6342
|
+
return promise;
|
|
6343
|
+
}
|
|
6344
|
+
|
|
6345
|
+
// src/utils/model_registry/get_file_metadata.js
|
|
6346
|
+
async function fetch_file_head(urlOrPath) {
|
|
5847
6347
|
if (!isValidUrl(urlOrPath, ["http:", "https:"])) {
|
|
5848
6348
|
return null;
|
|
5849
6349
|
}
|
|
5850
6350
|
const headers = getFetchHeaders(urlOrPath);
|
|
5851
6351
|
headers.set("Range", "bytes=0-0");
|
|
5852
|
-
return env.fetch(urlOrPath, { method: "GET", headers });
|
|
6352
|
+
return env.fetch(urlOrPath, { method: "GET", headers, cache: "no-store" });
|
|
6353
|
+
}
|
|
6354
|
+
function get_file_metadata(path_or_repo_id, filename, options = {}) {
|
|
6355
|
+
const key = JSON.stringify([
|
|
6356
|
+
path_or_repo_id,
|
|
6357
|
+
filename,
|
|
6358
|
+
options?.revision,
|
|
6359
|
+
options?.cache_dir,
|
|
6360
|
+
options?.local_files_only
|
|
6361
|
+
]);
|
|
6362
|
+
return memoizePromise(key, () => _get_file_metadata(path_or_repo_id, filename, options));
|
|
5853
6363
|
}
|
|
5854
|
-
async function
|
|
5855
|
-
const
|
|
6364
|
+
async function _get_file_metadata(path_or_repo_id, filename, options) {
|
|
6365
|
+
const cache2 = await getCache(options?.cache_dir);
|
|
5856
6366
|
const { localPath, remoteURL, proposedCacheKey, validModelId } = buildResourcePaths(
|
|
5857
6367
|
path_or_repo_id,
|
|
5858
6368
|
filename,
|
|
5859
6369
|
options,
|
|
5860
|
-
|
|
6370
|
+
cache2
|
|
5861
6371
|
);
|
|
5862
|
-
const cachedResponse = await checkCachedResource(
|
|
6372
|
+
const cachedResponse = await checkCachedResource(cache2, localPath, proposedCacheKey);
|
|
5863
6373
|
if (cachedResponse !== void 0 && typeof cachedResponse !== "string") {
|
|
5864
6374
|
const size = cachedResponse.headers.get("content-length");
|
|
5865
6375
|
const contentType = cachedResponse.headers.get("content-type");
|
|
@@ -5957,7 +6467,7 @@ function getFetchHeaders(urlOrPath) {
|
|
|
5957
6467
|
}
|
|
5958
6468
|
return headers;
|
|
5959
6469
|
}
|
|
5960
|
-
function buildResourcePaths(path_or_repo_id, filename, options = {},
|
|
6470
|
+
function buildResourcePaths(path_or_repo_id, filename, options = {}, cache2 = null) {
|
|
5961
6471
|
const revision = options.revision ?? "main";
|
|
5962
6472
|
const requestURL = pathJoin(path_or_repo_id, filename);
|
|
5963
6473
|
const validModelId = isValidHfModelId(path_or_repo_id);
|
|
@@ -5967,7 +6477,7 @@ function buildResourcePaths(path_or_repo_id, filename, options = {}, cache = nul
|
|
|
5967
6477
|
env.remotePathTemplate.replaceAll("{model}", path_or_repo_id).replaceAll("{revision}", encodeURIComponent(revision)),
|
|
5968
6478
|
filename
|
|
5969
6479
|
);
|
|
5970
|
-
const proposedCacheKey =
|
|
6480
|
+
const proposedCacheKey = cache2 instanceof FileCache ? (
|
|
5971
6481
|
// Choose cache key for filesystem cache
|
|
5972
6482
|
// When using the main revision (default), we use the request URL as the cache key.
|
|
5973
6483
|
// If a specific revision is requested, we account for this in the cache key.
|
|
@@ -5981,14 +6491,14 @@ function buildResourcePaths(path_or_repo_id, filename, options = {}, cache = nul
|
|
|
5981
6491
|
validModelId
|
|
5982
6492
|
};
|
|
5983
6493
|
}
|
|
5984
|
-
async function checkCachedResource(
|
|
5985
|
-
if (!
|
|
6494
|
+
async function checkCachedResource(cache2, localPath, proposedCacheKey) {
|
|
6495
|
+
if (!cache2) {
|
|
5986
6496
|
return void 0;
|
|
5987
6497
|
}
|
|
5988
|
-
return await tryCache(
|
|
6498
|
+
return await tryCache(cache2, localPath, proposedCacheKey);
|
|
5989
6499
|
}
|
|
5990
|
-
async function storeCachedResource(path_or_repo_id, filename,
|
|
5991
|
-
if (await
|
|
6500
|
+
async function storeCachedResource(path_or_repo_id, filename, cache2, cacheKey, response, result, options = {}) {
|
|
6501
|
+
if (await cache2.match(cacheKey) !== void 0) {
|
|
5992
6502
|
return;
|
|
5993
6503
|
}
|
|
5994
6504
|
if (!result) {
|
|
@@ -5998,14 +6508,14 @@ async function storeCachedResource(path_or_repo_id, filename, cache, cacheKey, r
|
|
|
5998
6508
|
file: filename,
|
|
5999
6509
|
...data
|
|
6000
6510
|
}) : void 0;
|
|
6001
|
-
await
|
|
6511
|
+
await cache2.put(
|
|
6002
6512
|
cacheKey,
|
|
6003
6513
|
/** @type {Response} */
|
|
6004
6514
|
response,
|
|
6005
6515
|
wrapped_progress
|
|
6006
6516
|
);
|
|
6007
6517
|
} else if (typeof response !== "string") {
|
|
6008
|
-
await
|
|
6518
|
+
await cache2.put(
|
|
6009
6519
|
cacheKey,
|
|
6010
6520
|
new Response(
|
|
6011
6521
|
/** @type {any} */
|
|
@@ -6019,17 +6529,17 @@ async function storeCachedResource(path_or_repo_id, filename, cache, cacheKey, r
|
|
|
6019
6529
|
});
|
|
6020
6530
|
}
|
|
6021
6531
|
}
|
|
6022
|
-
async function loadResourceFile(path_or_repo_id, filename, fatal = true, options = {}, return_path = false,
|
|
6532
|
+
async function loadResourceFile(path_or_repo_id, filename, fatal = true, options = {}, return_path = false, cache2 = null) {
|
|
6023
6533
|
const { requestURL, localPath, remoteURL, proposedCacheKey, validModelId } = buildResourcePaths(
|
|
6024
6534
|
path_or_repo_id,
|
|
6025
6535
|
filename,
|
|
6026
6536
|
options,
|
|
6027
|
-
|
|
6537
|
+
cache2
|
|
6028
6538
|
);
|
|
6029
6539
|
let cacheKey;
|
|
6030
6540
|
let toCacheResponse = false;
|
|
6031
6541
|
let response;
|
|
6032
|
-
response = await checkCachedResource(
|
|
6542
|
+
response = await checkCachedResource(cache2, localPath, proposedCacheKey);
|
|
6033
6543
|
const cacheHit = response !== void 0;
|
|
6034
6544
|
if (!cacheHit) {
|
|
6035
6545
|
if (env.allowLocalModels) {
|
|
@@ -6070,7 +6580,7 @@ async function loadResourceFile(path_or_repo_id, filename, fatal = true, options
|
|
|
6070
6580
|
}
|
|
6071
6581
|
cacheKey = proposedCacheKey;
|
|
6072
6582
|
}
|
|
6073
|
-
toCacheResponse =
|
|
6583
|
+
toCacheResponse = cache2 && // 1. A caching system is available
|
|
6074
6584
|
typeof Response !== "undefined" && // 2. `Response` is defined (i.e., we are in a browser-like environment)
|
|
6075
6585
|
response instanceof Response && // 3. result is a `Response` object (i.e., not a `FileResponse`)
|
|
6076
6586
|
response.status === 200;
|
|
@@ -6132,7 +6642,7 @@ async function loadResourceFile(path_or_repo_id, filename, fatal = true, options
|
|
|
6132
6642
|
// i.e., do not cache FileResponses (prevents duplication)
|
|
6133
6643
|
toCacheResponse && cacheKey && typeof response !== "string"
|
|
6134
6644
|
) {
|
|
6135
|
-
await storeCachedResource(path_or_repo_id, filename,
|
|
6645
|
+
await storeCachedResource(path_or_repo_id, filename, cache2, cacheKey, response, result, options);
|
|
6136
6646
|
}
|
|
6137
6647
|
dispatchCallback(options.progress_callback, {
|
|
6138
6648
|
status: "done",
|
|
@@ -6148,7 +6658,7 @@ async function loadResourceFile(path_or_repo_id, filename, fatal = true, options
|
|
|
6148
6658
|
if (response instanceof FileResponse) {
|
|
6149
6659
|
return response.filePath;
|
|
6150
6660
|
}
|
|
6151
|
-
const cachedResponse = await
|
|
6661
|
+
const cachedResponse = await cache2?.match(cacheKey);
|
|
6152
6662
|
if (cachedResponse instanceof FileResponse) {
|
|
6153
6663
|
return cachedResponse.filePath;
|
|
6154
6664
|
} else if (cachedResponse instanceof Response) {
|
|
@@ -6175,8 +6685,8 @@ async function getModelFile(path_or_repo_id, filename, fatal = true, options = {
|
|
|
6175
6685
|
name: path_or_repo_id,
|
|
6176
6686
|
file: filename
|
|
6177
6687
|
});
|
|
6178
|
-
const
|
|
6179
|
-
return await loadResourceFile(path_or_repo_id, filename, fatal, options, return_path,
|
|
6688
|
+
const cache2 = await getCache(options?.cache_dir);
|
|
6689
|
+
return await loadResourceFile(path_or_repo_id, filename, fatal, options, return_path, cache2);
|
|
6180
6690
|
}
|
|
6181
6691
|
async function getModelText(modelPath, fileName, fatal = true, options = {}) {
|
|
6182
6692
|
const buffer = await getModelFile(modelPath, fileName, fatal, options, false);
|
|
@@ -6973,7 +7483,7 @@ __export(onnxruntime_node_exports, {
|
|
|
6973
7483
|
});
|
|
6974
7484
|
var onnxruntime_node_default = {};
|
|
6975
7485
|
|
|
6976
|
-
// ../../node_modules/.pnpm/onnxruntime-web@1.25.0-dev.
|
|
7486
|
+
// ../../node_modules/.pnpm/onnxruntime-web@1.25.0-dev.20260307-d626b568e0/node_modules/onnxruntime-web/dist/ort.webgpu.bundle.min.mjs
|
|
6977
7487
|
var ort_webgpu_bundle_min_exports = {};
|
|
6978
7488
|
__export(ort_webgpu_bundle_min_exports, {
|
|
6979
7489
|
InferenceSession: () => Jf,
|
|
@@ -7741,7 +8251,7 @@ async function ts(a = {}) {
|
|
|
7741
8251
|
throw L(e = "Aborted(" + e + ")"), W = true, e = new WebAssembly.RuntimeError(e + ". Build with -sASSERTIONS for more info."), R?.(e), e;
|
|
7742
8252
|
}
|
|
7743
8253
|
function Ye() {
|
|
7744
|
-
return { a: { f: Vs, J: js, k: Hs, p: Ys, l: qs, ta: Js, b: Xs, ca: Qs, Ka: Sn, s: Zs, da: Ln, _a: On, Ga: Bn, Ia: Mn, $a: Cn, Ya: Un, Ra: Dn, Xa: Pn, pa: _n, Ha: Rn, Yb: Nn, Za: kn, Fa: Wn, eb: Ks, Da: ti, Tb: ri, Rb: oi, Ca: si, M: ii,
|
|
8254
|
+
return { a: { f: Vs, J: js, k: Hs, p: Ys, l: qs, ta: Js, b: Xs, ca: Qs, Ka: Sn, s: Zs, da: Ln, _a: On, Ga: Bn, Ia: Mn, $a: Cn, Ya: Un, Ra: Dn, Xa: Pn, pa: _n, Ha: Rn, Yb: Nn, Za: kn, Fa: Wn, eb: Ks, Da: ti, Tb: ri, Rb: oi, Ca: si, M: ii, I: ui, Sb: fi, ka: yi, Ub: bi, Ua: wi, Wb: Ti, La: vi, Pb: Ei, la: Si, Ta: Ar, bb: Ai, U: Oi, n: Di, c: Er, sb: Pi, w: _i, L: Ri, z: Ni, j: ki, o: Yn, tb: Wi, G: Fi, T: Gi, h: $i, u: zi, m: Vi, i: ji, Oa: Hi, Pa: Yi, Qa: qi, Ma: Qn, Na: Zn, Qb: Kn, fb: Xi, db: Ki, Y: eu, rb: tu, ma: ru, cb: Qi, gb: nu, ab: ou, Xb: au, N: Ji, hb: su, X: iu, Vb: uu, ob: bu, C: wu, sa: gu, ra: Tu, qb: vu, W: Eu, v: Su, nb: Au, mb: Iu, lb: xu, pb: Lu, kb: Ou, jb: Bu, ib: Mu, Va: ao, Wa: so, Ja: br, ea: io, oa: uo, Sa: fo, na: co, Db: Gf, xa: Df, Eb: Ff, ya: Uf, F: Ef, e: ff, r: sf, x: af, D: gf, Ib: Bf, ba: Lf, B: df, za: Mf, $: Pf, ha: Of, Fb: kf, Gb: Nf, Ba: Sf, Aa: xf, Jb: Af, wa: Wf, aa: Cf, d: cf, A: lf, q: uf, Cb: $f, t: mf, y: Tf, H: pf, E: hf, K: vf, S: _f, ja: wf, _: Rf, Kb: bf, Lb: yf, P: If2, g: Uu, a: Fe, Ob: qe, Hb: Du, ia: Pu, O: _u, qa: Ru, Mb: Nu, Q: ku, zb: Wu, Ab: Fu, ua: Gu, fa: $u, R: zu, Ea: Vu, va: ju, Z: Hu, xb: Yu, Zb: qu, V: Ju, Bb: Xu, ub: Qu, vb: Ku, wb: ef, ga: tf, yb: rf, Nb: nf } };
|
|
7745
8255
|
}
|
|
7746
8256
|
async function bt() {
|
|
7747
8257
|
function e(o, u) {
|
|
@@ -8928,7 +9438,7 @@ async function ts(a = {}) {
|
|
|
8928
9438
|
Te(`invalid type for getValue: ${t}`);
|
|
8929
9439
|
}
|
|
8930
9440
|
}, r.UTF8ToString = ct, r.stringToUTF8 = Pe, r.lengthBytesUTF8 = _e;
|
|
8931
|
-
var lo, po, Dr, Wt, xe, pt, mo, ho, yo, bo, wo, go, To, vo, Eo, So, Ao, Pr, _r, Rr, Nr, Et, kr, Io, Wr, xo, Lo, Oo, Fr, Bo, Mo, Gr, N, St, Co, D, Ft, P, Uo, $r, Do, Po, _o, zr, Ro, No, ko, Wo, Fo, Go, $o, zo, Vo, jo, Ho, Yo, qo, Jo, Xo, Qo, Zo, Ko, ea, ta, ra, na, oa, aa, sa, ia, ua, fa, ca, da, la, pa, ma, ha, ya, ba, wa, ga, Ta, ke, of = [qe, yr, En, Ln, On, Bn, Mn, Cn, Un, Dn, Pn, _n, Rn, Nn, kn, Wn, Qn, Zn, Kn, ao, so, io, uo, fo, co], Vr = {
|
|
9441
|
+
var lo, po, Dr, Wt, xe, pt, mo, ho, yo, bo, wo, go, To, vo, Eo, So, Ao, Pr, _r, Rr, Nr, Et, kr, Io, Wr, xo, Lo, Oo, Fr, Bo, Mo, Gr, N, St, Co, D, Ft, P, Uo, $r, Do, Po, _o, zr, Ro, No, ko, Wo, Fo, Go, $o, zo, Vo, jo, Ho, Yo, qo, Jo, Xo, Qo, Zo, Ko, ea, ta, ra, na, oa, aa, sa, ia, ua, fa, ca, da, la, pa, ma, ha, ya, ba, wa, ga, Ta, ke, of = [qe, yr, En, Ln, On, Bn, Mn, Cn, Un, Dn, Pn, _n, Rn, Nn, kn, Wn, Qn, Zn, Kn, ao, so, io, uo, fo, co], Vr = { 925676: (e, t, n, o, u) => {
|
|
8932
9442
|
if (r === void 0 || !r.Uc) return 1;
|
|
8933
9443
|
if ((e = ct(Number(e >>> 0))).startsWith("./") && (e = e.substring(2)), !(e = r.Uc.get(e))) return 2;
|
|
8934
9444
|
if (t = Number(t >>> 0), n = Number(n >>> 0), o = Number(o >>> 0), t + n > e.byteLength) return 3;
|
|
@@ -8948,11 +9458,11 @@ async function ts(a = {}) {
|
|
|
8948
9458
|
} catch {
|
|
8949
9459
|
return 4;
|
|
8950
9460
|
}
|
|
8951
|
-
},
|
|
9461
|
+
}, 926500: (e, t, n) => {
|
|
8952
9462
|
r.Sd(e, (p(), J).subarray(t >>> 0, t + n >>> 0));
|
|
8953
|
-
},
|
|
9463
|
+
}, 926564: () => r.me(), 926606: (e) => {
|
|
8954
9464
|
r.jd(e);
|
|
8955
|
-
},
|
|
9465
|
+
}, 926643: () => typeof wasmOffsetConverter < "u" };
|
|
8956
9466
|
function af(e, t, n, o) {
|
|
8957
9467
|
var u = P();
|
|
8958
9468
|
try {
|
|
@@ -10868,7 +11378,7 @@ var $s = k(() => {
|
|
|
10868
11378
|
Ve();
|
|
10869
11379
|
Ve();
|
|
10870
11380
|
Ve();
|
|
10871
|
-
var Xa = "1.25.0-dev.
|
|
11381
|
+
var Xa = "1.25.0-dev.20260307-d626b568e0";
|
|
10872
11382
|
var Tl = Zr;
|
|
10873
11383
|
{
|
|
10874
11384
|
let a = ($s(), $t(Gs)).wasmBackend;
|
|
@@ -10879,11 +11389,11 @@ Object.defineProperty(K.versions, "web", { value: Xa, enumerable: true });
|
|
|
10879
11389
|
// src/backends/utils/cacheWasm.js
|
|
10880
11390
|
async function loadAndCacheFile(url) {
|
|
10881
11391
|
const fileName = url.split("/").pop();
|
|
10882
|
-
let
|
|
11392
|
+
let cache2;
|
|
10883
11393
|
try {
|
|
10884
|
-
|
|
10885
|
-
if (
|
|
10886
|
-
const result = await
|
|
11394
|
+
cache2 = await getCache();
|
|
11395
|
+
if (cache2) {
|
|
11396
|
+
const result = await cache2.match(url);
|
|
10887
11397
|
if (result) {
|
|
10888
11398
|
return result;
|
|
10889
11399
|
}
|
|
@@ -10895,9 +11405,9 @@ async function loadAndCacheFile(url) {
|
|
|
10895
11405
|
if (!response.ok) {
|
|
10896
11406
|
throw new Error(`Failed to fetch ${fileName}: ${response.status} ${response.statusText}`);
|
|
10897
11407
|
}
|
|
10898
|
-
if (
|
|
11408
|
+
if (cache2) {
|
|
10899
11409
|
try {
|
|
10900
|
-
await
|
|
11410
|
+
await cache2.put(url, response.clone());
|
|
10901
11411
|
} catch (e) {
|
|
10902
11412
|
logger.warn(`Failed to cache ${fileName}:`, e);
|
|
10903
11413
|
}
|
|
@@ -10915,39 +11425,26 @@ async function loadWasmBinary(wasmURL) {
|
|
|
10915
11425
|
}
|
|
10916
11426
|
}
|
|
10917
11427
|
async function loadWasmFactory(libURL) {
|
|
11428
|
+
if (apis.IS_SERVICE_WORKER_ENV || apis.IS_CHROME_AVAILABLE) {
|
|
11429
|
+
return libURL;
|
|
11430
|
+
}
|
|
10918
11431
|
const response = await loadAndCacheFile(libURL);
|
|
10919
11432
|
if (!response || typeof response === "string") return null;
|
|
10920
11433
|
try {
|
|
10921
11434
|
let code = await response.text();
|
|
10922
|
-
const baseUrl = libURL.split("/").slice(0, -1).join("/");
|
|
10923
|
-
code = code.replaceAll("import.meta.url", `"${baseUrl}"`);
|
|
10924
11435
|
code = code.replaceAll("globalThis.process?.versions?.node", "false");
|
|
10925
11436
|
const blob = new Blob([code], { type: "text/javascript" });
|
|
10926
11437
|
return URL.createObjectURL(blob);
|
|
10927
11438
|
} catch (error) {
|
|
10928
|
-
logger.warn("Failed to read WASM
|
|
11439
|
+
logger.warn("Failed to read WASM factory:", error);
|
|
10929
11440
|
return null;
|
|
10930
11441
|
}
|
|
10931
11442
|
}
|
|
10932
|
-
function isBlobURL(url) {
|
|
10933
|
-
return isValidUrl(url, ["blob:"]);
|
|
10934
|
-
}
|
|
10935
|
-
function toAbsoluteURL(url) {
|
|
10936
|
-
let baseURL;
|
|
10937
|
-
if (typeof location !== "undefined" && location.href) {
|
|
10938
|
-
baseURL = location.href;
|
|
10939
|
-
} else if (typeof import.meta !== "undefined" && import.meta.url) {
|
|
10940
|
-
baseURL = import.meta.url;
|
|
10941
|
-
} else {
|
|
10942
|
-
return url;
|
|
10943
|
-
}
|
|
10944
|
-
return new URL(url, baseURL).href;
|
|
10945
|
-
}
|
|
10946
11443
|
|
|
10947
|
-
// ../../node_modules/.pnpm/onnxruntime-common@1.24.
|
|
10948
|
-
var version = "1.24.
|
|
11444
|
+
// ../../node_modules/.pnpm/onnxruntime-common@1.24.3/node_modules/onnxruntime-common/dist/esm/version.js
|
|
11445
|
+
var version = "1.24.3";
|
|
10949
11446
|
|
|
10950
|
-
// ../../node_modules/.pnpm/onnxruntime-common@1.24.
|
|
11447
|
+
// ../../node_modules/.pnpm/onnxruntime-common@1.24.3/node_modules/onnxruntime-common/dist/esm/env-impl.js
|
|
10951
11448
|
var logLevelValue = "warning";
|
|
10952
11449
|
var env2 = {
|
|
10953
11450
|
wasm: {},
|
|
@@ -10969,7 +11466,7 @@ var env2 = {
|
|
|
10969
11466
|
};
|
|
10970
11467
|
Object.defineProperty(env2, "logLevel", { enumerable: true });
|
|
10971
11468
|
|
|
10972
|
-
// ../../node_modules/.pnpm/onnxruntime-common@1.24.
|
|
11469
|
+
// ../../node_modules/.pnpm/onnxruntime-common@1.24.3/node_modules/onnxruntime-common/dist/esm/tensor-conversion-impl.js
|
|
10973
11470
|
var tensorToDataURL = (tensor, options) => {
|
|
10974
11471
|
const canvas = typeof document !== "undefined" ? document.createElement("canvas") : new OffscreenCanvas(1, 1);
|
|
10975
11472
|
canvas.width = tensor.dims[3];
|
|
@@ -11128,7 +11625,7 @@ var tensorToImageData = (tensor, options) => {
|
|
|
11128
11625
|
return image;
|
|
11129
11626
|
};
|
|
11130
11627
|
|
|
11131
|
-
// ../../node_modules/.pnpm/onnxruntime-common@1.24.
|
|
11628
|
+
// ../../node_modules/.pnpm/onnxruntime-common@1.24.3/node_modules/onnxruntime-common/dist/esm/tensor-factory-impl.js
|
|
11132
11629
|
var bufferToTensor = (buffer, options) => {
|
|
11133
11630
|
if (buffer === void 0) {
|
|
11134
11631
|
throw new Error("Image buffer must be defined");
|
|
@@ -11337,7 +11834,7 @@ var tensorFromMLTensor = (mlTensor, options) => {
|
|
|
11337
11834
|
};
|
|
11338
11835
|
var tensorFromPinnedBuffer = (type, buffer, dims) => new Tensor({ location: "cpu-pinned", type, data: buffer, dims: dims ?? [buffer.length] });
|
|
11339
11836
|
|
|
11340
|
-
// ../../node_modules/.pnpm/onnxruntime-common@1.24.
|
|
11837
|
+
// ../../node_modules/.pnpm/onnxruntime-common@1.24.3/node_modules/onnxruntime-common/dist/esm/tensor-impl-type-mapping.js
|
|
11341
11838
|
var NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP = /* @__PURE__ */ new Map([
|
|
11342
11839
|
["float32", Float32Array],
|
|
11343
11840
|
["uint8", Uint8Array],
|
|
@@ -11386,7 +11883,7 @@ var checkTypedArray = () => {
|
|
|
11386
11883
|
}
|
|
11387
11884
|
};
|
|
11388
11885
|
|
|
11389
|
-
// ../../node_modules/.pnpm/onnxruntime-common@1.24.
|
|
11886
|
+
// ../../node_modules/.pnpm/onnxruntime-common@1.24.3/node_modules/onnxruntime-common/dist/esm/tensor-utils-impl.js
|
|
11390
11887
|
var calculateSize = (dims) => {
|
|
11391
11888
|
let size = 1;
|
|
11392
11889
|
for (let i = 0; i < dims.length; i++) {
|
|
@@ -11438,7 +11935,7 @@ var tensorReshape = (tensor, dims) => {
|
|
|
11438
11935
|
}
|
|
11439
11936
|
};
|
|
11440
11937
|
|
|
11441
|
-
// ../../node_modules/.pnpm/onnxruntime-common@1.24.
|
|
11938
|
+
// ../../node_modules/.pnpm/onnxruntime-common@1.24.3/node_modules/onnxruntime-common/dist/esm/tensor-impl.js
|
|
11442
11939
|
var Tensor = class {
|
|
11443
11940
|
/**
|
|
11444
11941
|
* implementation.
|
|
@@ -11705,7 +12202,7 @@ var Tensor = class {
|
|
|
11705
12202
|
}
|
|
11706
12203
|
};
|
|
11707
12204
|
|
|
11708
|
-
// ../../node_modules/.pnpm/onnxruntime-common@1.24.
|
|
12205
|
+
// ../../node_modules/.pnpm/onnxruntime-common@1.24.3/node_modules/onnxruntime-common/dist/esm/tensor.js
|
|
11709
12206
|
var Tensor2 = Tensor;
|
|
11710
12207
|
|
|
11711
12208
|
// src/backends/onnx.js
|
|
@@ -11804,7 +12301,6 @@ function deviceToExecutionProviders(device = null) {
|
|
|
11804
12301
|
}
|
|
11805
12302
|
throw new Error(`Unsupported device: "${device}". Should be one of: ${supportedDevices.join(", ")}.`);
|
|
11806
12303
|
}
|
|
11807
|
-
var IS_WEB_ENV = apis.IS_BROWSER_ENV || apis.IS_WEBWORKER_ENV;
|
|
11808
12304
|
var webInitChain = Promise.resolve();
|
|
11809
12305
|
var wasmLoadPromise = null;
|
|
11810
12306
|
async function ensureWasmLoaded() {
|
|
@@ -11813,6 +12309,11 @@ async function ensureWasmLoaded() {
|
|
|
11813
12309
|
}
|
|
11814
12310
|
const shouldUseWasmCache = env.useWasmCache && typeof ONNX_ENV?.wasm?.wasmPaths === "object" && ONNX_ENV?.wasm?.wasmPaths?.wasm && ONNX_ENV?.wasm?.wasmPaths?.mjs;
|
|
11815
12311
|
if (!shouldUseWasmCache) {
|
|
12312
|
+
if (apis.IS_DENO_WEB_RUNTIME) {
|
|
12313
|
+
throw new Error(
|
|
12314
|
+
"env.useWasmCache=false is not supported in Deno's web runtime. Remove the useWasmCache override."
|
|
12315
|
+
);
|
|
12316
|
+
}
|
|
11816
12317
|
wasmLoadPromise = Promise.resolve();
|
|
11817
12318
|
return wasmLoadPromise;
|
|
11818
12319
|
}
|
|
@@ -11821,6 +12322,7 @@ async function ensureWasmLoaded() {
|
|
|
11821
12322
|
/** @type {{ wasm: string, mjs: string }} */
|
|
11822
12323
|
ONNX_ENV.wasm.wasmPaths
|
|
11823
12324
|
);
|
|
12325
|
+
let wasmBinaryLoaded = false;
|
|
11824
12326
|
await Promise.all([
|
|
11825
12327
|
// Load and cache the WASM binary
|
|
11826
12328
|
urls.wasm && !isBlobURL(urls.wasm) ? (async () => {
|
|
@@ -11828,12 +12330,13 @@ async function ensureWasmLoaded() {
|
|
|
11828
12330
|
const wasmBinary = await loadWasmBinary(toAbsoluteURL(urls.wasm));
|
|
11829
12331
|
if (wasmBinary) {
|
|
11830
12332
|
ONNX_ENV.wasm.wasmBinary = wasmBinary;
|
|
12333
|
+
wasmBinaryLoaded = true;
|
|
11831
12334
|
}
|
|
11832
12335
|
} catch (err) {
|
|
11833
12336
|
logger.warn("Failed to pre-load WASM binary:", err);
|
|
11834
12337
|
}
|
|
11835
12338
|
})() : Promise.resolve(),
|
|
11836
|
-
// Load and cache the WASM factory
|
|
12339
|
+
// Load and cache the WASM factory as a blob URL
|
|
11837
12340
|
urls.mjs && !isBlobURL(urls.mjs) ? (async () => {
|
|
11838
12341
|
try {
|
|
11839
12342
|
const wasmFactoryBlob = await loadWasmFactory(toAbsoluteURL(urls.mjs));
|
|
@@ -11845,6 +12348,9 @@ async function ensureWasmLoaded() {
|
|
|
11845
12348
|
}
|
|
11846
12349
|
})() : Promise.resolve()
|
|
11847
12350
|
]);
|
|
12351
|
+
if (!wasmBinaryLoaded) {
|
|
12352
|
+
ONNX_ENV.wasm.wasmPaths.mjs = urls.mjs;
|
|
12353
|
+
}
|
|
11848
12354
|
})();
|
|
11849
12355
|
return wasmLoadPromise;
|
|
11850
12356
|
}
|
|
@@ -11856,51 +12362,52 @@ async function createInferenceSession(buffer_or_path, session_options, session_c
|
|
|
11856
12362
|
logSeverityLevel,
|
|
11857
12363
|
...session_options
|
|
11858
12364
|
});
|
|
11859
|
-
const session = await (IS_WEB_ENV ? webInitChain = webInitChain.then(load) : load());
|
|
12365
|
+
const session = await (apis.IS_WEB_ENV ? webInitChain = webInitChain.then(load) : load());
|
|
11860
12366
|
session.config = session_config;
|
|
11861
12367
|
return session;
|
|
11862
12368
|
}
|
|
11863
12369
|
var webInferenceChain = Promise.resolve();
|
|
11864
12370
|
async function runInferenceSession(session, ortFeed) {
|
|
11865
12371
|
const run = () => session.run(ortFeed);
|
|
11866
|
-
|
|
11867
|
-
return output;
|
|
12372
|
+
return apis.IS_WEB_ENV ? webInferenceChain = webInferenceChain.then(run) : run();
|
|
11868
12373
|
}
|
|
11869
12374
|
function isONNXTensor(x) {
|
|
11870
12375
|
return x instanceof ONNX.Tensor;
|
|
11871
12376
|
}
|
|
11872
12377
|
var ONNX_ENV = ONNX?.env;
|
|
11873
|
-
if (ONNX_ENV?.wasm) {
|
|
11874
|
-
if (
|
|
11875
|
-
// @ts-ignore Cannot find name 'ServiceWorkerGlobalScope'.ts(2304)
|
|
11876
|
-
!(typeof ServiceWorkerGlobalScope !== "undefined" && self instanceof ServiceWorkerGlobalScope) && ONNX_ENV.versions?.web && !ONNX_ENV.wasm.wasmPaths
|
|
11877
|
-
) {
|
|
11878
|
-
const wasmPathPrefix = `https://cdn.jsdelivr.net/npm/onnxruntime-web@${ONNX_ENV.versions.web}/dist/`;
|
|
11879
|
-
ONNX_ENV.wasm.wasmPaths = apis.IS_SAFARI ? {
|
|
11880
|
-
mjs: `${wasmPathPrefix}ort-wasm-simd-threaded.mjs`,
|
|
11881
|
-
wasm: `${wasmPathPrefix}ort-wasm-simd-threaded.wasm`
|
|
11882
|
-
} : {
|
|
11883
|
-
mjs: `${wasmPathPrefix}ort-wasm-simd-threaded.asyncify.mjs`,
|
|
11884
|
-
wasm: `${wasmPathPrefix}ort-wasm-simd-threaded.asyncify.wasm`
|
|
11885
|
-
};
|
|
11886
|
-
}
|
|
11887
|
-
ONNX_ENV.wasm.proxy = false;
|
|
11888
|
-
}
|
|
11889
|
-
if (ONNX_ENV?.webgpu) {
|
|
11890
|
-
ONNX_ENV.webgpu.powerPreference = "high-performance";
|
|
11891
|
-
}
|
|
11892
12378
|
function isONNXProxy() {
|
|
11893
12379
|
return ONNX_ENV?.wasm?.proxy;
|
|
11894
12380
|
}
|
|
11895
|
-
|
|
11896
|
-
|
|
11897
|
-
|
|
12381
|
+
if (ONNX_ENV) {
|
|
12382
|
+
let setLogLevel = function(logLevel2) {
|
|
12383
|
+
const severityLevel = getOnnxLogSeverityLevel(logLevel2);
|
|
12384
|
+
ONNX_ENV.logLevel = ONNX_LOG_LEVEL_NAMES[severityLevel];
|
|
12385
|
+
};
|
|
12386
|
+
if (ONNX_ENV.wasm) {
|
|
12387
|
+
if (
|
|
12388
|
+
// @ts-ignore Cannot find name 'ServiceWorkerGlobalScope'.ts(2304)
|
|
12389
|
+
!(typeof ServiceWorkerGlobalScope !== "undefined" && self instanceof ServiceWorkerGlobalScope) && ONNX_ENV.versions?.web && !ONNX_ENV.wasm.wasmPaths
|
|
12390
|
+
) {
|
|
12391
|
+
const wasmPathPrefix = `https://cdn.jsdelivr.net/npm/onnxruntime-web@${ONNX_ENV.versions.web}/dist/`;
|
|
12392
|
+
ONNX_ENV.wasm.wasmPaths = apis.IS_SAFARI ? {
|
|
12393
|
+
mjs: `${wasmPathPrefix}ort-wasm-simd-threaded.mjs`,
|
|
12394
|
+
wasm: `${wasmPathPrefix}ort-wasm-simd-threaded.wasm`
|
|
12395
|
+
} : {
|
|
12396
|
+
mjs: `${wasmPathPrefix}ort-wasm-simd-threaded.asyncify.mjs`,
|
|
12397
|
+
wasm: `${wasmPathPrefix}ort-wasm-simd-threaded.asyncify.wasm`
|
|
12398
|
+
};
|
|
12399
|
+
}
|
|
12400
|
+
ONNX_ENV.wasm.proxy = false;
|
|
12401
|
+
}
|
|
12402
|
+
if (ONNX_ENV.webgpu) {
|
|
12403
|
+
ONNX_ENV.webgpu.powerPreference = "high-performance";
|
|
12404
|
+
}
|
|
12405
|
+
setLogLevel(env.logLevel ?? LogLevel.WARNING);
|
|
12406
|
+
env.backends.onnx = {
|
|
12407
|
+
...ONNX_ENV,
|
|
12408
|
+
setLogLevel
|
|
12409
|
+
};
|
|
11898
12410
|
}
|
|
11899
|
-
setLogLevel(env.logLevel ?? LogLevel.WARNING);
|
|
11900
|
-
env.backends.onnx = {
|
|
11901
|
-
...ONNX_ENV,
|
|
11902
|
-
setLogLevel
|
|
11903
|
-
};
|
|
11904
12411
|
|
|
11905
12412
|
// src/ops/registry.js
|
|
11906
12413
|
var wrap = async (session_bytes, session_options, names) => {
|
|
@@ -13105,192 +13612,31 @@ var DataTypeMap = Object.freeze({
|
|
|
13105
13612
|
int4: Int8Array
|
|
13106
13613
|
});
|
|
13107
13614
|
|
|
13108
|
-
// src/utils/
|
|
13109
|
-
var
|
|
13110
|
-
constructor(seed) {
|
|
13111
|
-
this._mt = new Uint32Array(624);
|
|
13112
|
-
this._idx = 625;
|
|
13113
|
-
this._gauss_next = null;
|
|
13114
|
-
this._random_fn = this.random.bind(this);
|
|
13115
|
-
this.seed(seed);
|
|
13116
|
-
}
|
|
13615
|
+
// src/utils/tensor.js
|
|
13616
|
+
var Tensor3 = class _Tensor {
|
|
13117
13617
|
/**
|
|
13118
|
-
*
|
|
13119
|
-
*
|
|
13120
|
-
* When called with a number, initializes the state deterministically from that value.
|
|
13121
|
-
* When called with no arguments (or `undefined`/`null`), seeds from OS entropy
|
|
13122
|
-
* via `crypto.getRandomValues`, matching Python's `random.seed()` behaviour.
|
|
13123
|
-
*
|
|
13124
|
-
* @param {number} [n] The seed value. Omit to seed from OS entropy.
|
|
13618
|
+
* Dimensions of the tensor.
|
|
13619
|
+
* @type {number[]}
|
|
13125
13620
|
*/
|
|
13126
|
-
|
|
13127
|
-
|
|
13128
|
-
|
|
13129
|
-
|
|
13130
|
-
|
|
13131
|
-
n = buf[0];
|
|
13132
|
-
} else {
|
|
13133
|
-
n = Date.now() >>> 0;
|
|
13134
|
-
}
|
|
13135
|
-
}
|
|
13136
|
-
const mt2 = this._mt;
|
|
13137
|
-
const u = (a, b) => Math.imul(a, b) >>> 0, key = [];
|
|
13138
|
-
for (let v = n || 0; v > 0; v = Math.floor(v / 4294967296)) key.push(v & 4294967295);
|
|
13139
|
-
if (!key.length) key.push(0);
|
|
13140
|
-
mt2[0] = 19650218;
|
|
13141
|
-
for (let k2 = 1; k2 < 624; ++k2) mt2[k2] = u(1812433253, mt2[k2 - 1] ^ mt2[k2 - 1] >>> 30) + k2 >>> 0;
|
|
13142
|
-
let i = 1, j = 0;
|
|
13143
|
-
for (let k2 = Math.max(624, key.length); k2 > 0; --k2, ++i, ++j) {
|
|
13144
|
-
if (i >= 624) {
|
|
13145
|
-
mt2[0] = mt2[623];
|
|
13146
|
-
i = 1;
|
|
13147
|
-
}
|
|
13148
|
-
if (j >= key.length) j = 0;
|
|
13149
|
-
mt2[i] = (mt2[i] ^ u(mt2[i - 1] ^ mt2[i - 1] >>> 30, 1664525)) + key[j] + j >>> 0;
|
|
13150
|
-
}
|
|
13151
|
-
for (let k2 = 623; k2 > 0; --k2, ++i) {
|
|
13152
|
-
if (i >= 624) {
|
|
13153
|
-
mt2[0] = mt2[623];
|
|
13154
|
-
i = 1;
|
|
13155
|
-
}
|
|
13156
|
-
mt2[i] = (mt2[i] ^ u(mt2[i - 1] ^ mt2[i - 1] >>> 30, 1566083941)) - i >>> 0;
|
|
13157
|
-
}
|
|
13158
|
-
mt2[0] = 2147483648;
|
|
13159
|
-
this._idx = 624;
|
|
13160
|
-
this._gauss_next = null;
|
|
13621
|
+
get dims() {
|
|
13622
|
+
return this.ort_tensor.dims;
|
|
13623
|
+
}
|
|
13624
|
+
set dims(value) {
|
|
13625
|
+
this.ort_tensor.dims = value;
|
|
13161
13626
|
}
|
|
13162
13627
|
/**
|
|
13163
|
-
*
|
|
13164
|
-
*
|
|
13165
|
-
* Performs the "twist" step when the state buffer is exhausted,
|
|
13166
|
-
* then applies the standard MT19937 tempering transform.
|
|
13167
|
-
*
|
|
13168
|
-
* @returns {number} A random integer in the range [0, 2^32 - 1].
|
|
13628
|
+
* Type of the tensor.
|
|
13629
|
+
* @type {DataType}
|
|
13169
13630
|
*/
|
|
13170
|
-
|
|
13171
|
-
|
|
13172
|
-
if (this._idx >= 624) {
|
|
13173
|
-
for (let k2 = 0; k2 < 624; ++k2) {
|
|
13174
|
-
const y2 = mt2[k2] & 2147483648 | mt2[(k2 + 1) % 624] & 2147483647;
|
|
13175
|
-
mt2[k2] = (mt2[(k2 + 397) % 624] ^ y2 >>> 1 ^ (y2 & 1 ? 2567483615 : 0)) >>> 0;
|
|
13176
|
-
}
|
|
13177
|
-
this._idx = 0;
|
|
13178
|
-
}
|
|
13179
|
-
let y = mt2[this._idx++];
|
|
13180
|
-
y ^= y >>> 11;
|
|
13181
|
-
y ^= y << 7 & 2636928640;
|
|
13182
|
-
y ^= y << 15 & 4022730752;
|
|
13183
|
-
y ^= y >>> 18;
|
|
13184
|
-
return y >>> 0;
|
|
13631
|
+
get type() {
|
|
13632
|
+
return this.ort_tensor.type;
|
|
13185
13633
|
}
|
|
13186
13634
|
/**
|
|
13187
|
-
*
|
|
13188
|
-
*
|
|
13189
|
-
* Combines two 32-bit integers (using 53 bits of precision) to produce
|
|
13190
|
-
* a uniformly distributed double, matching Python's `random.random()`.
|
|
13191
|
-
*
|
|
13192
|
-
* @returns {number} A random float in [0, 1).
|
|
13635
|
+
* The data stored in the tensor.
|
|
13636
|
+
* @type {DataArray}
|
|
13193
13637
|
*/
|
|
13194
|
-
|
|
13195
|
-
return
|
|
13196
|
-
}
|
|
13197
|
-
/**
|
|
13198
|
-
* Generates a random number from a Gaussian (normal) distribution.
|
|
13199
|
-
*
|
|
13200
|
-
* Uses the Box-Muller transform with a cached spare value,
|
|
13201
|
-
* matching Python's `random.gauss()` output for the same seed.
|
|
13202
|
-
*
|
|
13203
|
-
* @param {number} [mu=0] The mean of the distribution.
|
|
13204
|
-
* @param {number} [sigma=1] The standard deviation of the distribution.
|
|
13205
|
-
* @returns {number} A normally distributed random value.
|
|
13206
|
-
*/
|
|
13207
|
-
gauss(mu = 0, sigma = 1) {
|
|
13208
|
-
let z = this._gauss_next;
|
|
13209
|
-
this._gauss_next = null;
|
|
13210
|
-
if (z === null) {
|
|
13211
|
-
const x2pi = this.random() * 2 * Math.PI, g2rad = Math.sqrt(-2 * Math.log(1 - this.random()));
|
|
13212
|
-
z = Math.cos(x2pi) * g2rad;
|
|
13213
|
-
this._gauss_next = Math.sin(x2pi) * g2rad;
|
|
13214
|
-
}
|
|
13215
|
-
return mu + z * sigma;
|
|
13216
|
-
}
|
|
13217
|
-
/**
|
|
13218
|
-
* Shuffles an array in-place using the Fisher-Yates algorithm.
|
|
13219
|
-
*
|
|
13220
|
-
* Uses rejection sampling via `getrandbits`-style bit masking to ensure
|
|
13221
|
-
* a uniform distribution, matching Python's `random.shuffle()`.
|
|
13222
|
-
*
|
|
13223
|
-
* @param {any[]} arr The array to shuffle in-place.
|
|
13224
|
-
*/
|
|
13225
|
-
shuffle(arr) {
|
|
13226
|
-
for (let i = arr.length - 1; i > 0; --i) {
|
|
13227
|
-
const k2 = 32 - Math.clz32(i + 1);
|
|
13228
|
-
let r = this._int32() >>> 32 - k2;
|
|
13229
|
-
while (r > i) r = this._int32() >>> 32 - k2;
|
|
13230
|
-
const t = arr[i];
|
|
13231
|
-
arr[i] = arr[r];
|
|
13232
|
-
arr[r] = t;
|
|
13233
|
-
}
|
|
13234
|
-
}
|
|
13235
|
-
/**
|
|
13236
|
-
* Selects a single element from a weighted population.
|
|
13237
|
-
*
|
|
13238
|
-
* Matches Python's `random.choices(population, weights=weights, k=1)[0]`
|
|
13239
|
-
*
|
|
13240
|
-
* @param {any[]} population The array of items to choose from.
|
|
13241
|
-
* @param {number[]} weights An array of non-negative weights, one per population element.
|
|
13242
|
-
* @returns {*} A single randomly selected element from the population.
|
|
13243
|
-
*/
|
|
13244
|
-
choices(population, weights) {
|
|
13245
|
-
return population[_weightedIndexWith(this._random_fn, weights)];
|
|
13246
|
-
}
|
|
13247
|
-
};
|
|
13248
|
-
function _weightedIndexWith(randomFn, weights) {
|
|
13249
|
-
let sum = 0;
|
|
13250
|
-
for (let i = 0; i < weights.length; ++i) sum += weights[i];
|
|
13251
|
-
let x = randomFn() * sum;
|
|
13252
|
-
for (let i = 0; i < weights.length; ++i) {
|
|
13253
|
-
x -= weights[i];
|
|
13254
|
-
if (x < 0) return i;
|
|
13255
|
-
}
|
|
13256
|
-
return weights.length - 1;
|
|
13257
|
-
}
|
|
13258
|
-
var _default = new Random();
|
|
13259
|
-
var random = Object.freeze({
|
|
13260
|
-
Random,
|
|
13261
|
-
seed: _default.seed.bind(_default),
|
|
13262
|
-
random: _default.random.bind(_default),
|
|
13263
|
-
gauss: _default.gauss.bind(_default),
|
|
13264
|
-
shuffle: _default.shuffle.bind(_default),
|
|
13265
|
-
choices: _default.choices.bind(_default)
|
|
13266
|
-
});
|
|
13267
|
-
var _weightedIndex = (weights) => _weightedIndexWith(random.random, weights);
|
|
13268
|
-
|
|
13269
|
-
// src/utils/tensor.js
|
|
13270
|
-
var Tensor3 = class _Tensor {
|
|
13271
|
-
/**
|
|
13272
|
-
* Dimensions of the tensor.
|
|
13273
|
-
* @type {number[]}
|
|
13274
|
-
*/
|
|
13275
|
-
get dims() {
|
|
13276
|
-
return this.ort_tensor.dims;
|
|
13277
|
-
}
|
|
13278
|
-
set dims(value) {
|
|
13279
|
-
this.ort_tensor.dims = value;
|
|
13280
|
-
}
|
|
13281
|
-
/**
|
|
13282
|
-
* Type of the tensor.
|
|
13283
|
-
* @type {DataType}
|
|
13284
|
-
*/
|
|
13285
|
-
get type() {
|
|
13286
|
-
return this.ort_tensor.type;
|
|
13287
|
-
}
|
|
13288
|
-
/**
|
|
13289
|
-
* The data stored in the tensor.
|
|
13290
|
-
* @type {DataArray}
|
|
13291
|
-
*/
|
|
13292
|
-
get data() {
|
|
13293
|
-
return this.ort_tensor.data;
|
|
13638
|
+
get data() {
|
|
13639
|
+
return this.ort_tensor.data;
|
|
13294
13640
|
}
|
|
13295
13641
|
/**
|
|
13296
13642
|
* The number of elements in the tensor.
|
|
@@ -13676,9 +14022,23 @@ var Tensor3 = class _Tensor {
|
|
|
13676
14022
|
throw Error(`Unsupported norm: ${p}`);
|
|
13677
14023
|
}
|
|
13678
14024
|
const this_data = this.data;
|
|
13679
|
-
const
|
|
14025
|
+
const is_bigint = this_data instanceof BigInt64Array || this_data instanceof BigUint64Array;
|
|
14026
|
+
if (is_bigint && p !== 1) {
|
|
14027
|
+
throw Error(`Expected a floating point tensor as input. Got ${this.type}`);
|
|
14028
|
+
}
|
|
14029
|
+
let fn2, zero;
|
|
14030
|
+
if (is_bigint) {
|
|
14031
|
+
fn2 = (a, b) => a + b;
|
|
14032
|
+
zero = 0n;
|
|
14033
|
+
} else {
|
|
14034
|
+
fn2 = (a, b) => a + b ** p;
|
|
14035
|
+
zero = 0;
|
|
14036
|
+
}
|
|
13680
14037
|
if (dim === null) {
|
|
13681
|
-
|
|
14038
|
+
let val = this_data.reduce(fn2, zero);
|
|
14039
|
+
if (p !== 1) {
|
|
14040
|
+
val = val ** (1 / p);
|
|
14041
|
+
}
|
|
13682
14042
|
return new _Tensor(this.type, [val], []);
|
|
13683
14043
|
}
|
|
13684
14044
|
const [type, result, resultDims] = reduce_helper(fn2, this, dim, keepdim);
|
|
@@ -16138,9 +16498,11 @@ __export(processors_exports, {
|
|
|
16138
16498
|
ChatterboxProcessor: () => ChatterboxProcessor,
|
|
16139
16499
|
Florence2Processor: () => Florence2Processor,
|
|
16140
16500
|
Gemma3nProcessor: () => Gemma3nProcessor,
|
|
16501
|
+
GraniteSpeechProcessor: () => GraniteSpeechProcessor,
|
|
16141
16502
|
GroundingDinoProcessor: () => GroundingDinoProcessor,
|
|
16142
16503
|
Idefics3Processor: () => Idefics3Processor,
|
|
16143
16504
|
JinaCLIPProcessor: () => JinaCLIPProcessor,
|
|
16505
|
+
Lfm2VlProcessor: () => Lfm2VlProcessor,
|
|
16144
16506
|
LlavaProcessor: () => LlavaProcessor,
|
|
16145
16507
|
MgpstrProcessor: () => MgpstrProcessor,
|
|
16146
16508
|
MoonshineProcessor: () => MoonshineProcessor,
|
|
@@ -16161,6 +16523,7 @@ __export(processors_exports, {
|
|
|
16161
16523
|
UltravoxProcessor: () => UltravoxProcessor,
|
|
16162
16524
|
VLChatProcessor: () => VLChatProcessor,
|
|
16163
16525
|
VoxtralProcessor: () => VoxtralProcessor,
|
|
16526
|
+
VoxtralRealtimeProcessor: () => VoxtralRealtimeProcessor,
|
|
16164
16527
|
Wav2Vec2Processor: () => Wav2Vec2Processor,
|
|
16165
16528
|
Wav2Vec2ProcessorWithLM: () => Wav2Vec2ProcessorWithLM,
|
|
16166
16529
|
WhisperProcessor: () => WhisperProcessor
|
|
@@ -16215,12 +16578,14 @@ __export(feature_extractors_exports, {
|
|
|
16215
16578
|
EncodecFeatureExtractor: () => EncodecFeatureExtractor,
|
|
16216
16579
|
FeatureExtractor: () => FeatureExtractor,
|
|
16217
16580
|
Gemma3nAudioFeatureExtractor: () => Gemma3nAudioFeatureExtractor,
|
|
16581
|
+
GraniteSpeechFeatureExtractor: () => GraniteSpeechFeatureExtractor,
|
|
16218
16582
|
MoonshineFeatureExtractor: () => MoonshineFeatureExtractor,
|
|
16219
16583
|
ParakeetFeatureExtractor: () => ParakeetFeatureExtractor,
|
|
16220
16584
|
PyAnnoteFeatureExtractor: () => PyAnnoteFeatureExtractor,
|
|
16221
16585
|
SeamlessM4TFeatureExtractor: () => SeamlessM4TFeatureExtractor,
|
|
16222
16586
|
SnacFeatureExtractor: () => SnacFeatureExtractor,
|
|
16223
16587
|
SpeechT5FeatureExtractor: () => SpeechT5FeatureExtractor,
|
|
16588
|
+
VoxtralRealtimeFeatureExtractor: () => VoxtralRealtimeFeatureExtractor,
|
|
16224
16589
|
Wav2Vec2FeatureExtractor: () => Wav2Vec2FeatureExtractor,
|
|
16225
16590
|
WeSpeakerFeatureExtractor: () => WeSpeakerFeatureExtractor,
|
|
16226
16591
|
WhisperFeatureExtractor: () => WhisperFeatureExtractor
|
|
@@ -16455,6 +16820,7 @@ async function spectrogram(waveform, window2, frame_length, hop_length, {
|
|
|
16455
16820
|
mel_filters = null,
|
|
16456
16821
|
mel_floor = 1e-10,
|
|
16457
16822
|
log_mel = null,
|
|
16823
|
+
max_log_mel = null,
|
|
16458
16824
|
reference = 1,
|
|
16459
16825
|
min_value = 1e-10,
|
|
16460
16826
|
db_range = null,
|
|
@@ -16594,6 +16960,17 @@ async function spectrogram(waveform, window2, frame_length, hop_length, {
|
|
|
16594
16960
|
mel_spec_data[i] = Math.log10(mel_spec_data[i]);
|
|
16595
16961
|
}
|
|
16596
16962
|
break;
|
|
16963
|
+
case "log10_max_norm": {
|
|
16964
|
+
for (let i = 0; i < o; ++i) {
|
|
16965
|
+
mel_spec_data[i] = Math.log10(mel_spec_data[i]);
|
|
16966
|
+
}
|
|
16967
|
+
const logMax = max_log_mel ?? max(mel_spec_data)[0];
|
|
16968
|
+
const threshold = logMax - 8;
|
|
16969
|
+
for (let i = 0; i < o; ++i) {
|
|
16970
|
+
mel_spec_data[i] = (Math.max(mel_spec_data[i], threshold) + 4) / 4;
|
|
16971
|
+
}
|
|
16972
|
+
break;
|
|
16973
|
+
}
|
|
16597
16974
|
case "dB":
|
|
16598
16975
|
if (power === 1) {
|
|
16599
16976
|
amplitude_to_db(mel_spec_data, reference, min_value, db_range);
|
|
@@ -16604,7 +16981,9 @@ async function spectrogram(waveform, window2, frame_length, hop_length, {
|
|
|
16604
16981
|
}
|
|
16605
16982
|
break;
|
|
16606
16983
|
default:
|
|
16607
|
-
throw new Error(
|
|
16984
|
+
throw new Error(
|
|
16985
|
+
`log_mel must be one of null, 'log', 'log10', 'log10_max_norm', or 'dB'. Got '${log_mel}'`
|
|
16986
|
+
);
|
|
16608
16987
|
}
|
|
16609
16988
|
}
|
|
16610
16989
|
return mel_spec;
|
|
@@ -17109,6 +17488,56 @@ var Gemma3nAudioFeatureExtractor = class extends FeatureExtractor {
|
|
|
17109
17488
|
}
|
|
17110
17489
|
};
|
|
17111
17490
|
|
|
17491
|
+
// src/models/granite_speech/feature_extraction_granite_speech.js
|
|
17492
|
+
var GraniteSpeechFeatureExtractor = class extends FeatureExtractor {
|
|
17493
|
+
constructor(config) {
|
|
17494
|
+
super(config);
|
|
17495
|
+
const { n_fft, win_length, n_mels, sample_rate } = config.melspec_kwargs;
|
|
17496
|
+
this.mel_filters = mel_filter_bank(
|
|
17497
|
+
Math.floor(1 + n_fft / 2),
|
|
17498
|
+
// num_frequency_bins = 257
|
|
17499
|
+
n_mels,
|
|
17500
|
+
// 80
|
|
17501
|
+
0,
|
|
17502
|
+
// min_frequency
|
|
17503
|
+
sample_rate / 2,
|
|
17504
|
+
// max_frequency = 8000
|
|
17505
|
+
sample_rate,
|
|
17506
|
+
// 16000
|
|
17507
|
+
null,
|
|
17508
|
+
// norm (torchaudio default: no norm)
|
|
17509
|
+
"htk"
|
|
17510
|
+
// mel_scale (torchaudio default)
|
|
17511
|
+
);
|
|
17512
|
+
const raw_window = window_function(win_length, "hann");
|
|
17513
|
+
this.window = new Float64Array(n_fft);
|
|
17514
|
+
const pad = Math.floor((n_fft - win_length) / 2);
|
|
17515
|
+
this.window.set(raw_window, pad);
|
|
17516
|
+
}
|
|
17517
|
+
/**
|
|
17518
|
+
* Extract mel spectrogram features from audio, matching the Python GraniteSpeechFeatureExtractor.
|
|
17519
|
+
* @param {Float32Array|Float64Array} audio The audio waveform.
|
|
17520
|
+
* @returns {Promise<{input_features: Tensor}>}
|
|
17521
|
+
*/
|
|
17522
|
+
async _call(audio) {
|
|
17523
|
+
validate_audio_inputs(audio, "GraniteSpeechFeatureExtractor");
|
|
17524
|
+
const { n_fft, hop_length, n_mels } = this.config.melspec_kwargs;
|
|
17525
|
+
const num_frames = 1 + Math.floor((audio.length - 1) / hop_length);
|
|
17526
|
+
const max_num_frames = num_frames - num_frames % 2;
|
|
17527
|
+
const mel = await spectrogram(audio, this.window, n_fft, hop_length, {
|
|
17528
|
+
power: 2,
|
|
17529
|
+
mel_filters: this.mel_filters,
|
|
17530
|
+
log_mel: "log10_max_norm",
|
|
17531
|
+
transpose: true,
|
|
17532
|
+
// [time, n_mels]
|
|
17533
|
+
max_num_frames,
|
|
17534
|
+
do_pad: false
|
|
17535
|
+
});
|
|
17536
|
+
const input_features = mel.view(-1, 2 * n_mels).unsqueeze_(0);
|
|
17537
|
+
return { input_features };
|
|
17538
|
+
}
|
|
17539
|
+
};
|
|
17540
|
+
|
|
17112
17541
|
// src/models/moonshine/feature_extraction_moonshine.js
|
|
17113
17542
|
var MoonshineFeatureExtractor = class extends FeatureExtractor {
|
|
17114
17543
|
/**
|
|
@@ -17589,6 +18018,71 @@ var WeSpeakerFeatureExtractor = class extends FeatureExtractor {
|
|
|
17589
18018
|
}
|
|
17590
18019
|
};
|
|
17591
18020
|
|
|
18021
|
+
// src/models/voxtral_realtime/feature_extraction_voxtral_realtime.js
|
|
18022
|
+
var VoxtralRealtimeFeatureExtractor = class extends FeatureExtractor {
|
|
18023
|
+
constructor(config) {
|
|
18024
|
+
super(config);
|
|
18025
|
+
this.config.mel_filters ??= mel_filter_bank(
|
|
18026
|
+
Math.floor(1 + this.config.n_fft / 2),
|
|
18027
|
+
// num_frequency_bins
|
|
18028
|
+
this.config.feature_size,
|
|
18029
|
+
// num_mel_filters
|
|
18030
|
+
0,
|
|
18031
|
+
// min_frequency
|
|
18032
|
+
8e3,
|
|
18033
|
+
// max_frequency
|
|
18034
|
+
this.config.sampling_rate,
|
|
18035
|
+
// sampling_rate
|
|
18036
|
+
"slaney",
|
|
18037
|
+
// norm
|
|
18038
|
+
"slaney"
|
|
18039
|
+
// mel_scale
|
|
18040
|
+
);
|
|
18041
|
+
this.window = window_function(this.config.n_fft, "hann");
|
|
18042
|
+
}
|
|
18043
|
+
/**
|
|
18044
|
+
* Computes the log-Mel spectrogram of the provided audio waveform.
|
|
18045
|
+
* @param {Float32Array|Float64Array} waveform The audio waveform to process.
|
|
18046
|
+
* @param {Object} [options]
|
|
18047
|
+
* @param {boolean} [options.center=true] Whether to center-pad the waveform for STFT.
|
|
18048
|
+
* @returns {Promise<import('../../utils/tensor.js').Tensor>} The log-Mel spectrogram tensor of shape [num_mel_bins, num_frames].
|
|
18049
|
+
*/
|
|
18050
|
+
async _extract_fbank_features(waveform, { center = true } = {}) {
|
|
18051
|
+
const { n_fft, hop_length, mel_filters, global_log_mel_max } = this.config;
|
|
18052
|
+
const max_num_frames = center ? Math.floor(waveform.length / hop_length) : Math.floor((waveform.length - n_fft) / hop_length);
|
|
18053
|
+
return await spectrogram(
|
|
18054
|
+
waveform,
|
|
18055
|
+
this.window,
|
|
18056
|
+
n_fft,
|
|
18057
|
+
// frame_length
|
|
18058
|
+
hop_length,
|
|
18059
|
+
{
|
|
18060
|
+
power: 2,
|
|
18061
|
+
mel_filters,
|
|
18062
|
+
log_mel: "log10_max_norm",
|
|
18063
|
+
max_log_mel: global_log_mel_max,
|
|
18064
|
+
center,
|
|
18065
|
+
max_num_frames,
|
|
18066
|
+
do_pad: false
|
|
18067
|
+
}
|
|
18068
|
+
);
|
|
18069
|
+
}
|
|
18070
|
+
/**
|
|
18071
|
+
* Extract mel spectrogram features from audio.
|
|
18072
|
+
* @param {Float32Array|Float64Array} audio The audio data.
|
|
18073
|
+
* @param {Object} [options]
|
|
18074
|
+
* @param {boolean} [options.center=true] Whether to center-pad the waveform.
|
|
18075
|
+
* @returns {Promise<{ input_features: import('../../utils/tensor.js').Tensor }>}
|
|
18076
|
+
*/
|
|
18077
|
+
async _call(audio, { center = true } = {}) {
|
|
18078
|
+
validate_audio_inputs(audio, "VoxtralRealtimeFeatureExtractor");
|
|
18079
|
+
const features = await this._extract_fbank_features(audio, { center });
|
|
18080
|
+
return {
|
|
18081
|
+
input_features: features.unsqueeze_(0)
|
|
18082
|
+
};
|
|
18083
|
+
}
|
|
18084
|
+
};
|
|
18085
|
+
|
|
17592
18086
|
// src/models/whisper/feature_extraction_whisper.js
|
|
17593
18087
|
var WhisperFeatureExtractor = class extends FeatureExtractor {
|
|
17594
18088
|
constructor(config) {
|
|
@@ -17617,7 +18111,7 @@ var WhisperFeatureExtractor = class extends FeatureExtractor {
|
|
|
17617
18111
|
* @returns {Promise<Tensor>} An object containing the log-Mel spectrogram data as a Float32Array and its dimensions as an array of numbers.
|
|
17618
18112
|
*/
|
|
17619
18113
|
async _extract_fbank_features(waveform) {
|
|
17620
|
-
|
|
18114
|
+
return await spectrogram(
|
|
17621
18115
|
waveform,
|
|
17622
18116
|
this.window,
|
|
17623
18117
|
// window
|
|
@@ -17628,7 +18122,7 @@ var WhisperFeatureExtractor = class extends FeatureExtractor {
|
|
|
17628
18122
|
{
|
|
17629
18123
|
power: 2,
|
|
17630
18124
|
mel_filters: this.config.mel_filters,
|
|
17631
|
-
log_mel: "
|
|
18125
|
+
log_mel: "log10_max_norm",
|
|
17632
18126
|
// Custom
|
|
17633
18127
|
max_num_frames: Math.min(
|
|
17634
18128
|
Math.floor(waveform.length / this.config.hop_length),
|
|
@@ -17637,15 +18131,6 @@ var WhisperFeatureExtractor = class extends FeatureExtractor {
|
|
|
17637
18131
|
)
|
|
17638
18132
|
}
|
|
17639
18133
|
);
|
|
17640
|
-
const data = features.data;
|
|
17641
|
-
const maxValue = max(
|
|
17642
|
-
/** @type {Float32Array} */
|
|
17643
|
-
data
|
|
17644
|
-
)[0];
|
|
17645
|
-
for (let i = 0; i < data.length; ++i) {
|
|
17646
|
-
data[i] = (Math.max(data[i], maxValue - 8) + 4) / 4;
|
|
17647
|
-
}
|
|
17648
|
-
return features;
|
|
17649
18134
|
}
|
|
17650
18135
|
/**
|
|
17651
18136
|
* Asynchronously extracts features from a given audio using the provided configuration.
|
|
@@ -17711,11 +18196,10 @@ var sharp_default = {};
|
|
|
17711
18196
|
var createCanvasFunction;
|
|
17712
18197
|
var ImageDataClass;
|
|
17713
18198
|
var loadImageFunction;
|
|
17714
|
-
|
|
17715
|
-
if (IS_BROWSER_OR_WEBWORKER) {
|
|
18199
|
+
if (apis.IS_WEB_ENV) {
|
|
17716
18200
|
createCanvasFunction = (width, height) => {
|
|
17717
18201
|
if (!self.OffscreenCanvas) {
|
|
17718
|
-
throw new Error("OffscreenCanvas not supported by this
|
|
18202
|
+
throw new Error("OffscreenCanvas not supported by this environment.");
|
|
17719
18203
|
}
|
|
17720
18204
|
return new self.OffscreenCanvas(width, height);
|
|
17721
18205
|
};
|
|
@@ -17805,7 +18289,7 @@ var RawImage = class _RawImage {
|
|
|
17805
18289
|
* @returns {RawImage} The image object.
|
|
17806
18290
|
*/
|
|
17807
18291
|
static fromCanvas(canvas) {
|
|
17808
|
-
if (!
|
|
18292
|
+
if (!apis.IS_WEB_ENV) {
|
|
17809
18293
|
throw new Error("fromCanvas() is only supported in browser environments.");
|
|
17810
18294
|
}
|
|
17811
18295
|
const ctx = (
|
|
@@ -17834,7 +18318,7 @@ var RawImage = class _RawImage {
|
|
|
17834
18318
|
* @returns {Promise<RawImage>} The image object.
|
|
17835
18319
|
*/
|
|
17836
18320
|
static async fromBlob(blob) {
|
|
17837
|
-
if (
|
|
18321
|
+
if (apis.IS_WEB_ENV) {
|
|
17838
18322
|
const img = await loadImageFunction(blob);
|
|
17839
18323
|
const ctx = createCanvasFunction(img.width, img.height).getContext("2d");
|
|
17840
18324
|
ctx.drawImage(img, 0, 0);
|
|
@@ -18015,7 +18499,7 @@ var RawImage = class _RawImage {
|
|
|
18015
18499
|
} else if (nullish_height) {
|
|
18016
18500
|
height = width / this.width * this.height;
|
|
18017
18501
|
}
|
|
18018
|
-
if (
|
|
18502
|
+
if (apis.IS_WEB_ENV) {
|
|
18019
18503
|
const numChannels = this.channels;
|
|
18020
18504
|
const canvas = this.toCanvas();
|
|
18021
18505
|
const ctx = createCanvasFunction(width, height).getContext("2d");
|
|
@@ -18063,7 +18547,7 @@ var RawImage = class _RawImage {
|
|
|
18063
18547
|
if (left === 0 && right === 0 && top === 0 && bottom === 0) {
|
|
18064
18548
|
return this;
|
|
18065
18549
|
}
|
|
18066
|
-
if (
|
|
18550
|
+
if (apis.IS_WEB_ENV) {
|
|
18067
18551
|
const numChannels = this.channels;
|
|
18068
18552
|
const canvas = this.toCanvas();
|
|
18069
18553
|
const newWidth = this.width + left + right;
|
|
@@ -18087,7 +18571,7 @@ var RawImage = class _RawImage {
|
|
|
18087
18571
|
}
|
|
18088
18572
|
const crop_width = x_max - x_min + 1;
|
|
18089
18573
|
const crop_height = y_max - y_min + 1;
|
|
18090
|
-
if (
|
|
18574
|
+
if (apis.IS_WEB_ENV) {
|
|
18091
18575
|
const numChannels = this.channels;
|
|
18092
18576
|
const canvas = this.toCanvas();
|
|
18093
18577
|
const ctx = createCanvasFunction(crop_width, crop_height).getContext("2d");
|
|
@@ -18115,7 +18599,7 @@ var RawImage = class _RawImage {
|
|
|
18115
18599
|
}
|
|
18116
18600
|
const width_offset = (this.width - crop_width) / 2;
|
|
18117
18601
|
const height_offset = (this.height - crop_height) / 2;
|
|
18118
|
-
if (
|
|
18602
|
+
if (apis.IS_WEB_ENV) {
|
|
18119
18603
|
const numChannels = this.channels;
|
|
18120
18604
|
const canvas = this.toCanvas();
|
|
18121
18605
|
const ctx = createCanvasFunction(crop_width, crop_height).getContext("2d");
|
|
@@ -18193,7 +18677,7 @@ var RawImage = class _RawImage {
|
|
|
18193
18677
|
}
|
|
18194
18678
|
}
|
|
18195
18679
|
async toBlob(type = "image/png", quality = 1) {
|
|
18196
|
-
if (!
|
|
18680
|
+
if (!apis.IS_WEB_ENV) {
|
|
18197
18681
|
throw new Error("toBlob() is only supported in browser environments.");
|
|
18198
18682
|
}
|
|
18199
18683
|
const canvas = this.toCanvas();
|
|
@@ -18210,7 +18694,7 @@ var RawImage = class _RawImage {
|
|
|
18210
18694
|
return tensor;
|
|
18211
18695
|
}
|
|
18212
18696
|
toCanvas() {
|
|
18213
|
-
if (!
|
|
18697
|
+
if (!apis.IS_WEB_ENV) {
|
|
18214
18698
|
throw new Error("toCanvas() is only supported in browser environments.");
|
|
18215
18699
|
}
|
|
18216
18700
|
const cloned = this.clone().rgba();
|
|
@@ -18294,7 +18778,7 @@ var RawImage = class _RawImage {
|
|
|
18294
18778
|
* @returns {Promise<void>}
|
|
18295
18779
|
*/
|
|
18296
18780
|
async save(path) {
|
|
18297
|
-
if (
|
|
18781
|
+
if (apis.IS_WEB_ENV) {
|
|
18298
18782
|
if (apis.IS_WEBWORKER_ENV) {
|
|
18299
18783
|
throw new Error("Unable to save an image from a Web Worker.");
|
|
18300
18784
|
}
|
|
@@ -18314,7 +18798,7 @@ var RawImage = class _RawImage {
|
|
|
18314
18798
|
* @returns {import('sharp').Sharp} The Sharp instance.
|
|
18315
18799
|
*/
|
|
18316
18800
|
toSharp() {
|
|
18317
|
-
if (
|
|
18801
|
+
if (apis.IS_WEB_ENV) {
|
|
18318
18802
|
throw new Error("toSharp() is only supported in server-side environments.");
|
|
18319
18803
|
}
|
|
18320
18804
|
return sharp_default(this.data, {
|
|
@@ -18527,6 +19011,27 @@ function compute_segments(mask_probs, pred_scores, pred_labels, mask_threshold,
|
|
|
18527
19011
|
}
|
|
18528
19012
|
return [segmentation, segments];
|
|
18529
19013
|
}
|
|
19014
|
+
function smart_resize(height, width, factor = 28, min_pixels = 56 * 56, max_pixels = 14 * 14 * 4 * 1280) {
|
|
19015
|
+
if (height < factor || width < factor) {
|
|
19016
|
+
throw new Error(`height:${height} or width:${width} must be larger than factor:${factor}`);
|
|
19017
|
+
} else if (Math.max(height, width) / Math.min(height, width) > 200) {
|
|
19018
|
+
throw new Error(
|
|
19019
|
+
`absolute aspect ratio must be smaller than 200, got ${Math.max(height, width) / Math.min(height, width)}`
|
|
19020
|
+
);
|
|
19021
|
+
}
|
|
19022
|
+
let h_bar = Math.round(height / factor) * factor;
|
|
19023
|
+
let w_bar = Math.round(width / factor) * factor;
|
|
19024
|
+
if (h_bar * w_bar > max_pixels) {
|
|
19025
|
+
const beta = Math.sqrt(height * width / max_pixels);
|
|
19026
|
+
h_bar = Math.floor(height / beta / factor) * factor;
|
|
19027
|
+
w_bar = Math.floor(width / beta / factor) * factor;
|
|
19028
|
+
} else if (h_bar * w_bar < min_pixels) {
|
|
19029
|
+
const beta = Math.sqrt(min_pixels / (height * width));
|
|
19030
|
+
h_bar = Math.ceil(height * beta / factor) * factor;
|
|
19031
|
+
w_bar = Math.ceil(width * beta / factor) * factor;
|
|
19032
|
+
}
|
|
19033
|
+
return [h_bar, w_bar];
|
|
19034
|
+
}
|
|
18530
19035
|
function post_process_panoptic_segmentation(outputs, threshold = 0.5, mask_threshold = 0.5, overlap_mask_area_threshold = 0.8, label_ids_to_fuse = null, target_sizes = null) {
|
|
18531
19036
|
if (label_ids_to_fuse === null) {
|
|
18532
19037
|
logger.warn("`label_ids_to_fuse` unset. No instance will be fused.");
|
|
@@ -18815,7 +19320,7 @@ var ImageProcessor = class extends Callable2 {
|
|
|
18815
19320
|
});
|
|
18816
19321
|
}
|
|
18817
19322
|
/**
|
|
18818
|
-
* @typedef {
|
|
19323
|
+
* @typedef {Object} PreprocessedImage
|
|
18819
19324
|
* @property {HeightWidth} original_size The original size of the image.
|
|
18820
19325
|
* @property {HeightWidth} reshaped_input_size The reshaped input size of the image.
|
|
18821
19326
|
* @property {Tensor} pixel_values The pixel values of the preprocessed image.
|
|
@@ -18993,6 +19498,7 @@ __export(image_processors_exports, {
|
|
|
18993
19498
|
ImageFeatureExtractor: () => ImageProcessor,
|
|
18994
19499
|
ImageProcessor: () => ImageProcessor,
|
|
18995
19500
|
JinaCLIPImageProcessor: () => JinaCLIPImageProcessor,
|
|
19501
|
+
Lfm2VlImageProcessor: () => Lfm2VlImageProcessor,
|
|
18996
19502
|
LlavaOnevisionImageProcessor: () => LlavaOnevisionImageProcessor,
|
|
18997
19503
|
Mask2FormerImageProcessor: () => Mask2FormerImageProcessor,
|
|
18998
19504
|
MaskFormerFeatureExtractor: () => MaskFormerFeatureExtractor,
|
|
@@ -19396,6 +19902,237 @@ var JinaCLIPImageProcessor = class extends ImageProcessor {
|
|
|
19396
19902
|
}
|
|
19397
19903
|
};
|
|
19398
19904
|
|
|
19905
|
+
// src/models/lfm2_vl/image_processing_lfm2_vl.js
|
|
19906
|
+
/**
 * Round `number` to the nearest integer multiple of `factor`.
 * @param {number} number Value to round.
 * @param {number} factor Multiple to snap to.
 * @returns {number} The nearest multiple of `factor`.
 */
function round_by_factor(number, factor) {
  const multiples = Math.round(number / factor);
  return multiples * factor;
}
|
|
19909
|
+
/**
 * Select the candidate (w, h) tile grid whose aspect ratio is closest to the
 * image's aspect ratio. On an exact tie, the later candidate wins when the
 * image area exceeds half of the area that grid would cover.
 * @param {number} aspect_ratio Target aspect ratio (width / height).
 * @param {number[][]} target_ratios Candidate [w, h] grids to choose from.
 * @param {number} width Image width in pixels.
 * @param {number} height Image height in pixels.
 * @param {number} image_size Side length of a single tile.
 * @returns {number[]} The best-matching [w, h] grid.
 */
function find_closest_aspect_ratio(aspect_ratio, target_ratios, width, height, image_size) {
  const image_area = width * height;
  let best = [1, 1];
  let best_diff = Infinity;
  for (const candidate of target_ratios) {
    const [w, h] = candidate;
    const diff = Math.abs(aspect_ratio - w / h);
    if (diff < best_diff) {
      best_diff = diff;
      best = candidate;
    } else if (diff === best_diff && image_area > 0.5 * image_size * image_size * w * h) {
      // Tie-break: prefer the larger grid when the image is big enough to fill it.
      best = candidate;
    }
  }
  return best;
}
|
|
19924
|
+
/**
 * Enumerate every (w, h) grid layout whose tile count w*h lies within
 * [min_tiles, max_tiles], deduplicated and sorted by total tile count.
 * @param {number} min_tiles Minimum number of tiles in a layout.
 * @param {number} max_tiles Maximum number of tiles in a layout.
 * @returns {number[][]} Unique [w, h] pairs, ascending by w*h.
 */
function get_target_ratios(min_tiles, max_tiles) {
  const seen = new Set();
  const ratios = [];
  for (let n = min_tiles; n <= max_tiles; ++n) {
    for (let w = 1; w <= n; ++w) {
      for (let h = 1; h <= n; ++h) {
        const count = w * h;
        if (count < min_tiles || count > max_tiles) continue;
        // Pack the pair into a single integer key for O(1) dedup.
        const key = (w << 16) | h;
        if (seen.has(key)) continue;
        seen.add(key);
        ratios.push([w, h]);
      }
    }
  }
  // Stable sort by tile count; insertion order is preserved on ties.
  return ratios.sort((a, b) => a[0] * a[1] - b[0] * b[1]);
}
|
|
19943
|
+
/**
 * Rearrange a batch of images from (B, C, H, W) layout into flattened,
 * non-overlapping square patches of side `patch_size`.
 * Output shape is (B, ph*pw, patch_size*patch_size*C), where
 * ph = floor(H / patch_size) and pw = floor(W / patch_size); trailing
 * rows/columns that do not fill a whole patch are dropped.
 * @param {Tensor} images Float32 image tensor with dims [B, C, H, W].
 * @param {number} patch_size Side length of each square patch, in pixels.
 * @returns {Tensor} Float32 tensor of dims [B, ph*pw, patch_size*patch_size*C].
 */
function convert_image_to_patches(images, patch_size) {
  const [B, C, H, W] = images.dims;
  // Number of whole patches along each spatial axis.
  const ph = Math.floor(H / patch_size), pw = Math.floor(W / patch_size);
  const patch_dim = patch_size * patch_size * C;
  const data = (
    /** @type {Float32Array} */
    images.data
  );
  const result = new Float32Array(B * ph * pw * patch_dim);
  // Stride between consecutive channels in the source CHW layout.
  const ch_stride = H * W;
  for (let b = 0; b < B; ++b) {
    const b_src = b * C * ch_stride;
    const b_dst = b * ph * pw * patch_dim;
    for (let py = 0; py < ph; ++py) {
      for (let px = 0; px < pw; ++px) {
        // `off` walks the destination patch contiguously; within a patch the
        // layout is row-major pixels with channels innermost (HWC order).
        let off = b_dst + (py * pw + px) * patch_dim;
        for (let dy = 0; dy < patch_size; ++dy) {
          // Offset of this patch row's first pixel within a single channel plane.
          const row = (py * patch_size + dy) * W + px * patch_size;
          for (let dx = 0; dx < patch_size; ++dx) {
            const pixel = row + dx;
            for (let c = 0; c < C; ++c) {
              result[off++] = data[b_src + c * ch_stride + pixel];
            }
          }
        }
      }
    }
  }
  return new Tensor3("float32", result, [B, ph * pw, patch_dim]);
}
|
|
19973
|
+
/**
 * Zero-pad a (1, L, D) patch tensor along its second dimension up to
 * `target_length`, and build the matching int64 validity mask
 * (1 for real patches, 0 for padding).
 * @param {Tensor} patches Float32 tensor of dims [1, L, D].
 * @param {number} target_length Desired padded length (>= L).
 * @returns {{padded: Tensor, mask: Tensor}} The padded tensor and a mask of dims [target_length].
 */
function pad_along_first_dim(patches, target_length) {
  const [, num_patches, dim] = patches.dims;

  // Mark the first `num_patches` positions as valid; the rest remain 0n.
  const mask_data = new BigInt64Array(target_length);
  mask_data.fill(1n, 0, num_patches);
  const mask = new Tensor3("int64", mask_data, [target_length]);

  if (num_patches >= target_length) {
    // Already long enough: return the input tensor unchanged.
    return { padded: patches, mask };
  }
  // Copy the existing patch data into a larger zero-initialized buffer.
  const buffer = new Float32Array(target_length * dim);
  buffer.set(
    /** @type {Float32Array} */
    patches.data
  );
  return { padded: new Tensor3("float32", buffer, [1, target_length, dim]), mask };
}
|
|
19988
|
+
// LFM2-VL image processor: resizes images to factor-aligned dimensions,
// optionally splits large images into a grid of fixed-size tiles (plus an
// optional full-image thumbnail), and converts each tile into a padded
// patch sequence suitable for the vision encoder.
var Lfm2VlImageProcessor = class extends ImageProcessor {
  constructor(config) {
    super(config);
    // Spatial downsampling applied by the projector after the vision encoder.
    this.downsample_factor = config.downsample_factor ?? 2;
    // Whether oversized images may be split into multiple tiles.
    this.do_image_splitting = config.do_image_splitting ?? true;
    this.min_tiles = config.min_tiles ?? 2;
    this.max_tiles = config.max_tiles ?? 10;
    // Append a downscaled full-image "thumbnail" tile after the grid tiles.
    this.use_thumbnail = config.use_thumbnail ?? true;
    this.min_image_tokens = config.min_image_tokens ?? 64;
    this.max_image_tokens = config.max_image_tokens ?? 256;
    // Side length of a single encoder patch, in pixels.
    this.encoder_patch_size = config.encoder_patch_size ?? config.patch_size ?? 16;
    this.tile_size = config.tile_size ?? 512;
    this.max_pixels_tolerance = config.max_pixels_tolerance ?? 2;
    this.return_row_col_info = config.return_row_col_info ?? false;
    // Common padded patch-sequence length so tiles from different images can
    // be batched together: the max of what a thumbnail or a single tile needs.
    const max_thumbnail_patches = this.max_image_tokens * this.downsample_factor ** 2;
    const tile_size_patches = this.do_image_splitting ? (this.tile_size / this.encoder_patch_size) ** 2 : 0;
    this.max_num_patches = Math.max(max_thumbnail_patches, tile_size_patches);
  }
  /**
   * Check if the image is too large to be processed as a single tile.
   * @param {number} height
   * @param {number} width
   * @returns {boolean}
   */
  _is_image_too_large(height, width) {
    const total_factor = this.encoder_patch_size * this.downsample_factor;
    // Snap dimensions to the alignment grid before comparing areas.
    const h_bar = Math.max(this.encoder_patch_size, round_by_factor(height, total_factor));
    const w_bar = Math.max(this.encoder_patch_size, round_by_factor(width, total_factor));
    // "Too large" means the aligned area exceeds the max token budget by more
    // than the configured tolerance factor.
    return h_bar * w_bar > this.max_image_tokens * (this.encoder_patch_size * this.downsample_factor) ** 2 * this.max_pixels_tolerance;
  }
  /**
   * Get the grid layout for tiling a large image.
   * @param {number} height
   * @param {number} width
   * @returns {{ grid_width: number, grid_height: number, target_width: number, target_height: number }}
   */
  _get_grid_layout(height, width) {
    const target_ratios = get_target_ratios(this.min_tiles, this.max_tiles);
    // Pick the tile grid whose aspect ratio best matches the image.
    const [grid_width, grid_height] = find_closest_aspect_ratio(
      width / height,
      target_ratios,
      width,
      height,
      this.tile_size
    );
    return {
      grid_width,
      grid_height,
      // Pixel dimensions the image must be resized to before slicing tiles.
      target_width: this.tile_size * grid_width,
      target_height: this.tile_size * grid_height
    };
  }
  /** @param {RawImage|RawImage[]|RawImage[][]} images */
  // @ts-expect-error
  async _call(images, { return_row_col_info = null } = {}) {
    // Normalize input to a doubly-nested batch: RawImage[][].
    let batched_images;
    if (!Array.isArray(images)) {
      batched_images = [[images]];
    } else if (!Array.isArray(images[0])) {
      batched_images = [
        /** @type {RawImage[]} */
        images
      ];
    } else {
      batched_images = /** @type {RawImage[][]} */
      images;
    }
    const all_pixel_values = [];
    const all_pixel_masks = [];
    const all_spatial_shapes = [];
    const all_rows = [];
    const all_cols = [];
    const all_image_sizes = [];
    for (const image_batch of batched_images) {
      // Base preprocessing (rescale/normalize) without padding; padding is
      // handled per-tile by pad_along_first_dim below.
      const preprocessed = await Promise.all(image_batch.map((x) => this.preprocess(x, { do_pad: false })));
      for (const { pixel_values } of preprocessed) {
        const [, height, width] = pixel_values.dims;
        // Add a batch dimension in-place: (C,H,W) -> (1,C,H,W).
        const img = pixel_values.unsqueeze_(0);
        // Output dimensions must be multiples of patch size * downsample factor.
        const total_factor = this.encoder_patch_size * this.downsample_factor;
        const f2 = total_factor ** 2;
        // Target size for the single-tile (or thumbnail) rendering, clamped so
        // neither side drops below one alignment unit.
        const [new_height, new_width] = smart_resize(
          Math.max(total_factor, height),
          Math.max(total_factor, width),
          total_factor,
          this.min_image_tokens * f2,
          this.max_image_tokens * f2
        ).map((x) => Math.max(total_factor, x));
        let tiles;
        let num_rows = 1, num_cols = 1;
        const is_large = this._is_image_too_large(height, width);
        // A 1x1-only tile range makes splitting a no-op, so skip it entirely.
        const do_splitting = this.do_image_splitting && !(this.min_tiles === 1 && this.max_tiles === 1);
        if (is_large && do_splitting) {
          const { grid_width, grid_height, target_width, target_height } = this._get_grid_layout(
            height,
            width
          );
          num_rows = grid_height;
          num_cols = grid_width;
          // Resize to an exact multiple of tile_size, then slice the grid.
          const resized = await interpolate_4d(img, {
            size: [target_height, target_width]
          });
          tiles = [];
          for (let r = 0; r < grid_height; ++r) {
            for (let c = 0; c < grid_width; ++c) {
              const y = r * this.tile_size;
              const x = c * this.tile_size;
              tiles.push(resized.slice(null, null, [y, y + this.tile_size], [x, x + this.tile_size]));
            }
          }
          // Optionally append a whole-image thumbnail as the final tile.
          if (this.use_thumbnail && grid_width * grid_height !== 1) {
            tiles.push(await interpolate_4d(img, { size: [new_height, new_width] }));
          }
        } else {
          // Small image: a single resized tile.
          tiles = [await interpolate_4d(img, { size: [new_height, new_width] })];
        }
        for (const tile of tiles) {
          const [, , th, tw] = tile.dims;
          const patches = convert_image_to_patches(tile, this.encoder_patch_size);
          // Pad every tile's patch sequence to the shared max_num_patches length.
          const { padded, mask } = pad_along_first_dim(patches, this.max_num_patches);
          all_pixel_values.push(padded);
          all_pixel_masks.push(mask);
          // Patch-grid dimensions of this tile (before projector downsampling).
          all_spatial_shapes.push([
            Math.floor(th / this.encoder_patch_size),
            Math.floor(tw / this.encoder_patch_size)
          ]);
        }
        all_rows.push(num_rows);
        all_cols.push(num_cols);
        all_image_sizes.push([new_height, new_width]);
      }
    }
    const result = {
      pixel_values: cat(all_pixel_values, 0),
      pixel_attention_mask: stack(all_pixel_masks, 0),
      spatial_shapes: new Tensor3("int64", BigInt64Array.from(all_spatial_shapes.flat(), BigInt), [
        all_spatial_shapes.length,
        2
      ])
    };
    // Per-call option overrides the configured default when provided.
    if (return_row_col_info ?? this.return_row_col_info) {
      result.image_rows = all_rows;
      result.image_cols = all_cols;
      result.image_sizes = all_image_sizes;
    }
    return result;
  }
};
|
|
20135
|
+
|
|
19399
20136
|
// src/models/llava_onevision/image_processing_llava_onevision.js
|
|
19400
20137
|
var LlavaOnevisionImageProcessor = class extends ImageProcessor {
|
|
19401
20138
|
};
|
|
@@ -19619,27 +20356,6 @@ var PvtImageProcessor = class extends ImageProcessor {
|
|
|
19619
20356
|
};
|
|
19620
20357
|
|
|
19621
20358
|
// src/models/qwen2_vl/image_processing_qwen2_vl.js
|
|
19622
|
-
function smart_resize(height, width, factor = 28, min_pixels = 56 * 56, max_pixels = 14 * 14 * 4 * 1280) {
|
|
19623
|
-
if (height < factor || width < factor) {
|
|
19624
|
-
throw new Error(`height:${height} or width:${width} must be larger than factor:${factor}`);
|
|
19625
|
-
} else if (Math.max(height, width) / Math.min(height, width) > 200) {
|
|
19626
|
-
throw new Error(
|
|
19627
|
-
`absolute aspect ratio must be smaller than 200, got ${Math.max(height, width) / Math.min(height, width)}`
|
|
19628
|
-
);
|
|
19629
|
-
}
|
|
19630
|
-
let h_bar = Math.round(height / factor) * factor;
|
|
19631
|
-
let w_bar = Math.round(width / factor) * factor;
|
|
19632
|
-
if (h_bar * w_bar > max_pixels) {
|
|
19633
|
-
const beta = Math.sqrt(height * width / max_pixels);
|
|
19634
|
-
h_bar = Math.floor(height / beta / factor) * factor;
|
|
19635
|
-
w_bar = Math.floor(width / beta / factor) * factor;
|
|
19636
|
-
} else if (h_bar * w_bar < min_pixels) {
|
|
19637
|
-
const beta = Math.sqrt(min_pixels / (height * width));
|
|
19638
|
-
h_bar = Math.ceil(height * beta / factor) * factor;
|
|
19639
|
-
w_bar = Math.ceil(width * beta / factor) * factor;
|
|
19640
|
-
}
|
|
19641
|
-
return [h_bar, w_bar];
|
|
19642
|
-
}
|
|
19643
20359
|
var Qwen2VLImageProcessor = class extends ImageProcessor {
|
|
19644
20360
|
constructor(config) {
|
|
19645
20361
|
super(config);
|
|
@@ -20241,6 +20957,57 @@ ${boi_token}${image_tokens_expanded}${eoi_token}
|
|
|
20241
20957
|
}
|
|
20242
20958
|
};
|
|
20243
20959
|
|
|
20960
|
+
// src/models/granite_speech/processing_granite_speech.js
|
|
20961
|
+
// Granite Speech processor: pairs a tokenizer with an audio feature
// extractor and expands the audio placeholder token in the prompt to match
// the number of projected audio embeddings.
var GraniteSpeechProcessor = class extends Processor {
  static tokenizer_class = AutoTokenizer;
  static feature_extractor_class = AutoFeatureExtractor;
  static uses_processor_config = true;

  /**
   * Compute the number of audio tokens for a given raw audio length.
   * @param {number} audioLength Raw audio sample count.
   * @returns {number} Number of projector output tokens.
   */
  _get_num_audio_features(audioLength) {
    const fe_config = this.feature_extractor.config;
    const { hop_length } = fe_config.melspec_kwargs;
    const { projector_window_size, projector_downsample_rate } = fe_config;

    // Mel frames produced by the spectrogram, then halved by the encoder.
    const num_mel_frames = Math.floor(audioLength / hop_length) + 1;
    const num_encoder_frames = Math.floor(num_mel_frames / 2);

    // The projector consumes fixed-size windows and downsamples each one.
    const num_blocks = Math.ceil(num_encoder_frames / projector_window_size);
    const tokens_per_block = Math.floor(projector_window_size / projector_downsample_rate);
    return num_blocks * tokens_per_block;
  }

  /**
   * @param {string} text The text input to process.
   * @param {Float32Array} audio The audio input to process.
   */
  async _call(text, audio = null, kwargs = {}) {
    if (Array.isArray(text)) {
      throw new Error("Batched inputs are not supported yet.");
    }
    let audio_inputs = {};
    if (audio) {
      const { input_features } = await this.feature_extractor(audio);
      const audio_embed_size = this._get_num_audio_features(audio.length);

      // Every projected audio embedding is valid, so the mask is all ones.
      const mask_data = new Uint8Array(audio_embed_size).fill(1);
      audio_inputs = {
        input_features,
        input_features_mask: new Tensor3("bool", mask_data, [1, audio_embed_size]),
      };

      // Expand each audio placeholder to one token per audio embedding.
      const audio_token = this.config.audio_token ?? "<|audio|>";
      if (!text.includes(audio_token)) {
        throw new Error(`The input text does not contain the audio token ${audio_token}.`);
      }
      text = text.replaceAll(audio_token, audio_token.repeat(audio_embed_size));
    }
    const text_inputs = this.tokenizer(text, {
      add_special_tokens: false,
      ...kwargs,
    });
    return {
      ...text_inputs,
      ...audio_inputs,
    };
  }
};
|
|
21010
|
+
|
|
20244
21011
|
// src/models/grounding_dino/processing_grounding_dino.js
|
|
20245
21012
|
function get_phrases_from_posmap(posmaps, input_ids) {
|
|
20246
21013
|
const left_idx = 0;
|
|
@@ -20517,6 +21284,66 @@ var JinaCLIPProcessor = class extends Processor {
|
|
|
20517
21284
|
}
|
|
20518
21285
|
};
|
|
20519
21286
|
|
|
21287
|
+
// src/models/lfm2_vl/processing_lfm2_vl.js
|
|
21288
|
+
// LFM2-VL processor: runs the image processor, then expands each `<image>`
// placeholder in the prompt into the full sequence of tile/thumbnail image
// tokens before tokenizing.
var Lfm2VlProcessor = class extends Processor {
  static tokenizer_class = AutoTokenizer;
  static image_processor_class = AutoImageProcessor;
  /**
   * @param {RawImage|RawImage[]} images
   * @param {string|string[]|null} [text]
   * @param {Record<string, any>} [kwargs]
   */
  async _call(images, text = null, kwargs = {}) {
    // Row/col/size info is needed for token expansion but is not part of the
    // model inputs, so destructure it away from the returned image tensors.
    const { image_rows, image_cols, image_sizes, ...image_inputs } = await this.image_processor(images, {
      ...kwargs,
      return_row_col_info: true
    });
    if (text) {
      const image_token = this.config.image_token ?? "<image>";
      // Geometry parameters mirroring the image processor's defaults.
      const {
        tile_size = 512,
        downsample_factor = 2,
        encoder_patch_size = 16,
        use_thumbnail = true
      } = (
        /** @type {Record<string, any>} */
        this.image_processor.config
      );
      // Pixels -> tokens along one axis: patchify, then projector downsample.
      const ds2 = (s) => Math.ceil(Math.floor(s / encoder_patch_size) / downsample_factor);
      const tokens_per_tile = ds2(tile_size) ** 2;
      const image_start = this.config.image_start_token ?? "<|image_start|>";
      const image_end = this.config.image_end_token ?? "<|image_end|>";
      const thumbnail_token = this.config.image_thumbnail ?? "<|img_thumbnail|>";
      if (!Array.isArray(text)) text = [text];
      // Global image index: placeholders are consumed in order across samples.
      let image_idx = 0;
      text = text.map((sample) => {
        const parts = sample.split(image_token);
        // Each split boundary corresponds to one consumed image placeholder.
        return parts[0] + parts.slice(1).map((part) => {
          const idx = image_idx++;
          const [h, w] = image_sizes[idx];
          const rows = image_rows[idx], cols = image_cols[idx];
          const tokens_for_image = ds2(h) * ds2(w);
          let expanded = image_start;
          if (rows > 1 || cols > 1) {
            // Tiled image: per-tile marker + tile tokens, row-major order.
            const tile_str = image_token.repeat(tokens_per_tile);
            for (let r = 0; r < rows; ++r)
              for (let c = 0; c < cols; ++c)
                expanded += `<|img_row_${r + 1}_col_${c + 1}|>` + tile_str;
            // The thumbnail tile gets its own marker and token run.
            if (use_thumbnail) expanded += thumbnail_token + image_token.repeat(tokens_for_image);
          } else {
            // Single-tile image: just the image tokens.
            expanded += image_token.repeat(tokens_for_image);
          }
          return expanded + image_end + part;
        }).join("");
      });
    }
    return {
      ...image_inputs,
      ...text ? this.tokenizer(text, kwargs) : {}
    };
  }
};
|
|
21346
|
+
|
|
20520
21347
|
// src/models/llava/processing_llava.js
|
|
20521
21348
|
var LlavaProcessor = class extends Processor {
|
|
20522
21349
|
static tokenizer_class = AutoTokenizer;
|
|
@@ -21049,6 +21876,94 @@ var VoxtralProcessor = class extends Processor {
|
|
|
21049
21876
|
}
|
|
21050
21877
|
};
|
|
21051
21878
|
|
|
21879
|
+
// src/models/voxtral_realtime/processing_voxtral_realtime.js
|
|
21880
|
+
// Streaming constants for the Voxtral Realtime processor.
// NOTE(review): values mirror the upstream Python processor; confirm against
// the reference implementation when updating.
var NUM_LEFT_PAD_TOKENS = 32;
var NUM_DELAY_TOKENS = 6;
// Mel frames of audio consumed per generated text token.
var AUDIO_LENGTH_PER_TOK = 8;
// Extra right-padding (in tokens) used in non-streaming mode so the model
// transcribes the full audio.
var OFFLINE_STREAMING_BUFFER_TOKENS = 10;
var STREAMING_PAD_TOKEN_ID = 32;
var VoxtralRealtimeProcessor = class extends Processor {
  static tokenizer_class = AutoTokenizer;
  static feature_extractor_class = AutoFeatureExtractor;
  static uses_processor_config = false;
  /** Number of mel frames in the first audio chunk. */
  get num_mel_frames_first_audio_chunk() {
    return (NUM_DELAY_TOKENS + 1) * AUDIO_LENGTH_PER_TOK;
  }
  /** Number of raw audio samples in the first audio chunk. */
  get num_samples_first_audio_chunk() {
    const { hop_length, n_fft } = this.feature_extractor.config;
    // Inverse of the STFT framing formula for `center=true`.
    return (this.num_mel_frames_first_audio_chunk - 1) * hop_length + Math.floor(n_fft / 2);
  }
  /** Number of raw audio samples per subsequent audio chunk. */
  get num_samples_per_audio_chunk() {
    const { hop_length, n_fft } = this.feature_extractor.config;
    return AUDIO_LENGTH_PER_TOK * hop_length + n_fft;
  }
  /** Number of right-pad tokens for non-streaming mode. */
  get num_right_pad_tokens() {
    return NUM_DELAY_TOKENS + 1 + OFFLINE_STREAMING_BUFFER_TOKENS;
  }
  /** Number of mel frames per text token. */
  get audio_length_per_tok() {
    return AUDIO_LENGTH_PER_TOK;
  }
  /** Number of raw audio samples per token. */
  get raw_audio_length_per_tok() {
    return AUDIO_LENGTH_PER_TOK * this.feature_extractor.config.hop_length;
  }
  /**
   * Process audio input for VoxtralRealtime.
   *
   * In streaming mode with `is_first_audio_chunk=true`, the audio is left-padded
   * with silence and mel features are extracted with `center=true`.
   * Returns `{ input_ids, input_features }`.
   *
   * In streaming mode with `is_first_audio_chunk=false`, the audio chunk is
   * processed with `center=false` and only `{ input_features }` is returned.
   *
   * In non-streaming mode, the audio is right-padded to ensure the model
   * transcribes the full audio, then processed with `center=true`.
   * Returns `{ input_features }`.
   *
   * @param {Float32Array|Float64Array} audio The audio waveform.
   * @param {Object} [options]
   * @param {boolean} [options.is_streaming=false] Whether processing in streaming mode.
   * @param {boolean} [options.is_first_audio_chunk=true] Whether this is the first audio chunk.
   * @returns {Promise<Object>}
   */
  async _call(audio, { is_streaming = false, is_first_audio_chunk = true } = {}) {
    validate_audio_inputs(audio, "VoxtralRealtimeProcessor");
    if (!is_streaming && !is_first_audio_chunk) {
      throw new Error("In non-streaming mode (`is_streaming=false`), `is_first_audio_chunk` must be `true`.");
    }
    if (is_first_audio_chunk) {
      if (is_streaming) {
        // Prepend silence so the model has NUM_LEFT_PAD_TOKENS of left context.
        const num_left_pad_samples = NUM_LEFT_PAD_TOKENS * this.raw_audio_length_per_tok;
        const padded_audio = new Float32Array(num_left_pad_samples + audio.length);
        padded_audio.set(audio, num_left_pad_samples);
        const audio_encoding = await this.feature_extractor(padded_audio, { center: true });
        // input_ids = BOS (id 1) followed by streaming pad tokens covering the
        // left padding plus the model's output delay.
        const num_pad_tokens = NUM_LEFT_PAD_TOKENS + NUM_DELAY_TOKENS;
        const num_input_tokens = 1 + num_pad_tokens;
        const input_ids_data = new BigInt64Array(num_input_tokens).fill(BigInt(STREAMING_PAD_TOKEN_ID));
        input_ids_data[0] = 1n;
        const input_ids = new Tensor3("int64", input_ids_data, [1, num_input_tokens]);
        return {
          input_ids,
          ...audio_encoding
        };
      } else {
        // Offline mode: right-pad with silence so trailing speech is not cut
        // off by the model's output delay.
        const right_pad_samples = this.num_right_pad_tokens * this.raw_audio_length_per_tok;
        const padded_audio = new Float32Array(audio.length + right_pad_samples);
        padded_audio.set(audio);
        return await this.feature_extractor(padded_audio, { center: true });
      }
    } else {
      // Subsequent streaming chunks: no padding, no centering.
      return await this.feature_extractor(audio, { center: false });
    }
  }
};
|
|
21966
|
+
|
|
21052
21967
|
// src/models/wav2vec2/processing_wav2vec2.js
|
|
21053
21968
|
var Wav2Vec2Processor = class extends Processor {
|
|
21054
21969
|
static tokenizer_class = AutoTokenizer;
|
|
@@ -21148,14 +22063,18 @@ function getNormalizedConfig(config) {
|
|
|
21148
22063
|
case "florence2":
|
|
21149
22064
|
case "llava_onevision":
|
|
21150
22065
|
case "idefics3":
|
|
22066
|
+
case "granite_speech":
|
|
21151
22067
|
case "ultravox":
|
|
21152
22068
|
case "voxtral":
|
|
22069
|
+
case "voxtral_realtime":
|
|
21153
22070
|
case "smolvlm":
|
|
21154
22071
|
case "gemma3n":
|
|
22072
|
+
case "lfm2_vl":
|
|
21155
22073
|
case "chatterbox":
|
|
21156
22074
|
case "mistral3":
|
|
21157
22075
|
case "qwen2_5_vl":
|
|
21158
22076
|
case "qwen3_vl":
|
|
22077
|
+
case "qwen3_vl_moe":
|
|
21159
22078
|
init_normalized_config = getNormalizedConfig(config.text_config);
|
|
21160
22079
|
break;
|
|
21161
22080
|
case "moondream1":
|
|
@@ -21205,11 +22124,17 @@ function getNormalizedConfig(config) {
|
|
|
21205
22124
|
case "cohere":
|
|
21206
22125
|
case "cohere2":
|
|
21207
22126
|
case "mistral":
|
|
22127
|
+
case "voxtral_realtime_text":
|
|
22128
|
+
case "voxtral_realtime_encoder":
|
|
21208
22129
|
case "starcoder2":
|
|
21209
22130
|
case "qwen2":
|
|
22131
|
+
case "qwen2_moe":
|
|
21210
22132
|
case "qwen2_vl":
|
|
22133
|
+
case "qwen2_vl_text":
|
|
21211
22134
|
case "qwen2_5_vl_text":
|
|
22135
|
+
case "qwen3_moe":
|
|
21212
22136
|
case "qwen3_vl_text":
|
|
22137
|
+
case "qwen3_vl_moe_text":
|
|
21213
22138
|
case "phi":
|
|
21214
22139
|
case "phi3":
|
|
21215
22140
|
case "phi3_v":
|
|
@@ -21350,6 +22275,9 @@ function getNormalizedConfig(config) {
|
|
|
21350
22275
|
return normalized_config;
|
|
21351
22276
|
}
|
|
21352
22277
|
function getCacheShapes(config, options) {
|
|
22278
|
+
if (!(config instanceof PretrainedConfig)) {
|
|
22279
|
+
config = new PretrainedConfig(config);
|
|
22280
|
+
}
|
|
21353
22281
|
if (["lfm2", "lfm2_moe"].includes(config.model_type)) {
|
|
21354
22282
|
const pkv_prefix = options?.prefix ?? "past_key_values";
|
|
21355
22283
|
const conv_prefix = pkv_prefix === "present" ? "present" : "past";
|
|
@@ -21408,7 +22336,7 @@ function getCacheShapes(config, options) {
|
|
|
21408
22336
|
}
|
|
21409
22337
|
}
|
|
21410
22338
|
return cache_values;
|
|
21411
|
-
} else if (["
|
|
22339
|
+
} else if (["qwen3_next", "qwen3_5_text", "qwen3_5_moe_text", "olmo_hybrid"].includes(config.model_type)) {
|
|
21412
22340
|
const pkv_prefix = options?.prefix ?? "past_key_values";
|
|
21413
22341
|
const conv_prefix = pkv_prefix === "present" ? "present" : "past";
|
|
21414
22342
|
const cache_values = {};
|
|
@@ -21425,11 +22353,10 @@ function getCacheShapes(config, options) {
|
|
|
21425
22353
|
linear_conv_kernel_dim
|
|
21426
22354
|
} = (
|
|
21427
22355
|
/** @type {any} */
|
|
21428
|
-
config
|
|
22356
|
+
config
|
|
21429
22357
|
);
|
|
21430
22358
|
const key_dim = linear_key_head_dim * linear_num_key_heads;
|
|
21431
22359
|
const value_dim = linear_value_head_dim * linear_num_value_heads;
|
|
21432
|
-
const conv_dim = key_dim * 2 + value_dim;
|
|
21433
22360
|
const final_head_dim = head_dim ?? hidden_size / num_attention_heads;
|
|
21434
22361
|
const batch_size = options?.batch_size ?? 1;
|
|
21435
22362
|
for (let i = 0; i < layer_types.length; ++i) {
|
|
@@ -21438,7 +22365,14 @@ function getCacheShapes(config, options) {
|
|
|
21438
22365
|
cache_values[`${pkv_prefix}.${i}.${kv}`] = [batch_size, num_key_value_heads, 0, final_head_dim];
|
|
21439
22366
|
}
|
|
21440
22367
|
} else if (layer_types[i] === "linear_attention") {
|
|
21441
|
-
|
|
22368
|
+
if (config.model_type === "olmo_hybrid") {
|
|
22369
|
+
cache_values[`${conv_prefix}_conv.${i}.key`] = [batch_size, key_dim, linear_conv_kernel_dim];
|
|
22370
|
+
cache_values[`${conv_prefix}_conv.${i}.value`] = [batch_size, value_dim, linear_conv_kernel_dim];
|
|
22371
|
+
cache_values[`${conv_prefix}_conv.${i}.query`] = [batch_size, key_dim, linear_conv_kernel_dim];
|
|
22372
|
+
} else {
|
|
22373
|
+
const conv_dim = key_dim * 2 + value_dim;
|
|
22374
|
+
cache_values[`${conv_prefix}_conv.${i}`] = [batch_size, conv_dim, linear_conv_kernel_dim];
|
|
22375
|
+
}
|
|
21442
22376
|
cache_values[`${conv_prefix}_recurrent.${i}`] = [
|
|
21443
22377
|
batch_size,
|
|
21444
22378
|
linear_num_value_heads,
|
|
@@ -21450,6 +22384,16 @@ function getCacheShapes(config, options) {
|
|
|
21450
22384
|
}
|
|
21451
22385
|
}
|
|
21452
22386
|
return cache_values;
|
|
22387
|
+
} else if (["lfm2_vl", "qwen3_5", "qwen3_5_moe", "voxtral_realtime"].includes(config.model_type)) {
|
|
22388
|
+
let subConfig;
|
|
22389
|
+
if (config.model_type === "voxtral_realtime" && options?.session_name === "audio_encoder") {
|
|
22390
|
+
subConfig = /** @type {any} */
|
|
22391
|
+
config.audio_config;
|
|
22392
|
+
} else {
|
|
22393
|
+
subConfig = /** @type {any} */
|
|
22394
|
+
config.text_config;
|
|
22395
|
+
}
|
|
22396
|
+
return getCacheShapes(subConfig, options);
|
|
21453
22397
|
}
|
|
21454
22398
|
return getKeyValueShapes(config, options);
|
|
21455
22399
|
}
|
|
@@ -21615,7 +22559,7 @@ async function getModelDataFiles(pretrained_model_name_or_path, fileName, suffix
|
|
|
21615
22559
|
}
|
|
21616
22560
|
|
|
21617
22561
|
// src/models/session.js
|
|
21618
|
-
async function getSession(pretrained_model_name_or_path, fileName, options,
|
|
22562
|
+
async function getSession(pretrained_model_name_or_path, fileName, options, cache_config = false, session_name = void 0) {
|
|
21619
22563
|
let custom_config = options.config?.["transformers.js_config"] ?? {};
|
|
21620
22564
|
const selectedDevice = (
|
|
21621
22565
|
/** @type {import("../utils/devices.js").DeviceType} */
|
|
@@ -21673,9 +22617,10 @@ async function getSession(pretrained_model_name_or_path, fileName, options, is_d
|
|
|
21673
22617
|
if (externalData.length > 0 && !apis.IS_NODE_ENV) {
|
|
21674
22618
|
session_options.externalData = externalData;
|
|
21675
22619
|
}
|
|
21676
|
-
if (
|
|
22620
|
+
if (cache_config && selectedDevice === "webgpu" && kv_cache_dtype_config !== false) {
|
|
21677
22621
|
const shapes = getCacheShapes(options.config, {
|
|
21678
|
-
prefix: "present"
|
|
22622
|
+
prefix: "present",
|
|
22623
|
+
session_name
|
|
21679
22624
|
});
|
|
21680
22625
|
if (Object.keys(shapes).length > 0 && !isONNXProxy()) {
|
|
21681
22626
|
const preferredOutputLocation = {};
|
|
@@ -21693,15 +22638,17 @@ async function getSession(pretrained_model_name_or_path, fileName, options, is_d
|
|
|
21693
22638
|
};
|
|
21694
22639
|
return { buffer_or_path, session_options, session_config };
|
|
21695
22640
|
}
|
|
21696
|
-
async function constructSessions(pretrained_model_name_or_path, names, options,
|
|
22641
|
+
async function constructSessions(pretrained_model_name_or_path, names, options, cache_sessions = void 0) {
|
|
21697
22642
|
return Object.fromEntries(
|
|
21698
22643
|
await Promise.all(
|
|
21699
22644
|
Object.keys(names).map(async (name) => {
|
|
22645
|
+
const cache_config = cache_sessions?.[name] ?? false;
|
|
21700
22646
|
const { buffer_or_path, session_options, session_config } = await getSession(
|
|
21701
22647
|
pretrained_model_name_or_path,
|
|
21702
22648
|
names[name],
|
|
21703
22649
|
options,
|
|
21704
|
-
|
|
22650
|
+
cache_config,
|
|
22651
|
+
name
|
|
21705
22652
|
);
|
|
21706
22653
|
const session = await createInferenceSession(buffer_or_path, session_options, session_config);
|
|
21707
22654
|
return [name, session];
|
|
@@ -23001,6 +23948,66 @@ var BeamSearchSampler = class extends LogitsSampler {
|
|
|
23001
23948
|
}
|
|
23002
23949
|
};
|
|
23003
23950
|
|
|
23951
|
+
// src/cache_utils.js
|
|
23952
|
+
var _DynamicCache = class {
|
|
23953
|
+
/**
|
|
23954
|
+
* Create a DynamicCache, optionally pre-populated with entries.
|
|
23955
|
+
* @param {Record<string, Tensor>} [entries] Initial name→Tensor mappings.
|
|
23956
|
+
*/
|
|
23957
|
+
constructor(entries) {
|
|
23958
|
+
if (!entries) return;
|
|
23959
|
+
for (const key in entries) {
|
|
23960
|
+
if (key in this) {
|
|
23961
|
+
throw new TypeError(`Key "${key}" conflicts with an existing property on DynamicCache`);
|
|
23962
|
+
}
|
|
23963
|
+
const value = entries[key];
|
|
23964
|
+
if (!(value instanceof Tensor3)) {
|
|
23965
|
+
throw new TypeError(`Expected a Tensor for key "${key}", got ${typeof value}`);
|
|
23966
|
+
}
|
|
23967
|
+
this[key] = value;
|
|
23968
|
+
}
|
|
23969
|
+
}
|
|
23970
|
+
/**
|
|
23971
|
+
* Get the cached sequence length. This requires at least one attention cache entry to be present.
|
|
23972
|
+
* @returns {number} The past sequence length.
|
|
23973
|
+
*/
|
|
23974
|
+
get_seq_length() {
|
|
23975
|
+
const self2 = (
|
|
23976
|
+
/** @type {any} */
|
|
23977
|
+
this
|
|
23978
|
+
);
|
|
23979
|
+
for (const name in self2) {
|
|
23980
|
+
if (name.startsWith("past_key_values.")) {
|
|
23981
|
+
return self2[name].dims.at(-2);
|
|
23982
|
+
}
|
|
23983
|
+
}
|
|
23984
|
+
throw new Error("Unable to determine sequence length from the cache.");
|
|
23985
|
+
}
|
|
23986
|
+
/**
|
|
23987
|
+
* Dispose all contained tensors whose data resides on the GPU.
|
|
23988
|
+
* Returns a promise that resolves when all disposals are complete.
|
|
23989
|
+
* @returns {Promise<void>} Promise that resolves when all GPU tensors are disposed.
|
|
23990
|
+
*/
|
|
23991
|
+
async dispose() {
|
|
23992
|
+
const promises = [];
|
|
23993
|
+
for (
|
|
23994
|
+
const t of
|
|
23995
|
+
/** @type {Tensor[]} */
|
|
23996
|
+
Object.values(this)
|
|
23997
|
+
) {
|
|
23998
|
+
if (t.location === "gpu-buffer") {
|
|
23999
|
+
promises.push(t.dispose());
|
|
24000
|
+
}
|
|
24001
|
+
}
|
|
24002
|
+
await Promise.all(promises);
|
|
24003
|
+
}
|
|
24004
|
+
};
|
|
24005
|
+
var DynamicCache = (
|
|
24006
|
+
/** @type {new (entries?: Record<string, Tensor>) => DynamicCache} */
|
|
24007
|
+
/** @type {unknown} */
|
|
24008
|
+
_DynamicCache
|
|
24009
|
+
);
|
|
24010
|
+
|
|
23004
24011
|
// src/models/modeling_utils.js
|
|
23005
24012
|
var MODEL_MAPPING_NAMES = null;
|
|
23006
24013
|
function registerTaskMappings(mappings) {
|
|
@@ -23046,71 +24053,181 @@ var MODEL_TYPES = {
|
|
|
23046
24053
|
AutoEncoder: 12,
|
|
23047
24054
|
ImageAudioTextToText: 13,
|
|
23048
24055
|
Supertonic: 14,
|
|
23049
|
-
Chatterbox: 15
|
|
24056
|
+
Chatterbox: 15,
|
|
24057
|
+
MultimodalLanguageModelOnly: 16,
|
|
24058
|
+
VoxtralRealtime: 17
|
|
23050
24059
|
};
|
|
23051
24060
|
var MODEL_TYPE_CONFIG = {
|
|
23052
24061
|
[MODEL_TYPES.DecoderOnly]: {
|
|
23053
24062
|
can_generate: true,
|
|
23054
24063
|
forward: decoder_forward,
|
|
23055
|
-
prepare_inputs: decoder_prepare_inputs_for_generation
|
|
24064
|
+
prepare_inputs: decoder_prepare_inputs_for_generation,
|
|
24065
|
+
sessions: (config, options) => ({ model: options.model_file_name ?? "model" }),
|
|
24066
|
+
cache_sessions: { model: true },
|
|
24067
|
+
optional_configs: { generation_config: "generation_config.json" }
|
|
23056
24068
|
},
|
|
23057
24069
|
[MODEL_TYPES.DecoderOnlyWithoutHead]: {
|
|
23058
24070
|
can_generate: false,
|
|
23059
24071
|
forward: decoder_forward,
|
|
23060
|
-
prepare_inputs: decoder_prepare_inputs_for_generation
|
|
24072
|
+
prepare_inputs: decoder_prepare_inputs_for_generation,
|
|
24073
|
+
sessions: (config, options) => ({ model: options.model_file_name ?? "model" })
|
|
23061
24074
|
},
|
|
23062
24075
|
[MODEL_TYPES.Seq2Seq]: {
|
|
23063
24076
|
can_generate: true,
|
|
23064
24077
|
forward: seq2seq_forward,
|
|
23065
|
-
prepare_inputs: encoder_decoder_prepare_inputs_for_generation
|
|
24078
|
+
prepare_inputs: encoder_decoder_prepare_inputs_for_generation,
|
|
24079
|
+
sessions: () => ({ model: "encoder_model", decoder_model_merged: "decoder_model_merged" }),
|
|
24080
|
+
cache_sessions: { decoder_model_merged: true },
|
|
24081
|
+
optional_configs: { generation_config: "generation_config.json" }
|
|
23066
24082
|
},
|
|
23067
24083
|
[MODEL_TYPES.Vision2Seq]: {
|
|
23068
24084
|
can_generate: true,
|
|
23069
24085
|
forward: seq2seq_forward,
|
|
23070
|
-
prepare_inputs: encoder_decoder_prepare_inputs_for_generation
|
|
24086
|
+
prepare_inputs: encoder_decoder_prepare_inputs_for_generation,
|
|
24087
|
+
sessions: () => ({ model: "encoder_model", decoder_model_merged: "decoder_model_merged" }),
|
|
24088
|
+
cache_sessions: { decoder_model_merged: true },
|
|
24089
|
+
optional_configs: { generation_config: "generation_config.json" }
|
|
23071
24090
|
},
|
|
23072
24091
|
[MODEL_TYPES.Musicgen]: {
|
|
23073
24092
|
can_generate: true,
|
|
23074
|
-
forward: seq2seq_forward
|
|
24093
|
+
forward: seq2seq_forward,
|
|
24094
|
+
sessions: () => ({
|
|
24095
|
+
model: "text_encoder",
|
|
24096
|
+
decoder_model_merged: "decoder_model_merged",
|
|
24097
|
+
encodec_decode: "encodec_decode"
|
|
24098
|
+
}),
|
|
24099
|
+
cache_sessions: { decoder_model_merged: true },
|
|
24100
|
+
optional_configs: { generation_config: "generation_config.json" }
|
|
23075
24101
|
},
|
|
23076
24102
|
[MODEL_TYPES.EncoderDecoder]: {
|
|
23077
24103
|
can_generate: false,
|
|
23078
|
-
forward: seq2seq_forward
|
|
24104
|
+
forward: seq2seq_forward,
|
|
24105
|
+
sessions: () => ({ model: "encoder_model", decoder_model_merged: "decoder_model_merged" }),
|
|
24106
|
+
cache_sessions: { decoder_model_merged: true }
|
|
24107
|
+
},
|
|
24108
|
+
[MODEL_TYPES.MaskGeneration]: {
|
|
24109
|
+
sessions: () => ({ model: "vision_encoder", prompt_encoder_mask_decoder: "prompt_encoder_mask_decoder" })
|
|
23079
24110
|
},
|
|
23080
24111
|
[MODEL_TYPES.ImageTextToText]: {
|
|
23081
24112
|
can_generate: true,
|
|
23082
24113
|
forward: image_text_to_text_forward,
|
|
23083
|
-
prepare_inputs: multimodal_text_to_text_prepare_inputs_for_generation
|
|
24114
|
+
prepare_inputs: multimodal_text_to_text_prepare_inputs_for_generation,
|
|
24115
|
+
sessions: (config) => {
|
|
24116
|
+
const s = {
|
|
24117
|
+
embed_tokens: "embed_tokens",
|
|
24118
|
+
vision_encoder: "vision_encoder",
|
|
24119
|
+
decoder_model_merged: "decoder_model_merged"
|
|
24120
|
+
};
|
|
24121
|
+
if (config.is_encoder_decoder) s["model"] = "encoder_model";
|
|
24122
|
+
return s;
|
|
24123
|
+
},
|
|
24124
|
+
cache_sessions: { decoder_model_merged: true },
|
|
24125
|
+
optional_configs: { generation_config: "generation_config.json" }
|
|
23084
24126
|
},
|
|
23085
24127
|
[MODEL_TYPES.AudioTextToText]: {
|
|
23086
24128
|
can_generate: true,
|
|
23087
24129
|
forward: audio_text_to_text_forward,
|
|
23088
|
-
prepare_inputs: multimodal_text_to_text_prepare_inputs_for_generation
|
|
24130
|
+
prepare_inputs: multimodal_text_to_text_prepare_inputs_for_generation,
|
|
24131
|
+
sessions: () => ({
|
|
24132
|
+
embed_tokens: "embed_tokens",
|
|
24133
|
+
audio_encoder: "audio_encoder",
|
|
24134
|
+
decoder_model_merged: "decoder_model_merged"
|
|
24135
|
+
}),
|
|
24136
|
+
cache_sessions: { decoder_model_merged: true },
|
|
24137
|
+
optional_configs: { generation_config: "generation_config.json" }
|
|
23089
24138
|
},
|
|
23090
|
-
[MODEL_TYPES.
|
|
24139
|
+
[MODEL_TYPES.ImageAudioTextToText]: {
|
|
23091
24140
|
can_generate: true,
|
|
23092
|
-
prepare_inputs: multimodal_text_to_text_prepare_inputs_for_generation
|
|
24141
|
+
prepare_inputs: multimodal_text_to_text_prepare_inputs_for_generation,
|
|
24142
|
+
sessions: () => ({
|
|
24143
|
+
embed_tokens: "embed_tokens",
|
|
24144
|
+
audio_encoder: "audio_encoder",
|
|
24145
|
+
vision_encoder: "vision_encoder",
|
|
24146
|
+
decoder_model_merged: "decoder_model_merged"
|
|
24147
|
+
}),
|
|
24148
|
+
optional_configs: { generation_config: "generation_config.json" }
|
|
23093
24149
|
},
|
|
23094
|
-
[MODEL_TYPES.
|
|
24150
|
+
[MODEL_TYPES.Phi3V]: {
|
|
23095
24151
|
can_generate: true,
|
|
23096
|
-
prepare_inputs: multimodal_text_to_text_prepare_inputs_for_generation
|
|
24152
|
+
prepare_inputs: multimodal_text_to_text_prepare_inputs_for_generation,
|
|
24153
|
+
sessions: () => ({
|
|
24154
|
+
prepare_inputs_embeds: "prepare_inputs_embeds",
|
|
24155
|
+
model: "model",
|
|
24156
|
+
vision_encoder: "vision_encoder"
|
|
24157
|
+
}),
|
|
24158
|
+
cache_sessions: { model: true },
|
|
24159
|
+
optional_configs: { generation_config: "generation_config.json" }
|
|
23097
24160
|
},
|
|
23098
24161
|
[MODEL_TYPES.MultiModality]: {
|
|
23099
|
-
can_generate: true
|
|
24162
|
+
can_generate: true,
|
|
24163
|
+
sessions: () => ({
|
|
24164
|
+
prepare_inputs_embeds: "prepare_inputs_embeds",
|
|
24165
|
+
model: "language_model",
|
|
24166
|
+
lm_head: "lm_head",
|
|
24167
|
+
gen_head: "gen_head",
|
|
24168
|
+
gen_img_embeds: "gen_img_embeds",
|
|
24169
|
+
image_decode: "image_decode"
|
|
24170
|
+
}),
|
|
24171
|
+
cache_sessions: { model: true },
|
|
24172
|
+
optional_configs: { generation_config: "generation_config.json" }
|
|
23100
24173
|
},
|
|
23101
24174
|
[MODEL_TYPES.AutoEncoder]: {
|
|
23102
24175
|
can_generate: false,
|
|
23103
|
-
forward: auto_encoder_forward
|
|
24176
|
+
forward: auto_encoder_forward,
|
|
24177
|
+
sessions: () => ({ encoder_model: "encoder_model", decoder_model: "decoder_model" })
|
|
24178
|
+
},
|
|
24179
|
+
[MODEL_TYPES.Supertonic]: {
|
|
24180
|
+
sessions: () => ({
|
|
24181
|
+
text_encoder: "text_encoder",
|
|
24182
|
+
latent_denoiser: "latent_denoiser",
|
|
24183
|
+
voice_decoder: "voice_decoder"
|
|
24184
|
+
})
|
|
23104
24185
|
},
|
|
23105
24186
|
[MODEL_TYPES.Chatterbox]: {
|
|
23106
24187
|
can_generate: true,
|
|
23107
|
-
forward: encoder_forward
|
|
24188
|
+
forward: encoder_forward,
|
|
24189
|
+
sessions: () => ({
|
|
24190
|
+
embed_tokens: "embed_tokens",
|
|
24191
|
+
speech_encoder: "speech_encoder",
|
|
24192
|
+
model: "language_model",
|
|
24193
|
+
conditional_decoder: "conditional_decoder"
|
|
24194
|
+
}),
|
|
24195
|
+
cache_sessions: { model: true },
|
|
24196
|
+
optional_configs: { generation_config: "generation_config.json" }
|
|
24197
|
+
},
|
|
24198
|
+
[MODEL_TYPES.MultimodalLanguageModelOnly]: {
|
|
24199
|
+
can_generate: true,
|
|
24200
|
+
forward: image_text_to_text_forward,
|
|
24201
|
+
prepare_inputs: multimodal_text_to_text_prepare_inputs_for_generation,
|
|
24202
|
+
sessions: () => ({ embed_tokens: "embed_tokens", decoder_model_merged: "decoder_model_merged" }),
|
|
24203
|
+
cache_sessions: { decoder_model_merged: true },
|
|
24204
|
+
optional_configs: { generation_config: "generation_config.json" }
|
|
24205
|
+
},
|
|
24206
|
+
[MODEL_TYPES.VoxtralRealtime]: {
|
|
24207
|
+
can_generate: true,
|
|
24208
|
+
prepare_inputs: decoder_prepare_inputs_for_generation,
|
|
24209
|
+
sessions: () => ({
|
|
24210
|
+
embed_tokens: "embed_tokens",
|
|
24211
|
+
audio_encoder: "audio_encoder",
|
|
24212
|
+
decoder_model_merged: "decoder_model_merged"
|
|
24213
|
+
}),
|
|
24214
|
+
cache_sessions: { decoder_model_merged: true, audio_encoder: true },
|
|
24215
|
+
optional_configs: { generation_config: "generation_config.json" }
|
|
23108
24216
|
},
|
|
23109
24217
|
default: {
|
|
23110
24218
|
can_generate: false,
|
|
23111
|
-
forward: encoder_forward
|
|
24219
|
+
forward: encoder_forward,
|
|
24220
|
+
sessions: (config, options) => ({ model: options.model_file_name ?? "model" })
|
|
23112
24221
|
}
|
|
23113
24222
|
};
|
|
24223
|
+
function getSessionsConfig(modelType, config, options = {}) {
|
|
24224
|
+
const typeConfig = MODEL_TYPE_CONFIG[modelType] ?? MODEL_TYPE_CONFIG.default;
|
|
24225
|
+
return {
|
|
24226
|
+
sessions: typeConfig.sessions(config, options),
|
|
24227
|
+
cache_sessions: typeConfig.cache_sessions,
|
|
24228
|
+
optional_configs: typeConfig.optional_configs
|
|
24229
|
+
};
|
|
24230
|
+
}
|
|
23114
24231
|
var MODEL_TYPE_MAPPING = /* @__PURE__ */ new Map();
|
|
23115
24232
|
var MODEL_NAME_TO_CLASS_MAPPING = /* @__PURE__ */ new Map();
|
|
23116
24233
|
var MODEL_CLASS_TO_NAME_MAPPING = /* @__PURE__ */ new Map();
|
|
@@ -23196,245 +24313,23 @@ var PreTrainedModel = class extends Callable2 {
|
|
|
23196
24313
|
const modelName = MODEL_CLASS_TO_NAME_MAPPING.get(this);
|
|
23197
24314
|
const modelType = MODEL_TYPE_MAPPING.get(modelName);
|
|
23198
24315
|
config = options.config = await AutoConfig.from_pretrained(pretrained_model_name_or_path, options);
|
|
23199
|
-
|
|
23200
|
-
if (modelType ===
|
|
23201
|
-
|
|
23202
|
-
|
|
23203
|
-
|
|
23204
|
-
{
|
|
23205
|
-
|
|
23206
|
-
},
|
|
23207
|
-
options,
|
|
23208
|
-
"model"
|
|
23209
|
-
),
|
|
23210
|
-
get_optional_configs(
|
|
23211
|
-
pretrained_model_name_or_path,
|
|
23212
|
-
{
|
|
23213
|
-
generation_config: "generation_config.json"
|
|
23214
|
-
},
|
|
23215
|
-
options
|
|
23216
|
-
)
|
|
23217
|
-
]);
|
|
23218
|
-
} else if (modelType === MODEL_TYPES.Seq2Seq || modelType === MODEL_TYPES.Vision2Seq) {
|
|
23219
|
-
info = await Promise.all([
|
|
23220
|
-
constructSessions(
|
|
23221
|
-
pretrained_model_name_or_path,
|
|
23222
|
-
{
|
|
23223
|
-
model: "encoder_model",
|
|
23224
|
-
decoder_model_merged: "decoder_model_merged"
|
|
23225
|
-
},
|
|
23226
|
-
options,
|
|
23227
|
-
"decoder_model_merged"
|
|
23228
|
-
),
|
|
23229
|
-
get_optional_configs(
|
|
23230
|
-
pretrained_model_name_or_path,
|
|
23231
|
-
{
|
|
23232
|
-
generation_config: "generation_config.json"
|
|
23233
|
-
},
|
|
23234
|
-
options
|
|
23235
|
-
)
|
|
23236
|
-
]);
|
|
23237
|
-
} else if (modelType === MODEL_TYPES.MaskGeneration) {
|
|
23238
|
-
info = await Promise.all([
|
|
23239
|
-
constructSessions(
|
|
23240
|
-
pretrained_model_name_or_path,
|
|
23241
|
-
{
|
|
23242
|
-
model: "vision_encoder",
|
|
23243
|
-
prompt_encoder_mask_decoder: "prompt_encoder_mask_decoder"
|
|
23244
|
-
},
|
|
23245
|
-
options
|
|
23246
|
-
)
|
|
23247
|
-
]);
|
|
23248
|
-
} else if (modelType === MODEL_TYPES.EncoderDecoder) {
|
|
23249
|
-
info = await Promise.all([
|
|
23250
|
-
constructSessions(
|
|
23251
|
-
pretrained_model_name_or_path,
|
|
23252
|
-
{
|
|
23253
|
-
model: "encoder_model",
|
|
23254
|
-
decoder_model_merged: "decoder_model_merged"
|
|
23255
|
-
},
|
|
23256
|
-
options,
|
|
23257
|
-
"decoder_model_merged"
|
|
23258
|
-
)
|
|
23259
|
-
]);
|
|
23260
|
-
} else if (modelType === MODEL_TYPES.ImageTextToText) {
|
|
23261
|
-
const sessions = {
|
|
23262
|
-
embed_tokens: "embed_tokens",
|
|
23263
|
-
vision_encoder: "vision_encoder",
|
|
23264
|
-
decoder_model_merged: "decoder_model_merged"
|
|
23265
|
-
};
|
|
23266
|
-
if (config.is_encoder_decoder) {
|
|
23267
|
-
sessions["model"] = "encoder_model";
|
|
23268
|
-
}
|
|
23269
|
-
info = await Promise.all([
|
|
23270
|
-
constructSessions(pretrained_model_name_or_path, sessions, options, "decoder_model_merged"),
|
|
23271
|
-
get_optional_configs(
|
|
23272
|
-
pretrained_model_name_or_path,
|
|
23273
|
-
{
|
|
23274
|
-
generation_config: "generation_config.json"
|
|
23275
|
-
},
|
|
23276
|
-
options
|
|
23277
|
-
)
|
|
23278
|
-
]);
|
|
23279
|
-
} else if (modelType === MODEL_TYPES.AudioTextToText) {
|
|
23280
|
-
const sessions = {
|
|
23281
|
-
embed_tokens: "embed_tokens",
|
|
23282
|
-
audio_encoder: "audio_encoder",
|
|
23283
|
-
decoder_model_merged: "decoder_model_merged"
|
|
23284
|
-
};
|
|
23285
|
-
info = await Promise.all([
|
|
23286
|
-
constructSessions(pretrained_model_name_or_path, sessions, options, "decoder_model_merged"),
|
|
23287
|
-
get_optional_configs(
|
|
23288
|
-
pretrained_model_name_or_path,
|
|
23289
|
-
{
|
|
23290
|
-
generation_config: "generation_config.json"
|
|
23291
|
-
},
|
|
23292
|
-
options
|
|
23293
|
-
)
|
|
23294
|
-
]);
|
|
23295
|
-
} else if (modelType === MODEL_TYPES.ImageAudioTextToText) {
|
|
23296
|
-
const sessions = {
|
|
23297
|
-
embed_tokens: "embed_tokens",
|
|
23298
|
-
audio_encoder: "audio_encoder",
|
|
23299
|
-
vision_encoder: "vision_encoder",
|
|
23300
|
-
decoder_model_merged: "decoder_model_merged"
|
|
23301
|
-
};
|
|
23302
|
-
info = await Promise.all([
|
|
23303
|
-
constructSessions(pretrained_model_name_or_path, sessions, options),
|
|
23304
|
-
get_optional_configs(
|
|
23305
|
-
pretrained_model_name_or_path,
|
|
23306
|
-
{
|
|
23307
|
-
generation_config: "generation_config.json"
|
|
23308
|
-
},
|
|
23309
|
-
options
|
|
23310
|
-
)
|
|
23311
|
-
]);
|
|
23312
|
-
} else if (modelType === MODEL_TYPES.Musicgen) {
|
|
23313
|
-
info = await Promise.all([
|
|
23314
|
-
constructSessions(
|
|
23315
|
-
pretrained_model_name_or_path,
|
|
23316
|
-
{
|
|
23317
|
-
model: "text_encoder",
|
|
23318
|
-
decoder_model_merged: "decoder_model_merged",
|
|
23319
|
-
encodec_decode: "encodec_decode"
|
|
23320
|
-
},
|
|
23321
|
-
options,
|
|
23322
|
-
"decoder_model_merged"
|
|
23323
|
-
),
|
|
23324
|
-
get_optional_configs(
|
|
23325
|
-
pretrained_model_name_or_path,
|
|
23326
|
-
{
|
|
23327
|
-
generation_config: "generation_config.json"
|
|
23328
|
-
},
|
|
23329
|
-
options
|
|
23330
|
-
)
|
|
23331
|
-
]);
|
|
23332
|
-
} else if (modelType === MODEL_TYPES.MultiModality) {
|
|
23333
|
-
info = await Promise.all([
|
|
23334
|
-
constructSessions(
|
|
23335
|
-
pretrained_model_name_or_path,
|
|
23336
|
-
{
|
|
23337
|
-
prepare_inputs_embeds: "prepare_inputs_embeds",
|
|
23338
|
-
model: "language_model",
|
|
23339
|
-
lm_head: "lm_head",
|
|
23340
|
-
gen_head: "gen_head",
|
|
23341
|
-
gen_img_embeds: "gen_img_embeds",
|
|
23342
|
-
image_decode: "image_decode"
|
|
23343
|
-
},
|
|
23344
|
-
options,
|
|
23345
|
-
"model"
|
|
23346
|
-
),
|
|
23347
|
-
get_optional_configs(
|
|
23348
|
-
pretrained_model_name_or_path,
|
|
23349
|
-
{
|
|
23350
|
-
generation_config: "generation_config.json"
|
|
23351
|
-
},
|
|
23352
|
-
options
|
|
23353
|
-
)
|
|
23354
|
-
]);
|
|
23355
|
-
} else if (modelType === MODEL_TYPES.Phi3V) {
|
|
23356
|
-
info = await Promise.all([
|
|
23357
|
-
constructSessions(
|
|
23358
|
-
pretrained_model_name_or_path,
|
|
23359
|
-
{
|
|
23360
|
-
prepare_inputs_embeds: "prepare_inputs_embeds",
|
|
23361
|
-
model: "model",
|
|
23362
|
-
vision_encoder: "vision_encoder"
|
|
23363
|
-
},
|
|
23364
|
-
options,
|
|
23365
|
-
"model"
|
|
23366
|
-
),
|
|
23367
|
-
get_optional_configs(
|
|
23368
|
-
pretrained_model_name_or_path,
|
|
23369
|
-
{
|
|
23370
|
-
generation_config: "generation_config.json"
|
|
23371
|
-
},
|
|
23372
|
-
options
|
|
23373
|
-
)
|
|
23374
|
-
]);
|
|
23375
|
-
} else if (modelType === MODEL_TYPES.Chatterbox) {
|
|
23376
|
-
info = await Promise.all([
|
|
23377
|
-
constructSessions(
|
|
23378
|
-
pretrained_model_name_or_path,
|
|
23379
|
-
{
|
|
23380
|
-
embed_tokens: "embed_tokens",
|
|
23381
|
-
speech_encoder: "speech_encoder",
|
|
23382
|
-
model: "language_model",
|
|
23383
|
-
conditional_decoder: "conditional_decoder"
|
|
23384
|
-
},
|
|
23385
|
-
options,
|
|
23386
|
-
"model"
|
|
23387
|
-
),
|
|
23388
|
-
get_optional_configs(
|
|
23389
|
-
pretrained_model_name_or_path,
|
|
23390
|
-
{
|
|
23391
|
-
generation_config: "generation_config.json"
|
|
23392
|
-
},
|
|
23393
|
-
options
|
|
23394
|
-
)
|
|
23395
|
-
]);
|
|
23396
|
-
} else if (modelType === MODEL_TYPES.AutoEncoder) {
|
|
23397
|
-
info = await Promise.all([
|
|
23398
|
-
constructSessions(
|
|
23399
|
-
pretrained_model_name_or_path,
|
|
23400
|
-
{
|
|
23401
|
-
encoder_model: "encoder_model",
|
|
23402
|
-
decoder_model: "decoder_model"
|
|
23403
|
-
},
|
|
23404
|
-
options
|
|
23405
|
-
)
|
|
23406
|
-
]);
|
|
23407
|
-
} else if (modelType === MODEL_TYPES.Supertonic) {
|
|
23408
|
-
info = await Promise.all([
|
|
23409
|
-
constructSessions(
|
|
23410
|
-
pretrained_model_name_or_path,
|
|
23411
|
-
{
|
|
23412
|
-
text_encoder: "text_encoder",
|
|
23413
|
-
latent_denoiser: "latent_denoiser",
|
|
23414
|
-
voice_decoder: "voice_decoder"
|
|
23415
|
-
},
|
|
23416
|
-
options
|
|
23417
|
-
)
|
|
23418
|
-
]);
|
|
23419
|
-
} else {
|
|
23420
|
-
if (modelType === void 0) {
|
|
23421
|
-
const type = modelName ?? config?.model_type;
|
|
23422
|
-
if (type !== "custom") {
|
|
23423
|
-
logger.warn(
|
|
23424
|
-
`Model type for '${type}' not found, assuming encoder-only architecture. Please report this at ${GITHUB_ISSUE_URL}.`
|
|
23425
|
-
);
|
|
23426
|
-
}
|
|
24316
|
+
const typeConfig = MODEL_TYPE_CONFIG[modelType] ?? MODEL_TYPE_CONFIG.default;
|
|
24317
|
+
if (modelType === void 0) {
|
|
24318
|
+
const type = modelName ?? config?.model_type;
|
|
24319
|
+
if (type !== "custom") {
|
|
24320
|
+
logger.warn(
|
|
24321
|
+
`Model type for '${type}' not found, assuming encoder-only architecture. Please report this at ${GITHUB_ISSUE_URL}.`
|
|
24322
|
+
);
|
|
23427
24323
|
}
|
|
23428
|
-
info = await Promise.all([
|
|
23429
|
-
constructSessions(
|
|
23430
|
-
pretrained_model_name_or_path,
|
|
23431
|
-
{
|
|
23432
|
-
model: options.model_file_name ?? "model"
|
|
23433
|
-
},
|
|
23434
|
-
options
|
|
23435
|
-
)
|
|
23436
|
-
]);
|
|
23437
24324
|
}
|
|
24325
|
+
const sessions = typeConfig.sessions(config, options);
|
|
24326
|
+
const promises = [
|
|
24327
|
+
constructSessions(pretrained_model_name_or_path, sessions, options, typeConfig.cache_sessions)
|
|
24328
|
+
];
|
|
24329
|
+
if (typeConfig.optional_configs) {
|
|
24330
|
+
promises.push(get_optional_configs(pretrained_model_name_or_path, typeConfig.optional_configs, options));
|
|
24331
|
+
}
|
|
24332
|
+
const info = await Promise.all(promises);
|
|
23438
24333
|
return new this(config, ...info);
|
|
23439
24334
|
}
|
|
23440
24335
|
/**
|
|
@@ -23633,7 +24528,7 @@ var PreTrainedModel = class extends Callable2 {
|
|
|
23633
24528
|
* @param {Tensor} [params.inputs=null]
|
|
23634
24529
|
* @param {number} [params.bos_token_id=null]
|
|
23635
24530
|
* @param {Record<string, Tensor|number[]>} [params.model_kwargs]
|
|
23636
|
-
* @returns {{inputs_tensor: Tensor, model_inputs: Record<string, Tensor
|
|
24531
|
+
* @returns {{inputs_tensor: Tensor, model_inputs: Record<string, Tensor> & {past_key_values?: DynamicCache}, model_input_name: string}} The model-specific inputs for generation.
|
|
23637
24532
|
*/
|
|
23638
24533
|
_prepare_model_inputs({ inputs, bos_token_id, model_kwargs }) {
|
|
23639
24534
|
const model_inputs = pick(model_kwargs, this.forward_params);
|
|
@@ -23874,11 +24769,12 @@ var PreTrainedModel = class extends Callable2 {
|
|
|
23874
24769
|
}
|
|
23875
24770
|
}
|
|
23876
24771
|
/**
|
|
23877
|
-
* Returns
|
|
24772
|
+
* Returns a DynamicCache containing past key values from the given decoder results object.
|
|
23878
24773
|
*
|
|
23879
24774
|
* @param {Object} decoderResults The decoder results object.
|
|
23880
|
-
* @param {
|
|
23881
|
-
* @
|
|
24775
|
+
* @param {DynamicCache} pastKeyValues The previous past key values.
|
|
24776
|
+
* @param {boolean} [disposeEncoderPKVs=false] Whether to dispose encoder past key values.
|
|
24777
|
+
* @returns {DynamicCache} A new DynamicCache containing the updated past key values.
|
|
23882
24778
|
*/
|
|
23883
24779
|
getPastKeyValues(decoderResults, pastKeyValues, disposeEncoderPKVs = false) {
|
|
23884
24780
|
const pkvs = /* @__PURE__ */ Object.create(null);
|
|
@@ -23899,7 +24795,7 @@ var PreTrainedModel = class extends Callable2 {
|
|
|
23899
24795
|
}
|
|
23900
24796
|
}
|
|
23901
24797
|
}
|
|
23902
|
-
return pkvs;
|
|
24798
|
+
return new DynamicCache(pkvs);
|
|
23903
24799
|
}
|
|
23904
24800
|
/**
|
|
23905
24801
|
* Returns an object containing attentions from the given model output object.
|
|
@@ -23924,8 +24820,8 @@ var PreTrainedModel = class extends Callable2 {
|
|
|
23924
24820
|
/**
|
|
23925
24821
|
* Adds past key values to the decoder feeds object. If pastKeyValues is null, creates new tensors for past key values.
|
|
23926
24822
|
*
|
|
23927
|
-
* @param {
|
|
23928
|
-
* @param {
|
|
24823
|
+
* @param {Record<string, any>} decoderFeeds The decoder feeds object to add past key values to.
|
|
24824
|
+
* @param {DynamicCache|null} pastKeyValues The cache containing past key values.
|
|
23929
24825
|
*/
|
|
23930
24826
|
addPastKeyValues(decoderFeeds, pastKeyValues) {
|
|
23931
24827
|
if (pastKeyValues) {
|
|
@@ -23942,14 +24838,29 @@ var PreTrainedModel = class extends Callable2 {
|
|
|
23942
24838
|
}
|
|
23943
24839
|
}
|
|
23944
24840
|
}
|
|
23945
|
-
|
|
23946
|
-
|
|
24841
|
+
/**
|
|
24842
|
+
* Helper function to select valid inputs and run through the appropriate encoder (vision, text, audio) based on the input type.
|
|
24843
|
+
* @param {string} sessionName
|
|
24844
|
+
* @param {Record<string, Tensor>} inputs
|
|
24845
|
+
* @param {string} outputName
|
|
24846
|
+
* @private
|
|
24847
|
+
*/
|
|
24848
|
+
async _encode_input(sessionName, inputs, outputName) {
|
|
24849
|
+
if (!Object.hasOwn(this.sessions, sessionName)) {
|
|
24850
|
+
throw new Error(`Model does not have a ${sessionName} session.`);
|
|
24851
|
+
}
|
|
24852
|
+
const session = this.sessions[sessionName];
|
|
24853
|
+
const output = await sessionRun(session, pick(inputs, session.inputNames));
|
|
24854
|
+
return output[outputName];
|
|
24855
|
+
}
|
|
24856
|
+
async encode_image(inputs) {
|
|
24857
|
+
return this._encode_input("vision_encoder", inputs, "image_features");
|
|
23947
24858
|
}
|
|
23948
|
-
async encode_text(
|
|
23949
|
-
return
|
|
24859
|
+
async encode_text(inputs) {
|
|
24860
|
+
return this._encode_input("embed_tokens", inputs, "inputs_embeds");
|
|
23950
24861
|
}
|
|
23951
|
-
async encode_audio(
|
|
23952
|
-
return
|
|
24862
|
+
async encode_audio(inputs) {
|
|
24863
|
+
return this._encode_input("audio_encoder", inputs, "audio_features");
|
|
23953
24864
|
}
|
|
23954
24865
|
};
|
|
23955
24866
|
async function seq2seq_forward(self2, model_inputs) {
|
|
@@ -24004,6 +24915,9 @@ async function decoder_forward(self2, model_inputs, is_encoder_decoder = false)
|
|
|
24004
24915
|
const start_index = ["paligemma", "gemma3_text", "gemma3"].includes(self2.config.model_type) ? 1 : 0;
|
|
24005
24916
|
new_model_inputs.position_ids = create_position_ids(new_model_inputs, past_key_values, start_index);
|
|
24006
24917
|
}
|
|
24918
|
+
if (session.inputNames.includes("num_logits_to_keep") && !new_model_inputs.num_logits_to_keep) {
|
|
24919
|
+
new_model_inputs.num_logits_to_keep = new Tensor3("int64", [0n], []);
|
|
24920
|
+
}
|
|
24007
24921
|
self2.addPastKeyValues(new_model_inputs, past_key_values);
|
|
24008
24922
|
const fixed = pick(new_model_inputs, session.inputNames);
|
|
24009
24923
|
return await sessionRun(session, fixed);
|
|
@@ -24012,7 +24926,7 @@ async function generic_text_to_text_forward(self2, {
|
|
|
24012
24926
|
// Generic parameters:
|
|
24013
24927
|
encode_function,
|
|
24014
24928
|
merge_function,
|
|
24015
|
-
|
|
24929
|
+
modality_input_names,
|
|
24016
24930
|
modality_output_name,
|
|
24017
24931
|
// Produced by the tokenizer/processor:
|
|
24018
24932
|
input_ids = null,
|
|
@@ -24027,38 +24941,54 @@ async function generic_text_to_text_forward(self2, {
|
|
|
24027
24941
|
// Additional parameters
|
|
24028
24942
|
...kwargs
|
|
24029
24943
|
}) {
|
|
24030
|
-
const modality_values = kwargs[modality_input_name];
|
|
24031
24944
|
if (!inputs_embeds) {
|
|
24032
24945
|
inputs_embeds = await self2.encode_text({ input_ids, ...kwargs });
|
|
24033
|
-
|
|
24034
|
-
|
|
24035
|
-
|
|
24036
|
-
|
|
24037
|
-
|
|
24038
|
-
|
|
24039
|
-
|
|
24040
|
-
|
|
24041
|
-
|
|
24042
|
-
inputs_embeds,
|
|
24043
|
-
|
|
24044
|
-
|
|
24045
|
-
|
|
24046
|
-
|
|
24047
|
-
|
|
24048
|
-
|
|
24049
|
-
|
|
24050
|
-
|
|
24051
|
-
|
|
24052
|
-
|
|
24053
|
-
|
|
24054
|
-
|
|
24055
|
-
|
|
24946
|
+
const modality_values = pick(kwargs, modality_input_names);
|
|
24947
|
+
if (Object.keys(modality_values).length > 0) {
|
|
24948
|
+
if (input_ids.dims[1] !== 1) {
|
|
24949
|
+
const modality_features = await encode_function({
|
|
24950
|
+
// Pass the modality values under its expected key.
|
|
24951
|
+
// The caller knows whether this is audio or image.
|
|
24952
|
+
...modality_values,
|
|
24953
|
+
...kwargs
|
|
24954
|
+
});
|
|
24955
|
+
({ inputs_embeds, attention_mask } = merge_function({
|
|
24956
|
+
[modality_output_name]: modality_features,
|
|
24957
|
+
inputs_embeds,
|
|
24958
|
+
input_ids,
|
|
24959
|
+
attention_mask
|
|
24960
|
+
}));
|
|
24961
|
+
} else if (past_key_values && input_ids.dims[1] === 1) {
|
|
24962
|
+
const target_length = input_ids.dims[1];
|
|
24963
|
+
const past_length = past_key_values.get_seq_length();
|
|
24964
|
+
attention_mask = cat(
|
|
24965
|
+
[
|
|
24966
|
+
ones([input_ids.dims[0], past_length]),
|
|
24967
|
+
attention_mask.slice(null, [attention_mask.dims[1] - target_length, attention_mask.dims[1]])
|
|
24968
|
+
],
|
|
24969
|
+
1
|
|
24970
|
+
);
|
|
24971
|
+
}
|
|
24056
24972
|
}
|
|
24057
24973
|
}
|
|
24058
24974
|
if (!position_ids) {
|
|
24059
|
-
if (
|
|
24060
|
-
|
|
24061
|
-
|
|
24975
|
+
if (
|
|
24976
|
+
// Handle special case for qwen vl models
|
|
24977
|
+
[
|
|
24978
|
+
"qwen2_vl",
|
|
24979
|
+
"qwen2_vl_text",
|
|
24980
|
+
"qwen2_5_vl",
|
|
24981
|
+
"qwen2_5_vl_text",
|
|
24982
|
+
"qwen3_vl",
|
|
24983
|
+
"qwen3_vl_text",
|
|
24984
|
+
"qwen3_vl_moe",
|
|
24985
|
+
"qwen3_vl_moe_text",
|
|
24986
|
+
"qwen3_5",
|
|
24987
|
+
"qwen3_5_text",
|
|
24988
|
+
"qwen3_5_moe",
|
|
24989
|
+
"qwen3_5_moe_text"
|
|
24990
|
+
].includes(self2.config.model_type)
|
|
24991
|
+
) {
|
|
24062
24992
|
const { image_grid_thw, video_grid_thw } = kwargs;
|
|
24063
24993
|
[position_ids] = self2.get_rope_index(input_ids, image_grid_thw, video_grid_thw, attention_mask);
|
|
24064
24994
|
}
|
|
@@ -24080,7 +25010,7 @@ async function generic_text_to_text_forward(self2, {
|
|
|
24080
25010
|
async function audio_text_to_text_forward(self2, params) {
|
|
24081
25011
|
return await generic_text_to_text_forward(self2, {
|
|
24082
25012
|
...params,
|
|
24083
|
-
|
|
25013
|
+
modality_input_names: ["audio_values", "input_features"],
|
|
24084
25014
|
modality_output_name: "audio_features",
|
|
24085
25015
|
encode_function: self2.encode_audio.bind(self2),
|
|
24086
25016
|
merge_function: self2._merge_input_ids_with_audio_features.bind(self2)
|
|
@@ -24089,7 +25019,7 @@ async function audio_text_to_text_forward(self2, params) {
|
|
|
24089
25019
|
async function image_text_to_text_forward(self2, params) {
|
|
24090
25020
|
return await generic_text_to_text_forward(self2, {
|
|
24091
25021
|
...params,
|
|
24092
|
-
|
|
25022
|
+
modality_input_names: ["pixel_values"],
|
|
24093
25023
|
modality_output_name: "image_features",
|
|
24094
25024
|
encode_function: self2.encode_image.bind(self2),
|
|
24095
25025
|
merge_function: self2._merge_input_ids_with_image_features.bind(self2)
|
|
@@ -24125,7 +25055,11 @@ function create_position_ids(model_inputs, past_key_values = null, start_index =
|
|
|
24125
25055
|
return position_ids;
|
|
24126
25056
|
}
|
|
24127
25057
|
function decoder_prepare_inputs_for_generation(self2, input_ids, model_inputs, generation_config) {
|
|
24128
|
-
const past_length = model_inputs.past_key_values ?
|
|
25058
|
+
const past_length = model_inputs.past_key_values ? model_inputs.past_key_values.get_seq_length() : 0;
|
|
25059
|
+
const session = self2.sessions["decoder_model_merged"] ?? self2.sessions["model"];
|
|
25060
|
+
if (session?.inputNames.includes("num_logits_to_keep") && !model_inputs.num_logits_to_keep) {
|
|
25061
|
+
model_inputs.num_logits_to_keep = new Tensor3("int64", [1n], []);
|
|
25062
|
+
}
|
|
24129
25063
|
if (!model_inputs.attention_mask) {
|
|
24130
25064
|
let dims;
|
|
24131
25065
|
for (const key of ["input_ids", "inputs_embeds", "position_ids"]) {
|
|
@@ -24433,6 +25367,7 @@ __export(models_exports, {
|
|
|
24433
25367
|
Gemma3ForCausalLM: () => Gemma3ForCausalLM,
|
|
24434
25368
|
Gemma3Model: () => Gemma3Model,
|
|
24435
25369
|
Gemma3PreTrainedModel: () => Gemma3PreTrainedModel,
|
|
25370
|
+
Gemma3nForCausalLM: () => Gemma3nForCausalLM,
|
|
24436
25371
|
Gemma3nForConditionalGeneration: () => Gemma3nForConditionalGeneration,
|
|
24437
25372
|
Gemma3nPreTrainedModel: () => Gemma3nPreTrainedModel,
|
|
24438
25373
|
GemmaForCausalLM: () => GemmaForCausalLM,
|
|
@@ -24450,6 +25385,7 @@ __export(models_exports, {
|
|
|
24450
25385
|
GraniteMoeHybridModel: () => GraniteMoeHybridModel,
|
|
24451
25386
|
GraniteMoeHybridPreTrainedModel: () => GraniteMoeHybridPreTrainedModel,
|
|
24452
25387
|
GranitePreTrainedModel: () => GranitePreTrainedModel,
|
|
25388
|
+
GraniteSpeechForConditionalGeneration: () => GraniteSpeechForConditionalGeneration,
|
|
24453
25389
|
GroundingDinoForObjectDetection: () => GroundingDinoForObjectDetection,
|
|
24454
25390
|
GroundingDinoPreTrainedModel: () => GroundingDinoPreTrainedModel,
|
|
24455
25391
|
GroupViTModel: () => GroupViTModel,
|
|
@@ -24471,7 +25407,6 @@ __export(models_exports, {
|
|
|
24471
25407
|
IJepaModel: () => IJepaModel,
|
|
24472
25408
|
IJepaPreTrainedModel: () => IJepaPreTrainedModel,
|
|
24473
25409
|
Idefics3ForConditionalGeneration: () => Idefics3ForConditionalGeneration,
|
|
24474
|
-
Idefics3PreTrainedModel: () => Idefics3PreTrainedModel,
|
|
24475
25410
|
JAISLMHeadModel: () => JAISLMHeadModel,
|
|
24476
25411
|
JAISModel: () => JAISModel,
|
|
24477
25412
|
JAISPreTrainedModel: () => JAISPreTrainedModel,
|
|
@@ -24485,6 +25420,7 @@ __export(models_exports, {
|
|
|
24485
25420
|
Lfm2MoeModel: () => Lfm2MoeModel,
|
|
24486
25421
|
Lfm2MoePreTrainedModel: () => Lfm2MoePreTrainedModel,
|
|
24487
25422
|
Lfm2PreTrainedModel: () => Lfm2PreTrainedModel,
|
|
25423
|
+
Lfm2VlForConditionalGeneration: () => Lfm2VlForConditionalGeneration,
|
|
24488
25424
|
LiteWhisperForConditionalGeneration: () => LiteWhisperForConditionalGeneration,
|
|
24489
25425
|
Llama4ForCausalLM: () => Llama4ForCausalLM,
|
|
24490
25426
|
Llama4PreTrainedModel: () => Llama4PreTrainedModel,
|
|
@@ -24609,6 +25545,9 @@ __export(models_exports, {
|
|
|
24609
25545
|
Olmo3Model: () => Olmo3Model,
|
|
24610
25546
|
Olmo3PreTrainedModel: () => Olmo3PreTrainedModel,
|
|
24611
25547
|
OlmoForCausalLM: () => OlmoForCausalLM,
|
|
25548
|
+
OlmoHybridForCausalLM: () => OlmoHybridForCausalLM,
|
|
25549
|
+
OlmoHybridModel: () => OlmoHybridModel,
|
|
25550
|
+
OlmoHybridPreTrainedModel: () => OlmoHybridPreTrainedModel,
|
|
24612
25551
|
OlmoModel: () => OlmoModel,
|
|
24613
25552
|
OlmoPreTrainedModel: () => OlmoPreTrainedModel,
|
|
24614
25553
|
OpenELMForCausalLM: () => OpenELMForCausalLM,
|
|
@@ -24621,7 +25560,6 @@ __export(models_exports, {
|
|
|
24621
25560
|
Owlv2Model: () => Owlv2Model,
|
|
24622
25561
|
Owlv2PreTrainedModel: () => Owlv2PreTrainedModel,
|
|
24623
25562
|
PaliGemmaForConditionalGeneration: () => PaliGemmaForConditionalGeneration,
|
|
24624
|
-
PaliGemmaPreTrainedModel: () => PaliGemmaPreTrainedModel,
|
|
24625
25563
|
ParakeetForCTC: () => ParakeetForCTC,
|
|
24626
25564
|
ParakeetPreTrainedModel: () => ParakeetPreTrainedModel,
|
|
24627
25565
|
PatchTSMixerForPrediction: () => PatchTSMixerForPrediction,
|
|
@@ -24647,15 +25585,31 @@ __export(models_exports, {
|
|
|
24647
25585
|
PyAnnotePreTrainedModel: () => PyAnnotePreTrainedModel,
|
|
24648
25586
|
Qwen2ForCausalLM: () => Qwen2ForCausalLM,
|
|
24649
25587
|
Qwen2Model: () => Qwen2Model,
|
|
25588
|
+
Qwen2MoeForCausalLM: () => Qwen2MoeForCausalLM,
|
|
25589
|
+
Qwen2MoeModel: () => Qwen2MoeModel,
|
|
25590
|
+
Qwen2MoePreTrainedModel: () => Qwen2MoePreTrainedModel,
|
|
24650
25591
|
Qwen2PreTrainedModel: () => Qwen2PreTrainedModel,
|
|
25592
|
+
Qwen2VLForCausalLM: () => Qwen2VLForCausalLM,
|
|
24651
25593
|
Qwen2VLForConditionalGeneration: () => Qwen2VLForConditionalGeneration,
|
|
24652
25594
|
Qwen2VLPreTrainedModel: () => Qwen2VLPreTrainedModel,
|
|
25595
|
+
Qwen2_5_VLForCausalLM: () => Qwen2_5_VLForCausalLM,
|
|
24653
25596
|
Qwen2_5_VLForConditionalGeneration: () => Qwen2_5_VLForConditionalGeneration,
|
|
24654
25597
|
Qwen3ForCausalLM: () => Qwen3ForCausalLM,
|
|
24655
25598
|
Qwen3Model: () => Qwen3Model,
|
|
25599
|
+
Qwen3MoeForCausalLM: () => Qwen3MoeForCausalLM,
|
|
25600
|
+
Qwen3MoeModel: () => Qwen3MoeModel,
|
|
25601
|
+
Qwen3MoePreTrainedModel: () => Qwen3MoePreTrainedModel,
|
|
25602
|
+
Qwen3NextForCausalLM: () => Qwen3NextForCausalLM,
|
|
25603
|
+
Qwen3NextModel: () => Qwen3NextModel,
|
|
25604
|
+
Qwen3NextPreTrainedModel: () => Qwen3NextPreTrainedModel,
|
|
24656
25605
|
Qwen3PreTrainedModel: () => Qwen3PreTrainedModel,
|
|
25606
|
+
Qwen3VLForCausalLM: () => Qwen3VLForCausalLM,
|
|
24657
25607
|
Qwen3VLForConditionalGeneration: () => Qwen3VLForConditionalGeneration,
|
|
25608
|
+
Qwen3VLMoeForCausalLM: () => Qwen3VLMoeForCausalLM,
|
|
25609
|
+
Qwen3VLMoeForConditionalGeneration: () => Qwen3VLMoeForConditionalGeneration,
|
|
25610
|
+
Qwen3_5ForCausalLM: () => Qwen3_5ForCausalLM,
|
|
24658
25611
|
Qwen3_5ForConditionalGeneration: () => Qwen3_5ForConditionalGeneration,
|
|
25612
|
+
Qwen3_5MoeForCausalLM: () => Qwen3_5MoeForCausalLM,
|
|
24659
25613
|
Qwen3_5MoeForConditionalGeneration: () => Qwen3_5MoeForConditionalGeneration,
|
|
24660
25614
|
RFDetrForObjectDetection: () => RFDetrForObjectDetection,
|
|
24661
25615
|
RFDetrModel: () => RFDetrModel,
|
|
@@ -24706,7 +25660,6 @@ __export(models_exports, {
|
|
|
24706
25660
|
SmolLM3ForCausalLM: () => SmolLM3ForCausalLM,
|
|
24707
25661
|
SmolLM3Model: () => SmolLM3Model,
|
|
24708
25662
|
SmolLM3PreTrainedModel: () => SmolLM3PreTrainedModel,
|
|
24709
|
-
SmolVLMForConditionalGeneration: () => SmolVLMForConditionalGeneration,
|
|
24710
25663
|
SnacDecoderModel: () => SnacDecoderModel,
|
|
24711
25664
|
SnacEncoderModel: () => SnacEncoderModel,
|
|
24712
25665
|
SnacModel: () => SnacModel,
|
|
@@ -24778,6 +25731,8 @@ __export(models_exports, {
|
|
|
24778
25731
|
VitsModelOutput: () => VitsModelOutput,
|
|
24779
25732
|
VitsPreTrainedModel: () => VitsPreTrainedModel,
|
|
24780
25733
|
VoxtralForConditionalGeneration: () => VoxtralForConditionalGeneration,
|
|
25734
|
+
VoxtralRealtimeForConditionalGeneration: () => VoxtralRealtimeForConditionalGeneration,
|
|
25735
|
+
VoxtralRealtimePreTrainedModel: () => VoxtralRealtimePreTrainedModel,
|
|
24781
25736
|
Wav2Vec2BertForCTC: () => Wav2Vec2BertForCTC,
|
|
24782
25737
|
Wav2Vec2BertForSequenceClassification: () => Wav2Vec2BertForSequenceClassification,
|
|
24783
25738
|
Wav2Vec2BertModel: () => Wav2Vec2BertModel,
|
|
@@ -25138,7 +26093,7 @@ var ChatterboxModel = class extends ChatterboxPreTrainedModel {
|
|
|
25138
26093
|
if (!past_key_values || target_length !== 1) {
|
|
25139
26094
|
throw new Error("Incorrect state encountered during generation.");
|
|
25140
26095
|
}
|
|
25141
|
-
const past_length =
|
|
26096
|
+
const past_length = past_key_values.get_seq_length();
|
|
25142
26097
|
attention_mask = ones([inputs_embeds.dims[0], past_length + target_length]);
|
|
25143
26098
|
}
|
|
25144
26099
|
}
|
|
@@ -26168,6 +27123,8 @@ var Gemma3nForConditionalGeneration = class extends Gemma3nPreTrainedModel {
|
|
|
26168
27123
|
});
|
|
26169
27124
|
}
|
|
26170
27125
|
};
|
|
27126
|
+
var Gemma3nForCausalLM = class extends Gemma3nForConditionalGeneration {
|
|
27127
|
+
};
|
|
26171
27128
|
|
|
26172
27129
|
// src/models/glm/modeling_glm.js
|
|
26173
27130
|
var GlmPreTrainedModel = class extends PreTrainedModel {
|
|
@@ -26249,6 +27206,28 @@ var GraniteMoeHybridModel = class extends GraniteMoeHybridPreTrainedModel {
|
|
|
26249
27206
|
var GraniteMoeHybridForCausalLM = class extends GraniteMoeHybridPreTrainedModel {
|
|
26250
27207
|
};
|
|
26251
27208
|
|
|
27209
|
+
// src/models/ultravox/modeling_ultravox.js
|
|
27210
|
+
var UltravoxPreTrainedModel = class extends PreTrainedModel {
|
|
27211
|
+
forward_params = ["input_ids", "attention_mask", "position_ids", "audio_values", "past_key_values"];
|
|
27212
|
+
};
|
|
27213
|
+
var UltravoxModel = class extends UltravoxPreTrainedModel {
|
|
27214
|
+
_merge_input_ids_with_audio_features(kwargs) {
|
|
27215
|
+
const audio_hidden_size = kwargs.audio_features.dims.at(-1);
|
|
27216
|
+
const reshaped_audio_features = kwargs.audio_features.view(-1, audio_hidden_size);
|
|
27217
|
+
return default_merge_input_ids_with_audio_features({
|
|
27218
|
+
// @ts-ignore
|
|
27219
|
+
audio_token_id: this.config.ignore_index ?? this.config.audio_token_id ?? this.config.audio_token_index,
|
|
27220
|
+
...kwargs,
|
|
27221
|
+
audio_features: reshaped_audio_features
|
|
27222
|
+
});
|
|
27223
|
+
}
|
|
27224
|
+
};
|
|
27225
|
+
|
|
27226
|
+
// src/models/granite_speech/modeling_granite_speech.js
|
|
27227
|
+
var GraniteSpeechForConditionalGeneration = class extends UltravoxModel {
|
|
27228
|
+
forward_params = ["input_ids", "attention_mask", "input_features", "past_key_values"];
|
|
27229
|
+
};
|
|
27230
|
+
|
|
26252
27231
|
// src/models/grounding_dino/modeling_grounding_dino.js
|
|
26253
27232
|
var GroundingDinoPreTrainedModel = class extends PreTrainedModel {
|
|
26254
27233
|
};
|
|
@@ -26344,17 +27323,38 @@ var HubertForSequenceClassification = class extends Wav2Vec2PreTrainedModel {
|
|
|
26344
27323
|
return new SequenceClassifierOutput(await super._call(model_inputs));
|
|
26345
27324
|
}
|
|
26346
27325
|
};
|
|
26347
|
-
|
|
26348
|
-
// src/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.js
|
|
26349
|
-
var HunYuanDenseV1PreTrainedModel = class extends PreTrainedModel {
|
|
26350
|
-
};
|
|
26351
|
-
var HunYuanDenseV1Model = class extends HunYuanDenseV1PreTrainedModel {
|
|
27326
|
+
|
|
27327
|
+
// src/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.js
|
|
27328
|
+
var HunYuanDenseV1PreTrainedModel = class extends PreTrainedModel {
|
|
27329
|
+
};
|
|
27330
|
+
var HunYuanDenseV1Model = class extends HunYuanDenseV1PreTrainedModel {
|
|
27331
|
+
};
|
|
27332
|
+
var HunYuanDenseV1ForCausalLM = class extends HunYuanDenseV1PreTrainedModel {
|
|
27333
|
+
};
|
|
27334
|
+
|
|
27335
|
+
// src/models/llava/modeling_llava.js
|
|
27336
|
+
var LlavaPreTrainedModel = class extends PreTrainedModel {
|
|
27337
|
+
forward_params = ["input_ids", "attention_mask", "pixel_values", "position_ids", "past_key_values"];
|
|
27338
|
+
};
|
|
27339
|
+
var LlavaForConditionalGeneration = class extends LlavaPreTrainedModel {
|
|
27340
|
+
_merge_input_ids_with_image_features(kwargs) {
|
|
27341
|
+
const vision_hidden_size = kwargs.image_features.dims.at(-1);
|
|
27342
|
+
const reshaped_image_hidden_states = kwargs.image_features.view(-1, vision_hidden_size);
|
|
27343
|
+
return default_merge_input_ids_with_image_features({
|
|
27344
|
+
// @ts-ignore
|
|
27345
|
+
image_token_id: this.config.image_token_index ?? this.config.image_token_id,
|
|
27346
|
+
...kwargs,
|
|
27347
|
+
image_features: reshaped_image_hidden_states
|
|
27348
|
+
});
|
|
27349
|
+
}
|
|
27350
|
+
};
|
|
27351
|
+
var Moondream1ForConditionalGeneration = class extends LlavaForConditionalGeneration {
|
|
26352
27352
|
};
|
|
26353
|
-
var
|
|
27353
|
+
var LlavaQwen2ForCausalLM = class extends LlavaForConditionalGeneration {
|
|
26354
27354
|
};
|
|
26355
27355
|
|
|
26356
27356
|
// src/models/idefics3/modeling_idefics3.js
|
|
26357
|
-
var
|
|
27357
|
+
var Idefics3ForConditionalGeneration = class extends LlavaForConditionalGeneration {
|
|
26358
27358
|
forward_params = [
|
|
26359
27359
|
"input_ids",
|
|
26360
27360
|
"attention_mask",
|
|
@@ -26364,24 +27364,6 @@ var Idefics3PreTrainedModel = class extends PreTrainedModel {
|
|
|
26364
27364
|
"past_key_values"
|
|
26365
27365
|
];
|
|
26366
27366
|
};
|
|
26367
|
-
var Idefics3ForConditionalGeneration = class extends Idefics3PreTrainedModel {
|
|
26368
|
-
async encode_image({ pixel_values, pixel_attention_mask }) {
|
|
26369
|
-
const features = (await sessionRun(this.sessions["vision_encoder"], { pixel_values, pixel_attention_mask })).image_features;
|
|
26370
|
-
return features;
|
|
26371
|
-
}
|
|
26372
|
-
_merge_input_ids_with_image_features(kwargs) {
|
|
26373
|
-
const vision_hidden_size = kwargs.image_features.dims.at(-1);
|
|
26374
|
-
const reshaped_image_hidden_states = kwargs.image_features.view(-1, vision_hidden_size);
|
|
26375
|
-
return default_merge_input_ids_with_image_features({
|
|
26376
|
-
// @ts-ignore
|
|
26377
|
-
image_token_id: this.config.image_token_id,
|
|
26378
|
-
...kwargs,
|
|
26379
|
-
image_features: reshaped_image_hidden_states
|
|
26380
|
-
});
|
|
26381
|
-
}
|
|
26382
|
-
};
|
|
26383
|
-
var SmolVLMForConditionalGeneration = class extends Idefics3ForConditionalGeneration {
|
|
26384
|
-
};
|
|
26385
27367
|
|
|
26386
27368
|
// src/models/ijepa/modeling_ijepa.js
|
|
26387
27369
|
var IJepaPreTrainedModel = class extends PreTrainedModel {
|
|
@@ -26472,6 +27454,19 @@ var Lfm2MoeModel = class extends Lfm2MoePreTrainedModel {
|
|
|
26472
27454
|
var Lfm2MoeForCausalLM = class extends Lfm2MoePreTrainedModel {
|
|
26473
27455
|
};
|
|
26474
27456
|
|
|
27457
|
+
// src/models/lfm2_vl/modeling_lfm2_vl.js
|
|
27458
|
+
var Lfm2VlForConditionalGeneration = class extends LlavaForConditionalGeneration {
|
|
27459
|
+
forward_params = [
|
|
27460
|
+
"input_ids",
|
|
27461
|
+
"attention_mask",
|
|
27462
|
+
"pixel_values",
|
|
27463
|
+
"pixel_attention_mask",
|
|
27464
|
+
"spatial_shapes",
|
|
27465
|
+
"position_ids",
|
|
27466
|
+
"past_key_values"
|
|
27467
|
+
];
|
|
27468
|
+
};
|
|
27469
|
+
|
|
26475
27470
|
// src/models/llama/modeling_llama.js
|
|
26476
27471
|
var LlamaPreTrainedModel = class extends PreTrainedModel {
|
|
26477
27472
|
};
|
|
@@ -26486,27 +27481,6 @@ var Llama4PreTrainedModel = class extends PreTrainedModel {
|
|
|
26486
27481
|
var Llama4ForCausalLM = class extends Llama4PreTrainedModel {
|
|
26487
27482
|
};
|
|
26488
27483
|
|
|
26489
|
-
// src/models/llava/modeling_llava.js
|
|
26490
|
-
var LlavaPreTrainedModel = class extends PreTrainedModel {
|
|
26491
|
-
forward_params = ["input_ids", "attention_mask", "pixel_values", "position_ids", "past_key_values"];
|
|
26492
|
-
};
|
|
26493
|
-
var LlavaForConditionalGeneration = class extends LlavaPreTrainedModel {
|
|
26494
|
-
_merge_input_ids_with_image_features(kwargs) {
|
|
26495
|
-
const vision_hidden_size = kwargs.image_features.dims.at(-1);
|
|
26496
|
-
const reshaped_image_hidden_states = kwargs.image_features.view(-1, vision_hidden_size);
|
|
26497
|
-
return default_merge_input_ids_with_image_features({
|
|
26498
|
-
// @ts-ignore
|
|
26499
|
-
image_token_id: this.config.image_token_index,
|
|
26500
|
-
...kwargs,
|
|
26501
|
-
image_features: reshaped_image_hidden_states
|
|
26502
|
-
});
|
|
26503
|
-
}
|
|
26504
|
-
};
|
|
26505
|
-
var Moondream1ForConditionalGeneration = class extends LlavaForConditionalGeneration {
|
|
26506
|
-
};
|
|
26507
|
-
var LlavaQwen2ForCausalLM = class extends LlavaForConditionalGeneration {
|
|
26508
|
-
};
|
|
26509
|
-
|
|
26510
27484
|
// src/models/longt5/modeling_longt5.js
|
|
26511
27485
|
var LongT5PreTrainedModel = class extends PreTrainedModel {
|
|
26512
27486
|
};
|
|
@@ -27216,6 +28190,14 @@ var Olmo3Model = class extends Olmo3PreTrainedModel {
|
|
|
27216
28190
|
var Olmo3ForCausalLM = class extends Olmo3PreTrainedModel {
|
|
27217
28191
|
};
|
|
27218
28192
|
|
|
28193
|
+
// src/models/olmo_hybrid/modeling_olmo_hybrid.js
|
|
28194
|
+
var OlmoHybridPreTrainedModel = class extends PreTrainedModel {
|
|
28195
|
+
};
|
|
28196
|
+
var OlmoHybridModel = class extends OlmoHybridPreTrainedModel {
|
|
28197
|
+
};
|
|
28198
|
+
var OlmoHybridForCausalLM = class extends OlmoHybridPreTrainedModel {
|
|
28199
|
+
};
|
|
28200
|
+
|
|
27219
28201
|
// src/models/openelm/modeling_openelm.js
|
|
27220
28202
|
var OpenELMPreTrainedModel = class extends PreTrainedModel {
|
|
27221
28203
|
};
|
|
@@ -27249,27 +28231,7 @@ var OwlViTForObjectDetection = class extends OwlViTPreTrainedModel {
|
|
|
27249
28231
|
};
|
|
27250
28232
|
|
|
27251
28233
|
// src/models/paligemma/modeling_paligemma.js
|
|
27252
|
-
var
|
|
27253
|
-
forward_params = [
|
|
27254
|
-
"input_ids",
|
|
27255
|
-
// 'inputs_embeds',
|
|
27256
|
-
"attention_mask",
|
|
27257
|
-
"pixel_values",
|
|
27258
|
-
"position_ids",
|
|
27259
|
-
"past_key_values"
|
|
27260
|
-
];
|
|
27261
|
-
};
|
|
27262
|
-
var PaliGemmaForConditionalGeneration = class extends PaliGemmaPreTrainedModel {
|
|
27263
|
-
_merge_input_ids_with_image_features(kwargs) {
|
|
27264
|
-
const vision_hidden_size = kwargs.image_features.dims.at(-1);
|
|
27265
|
-
const reshaped_image_hidden_states = kwargs.image_features.view(-1, vision_hidden_size);
|
|
27266
|
-
return default_merge_input_ids_with_image_features({
|
|
27267
|
-
// @ts-ignore
|
|
27268
|
-
image_token_id: this.config.image_token_index,
|
|
27269
|
-
...kwargs,
|
|
27270
|
-
image_features: reshaped_image_hidden_states
|
|
27271
|
-
});
|
|
27272
|
-
}
|
|
28234
|
+
var PaliGemmaForConditionalGeneration = class extends LlavaForConditionalGeneration {
|
|
27273
28235
|
};
|
|
27274
28236
|
|
|
27275
28237
|
// src/models/parakeet/modeling_parakeet.js
|
|
@@ -27420,6 +28382,14 @@ var Qwen2Model = class extends Qwen2PreTrainedModel {
|
|
|
27420
28382
|
var Qwen2ForCausalLM = class extends Qwen2PreTrainedModel {
|
|
27421
28383
|
};
|
|
27422
28384
|
|
|
28385
|
+
// src/models/qwen2_moe/modeling_qwen2_moe.js
|
|
28386
|
+
var Qwen2MoePreTrainedModel = class extends PreTrainedModel {
|
|
28387
|
+
};
|
|
28388
|
+
var Qwen2MoeModel = class extends Qwen2MoePreTrainedModel {
|
|
28389
|
+
};
|
|
28390
|
+
var Qwen2MoeForCausalLM = class extends Qwen2MoePreTrainedModel {
|
|
28391
|
+
};
|
|
28392
|
+
|
|
27423
28393
|
// src/models/qwen2_vl/modeling_qwen2_vl.js
|
|
27424
28394
|
var Qwen2VLPreTrainedModel = class extends PreTrainedModel {
|
|
27425
28395
|
forward_params = [
|
|
@@ -27434,6 +28404,9 @@ var Qwen2VLPreTrainedModel = class extends PreTrainedModel {
|
|
|
27434
28404
|
];
|
|
27435
28405
|
};
|
|
27436
28406
|
var Qwen2VLForConditionalGeneration = class extends Qwen2VLPreTrainedModel {
|
|
28407
|
+
// NOTE: This is used as the base class for all Qwen VL models and their CausalLM variants.
|
|
28408
|
+
// CausalLM variants (e.g., Qwen2VLForCausalLM) extend this class but load only
|
|
28409
|
+
// embed_tokens + decoder_model_merged (no vision_encoder) via MultimodalLanguageModelOnly type.
|
|
27437
28410
|
image_grid_thw_name = "grid_thw";
|
|
27438
28411
|
/**
|
|
27439
28412
|
* Calculate the 3D rope index based on image and video's temporal, height and width in LLM.
|
|
@@ -27623,19 +28596,45 @@ var Qwen2VLForConditionalGeneration = class extends Qwen2VLPreTrainedModel {
|
|
|
27623
28596
|
);
|
|
27624
28597
|
} else {
|
|
27625
28598
|
model_inputs.pixel_values = null;
|
|
27626
|
-
const
|
|
27627
|
-
|
|
27628
|
-
|
|
28599
|
+
const past_length = model_inputs.past_key_values.get_seq_length();
|
|
28600
|
+
if (past_length < model_inputs.input_ids.dims[1]) {
|
|
28601
|
+
const [full_position_ids, rope_deltas] = this.get_rope_index(
|
|
28602
|
+
model_inputs.input_ids,
|
|
28603
|
+
model_inputs.image_grid_thw,
|
|
28604
|
+
model_inputs.video_grid_thw,
|
|
28605
|
+
model_inputs.attention_mask
|
|
28606
|
+
);
|
|
28607
|
+
model_inputs.rope_deltas = rope_deltas;
|
|
28608
|
+
model_inputs.position_ids = full_position_ids.slice(null, null, [past_length, null]);
|
|
28609
|
+
model_inputs.input_ids = model_inputs.input_ids.slice(null, [past_length, null]);
|
|
28610
|
+
} else {
|
|
28611
|
+
if (!model_inputs.rope_deltas) {
|
|
28612
|
+
[, model_inputs.rope_deltas] = this.get_rope_index(
|
|
28613
|
+
model_inputs.input_ids,
|
|
28614
|
+
model_inputs.image_grid_thw,
|
|
28615
|
+
model_inputs.video_grid_thw,
|
|
28616
|
+
model_inputs.attention_mask
|
|
28617
|
+
);
|
|
28618
|
+
}
|
|
28619
|
+
const delta = BigInt(past_length);
|
|
28620
|
+
const rope_deltas_list = model_inputs.rope_deltas.map((x) => delta + x);
|
|
28621
|
+
model_inputs.position_ids = stack([rope_deltas_list, rope_deltas_list, rope_deltas_list], 0);
|
|
28622
|
+
}
|
|
27629
28623
|
}
|
|
27630
28624
|
}
|
|
27631
28625
|
return model_inputs;
|
|
27632
28626
|
}
|
|
27633
28627
|
};
|
|
28628
|
+
var Qwen2VLForCausalLM = class extends Qwen2VLForConditionalGeneration {
|
|
28629
|
+
};
|
|
27634
28630
|
|
|
27635
28631
|
// src/models/qwen2_5_vl/modeling_qwen2_5_vl.js
|
|
27636
28632
|
var Qwen2_5_VLForConditionalGeneration = class extends Qwen2VLForConditionalGeneration {
|
|
27637
28633
|
image_grid_thw_name = "image_grid_thw";
|
|
27638
28634
|
};
|
|
28635
|
+
var Qwen2_5_VLForCausalLM = class extends Qwen2VLForCausalLM {
|
|
28636
|
+
image_grid_thw_name = "image_grid_thw";
|
|
28637
|
+
};
|
|
27639
28638
|
|
|
27640
28639
|
// src/models/qwen3/modeling_qwen3.js
|
|
27641
28640
|
var Qwen3PreTrainedModel = class extends PreTrainedModel {
|
|
@@ -27645,17 +28644,45 @@ var Qwen3Model = class extends Qwen3PreTrainedModel {
|
|
|
27645
28644
|
var Qwen3ForCausalLM = class extends Qwen3PreTrainedModel {
|
|
27646
28645
|
};
|
|
27647
28646
|
|
|
28647
|
+
// src/models/qwen3_moe/modeling_qwen3_moe.js
|
|
28648
|
+
var Qwen3MoePreTrainedModel = class extends PreTrainedModel {
|
|
28649
|
+
};
|
|
28650
|
+
var Qwen3MoeModel = class extends Qwen3MoePreTrainedModel {
|
|
28651
|
+
};
|
|
28652
|
+
var Qwen3MoeForCausalLM = class extends Qwen3MoePreTrainedModel {
|
|
28653
|
+
};
|
|
28654
|
+
|
|
28655
|
+
// src/models/qwen3_next/modeling_qwen3_next.js
|
|
28656
|
+
var Qwen3NextPreTrainedModel = class extends PreTrainedModel {
|
|
28657
|
+
};
|
|
28658
|
+
var Qwen3NextModel = class extends Qwen3NextPreTrainedModel {
|
|
28659
|
+
};
|
|
28660
|
+
var Qwen3NextForCausalLM = class extends Qwen3NextPreTrainedModel {
|
|
28661
|
+
};
|
|
28662
|
+
|
|
27648
28663
|
// src/models/qwen3_vl/modeling_qwen3_vl.js
|
|
27649
28664
|
var Qwen3VLForConditionalGeneration = class extends Qwen2_5_VLForConditionalGeneration {
|
|
27650
28665
|
};
|
|
28666
|
+
var Qwen3VLForCausalLM = class extends Qwen2_5_VLForCausalLM {
|
|
28667
|
+
};
|
|
28668
|
+
|
|
28669
|
+
// src/models/qwen3_vl_moe/modeling_qwen3_vl_moe.js
|
|
28670
|
+
var Qwen3VLMoeForConditionalGeneration = class extends Qwen3VLForConditionalGeneration {
|
|
28671
|
+
};
|
|
28672
|
+
var Qwen3VLMoeForCausalLM = class extends Qwen3VLForCausalLM {
|
|
28673
|
+
};
|
|
27651
28674
|
|
|
27652
28675
|
// src/models/qwen3_5/modeling_qwen3_5.js
|
|
27653
28676
|
var Qwen3_5ForConditionalGeneration = class extends Qwen3VLForConditionalGeneration {
|
|
27654
28677
|
};
|
|
28678
|
+
var Qwen3_5ForCausalLM = class extends Qwen3_5ForConditionalGeneration {
|
|
28679
|
+
};
|
|
27655
28680
|
|
|
27656
28681
|
// src/models/qwen3_5_moe/modeling_qwen3_5_moe.js
|
|
27657
28682
|
var Qwen3_5MoeForConditionalGeneration = class extends Qwen3_5ForConditionalGeneration {
|
|
27658
28683
|
};
|
|
28684
|
+
var Qwen3_5MoeForCausalLM = class extends Qwen3_5ForCausalLM {
|
|
28685
|
+
};
|
|
27659
28686
|
|
|
27660
28687
|
// src/models/resnet/modeling_resnet.js
|
|
27661
28688
|
var ResNetPreTrainedModel = class extends PreTrainedModel {
|
|
@@ -28336,25 +29363,6 @@ var TrOCRPreTrainedModel = class extends PreTrainedModel {
|
|
|
28336
29363
|
var TrOCRForCausalLM = class extends TrOCRPreTrainedModel {
|
|
28337
29364
|
};
|
|
28338
29365
|
|
|
28339
|
-
// src/models/ultravox/modeling_ultravox.js
|
|
28340
|
-
var UltravoxPreTrainedModel = class extends PreTrainedModel {
|
|
28341
|
-
forward_params = ["input_ids", "attention_mask", "position_ids", "audio_values", "past_key_values"];
|
|
28342
|
-
};
|
|
28343
|
-
var UltravoxModel = class extends UltravoxPreTrainedModel {
|
|
28344
|
-
_merge_input_ids_with_audio_features(kwargs) {
|
|
28345
|
-
const audio_hidden_size = kwargs.audio_features.dims.at(-1);
|
|
28346
|
-
const reshaped_audio_features = kwargs.audio_features.view(-1, audio_hidden_size);
|
|
28347
|
-
return default_merge_input_ids_with_audio_features({
|
|
28348
|
-
// @ts-ignore
|
|
28349
|
-
audio_token_id: this.config.ignore_index ?? this.config.audio_token_id,
|
|
28350
|
-
...kwargs,
|
|
28351
|
-
audio_features: reshaped_audio_features
|
|
28352
|
-
});
|
|
28353
|
-
}
|
|
28354
|
-
};
|
|
28355
|
-
var VoxtralForConditionalGeneration = class extends UltravoxModel {
|
|
28356
|
-
};
|
|
28357
|
-
|
|
28358
29366
|
// src/models/unispeech/modeling_unispeech.js
|
|
28359
29367
|
var UniSpeechPreTrainedModel = class extends PreTrainedModel {
|
|
28360
29368
|
};
|
|
@@ -28520,6 +29528,170 @@ var VitsModel = class extends VitsPreTrainedModel {
|
|
|
28520
29528
|
}
|
|
28521
29529
|
};
|
|
28522
29530
|
|
|
29531
|
+
// src/models/voxtral/modeling_voxtral.js
|
|
29532
|
+
var VoxtralForConditionalGeneration = class extends UltravoxModel {
|
|
29533
|
+
};
|
|
29534
|
+
|
|
29535
|
+
// src/models/voxtral_realtime/modeling_voxtral_realtime.js
|
|
29536
|
+
var CONV1_LEFT_PAD = 2;
|
|
29537
|
+
var CONV2_LEFT_PAD = 1;
|
|
29538
|
+
var states = /* @__PURE__ */ new WeakMap();
|
|
29539
|
+
function createEncoderState(model, input_features) {
|
|
29540
|
+
const { text_config, audio_config } = (
|
|
29541
|
+
/** @type {any} */
|
|
29542
|
+
model.config
|
|
29543
|
+
);
|
|
29544
|
+
const encoder_session = model.sessions["audio_encoder"];
|
|
29545
|
+
const { num_mel_bins, hidden_size: enc_hidden_size } = audio_config;
|
|
29546
|
+
const PADDING_CACHE_CHANNELS = num_mel_bins + enc_hidden_size;
|
|
29547
|
+
const enc_kv_cache = new DynamicCache();
|
|
29548
|
+
const enc_dtype = encoder_session?.config?.kv_cache_dtype ?? "float32";
|
|
29549
|
+
const enc_cls = enc_dtype === "float16" ? DataTypeMap.float16 : DataTypeMap.float32;
|
|
29550
|
+
const enc_shapes = getCacheShapes(audio_config, { batch_size: 1 });
|
|
29551
|
+
for (const name in enc_shapes) {
|
|
29552
|
+
const size = enc_shapes[name].reduce((a, b) => a * b, 1);
|
|
29553
|
+
enc_kv_cache[name] = new Tensor3(enc_dtype, new enc_cls(size), enc_shapes[name]);
|
|
29554
|
+
}
|
|
29555
|
+
const enc_padding_cache = new Tensor3(enc_dtype, new enc_cls(PADDING_CACHE_CHANNELS * CONV1_LEFT_PAD), [
|
|
29556
|
+
1,
|
|
29557
|
+
PADDING_CACHE_CHANNELS,
|
|
29558
|
+
CONV1_LEFT_PAD
|
|
29559
|
+
]);
|
|
29560
|
+
const chunks_iter = input_features[Symbol.asyncIterator]?.() ?? input_features[Symbol.iterator]?.();
|
|
29561
|
+
if (!chunks_iter) {
|
|
29562
|
+
throw new Error("input_features must be iterable or async iterable");
|
|
29563
|
+
}
|
|
29564
|
+
return {
|
|
29565
|
+
encoder_session,
|
|
29566
|
+
enc_kv_cache,
|
|
29567
|
+
enc_padding_cache,
|
|
29568
|
+
enc_past_seq_len: 0,
|
|
29569
|
+
audio_embed_queue: [],
|
|
29570
|
+
audio_embed_total_tokens: 0,
|
|
29571
|
+
audio_queue_offset: 0,
|
|
29572
|
+
audio_consumed: 0,
|
|
29573
|
+
stream_exhausted: false,
|
|
29574
|
+
chunks_iter,
|
|
29575
|
+
text_hidden_size: text_config.hidden_size
|
|
29576
|
+
};
|
|
29577
|
+
}
|
|
29578
|
+
async function encodeChunk(s, chunk_features) {
|
|
29579
|
+
const audio_seq_len = chunk_features.dims[2];
|
|
29580
|
+
const conv2_output_len = Math.floor((CONV2_LEFT_PAD + audio_seq_len - 3) / 2) + 1;
|
|
29581
|
+
const position_ids = new Tensor3(
|
|
29582
|
+
"int64",
|
|
29583
|
+
BigInt64Array.from({ length: conv2_output_len }, (_, i) => BigInt(s.enc_past_seq_len + i)),
|
|
29584
|
+
[1, conv2_output_len]
|
|
29585
|
+
);
|
|
29586
|
+
const total_seq_len = s.enc_past_seq_len + conv2_output_len;
|
|
29587
|
+
const attention_mask = ones([1, total_seq_len]);
|
|
29588
|
+
const { audio_embeds, present_padding_cache, ...present_cache } = await sessionRun(s.encoder_session, {
|
|
29589
|
+
input_features: chunk_features,
|
|
29590
|
+
attention_mask,
|
|
29591
|
+
position_ids,
|
|
29592
|
+
past_padding_cache: s.enc_padding_cache,
|
|
29593
|
+
...s.enc_kv_cache
|
|
29594
|
+
});
|
|
29595
|
+
if (s.enc_padding_cache.location === "gpu-buffer") {
|
|
29596
|
+
s.enc_padding_cache.dispose();
|
|
29597
|
+
}
|
|
29598
|
+
s.enc_padding_cache = present_padding_cache;
|
|
29599
|
+
for (const name in present_cache) {
|
|
29600
|
+
if (name.startsWith("present.")) {
|
|
29601
|
+
const pastName = name.replace("present", "past_key_values");
|
|
29602
|
+
const prev = s.enc_kv_cache[pastName];
|
|
29603
|
+
if (prev?.location === "gpu-buffer") {
|
|
29604
|
+
prev.dispose();
|
|
29605
|
+
}
|
|
29606
|
+
s.enc_kv_cache[pastName] = present_cache[name];
|
|
29607
|
+
}
|
|
29608
|
+
}
|
|
29609
|
+
s.enc_past_seq_len = total_seq_len;
|
|
29610
|
+
return audio_embeds;
|
|
29611
|
+
}
|
|
29612
|
+
async function fillAudioBuffer(s, needed) {
|
|
29613
|
+
while (s.audio_embed_total_tokens < needed && !s.stream_exhausted) {
|
|
29614
|
+
const result = await s.chunks_iter.next();
|
|
29615
|
+
if (result.done) {
|
|
29616
|
+
s.stream_exhausted = true;
|
|
29617
|
+
break;
|
|
29618
|
+
}
|
|
29619
|
+
const new_embeds = await encodeChunk(s, result.value);
|
|
29620
|
+
s.audio_embed_queue.push({ data: new_embeds.data, tokens: new_embeds.dims[1] });
|
|
29621
|
+
s.audio_embed_total_tokens += new_embeds.dims[1];
|
|
29622
|
+
}
|
|
29623
|
+
}
|
|
29624
|
+
function addAudioEmbeddings(s, inputs_embeds, current_len) {
|
|
29625
|
+
if (s.audio_embed_queue.length === 0) return;
|
|
29626
|
+
const embed_data = inputs_embeds.data;
|
|
29627
|
+
let embed_write_pos = 0;
|
|
29628
|
+
let remaining = current_len;
|
|
29629
|
+
while (remaining > 0 && s.audio_embed_queue.length > 0) {
|
|
29630
|
+
const front = s.audio_embed_queue[0];
|
|
29631
|
+
const available = front.tokens - s.audio_queue_offset;
|
|
29632
|
+
const n = Math.min(remaining, available);
|
|
29633
|
+
const src_offset = s.audio_queue_offset * s.text_hidden_size;
|
|
29634
|
+
for (let i = 0; i < n * s.text_hidden_size; ++i) {
|
|
29635
|
+
embed_data[embed_write_pos * s.text_hidden_size + i] += front.data[src_offset + i];
|
|
29636
|
+
}
|
|
29637
|
+
embed_write_pos += n;
|
|
29638
|
+
remaining -= n;
|
|
29639
|
+
s.audio_queue_offset += n;
|
|
29640
|
+
if (s.audio_queue_offset >= front.tokens) {
|
|
29641
|
+
s.audio_embed_queue.shift();
|
|
29642
|
+
s.audio_queue_offset = 0;
|
|
29643
|
+
}
|
|
29644
|
+
}
|
|
29645
|
+
s.audio_consumed += current_len - remaining;
|
|
29646
|
+
}
|
|
29647
|
+
var AudioExhaustedCriteria = class extends StoppingCriteria {
|
|
29648
|
+
constructor(enc_state) {
|
|
29649
|
+
super();
|
|
29650
|
+
this._s = enc_state;
|
|
29651
|
+
}
|
|
29652
|
+
_call(input_ids) {
|
|
29653
|
+
const done = this._s.stream_exhausted && this._s.audio_embed_queue.length === 0;
|
|
29654
|
+
return input_ids.map(() => done);
|
|
29655
|
+
}
|
|
29656
|
+
};
|
|
29657
|
+
var VoxtralRealtimePreTrainedModel = class extends PreTrainedModel {
|
|
29658
|
+
forward_params = ["input_ids", "attention_mask", "position_ids", "past_key_values"];
|
|
29659
|
+
};
|
|
29660
|
+
var VoxtralRealtimeForConditionalGeneration = class extends VoxtralRealtimePreTrainedModel {
|
|
29661
|
+
async forward({ input_ids, past_key_values, ...kwargs }) {
|
|
29662
|
+
const current_len = input_ids.dims[1];
|
|
29663
|
+
const enc = states.get(this);
|
|
29664
|
+
if (enc) {
|
|
29665
|
+
await fillAudioBuffer(enc, enc.audio_consumed + current_len);
|
|
29666
|
+
}
|
|
29667
|
+
const { inputs_embeds } = await sessionRun(this.sessions["embed_tokens"], { input_ids });
|
|
29668
|
+
if (enc) {
|
|
29669
|
+
addAudioEmbeddings(enc, inputs_embeds, current_len);
|
|
29670
|
+
}
|
|
29671
|
+
const decoder_feeds = { inputs_embeds, ...kwargs };
|
|
29672
|
+
this.addPastKeyValues(decoder_feeds, past_key_values);
|
|
29673
|
+
const session = this.sessions["decoder_model_merged"];
|
|
29674
|
+
const fixed = pick(decoder_feeds, session.inputNames);
|
|
29675
|
+
return await sessionRun(session, fixed);
|
|
29676
|
+
}
|
|
29677
|
+
async generate({ input_features, stopping_criteria: userStoppingCriteria, ...kwargs }) {
|
|
29678
|
+
if (!input_features) {
|
|
29679
|
+
throw new Error("input_features (generator/iterable) must be provided");
|
|
29680
|
+
}
|
|
29681
|
+
const enc_state = createEncoderState(this, input_features);
|
|
29682
|
+
states.set(this, enc_state);
|
|
29683
|
+
const stopping_criteria = new StoppingCriteriaList();
|
|
29684
|
+
stopping_criteria.push(new AudioExhaustedCriteria(enc_state));
|
|
29685
|
+
if (userStoppingCriteria) stopping_criteria.extend(userStoppingCriteria);
|
|
29686
|
+
try {
|
|
29687
|
+
return await super.generate({ ...kwargs, stopping_criteria });
|
|
29688
|
+
} finally {
|
|
29689
|
+
enc_state.enc_kv_cache.dispose();
|
|
29690
|
+
states.delete(this);
|
|
29691
|
+
}
|
|
29692
|
+
}
|
|
29693
|
+
};
|
|
29694
|
+
|
|
28523
29695
|
// src/models/wav2vec2_bert/modeling_wav2vec2_bert.js
|
|
28524
29696
|
var Wav2Vec2BertPreTrainedModel = class extends PreTrainedModel {
|
|
28525
29697
|
};
|
|
@@ -29144,6 +30316,7 @@ var MODEL_MAPPING_NAMES_DECODER_ONLY = /* @__PURE__ */ new Map([
|
|
|
29144
30316
|
["olmo", "OlmoModel"],
|
|
29145
30317
|
["olmo2", "Olmo2Model"],
|
|
29146
30318
|
["olmo3", "Olmo3Model"],
|
|
30319
|
+
["olmo_hybrid", "OlmoHybridModel"],
|
|
29147
30320
|
["mobilellm", "MobileLLMModel"],
|
|
29148
30321
|
["granite", "GraniteModel"],
|
|
29149
30322
|
["granitemoehybrid", "GraniteMoeHybridModel"],
|
|
@@ -29157,7 +30330,10 @@ var MODEL_MAPPING_NAMES_DECODER_ONLY = /* @__PURE__ */ new Map([
|
|
|
29157
30330
|
["glm", "GlmModel"],
|
|
29158
30331
|
["openelm", "OpenELMModel"],
|
|
29159
30332
|
["qwen2", "Qwen2Model"],
|
|
30333
|
+
["qwen2_moe", "Qwen2MoeModel"],
|
|
29160
30334
|
["qwen3", "Qwen3Model"],
|
|
30335
|
+
["qwen3_moe", "Qwen3MoeModel"],
|
|
30336
|
+
["qwen3_next", "Qwen3NextModel"],
|
|
29161
30337
|
["phi", "PhiModel"],
|
|
29162
30338
|
["phi3", "Phi3Model"],
|
|
29163
30339
|
["mpt", "MptModel"],
|
|
@@ -29165,7 +30341,7 @@ var MODEL_MAPPING_NAMES_DECODER_ONLY = /* @__PURE__ */ new Map([
|
|
|
29165
30341
|
["mistral", "MistralModel"],
|
|
29166
30342
|
["ministral", "MinistralModel"],
|
|
29167
30343
|
["ministral3", "Ministral3Model"],
|
|
29168
|
-
["ernie4_5", "
|
|
30344
|
+
["ernie4_5", "Ernie4_5ForCausalLM"],
|
|
29169
30345
|
["starcoder2", "Starcoder2Model"],
|
|
29170
30346
|
["falcon", "FalconModel"],
|
|
29171
30347
|
["falcon_h1", "FalconH1Model"],
|
|
@@ -29259,6 +30435,7 @@ var MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = /* @__PURE__ */ new Map([
|
|
|
29259
30435
|
["olmo", "OlmoForCausalLM"],
|
|
29260
30436
|
["olmo2", "Olmo2ForCausalLM"],
|
|
29261
30437
|
["olmo3", "Olmo3ForCausalLM"],
|
|
30438
|
+
["olmo_hybrid", "OlmoHybridForCausalLM"],
|
|
29262
30439
|
["mobilellm", "MobileLLMForCausalLM"],
|
|
29263
30440
|
["granite", "GraniteForCausalLM"],
|
|
29264
30441
|
["granitemoehybrid", "GraniteMoeHybridForCausalLM"],
|
|
@@ -29268,11 +30445,22 @@ var MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = /* @__PURE__ */ new Map([
|
|
|
29268
30445
|
["gemma2", "Gemma2ForCausalLM"],
|
|
29269
30446
|
["vaultgemma", "VaultGemmaForCausalLM"],
|
|
29270
30447
|
["gemma3_text", "Gemma3ForCausalLM"],
|
|
30448
|
+
["gemma3", "Gemma3ForCausalLM"],
|
|
29271
30449
|
["helium", "HeliumForCausalLM"],
|
|
29272
30450
|
["glm", "GlmForCausalLM"],
|
|
29273
30451
|
["openelm", "OpenELMForCausalLM"],
|
|
29274
30452
|
["qwen2", "Qwen2ForCausalLM"],
|
|
30453
|
+
["qwen2_moe", "Qwen2MoeForCausalLM"],
|
|
29275
30454
|
["qwen3", "Qwen3ForCausalLM"],
|
|
30455
|
+
["qwen3_moe", "Qwen3MoeForCausalLM"],
|
|
30456
|
+
["qwen3_next", "Qwen3NextForCausalLM"],
|
|
30457
|
+
["qwen2_vl", "Qwen2VLForCausalLM"],
|
|
30458
|
+
["qwen2_5_vl", "Qwen2_5_VLForCausalLM"],
|
|
30459
|
+
["qwen3_vl", "Qwen3VLForCausalLM"],
|
|
30460
|
+
["qwen3_vl_moe", "Qwen3VLMoeForCausalLM"],
|
|
30461
|
+
["qwen3_5", "Qwen3_5ForCausalLM"],
|
|
30462
|
+
["qwen3_5_moe", "Qwen3_5MoeForCausalLM"],
|
|
30463
|
+
["gemma3n", "Gemma3nForCausalLM"],
|
|
29276
30464
|
["phi", "PhiForCausalLM"],
|
|
29277
30465
|
["phi3", "Phi3ForCausalLM"],
|
|
29278
30466
|
["mpt", "MptForCausalLM"],
|
|
@@ -29281,7 +30469,7 @@ var MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = /* @__PURE__ */ new Map([
|
|
|
29281
30469
|
["mistral", "MistralForCausalLM"],
|
|
29282
30470
|
["ministral", "MinistralForCausalLM"],
|
|
29283
30471
|
["ministral3", "Ministral3ForCausalLM"],
|
|
29284
|
-
["ernie4_5", "
|
|
30472
|
+
["ernie4_5", "Ernie4_5ForCausalLM"],
|
|
29285
30473
|
["starcoder2", "Starcoder2ForCausalLM"],
|
|
29286
30474
|
["falcon", "FalconForCausalLM"],
|
|
29287
30475
|
["falcon_h1", "FalconH1ForCausalLM"],
|
|
@@ -29345,8 +30533,10 @@ var MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES = /* @__PURE__ */ new Map([
|
|
|
29345
30533
|
["qwen2_vl", "Qwen2VLForConditionalGeneration"],
|
|
29346
30534
|
["qwen2_5_vl", "Qwen2_5_VLForConditionalGeneration"],
|
|
29347
30535
|
["qwen3_vl", "Qwen3VLForConditionalGeneration"],
|
|
30536
|
+
["qwen3_vl_moe", "Qwen3VLMoeForConditionalGeneration"],
|
|
29348
30537
|
["qwen3_5", "Qwen3_5ForConditionalGeneration"],
|
|
29349
30538
|
["qwen3_5_moe", "Qwen3_5MoeForConditionalGeneration"],
|
|
30539
|
+
["lfm2_vl", "Lfm2VlForConditionalGeneration"],
|
|
29350
30540
|
["idefics3", "Idefics3ForConditionalGeneration"],
|
|
29351
30541
|
["smolvlm", "SmolVLMForConditionalGeneration"],
|
|
29352
30542
|
["paligemma", "PaliGemmaForConditionalGeneration"],
|
|
@@ -29355,8 +30545,10 @@ var MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES = /* @__PURE__ */ new Map([
|
|
|
29355
30545
|
["mistral3", "Mistral3ForConditionalGeneration"]
|
|
29356
30546
|
]);
|
|
29357
30547
|
var MODEL_FOR_AUDIO_TEXT_TO_TEXT_MAPPING_NAMES = /* @__PURE__ */ new Map([
|
|
30548
|
+
["granite_speech", "GraniteSpeechForConditionalGeneration"],
|
|
29358
30549
|
["ultravox", "UltravoxModel"],
|
|
29359
|
-
["voxtral", "VoxtralForConditionalGeneration"]
|
|
30550
|
+
["voxtral", "VoxtralForConditionalGeneration"],
|
|
30551
|
+
["voxtral_realtime", "VoxtralRealtimeForConditionalGeneration"]
|
|
29360
30552
|
]);
|
|
29361
30553
|
var MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES = /* @__PURE__ */ new Map([
|
|
29362
30554
|
["vision-encoder-decoder", "VisionEncoderDecoderModel"]
|
|
@@ -29539,24 +30731,37 @@ var CUSTOM_MAPPING = [
|
|
|
29539
30731
|
MODEL_TYPES.ImageAudioTextToText
|
|
29540
30732
|
],
|
|
29541
30733
|
["SupertonicForConditionalGeneration", SupertonicForConditionalGeneration, MODEL_TYPES.Supertonic],
|
|
29542
|
-
["ChatterboxModel", ChatterboxModel, MODEL_TYPES.Chatterbox]
|
|
30734
|
+
["ChatterboxModel", ChatterboxModel, MODEL_TYPES.Chatterbox],
|
|
30735
|
+
["Qwen2VLForCausalLM", Qwen2VLForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
|
|
30736
|
+
["Qwen2_5_VLForCausalLM", Qwen2_5_VLForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
|
|
30737
|
+
["Qwen3VLForCausalLM", Qwen3VLForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
|
|
30738
|
+
["Qwen3VLMoeForCausalLM", Qwen3VLMoeForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
|
|
30739
|
+
["Qwen3_5ForCausalLM", Qwen3_5ForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
|
|
30740
|
+
["Qwen3_5MoeForCausalLM", Qwen3_5MoeForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
|
|
30741
|
+
["Gemma3nForCausalLM", Gemma3nForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
|
|
30742
|
+
[
|
|
30743
|
+
"VoxtralRealtimeForConditionalGeneration",
|
|
30744
|
+
VoxtralRealtimeForConditionalGeneration,
|
|
30745
|
+
MODEL_TYPES.VoxtralRealtime
|
|
30746
|
+
]
|
|
29543
30747
|
];
|
|
29544
30748
|
for (const [name, model, type] of CUSTOM_MAPPING) {
|
|
29545
30749
|
MODEL_TYPE_MAPPING.set(name, type);
|
|
29546
30750
|
MODEL_CLASS_TO_NAME_MAPPING.set(model, name);
|
|
29547
30751
|
MODEL_NAME_TO_CLASS_MAPPING.set(name, model);
|
|
29548
30752
|
}
|
|
29549
|
-
var
|
|
30753
|
+
var CUSTOM_ARCHITECTURES_MAPPING = /* @__PURE__ */ new Map([
|
|
29550
30754
|
["modnet", MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES],
|
|
29551
30755
|
["birefnet", MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES],
|
|
29552
30756
|
["isnet", MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES],
|
|
29553
30757
|
["ben", MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES]
|
|
29554
30758
|
]);
|
|
29555
|
-
for (const [name, mapping] of
|
|
30759
|
+
for (const [name, mapping] of CUSTOM_ARCHITECTURES_MAPPING.entries()) {
|
|
29556
30760
|
mapping.set(name, "PreTrainedModel");
|
|
29557
30761
|
MODEL_TYPE_MAPPING.set(name, MODEL_TYPES.EncoderOnly);
|
|
29558
30762
|
MODEL_NAME_TO_CLASS_MAPPING.set(name, PreTrainedModel);
|
|
29559
30763
|
}
|
|
30764
|
+
var CUSTOM_ARCHITECTURES = new Set(CUSTOM_ARCHITECTURES_MAPPING.keys());
|
|
29560
30765
|
MODEL_TYPE_MAPPING.set("PreTrainedModel", MODEL_TYPES.EncoderOnly);
|
|
29561
30766
|
MODEL_CLASS_TO_NAME_MAPPING.set(PreTrainedModel, "PreTrainedModel");
|
|
29562
30767
|
var MODEL_MAPPINGS = {
|
|
@@ -29605,6 +30810,18 @@ var PretrainedMixin = class {
|
|
|
29605
30810
|
* the model type is not found in the mapping.
|
|
29606
30811
|
*/
|
|
29607
30812
|
static BASE_IF_FAIL = false;
|
|
30813
|
+
/**
|
|
30814
|
+
* Check whether this AutoModel class supports a given model type.
|
|
30815
|
+
* @param {string} model_type The model type from config (e.g., 'bert', 'whisper').
|
|
30816
|
+
* @returns {boolean} Whether this class can handle the given model type.
|
|
30817
|
+
*/
|
|
30818
|
+
static supports(model_type) {
|
|
30819
|
+
if (!this.MODEL_CLASS_MAPPINGS) return false;
|
|
30820
|
+
for (const mapping of this.MODEL_CLASS_MAPPINGS) {
|
|
30821
|
+
if (mapping.has(model_type)) return true;
|
|
30822
|
+
}
|
|
30823
|
+
return this.BASE_IF_FAIL;
|
|
30824
|
+
}
|
|
29608
30825
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
29609
30826
|
static async from_pretrained(pretrained_model_name_or_path, {
|
|
29610
30827
|
progress_callback = null,
|
|
@@ -29636,7 +30853,7 @@ var PretrainedMixin = class {
|
|
|
29636
30853
|
if (!this.MODEL_CLASS_MAPPINGS) {
|
|
29637
30854
|
throw new Error("`MODEL_CLASS_MAPPINGS` not implemented for this type of `AutoClass`: " + this.name);
|
|
29638
30855
|
}
|
|
29639
|
-
const model_type = options.config
|
|
30856
|
+
const { model_type } = options.config;
|
|
29640
30857
|
for (const MODEL_CLASS_MAPPING of this.MODEL_CLASS_MAPPINGS) {
|
|
29641
30858
|
let modelInfo = MODEL_CLASS_MAPPING.get(model_type);
|
|
29642
30859
|
if (!modelInfo) {
|
|
@@ -30988,40 +32205,30 @@ Pipeline {
|
|
|
30988
32205
|
// src/pipelines/index.js
|
|
30989
32206
|
var SUPPORTED_TASKS = Object.freeze({
|
|
30990
32207
|
"text-classification": {
|
|
30991
|
-
tokenizer: AutoTokenizer,
|
|
30992
32208
|
pipeline: TextClassificationPipeline,
|
|
30993
32209
|
model: AutoModelForSequenceClassification,
|
|
30994
32210
|
default: {
|
|
30995
|
-
// TODO: replace with original
|
|
30996
|
-
// "model": "distilbert-base-uncased-finetuned-sst-2-english",
|
|
30997
32211
|
model: "Xenova/distilbert-base-uncased-finetuned-sst-2-english"
|
|
30998
32212
|
},
|
|
30999
32213
|
type: "text"
|
|
31000
32214
|
},
|
|
31001
32215
|
"token-classification": {
|
|
31002
|
-
tokenizer: AutoTokenizer,
|
|
31003
32216
|
pipeline: TokenClassificationPipeline,
|
|
31004
32217
|
model: AutoModelForTokenClassification,
|
|
31005
32218
|
default: {
|
|
31006
|
-
// TODO: replace with original
|
|
31007
|
-
// "model": "Davlan/bert-base-multilingual-cased-ner-hrl",
|
|
31008
32219
|
model: "Xenova/bert-base-multilingual-cased-ner-hrl"
|
|
31009
32220
|
},
|
|
31010
32221
|
type: "text"
|
|
31011
32222
|
},
|
|
31012
32223
|
"question-answering": {
|
|
31013
|
-
tokenizer: AutoTokenizer,
|
|
31014
32224
|
pipeline: QuestionAnsweringPipeline,
|
|
31015
32225
|
model: AutoModelForQuestionAnswering,
|
|
31016
32226
|
default: {
|
|
31017
|
-
// TODO: replace with original
|
|
31018
|
-
// "model": "distilbert-base-cased-distilled-squad",
|
|
31019
32227
|
model: "Xenova/distilbert-base-cased-distilled-squad"
|
|
31020
32228
|
},
|
|
31021
32229
|
type: "text"
|
|
31022
32230
|
},
|
|
31023
32231
|
"fill-mask": {
|
|
31024
|
-
tokenizer: AutoTokenizer,
|
|
31025
32232
|
pipeline: FillMaskPipeline,
|
|
31026
32233
|
model: AutoModelForMaskedLM,
|
|
31027
32234
|
default: {
|
|
@@ -31031,40 +32238,30 @@ var SUPPORTED_TASKS = Object.freeze({
|
|
|
31031
32238
|
type: "text"
|
|
31032
32239
|
},
|
|
31033
32240
|
summarization: {
|
|
31034
|
-
tokenizer: AutoTokenizer,
|
|
31035
32241
|
pipeline: SummarizationPipeline,
|
|
31036
32242
|
model: AutoModelForSeq2SeqLM,
|
|
31037
32243
|
default: {
|
|
31038
|
-
// TODO: replace with original
|
|
31039
|
-
// "model": "sshleifer/distilbart-cnn-6-6",
|
|
31040
32244
|
model: "Xenova/distilbart-cnn-6-6"
|
|
31041
32245
|
},
|
|
31042
32246
|
type: "text"
|
|
31043
32247
|
},
|
|
31044
32248
|
translation: {
|
|
31045
|
-
tokenizer: AutoTokenizer,
|
|
31046
32249
|
pipeline: TranslationPipeline,
|
|
31047
32250
|
model: AutoModelForSeq2SeqLM,
|
|
31048
32251
|
default: {
|
|
31049
|
-
// TODO: replace with original
|
|
31050
|
-
// "model": "t5-small",
|
|
31051
32252
|
model: "Xenova/t5-small"
|
|
31052
32253
|
},
|
|
31053
32254
|
type: "text"
|
|
31054
32255
|
},
|
|
31055
32256
|
"text2text-generation": {
|
|
31056
|
-
tokenizer: AutoTokenizer,
|
|
31057
32257
|
pipeline: Text2TextGenerationPipeline,
|
|
31058
32258
|
model: AutoModelForSeq2SeqLM,
|
|
31059
32259
|
default: {
|
|
31060
|
-
// TODO: replace with original
|
|
31061
|
-
// "model": "google/flan-t5-small",
|
|
31062
32260
|
model: "Xenova/flan-t5-small"
|
|
31063
32261
|
},
|
|
31064
32262
|
type: "text"
|
|
31065
32263
|
},
|
|
31066
32264
|
"text-generation": {
|
|
31067
|
-
tokenizer: AutoTokenizer,
|
|
31068
32265
|
pipeline: TextGenerationPipeline,
|
|
31069
32266
|
model: AutoModelForCausalLM,
|
|
31070
32267
|
default: {
|
|
@@ -31074,12 +32271,9 @@ var SUPPORTED_TASKS = Object.freeze({
|
|
|
31074
32271
|
type: "text"
|
|
31075
32272
|
},
|
|
31076
32273
|
"zero-shot-classification": {
|
|
31077
|
-
tokenizer: AutoTokenizer,
|
|
31078
32274
|
pipeline: ZeroShotClassificationPipeline,
|
|
31079
32275
|
model: AutoModelForSequenceClassification,
|
|
31080
32276
|
default: {
|
|
31081
|
-
// TODO: replace with original
|
|
31082
|
-
// "model": "typeform/distilbert-base-uncased-mnli",
|
|
31083
32277
|
model: "Xenova/distilbert-base-uncased-mnli"
|
|
31084
32278
|
},
|
|
31085
32279
|
type: "text"
|
|
@@ -31087,47 +32281,30 @@ var SUPPORTED_TASKS = Object.freeze({
|
|
|
31087
32281
|
"audio-classification": {
|
|
31088
32282
|
pipeline: AudioClassificationPipeline,
|
|
31089
32283
|
model: AutoModelForAudioClassification,
|
|
31090
|
-
processor: AutoProcessor,
|
|
31091
32284
|
default: {
|
|
31092
|
-
// TODO: replace with original
|
|
31093
|
-
// "model": "superb/wav2vec2-base-superb-ks",
|
|
31094
32285
|
model: "Xenova/wav2vec2-base-superb-ks"
|
|
31095
32286
|
},
|
|
31096
32287
|
type: "audio"
|
|
31097
32288
|
},
|
|
31098
32289
|
"zero-shot-audio-classification": {
|
|
31099
|
-
tokenizer: AutoTokenizer,
|
|
31100
32290
|
pipeline: ZeroShotAudioClassificationPipeline,
|
|
31101
32291
|
model: AutoModel,
|
|
31102
|
-
processor: AutoProcessor,
|
|
31103
32292
|
default: {
|
|
31104
|
-
// TODO: replace with original
|
|
31105
|
-
// "model": "laion/clap-htsat-fused",
|
|
31106
32293
|
model: "Xenova/clap-htsat-unfused"
|
|
31107
32294
|
},
|
|
31108
32295
|
type: "multimodal"
|
|
31109
32296
|
},
|
|
31110
32297
|
"automatic-speech-recognition": {
|
|
31111
|
-
tokenizer: AutoTokenizer,
|
|
31112
32298
|
pipeline: AutomaticSpeechRecognitionPipeline,
|
|
31113
32299
|
model: [AutoModelForSpeechSeq2Seq, AutoModelForCTC],
|
|
31114
|
-
processor: AutoProcessor,
|
|
31115
32300
|
default: {
|
|
31116
|
-
// TODO: replace with original
|
|
31117
|
-
// "model": "openai/whisper-tiny.en",
|
|
31118
32301
|
model: "Xenova/whisper-tiny.en"
|
|
31119
32302
|
},
|
|
31120
32303
|
type: "multimodal"
|
|
31121
32304
|
},
|
|
31122
32305
|
"text-to-audio": {
|
|
31123
|
-
tokenizer: AutoTokenizer,
|
|
31124
32306
|
pipeline: TextToAudioPipeline,
|
|
31125
32307
|
model: [AutoModelForTextToWaveform, AutoModelForTextToSpectrogram],
|
|
31126
|
-
processor: [
|
|
31127
|
-
AutoProcessor,
|
|
31128
|
-
/* Some don't use a processor */
|
|
31129
|
-
null
|
|
31130
|
-
],
|
|
31131
32308
|
default: {
|
|
31132
32309
|
model: "onnx-community/Supertonic-TTS-ONNX",
|
|
31133
32310
|
dtype: "fp32"
|
|
@@ -31135,124 +32312,86 @@ var SUPPORTED_TASKS = Object.freeze({
|
|
|
31135
32312
|
type: "text"
|
|
31136
32313
|
},
|
|
31137
32314
|
"image-to-text": {
|
|
31138
|
-
tokenizer: AutoTokenizer,
|
|
31139
32315
|
pipeline: ImageToTextPipeline,
|
|
31140
32316
|
model: AutoModelForVision2Seq,
|
|
31141
|
-
processor: AutoProcessor,
|
|
31142
32317
|
default: {
|
|
31143
|
-
// TODO: replace with original
|
|
31144
|
-
// "model": "nlpconnect/vit-gpt2-image-captioning",
|
|
31145
32318
|
model: "Xenova/vit-gpt2-image-captioning"
|
|
31146
32319
|
},
|
|
31147
32320
|
type: "multimodal"
|
|
31148
32321
|
},
|
|
31149
32322
|
"image-classification": {
|
|
31150
|
-
// no tokenizer
|
|
31151
32323
|
pipeline: ImageClassificationPipeline,
|
|
31152
32324
|
model: AutoModelForImageClassification,
|
|
31153
|
-
processor: AutoProcessor,
|
|
31154
32325
|
default: {
|
|
31155
|
-
// TODO: replace with original
|
|
31156
|
-
// "model": "google/vit-base-patch16-224",
|
|
31157
32326
|
model: "Xenova/vit-base-patch16-224"
|
|
31158
32327
|
},
|
|
31159
32328
|
type: "multimodal"
|
|
31160
32329
|
},
|
|
31161
32330
|
"image-segmentation": {
|
|
31162
|
-
// no tokenizer
|
|
31163
32331
|
pipeline: ImageSegmentationPipeline,
|
|
31164
32332
|
model: [AutoModelForImageSegmentation, AutoModelForSemanticSegmentation, AutoModelForUniversalSegmentation],
|
|
31165
|
-
processor: AutoProcessor,
|
|
31166
32333
|
default: {
|
|
31167
|
-
// TODO: replace with original
|
|
31168
|
-
// "model": "facebook/detr-resnet-50-panoptic",
|
|
31169
32334
|
model: "Xenova/detr-resnet-50-panoptic"
|
|
31170
32335
|
},
|
|
31171
32336
|
type: "multimodal"
|
|
31172
32337
|
},
|
|
31173
32338
|
"background-removal": {
|
|
31174
|
-
// no tokenizer
|
|
31175
32339
|
pipeline: BackgroundRemovalPipeline,
|
|
31176
32340
|
model: [AutoModelForImageSegmentation, AutoModelForSemanticSegmentation, AutoModelForUniversalSegmentation],
|
|
31177
|
-
processor: AutoProcessor,
|
|
31178
32341
|
default: {
|
|
31179
32342
|
model: "Xenova/modnet"
|
|
31180
32343
|
},
|
|
31181
32344
|
type: "image"
|
|
31182
32345
|
},
|
|
31183
32346
|
"zero-shot-image-classification": {
|
|
31184
|
-
tokenizer: AutoTokenizer,
|
|
31185
32347
|
pipeline: ZeroShotImageClassificationPipeline,
|
|
31186
32348
|
model: AutoModel,
|
|
31187
|
-
processor: AutoProcessor,
|
|
31188
32349
|
default: {
|
|
31189
|
-
// TODO: replace with original
|
|
31190
|
-
// "model": "openai/clip-vit-base-patch32",
|
|
31191
32350
|
model: "Xenova/clip-vit-base-patch32"
|
|
31192
32351
|
},
|
|
31193
32352
|
type: "multimodal"
|
|
31194
32353
|
},
|
|
31195
32354
|
"object-detection": {
|
|
31196
|
-
// no tokenizer
|
|
31197
32355
|
pipeline: ObjectDetectionPipeline,
|
|
31198
32356
|
model: AutoModelForObjectDetection,
|
|
31199
|
-
processor: AutoProcessor,
|
|
31200
32357
|
default: {
|
|
31201
|
-
// TODO: replace with original
|
|
31202
|
-
// "model": "facebook/detr-resnet-50",
|
|
31203
32358
|
model: "Xenova/detr-resnet-50"
|
|
31204
32359
|
},
|
|
31205
32360
|
type: "multimodal"
|
|
31206
32361
|
},
|
|
31207
32362
|
"zero-shot-object-detection": {
|
|
31208
|
-
tokenizer: AutoTokenizer,
|
|
31209
32363
|
pipeline: ZeroShotObjectDetectionPipeline,
|
|
31210
32364
|
model: AutoModelForZeroShotObjectDetection,
|
|
31211
|
-
processor: AutoProcessor,
|
|
31212
32365
|
default: {
|
|
31213
|
-
// TODO: replace with original
|
|
31214
|
-
// "model": "google/owlvit-base-patch32",
|
|
31215
32366
|
model: "Xenova/owlvit-base-patch32"
|
|
31216
32367
|
},
|
|
31217
32368
|
type: "multimodal"
|
|
31218
32369
|
},
|
|
31219
32370
|
"document-question-answering": {
|
|
31220
|
-
tokenizer: AutoTokenizer,
|
|
31221
32371
|
pipeline: DocumentQuestionAnsweringPipeline,
|
|
31222
32372
|
model: AutoModelForDocumentQuestionAnswering,
|
|
31223
|
-
processor: AutoProcessor,
|
|
31224
32373
|
default: {
|
|
31225
|
-
// TODO: replace with original
|
|
31226
|
-
// "model": "naver-clova-ix/donut-base-finetuned-docvqa",
|
|
31227
32374
|
model: "Xenova/donut-base-finetuned-docvqa"
|
|
31228
32375
|
},
|
|
31229
32376
|
type: "multimodal"
|
|
31230
32377
|
},
|
|
31231
32378
|
"image-to-image": {
|
|
31232
|
-
// no tokenizer
|
|
31233
32379
|
pipeline: ImageToImagePipeline,
|
|
31234
32380
|
model: AutoModelForImageToImage,
|
|
31235
|
-
processor: AutoProcessor,
|
|
31236
32381
|
default: {
|
|
31237
|
-
// TODO: replace with original
|
|
31238
|
-
// "model": "caidas/swin2SR-classical-sr-x2-64",
|
|
31239
32382
|
model: "Xenova/swin2SR-classical-sr-x2-64"
|
|
31240
32383
|
},
|
|
31241
32384
|
type: "image"
|
|
31242
32385
|
},
|
|
31243
32386
|
"depth-estimation": {
|
|
31244
|
-
// no tokenizer
|
|
31245
32387
|
pipeline: DepthEstimationPipeline,
|
|
31246
32388
|
model: AutoModelForDepthEstimation,
|
|
31247
|
-
processor: AutoProcessor,
|
|
31248
32389
|
default: {
|
|
31249
32390
|
model: "onnx-community/depth-anything-v2-small"
|
|
31250
32391
|
},
|
|
31251
32392
|
type: "image"
|
|
31252
32393
|
},
|
|
31253
|
-
// This task serves as a useful interface for dealing with sentence-transformers (https://huggingface.co/sentence-transformers).
|
|
31254
32394
|
"feature-extraction": {
|
|
31255
|
-
tokenizer: AutoTokenizer,
|
|
31256
32395
|
pipeline: FeatureExtractionPipeline,
|
|
31257
32396
|
model: AutoModel,
|
|
31258
32397
|
default: {
|
|
@@ -31262,7 +32401,6 @@ var SUPPORTED_TASKS = Object.freeze({
|
|
|
31262
32401
|
type: "text"
|
|
31263
32402
|
},
|
|
31264
32403
|
"image-feature-extraction": {
|
|
31265
|
-
processor: AutoProcessor,
|
|
31266
32404
|
pipeline: ImageFeatureExtractionPipeline,
|
|
31267
32405
|
model: [AutoModelForImageFeatureExtraction, AutoModel],
|
|
31268
32406
|
default: {
|
|
@@ -31283,8 +32421,18 @@ var TASK_ALIASES = Object.freeze({
|
|
|
31283
32421
|
});
|
|
31284
32422
|
|
|
31285
32423
|
// src/utils/model_registry/get_model_files.js
|
|
32424
|
+
function get_config(modelId, { config = null, cache_dir = null, local_files_only = false, revision = "main" } = {}) {
|
|
32425
|
+
if (config !== null) {
|
|
32426
|
+
return AutoConfig.from_pretrained(modelId, { config, cache_dir, local_files_only, revision });
|
|
32427
|
+
}
|
|
32428
|
+
const key = JSON.stringify([modelId, cache_dir, local_files_only, revision]);
|
|
32429
|
+
return memoizePromise(
|
|
32430
|
+
key,
|
|
32431
|
+
() => AutoConfig.from_pretrained(modelId, { config, cache_dir, local_files_only, revision })
|
|
32432
|
+
);
|
|
32433
|
+
}
|
|
31286
32434
|
async function get_model_files(modelId, { config = null, dtype: overrideDtype = null, device: overrideDevice = null, model_file_name = null } = {}) {
|
|
31287
|
-
config = await
|
|
32435
|
+
config = await get_config(modelId, { config });
|
|
31288
32436
|
const files = [
|
|
31289
32437
|
// Add config.json (always loaded)
|
|
31290
32438
|
"config.json"
|
|
@@ -31314,6 +32462,15 @@ async function get_model_files(modelId, { config = null, dtype: overrideDtype =
|
|
|
31314
32462
|
modelType = mappedType;
|
|
31315
32463
|
foundInMapping = true;
|
|
31316
32464
|
}
|
|
32465
|
+
if (!foundInMapping) {
|
|
32466
|
+
for (const mapping of Object.values(MODEL_MAPPING_NAMES)) {
|
|
32467
|
+
if (mapping.has(config.model_type)) {
|
|
32468
|
+
modelType = MODEL_TYPE_MAPPING.get(mapping.get(config.model_type));
|
|
32469
|
+
foundInMapping = true;
|
|
32470
|
+
break;
|
|
32471
|
+
}
|
|
32472
|
+
}
|
|
32473
|
+
}
|
|
31317
32474
|
}
|
|
31318
32475
|
if (!foundInMapping) {
|
|
31319
32476
|
const archList = architectures.length > 0 ? architectures.join(", ") : "(none)";
|
|
@@ -31336,74 +32493,14 @@ async function get_model_files(modelId, { config = null, dtype: overrideDtype =
|
|
|
31336
32493
|
files.push(dataFilePath);
|
|
31337
32494
|
}
|
|
31338
32495
|
};
|
|
31339
|
-
const
|
|
31340
|
-
|
|
31341
|
-
add_model_file(
|
|
31342
|
-
|
|
31343
|
-
|
|
31344
|
-
|
|
31345
|
-
|
|
31346
|
-
|
|
31347
|
-
add_model_file("decoder_model_merged");
|
|
31348
|
-
files.push("generation_config.json");
|
|
31349
|
-
} else if (modelType === MODEL_TYPES.MaskGeneration) {
|
|
31350
|
-
add_model_file("model", "vision_encoder");
|
|
31351
|
-
add_model_file("prompt_encoder_mask_decoder");
|
|
31352
|
-
} else if (modelType === MODEL_TYPES.EncoderDecoder) {
|
|
31353
|
-
add_model_file("model", "encoder_model");
|
|
31354
|
-
add_model_file("decoder_model_merged");
|
|
31355
|
-
} else if (modelType === MODEL_TYPES.ImageTextToText) {
|
|
31356
|
-
add_model_file("embed_tokens");
|
|
31357
|
-
add_model_file("vision_encoder");
|
|
31358
|
-
add_model_file("decoder_model_merged");
|
|
31359
|
-
if (config.is_encoder_decoder) {
|
|
31360
|
-
add_model_file("model", "encoder_model");
|
|
31361
|
-
}
|
|
31362
|
-
files.push("generation_config.json");
|
|
31363
|
-
} else if (modelType === MODEL_TYPES.AudioTextToText) {
|
|
31364
|
-
add_model_file("embed_tokens");
|
|
31365
|
-
add_model_file("audio_encoder");
|
|
31366
|
-
add_model_file("decoder_model_merged");
|
|
31367
|
-
files.push("generation_config.json");
|
|
31368
|
-
} else if (modelType === MODEL_TYPES.ImageAudioTextToText) {
|
|
31369
|
-
add_model_file("embed_tokens");
|
|
31370
|
-
add_model_file("audio_encoder");
|
|
31371
|
-
add_model_file("vision_encoder");
|
|
31372
|
-
add_model_file("decoder_model_merged");
|
|
31373
|
-
files.push("generation_config.json");
|
|
31374
|
-
} else if (modelType === MODEL_TYPES.Musicgen) {
|
|
31375
|
-
add_model_file("model", "text_encoder");
|
|
31376
|
-
add_model_file("decoder_model_merged");
|
|
31377
|
-
add_model_file("encodec_decode");
|
|
31378
|
-
files.push("generation_config.json");
|
|
31379
|
-
} else if (modelType === MODEL_TYPES.MultiModality) {
|
|
31380
|
-
add_model_file("prepare_inputs_embeds");
|
|
31381
|
-
add_model_file("model", "language_model");
|
|
31382
|
-
add_model_file("lm_head");
|
|
31383
|
-
add_model_file("gen_head");
|
|
31384
|
-
add_model_file("gen_img_embeds");
|
|
31385
|
-
add_model_file("image_decode");
|
|
31386
|
-
files.push("generation_config.json");
|
|
31387
|
-
} else if (modelType === MODEL_TYPES.Phi3V) {
|
|
31388
|
-
add_model_file("prepare_inputs_embeds");
|
|
31389
|
-
add_model_file("model");
|
|
31390
|
-
add_model_file("vision_encoder");
|
|
31391
|
-
files.push("generation_config.json");
|
|
31392
|
-
} else if (modelType === MODEL_TYPES.Chatterbox) {
|
|
31393
|
-
add_model_file("embed_tokens");
|
|
31394
|
-
add_model_file("speech_encoder");
|
|
31395
|
-
add_model_file("model", "language_model");
|
|
31396
|
-
add_model_file("conditional_decoder");
|
|
31397
|
-
files.push("generation_config.json");
|
|
31398
|
-
} else if (modelType === MODEL_TYPES.AutoEncoder) {
|
|
31399
|
-
add_model_file("encoder_model");
|
|
31400
|
-
add_model_file("decoder_model");
|
|
31401
|
-
} else if (modelType === MODEL_TYPES.Supertonic) {
|
|
31402
|
-
add_model_file("text_encoder");
|
|
31403
|
-
add_model_file("latent_denoiser");
|
|
31404
|
-
add_model_file("voice_decoder");
|
|
31405
|
-
} else {
|
|
31406
|
-
add_model_file("model", singleModelName);
|
|
32496
|
+
const { sessions, optional_configs } = getSessionsConfig(modelType, config, { model_file_name });
|
|
32497
|
+
for (const [sessionKey, baseName] of Object.entries(sessions)) {
|
|
32498
|
+
add_model_file(sessionKey, baseName);
|
|
32499
|
+
}
|
|
32500
|
+
if (optional_configs) {
|
|
32501
|
+
for (const configFile of Object.values(optional_configs)) {
|
|
32502
|
+
files.push(configFile);
|
|
32503
|
+
}
|
|
31407
32504
|
}
|
|
31408
32505
|
return files;
|
|
31409
32506
|
}
|
|
@@ -31439,28 +32536,21 @@ async function get_files(modelId, {
|
|
|
31439
32536
|
}
|
|
31440
32537
|
|
|
31441
32538
|
// src/utils/model_registry/get_pipeline_files.js
|
|
31442
|
-
function get_task_components(task) {
|
|
31443
|
-
const taskConfig = SUPPORTED_TASKS[task];
|
|
31444
|
-
if (!taskConfig) {
|
|
31445
|
-
return null;
|
|
31446
|
-
}
|
|
31447
|
-
return {
|
|
31448
|
-
tokenizer: !!taskConfig.tokenizer,
|
|
31449
|
-
processor: !!taskConfig.processor
|
|
31450
|
-
};
|
|
31451
|
-
}
|
|
31452
32539
|
async function get_pipeline_files(task, modelId, options = {}) {
|
|
31453
32540
|
task = TASK_ALIASES[task] ?? task;
|
|
31454
|
-
const
|
|
31455
|
-
if (!
|
|
32541
|
+
const taskConfig = SUPPORTED_TASKS[task];
|
|
32542
|
+
if (!taskConfig) {
|
|
31456
32543
|
throw new Error(
|
|
31457
32544
|
`Unsupported pipeline task: ${task}. Must be one of [${Object.keys(SUPPORTED_TASKS).join(", ")}]`
|
|
31458
32545
|
);
|
|
31459
32546
|
}
|
|
32547
|
+
const { type } = taskConfig;
|
|
32548
|
+
const include_tokenizer = type !== "audio" && type !== "image";
|
|
32549
|
+
const include_processor = type !== "text";
|
|
31460
32550
|
return get_files(modelId, {
|
|
31461
32551
|
...options,
|
|
31462
|
-
include_tokenizer
|
|
31463
|
-
include_processor
|
|
32552
|
+
include_tokenizer,
|
|
32553
|
+
include_processor
|
|
31464
32554
|
});
|
|
31465
32555
|
}
|
|
31466
32556
|
|
|
@@ -31490,12 +32580,12 @@ async function pipeline2(task, model = null, {
|
|
|
31490
32580
|
dtype = pipelineInfo.default.dtype;
|
|
31491
32581
|
}
|
|
31492
32582
|
}
|
|
32583
|
+
const expected_files = await get_pipeline_files(task, model, {
|
|
32584
|
+
device,
|
|
32585
|
+
dtype
|
|
32586
|
+
});
|
|
31493
32587
|
let files_loading = {};
|
|
31494
32588
|
if (progress_callback) {
|
|
31495
|
-
const expected_files = await get_pipeline_files(task, model, {
|
|
31496
|
-
device,
|
|
31497
|
-
dtype
|
|
31498
|
-
});
|
|
31499
32589
|
const metadata = await Promise.all(expected_files.map(async (file) => get_file_metadata(model, file)));
|
|
31500
32590
|
metadata.forEach((m, i) => {
|
|
31501
32591
|
if (m.exists) {
|
|
@@ -31541,13 +32631,31 @@ async function pipeline2(task, model = null, {
|
|
|
31541
32631
|
model_file_name,
|
|
31542
32632
|
session_options
|
|
31543
32633
|
};
|
|
31544
|
-
const
|
|
31545
|
-
|
|
31546
|
-
|
|
31547
|
-
|
|
32634
|
+
const hasTokenizer = expected_files.includes("tokenizer.json");
|
|
32635
|
+
const hasProcessor = expected_files.includes("preprocessor_config.json");
|
|
32636
|
+
const modelClasses = pipelineInfo.model;
|
|
32637
|
+
let modelPromise;
|
|
32638
|
+
if (Array.isArray(modelClasses)) {
|
|
32639
|
+
const resolvedConfig = config ?? await AutoConfig.from_pretrained(model, pretrainedOptions);
|
|
32640
|
+
const { model_type } = resolvedConfig;
|
|
32641
|
+
const matchedClass = modelClasses.find((cls) => cls.supports(model_type));
|
|
32642
|
+
if (!matchedClass) {
|
|
32643
|
+
throw Error(
|
|
32644
|
+
`Unsupported model type "${model_type}" for task "${task}". None of the candidate model classes support this type.`
|
|
32645
|
+
);
|
|
32646
|
+
}
|
|
32647
|
+
modelPromise = matchedClass.from_pretrained(model, { ...pretrainedOptions, config: resolvedConfig });
|
|
32648
|
+
} else {
|
|
32649
|
+
modelPromise = modelClasses.from_pretrained(model, pretrainedOptions);
|
|
32650
|
+
}
|
|
32651
|
+
const [tokenizer, processor, model_loaded] = await Promise.all([
|
|
32652
|
+
hasTokenizer ? AutoTokenizer.from_pretrained(model, pretrainedOptions) : null,
|
|
32653
|
+
hasProcessor ? AutoProcessor.from_pretrained(model, pretrainedOptions) : null,
|
|
32654
|
+
modelPromise
|
|
31548
32655
|
]);
|
|
31549
|
-
const results =
|
|
31550
|
-
results.
|
|
32656
|
+
const results = { task, model: model_loaded };
|
|
32657
|
+
if (tokenizer) results.tokenizer = tokenizer;
|
|
32658
|
+
if (processor) results.processor = processor;
|
|
31551
32659
|
dispatchCallback(progress_callback, {
|
|
31552
32660
|
status: "ready",
|
|
31553
32661
|
task,
|
|
@@ -31556,48 +32664,6 @@ async function pipeline2(task, model = null, {
|
|
|
31556
32664
|
const pipelineClass = pipelineInfo.pipeline;
|
|
31557
32665
|
return new pipelineClass(results);
|
|
31558
32666
|
}
|
|
31559
|
-
async function loadItems(mapping, model, pretrainedOptions) {
|
|
31560
|
-
const result = /* @__PURE__ */ Object.create(null);
|
|
31561
|
-
const promises = [];
|
|
31562
|
-
for (const [name, cls] of mapping.entries()) {
|
|
31563
|
-
if (!cls) continue;
|
|
31564
|
-
let promise;
|
|
31565
|
-
if (Array.isArray(cls)) {
|
|
31566
|
-
promise = new Promise(async (resolve, reject) => {
|
|
31567
|
-
let e;
|
|
31568
|
-
for (const c of cls) {
|
|
31569
|
-
if (c === null) {
|
|
31570
|
-
resolve(null);
|
|
31571
|
-
return;
|
|
31572
|
-
}
|
|
31573
|
-
try {
|
|
31574
|
-
resolve(await c.from_pretrained(model, pretrainedOptions));
|
|
31575
|
-
return;
|
|
31576
|
-
} catch (err) {
|
|
31577
|
-
if (err.message?.includes("Unsupported model type")) {
|
|
31578
|
-
e = err;
|
|
31579
|
-
} else if (err.message?.includes("Could not locate file")) {
|
|
31580
|
-
e = err;
|
|
31581
|
-
} else {
|
|
31582
|
-
reject(err);
|
|
31583
|
-
return;
|
|
31584
|
-
}
|
|
31585
|
-
}
|
|
31586
|
-
}
|
|
31587
|
-
reject(e);
|
|
31588
|
-
});
|
|
31589
|
-
} else {
|
|
31590
|
-
promise = cls.from_pretrained(model, pretrainedOptions);
|
|
31591
|
-
}
|
|
31592
|
-
result[name] = promise;
|
|
31593
|
-
promises.push(promise);
|
|
31594
|
-
}
|
|
31595
|
-
await Promise.all(promises);
|
|
31596
|
-
for (const [name, promise] of Object.entries(result)) {
|
|
31597
|
-
result[name] = await promise;
|
|
31598
|
-
}
|
|
31599
|
-
return result;
|
|
31600
|
-
}
|
|
31601
32667
|
|
|
31602
32668
|
// src/generation/streamers.js
|
|
31603
32669
|
var is_chinese_char2 = (cp) => cp >= 19968 && cp <= 40959 || cp >= 13312 && cp <= 19903 || cp >= 131072 && cp <= 173791 || cp >= 173824 && cp <= 177983 || cp >= 177984 && cp <= 178207 || cp >= 178208 && cp <= 183983 || cp >= 63744 && cp <= 64255 || cp >= 194560 && cp <= 195103;
|
|
@@ -31885,21 +32951,38 @@ async function load_video(src, { num_frames = null, fps = null } = {}) {
|
|
|
31885
32951
|
|
|
31886
32952
|
// src/utils/model_registry/is_cached.js
|
|
31887
32953
|
async function check_files_cache(modelId, files, options = {}) {
|
|
31888
|
-
const
|
|
31889
|
-
if (!
|
|
32954
|
+
const cache2 = await getCache(options?.cache_dir);
|
|
32955
|
+
if (!cache2) {
|
|
31890
32956
|
const fileStatuses2 = files.map((filename) => ({ file: filename, cached: false }));
|
|
31891
32957
|
return { allCached: false, files: fileStatuses2 };
|
|
31892
32958
|
}
|
|
31893
32959
|
const fileStatuses = await Promise.all(
|
|
31894
32960
|
files.map(async (filename) => {
|
|
31895
|
-
const { localPath, proposedCacheKey } = buildResourcePaths(modelId, filename, options,
|
|
31896
|
-
const cached = await checkCachedResource(
|
|
32961
|
+
const { localPath, proposedCacheKey } = buildResourcePaths(modelId, filename, options, cache2);
|
|
32962
|
+
const cached = await checkCachedResource(cache2, localPath, proposedCacheKey);
|
|
31897
32963
|
return { file: filename, cached: !!cached };
|
|
31898
32964
|
})
|
|
31899
32965
|
);
|
|
31900
32966
|
return { allCached: fileStatuses.every((f) => f.cached), files: fileStatuses };
|
|
31901
32967
|
}
|
|
32968
|
+
async function is_file_cached(modelId, filename, options = {}) {
|
|
32969
|
+
const cache2 = await getCache(options?.cache_dir);
|
|
32970
|
+
if (!cache2) return false;
|
|
32971
|
+
const { localPath, proposedCacheKey } = buildResourcePaths(modelId, filename, options, cache2);
|
|
32972
|
+
return !!await checkCachedResource(cache2, localPath, proposedCacheKey);
|
|
32973
|
+
}
|
|
31902
32974
|
async function is_cached(modelId, options = {}) {
|
|
32975
|
+
if (!modelId) {
|
|
32976
|
+
throw new Error("modelId is required");
|
|
32977
|
+
}
|
|
32978
|
+
if (!await is_file_cached(modelId, "config.json", options)) {
|
|
32979
|
+
return false;
|
|
32980
|
+
}
|
|
32981
|
+
const files = await get_files(modelId, options);
|
|
32982
|
+
const result = await check_files_cache(modelId, files, options);
|
|
32983
|
+
return result.allCached;
|
|
32984
|
+
}
|
|
32985
|
+
async function is_cached_files(modelId, options = {}) {
|
|
31903
32986
|
if (!modelId) {
|
|
31904
32987
|
throw new Error("modelId is required");
|
|
31905
32988
|
}
|
|
@@ -31907,6 +32990,20 @@ async function is_cached(modelId, options = {}) {
|
|
|
31907
32990
|
return await check_files_cache(modelId, files, options);
|
|
31908
32991
|
}
|
|
31909
32992
|
async function is_pipeline_cached(task, modelId, options = {}) {
|
|
32993
|
+
if (!task) {
|
|
32994
|
+
throw new Error("task is required");
|
|
32995
|
+
}
|
|
32996
|
+
if (!modelId) {
|
|
32997
|
+
throw new Error("modelId is required");
|
|
32998
|
+
}
|
|
32999
|
+
if (!await is_file_cached(modelId, "config.json", options)) {
|
|
33000
|
+
return false;
|
|
33001
|
+
}
|
|
33002
|
+
const files = await get_pipeline_files(task, modelId, options);
|
|
33003
|
+
const result = await check_files_cache(modelId, files, options);
|
|
33004
|
+
return result.allCached;
|
|
33005
|
+
}
|
|
33006
|
+
async function is_pipeline_cached_files(task, modelId, options = {}) {
|
|
31910
33007
|
if (!task) {
|
|
31911
33008
|
throw new Error("task is required");
|
|
31912
33009
|
}
|
|
@@ -31919,26 +33016,26 @@ async function is_pipeline_cached(task, modelId, options = {}) {
|
|
|
31919
33016
|
|
|
31920
33017
|
// src/utils/model_registry/clear_cache.js
|
|
31921
33018
|
async function clear_files_from_cache(modelId, files, options = {}) {
|
|
31922
|
-
const
|
|
31923
|
-
if (!
|
|
33019
|
+
const cache2 = await getCache(options?.cache_dir);
|
|
33020
|
+
if (!cache2) {
|
|
31924
33021
|
return {
|
|
31925
33022
|
filesDeleted: 0,
|
|
31926
33023
|
filesCached: 0,
|
|
31927
33024
|
files: files.map((filename) => ({ file: filename, deleted: false, wasCached: false }))
|
|
31928
33025
|
};
|
|
31929
33026
|
}
|
|
31930
|
-
if (!
|
|
33027
|
+
if (!cache2.delete) {
|
|
31931
33028
|
throw new Error("Cache does not support delete operation");
|
|
31932
33029
|
}
|
|
31933
33030
|
const results = await Promise.all(
|
|
31934
33031
|
files.map(async (filename) => {
|
|
31935
|
-
const { localPath, proposedCacheKey } = buildResourcePaths(modelId, filename, options,
|
|
31936
|
-
const cached = await checkCachedResource(
|
|
33032
|
+
const { localPath, proposedCacheKey } = buildResourcePaths(modelId, filename, options, cache2);
|
|
33033
|
+
const cached = await checkCachedResource(cache2, localPath, proposedCacheKey);
|
|
31937
33034
|
const wasCached = !!cached;
|
|
31938
33035
|
let deleted = false;
|
|
31939
33036
|
if (wasCached) {
|
|
31940
|
-
const deletedWithProposed = await
|
|
31941
|
-
const deletedWithLocal = !deletedWithProposed && proposedCacheKey !== localPath ? await
|
|
33037
|
+
const deletedWithProposed = await cache2.delete(proposedCacheKey);
|
|
33038
|
+
const deletedWithLocal = !deletedWithProposed && proposedCacheKey !== localPath ? await cache2.delete(localPath) : false;
|
|
31942
33039
|
deleted = deletedWithProposed || deletedWithLocal;
|
|
31943
33040
|
}
|
|
31944
33041
|
return { file: filename, deleted, wasCached };
|
|
@@ -32055,26 +33152,30 @@ var ModelRegistry = class {
|
|
|
32055
33152
|
return get_processor_files(modelId);
|
|
32056
33153
|
}
|
|
32057
33154
|
/**
|
|
32058
|
-
*
|
|
33155
|
+
* Quickly checks if a model is fully cached by verifying `config.json` is present,
|
|
33156
|
+
* then confirming all required files are cached.
|
|
33157
|
+
* Returns a plain boolean — use `is_cached_files` if you need per-file detail.
|
|
32059
33158
|
*
|
|
32060
33159
|
* @param {string} modelId - The model id
|
|
32061
33160
|
* @param {Object} [options] - Optional parameters
|
|
33161
|
+
* @param {string} [options.cache_dir] - Custom cache directory
|
|
33162
|
+
* @param {string} [options.revision] - Model revision (default: 'main')
|
|
33163
|
+
* @param {import('../../configs.js').PretrainedConfig} [options.config] - Pre-loaded config
|
|
32062
33164
|
* @param {import('../dtypes.js').DataType|Record<string, import('../dtypes.js').DataType>} [options.dtype=null] - Override dtype
|
|
32063
33165
|
* @param {import('../devices.js').DeviceType|Record<string, import('../devices.js').DeviceType>} [options.device=null] - Override device
|
|
32064
|
-
* @returns {Promise<
|
|
33166
|
+
* @returns {Promise<boolean>} Whether all required files are cached
|
|
32065
33167
|
*
|
|
32066
33168
|
* @example
|
|
32067
|
-
* const
|
|
32068
|
-
* console.log(
|
|
33169
|
+
* const cached = await ModelRegistry.is_cached('onnx-community/bert-base-uncased-ONNX');
|
|
33170
|
+
* console.log(cached); // true or false
|
|
32069
33171
|
*/
|
|
32070
33172
|
static async is_cached(modelId, options = {}) {
|
|
32071
33173
|
return is_cached(modelId, options);
|
|
32072
33174
|
}
|
|
32073
33175
|
/**
|
|
32074
|
-
*
|
|
32075
|
-
* Automatically determines which
|
|
33176
|
+
* Checks if all files for a given model are already cached, with per-file detail.
|
|
33177
|
+
* Automatically determines which files are needed using get_files().
|
|
32076
33178
|
*
|
|
32077
|
-
* @param {string} task - The pipeline task (e.g., "text-generation", "background-removal")
|
|
32078
33179
|
* @param {string} modelId - The model id
|
|
32079
33180
|
* @param {Object} [options] - Optional parameters
|
|
32080
33181
|
* @param {string} [options.cache_dir] - Custom cache directory
|
|
@@ -32085,12 +33186,57 @@ var ModelRegistry = class {
|
|
|
32085
33186
|
* @returns {Promise<import('./is_cached.js').CacheCheckResult>} Object with allCached boolean and files array with cache status
|
|
32086
33187
|
*
|
|
32087
33188
|
* @example
|
|
32088
|
-
* const status = await ModelRegistry.
|
|
33189
|
+
* const status = await ModelRegistry.is_cached_files('onnx-community/bert-base-uncased-ONNX');
|
|
32089
33190
|
* console.log(status.allCached); // true or false
|
|
33191
|
+
* console.log(status.files); // [{ file: 'config.json', cached: true }, ...]
|
|
33192
|
+
*/
|
|
33193
|
+
static async is_cached_files(modelId, options = {}) {
|
|
33194
|
+
return is_cached_files(modelId, options);
|
|
33195
|
+
}
|
|
33196
|
+
/**
|
|
33197
|
+
* Quickly checks if all files for a specific pipeline task are cached by verifying
|
|
33198
|
+
* `config.json` is present, then confirming all required files are cached.
|
|
33199
|
+
* Returns a plain boolean — use `is_pipeline_cached_files` if you need per-file detail.
|
|
33200
|
+
*
|
|
33201
|
+
* @param {string} task - The pipeline task (e.g., "text-generation", "background-removal")
|
|
33202
|
+
* @param {string} modelId - The model id
|
|
33203
|
+
* @param {Object} [options] - Optional parameters
|
|
33204
|
+
* @param {string} [options.cache_dir] - Custom cache directory
|
|
33205
|
+
* @param {string} [options.revision] - Model revision (default: 'main')
|
|
33206
|
+
* @param {import('../../configs.js').PretrainedConfig} [options.config] - Pre-loaded config
|
|
33207
|
+
* @param {import('../dtypes.js').DataType|Record<string, import('../dtypes.js').DataType>} [options.dtype=null] - Override dtype
|
|
33208
|
+
* @param {import('../devices.js').DeviceType|Record<string, import('../devices.js').DeviceType>} [options.device=null] - Override device
|
|
33209
|
+
* @returns {Promise<boolean>} Whether all required files are cached
|
|
33210
|
+
*
|
|
33211
|
+
* @example
|
|
33212
|
+
* const cached = await ModelRegistry.is_pipeline_cached('text-generation', 'onnx-community/gpt2-ONNX');
|
|
33213
|
+
* console.log(cached); // true or false
|
|
32090
33214
|
*/
|
|
32091
33215
|
static async is_pipeline_cached(task, modelId, options = {}) {
|
|
32092
33216
|
return is_pipeline_cached(task, modelId, options);
|
|
32093
33217
|
}
|
|
33218
|
+
/**
|
|
33219
|
+
* Checks if all files for a specific pipeline task are already cached, with per-file detail.
|
|
33220
|
+
* Automatically determines which components are needed based on the task.
|
|
33221
|
+
*
|
|
33222
|
+
* @param {string} task - The pipeline task (e.g., "text-generation", "background-removal")
|
|
33223
|
+
* @param {string} modelId - The model id
|
|
33224
|
+
* @param {Object} [options] - Optional parameters
|
|
33225
|
+
* @param {string} [options.cache_dir] - Custom cache directory
|
|
33226
|
+
* @param {string} [options.revision] - Model revision (default: 'main')
|
|
33227
|
+
* @param {import('../../configs.js').PretrainedConfig} [options.config] - Pre-loaded config
|
|
33228
|
+
* @param {import('../dtypes.js').DataType|Record<string, import('../dtypes.js').DataType>} [options.dtype=null] - Override dtype
|
|
33229
|
+
* @param {import('../devices.js').DeviceType|Record<string, import('../devices.js').DeviceType>} [options.device=null] - Override device
|
|
33230
|
+
* @returns {Promise<import('./is_cached.js').CacheCheckResult>} Object with allCached boolean and files array with cache status
|
|
33231
|
+
*
|
|
33232
|
+
* @example
|
|
33233
|
+
* const status = await ModelRegistry.is_pipeline_cached_files('text-generation', 'onnx-community/gpt2-ONNX');
|
|
33234
|
+
* console.log(status.allCached); // true or false
|
|
33235
|
+
* console.log(status.files); // [{ file: 'config.json', cached: true }, ...]
|
|
33236
|
+
*/
|
|
33237
|
+
static async is_pipeline_cached_files(task, modelId, options = {}) {
|
|
33238
|
+
return is_pipeline_cached_files(task, modelId, options);
|
|
33239
|
+
}
|
|
32094
33240
|
/**
|
|
32095
33241
|
* Get metadata for a specific file without downloading it.
|
|
32096
33242
|
*
|
|
@@ -32370,6 +33516,7 @@ export {
|
|
|
32370
33516
|
DonutImageProcessor,
|
|
32371
33517
|
DonutSwinModel,
|
|
32372
33518
|
DonutSwinPreTrainedModel,
|
|
33519
|
+
DynamicCache,
|
|
32373
33520
|
EdgeTamModel,
|
|
32374
33521
|
EfficientNetForImageClassification,
|
|
32375
33522
|
EfficientNetImageProcessor,
|
|
@@ -32442,6 +33589,7 @@ export {
|
|
|
32442
33589
|
Gemma3Model,
|
|
32443
33590
|
Gemma3PreTrainedModel,
|
|
32444
33591
|
Gemma3nAudioFeatureExtractor,
|
|
33592
|
+
Gemma3nForCausalLM,
|
|
32445
33593
|
Gemma3nForConditionalGeneration,
|
|
32446
33594
|
Gemma3nPreTrainedModel,
|
|
32447
33595
|
Gemma3nProcessor,
|
|
@@ -32461,6 +33609,9 @@ export {
|
|
|
32461
33609
|
GraniteMoeHybridModel,
|
|
32462
33610
|
GraniteMoeHybridPreTrainedModel,
|
|
32463
33611
|
GranitePreTrainedModel,
|
|
33612
|
+
GraniteSpeechFeatureExtractor,
|
|
33613
|
+
GraniteSpeechForConditionalGeneration,
|
|
33614
|
+
GraniteSpeechProcessor,
|
|
32464
33615
|
GroundingDinoForObjectDetection,
|
|
32465
33616
|
GroundingDinoImageProcessor,
|
|
32466
33617
|
GroundingDinoPreTrainedModel,
|
|
@@ -32486,7 +33637,6 @@ export {
|
|
|
32486
33637
|
IJepaPreTrainedModel,
|
|
32487
33638
|
Idefics3ForConditionalGeneration,
|
|
32488
33639
|
Idefics3ImageProcessor,
|
|
32489
|
-
Idefics3PreTrainedModel,
|
|
32490
33640
|
Idefics3Processor,
|
|
32491
33641
|
ImageClassificationPipeline,
|
|
32492
33642
|
ImageFeatureExtractionPipeline,
|
|
@@ -32511,6 +33661,9 @@ export {
|
|
|
32511
33661
|
Lfm2MoeModel,
|
|
32512
33662
|
Lfm2MoePreTrainedModel,
|
|
32513
33663
|
Lfm2PreTrainedModel,
|
|
33664
|
+
Lfm2VlForConditionalGeneration,
|
|
33665
|
+
Lfm2VlImageProcessor,
|
|
33666
|
+
Lfm2VlProcessor,
|
|
32514
33667
|
LiteWhisperForConditionalGeneration,
|
|
32515
33668
|
Llama4ForCausalLM,
|
|
32516
33669
|
Llama4PreTrainedModel,
|
|
@@ -32675,6 +33828,9 @@ export {
|
|
|
32675
33828
|
Olmo3Model,
|
|
32676
33829
|
Olmo3PreTrainedModel,
|
|
32677
33830
|
OlmoForCausalLM,
|
|
33831
|
+
OlmoHybridForCausalLM,
|
|
33832
|
+
OlmoHybridModel,
|
|
33833
|
+
OlmoHybridPreTrainedModel,
|
|
32678
33834
|
OlmoModel,
|
|
32679
33835
|
OlmoPreTrainedModel,
|
|
32680
33836
|
OpenELMForCausalLM,
|
|
@@ -32691,7 +33847,6 @@ export {
|
|
|
32691
33847
|
Owlv2Model,
|
|
32692
33848
|
Owlv2PreTrainedModel,
|
|
32693
33849
|
PaliGemmaForConditionalGeneration,
|
|
32694
|
-
PaliGemmaPreTrainedModel,
|
|
32695
33850
|
PaliGemmaProcessor,
|
|
32696
33851
|
ParakeetFeatureExtractor,
|
|
32697
33852
|
ParakeetForCTC,
|
|
@@ -32730,20 +33885,36 @@ export {
|
|
|
32730
33885
|
QuestionAnsweringPipeline,
|
|
32731
33886
|
Qwen2ForCausalLM,
|
|
32732
33887
|
Qwen2Model,
|
|
33888
|
+
Qwen2MoeForCausalLM,
|
|
33889
|
+
Qwen2MoeModel,
|
|
33890
|
+
Qwen2MoePreTrainedModel,
|
|
32733
33891
|
Qwen2PreTrainedModel,
|
|
32734
33892
|
Qwen2Tokenizer,
|
|
33893
|
+
Qwen2VLForCausalLM,
|
|
32735
33894
|
Qwen2VLForConditionalGeneration,
|
|
32736
33895
|
Qwen2VLImageProcessor,
|
|
32737
33896
|
Qwen2VLPreTrainedModel,
|
|
32738
33897
|
Qwen2VLProcessor,
|
|
33898
|
+
Qwen2_5_VLForCausalLM,
|
|
32739
33899
|
Qwen2_5_VLForConditionalGeneration,
|
|
32740
33900
|
Qwen2_5_VLProcessor,
|
|
32741
33901
|
Qwen3ForCausalLM,
|
|
32742
33902
|
Qwen3Model,
|
|
33903
|
+
Qwen3MoeForCausalLM,
|
|
33904
|
+
Qwen3MoeModel,
|
|
33905
|
+
Qwen3MoePreTrainedModel,
|
|
33906
|
+
Qwen3NextForCausalLM,
|
|
33907
|
+
Qwen3NextModel,
|
|
33908
|
+
Qwen3NextPreTrainedModel,
|
|
32743
33909
|
Qwen3PreTrainedModel,
|
|
33910
|
+
Qwen3VLForCausalLM,
|
|
32744
33911
|
Qwen3VLForConditionalGeneration,
|
|
33912
|
+
Qwen3VLMoeForCausalLM,
|
|
33913
|
+
Qwen3VLMoeForConditionalGeneration,
|
|
32745
33914
|
Qwen3VLProcessor,
|
|
33915
|
+
Qwen3_5ForCausalLM,
|
|
32746
33916
|
Qwen3_5ForConditionalGeneration,
|
|
33917
|
+
Qwen3_5MoeForCausalLM,
|
|
32747
33918
|
Qwen3_5MoeForConditionalGeneration,
|
|
32748
33919
|
RFDetrForObjectDetection,
|
|
32749
33920
|
RFDetrModel,
|
|
@@ -32815,7 +33986,6 @@ export {
|
|
|
32815
33986
|
SmolLM3ForCausalLM,
|
|
32816
33987
|
SmolLM3Model,
|
|
32817
33988
|
SmolLM3PreTrainedModel,
|
|
32818
|
-
SmolVLMForConditionalGeneration,
|
|
32819
33989
|
Idefics3ImageProcessor as SmolVLMImageProcessor,
|
|
32820
33990
|
Idefics3Processor as SmolVLMProcessor,
|
|
32821
33991
|
SnacDecoderModel,
|
|
@@ -32921,6 +34091,10 @@ export {
|
|
|
32921
34091
|
VitsTokenizer,
|
|
32922
34092
|
VoxtralForConditionalGeneration,
|
|
32923
34093
|
VoxtralProcessor,
|
|
34094
|
+
VoxtralRealtimeFeatureExtractor,
|
|
34095
|
+
VoxtralRealtimeForConditionalGeneration,
|
|
34096
|
+
VoxtralRealtimePreTrainedModel,
|
|
34097
|
+
VoxtralRealtimeProcessor,
|
|
32924
34098
|
Wav2Vec2BertForCTC,
|
|
32925
34099
|
Wav2Vec2BertForSequenceClassification,
|
|
32926
34100
|
Wav2Vec2BertModel,
|
|
@@ -33016,7 +34190,7 @@ export {
|
|
|
33016
34190
|
|
|
33017
34191
|
onnxruntime-web/dist/ort.webgpu.bundle.min.mjs:
|
|
33018
34192
|
(*!
|
|
33019
|
-
* ONNX Runtime Web v1.25.0-dev.
|
|
34193
|
+
* ONNX Runtime Web v1.25.0-dev.20260307-d626b568e0
|
|
33020
34194
|
* Copyright (c) Microsoft Corporation. All rights reserved.
|
|
33021
34195
|
* Licensed under the MIT License.
|
|
33022
34196
|
*)
|