@huggingface/transformers 4.0.0-next.7 → 4.0.0-next.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -2
- package/dist/ort-wasm-simd-threaded.jsep.mjs +26 -26
- package/dist/transformers.js +1002 -587
- package/dist/transformers.min.js +23 -19
- package/dist/transformers.node.cjs +1030 -585
- package/dist/transformers.node.min.cjs +21 -17
- package/dist/transformers.node.min.mjs +21 -17
- package/dist/transformers.node.mjs +1000 -585
- package/dist/transformers.web.js +887 -472
- package/dist/transformers.web.min.js +21 -17
- package/package.json +3 -3
- package/src/configs.js +28 -22
- package/src/env.js +1 -1
- package/src/image_processors_utils.js +25 -15
- package/src/models/chmv2/image_processing_chmv2.js +3 -0
- package/src/models/chmv2/modeling_chmv2.js +4 -0
- package/src/models/deepseek_v3/modeling_deepseek_v3.js +5 -0
- package/src/models/eurobert/modeling_eurobert.js +41 -0
- package/src/models/gemma3/image_processing_gemma3.js +3 -0
- package/src/models/gemma3/modeling_gemma3.js +4 -1
- package/src/models/gemma3/processing_gemma3.js +45 -0
- package/src/models/glm46v/image_processing_glm46v.js +12 -0
- package/src/models/glm46v/processing_glm46v.js +5 -0
- package/src/models/glm_moe_dsa/modeling_glm_moe_dsa.js +5 -0
- package/src/models/glm_ocr/modeling_glm_ocr.js +78 -0
- package/src/models/image_processors.js +3 -0
- package/src/models/lfm2_vl/image_processing_lfm2_vl.js +1 -1
- package/src/models/lighton_ocr/modeling_lighton_ocr.js +3 -0
- package/src/models/mistral4/modeling_mistral4.js +5 -0
- package/src/models/modeling_utils.js +48 -25
- package/src/models/models.js +10 -1
- package/src/models/nemotron_h/modeling_nemotron_h.js +5 -0
- package/src/models/processors.js +2 -0
- package/src/models/qwen2_vl/modeling_qwen2_vl.js +226 -168
- package/src/models/qwen2_vl/processing_qwen2_vl.js +5 -4
- package/src/models/registry.js +19 -8
- package/src/models/solar_open/modeling_solar_open.js +5 -0
- package/src/pipelines.js +1 -0
- package/src/utils/hub.js +4 -1
- package/src/utils/model_registry/ModelRegistry.js +36 -0
- package/src/utils/model_registry/get_available_dtypes.js +68 -0
- package/src/utils/model_registry/get_file_metadata.js +1 -0
- package/src/utils/model_registry/get_model_files.js +7 -60
- package/src/utils/model_registry/resolve_model_type.js +66 -0
- package/types/configs.d.ts.map +1 -1
- package/types/image_processors_utils.d.ts +3 -2
- package/types/image_processors_utils.d.ts.map +1 -1
- package/types/models/{ast/modeling_ast.d.ts → audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.d.ts} +1 -1
- package/types/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.d.ts.map +1 -0
- package/types/models/chmv2/image_processing_chmv2.d.ts +4 -0
- package/types/models/chmv2/image_processing_chmv2.d.ts.map +1 -0
- package/types/models/chmv2/modeling_chmv2.d.ts +6 -0
- package/types/models/chmv2/modeling_chmv2.d.ts.map +1 -0
- package/types/models/deepseek_v3/modeling_deepseek_v3.d.ts +8 -0
- package/types/models/deepseek_v3/modeling_deepseek_v3.d.ts.map +1 -0
- package/types/models/eurobert/modeling_eurobert.d.ts +36 -0
- package/types/models/eurobert/modeling_eurobert.d.ts.map +1 -0
- package/types/models/gemma3/image_processing_gemma3.d.ts +4 -0
- package/types/models/gemma3/image_processing_gemma3.d.ts.map +1 -0
- package/types/models/gemma3/modeling_gemma3.d.ts +4 -1
- package/types/models/gemma3/modeling_gemma3.d.ts.map +1 -1
- package/types/models/gemma3/processing_gemma3.d.ts +20 -0
- package/types/models/gemma3/processing_gemma3.d.ts.map +1 -0
- package/types/models/glm46v/image_processing_glm46v.d.ts +4 -0
- package/types/models/glm46v/image_processing_glm46v.d.ts.map +1 -0
- package/types/models/glm46v/processing_glm46v.d.ts +4 -0
- package/types/models/glm46v/processing_glm46v.d.ts.map +1 -0
- package/types/models/glm_moe_dsa/modeling_glm_moe_dsa.d.ts +8 -0
- package/types/models/glm_moe_dsa/modeling_glm_moe_dsa.d.ts.map +1 -0
- package/types/models/glm_ocr/modeling_glm_ocr.d.ts +26 -0
- package/types/models/glm_ocr/modeling_glm_ocr.d.ts.map +1 -0
- package/types/models/image_processors.d.ts +3 -0
- package/types/models/lighton_ocr/modeling_lighton_ocr.d.ts +4 -0
- package/types/models/lighton_ocr/modeling_lighton_ocr.d.ts.map +1 -0
- package/types/models/mistral4/modeling_mistral4.d.ts +8 -0
- package/types/models/mistral4/modeling_mistral4.d.ts.map +1 -0
- package/types/models/modeling_utils.d.ts +2 -3
- package/types/models/modeling_utils.d.ts.map +1 -1
- package/types/models/models.d.ts +10 -1
- package/types/models/nemotron_h/modeling_nemotron_h.d.ts +8 -0
- package/types/models/nemotron_h/modeling_nemotron_h.d.ts.map +1 -0
- package/types/models/processors.d.ts +2 -0
- package/types/models/qwen2_vl/modeling_qwen2_vl.d.ts +41 -6
- package/types/models/qwen2_vl/modeling_qwen2_vl.d.ts.map +1 -1
- package/types/models/qwen2_vl/processing_qwen2_vl.d.ts +1 -0
- package/types/models/qwen2_vl/processing_qwen2_vl.d.ts.map +1 -1
- package/types/models/registry.d.ts.map +1 -1
- package/types/models/solar_open/modeling_solar_open.d.ts +8 -0
- package/types/models/solar_open/modeling_solar_open.d.ts.map +1 -0
- package/types/pipelines.d.ts +1 -0
- package/types/pipelines.d.ts.map +1 -1
- package/types/utils/hub.d.ts.map +1 -1
- package/types/utils/model_registry/ModelRegistry.d.ts +27 -0
- package/types/utils/model_registry/ModelRegistry.d.ts.map +1 -1
- package/types/utils/model_registry/get_available_dtypes.d.ts +26 -0
- package/types/utils/model_registry/get_available_dtypes.d.ts.map +1 -0
- package/types/utils/model_registry/get_model_files.d.ts +25 -0
- package/types/utils/model_registry/get_model_files.d.ts.map +1 -1
- package/types/utils/model_registry/resolve_model_type.d.ts +24 -0
- package/types/utils/model_registry/resolve_model_type.d.ts.map +1 -0
- package/types/models/ast/modeling_ast.d.ts.map +0 -1
- /package/src/models/{ast/modeling_ast.js → audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.js} +0 -0
package/dist/transformers.js
CHANGED
|
@@ -20,7 +20,7 @@ var node_path_default = {};
|
|
|
20
20
|
var node_url_default = {};
|
|
21
21
|
|
|
22
22
|
// src/env.js
|
|
23
|
-
var VERSION = "4.0.0-next.
|
|
23
|
+
var VERSION = "4.0.0-next.9";
|
|
24
24
|
var HAS_SELF = typeof self !== "undefined";
|
|
25
25
|
var IS_FS_AVAILABLE = !isEmpty(node_fs_default);
|
|
26
26
|
var IS_PATH_AVAILABLE = !isEmpty(node_path_default);
|
|
@@ -250,7 +250,7 @@ var logger = {
|
|
|
250
250
|
}
|
|
251
251
|
};
|
|
252
252
|
|
|
253
|
-
// ../../node_modules/.pnpm/@huggingface+tokenizers@0.1.
|
|
253
|
+
// ../../node_modules/.pnpm/@huggingface+tokenizers@0.1.3/node_modules/@huggingface/tokenizers/dist/tokenizers.mjs
|
|
254
254
|
var DictionarySplitter = class {
|
|
255
255
|
/**
|
|
256
256
|
* @param dictionary The dictionary of words to use for splitting.
|
|
@@ -1906,10 +1906,10 @@ var BPE = class extends TokenizerModel_default {
|
|
|
1906
1906
|
);
|
|
1907
1907
|
if (byte_tokens.every((x) => this.tokens_to_ids.has(x))) {
|
|
1908
1908
|
output_tokens.push(...byte_tokens);
|
|
1909
|
-
} else {
|
|
1909
|
+
} else if (this.unk_token != null) {
|
|
1910
1910
|
output_tokens.push(this.unk_token);
|
|
1911
1911
|
}
|
|
1912
|
-
} else {
|
|
1912
|
+
} else if (this.unk_token != null) {
|
|
1913
1913
|
output_tokens.push(this.unk_token);
|
|
1914
1914
|
}
|
|
1915
1915
|
}
|
|
@@ -5754,14 +5754,14 @@ var Random = class {
|
|
|
5754
5754
|
* @returns {number} A normally distributed random value.
|
|
5755
5755
|
*/
|
|
5756
5756
|
gauss(mu = 0, sigma = 1) {
|
|
5757
|
-
let
|
|
5757
|
+
let z2 = this._gauss_next;
|
|
5758
5758
|
this._gauss_next = null;
|
|
5759
|
-
if (
|
|
5759
|
+
if (z2 === null) {
|
|
5760
5760
|
const x2pi = this.random() * 2 * Math.PI, g2rad = Math.sqrt(-2 * Math.log(1 - this.random()));
|
|
5761
|
-
|
|
5761
|
+
z2 = Math.cos(x2pi) * g2rad;
|
|
5762
5762
|
this._gauss_next = Math.sin(x2pi) * g2rad;
|
|
5763
5763
|
}
|
|
5764
|
-
return mu +
|
|
5764
|
+
return mu + z2 * sigma;
|
|
5765
5765
|
}
|
|
5766
5766
|
/**
|
|
5767
5767
|
* Shuffles an array in-place using the Fisher-Yates algorithm.
|
|
@@ -6515,13 +6515,15 @@ async function storeCachedResource(path_or_repo_id, filename, cache2, cacheKey,
|
|
|
6515
6515
|
wrapped_progress
|
|
6516
6516
|
);
|
|
6517
6517
|
} else if (typeof response !== "string") {
|
|
6518
|
+
const headers = new Headers(response.headers);
|
|
6519
|
+
headers.set("content-length", result.byteLength.toString());
|
|
6518
6520
|
await cache2.put(
|
|
6519
6521
|
cacheKey,
|
|
6520
6522
|
new Response(
|
|
6521
6523
|
/** @type {any} */
|
|
6522
6524
|
result,
|
|
6523
6525
|
{
|
|
6524
|
-
headers
|
|
6526
|
+
headers
|
|
6525
6527
|
}
|
|
6526
6528
|
)
|
|
6527
6529
|
).catch((err) => {
|
|
@@ -7483,7 +7485,7 @@ __export(onnxruntime_node_exports, {
|
|
|
7483
7485
|
});
|
|
7484
7486
|
var onnxruntime_node_default = {};
|
|
7485
7487
|
|
|
7486
|
-
// ../../node_modules/.pnpm/onnxruntime-web@1.25.0-dev.
|
|
7488
|
+
// ../../node_modules/.pnpm/onnxruntime-web@1.25.0-dev.20260323-a99aad9d36/node_modules/onnxruntime-web/dist/ort.webgpu.bundle.min.mjs
|
|
7487
7489
|
var ort_webgpu_bundle_min_exports = {};
|
|
7488
7490
|
__export(ort_webgpu_bundle_min_exports, {
|
|
7489
7491
|
InferenceSession: () => Jf,
|
|
@@ -8251,7 +8253,7 @@ async function ts(a = {}) {
|
|
|
8251
8253
|
throw L(e = "Aborted(" + e + ")"), W = true, e = new WebAssembly.RuntimeError(e + ". Build with -sASSERTIONS for more info."), R?.(e), e;
|
|
8252
8254
|
}
|
|
8253
8255
|
function Ye() {
|
|
8254
|
-
return { a: { f: Vs, J: js, k: Hs, p: Ys, l: qs, ta: Js, b: Xs, ca: Qs, Ka: Sn,
|
|
8256
|
+
return { a: { f: Vs, J: js, k: Hs, p: Ys, l: qs, ta: Js, b: Xs, ca: Qs, Ka: Sn, q: Zs, da: Ln, _a: On, Ga: Bn, Ia: Mn, $a: Cn, Ya: Un, Ra: Dn, Xa: Pn, pa: _n, Ha: Rn, Yb: Nn, Za: kn, Fa: Wn, eb: Ks, Da: ti, Tb: ri, Rb: oi, Ca: si, M: ii, I: ui, Sb: fi, ka: yi, Ub: bi, Ua: wi, Wb: Ti, La: vi, Pb: Ei, la: Si, Ta: Ar, bb: Ai, U: Oi, n: Di, c: Er, sb: Pi, w: _i, L: Ri, z: Ni, j: ki, o: Yn, tb: Wi, G: Fi, T: Gi, h: $i, u: zi, m: Vi, i: ji, Oa: Hi, Pa: Yi, Qa: qi, Ma: Qn, Na: Zn, Qb: Kn, fb: Xi, db: Ki, Y: eu, rb: tu, ma: ru, cb: Qi, gb: nu, ab: ou, Xb: au, N: Ji, hb: su, X: iu, Vb: uu, ob: bu, C: wu, sa: gu, ra: Tu, qb: vu, W: Eu, v: Su, nb: Au, mb: Iu, lb: xu, pb: Lu, kb: Ou, jb: Bu, ib: Mu, Va: ao, Wa: so, Ja: br, ea: io, oa: uo, Sa: fo, na: co, Db: Gf, xa: Df, Eb: Ff, ya: Uf, F: Ef, e: ff, s: sf, x: af, D: gf, Ib: Bf, ba: Lf, B: lf, za: Mf, $: Pf, ha: Of, Fb: kf, Gb: Nf, Ba: Sf, Aa: xf, Jb: Af, wa: Wf, aa: Cf, d: uf, A: df, r: cf, Cb: $f, t: mf, y: Tf, H: pf, E: hf, K: vf, S: _f, ja: wf, _: Rf, Kb: bf, Lb: yf, P: If2, g: Uu, a: Fe, Ob: qe, Hb: Du, ia: Pu, O: _u, qa: Ru, Mb: Nu, Q: ku, zb: Wu, Ab: Fu, ua: Gu, fa: $u, R: zu, Ea: Vu, va: ju, Z: Hu, xb: Yu, Zb: qu, V: Ju, Bb: Xu, ub: Qu, vb: Ku, wb: ef, ga: tf, yb: rf, Nb: nf } };
|
|
8255
8257
|
}
|
|
8256
8258
|
async function bt() {
|
|
8257
8259
|
function e(o, u) {
|
|
@@ -8314,14 +8316,14 @@ async function ts(a = {}) {
|
|
|
8314
8316
|
gt.push(t), Je[e.Nc] = t, t.Nc = e.Nc;
|
|
8315
8317
|
var n = { Oc: "run", he: e.ge, Wc: e.Wc, Nc: e.Nc };
|
|
8316
8318
|
return t.postMessage(n, e.Yc), 0;
|
|
8317
|
-
},
|
|
8319
|
+
}, G = 0, V = (e, t, ...n) => {
|
|
8318
8320
|
var o, u = 16 * n.length, c = P(), h = Ft(u), b = h >>> 3;
|
|
8319
8321
|
for (o of n) typeof o == "bigint" ? ((p(), pe)[b++ >>> 0] = 1n, (p(), pe)[b++ >>> 0] = o) : ((p(), pe)[b++ >>> 0] = 0n, (p(), ae)[b++ >>> 0] = o);
|
|
8320
8322
|
return e = Lo(e, 0, u, h, t), D(c), e;
|
|
8321
8323
|
};
|
|
8322
8324
|
function qe(e) {
|
|
8323
8325
|
if (i) return V(0, 1, e);
|
|
8324
|
-
if (S = e, !(0 <
|
|
8326
|
+
if (S = e, !(0 < G)) {
|
|
8325
8327
|
for (var t of gt) Se(t);
|
|
8326
8328
|
for (t of We) Se(t);
|
|
8327
8329
|
We = [], gt = [], Je = {}, W = true;
|
|
@@ -8366,7 +8368,7 @@ async function ts(a = {}) {
|
|
|
8366
8368
|
We.push(e);
|
|
8367
8369
|
}
|
|
8368
8370
|
var Fe, zs = (e, t) => {
|
|
8369
|
-
|
|
8371
|
+
G = 0, e = zr(e, t), 0 < G ? S = e : Fr(e);
|
|
8370
8372
|
}, Ct = [], Ut = 0, me = (e) => -9007199254740992 > e || 9007199254740992 < e ? NaN : Number(e);
|
|
8371
8373
|
function Vs(e) {
|
|
8372
8374
|
var t = new wr(e >>>= 0);
|
|
@@ -8718,7 +8720,7 @@ async function ts(a = {}) {
|
|
|
8718
8720
|
}
|
|
8719
8721
|
var he = (e) => {
|
|
8720
8722
|
if (!W) try {
|
|
8721
|
-
if (e(), !(0 <
|
|
8723
|
+
if (e(), !(0 < G)) try {
|
|
8722
8724
|
i ? Wt() && Fr(S) : br(S);
|
|
8723
8725
|
} catch (t) {
|
|
8724
8726
|
t instanceof wt || t == "unwind" || y(0, t);
|
|
@@ -8746,7 +8748,7 @@ async function ts(a = {}) {
|
|
|
8746
8748
|
return (t ? Vr[t] : of[e])(...Ir);
|
|
8747
8749
|
}
|
|
8748
8750
|
var Ei = () => {
|
|
8749
|
-
|
|
8751
|
+
G = 0;
|
|
8750
8752
|
};
|
|
8751
8753
|
function Si(e) {
|
|
8752
8754
|
e >>>= 0, i ? postMessage({ Oc: "cleanupThread", ie: e }) : yn(Je[e]);
|
|
@@ -8766,7 +8768,7 @@ async function ts(a = {}) {
|
|
|
8766
8768
|
try {
|
|
8767
8769
|
return e(...n);
|
|
8768
8770
|
} finally {
|
|
8769
|
-
W || (_t.pop(), Me && Ge === 1 && _t.length === 0 && (Ge = 0,
|
|
8771
|
+
W || (_t.pop(), Me && Ge === 1 && _t.length === 0 && (Ge = 0, G += 1, Pt(wa), typeof Fibers < "u" && Fibers.De()));
|
|
8770
8772
|
}
|
|
8771
8773
|
};
|
|
8772
8774
|
return jn.set(e, t), t;
|
|
@@ -8781,7 +8783,7 @@ async function ts(a = {}) {
|
|
|
8781
8783
|
try {
|
|
8782
8784
|
var c = (function() {
|
|
8783
8785
|
var E = (p(), x)[Me + 8 >>> 2 >>> 0];
|
|
8784
|
-
return E = Vn.get(E), E = jn.get(E), --
|
|
8786
|
+
return E = Vn.get(E), E = jn.get(E), --G, E();
|
|
8785
8787
|
})();
|
|
8786
8788
|
} catch (E) {
|
|
8787
8789
|
c = E, u = true;
|
|
@@ -8972,7 +8974,7 @@ async function ts(a = {}) {
|
|
|
8972
8974
|
return L(ct(e >>> 0, t >>> 0));
|
|
8973
8975
|
}
|
|
8974
8976
|
var ou = () => {
|
|
8975
|
-
throw
|
|
8977
|
+
throw G += 1, "unwind";
|
|
8976
8978
|
};
|
|
8977
8979
|
function au() {
|
|
8978
8980
|
return 4294901760;
|
|
@@ -9065,15 +9067,15 @@ async function ts(a = {}) {
|
|
|
9065
9067
|
}
|
|
9066
9068
|
(b = (p(), A)[c + 24 >>> 2 >>> 0]) && (b = { label: Ne(b + 4) }, e.defaultQueue = b), e.label = Ne(c + 4);
|
|
9067
9069
|
}
|
|
9068
|
-
|
|
9069
|
-
--
|
|
9070
|
-
ce[u >>> 0] = B.queue, ce[o >>> 0] = B, lt(n, B.lost.then((ue) => {
|
|
9070
|
+
G += 1, lt(t, h.requestDevice(e).then((B) => {
|
|
9071
|
+
--G, he(() => {
|
|
9072
|
+
ce[u >>> 0] = B.queue, ce[o >>> 0] = B, G += 1, lt(n, B.lost.then((ue) => {
|
|
9071
9073
|
he(() => {
|
|
9072
9074
|
B.onuncapturederror = () => {
|
|
9073
9075
|
};
|
|
9074
9076
|
var ye = P(), fe = Ce(ue.message);
|
|
9075
9077
|
_r(n, yu[ue.reason], fe), D(ye);
|
|
9076
|
-
});
|
|
9078
|
+
}), --G;
|
|
9077
9079
|
})), B.onuncapturederror = (ue) => {
|
|
9078
9080
|
var ye = 5;
|
|
9079
9081
|
ue.error instanceof GPUValidationError ? ye = 2 : ue.error instanceof GPUOutOfMemoryError ? ye = 3 : ue.error instanceof GPUInternalError && (ye = 4);
|
|
@@ -9082,7 +9084,7 @@ async function ts(a = {}) {
|
|
|
9082
9084
|
}, "adapterInfo" in B || (B.adapterInfo = h.info), kr(t, 1, o, 0);
|
|
9083
9085
|
});
|
|
9084
9086
|
}, (B) => {
|
|
9085
|
-
--
|
|
9087
|
+
--G, he(() => {
|
|
9086
9088
|
var ue = P(), ye = Ce(B.message);
|
|
9087
9089
|
kr(t, 3, o, ye), n && _r(n, 4, ye), D(ue);
|
|
9088
9090
|
});
|
|
@@ -9125,12 +9127,12 @@ async function ts(a = {}) {
|
|
|
9125
9127
|
function vu(e, t, n, o, u) {
|
|
9126
9128
|
e >>>= 0, t = me(t), n = me(n), u >>>= 0;
|
|
9127
9129
|
var c = O(e);
|
|
9128
|
-
Re[e] = [], u == 4294967295 && (u = void 0),
|
|
9129
|
-
--
|
|
9130
|
+
Re[e] = [], u == 4294967295 && (u = void 0), G += 1, lt(t, c.mapAsync(n, o >>> 0, u).then(() => {
|
|
9131
|
+
--G, he(() => {
|
|
9130
9132
|
Rr(t, 1, 0);
|
|
9131
9133
|
});
|
|
9132
9134
|
}, (h) => {
|
|
9133
|
-
--
|
|
9135
|
+
--G, he(() => {
|
|
9134
9136
|
P();
|
|
9135
9137
|
var b = Ce(h.message);
|
|
9136
9138
|
Rr(t, h.name === "AbortError" ? 4 : h.name === "OperationError" ? 3 : 0, b), delete Re[e];
|
|
@@ -9159,12 +9161,12 @@ async function ts(a = {}) {
|
|
|
9159
9161
|
return ce[n >>> 0] = u, o && (Re[n] = []), true;
|
|
9160
9162
|
}
|
|
9161
9163
|
function Iu(e, t, n, o) {
|
|
9162
|
-
e >>>= 0, t = me(t), o >>>= 0, n = du(n >>> 0), e = O(e),
|
|
9163
|
-
--
|
|
9164
|
+
e >>>= 0, t = me(t), o >>>= 0, n = du(n >>> 0), e = O(e), G += 1, lt(t, e.createComputePipelineAsync(n).then((u) => {
|
|
9165
|
+
--G, he(() => {
|
|
9164
9166
|
ce[o >>> 0] = u, Pr(t, 1, o, 0);
|
|
9165
9167
|
});
|
|
9166
9168
|
}, (u) => {
|
|
9167
|
-
--
|
|
9169
|
+
--G, he(() => {
|
|
9168
9170
|
var c = P(), h = Ce(u.message);
|
|
9169
9171
|
Pr(t, u.reason === "validation" ? 3 : u.reason === "internal" ? 4 : 0, o, h), D(c);
|
|
9170
9172
|
});
|
|
@@ -9179,15 +9181,15 @@ async function ts(a = {}) {
|
|
|
9179
9181
|
(e = O(e)).onuncapturederror = null, e.destroy();
|
|
9180
9182
|
};
|
|
9181
9183
|
function Ou(e, t) {
|
|
9182
|
-
t = me(t), e = O(e >>> 0),
|
|
9183
|
-
--
|
|
9184
|
+
t = me(t), e = O(e >>> 0), G += 1, lt(t, e.popErrorScope().then((n) => {
|
|
9185
|
+
--G, he(() => {
|
|
9184
9186
|
var o = 5;
|
|
9185
9187
|
n ? n instanceof GPUValidationError ? o = 2 : n instanceof GPUOutOfMemoryError ? o = 3 : n instanceof GPUInternalError && (o = 4) : o = 1;
|
|
9186
9188
|
var u = P(), c = n ? Ce(n.message) : 0;
|
|
9187
9189
|
Nr(t, 1, o, c), D(u);
|
|
9188
9190
|
});
|
|
9189
9191
|
}, (n) => {
|
|
9190
|
-
--
|
|
9192
|
+
--G, he(() => {
|
|
9191
9193
|
var o = P(), u = Ce(n.message);
|
|
9192
9194
|
Nr(t, 1, 5, u), D(o);
|
|
9193
9195
|
});
|
|
@@ -9198,8 +9200,8 @@ async function ts(a = {}) {
|
|
|
9198
9200
|
var u = { featureLevel: pu[(p(), x)[n + 4 >>> 2 >>> 0]], powerPreference: mu[(p(), x)[n + 8 >>> 2 >>> 0]], forceFallbackAdapter: !!(p(), A)[n + 12 >>> 2 >>> 0] };
|
|
9199
9201
|
(e = (p(), A)[n >>> 2 >>> 0]) !== 0 && (p(), u.Fe = !!(p(), A)[e + 8 >>> 2 >>> 0]);
|
|
9200
9202
|
}
|
|
9201
|
-
"gpu" in navigator ? (
|
|
9202
|
-
--
|
|
9203
|
+
"gpu" in navigator ? (G += 1, lt(t, navigator.gpu.requestAdapter(u).then((c) => {
|
|
9204
|
+
--G, he(() => {
|
|
9203
9205
|
if (c) ce[o >>> 0] = c, Et(t, 1, o, 0);
|
|
9204
9206
|
else {
|
|
9205
9207
|
var h = P(), b = Ce("WebGPU not available on this browser (requestAdapter returned null)");
|
|
@@ -9207,7 +9209,7 @@ async function ts(a = {}) {
|
|
|
9207
9209
|
}
|
|
9208
9210
|
});
|
|
9209
9211
|
}, (c) => {
|
|
9210
|
-
--
|
|
9212
|
+
--G, he(() => {
|
|
9211
9213
|
var h = P(), b = Ce(c.message);
|
|
9212
9214
|
Et(t, 4, o, b), D(h);
|
|
9213
9215
|
});
|
|
@@ -9438,7 +9440,7 @@ async function ts(a = {}) {
|
|
|
9438
9440
|
Te(`invalid type for getValue: ${t}`);
|
|
9439
9441
|
}
|
|
9440
9442
|
}, r.UTF8ToString = ct, r.stringToUTF8 = Pe, r.lengthBytesUTF8 = _e;
|
|
9441
|
-
var lo, po, Dr, Wt, xe, pt, mo, ho, yo, bo, wo, go, To, vo, Eo, So, Ao, Pr, _r, Rr, Nr, Et, kr, Io, Wr, xo, Lo, Oo, Fr, Bo, Mo, Gr, N, St, Co, D, Ft, P, Uo, $r, Do, Po, _o, zr, Ro, No, ko, Wo, Fo, Go, $o, zo, Vo, jo, Ho, Yo, qo, Jo, Xo, Qo, Zo, Ko, ea, ta, ra, na, oa, aa, sa, ia, ua, fa, ca, da, la, pa, ma, ha, ya, ba, wa, ga, Ta, ke, of = [qe, yr, En, Ln, On, Bn, Mn, Cn, Un, Dn, Pn, _n, Rn, Nn, kn, Wn, Qn, Zn, Kn, ao, so, io, uo, fo, co], Vr = {
|
|
9443
|
+
var lo, po, Dr, Wt, xe, pt, mo, ho, yo, bo, wo, go, To, vo, Eo, So, Ao, Pr, _r, Rr, Nr, Et, kr, Io, Wr, xo, Lo, Oo, Fr, Bo, Mo, Gr, N, St, Co, D, Ft, P, Uo, $r, Do, Po, _o, zr, Ro, No, ko, Wo, Fo, Go, $o, zo, Vo, jo, Ho, Yo, qo, Jo, Xo, Qo, Zo, Ko, ea, ta, ra, na, oa, aa, sa, ia, ua, fa, ca, da, la, pa, ma, ha, ya, ba, wa, ga, Ta, ke, of = [qe, yr, En, Ln, On, Bn, Mn, Cn, Un, Dn, Pn, _n, Rn, Nn, kn, Wn, Qn, Zn, Kn, ao, so, io, uo, fo, co], Vr = { 937012: (e, t, n, o, u) => {
|
|
9442
9444
|
if (r === void 0 || !r.Uc) return 1;
|
|
9443
9445
|
if ((e = ct(Number(e >>> 0))).startsWith("./") && (e = e.substring(2)), !(e = r.Uc.get(e))) return 2;
|
|
9444
9446
|
if (t = Number(t >>> 0), n = Number(n >>> 0), o = Number(o >>> 0), t + n > e.byteLength) return 3;
|
|
@@ -9458,11 +9460,11 @@ async function ts(a = {}) {
|
|
|
9458
9460
|
} catch {
|
|
9459
9461
|
return 4;
|
|
9460
9462
|
}
|
|
9461
|
-
},
|
|
9463
|
+
}, 937836: (e, t, n) => {
|
|
9462
9464
|
r.Sd(e, (p(), J).subarray(t >>> 0, t + n >>> 0));
|
|
9463
|
-
},
|
|
9465
|
+
}, 937900: () => r.me(), 937942: (e) => {
|
|
9464
9466
|
r.jd(e);
|
|
9465
|
-
},
|
|
9467
|
+
}, 937979: () => typeof wasmOffsetConverter < "u" };
|
|
9466
9468
|
function af(e, t, n, o) {
|
|
9467
9469
|
var u = P();
|
|
9468
9470
|
try {
|
|
@@ -9481,12 +9483,12 @@ async function ts(a = {}) {
|
|
|
9481
9483
|
N(1, 0);
|
|
9482
9484
|
}
|
|
9483
9485
|
}
|
|
9484
|
-
function uf(e
|
|
9485
|
-
var
|
|
9486
|
+
function uf(e) {
|
|
9487
|
+
var t = P();
|
|
9486
9488
|
try {
|
|
9487
|
-
|
|
9488
|
-
} catch (
|
|
9489
|
-
if (D(
|
|
9489
|
+
Ro(e);
|
|
9490
|
+
} catch (n) {
|
|
9491
|
+
if (D(t), n !== n + 0) throw n;
|
|
9490
9492
|
N(1, 0);
|
|
9491
9493
|
}
|
|
9492
9494
|
}
|
|
@@ -9499,25 +9501,16 @@ async function ts(a = {}) {
|
|
|
9499
9501
|
N(1, 0);
|
|
9500
9502
|
}
|
|
9501
9503
|
}
|
|
9502
|
-
function cf(e) {
|
|
9503
|
-
var
|
|
9504
|
-
try {
|
|
9505
|
-
Ro(e);
|
|
9506
|
-
} catch (n) {
|
|
9507
|
-
if (D(t), n !== n + 0) throw n;
|
|
9508
|
-
N(1, 0);
|
|
9509
|
-
}
|
|
9510
|
-
}
|
|
9511
|
-
function df(e, t, n, o, u, c, h) {
|
|
9512
|
-
var b = P();
|
|
9504
|
+
function cf(e, t, n) {
|
|
9505
|
+
var o = P();
|
|
9513
9506
|
try {
|
|
9514
|
-
|
|
9515
|
-
} catch (
|
|
9516
|
-
if (D(
|
|
9507
|
+
_o(e, t, n);
|
|
9508
|
+
} catch (u) {
|
|
9509
|
+
if (D(o), u !== u + 0) throw u;
|
|
9517
9510
|
N(1, 0);
|
|
9518
9511
|
}
|
|
9519
9512
|
}
|
|
9520
|
-
function
|
|
9513
|
+
function df(e, t) {
|
|
9521
9514
|
var n = P();
|
|
9522
9515
|
try {
|
|
9523
9516
|
Vo(e, t);
|
|
@@ -9526,6 +9519,15 @@ async function ts(a = {}) {
|
|
|
9526
9519
|
N(1, 0);
|
|
9527
9520
|
}
|
|
9528
9521
|
}
|
|
9522
|
+
function lf(e, t, n, o, u, c, h) {
|
|
9523
|
+
var b = P();
|
|
9524
|
+
try {
|
|
9525
|
+
return Wo(e, t, n, o, u, c, h);
|
|
9526
|
+
} catch (E) {
|
|
9527
|
+
if (D(b), E !== E + 0) throw E;
|
|
9528
|
+
N(1, 0);
|
|
9529
|
+
}
|
|
9530
|
+
}
|
|
9529
9531
|
function pf(e, t, n, o, u, c) {
|
|
9530
9532
|
var h = P();
|
|
9531
9533
|
try {
|
|
@@ -9955,7 +9957,7 @@ var nc;
|
|
|
9955
9957
|
var oc;
|
|
9956
9958
|
var ac;
|
|
9957
9959
|
var qt;
|
|
9958
|
-
var
|
|
9960
|
+
var z;
|
|
9959
9961
|
var je = k(() => {
|
|
9960
9962
|
"use strict";
|
|
9961
9963
|
Yt();
|
|
@@ -10011,19 +10013,19 @@ var je = k(() => {
|
|
|
10011
10013
|
rr = false, ds = true, H(M);
|
|
10012
10014
|
});
|
|
10013
10015
|
})), await Promise.race(C), S) throw new Error(`WebAssembly backend initializing failed due to timeout: ${r}ms`);
|
|
10014
|
-
},
|
|
10016
|
+
}, z = () => {
|
|
10015
10017
|
if (nn && rn) return rn;
|
|
10016
10018
|
throw new Error("WebAssembly is not initialized yet.");
|
|
10017
10019
|
};
|
|
10018
10020
|
});
|
|
10019
10021
|
var be;
|
|
10020
10022
|
var Lt;
|
|
10021
|
-
var
|
|
10023
|
+
var $;
|
|
10022
10024
|
var nr = k(() => {
|
|
10023
10025
|
"use strict";
|
|
10024
10026
|
je();
|
|
10025
10027
|
be = (a, r) => {
|
|
10026
|
-
let s =
|
|
10028
|
+
let s = z(), f = s.lengthBytesUTF8(a) + 1, i = s._malloc(f);
|
|
10027
10029
|
return s.stringToUTF8(a, i, f), r.push(i), i;
|
|
10028
10030
|
}, Lt = (a, r, s, f) => {
|
|
10029
10031
|
if (typeof a == "object" && a !== null) {
|
|
@@ -10037,8 +10039,8 @@ var nr = k(() => {
|
|
|
10037
10039
|
else if (typeof d == "boolean") f(l, d ? "1" : "0");
|
|
10038
10040
|
else throw new Error(`Can't handle extra config type: ${typeof d}`);
|
|
10039
10041
|
});
|
|
10040
|
-
},
|
|
10041
|
-
let r =
|
|
10042
|
+
}, $ = (a) => {
|
|
10043
|
+
let r = z(), s = r.stackSave();
|
|
10042
10044
|
try {
|
|
10043
10045
|
let f = r.PTR_SIZE, i = r.stackAlloc(2 * f);
|
|
10044
10046
|
r._OrtGetLastError(i, i + f);
|
|
@@ -10055,7 +10057,7 @@ var ps = k(() => {
|
|
|
10055
10057
|
je();
|
|
10056
10058
|
nr();
|
|
10057
10059
|
ls = (a) => {
|
|
10058
|
-
let r =
|
|
10060
|
+
let r = z(), s = 0, f = [], i = a || {};
|
|
10059
10061
|
try {
|
|
10060
10062
|
if (a?.logSeverityLevel === void 0) i.logSeverityLevel = 2;
|
|
10061
10063
|
else if (typeof a.logSeverityLevel != "number" || !Number.isInteger(a.logSeverityLevel) || a.logSeverityLevel < 0 || a.logSeverityLevel > 4) throw new Error(`log severity level is not valid: ${a.logSeverityLevel}`);
|
|
@@ -10063,9 +10065,9 @@ var ps = k(() => {
|
|
|
10063
10065
|
else if (typeof a.logVerbosityLevel != "number" || !Number.isInteger(a.logVerbosityLevel)) throw new Error(`log verbosity level is not valid: ${a.logVerbosityLevel}`);
|
|
10064
10066
|
a?.terminate === void 0 && (i.terminate = false);
|
|
10065
10067
|
let d = 0;
|
|
10066
|
-
return a?.tag !== void 0 && (d = be(a.tag, f)), s = r._OrtCreateRunOptions(i.logSeverityLevel, i.logVerbosityLevel, !!i.terminate, d), s === 0 &&
|
|
10068
|
+
return a?.tag !== void 0 && (d = be(a.tag, f)), s = r._OrtCreateRunOptions(i.logSeverityLevel, i.logVerbosityLevel, !!i.terminate, d), s === 0 && $("Can't create run options."), a?.extra !== void 0 && Lt(a.extra, "", /* @__PURE__ */ new WeakSet(), (l, m) => {
|
|
10067
10069
|
let y = be(l, f), w = be(m, f);
|
|
10068
|
-
r._OrtAddRunConfigEntry(s, y, w) !== 0 &&
|
|
10070
|
+
r._OrtAddRunConfigEntry(s, y, w) !== 0 && $(`Can't set a run config entry: ${l} - ${m}.`);
|
|
10069
10071
|
}), [s, f];
|
|
10070
10072
|
} catch (d) {
|
|
10071
10073
|
throw s !== 0 && r._OrtReleaseRunOptions(s), f.forEach((l) => r._free(l)), d;
|
|
@@ -10113,7 +10115,7 @@ var hs = k(() => {
|
|
|
10113
10115
|
r.use_ort_model_bytes_directly || (r.use_ort_model_bytes_directly = "1"), a.executionProviders && a.executionProviders.some((s) => (typeof s == "string" ? s : s.name) === "webgpu") && (a.enableMemPattern = false);
|
|
10114
10116
|
}, on = (a, r, s, f) => {
|
|
10115
10117
|
let i = be(r, f), d = be(s, f);
|
|
10116
|
-
|
|
10118
|
+
z()._OrtAddSessionConfigEntry(a, i, d) !== 0 && $(`Can't set a session config entry: ${r} - ${s}.`);
|
|
10117
10119
|
}, ot = (a, r, s, f) => {
|
|
10118
10120
|
let i = be(r, f), d = be(s, f);
|
|
10119
10121
|
a.push([i, d]);
|
|
@@ -10144,7 +10146,7 @@ var hs = k(() => {
|
|
|
10144
10146
|
}
|
|
10145
10147
|
S.validationMode && ot(l, "validationMode", S.validationMode, s);
|
|
10146
10148
|
}
|
|
10147
|
-
let v =
|
|
10149
|
+
let v = z().webgpuRegisterDevice(g);
|
|
10148
10150
|
if (v) {
|
|
10149
10151
|
let [S, C, R] = v;
|
|
10150
10152
|
ot(l, "deviceId", S.toString(), s), ot(l, "webgpuInstance", C.toString(), s), ot(l, "webgpuDevice", R.toString(), s);
|
|
@@ -10159,13 +10161,13 @@ var hs = k(() => {
|
|
|
10159
10161
|
}
|
|
10160
10162
|
let m = be(d, s), y = l.length, w = 0, T = 0;
|
|
10161
10163
|
if (y > 0) {
|
|
10162
|
-
w =
|
|
10163
|
-
for (let g = 0; g < y; g++)
|
|
10164
|
+
w = z()._malloc(y * z().PTR_SIZE), s.push(w), T = z()._malloc(y * z().PTR_SIZE), s.push(T);
|
|
10165
|
+
for (let g = 0; g < y; g++) z().setValue(w + g * z().PTR_SIZE, l[g][0], "*"), z().setValue(T + g * z().PTR_SIZE, l[g][1], "*");
|
|
10164
10166
|
}
|
|
10165
|
-
await
|
|
10167
|
+
await z()._OrtAppendExecutionProvider(a, m, w, T, y) !== 0 && $(`Can't append execution provider: ${d}.`);
|
|
10166
10168
|
}
|
|
10167
10169
|
}, ms = async (a) => {
|
|
10168
|
-
let r =
|
|
10170
|
+
let r = z(), s = 0, f = [], i = a || {};
|
|
10169
10171
|
uc(i);
|
|
10170
10172
|
try {
|
|
10171
10173
|
let d = sc(i.graphOptimizationLevel ?? "all"), l = ic(i.executionMode ?? "sequential"), m = typeof i.logId == "string" ? be(i.logId, f) : 0, y = i.logSeverityLevel ?? 2;
|
|
@@ -10173,7 +10175,7 @@ var hs = k(() => {
|
|
|
10173
10175
|
let w = i.logVerbosityLevel ?? 0;
|
|
10174
10176
|
if (!Number.isInteger(w) || w < 0 || w > 4) throw new Error(`log verbosity level is not valid: ${w}`);
|
|
10175
10177
|
let T = typeof i.optimizedModelFilePath == "string" ? be(i.optimizedModelFilePath, f) : 0;
|
|
10176
|
-
if (s = r._OrtCreateSessionOptions(d, !!i.enableCpuMemArena, !!i.enableMemPattern, l, !!i.enableProfiling, 0, m, y, w, T), s === 0 &&
|
|
10178
|
+
if (s = r._OrtCreateSessionOptions(d, !!i.enableCpuMemArena, !!i.enableMemPattern, l, !!i.enableProfiling, 0, m, y, w, T), s === 0 && $("Can't create session options."), i.executionProviders && await fc(s, i, f), i.enableGraphCapture !== void 0) {
|
|
10177
10179
|
if (typeof i.enableGraphCapture != "boolean") throw new Error(`enableGraphCapture must be a boolean value: ${i.enableGraphCapture}`);
|
|
10178
10180
|
on(s, "enableGraphCapture", i.enableGraphCapture.toString(), f);
|
|
10179
10181
|
}
|
|
@@ -10181,13 +10183,13 @@ var hs = k(() => {
|
|
|
10181
10183
|
if (typeof g != "string") throw new Error(`free dimension override name must be a string: ${g}`);
|
|
10182
10184
|
if (typeof v != "number" || !Number.isInteger(v) || v < 0) throw new Error(`free dimension override value must be a non-negative integer: ${v}`);
|
|
10183
10185
|
let S = be(g, f);
|
|
10184
|
-
r._OrtAddFreeDimensionOverride(s, S, v) !== 0 &&
|
|
10186
|
+
r._OrtAddFreeDimensionOverride(s, S, v) !== 0 && $(`Can't set a free dimension override: ${g} - ${v}.`);
|
|
10185
10187
|
}
|
|
10186
10188
|
return i.extra !== void 0 && Lt(i.extra, "", /* @__PURE__ */ new WeakSet(), (g, v) => {
|
|
10187
10189
|
on(s, g, v, f);
|
|
10188
10190
|
}), [s, f];
|
|
10189
10191
|
} catch (d) {
|
|
10190
|
-
throw s !== 0 && r._OrtReleaseSessionOptions(s) !== 0 &&
|
|
10192
|
+
throw s !== 0 && r._OrtReleaseSessionOptions(s) !== 0 && $("Can't release session options."), f.forEach((l) => r._free(l)), d;
|
|
10191
10193
|
}
|
|
10192
10194
|
};
|
|
10193
10195
|
});
|
|
@@ -10757,7 +10759,7 @@ var Os = k(() => {
|
|
|
10757
10759
|
return l ? l.push(d) : this.temporarySessionTensorIds.set(r, [d]), d;
|
|
10758
10760
|
}
|
|
10759
10761
|
uploadTensor(r, s) {
|
|
10760
|
-
if (
|
|
10762
|
+
if (!z().shouldTransferToMLTensor) throw new Error("Trying to upload to a MLTensor while shouldTransferToMLTensor is false");
|
|
10761
10763
|
le("verbose", () => `[WebNN] uploadTensor {tensorId: ${r}, data: ${s.byteLength}}`), this.tensorManager.upload(r, s);
|
|
10762
10764
|
}
|
|
10763
10765
|
async downloadTensor(r, s) {
|
|
@@ -10863,11 +10865,11 @@ var Kr = k(() => {
|
|
|
10863
10865
|
nr();
|
|
10864
10866
|
sn();
|
|
10865
10867
|
yc = (a, r) => {
|
|
10866
|
-
|
|
10868
|
+
z()._OrtInit(a, r) !== 0 && $("Can't initialize onnxruntime.");
|
|
10867
10869
|
}, Jt = async (a) => {
|
|
10868
10870
|
yc(a.wasm.numThreads, Ot(a.logLevel));
|
|
10869
10871
|
}, Xt = async (a, r) => {
|
|
10870
|
-
|
|
10872
|
+
z().asyncInit?.();
|
|
10871
10873
|
let s = a.webgpu.adapter;
|
|
10872
10874
|
if (r === "webgpu") {
|
|
10873
10875
|
if (typeof navigator > "u" || !navigator.gpu) throw new Error("WebGPU is not supported in current environment");
|
|
@@ -10882,29 +10884,29 @@ var Kr = k(() => {
|
|
|
10882
10884
|
}
|
|
10883
10885
|
}
|
|
10884
10886
|
if (r === "webnn" && (typeof navigator > "u" || !navigator.ml)) throw new Error("WebNN is not supported in current environment");
|
|
10885
|
-
if (r === "webgpu" &&
|
|
10887
|
+
if (r === "webgpu" && z().webgpuInit((f) => {
|
|
10886
10888
|
a.webgpu.device = f;
|
|
10887
10889
|
}), r === "webnn") {
|
|
10888
10890
|
let f = new (Os(), $t(Ls)).WebNNBackend(a);
|
|
10889
|
-
|
|
10891
|
+
z().webnnInit([f, () => f.reserveTensorId(), (i) => f.releaseTensorId(i), async (i, d, l, m, y) => f.ensureTensor(i, d, l, m, y), (i, d) => {
|
|
10890
10892
|
f.uploadTensor(i, d);
|
|
10891
10893
|
}, async (i, d) => f.downloadTensor(i, d), (i, d) => f.registerMLContext(i, d), !!a.trace]);
|
|
10892
10894
|
}
|
|
10893
10895
|
}, it = /* @__PURE__ */ new Map(), bc = (a) => {
|
|
10894
|
-
let r =
|
|
10896
|
+
let r = z(), s = r.stackSave();
|
|
10895
10897
|
try {
|
|
10896
10898
|
let f = r.PTR_SIZE, i = r.stackAlloc(2 * f);
|
|
10897
|
-
r._OrtGetInputOutputCount(a, i, i + f) !== 0 &&
|
|
10899
|
+
r._OrtGetInputOutputCount(a, i, i + f) !== 0 && $("Can't get session input/output count.");
|
|
10898
10900
|
let l = f === 4 ? "i32" : "i64";
|
|
10899
10901
|
return [Number(r.getValue(i, l)), Number(r.getValue(i + f, l))];
|
|
10900
10902
|
} finally {
|
|
10901
10903
|
r.stackRestore(s);
|
|
10902
10904
|
}
|
|
10903
10905
|
}, Bs = (a, r) => {
|
|
10904
|
-
let s =
|
|
10906
|
+
let s = z(), f = s.stackSave(), i = 0;
|
|
10905
10907
|
try {
|
|
10906
10908
|
let d = s.PTR_SIZE, l = s.stackAlloc(2 * d);
|
|
10907
|
-
s._OrtGetInputOutputMetadata(a, r, l, l + d) !== 0 &&
|
|
10909
|
+
s._OrtGetInputOutputMetadata(a, r, l, l + d) !== 0 && $("Can't get session input/output metadata.");
|
|
10908
10910
|
let y = Number(s.getValue(l, "*"));
|
|
10909
10911
|
i = Number(s.getValue(l + d, "*"));
|
|
10910
10912
|
let w = s.HEAP32[i / 4];
|
|
@@ -10919,11 +10921,11 @@ var Kr = k(() => {
|
|
|
10919
10921
|
s.stackRestore(f), i !== 0 && s._OrtFree(i);
|
|
10920
10922
|
}
|
|
10921
10923
|
}, xt = (a) => {
|
|
10922
|
-
let r =
|
|
10924
|
+
let r = z(), s = r._malloc(a.byteLength);
|
|
10923
10925
|
if (s === 0) throw new Error(`Can't create a session. failed to allocate a buffer of size ${a.byteLength}.`);
|
|
10924
10926
|
return r.HEAPU8.set(a, s), [s, a.byteLength];
|
|
10925
10927
|
}, Qt = async (a, r) => {
|
|
10926
|
-
let s, f, i =
|
|
10928
|
+
let s, f, i = z();
|
|
10927
10929
|
Array.isArray(a) ? [s, f] = a : a.buffer === i.HEAPU8.buffer ? [s, f] = [a.byteOffset, a.byteLength] : [s, f] = xt(a);
|
|
10928
10930
|
let d = 0, l = 0, m = 0, y = [], w = [], T = [];
|
|
10929
10931
|
try {
|
|
@@ -10944,17 +10946,17 @@ var Kr = k(() => {
|
|
|
10944
10946
|
} else i.currentContext = await i.webnnCreateMLContext();
|
|
10945
10947
|
break;
|
|
10946
10948
|
}
|
|
10947
|
-
d = await i._OrtCreateSession(s, f, l), i.webgpuOnCreateSession?.(d), d === 0 &&
|
|
10949
|
+
d = await i._OrtCreateSession(s, f, l), i.webgpuOnCreateSession?.(d), d === 0 && $("Can't create a session."), i.jsepOnCreateSession?.(), i.currentContext && (i.webnnRegisterMLContext(d, i.currentContext), i.currentContext = void 0, i.shouldTransferToMLTensor = true);
|
|
10948
10950
|
let [g, v] = bc(d), S = !!r?.enableGraphCapture, C = [], R = [], H = [], U = [], M = [];
|
|
10949
10951
|
for (let L = 0; L < g; L++) {
|
|
10950
10952
|
let [W, oe, p] = Bs(d, L);
|
|
10951
|
-
W === 0 &&
|
|
10953
|
+
W === 0 && $("Can't get an input name."), w.push(W);
|
|
10952
10954
|
let ne = i.UTF8ToString(W);
|
|
10953
10955
|
C.push(ne), H.push(oe === 0 ? { name: ne, isTensor: false } : { name: ne, isTensor: true, type: or(oe), shape: p });
|
|
10954
10956
|
}
|
|
10955
10957
|
for (let L = 0; L < v; L++) {
|
|
10956
10958
|
let [W, oe, p] = Bs(d, L + g);
|
|
10957
|
-
W === 0 &&
|
|
10959
|
+
W === 0 && $("Can't get an output name."), T.push(W);
|
|
10958
10960
|
let ne = i.UTF8ToString(W);
|
|
10959
10961
|
R.push(ne), U.push(oe === 0 ? { name: ne, isTensor: false } : { name: ne, isTensor: true, type: or(oe), shape: p });
|
|
10960
10962
|
{
|
|
@@ -10973,23 +10975,23 @@ var Kr = k(() => {
|
|
|
10973
10975
|
}
|
|
10974
10976
|
}
|
|
10975
10977
|
let Y = null;
|
|
10976
|
-
return M.some((L) => L === "gpu-buffer" || L === "ml-tensor" || L === "ml-tensor-cpu-output") && (m = i._OrtCreateBinding(d), m === 0 &&
|
|
10978
|
+
return M.some((L) => L === "gpu-buffer" || L === "ml-tensor" || L === "ml-tensor-cpu-output") && (m = i._OrtCreateBinding(d), m === 0 && $("Can't create IO binding."), Y = { handle: m, outputPreferredLocations: M, outputPreferredLocationsEncoded: M.map((L) => L === "ml-tensor-cpu-output" ? "ml-tensor" : L).map((L) => an(L)) }), it.set(d, [d, w, T, Y, S, false]), [d, C, R, H, U];
|
|
10977
10979
|
} catch (g) {
|
|
10978
|
-
throw w.forEach((v) => i._OrtFree(v)), T.forEach((v) => i._OrtFree(v)), m !== 0 && i._OrtReleaseBinding(m) !== 0 &&
|
|
10980
|
+
throw w.forEach((v) => i._OrtFree(v)), T.forEach((v) => i._OrtFree(v)), m !== 0 && i._OrtReleaseBinding(m) !== 0 && $("Can't release IO binding."), d !== 0 && i._OrtReleaseSession(d) !== 0 && $("Can't release session."), g;
|
|
10979
10981
|
} finally {
|
|
10980
|
-
i._free(s), l !== 0 && i._OrtReleaseSessionOptions(l) !== 0 &&
|
|
10982
|
+
i._free(s), l !== 0 && i._OrtReleaseSessionOptions(l) !== 0 && $("Can't release session options."), y.forEach((g) => i._free(g)), i.unmountExternalData?.();
|
|
10981
10983
|
}
|
|
10982
10984
|
}, Zt = (a) => {
|
|
10983
|
-
let r =
|
|
10985
|
+
let r = z(), s = it.get(a);
|
|
10984
10986
|
if (!s) throw new Error(`cannot release session. invalid session id: ${a}`);
|
|
10985
10987
|
let [f, i, d, l, m] = s;
|
|
10986
|
-
l && (m && r._OrtClearBoundOutputs(l.handle) !== 0 &&
|
|
10988
|
+
l && (m && r._OrtClearBoundOutputs(l.handle) !== 0 && $("Can't clear bound outputs."), r._OrtReleaseBinding(l.handle) !== 0 && $("Can't release IO binding.")), r.jsepOnReleaseSession?.(a), r.webnnOnReleaseSession?.(a), r.webgpuOnReleaseSession?.(a), i.forEach((y) => r._OrtFree(y)), d.forEach((y) => r._OrtFree(y)), r._OrtReleaseSession(f) !== 0 && $("Can't release session."), it.delete(a);
|
|
10987
10989
|
}, Ms = async (a, r, s, f, i, d, l = false) => {
|
|
10988
10990
|
if (!a) {
|
|
10989
10991
|
r.push(0);
|
|
10990
10992
|
return;
|
|
10991
10993
|
}
|
|
10992
|
-
let m =
|
|
10994
|
+
let m = z(), y = m.PTR_SIZE, w = a[0], T = a[1], g = a[3], v = g, S, C;
|
|
10993
10995
|
if (w === "string" && (g === "gpu-buffer" || g === "ml-tensor")) throw new Error("String tensor is not supported on GPU.");
|
|
10994
10996
|
if (l && g !== "gpu-buffer") throw new Error(`External buffer must be provided for input/output index ${d} when enableGraphCapture is true.`);
|
|
10995
10997
|
if (g === "gpu-buffer") {
|
|
@@ -11033,12 +11035,12 @@ var Kr = k(() => {
|
|
|
11033
11035
|
try {
|
|
11034
11036
|
T.forEach((M, Y) => m.setValue(H + Y * y, M, y === 4 ? "i32" : "i64"));
|
|
11035
11037
|
let U = m._OrtCreateTensor(He(w), S, C, H, T.length, an(v));
|
|
11036
|
-
U === 0 &&
|
|
11038
|
+
U === 0 && $(`Can't create tensor for input/output. session=${f}, index=${d}.`), r.push(U);
|
|
11037
11039
|
} finally {
|
|
11038
11040
|
m.stackRestore(R);
|
|
11039
11041
|
}
|
|
11040
11042
|
}, Kt = async (a, r, s, f, i, d) => {
|
|
11041
|
-
let l =
|
|
11043
|
+
let l = z(), m = l.PTR_SIZE, y = it.get(a);
|
|
11042
11044
|
if (!y) throw new Error(`cannot run inference. invalid session id: ${a}`);
|
|
11043
11045
|
let w = y[0], T = y[1], g = y[2], v = y[3], S = y[4], C = y[5], R = r.length, H = f.length, U = 0, M = [], Y = [], L = [], W = [], oe = [], p = l.stackSave(), ne = l.stackAlloc(R * m), X = l.stackAlloc(R * m), J = l.stackAlloc(H * m), Ue = l.stackAlloc(H * m);
|
|
11044
11046
|
try {
|
|
@@ -11054,33 +11056,33 @@ var Kr = k(() => {
|
|
|
11054
11056
|
$e("wasm bindInputsOutputs");
|
|
11055
11057
|
for (let q = 0; q < R; q++) {
|
|
11056
11058
|
let we = r[q];
|
|
11057
|
-
await l._OrtBindInput(_, T[we], Y[q]) !== 0 &&
|
|
11059
|
+
await l._OrtBindInput(_, T[we], Y[q]) !== 0 && $(`Can't bind input[${q}] for session=${a}.`);
|
|
11058
11060
|
}
|
|
11059
11061
|
for (let q = 0; q < H; q++) {
|
|
11060
11062
|
let we = f[q];
|
|
11061
|
-
i[q]?.[3] ? (oe.push(L[q]), l._OrtBindOutput(_, g[we], L[q], 0) !== 0 &&
|
|
11063
|
+
i[q]?.[3] ? (oe.push(L[q]), l._OrtBindOutput(_, g[we], L[q], 0) !== 0 && $(`Can't bind pre-allocated output[${q}] for session=${a}.`)) : l._OrtBindOutput(_, g[we], 0, pe[we]) !== 0 && $(`Can't bind output[${q}] to ${ae[q]} for session=${a}.`);
|
|
11062
11064
|
}
|
|
11063
11065
|
ze("wasm bindInputsOutputs"), it.set(a, [w, T, g, v, S, true]);
|
|
11064
11066
|
}
|
|
11065
11067
|
l.jsepOnRunStart?.(w), l.webnnOnRunStart?.(w);
|
|
11066
11068
|
let Q;
|
|
11067
|
-
v ? Q = await l._OrtRunWithBinding(w, v.handle, H, J, U) : Q = await l._OrtRun(w, X, ne, R, Ue, H, J, U), Q !== 0 &&
|
|
11069
|
+
v ? Q = await l._OrtRunWithBinding(w, v.handle, H, J, U) : Q = await l._OrtRun(w, X, ne, R, Ue, H, J, U), Q !== 0 && $("failed to call OrtRun().");
|
|
11068
11070
|
let x = [], A = [];
|
|
11069
11071
|
$e("wasm ProcessOutputTensor");
|
|
11070
11072
|
for (let _ = 0; _ < H; _++) {
|
|
11071
11073
|
let ae = Number(l.getValue(J + _ * m, "*"));
|
|
11072
11074
|
if (ae === L[_] || oe.includes(L[_])) {
|
|
11073
|
-
x.push(i[_]), ae !== L[_] && l._OrtReleaseTensor(ae) !== 0 &&
|
|
11075
|
+
x.push(i[_]), ae !== L[_] && l._OrtReleaseTensor(ae) !== 0 && $("Can't release tensor.");
|
|
11074
11076
|
continue;
|
|
11075
11077
|
}
|
|
11076
11078
|
let pe = l.stackSave(), q = l.stackAlloc(4 * m), we = false, re, se = 0;
|
|
11077
11079
|
try {
|
|
11078
|
-
l._OrtGetTensorData(ae, q, q + m, q + 2 * m, q + 3 * m) !== 0 &&
|
|
11080
|
+
l._OrtGetTensorData(ae, q, q + m, q + 2 * m, q + 3 * m) !== 0 && $(`Can't access output tensor data on index ${_}.`);
|
|
11079
11081
|
let Te = m === 4 ? "i32" : "i64", Ye = Number(l.getValue(q, Te));
|
|
11080
11082
|
se = l.getValue(q + m, "*");
|
|
11081
11083
|
let bt = l.getValue(q + m * 2, "*"), wt = Number(l.getValue(q + m * 3, Te)), Se = [];
|
|
11082
11084
|
for (let ee = 0; ee < wt; ee++) Se.push(Number(l.getValue(bt + ee * m, Te)));
|
|
11083
|
-
l._OrtFree(bt) !== 0 &&
|
|
11085
|
+
l._OrtFree(bt) !== 0 && $("Can't free memory for tensor dims.");
|
|
11084
11086
|
let Ae = Se.reduce((ee, Z) => ee * Z, 1);
|
|
11085
11087
|
re = or(Ye);
|
|
11086
11088
|
let Oe = v?.outputPreferredLocations[f[_]];
|
|
@@ -11088,24 +11090,24 @@ var Kr = k(() => {
|
|
|
11088
11090
|
if (Oe === "gpu-buffer" || Oe === "ml-tensor") throw new Error("String tensor is not supported on GPU.");
|
|
11089
11091
|
let ee = [];
|
|
11090
11092
|
for (let Z = 0; Z < Ae; Z++) {
|
|
11091
|
-
let
|
|
11092
|
-
ee.push(l.UTF8ToString(
|
|
11093
|
+
let G = l.getValue(se + Z * m, "*"), V = l.getValue(se + (Z + 1) * m, "*"), qe = Z === Ae - 1 ? void 0 : V - G;
|
|
11094
|
+
ee.push(l.UTF8ToString(G, qe));
|
|
11093
11095
|
}
|
|
11094
11096
|
x.push([re, Se, ee, "cpu"]);
|
|
11095
11097
|
} else if (Oe === "gpu-buffer" && Ae > 0) {
|
|
11096
11098
|
let ee = l.webgpuGetBuffer;
|
|
11097
11099
|
if (!ee) throw new Error('preferredLocation "gpu-buffer" is not supported without using WebGPU.');
|
|
11098
|
-
let Z = ee(se),
|
|
11099
|
-
if (
|
|
11100
|
+
let Z = ee(se), G = mt(Ye, Ae);
|
|
11101
|
+
if (G === void 0 || !ar(re)) throw new Error(`Unsupported data type: ${re}`);
|
|
11100
11102
|
we = true;
|
|
11101
11103
|
{
|
|
11102
11104
|
l.webgpuRegisterBuffer(Z, a, se);
|
|
11103
|
-
let V = l.webgpuCreateDownloader(Z,
|
|
11105
|
+
let V = l.webgpuCreateDownloader(Z, G, a);
|
|
11104
11106
|
x.push([re, Se, { gpuBuffer: Z, download: async () => {
|
|
11105
11107
|
let qe = await V();
|
|
11106
11108
|
return new (at(re))(qe);
|
|
11107
11109
|
}, dispose: () => {
|
|
11108
|
-
l._OrtReleaseTensor(ae) !== 0 &&
|
|
11110
|
+
l._OrtReleaseTensor(ae) !== 0 && $("Can't release tensor.");
|
|
11109
11111
|
} }, "gpu-buffer"]);
|
|
11110
11112
|
}
|
|
11111
11113
|
} else if (Oe === "ml-tensor" && Ae > 0) {
|
|
@@ -11120,8 +11122,8 @@ var Kr = k(() => {
|
|
|
11120
11122
|
} else if (Oe === "ml-tensor-cpu-output" && Ae > 0) {
|
|
11121
11123
|
let ee = l.webnnCreateMLTensorDownloader(se, re)(), Z = x.length;
|
|
11122
11124
|
we = true, A.push((async () => {
|
|
11123
|
-
let
|
|
11124
|
-
return l.webnnReleaseTensorId(se), l._OrtReleaseTensor(ae),
|
|
11125
|
+
let G = [Z, await ee];
|
|
11126
|
+
return l.webnnReleaseTensorId(se), l._OrtReleaseTensor(ae), G;
|
|
11125
11127
|
})()), x.push([re, Se, [], "cpu"]);
|
|
11126
11128
|
} else {
|
|
11127
11129
|
let ee = at(re), Z = new ee(Ae);
|
|
@@ -11131,7 +11133,7 @@ var Kr = k(() => {
|
|
|
11131
11133
|
l.stackRestore(pe), re === "string" && se && l._free(se), we || l._OrtReleaseTensor(ae);
|
|
11132
11134
|
}
|
|
11133
11135
|
}
|
|
11134
|
-
v && !S && (l._OrtClearBoundOutputs(v.handle) !== 0 &&
|
|
11136
|
+
v && !S && (l._OrtClearBoundOutputs(v.handle) !== 0 && $("Can't clear bound outputs."), it.set(a, [w, T, g, v, S, false]));
|
|
11135
11137
|
for (let [_, ae] of await Promise.all(A)) x[_][2] = ae;
|
|
11136
11138
|
return ze("wasm ProcessOutputTensor"), x;
|
|
11137
11139
|
} finally {
|
|
@@ -11142,10 +11144,10 @@ var Kr = k(() => {
|
|
|
11142
11144
|
}), Y.forEach((Q) => l._OrtReleaseTensor(Q)), L.forEach((Q) => l._OrtReleaseTensor(Q)), W.forEach((Q) => l._free(Q)), U !== 0 && l._OrtReleaseRunOptions(U), M.forEach((Q) => l._free(Q));
|
|
11143
11145
|
}
|
|
11144
11146
|
}, er = (a) => {
|
|
11145
|
-
let r =
|
|
11147
|
+
let r = z(), s = it.get(a);
|
|
11146
11148
|
if (!s) throw new Error("invalid session id");
|
|
11147
11149
|
let f = s[0], i = r._OrtEndProfiling(f);
|
|
11148
|
-
i === 0 &&
|
|
11150
|
+
i === 0 && $("Can't get an profile file name."), r._OrtFree(i);
|
|
11149
11151
|
}, tr = (a) => {
|
|
11150
11152
|
let r = [];
|
|
11151
11153
|
for (let s of a) {
|
|
@@ -11378,7 +11380,7 @@ var $s = k(() => {
|
|
|
11378
11380
|
Ve();
|
|
11379
11381
|
Ve();
|
|
11380
11382
|
Ve();
|
|
11381
|
-
var Xa = "1.25.0-dev.
|
|
11383
|
+
var Xa = "1.25.0-dev.20260323-a99aad9d36";
|
|
11382
11384
|
var Tl = Zr;
|
|
11383
11385
|
{
|
|
11384
11386
|
let a = ($s(), $t(Gs)).wasmBackend;
|
|
@@ -11529,10 +11531,10 @@ var tensorToDataURL = (tensor, options) => {
|
|
|
11529
11531
|
for (let i = 0; i < height; i++) {
|
|
11530
11532
|
for (let j = 0; j < width; j++) {
|
|
11531
11533
|
const R = (tensor.data[rTensorPointer++] - normBias[0]) * normMean[0];
|
|
11532
|
-
const
|
|
11534
|
+
const G = (tensor.data[gTensorPointer++] - normBias[1]) * normMean[1];
|
|
11533
11535
|
const B = (tensor.data[bTensorPointer++] - normBias[2]) * normMean[2];
|
|
11534
11536
|
const A = aTensorPointer === -1 ? 255 : (tensor.data[aTensorPointer++] - normBias[3]) * normMean[3];
|
|
11535
|
-
pixels2DContext.fillStyle = "rgba(" + R + "," +
|
|
11537
|
+
pixels2DContext.fillStyle = "rgba(" + R + "," + G + "," + B + "," + A + ")";
|
|
11536
11538
|
pixels2DContext.fillRect(j, i, 1, 1);
|
|
11537
11539
|
}
|
|
11538
11540
|
}
|
|
@@ -16497,7 +16499,9 @@ var processors_exports = {};
|
|
|
16497
16499
|
__export(processors_exports, {
|
|
16498
16500
|
ChatterboxProcessor: () => ChatterboxProcessor,
|
|
16499
16501
|
Florence2Processor: () => Florence2Processor,
|
|
16502
|
+
Gemma3Processor: () => Gemma3Processor,
|
|
16500
16503
|
Gemma3nProcessor: () => Gemma3nProcessor,
|
|
16504
|
+
Glm46VProcessor: () => Glm46VProcessor,
|
|
16501
16505
|
GraniteSpeechProcessor: () => GraniteSpeechProcessor,
|
|
16502
16506
|
GroundingDinoProcessor: () => GroundingDinoProcessor,
|
|
16503
16507
|
Idefics3Processor: () => Idefics3Processor,
|
|
@@ -19011,26 +19015,29 @@ function compute_segments(mask_probs, pred_scores, pred_labels, mask_threshold,
|
|
|
19011
19015
|
}
|
|
19012
19016
|
return [segmentation, segments];
|
|
19013
19017
|
}
|
|
19014
|
-
function smart_resize(height, width, factor = 28, min_pixels = 56 * 56, max_pixels = 14 * 14 * 4 * 1280) {
|
|
19018
|
+
function smart_resize(height, width, factor = 28, min_pixels = 56 * 56, max_pixels = 14 * 14 * 4 * 1280, temporal_factor = 1) {
|
|
19015
19019
|
if (height < factor || width < factor) {
|
|
19016
|
-
|
|
19017
|
-
|
|
19020
|
+
const scale = Math.max(factor / height, factor / width);
|
|
19021
|
+
height = Math.round(height * scale);
|
|
19022
|
+
width = Math.round(width * scale);
|
|
19023
|
+
}
|
|
19024
|
+
if (Math.max(height, width) / Math.min(height, width) > 200) {
|
|
19018
19025
|
throw new Error(
|
|
19019
19026
|
`absolute aspect ratio must be smaller than 200, got ${Math.max(height, width) / Math.min(height, width)}`
|
|
19020
19027
|
);
|
|
19021
19028
|
}
|
|
19022
19029
|
let h_bar = Math.round(height / factor) * factor;
|
|
19023
19030
|
let w_bar = Math.round(width / factor) * factor;
|
|
19024
|
-
if (h_bar * w_bar > max_pixels) {
|
|
19025
|
-
const beta = Math.sqrt(height * width / max_pixels);
|
|
19026
|
-
h_bar = Math.floor(height / beta / factor) * factor;
|
|
19027
|
-
w_bar = Math.floor(width / beta / factor) * factor;
|
|
19028
|
-
} else if (h_bar * w_bar < min_pixels) {
|
|
19029
|
-
const beta = Math.sqrt(min_pixels / (height * width));
|
|
19031
|
+
if (temporal_factor * h_bar * w_bar > max_pixels) {
|
|
19032
|
+
const beta = Math.sqrt(temporal_factor * height * width / max_pixels);
|
|
19033
|
+
h_bar = Math.max(factor, Math.floor(height / beta / factor) * factor);
|
|
19034
|
+
w_bar = Math.max(factor, Math.floor(width / beta / factor) * factor);
|
|
19035
|
+
} else if (temporal_factor * h_bar * w_bar < min_pixels) {
|
|
19036
|
+
const beta = Math.sqrt(min_pixels / (temporal_factor * height * width));
|
|
19030
19037
|
h_bar = Math.ceil(height * beta / factor) * factor;
|
|
19031
19038
|
w_bar = Math.ceil(width * beta / factor) * factor;
|
|
19032
19039
|
}
|
|
19033
|
-
return [
|
|
19040
|
+
return [w_bar, h_bar];
|
|
19034
19041
|
}
|
|
19035
19042
|
function post_process_panoptic_segmentation(outputs, threshold = 0.5, mask_threshold = 0.5, overlap_mask_area_threshold = 0.8, label_ids_to_fuse = null, target_sizes = null) {
|
|
19036
19043
|
if (label_ids_to_fuse === null) {
|
|
@@ -19109,7 +19116,7 @@ var ImageProcessor = class extends Callable2 {
|
|
|
19109
19116
|
this.do_pad = config.do_pad;
|
|
19110
19117
|
this.min_pixels = config.min_pixels;
|
|
19111
19118
|
this.max_pixels = config.max_pixels;
|
|
19112
|
-
if (this.do_pad && !this.pad_size && this.size && this.size.width !== void 0 && this.size.height !== void 0) {
|
|
19119
|
+
if (this.do_pad && !this.pad_size && !this.size_divisibility && this.size && this.size.width !== void 0 && this.size.height !== void 0) {
|
|
19113
19120
|
this.pad_size = this.size;
|
|
19114
19121
|
}
|
|
19115
19122
|
this.do_flip_channel_order = config.do_flip_channel_order ?? false;
|
|
@@ -19397,10 +19404,8 @@ var ImageProcessor = class extends Callable2 {
|
|
|
19397
19404
|
const padded = this.pad_image(pixelData, [image.height, image.width, image.channels], this.pad_size);
|
|
19398
19405
|
[pixelData, imgDims] = padded;
|
|
19399
19406
|
} else if (this.size_divisibility) {
|
|
19400
|
-
const
|
|
19401
|
-
|
|
19402
|
-
this.size_divisibility
|
|
19403
|
-
);
|
|
19407
|
+
const paddedWidth = Math.ceil(imgDims[1] / this.size_divisibility) * this.size_divisibility;
|
|
19408
|
+
const paddedHeight = Math.ceil(imgDims[0] / this.size_divisibility) * this.size_divisibility;
|
|
19404
19409
|
[pixelData, imgDims] = this.pad_image(pixelData, imgDims, { width: paddedWidth, height: paddedHeight });
|
|
19405
19410
|
}
|
|
19406
19411
|
}
|
|
@@ -19477,6 +19482,7 @@ var image_processors_exports = {};
|
|
|
19477
19482
|
__export(image_processors_exports, {
|
|
19478
19483
|
BeitFeatureExtractor: () => BeitFeatureExtractor,
|
|
19479
19484
|
BitImageProcessor: () => BitImageProcessor,
|
|
19485
|
+
CHMv2ImageProcessor: () => CHMv2ImageProcessor,
|
|
19480
19486
|
CLIPFeatureExtractor: () => CLIPFeatureExtractor,
|
|
19481
19487
|
CLIPImageProcessor: () => CLIPImageProcessor,
|
|
19482
19488
|
ChineseCLIPFeatureExtractor: () => ChineseCLIPFeatureExtractor,
|
|
@@ -19493,6 +19499,8 @@ __export(image_processors_exports, {
|
|
|
19493
19499
|
DonutImageProcessor: () => DonutImageProcessor,
|
|
19494
19500
|
EfficientNetImageProcessor: () => EfficientNetImageProcessor,
|
|
19495
19501
|
GLPNFeatureExtractor: () => GLPNFeatureExtractor,
|
|
19502
|
+
Gemma3ImageProcessor: () => Gemma3ImageProcessor,
|
|
19503
|
+
Glm46VImageProcessor: () => Glm46VImageProcessor,
|
|
19496
19504
|
GroundingDinoImageProcessor: () => GroundingDinoImageProcessor,
|
|
19497
19505
|
Idefics3ImageProcessor: () => Idefics3ImageProcessor,
|
|
19498
19506
|
ImageFeatureExtractor: () => ImageProcessor,
|
|
@@ -19553,6 +19561,10 @@ var BitImageProcessor = class extends ImageProcessor {
|
|
|
19553
19561
|
var ChineseCLIPFeatureExtractor = class extends ImageProcessor {
|
|
19554
19562
|
};
|
|
19555
19563
|
|
|
19564
|
+
// src/models/chmv2/image_processing_chmv2.js
|
|
19565
|
+
var CHMv2ImageProcessor = class extends ImageProcessor {
|
|
19566
|
+
};
|
|
19567
|
+
|
|
19556
19568
|
// src/models/clip/image_processing_clip.js
|
|
19557
19569
|
var CLIPImageProcessor = class extends ImageProcessor {
|
|
19558
19570
|
};
|
|
@@ -19672,6 +19684,69 @@ var EfficientNetImageProcessor = class extends ImageProcessor {
|
|
|
19672
19684
|
}
|
|
19673
19685
|
};
|
|
19674
19686
|
|
|
19687
|
+
// src/models/gemma3/image_processing_gemma3.js
|
|
19688
|
+
var Gemma3ImageProcessor = class extends ImageProcessor {
|
|
19689
|
+
};
|
|
19690
|
+
|
|
19691
|
+
// src/models/qwen2_vl/image_processing_qwen2_vl.js
|
|
19692
|
+
var Qwen2VLImageProcessor = class extends ImageProcessor {
|
|
19693
|
+
constructor(config) {
|
|
19694
|
+
super(config);
|
|
19695
|
+
this.min_pixels = config.min_pixels ?? config.size?.shortest_edge;
|
|
19696
|
+
this.max_pixels = config.max_pixels ?? config.size?.longest_edge;
|
|
19697
|
+
this.patch_size = config.patch_size;
|
|
19698
|
+
this.merge_size = config.merge_size;
|
|
19699
|
+
}
|
|
19700
|
+
/** @type {ImageProcessor['get_resize_output_image_size']} */
|
|
19701
|
+
get_resize_output_image_size(image, size) {
|
|
19702
|
+
const factor = this.patch_size * this.merge_size;
|
|
19703
|
+
return smart_resize(image.height, image.width, factor, this.min_pixels, this.max_pixels);
|
|
19704
|
+
}
|
|
19705
|
+
async _call(images, ...args) {
|
|
19706
|
+
const { pixel_values, original_sizes, reshaped_input_sizes } = await super._call(images, ...args);
|
|
19707
|
+
let patches = pixel_values;
|
|
19708
|
+
const { temporal_patch_size, merge_size, patch_size } = this.config;
|
|
19709
|
+
if (patches.dims[0] === 1) {
|
|
19710
|
+
patches = cat(
|
|
19711
|
+
Array.from({ length: temporal_patch_size }, () => patches),
|
|
19712
|
+
0
|
|
19713
|
+
);
|
|
19714
|
+
}
|
|
19715
|
+
const grid_t = patches.dims[0] / temporal_patch_size;
|
|
19716
|
+
const channel = patches.dims[1];
|
|
19717
|
+
const grid_h = Math.floor(patches.dims[2] / patch_size);
|
|
19718
|
+
const grid_w = Math.floor(patches.dims[3] / patch_size);
|
|
19719
|
+
const flatten_patches = patches.view(
|
|
19720
|
+
grid_t,
|
|
19721
|
+
temporal_patch_size,
|
|
19722
|
+
channel,
|
|
19723
|
+
Math.floor(grid_h / merge_size),
|
|
19724
|
+
merge_size,
|
|
19725
|
+
patch_size,
|
|
19726
|
+
Math.floor(grid_w / merge_size),
|
|
19727
|
+
merge_size,
|
|
19728
|
+
patch_size
|
|
19729
|
+
).permute(0, 3, 6, 4, 7, 2, 1, 5, 8).view(grid_t * grid_h * grid_w, channel * temporal_patch_size * patch_size * patch_size);
|
|
19730
|
+
const image_grid_thw = new Tensor3("int64", [grid_t, grid_h, grid_w], [1, 3]);
|
|
19731
|
+
return {
|
|
19732
|
+
pixel_values: flatten_patches,
|
|
19733
|
+
image_grid_thw,
|
|
19734
|
+
original_sizes,
|
|
19735
|
+
reshaped_input_sizes
|
|
19736
|
+
};
|
|
19737
|
+
}
|
|
19738
|
+
};
|
|
19739
|
+
|
|
19740
|
+
// src/models/glm46v/image_processing_glm46v.js
|
|
19741
|
+
var Glm46VImageProcessor = class extends Qwen2VLImageProcessor {
|
|
19742
|
+
/** @type {Qwen2VLImageProcessor['get_resize_output_image_size']} */
|
|
19743
|
+
get_resize_output_image_size(image, size) {
|
|
19744
|
+
const factor = this.patch_size * this.merge_size;
|
|
19745
|
+
const temporal_factor = this.config.temporal_patch_size ?? 2;
|
|
19746
|
+
return smart_resize(image.height, image.width, factor, this.min_pixels, this.max_pixels, temporal_factor);
|
|
19747
|
+
}
|
|
19748
|
+
};
|
|
19749
|
+
|
|
19675
19750
|
// src/models/glpn/image_processing_glpn.js
|
|
19676
19751
|
var GLPNFeatureExtractor = class extends ImageProcessor {
|
|
19677
19752
|
};
|
|
@@ -20065,7 +20140,7 @@ var Lfm2VlImageProcessor = class extends ImageProcessor {
|
|
|
20065
20140
|
const img = pixel_values.unsqueeze_(0);
|
|
20066
20141
|
const total_factor = this.encoder_patch_size * this.downsample_factor;
|
|
20067
20142
|
const f2 = total_factor ** 2;
|
|
20068
|
-
const [
|
|
20143
|
+
const [new_width, new_height] = smart_resize(
|
|
20069
20144
|
Math.max(total_factor, height),
|
|
20070
20145
|
Math.max(total_factor, width),
|
|
20071
20146
|
total_factor,
|
|
@@ -20355,55 +20430,6 @@ var PixtralImageProcessor = class extends ImageProcessor {
|
|
|
20355
20430
|
var PvtImageProcessor = class extends ImageProcessor {
|
|
20356
20431
|
};
|
|
20357
20432
|
|
|
20358
|
-
// src/models/qwen2_vl/image_processing_qwen2_vl.js
|
|
20359
|
-
var Qwen2VLImageProcessor = class extends ImageProcessor {
|
|
20360
|
-
constructor(config) {
|
|
20361
|
-
super(config);
|
|
20362
|
-
this.min_pixels = config.min_pixels ?? config.size?.shortest_edge;
|
|
20363
|
-
this.max_pixels = config.max_pixels ?? config.size?.longest_edge;
|
|
20364
|
-
this.patch_size = config.patch_size;
|
|
20365
|
-
this.merge_size = config.merge_size;
|
|
20366
|
-
}
|
|
20367
|
-
/** @type {ImageProcessor['get_resize_output_image_size']} */
|
|
20368
|
-
get_resize_output_image_size(image, size) {
|
|
20369
|
-
const factor = this.patch_size * this.merge_size;
|
|
20370
|
-
return smart_resize(image.height, image.width, factor, this.min_pixels, this.max_pixels);
|
|
20371
|
-
}
|
|
20372
|
-
async _call(images, ...args) {
|
|
20373
|
-
const { pixel_values, original_sizes, reshaped_input_sizes } = await super._call(images, ...args);
|
|
20374
|
-
let patches = pixel_values;
|
|
20375
|
-
const { temporal_patch_size, merge_size, patch_size } = this.config;
|
|
20376
|
-
if (patches.dims[0] === 1) {
|
|
20377
|
-
patches = cat(
|
|
20378
|
-
Array.from({ length: temporal_patch_size }, () => patches),
|
|
20379
|
-
0
|
|
20380
|
-
);
|
|
20381
|
-
}
|
|
20382
|
-
const grid_t = patches.dims[0] / temporal_patch_size;
|
|
20383
|
-
const channel = patches.dims[1];
|
|
20384
|
-
const grid_h = Math.floor(patches.dims[2] / patch_size);
|
|
20385
|
-
const grid_w = Math.floor(patches.dims[3] / patch_size);
|
|
20386
|
-
const flatten_patches = patches.view(
|
|
20387
|
-
grid_t,
|
|
20388
|
-
temporal_patch_size,
|
|
20389
|
-
channel,
|
|
20390
|
-
Math.floor(grid_h / merge_size),
|
|
20391
|
-
merge_size,
|
|
20392
|
-
patch_size,
|
|
20393
|
-
Math.floor(grid_w / merge_size),
|
|
20394
|
-
merge_size,
|
|
20395
|
-
patch_size
|
|
20396
|
-
).permute(0, 3, 6, 4, 7, 2, 1, 5, 8).view(grid_t * grid_h * grid_w, channel * temporal_patch_size * patch_size * patch_size);
|
|
20397
|
-
const image_grid_thw = new Tensor3("int64", [grid_t, grid_h, grid_w], [1, 3]);
|
|
20398
|
-
return {
|
|
20399
|
-
pixel_values: flatten_patches,
|
|
20400
|
-
image_grid_thw,
|
|
20401
|
-
original_sizes,
|
|
20402
|
-
reshaped_input_sizes
|
|
20403
|
-
};
|
|
20404
|
-
}
|
|
20405
|
-
};
|
|
20406
|
-
|
|
20407
20433
|
// src/models/rt_detr/image_processing_rt_detr.js
|
|
20408
20434
|
var RTDetrImageProcessor = class extends ImageProcessor {
|
|
20409
20435
|
/** @type {typeof post_process_object_detection} */
|
|
@@ -20885,6 +20911,48 @@ var Florence2Processor = class extends Processor {
|
|
|
20885
20911
|
}
|
|
20886
20912
|
};
|
|
20887
20913
|
|
|
20914
|
+
// src/models/gemma3/processing_gemma3.js
|
|
20915
|
+
var Gemma3Processor = class extends Processor {
|
|
20916
|
+
static tokenizer_class = AutoTokenizer;
|
|
20917
|
+
static image_processor_class = AutoImageProcessor;
|
|
20918
|
+
static uses_processor_config = true;
|
|
20919
|
+
static uses_chat_template_file = true;
|
|
20920
|
+
constructor(config, components, chat_template) {
|
|
20921
|
+
super(config, components, chat_template);
|
|
20922
|
+
this.image_seq_length = this.config.image_seq_length;
|
|
20923
|
+
const { boi_token, image_token, eoi_token } = this.tokenizer.config;
|
|
20924
|
+
this.boi_token = boi_token;
|
|
20925
|
+
this.image_token = image_token;
|
|
20926
|
+
this.eoi_token = eoi_token;
|
|
20927
|
+
const image_tokens_expanded = image_token.repeat(this.image_seq_length);
|
|
20928
|
+
this.full_image_sequence = `
|
|
20929
|
+
|
|
20930
|
+
${boi_token}${image_tokens_expanded}${eoi_token}
|
|
20931
|
+
|
|
20932
|
+
`;
|
|
20933
|
+
}
|
|
20934
|
+
/**
|
|
20935
|
+
* @param {string|string[]} text
|
|
20936
|
+
* @param {import('../../utils/image.js').RawImage|import('../../utils/image.js').RawImage[]} [images]
|
|
20937
|
+
* @param {Object} [options]
|
|
20938
|
+
*/
|
|
20939
|
+
async _call(text, images = null, options = {}) {
|
|
20940
|
+
if (typeof text === "string") {
|
|
20941
|
+
text = [text];
|
|
20942
|
+
}
|
|
20943
|
+
let image_inputs;
|
|
20944
|
+
if (images) {
|
|
20945
|
+
image_inputs = await this.image_processor(images, options);
|
|
20946
|
+
text = text.map((prompt) => prompt.replaceAll(this.boi_token, this.full_image_sequence));
|
|
20947
|
+
}
|
|
20948
|
+
const text_inputs = this.tokenizer(text, options);
|
|
20949
|
+
return {
|
|
20950
|
+
...text_inputs,
|
|
20951
|
+
...image_inputs
|
|
20952
|
+
};
|
|
20953
|
+
}
|
|
20954
|
+
};
|
|
20955
|
+
|
|
20888
20956
|
// src/models/gemma3n/processing_gemma3n.js
|
|
20889
20957
|
var Gemma3nProcessor = class extends Processor {
|
|
20890
20958
|
static image_processor_class = AutoImageProcessor;
|
|
@@ -20957,6 +21025,56 @@ ${boi_token}${image_tokens_expanded}${eoi_token}
|
|
|
20957
21025
|
}
|
|
20958
21026
|
};
|
|
20959
21027
|
|
|
21028
|
+
// src/models/qwen2_vl/processing_qwen2_vl.js
|
|
21029
|
+
var Qwen2VLProcessor = class extends Processor {
|
|
21030
|
+
static image_processor_class = AutoImageProcessor;
|
|
21031
|
+
static tokenizer_class = AutoTokenizer;
|
|
21032
|
+
static image_token = "<|image_pad|>";
|
|
21033
|
+
/**
|
|
21034
|
+
*
|
|
21035
|
+
* @param {string|string[]} text
|
|
21036
|
+
* @param {RawImage|RawImage[]} images
|
|
21037
|
+
* @param {...any} args
|
|
21038
|
+
* @returns {Promise<any>}
|
|
21039
|
+
*/
|
|
21040
|
+
async _call(text, images = null, ...args) {
|
|
21041
|
+
if (!Array.isArray(text)) {
|
|
21042
|
+
text = [text];
|
|
21043
|
+
}
|
|
21044
|
+
let image_inputs, image_grid_thw;
|
|
21045
|
+
if (images) {
|
|
21046
|
+
image_inputs = await this.image_processor(images);
|
|
21047
|
+
image_grid_thw = image_inputs.image_grid_thw;
|
|
21048
|
+
}
|
|
21049
|
+
if (image_grid_thw) {
|
|
21050
|
+
let merge_length = this.image_processor.config.merge_size ** 2;
|
|
21051
|
+
let index = 0;
|
|
21052
|
+
const image_token = (
|
|
21053
|
+
/** @type {typeof Qwen2VLProcessor} */
|
|
21054
|
+
this.constructor.image_token
|
|
21055
|
+
);
|
|
21056
|
+
const image_grid_thw_list = image_grid_thw.tolist();
|
|
21057
|
+
text = text.map((t) => {
|
|
21058
|
+
while (t.includes(image_token)) {
|
|
21059
|
+
const prod = Number(image_grid_thw_list[index++].reduce((a, b) => a * b, 1n));
|
|
21060
|
+
t = t.replace(image_token, "<|placeholder|>".repeat(Math.floor(prod / merge_length)));
|
|
21061
|
+
}
|
|
21062
|
+
return t.replaceAll("<|placeholder|>", image_token);
|
|
21063
|
+
});
|
|
21064
|
+
}
|
|
21065
|
+
const text_inputs = this.tokenizer(text);
|
|
21066
|
+
return {
|
|
21067
|
+
...text_inputs,
|
|
21068
|
+
...image_inputs
|
|
21069
|
+
};
|
|
21070
|
+
}
|
|
21071
|
+
};
|
|
21072
|
+
|
|
21073
|
+
// src/models/glm46v/processing_glm46v.js
|
|
21074
|
+
var Glm46VProcessor = class extends Qwen2VLProcessor {
|
|
21075
|
+
static image_token = "<|image|>";
|
|
21076
|
+
};
|
|
21077
|
+
|
|
20960
21078
|
// src/models/granite_speech/processing_granite_speech.js
|
|
20961
21079
|
var GraniteSpeechProcessor = class extends Processor {
|
|
20962
21080
|
static tokenizer_class = AutoTokenizer;
|
|
@@ -21687,47 +21805,6 @@ var PyAnnoteProcessor = class extends Processor {
|
|
|
21687
21805
|
}
|
|
21688
21806
|
};
|
|
21689
21807
|
|
|
21690
|
-
// src/models/qwen2_vl/processing_qwen2_vl.js
|
|
21691
|
-
var Qwen2VLProcessor = class extends Processor {
|
|
21692
|
-
static image_processor_class = AutoImageProcessor;
|
|
21693
|
-
static tokenizer_class = AutoTokenizer;
|
|
21694
|
-
/**
|
|
21695
|
-
*
|
|
21696
|
-
* @param {string|string[]} text
|
|
21697
|
-
* @param {RawImage|RawImage[]} images
|
|
21698
|
-
* @param {...any} args
|
|
21699
|
-
* @returns {Promise<any>}
|
|
21700
|
-
*/
|
|
21701
|
-
async _call(text, images = null, ...args) {
|
|
21702
|
-
if (!Array.isArray(text)) {
|
|
21703
|
-
text = [text];
|
|
21704
|
-
}
|
|
21705
|
-
let image_inputs, image_grid_thw;
|
|
21706
|
-
if (images) {
|
|
21707
|
-
image_inputs = await this.image_processor(images);
|
|
21708
|
-
image_grid_thw = image_inputs.image_grid_thw;
|
|
21709
|
-
}
|
|
21710
|
-
if (image_grid_thw) {
|
|
21711
|
-
let merge_length = this.image_processor.config.merge_size ** 2;
|
|
21712
|
-
let index = 0;
|
|
21713
|
-
const image_grid_thw_list = image_grid_thw.tolist();
|
|
21714
|
-
text = text.map((t) => {
|
|
21715
|
-
while (t.includes("<|image_pad|>")) {
|
|
21716
|
-
const prod = Number(image_grid_thw_list[index++].reduce((a, b) => a * b, 1n));
|
|
21717
|
-
t = t.replace("<|image_pad|>", "<|placeholder|>".repeat(Math.floor(prod / merge_length)));
|
|
21718
|
-
}
|
|
21719
|
-
return t.replaceAll("<|placeholder|>", "<|image_pad|>");
|
|
21720
|
-
});
|
|
21721
|
-
}
|
|
21722
|
-
const text_inputs = this.tokenizer(text);
|
|
21723
|
-
return {
|
|
21724
|
-
...text_inputs,
|
|
21725
|
-
...image_inputs
|
|
21726
|
-
// TODO: ...videos_inputs,
|
|
21727
|
-
};
|
|
21728
|
-
}
|
|
21729
|
-
};
|
|
21730
|
-
|
|
21731
21808
|
// src/models/qwen2_5_vl/processing_qwen2_5_vl.js
|
|
21732
21809
|
var Qwen2_5_VLProcessor = class extends Qwen2VLProcessor {
|
|
21733
21810
|
};
|
|
@@ -22071,6 +22148,8 @@ function getNormalizedConfig(config) {
|
|
|
22071
22148
|
case "gemma3n":
|
|
22072
22149
|
case "lfm2_vl":
|
|
22073
22150
|
case "chatterbox":
|
|
22151
|
+
case "lighton_ocr":
|
|
22152
|
+
case "glm_ocr":
|
|
22074
22153
|
case "mistral3":
|
|
22075
22154
|
case "qwen2_5_vl":
|
|
22076
22155
|
case "qwen3_vl":
|
|
@@ -22146,6 +22225,8 @@ function getNormalizedConfig(config) {
|
|
|
22146
22225
|
mapping["dim_kv"] = "head_dim";
|
|
22147
22226
|
break;
|
|
22148
22227
|
case "qwen3":
|
|
22228
|
+
case "solar_open":
|
|
22229
|
+
case "glm_ocr_text":
|
|
22149
22230
|
case "gemma":
|
|
22150
22231
|
case "gemma2":
|
|
22151
22232
|
case "vaultgemma":
|
|
@@ -22156,6 +22237,7 @@ function getNormalizedConfig(config) {
|
|
|
22156
22237
|
case "ernie4_5":
|
|
22157
22238
|
case "hunyuan_v1_dense":
|
|
22158
22239
|
case "falcon_h1":
|
|
22240
|
+
case "nemotron_h":
|
|
22159
22241
|
case "ministral":
|
|
22160
22242
|
case "ministral3":
|
|
22161
22243
|
mapping["num_heads"] = "num_key_value_heads";
|
|
@@ -22190,6 +22272,9 @@ function getNormalizedConfig(config) {
|
|
|
22190
22272
|
mapping["num_attention_heads"] = "num_attention_heads";
|
|
22191
22273
|
break;
|
|
22192
22274
|
case "youtu":
|
|
22275
|
+
case "deepseek_v3":
|
|
22276
|
+
case "glm_moe_dsa":
|
|
22277
|
+
case "mistral4":
|
|
22193
22278
|
mapping["num_heads"] = "num_key_value_heads";
|
|
22194
22279
|
mapping["num_layers"] = "num_hidden_layers";
|
|
22195
22280
|
mapping["dim_kv"] = "qk_head_dim";
|
|
@@ -22278,6 +22363,7 @@ function getCacheShapes(config, options) {
|
|
|
22278
22363
|
if (!(config instanceof PretrainedConfig)) {
|
|
22279
22364
|
config = new PretrainedConfig(config);
|
|
22280
22365
|
}
|
|
22366
|
+
const batch_size = options?.batch_size ?? 1;
|
|
22281
22367
|
if (["lfm2", "lfm2_moe"].includes(config.model_type)) {
|
|
22282
22368
|
const pkv_prefix = options?.prefix ?? "past_key_values";
|
|
22283
22369
|
const conv_prefix = pkv_prefix === "present" ? "present" : "past";
|
|
@@ -22287,7 +22373,6 @@ function getCacheShapes(config, options) {
|
|
|
22287
22373
|
config
|
|
22288
22374
|
);
|
|
22289
22375
|
const head_dim = hidden_size / num_attention_heads;
|
|
22290
|
-
const batch_size = options?.batch_size ?? 1;
|
|
22291
22376
|
for (let i = 0; i < layer_types.length; ++i) {
|
|
22292
22377
|
if (layer_types[i] === "full_attention") {
|
|
22293
22378
|
for (const kv of ["key", "value"]) {
|
|
@@ -22300,31 +22385,26 @@ function getCacheShapes(config, options) {
|
|
|
22300
22385
|
}
|
|
22301
22386
|
}
|
|
22302
22387
|
return cache_values;
|
|
22303
|
-
} else if (["granitemoehybrid", "falcon_h1"].includes(config.model_type)) {
|
|
22388
|
+
} else if (["granitemoehybrid", "falcon_h1", "nemotron_h"].includes(config.model_type)) {
|
|
22304
22389
|
const pkv_prefix = options?.prefix ?? "past_key_values";
|
|
22305
22390
|
const conv_prefix = pkv_prefix === "present" ? "present" : "past";
|
|
22306
|
-
const
|
|
22307
|
-
const {
|
|
22308
|
-
layer_types,
|
|
22309
|
-
num_hidden_layers,
|
|
22310
|
-
num_attention_heads,
|
|
22311
|
-
num_key_value_heads,
|
|
22312
|
-
hidden_size,
|
|
22313
|
-
mamba_d_conv,
|
|
22314
|
-
mamba_n_heads,
|
|
22315
|
-
mamba_d_head,
|
|
22316
|
-
mamba_d_state,
|
|
22317
|
-
mamba_n_groups,
|
|
22318
|
-
mamba_expand,
|
|
22319
|
-
mamba_d_ssm
|
|
22320
|
-
} = (
|
|
22391
|
+
const c = (
|
|
22321
22392
|
/** @type {any} */
|
|
22322
22393
|
config
|
|
22323
22394
|
);
|
|
22324
|
-
const
|
|
22325
|
-
const
|
|
22326
|
-
const
|
|
22327
|
-
|
|
22395
|
+
const layer_types = c.layer_types ?? c.layers_block_type;
|
|
22396
|
+
const num_layers = c.num_hidden_layers ?? layer_types?.length;
|
|
22397
|
+
const num_key_value_heads = c.num_key_value_heads;
|
|
22398
|
+
const head_dim = c.head_dim ?? c.hidden_size / c.num_attention_heads;
|
|
22399
|
+
const mamba_n_heads = c.mamba_n_heads ?? c.mamba_num_heads;
|
|
22400
|
+
const mamba_d_head = c.mamba_d_head ?? c.mamba_head_dim;
|
|
22401
|
+
const mamba_d_state = c.mamba_d_state ?? c.ssm_state_size;
|
|
22402
|
+
const mamba_n_groups = c.mamba_n_groups ?? c.n_groups;
|
|
22403
|
+
const mamba_d_conv = c.mamba_d_conv ?? c.conv_kernel;
|
|
22404
|
+
const mamba_d_ssm = c.mamba_d_ssm ?? (c.mamba_expand ? c.mamba_expand * c.hidden_size : mamba_n_heads * mamba_d_head);
|
|
22405
|
+
const conv_d_inner = mamba_d_ssm + 2 * mamba_n_groups * mamba_d_state;
|
|
22406
|
+
const cache_values = {};
|
|
22407
|
+
for (let i = 0; i < num_layers; ++i) {
|
|
22328
22408
|
if (!layer_types || layer_types[i] === "mamba") {
|
|
22329
22409
|
cache_values[`${conv_prefix}_conv.${i}`] = [batch_size, conv_d_inner, mamba_d_conv];
|
|
22330
22410
|
cache_values[`${conv_prefix}_ssm.${i}`] = [batch_size, mamba_n_heads, mamba_d_head, mamba_d_state];
|
|
@@ -22358,7 +22438,6 @@ function getCacheShapes(config, options) {
|
|
|
22358
22438
|
const key_dim = linear_key_head_dim * linear_num_key_heads;
|
|
22359
22439
|
const value_dim = linear_value_head_dim * linear_num_value_heads;
|
|
22360
22440
|
const final_head_dim = head_dim ?? hidden_size / num_attention_heads;
|
|
22361
|
-
const batch_size = options?.batch_size ?? 1;
|
|
22362
22441
|
for (let i = 0; i < layer_types.length; ++i) {
|
|
22363
22442
|
if (layer_types[i] === "full_attention") {
|
|
22364
22443
|
for (const kv of ["key", "value"]) {
|
|
@@ -24054,8 +24133,7 @@ var MODEL_TYPES = {
|
|
|
24054
24133
|
ImageAudioTextToText: 13,
|
|
24055
24134
|
Supertonic: 14,
|
|
24056
24135
|
Chatterbox: 15,
|
|
24057
|
-
|
|
24058
|
-
VoxtralRealtime: 17
|
|
24136
|
+
VoxtralRealtime: 16
|
|
24059
24137
|
};
|
|
24060
24138
|
var MODEL_TYPE_CONFIG = {
|
|
24061
24139
|
[MODEL_TYPES.DecoderOnly]: {
|
|
@@ -24112,12 +24190,12 @@ var MODEL_TYPE_CONFIG = {
|
|
|
24112
24190
|
can_generate: true,
|
|
24113
24191
|
forward: image_text_to_text_forward,
|
|
24114
24192
|
prepare_inputs: multimodal_text_to_text_prepare_inputs_for_generation,
|
|
24115
|
-
sessions: (config) => {
|
|
24193
|
+
sessions: (config, options, textOnly) => {
|
|
24116
24194
|
const s = {
|
|
24117
24195
|
embed_tokens: "embed_tokens",
|
|
24118
|
-
vision_encoder: "vision_encoder",
|
|
24119
24196
|
decoder_model_merged: "decoder_model_merged"
|
|
24120
24197
|
};
|
|
24198
|
+
if (!textOnly) s["vision_encoder"] = "vision_encoder";
|
|
24121
24199
|
if (config.is_encoder_decoder) s["model"] = "encoder_model";
|
|
24122
24200
|
return s;
|
|
24123
24201
|
},
|
|
@@ -24139,12 +24217,17 @@ var MODEL_TYPE_CONFIG = {
|
|
|
24139
24217
|
[MODEL_TYPES.ImageAudioTextToText]: {
|
|
24140
24218
|
can_generate: true,
|
|
24141
24219
|
prepare_inputs: multimodal_text_to_text_prepare_inputs_for_generation,
|
|
24142
|
-
sessions: () =>
|
|
24143
|
-
|
|
24144
|
-
|
|
24145
|
-
|
|
24146
|
-
|
|
24147
|
-
|
|
24220
|
+
sessions: (config, options, textOnly) => {
|
|
24221
|
+
const s = {
|
|
24222
|
+
embed_tokens: "embed_tokens",
|
|
24223
|
+
decoder_model_merged: "decoder_model_merged"
|
|
24224
|
+
};
|
|
24225
|
+
if (!textOnly) {
|
|
24226
|
+
s["audio_encoder"] = "audio_encoder";
|
|
24227
|
+
s["vision_encoder"] = "vision_encoder";
|
|
24228
|
+
}
|
|
24229
|
+
return s;
|
|
24230
|
+
},
|
|
24148
24231
|
optional_configs: { generation_config: "generation_config.json" }
|
|
24149
24232
|
},
|
|
24150
24233
|
[MODEL_TYPES.Phi3V]: {
|
|
@@ -24195,14 +24278,6 @@ var MODEL_TYPE_CONFIG = {
|
|
|
24195
24278
|
cache_sessions: { model: true },
|
|
24196
24279
|
optional_configs: { generation_config: "generation_config.json" }
|
|
24197
24280
|
},
|
|
24198
|
-
[MODEL_TYPES.MultimodalLanguageModelOnly]: {
|
|
24199
|
-
can_generate: true,
|
|
24200
|
-
forward: image_text_to_text_forward,
|
|
24201
|
-
prepare_inputs: multimodal_text_to_text_prepare_inputs_for_generation,
|
|
24202
|
-
sessions: () => ({ embed_tokens: "embed_tokens", decoder_model_merged: "decoder_model_merged" }),
|
|
24203
|
-
cache_sessions: { decoder_model_merged: true },
|
|
24204
|
-
optional_configs: { generation_config: "generation_config.json" }
|
|
24205
|
-
},
|
|
24206
24281
|
[MODEL_TYPES.VoxtralRealtime]: {
|
|
24207
24282
|
can_generate: true,
|
|
24208
24283
|
prepare_inputs: decoder_prepare_inputs_for_generation,
|
|
@@ -24228,6 +24303,19 @@ function getSessionsConfig(modelType, config, options = {}) {
|
|
|
24228
24303
|
optional_configs: typeConfig.optional_configs
|
|
24229
24304
|
};
|
|
24230
24305
|
}
|
|
24306
|
+
function resolveTypeConfig(modelName, config) {
|
|
24307
|
+
let modelType = MODEL_TYPE_MAPPING.get(modelName);
|
|
24308
|
+
let textOnly = false;
|
|
24309
|
+
const nativeArch = config?.architectures?.[0];
|
|
24310
|
+
if (nativeArch && nativeArch !== modelName && modelName?.endsWith("ForCausalLM") && nativeArch.endsWith("ForConditionalGeneration")) {
|
|
24311
|
+
const nativeType = MODEL_TYPE_MAPPING.get(nativeArch);
|
|
24312
|
+
if (nativeType !== void 0) {
|
|
24313
|
+
modelType = nativeType;
|
|
24314
|
+
textOnly = true;
|
|
24315
|
+
}
|
|
24316
|
+
}
|
|
24317
|
+
return { typeConfig: MODEL_TYPE_CONFIG[modelType] ?? MODEL_TYPE_CONFIG.default, textOnly, modelType };
|
|
24318
|
+
}
|
|
24231
24319
|
var MODEL_TYPE_MAPPING = /* @__PURE__ */ new Map();
|
|
24232
24320
|
var MODEL_NAME_TO_CLASS_MAPPING = /* @__PURE__ */ new Map();
|
|
24233
24321
|
var MODEL_CLASS_TO_NAME_MAPPING = /* @__PURE__ */ new Map();
|
|
@@ -24247,8 +24335,7 @@ var PreTrainedModel = class extends Callable2 {
|
|
|
24247
24335
|
this.sessions = sessions;
|
|
24248
24336
|
this.configs = configs;
|
|
24249
24337
|
const modelName = MODEL_CLASS_TO_NAME_MAPPING.get(this.constructor);
|
|
24250
|
-
const
|
|
24251
|
-
const typeConfig = MODEL_TYPE_CONFIG[modelType] ?? MODEL_TYPE_CONFIG.default;
|
|
24338
|
+
const { typeConfig } = resolveTypeConfig(modelName, config);
|
|
24252
24339
|
this.can_generate = typeConfig.can_generate;
|
|
24253
24340
|
this._forward = typeConfig.forward;
|
|
24254
24341
|
this._prepare_inputs_for_generation = typeConfig.prepare_inputs;
|
|
@@ -24311,9 +24398,8 @@ var PreTrainedModel = class extends Callable2 {
|
|
|
24311
24398
|
session_options
|
|
24312
24399
|
};
|
|
24313
24400
|
const modelName = MODEL_CLASS_TO_NAME_MAPPING.get(this);
|
|
24314
|
-
const modelType = MODEL_TYPE_MAPPING.get(modelName);
|
|
24315
24401
|
config = options.config = await AutoConfig.from_pretrained(pretrained_model_name_or_path, options);
|
|
24316
|
-
const typeConfig
|
|
24402
|
+
const { typeConfig, textOnly, modelType } = resolveTypeConfig(modelName, config);
|
|
24317
24403
|
if (modelType === void 0) {
|
|
24318
24404
|
const type = modelName ?? config?.model_type;
|
|
24319
24405
|
if (type !== "custom") {
|
|
@@ -24322,7 +24408,7 @@ var PreTrainedModel = class extends Callable2 {
|
|
|
24322
24408
|
);
|
|
24323
24409
|
}
|
|
24324
24410
|
}
|
|
24325
|
-
const sessions = typeConfig.sessions(config, options);
|
|
24411
|
+
const sessions = typeConfig.sessions(config, options, textOnly);
|
|
24326
24412
|
const promises = [
|
|
24327
24413
|
constructSessions(pretrained_model_name_or_path, sessions, options, typeConfig.cache_sessions)
|
|
24328
24414
|
];
|
|
@@ -24986,7 +25072,9 @@ async function generic_text_to_text_forward(self2, {
|
|
|
24986
25072
|
"qwen3_5",
|
|
24987
25073
|
"qwen3_5_text",
|
|
24988
25074
|
"qwen3_5_moe",
|
|
24989
|
-
"qwen3_5_moe_text"
|
|
25075
|
+
"qwen3_5_moe_text",
|
|
25076
|
+
"glm_ocr",
|
|
25077
|
+
"glm_ocr_text"
|
|
24990
25078
|
].includes(self2.config.model_type)
|
|
24991
25079
|
) {
|
|
24992
25080
|
const { image_grid_thw, video_grid_thw } = kwargs;
|
|
@@ -25210,6 +25298,8 @@ __export(models_exports, {
|
|
|
25210
25298
|
BloomForCausalLM: () => BloomForCausalLM,
|
|
25211
25299
|
BloomModel: () => BloomModel,
|
|
25212
25300
|
BloomPreTrainedModel: () => BloomPreTrainedModel,
|
|
25301
|
+
CHMv2ForDepthEstimation: () => CHMv2ForDepthEstimation,
|
|
25302
|
+
CHMv2PreTrainedModel: () => CHMv2PreTrainedModel,
|
|
25213
25303
|
CLIPModel: () => CLIPModel,
|
|
25214
25304
|
CLIPPreTrainedModel: () => CLIPPreTrainedModel,
|
|
25215
25305
|
CLIPSegForImageSegmentation: () => CLIPSegForImageSegmentation,
|
|
@@ -25284,6 +25374,9 @@ __export(models_exports, {
|
|
|
25284
25374
|
DebertaV2PreTrainedModel: () => DebertaV2PreTrainedModel,
|
|
25285
25375
|
DecisionTransformerModel: () => DecisionTransformerModel,
|
|
25286
25376
|
DecisionTransformerPreTrainedModel: () => DecisionTransformerPreTrainedModel,
|
|
25377
|
+
DeepseekV3ForCausalLM: () => DeepseekV3ForCausalLM,
|
|
25378
|
+
DeepseekV3Model: () => DeepseekV3Model,
|
|
25379
|
+
DeepseekV3PreTrainedModel: () => DeepseekV3PreTrainedModel,
|
|
25287
25380
|
DeiTForImageClassification: () => DeiTForImageClassification,
|
|
25288
25381
|
DeiTModel: () => DeiTModel,
|
|
25289
25382
|
DeiTPreTrainedModel: () => DeiTPreTrainedModel,
|
|
@@ -25329,6 +25422,11 @@ __export(models_exports, {
|
|
|
25329
25422
|
EsmForTokenClassification: () => EsmForTokenClassification,
|
|
25330
25423
|
EsmModel: () => EsmModel,
|
|
25331
25424
|
EsmPreTrainedModel: () => EsmPreTrainedModel,
|
|
25425
|
+
EuroBertForMaskedLM: () => EuroBertForMaskedLM,
|
|
25426
|
+
EuroBertForSequenceClassification: () => EuroBertForSequenceClassification,
|
|
25427
|
+
EuroBertForTokenClassification: () => EuroBertForTokenClassification,
|
|
25428
|
+
EuroBertModel: () => EuroBertModel,
|
|
25429
|
+
EuroBertPreTrainedModel: () => EuroBertPreTrainedModel,
|
|
25332
25430
|
ExaoneForCausalLM: () => ExaoneForCausalLM,
|
|
25333
25431
|
ExaoneModel: () => ExaoneModel,
|
|
25334
25432
|
ExaonePreTrainedModel: () => ExaonePreTrainedModel,
|
|
@@ -25365,6 +25463,7 @@ __export(models_exports, {
|
|
|
25365
25463
|
Gemma2Model: () => Gemma2Model,
|
|
25366
25464
|
Gemma2PreTrainedModel: () => Gemma2PreTrainedModel,
|
|
25367
25465
|
Gemma3ForCausalLM: () => Gemma3ForCausalLM,
|
|
25466
|
+
Gemma3ForConditionalGeneration: () => Gemma3ForConditionalGeneration,
|
|
25368
25467
|
Gemma3Model: () => Gemma3Model,
|
|
25369
25468
|
Gemma3PreTrainedModel: () => Gemma3PreTrainedModel,
|
|
25370
25469
|
Gemma3nForCausalLM: () => Gemma3nForCausalLM,
|
|
@@ -25375,6 +25474,10 @@ __export(models_exports, {
|
|
|
25375
25474
|
GemmaPreTrainedModel: () => GemmaPreTrainedModel,
|
|
25376
25475
|
GlmForCausalLM: () => GlmForCausalLM,
|
|
25377
25476
|
GlmModel: () => GlmModel,
|
|
25477
|
+
GlmMoeDsaForCausalLM: () => GlmMoeDsaForCausalLM,
|
|
25478
|
+
GlmMoeDsaModel: () => GlmMoeDsaModel,
|
|
25479
|
+
GlmMoeDsaPreTrainedModel: () => GlmMoeDsaPreTrainedModel,
|
|
25480
|
+
GlmOcrForConditionalGeneration: () => GlmOcrForConditionalGeneration,
|
|
25378
25481
|
GlmPreTrainedModel: () => GlmPreTrainedModel,
|
|
25379
25482
|
GptOssForCausalLM: () => GptOssForCausalLM,
|
|
25380
25483
|
GptOssModel: () => GptOssModel,
|
|
@@ -25421,6 +25524,7 @@ __export(models_exports, {
|
|
|
25421
25524
|
Lfm2MoePreTrainedModel: () => Lfm2MoePreTrainedModel,
|
|
25422
25525
|
Lfm2PreTrainedModel: () => Lfm2PreTrainedModel,
|
|
25423
25526
|
Lfm2VlForConditionalGeneration: () => Lfm2VlForConditionalGeneration,
|
|
25527
|
+
LightOnOcrForConditionalGeneration: () => LightOnOcrForConditionalGeneration,
|
|
25424
25528
|
LiteWhisperForConditionalGeneration: () => LiteWhisperForConditionalGeneration,
|
|
25425
25529
|
Llama4ForCausalLM: () => Llama4ForCausalLM,
|
|
25426
25530
|
Llama4PreTrainedModel: () => Llama4PreTrainedModel,
|
|
@@ -25470,6 +25574,9 @@ __export(models_exports, {
|
|
|
25470
25574
|
MimiEncoderOutput: () => MimiEncoderOutput,
|
|
25471
25575
|
MimiModel: () => MimiModel,
|
|
25472
25576
|
MimiPreTrainedModel: () => MimiPreTrainedModel,
|
|
25577
|
+
Mistral4ForCausalLM: () => Mistral4ForCausalLM,
|
|
25578
|
+
Mistral4Model: () => Mistral4Model,
|
|
25579
|
+
Mistral4PreTrainedModel: () => Mistral4PreTrainedModel,
|
|
25473
25580
|
MistralForCausalLM: () => MistralForCausalLM,
|
|
25474
25581
|
MistralModel: () => MistralModel,
|
|
25475
25582
|
MistralPreTrainedModel: () => MistralPreTrainedModel,
|
|
@@ -25527,6 +25634,9 @@ __export(models_exports, {
|
|
|
25527
25634
|
NanoChatForCausalLM: () => NanoChatForCausalLM,
|
|
25528
25635
|
NanoChatModel: () => NanoChatModel,
|
|
25529
25636
|
NanoChatPreTrainedModel: () => NanoChatPreTrainedModel,
|
|
25637
|
+
NemotronHForCausalLM: () => NemotronHForCausalLM,
|
|
25638
|
+
NemotronHModel: () => NemotronHModel,
|
|
25639
|
+
NemotronHPreTrainedModel: () => NemotronHPreTrainedModel,
|
|
25530
25640
|
NeoBertForMaskedLM: () => NeoBertForMaskedLM,
|
|
25531
25641
|
NeoBertForQuestionAnswering: () => NeoBertForQuestionAnswering,
|
|
25532
25642
|
NeoBertForSequenceClassification: () => NeoBertForSequenceClassification,
|
|
@@ -25664,6 +25774,9 @@ __export(models_exports, {
|
|
|
25664
25774
|
SnacEncoderModel: () => SnacEncoderModel,
|
|
25665
25775
|
SnacModel: () => SnacModel,
|
|
25666
25776
|
SnacPreTrainedModel: () => SnacPreTrainedModel,
|
|
25777
|
+
SolarOpenForCausalLM: () => SolarOpenForCausalLM,
|
|
25778
|
+
SolarOpenModel: () => SolarOpenModel,
|
|
25779
|
+
SolarOpenPreTrainedModel: () => SolarOpenPreTrainedModel,
|
|
25667
25780
|
SpeechT5ForSpeechToText: () => SpeechT5ForSpeechToText,
|
|
25668
25781
|
SpeechT5ForTextToSpeech: () => SpeechT5ForTextToSpeech,
|
|
25669
25782
|
SpeechT5HifiGan: () => SpeechT5HifiGan,
|
|
@@ -25838,7 +25951,7 @@ var ArceeModel = class extends ArceePreTrainedModel {
|
|
|
25838
25951
|
var ArceeForCausalLM = class extends ArceePreTrainedModel {
|
|
25839
25952
|
};
|
|
25840
25953
|
|
|
25841
|
-
// src/models/
|
|
25954
|
+
// src/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.js
|
|
25842
25955
|
var ASTPreTrainedModel = class extends PreTrainedModel {
|
|
25843
25956
|
};
|
|
25844
25957
|
var ASTModel = class extends ASTPreTrainedModel {
|
|
@@ -26173,6 +26286,12 @@ var ChineseCLIPPreTrainedModel = class extends PreTrainedModel {
|
|
|
26173
26286
|
var ChineseCLIPModel = class extends ChineseCLIPPreTrainedModel {
|
|
26174
26287
|
};
|
|
26175
26288
|
|
|
26289
|
+
// src/models/chmv2/modeling_chmv2.js
|
|
26290
|
+
var CHMv2PreTrainedModel = class extends PreTrainedModel {
|
|
26291
|
+
};
|
|
26292
|
+
var CHMv2ForDepthEstimation = class extends CHMv2PreTrainedModel {
|
|
26293
|
+
};
|
|
26294
|
+
|
|
26176
26295
|
// src/models/clap/modeling_clap.js
|
|
26177
26296
|
var ClapPreTrainedModel = class extends PreTrainedModel {
|
|
26178
26297
|
};
|
|
@@ -26511,6 +26630,14 @@ var DebertaForQuestionAnswering = class extends DebertaPreTrainedModel {
|
|
|
26511
26630
|
}
|
|
26512
26631
|
};
|
|
26513
26632
|
|
|
26633
|
+
// src/models/deepseek_v3/modeling_deepseek_v3.js
|
|
26634
|
+
var DeepseekV3PreTrainedModel = class extends PreTrainedModel {
|
|
26635
|
+
};
|
|
26636
|
+
var DeepseekV3Model = class extends DeepseekV3PreTrainedModel {
|
|
26637
|
+
};
|
|
26638
|
+
var DeepseekV3ForCausalLM = class extends DeepseekV3PreTrainedModel {
|
|
26639
|
+
};
|
|
26640
|
+
|
|
26514
26641
|
// src/models/deberta_v2/modeling_deberta_v2.js
|
|
26515
26642
|
var DebertaV2PreTrainedModel = class extends PreTrainedModel {
|
|
26516
26643
|
};
|
|
@@ -26859,6 +26986,45 @@ var EsmForTokenClassification = class extends EsmPreTrainedModel {
|
|
|
26859
26986
|
}
|
|
26860
26987
|
};
|
|
26861
26988
|
|
|
26989
|
+
// src/models/eurobert/modeling_eurobert.js
|
|
26990
|
+
var EuroBertPreTrainedModel = class extends PreTrainedModel {
|
|
26991
|
+
};
|
|
26992
|
+
var EuroBertModel = class extends EuroBertPreTrainedModel {
|
|
26993
|
+
};
|
|
26994
|
+
var EuroBertForMaskedLM = class extends EuroBertPreTrainedModel {
|
|
26995
|
+
/**
|
|
26996
|
+
* Calls the model on new inputs.
|
|
26997
|
+
*
|
|
26998
|
+
* @param {Object} model_inputs The inputs to the model.
|
|
26999
|
+
* @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
|
|
27000
|
+
*/
|
|
27001
|
+
async _call(model_inputs) {
|
|
27002
|
+
return new MaskedLMOutput(await super._call(model_inputs));
|
|
27003
|
+
}
|
|
27004
|
+
};
|
|
27005
|
+
var EuroBertForSequenceClassification = class extends EuroBertPreTrainedModel {
|
|
27006
|
+
/**
|
|
27007
|
+
* Calls the model on new inputs.
|
|
27008
|
+
*
|
|
27009
|
+
* @param {Object} model_inputs The inputs to the model.
|
|
27010
|
+
* @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
|
|
27011
|
+
*/
|
|
27012
|
+
async _call(model_inputs) {
|
|
27013
|
+
return new SequenceClassifierOutput(await super._call(model_inputs));
|
|
27014
|
+
}
|
|
27015
|
+
};
|
|
27016
|
+
var EuroBertForTokenClassification = class extends EuroBertPreTrainedModel {
|
|
27017
|
+
/**
|
|
27018
|
+
* Calls the model on new inputs.
|
|
27019
|
+
*
|
|
27020
|
+
* @param {Object} model_inputs The inputs to the model.
|
|
27021
|
+
* @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
|
|
27022
|
+
*/
|
|
27023
|
+
async _call(model_inputs) {
|
|
27024
|
+
return new TokenClassifierOutput(await super._call(model_inputs));
|
|
27025
|
+
}
|
|
27026
|
+
};
|
|
27027
|
+
|
|
26862
27028
|
// src/models/exaone/modeling_exaone.js
|
|
26863
27029
|
var ExaonePreTrainedModel = class extends PreTrainedModel {
|
|
26864
27030
|
};
|
|
@@ -27016,12 +27182,35 @@ var Gemma2Model = class extends Gemma2PreTrainedModel {
|
|
|
27016
27182
|
var Gemma2ForCausalLM = class extends Gemma2PreTrainedModel {
|
|
27017
27183
|
};
|
|
27018
27184
|
|
|
27185
|
+
// src/models/llava/modeling_llava.js
|
|
27186
|
+
var LlavaPreTrainedModel = class extends PreTrainedModel {
|
|
27187
|
+
forward_params = ["input_ids", "attention_mask", "pixel_values", "position_ids", "past_key_values"];
|
|
27188
|
+
};
|
|
27189
|
+
var LlavaForConditionalGeneration = class extends LlavaPreTrainedModel {
|
|
27190
|
+
_merge_input_ids_with_image_features(kwargs) {
|
|
27191
|
+
const vision_hidden_size = kwargs.image_features.dims.at(-1);
|
|
27192
|
+
const reshaped_image_hidden_states = kwargs.image_features.view(-1, vision_hidden_size);
|
|
27193
|
+
return default_merge_input_ids_with_image_features({
|
|
27194
|
+
// @ts-ignore
|
|
27195
|
+
image_token_id: this.config.image_token_index ?? this.config.image_token_id,
|
|
27196
|
+
...kwargs,
|
|
27197
|
+
image_features: reshaped_image_hidden_states
|
|
27198
|
+
});
|
|
27199
|
+
}
|
|
27200
|
+
};
|
|
27201
|
+
var Moondream1ForConditionalGeneration = class extends LlavaForConditionalGeneration {
|
|
27202
|
+
};
|
|
27203
|
+
var LlavaQwen2ForCausalLM = class extends LlavaForConditionalGeneration {
|
|
27204
|
+
};
|
|
27205
|
+
|
|
27019
27206
|
// src/models/gemma3/modeling_gemma3.js
|
|
27020
27207
|
var Gemma3PreTrainedModel = class extends PreTrainedModel {
|
|
27021
27208
|
};
|
|
27022
27209
|
var Gemma3Model = class extends Gemma3PreTrainedModel {
|
|
27023
27210
|
};
|
|
27024
|
-
var
|
|
27211
|
+
var Gemma3ForConditionalGeneration = class extends LlavaForConditionalGeneration {
|
|
27212
|
+
};
|
|
27213
|
+
var Gemma3ForCausalLM = class extends Gemma3ForConditionalGeneration {
|
|
27025
27214
|
};
|
|
27026
27215
|
|
|
27027
27216
|
// src/models/gemma3n/modeling_gemma3n.js
|
|
@@ -27134,6 +27323,382 @@ var GlmModel = class extends GlmPreTrainedModel {
|
|
|
27134
27323
|
var GlmForCausalLM = class extends GlmPreTrainedModel {
|
|
27135
27324
|
};
|
|
27136
27325
|
|
|
27326
|
+
// src/models/glm_moe_dsa/modeling_glm_moe_dsa.js
|
|
27327
|
+
var GlmMoeDsaPreTrainedModel = class extends PreTrainedModel {
|
|
27328
|
+
};
|
|
27329
|
+
var GlmMoeDsaModel = class extends GlmMoeDsaPreTrainedModel {
|
|
27330
|
+
};
|
|
27331
|
+
var GlmMoeDsaForCausalLM = class extends GlmMoeDsaPreTrainedModel {
|
|
27332
|
+
};
|
|
27333
|
+
|
|
27334
|
+
// src/models/qwen2_vl/modeling_qwen2_vl.js
|
|
27335
|
+
var Qwen2VLPreTrainedModel = class extends PreTrainedModel {
|
|
27336
|
+
forward_params = [
|
|
27337
|
+
// Text inputs
|
|
27338
|
+
"input_ids",
|
|
27339
|
+
"attention_mask",
|
|
27340
|
+
"position_ids",
|
|
27341
|
+
"past_key_values",
|
|
27342
|
+
// Vision inputs
|
|
27343
|
+
"pixel_values",
|
|
27344
|
+
"image_grid_thw"
|
|
27345
|
+
];
|
|
27346
|
+
};
|
|
27347
|
+
var Qwen2VLForConditionalGeneration = class extends Qwen2VLPreTrainedModel {
|
|
27348
|
+
// NOTE: This is used as the base class for all Qwen VL models and their CausalLM variants.
|
|
27349
|
+
// CausalLM variants (e.g., Qwen2VLForCausalLM) extend this class but load only
|
|
27350
|
+
// embed_tokens + decoder_model_merged (no vision_encoder) via MultimodalLanguageModelOnly type.
|
|
27351
|
+
image_grid_thw_name = "grid_thw";
|
|
27352
|
+
/**
|
|
27353
|
+
* Compute text-only 3D rope position IDs (all 3 dims get the same 1D positions).
|
|
27354
|
+
* @param {Tensor} input_ids
|
|
27355
|
+
* @param {Tensor} attention_mask
|
|
27356
|
+
* @returns {[Tensor, Tensor]} [position_ids, mrope_position_deltas]
|
|
27357
|
+
*/
|
|
27358
|
+
_get_text_only_rope_index(input_ids, attention_mask) {
|
|
27359
|
+
if (attention_mask) {
|
|
27360
|
+
const { data, dims } = cumsum_masked_fill(attention_mask);
|
|
27361
|
+
const position_ids = BigInt64Array.from({ length: 3 * data.length }, (_, i) => data[i % data.length]);
|
|
27362
|
+
const mrope_position_deltas = Array.from(
|
|
27363
|
+
{ length: dims[0] },
|
|
27364
|
+
(_, i) => max(data.subarray(dims[1] * i, dims[1] * (i + 1)))[0] + 1n + BigInt(dims[1])
|
|
27365
|
+
);
|
|
27366
|
+
return [
|
|
27367
|
+
new Tensor3("int64", position_ids, [3, ...dims]),
|
|
27368
|
+
new Tensor3("int64", mrope_position_deltas, [mrope_position_deltas.length, 1])
|
|
27369
|
+
];
|
|
27370
|
+
} else {
|
|
27371
|
+
const [batch_size, seq_length] = input_ids.dims;
|
|
27372
|
+
const position_ids = BigInt64Array.from(
|
|
27373
|
+
{ length: 3 * batch_size * seq_length },
|
|
27374
|
+
(_, i) => BigInt(Math.floor(i % seq_length / batch_size))
|
|
27375
|
+
);
|
|
27376
|
+
return [new Tensor3("int64", position_ids, [3, ...input_ids.dims]), zeros([batch_size, 1])];
|
|
27377
|
+
}
|
|
27378
|
+
}
|
|
27379
|
+
/**
|
|
27380
|
+
* Reorder per-segment position ID lists from [seg1[t,h,w], seg2[t,h,w], ...] into
|
|
27381
|
+
* global [all_t, all_h, all_w] order, then write back into the position_ids array
|
|
27382
|
+
* respecting attention mask.
|
|
27383
|
+
* @param {number[][]} llm_pos_ids_list List of per-segment position arrays, each of length 3*seg_len
|
|
27384
|
+
* @param {number[]} attn_mask Attention mask for this batch element
|
|
27385
|
+
* @param {number[][][]} position_ids_list [3][batch][seq] output array to write into
|
|
27386
|
+
* @param {number} batch_idx Current batch index
|
|
27387
|
+
* @returns {number[]} Flat reordered positions of length total_len
|
|
27388
|
+
*/
|
|
27389
|
+
_reorder_and_write_positions(llm_pos_ids_list, attn_mask, position_ids_list, batch_idx) {
|
|
27390
|
+
const total_len = llm_pos_ids_list.reduce((acc, x) => acc + x.length, 0);
|
|
27391
|
+
const llm_positions = new Array(total_len);
|
|
27392
|
+
let index = 0;
|
|
27393
|
+
for (let x = 0; x < 3; ++x) {
|
|
27394
|
+
for (const val of llm_pos_ids_list) {
|
|
27395
|
+
const seg_len = val.length / 3;
|
|
27396
|
+
for (let z2 = x * seg_len; z2 < (x + 1) * seg_len; ++z2) {
|
|
27397
|
+
llm_positions[index++] = val[z2];
|
|
27398
|
+
}
|
|
27399
|
+
}
|
|
27400
|
+
}
|
|
27401
|
+
let count2 = 0;
|
|
27402
|
+
for (let y = 0; y < attn_mask.length; ++y) {
|
|
27403
|
+
if (attn_mask[y] == 1) {
|
|
27404
|
+
for (let x = 0; x < 3; ++x) {
|
|
27405
|
+
position_ids_list[x][batch_idx][y] = llm_positions[x * total_len / 3 + count2];
|
|
27406
|
+
}
|
|
27407
|
+
++count2;
|
|
27408
|
+
}
|
|
27409
|
+
}
|
|
27410
|
+
return llm_positions;
|
|
27411
|
+
}
|
|
27412
|
+
/**
|
|
27413
|
+
* Build per-batch position ID segments for multimodal rope.
|
|
27414
|
+
* Override this in subclasses to change how vision/text segments are identified and positioned.
|
|
27415
|
+
* @param {object} params
|
|
27416
|
+
* @param {any[]} params.filtered_ids - attention-masked token IDs for this batch element
|
|
27417
|
+
* @param {any[][]} params.image_grid_thw_list - all image grid dimensions
|
|
27418
|
+
* @param {any[][]} params.video_grid_thw_list - all video grid dimensions
|
|
27419
|
+
* @param {number} params.spatial_merge_size
|
|
27420
|
+
* @param {{image_index: number, video_index: number}} params.state - mutable counters shared across batches
|
|
27421
|
+
* @returns {number[][]} llm_pos_ids_list - segments of [t..., h..., w...] positions
|
|
27422
|
+
*/
|
|
27423
|
+
_get_multimodal_rope_positions({
|
|
27424
|
+
filtered_ids,
|
|
27425
|
+
image_grid_thw_list,
|
|
27426
|
+
video_grid_thw_list,
|
|
27427
|
+
spatial_merge_size,
|
|
27428
|
+
state
|
|
27429
|
+
}) {
|
|
27430
|
+
const { image_token_id, video_token_id, vision_start_token_id } = this.config;
|
|
27431
|
+
const ids = filtered_ids;
|
|
27432
|
+
const vision_start_indices = ids.reduce((acc, x, idx) => {
|
|
27433
|
+
if (x == vision_start_token_id) acc.push(idx);
|
|
27434
|
+
return acc;
|
|
27435
|
+
}, []);
|
|
27436
|
+
const vision_tokens = vision_start_indices.map((x) => ids[x + 1]);
|
|
27437
|
+
const image_nums = vision_tokens.filter((x) => x == image_token_id).length;
|
|
27438
|
+
const video_nums = vision_tokens.filter((x) => x == video_token_id).length;
|
|
27439
|
+
const llm_pos_ids_list = [];
|
|
27440
|
+
let st2 = 0;
|
|
27441
|
+
let remain_images = image_nums;
|
|
27442
|
+
let remain_videos = video_nums;
|
|
27443
|
+
for (let j = 0; j < vision_tokens.length; ++j) {
|
|
27444
|
+
const next_image_token = ids.findIndex((x, i) => i > st2 && x == image_token_id);
|
|
27445
|
+
const next_video_token = ids.findIndex((x, i) => i > st2 && x == video_token_id);
|
|
27446
|
+
const ed_image = remain_images > 0 && next_image_token !== -1 ? next_image_token : ids.length + 1;
|
|
27447
|
+
const ed_video = remain_videos > 0 && next_video_token !== -1 ? next_video_token : ids.length + 1;
|
|
27448
|
+
let ed;
|
|
27449
|
+
let t, h, w;
|
|
27450
|
+
if (ed_image < ed_video) {
|
|
27451
|
+
[t, h, w] = image_grid_thw_list[state.image_index];
|
|
27452
|
+
++state.image_index;
|
|
27453
|
+
--remain_images;
|
|
27454
|
+
ed = ed_image;
|
|
27455
|
+
} else {
|
|
27456
|
+
[t, h, w] = video_grid_thw_list[state.video_index];
|
|
27457
|
+
++state.video_index;
|
|
27458
|
+
--remain_videos;
|
|
27459
|
+
ed = ed_video;
|
|
27460
|
+
}
|
|
27461
|
+
const [llm_grid_t, llm_grid_h, llm_grid_w] = [
|
|
27462
|
+
Number(t),
|
|
27463
|
+
Math.floor(Number(h) / spatial_merge_size),
|
|
27464
|
+
Math.floor(Number(w) / spatial_merge_size)
|
|
27465
|
+
];
|
|
27466
|
+
const text_len = ed - st2;
|
|
27467
|
+
const st_idx = llm_pos_ids_list.length > 0 ? max(llm_pos_ids_list.at(-1))[0] + 1 : 0;
|
|
27468
|
+
llm_pos_ids_list.push(Array.from({ length: 3 * text_len }, (_, i) => st_idx + i % text_len));
|
|
27469
|
+
const offset = text_len + st_idx;
|
|
27470
|
+
const grid_size = llm_grid_t * llm_grid_h * llm_grid_w;
|
|
27471
|
+
const t_index = Array.from(
|
|
27472
|
+
{ length: grid_size },
|
|
27473
|
+
(_, i) => offset + Math.floor(i / (llm_grid_h * llm_grid_w))
|
|
27474
|
+
);
|
|
27475
|
+
const h_index = Array.from(
|
|
27476
|
+
{ length: grid_size },
|
|
27477
|
+
(_, i) => offset + Math.floor(i / llm_grid_w) % llm_grid_h
|
|
27478
|
+
);
|
|
27479
|
+
const w_index = Array.from({ length: grid_size }, (_, i) => offset + i % llm_grid_w);
|
|
27480
|
+
llm_pos_ids_list.push([t_index, h_index, w_index].flat());
|
|
27481
|
+
st2 = ed + grid_size;
|
|
27482
|
+
}
|
|
27483
|
+
if (st2 < ids.length) {
|
|
27484
|
+
const st_idx = llm_pos_ids_list.length > 0 ? max(llm_pos_ids_list.at(-1))[0] + 1 : 0;
|
|
27485
|
+
const text_len = ids.length - st2;
|
|
27486
|
+
llm_pos_ids_list.push(Array.from({ length: 3 * text_len }, (_, i) => st_idx + i % text_len));
|
|
27487
|
+
}
|
|
27488
|
+
return llm_pos_ids_list;
|
|
27489
|
+
}
|
|
27490
|
+
/**
|
|
27491
|
+
* Calculate the 3D rope index based on image and video's temporal, height and width in LLM.
|
|
27492
|
+
*
|
|
27493
|
+
* Explanation:
|
|
27494
|
+
* Each embedding sequence contains vision embedding and text embedding or just contains text embedding.
|
|
27495
|
+
*
|
|
27496
|
+
* For pure text embedding sequence, the rotary position embedding has no difference with mordern LLMs.
|
|
27497
|
+
* Examples:
|
|
27498
|
+
* input_ids: [T T T T T], here T is for text.
|
|
27499
|
+
* temporal position_ids: [0, 1, 2, 3, 4]
|
|
27500
|
+
* height position_ids: [0, 1, 2, 3, 4]
|
|
27501
|
+
* width position_ids: [0, 1, 2, 3, 4]
|
|
27502
|
+
*
|
|
27503
|
+
* For vision and text embedding sequence, we calculate 3D rotary position embedding for vision part
|
|
27504
|
+
* and 1D rotary position embeddin for text part.
|
|
27505
|
+
* Examples:
|
|
27506
|
+
* Assume we have a video input with 3 temporal patches, 2 height patches and 2 width patches.
|
|
27507
|
+
* input_ids: [V V V V V V V V V V V V T T T T T], here V is for vision.
|
|
27508
|
+
* vision temporal position_ids: [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]
|
|
27509
|
+
* vision height position_ids: [0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1]
|
|
27510
|
+
* vision width position_ids: [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]
|
|
27511
|
+
* text temporal position_ids: [3, 4, 5, 6, 7]
|
|
27512
|
+
* text height position_ids: [3, 4, 5, 6, 7]
|
|
27513
|
+
* text width position_ids: [3, 4, 5, 6, 7]
|
|
27514
|
+
* Here we calculate the text start position_ids as the max vision position_ids plus 1.
|
|
27515
|
+
*
|
|
27516
|
+
* @param {Tensor} input_ids Indices of input sequence tokens in the vocabulary. Tensor of shape `(batch_size, sequence_length)`.
|
|
27517
|
+
* @param {Tensor} image_grid_thw (Optional) The temporal, height and width of feature shape of each image in LLM. Tensor of shape `(num_images, 3)`.
|
|
27518
|
+
* @param {Tensor} video_grid_thw (Optional) The temporal, height and width of feature shape of each video in LLM. Tensor of shape `(num_videos, 3)`.
|
|
27519
|
+
* @param {Tensor} attention_mask (Optional) Mask to avoid performing attention on padding token indices. Tensor of shape `(batch_size, sequence_length)`.
|
|
27520
|
+
* @returns {[Tensor, Tensor]} [position_ids, mrope_position_deltas]
|
|
27521
|
+
*/
|
|
27522
|
+
get_rope_index(input_ids, image_grid_thw, video_grid_thw, attention_mask) {
|
|
27523
|
+
const { vision_config } = this.config;
|
|
27524
|
+
const spatial_merge_size = vision_config.spatial_merge_size ?? 2;
|
|
27525
|
+
if (image_grid_thw || video_grid_thw) {
|
|
27526
|
+
const total_input_ids = input_ids.tolist();
|
|
27527
|
+
if (!attention_mask) {
|
|
27528
|
+
attention_mask = ones_like(input_ids);
|
|
27529
|
+
}
|
|
27530
|
+
const attention_mask_list = attention_mask.tolist();
|
|
27531
|
+
const position_ids_list = Array.from(
|
|
27532
|
+
{ length: 3 },
|
|
27533
|
+
() => Array.from({ length: input_ids.dims[0] }, () => Array.from({ length: input_ids.dims[1] }, () => 0))
|
|
27534
|
+
);
|
|
27535
|
+
const image_grid_thw_list = image_grid_thw ? image_grid_thw.tolist() : [];
|
|
27536
|
+
const video_grid_thw_list = video_grid_thw ? video_grid_thw.tolist() : [];
|
|
27537
|
+
const state = { image_index: 0, video_index: 0 };
|
|
27538
|
+
const mrope_position_deltas = [];
|
|
27539
|
+
for (let i = 0; i < total_input_ids.length; ++i) {
|
|
27540
|
+
const filtered_ids = total_input_ids[i].filter((_, j) => attention_mask_list[i][j] == 1);
|
|
27541
|
+
const llm_pos_ids_list = this._get_multimodal_rope_positions({
|
|
27542
|
+
filtered_ids,
|
|
27543
|
+
image_grid_thw_list,
|
|
27544
|
+
video_grid_thw_list,
|
|
27545
|
+
spatial_merge_size,
|
|
27546
|
+
state
|
|
27547
|
+
});
|
|
27548
|
+
const llm_positions = this._reorder_and_write_positions(
|
|
27549
|
+
llm_pos_ids_list,
|
|
27550
|
+
attention_mask_list[i],
|
|
27551
|
+
position_ids_list,
|
|
27552
|
+
i
|
|
27553
|
+
);
|
|
27554
|
+
mrope_position_deltas.push(max(llm_positions)[0] + 1 - total_input_ids[i].length);
|
|
27555
|
+
}
|
|
27556
|
+
return [
|
|
27557
|
+
new Tensor3("int64", position_ids_list.flat(Infinity), [3, input_ids.dims[0], input_ids.dims[1]]),
|
|
27558
|
+
new Tensor3("int64", mrope_position_deltas, [mrope_position_deltas.length, 1])
|
|
27559
|
+
];
|
|
27560
|
+
} else {
|
|
27561
|
+
return this._get_text_only_rope_index(input_ids, attention_mask);
|
|
27562
|
+
}
|
|
27563
|
+
}
|
|
27564
|
+
async encode_image({ pixel_values, image_grid_thw }) {
|
|
27565
|
+
const features = (await sessionRun(this.sessions["vision_encoder"], {
|
|
27566
|
+
pixel_values,
|
|
27567
|
+
[this.image_grid_thw_name]: image_grid_thw
|
|
27568
|
+
})).image_features;
|
|
27569
|
+
return features;
|
|
27570
|
+
}
|
|
27571
|
+
_merge_input_ids_with_image_features(kwargs) {
|
|
27572
|
+
return default_merge_input_ids_with_image_features({
|
|
27573
|
+
// @ts-ignore
|
|
27574
|
+
image_token_id: this.config.image_token_id,
|
|
27575
|
+
...kwargs
|
|
27576
|
+
});
|
|
27577
|
+
}
|
|
27578
|
+
prepare_inputs_for_generation(input_ids, model_inputs, generation_config) {
|
|
27579
|
+
if (!model_inputs.attention_mask || model_inputs.position_ids) {
|
|
27580
|
+
return model_inputs;
|
|
27581
|
+
}
|
|
27582
|
+
const session = this.sessions["decoder_model_merged"] ?? this.sessions["model"];
|
|
27583
|
+
if (!session.inputNames.includes("position_ids")) {
|
|
27584
|
+
return model_inputs;
|
|
27585
|
+
}
|
|
27586
|
+
if (!model_inputs.past_key_values) {
|
|
27587
|
+
[model_inputs.position_ids, model_inputs.rope_deltas] = this.get_rope_index(
|
|
27588
|
+
model_inputs.input_ids,
|
|
27589
|
+
model_inputs.image_grid_thw,
|
|
27590
|
+
model_inputs.video_grid_thw,
|
|
27591
|
+
model_inputs.attention_mask
|
|
27592
|
+
);
|
|
27593
|
+
} else {
|
|
27594
|
+
model_inputs.pixel_values = null;
|
|
27595
|
+
const past_length = model_inputs.past_key_values.get_seq_length();
|
|
27596
|
+
if (past_length < model_inputs.input_ids.dims[1]) {
|
|
27597
|
+
const [full_position_ids, rope_deltas] = this.get_rope_index(
|
|
27598
|
+
model_inputs.input_ids,
|
|
27599
|
+
model_inputs.image_grid_thw,
|
|
27600
|
+
model_inputs.video_grid_thw,
|
|
27601
|
+
model_inputs.attention_mask
|
|
27602
|
+
);
|
|
27603
|
+
model_inputs.rope_deltas = rope_deltas;
|
|
27604
|
+
model_inputs.position_ids = full_position_ids.slice(null, null, [past_length, null]);
|
|
27605
|
+
model_inputs.input_ids = model_inputs.input_ids.slice(null, [past_length, null]);
|
|
27606
|
+
} else {
|
|
27607
|
+
if (!model_inputs.rope_deltas) {
|
|
27608
|
+
[, model_inputs.rope_deltas] = this.get_rope_index(
|
|
27609
|
+
model_inputs.input_ids,
|
|
27610
|
+
model_inputs.image_grid_thw,
|
|
27611
|
+
model_inputs.video_grid_thw,
|
|
27612
|
+
model_inputs.attention_mask
|
|
27613
|
+
);
|
|
27614
|
+
}
|
|
27615
|
+
const delta = BigInt(past_length);
|
|
27616
|
+
const rope_deltas_list = model_inputs.rope_deltas.map((x) => delta + x);
|
|
27617
|
+
model_inputs.position_ids = stack([rope_deltas_list, rope_deltas_list, rope_deltas_list], 0);
|
|
27618
|
+
}
|
|
27619
|
+
}
|
|
27620
|
+
return model_inputs;
|
|
27621
|
+
}
|
|
27622
|
+
};
|
|
27623
|
+
// Adds no members of its own; every method and field is inherited from Qwen2VLForConditionalGeneration.
var Qwen2VLForCausalLM = class extends Qwen2VLForConditionalGeneration {
|
|
27624
|
+
};
|
|
27625
|
+
|
|
27626
|
+
// src/models/qwen2_5_vl/modeling_qwen2_5_vl.js
|
|
27627
|
+
// Same behavior as Qwen2VLForConditionalGeneration except for the field below.
// `encode_image` uses `image_grid_thw_name` as the key under which the grid tensor is
// passed to the vision encoder session, so this subclass feeds it as "image_grid_thw".
var Qwen2_5_VLForConditionalGeneration = class extends Qwen2VLForConditionalGeneration {
|
|
27628
|
+
image_grid_thw_name = "image_grid_thw";
|
|
27629
|
+
};
|
|
27630
|
+
// Extends Qwen2VLForCausalLM (not Qwen2_5_VLForConditionalGeneration), so the
// "image_grid_thw" input-name override has to be declared again here.
var Qwen2_5_VLForCausalLM = class extends Qwen2VLForCausalLM {
|
|
27631
|
+
image_grid_thw_name = "image_grid_thw";
|
|
27632
|
+
};
|
|
27633
|
+
|
|
27634
|
+
// src/models/glm_ocr/modeling_glm_ocr.js
|
|
27635
|
+
var GlmOcrForConditionalGeneration = class extends Qwen2_5_VLForConditionalGeneration {
  /**
   * Compute 3D positional indices for the tokens of one vision item.
   * Temporal is constant, height is repeat-interleaved, width tiles.
   *
   * @param {number} start_position Position assigned to the first vision token on every axis.
   * @param {number[]} grid_thw [T, H, W]
   * @param {number} temp_merge_size Divisor applied to T (floored).
   * @param {number} spatial_merge_size Divisor applied to H and W (floored).
   * @returns {number[]} Flat array of length 3 * seq_len: [temporal..., height..., width...]
   */
  get_vision_position_ids(start_position, grid_thw, temp_merge_size, spatial_merge_size) {
    const llm_grid_t = Math.floor(grid_thw[0] / temp_merge_size);
    const llm_grid_h = Math.floor(grid_thw[1] / spatial_merge_size);
    const llm_grid_w = Math.floor(grid_thw[2] / spatial_merge_size);
    const seq_len = llm_grid_h * llm_grid_w * llm_grid_t;
    // Each height index is held for this many consecutive tokens.
    const h_repeat = llm_grid_w * llm_grid_t;
    const t_pos = Array.from({ length: seq_len }, () => start_position);
    const h_pos = Array.from({ length: seq_len }, (_, i) => start_position + Math.floor(i / h_repeat));
    const w_pos = Array.from({ length: seq_len }, (_, i) => start_position + i % llm_grid_w);
    return [...t_pos, ...h_pos, ...w_pos];
  }
  /**
   * GlmOcr uses mm_token_type_ids-style grouping (image tokens identified by image_token_id)
   * instead of the vision_start_token_id scanning used by Qwen2VL.
   * After a vision segment, position advances by floor(max(h, w) / spatial_merge_size).
   *
   * @param {object} params
   * @param {any[]} params.filtered_ids Token ids of this batch element with masked-out positions removed.
   * @param {any[][]} params.image_grid_thw_list [T, H, W] of every image, consumed in order.
   * @param {any[][]} params.video_grid_thw_list Accepted for signature parity with the base class; not read here.
   * @param {number} params.spatial_merge_size Divisor applied to H and W.
   * @param {{image_index: number, video_index: number}} params.state Cursor into the grid list, advanced in place.
   * @returns {number[][]} One flat [t..., h..., w...] array per text or image run.
   * @throws {Error} If an image run is found but `image_grid_thw_list` has no entry left for it.
   */
  _get_multimodal_rope_positions({
    filtered_ids,
    image_grid_thw_list,
    video_grid_thw_list,
    spatial_merge_size,
    state
  }) {
    const { image_token_id } = this.config;
    // NOTE(review): loose `==` is deliberate. Grid values are converted with Number() below,
    // which suggests tensor data may arrive as BigInt while the config id is a plain number.
    const is_image = (token) => token == image_token_id;

    // Split the sequence into maximal runs of image tokens / non-image tokens.
    const runs = [];
    let run_start = 0;
    for (let j = 1; j <= filtered_ids.length; ++j) {
      const at_end = j === filtered_ids.length;
      if (at_end || is_image(filtered_ids[j]) !== is_image(filtered_ids[run_start])) {
        runs.push({ image: is_image(filtered_ids[run_start]), start: run_start, end: j });
        run_start = j;
      }
    }

    let current_pos = 0;
    const llm_pos_ids_list = [];
    for (const run of runs) {
      if (!run.image) {
        // Text: the same consecutive positions on all three axes.
        const text_len = run.end - run.start;
        llm_pos_ids_list.push(Array.from({ length: 3 * text_len }, (_, i) => current_pos + i % text_len));
        current_pos += text_len;
        continue;
      }

      const raw_grid = image_grid_thw_list[state.image_index];
      if (raw_grid == null) {
        throw new Error(
          `Missing image_grid_thw entry for image run #${state.image_index} ` +
            `(only ${image_grid_thw_list.length} provided).`
        );
      }
      ++state.image_index;
      const grid_thw = raw_grid.map(Number);
      // Dividing T by itself collapses the temporal axis to a single step.
      const temp_merge_size = grid_thw[0];
      llm_pos_ids_list.push(
        this.get_vision_position_ids(current_pos, grid_thw, temp_merge_size, spatial_merge_size)
      );
      // Floor so positions stay integral when H or W is not a multiple of the merge size,
      // matching the floored grid sizes used in get_vision_position_ids.
      current_pos += Math.floor(Math.max(grid_thw[1], grid_thw[2]) / spatial_merge_size);
    }
    return llm_pos_ids_list;
  }
};
|
|
27701
|
+
|
|
27137
27702
|
// src/models/glpn/modeling_glpn.js
|
|
27138
27703
|
var GLPNPreTrainedModel = class extends PreTrainedModel {
|
|
27139
27704
|
};
|
|
@@ -27332,27 +27897,6 @@ var HunYuanDenseV1Model = class extends HunYuanDenseV1PreTrainedModel {
|
|
|
27332
27897
|
var HunYuanDenseV1ForCausalLM = class extends HunYuanDenseV1PreTrainedModel {
|
|
27333
27898
|
};
|
|
27334
27899
|
|
|
27335
|
-
// src/models/llava/modeling_llava.js
|
|
27336
|
-
var LlavaPreTrainedModel = class extends PreTrainedModel {
|
|
27337
|
-
forward_params = ["input_ids", "attention_mask", "pixel_values", "position_ids", "past_key_values"];
|
|
27338
|
-
};
|
|
27339
|
-
var LlavaForConditionalGeneration = class extends LlavaPreTrainedModel {
|
|
27340
|
-
_merge_input_ids_with_image_features(kwargs) {
|
|
27341
|
-
const vision_hidden_size = kwargs.image_features.dims.at(-1);
|
|
27342
|
-
const reshaped_image_hidden_states = kwargs.image_features.view(-1, vision_hidden_size);
|
|
27343
|
-
return default_merge_input_ids_with_image_features({
|
|
27344
|
-
// @ts-ignore
|
|
27345
|
-
image_token_id: this.config.image_token_index ?? this.config.image_token_id,
|
|
27346
|
-
...kwargs,
|
|
27347
|
-
image_features: reshaped_image_hidden_states
|
|
27348
|
-
});
|
|
27349
|
-
}
|
|
27350
|
-
};
|
|
27351
|
-
var Moondream1ForConditionalGeneration = class extends LlavaForConditionalGeneration {
|
|
27352
|
-
};
|
|
27353
|
-
var LlavaQwen2ForCausalLM = class extends LlavaForConditionalGeneration {
|
|
27354
|
-
};
|
|
27355
|
-
|
|
27356
27900
|
// src/models/idefics3/modeling_idefics3.js
|
|
27357
27901
|
var Idefics3ForConditionalGeneration = class extends LlavaForConditionalGeneration {
|
|
27358
27902
|
forward_params = [
|
|
@@ -27446,6 +27990,10 @@ var Lfm2Model = class extends Lfm2PreTrainedModel {
|
|
|
27446
27990
|
var Lfm2ForCausalLM = class extends Lfm2PreTrainedModel {
|
|
27447
27991
|
};
|
|
27448
27992
|
|
|
27993
|
+
// src/models/lighton_ocr/modeling_lighton_ocr.js
|
|
27994
|
+
// Adds no members of its own; all behavior is inherited from LlavaForConditionalGeneration.
var LightOnOcrForConditionalGeneration = class extends LlavaForConditionalGeneration {
|
|
27995
|
+
};
|
|
27996
|
+
|
|
27449
27997
|
// src/models/lfm2_moe/modeling_lfm2_moe.js
|
|
27450
27998
|
var Lfm2MoePreTrainedModel = class extends PreTrainedModel {
|
|
27451
27999
|
};
|
|
@@ -27642,6 +28190,14 @@ var MistralModel = class extends MistralPreTrainedModel {
|
|
|
27642
28190
|
var MistralForCausalLM = class extends MistralPreTrainedModel {
|
|
27643
28191
|
};
|
|
27644
28192
|
|
|
28193
|
+
// src/models/mistral4/modeling_mistral4.js
|
|
28194
|
+
// Common base of the Mistral4 classes below; declares nothing beyond what PreTrainedModel provides.
var Mistral4PreTrainedModel = class extends PreTrainedModel {
|
|
28195
|
+
};
|
|
28196
|
+
// Empty subclass; its name is what the "mistral4" entry of MODEL_MAPPING_NAMES_DECODER_ONLY refers to.
var Mistral4Model = class extends Mistral4PreTrainedModel {
|
|
28196
|
+
};
|
|
28198
|
+
// Empty subclass; all behavior is inherited from Mistral4PreTrainedModel.
var Mistral4ForCausalLM = class extends Mistral4PreTrainedModel {
|
|
28198
|
+
};
|
|
28200
|
+
|
|
27645
28201
|
// src/models/mobilebert/modeling_mobilebert.js
|
|
27646
28202
|
var MobileBertPreTrainedModel = class extends PreTrainedModel {
|
|
27647
28203
|
};
|
|
@@ -28110,6 +28666,14 @@ var NanoChatModel = class extends NanoChatPreTrainedModel {
|
|
|
28110
28666
|
var NanoChatForCausalLM = class extends NanoChatPreTrainedModel {
|
|
28111
28667
|
};
|
|
28112
28668
|
|
|
28669
|
+
// src/models/nemotron_h/modeling_nemotron_h.js
|
|
28670
|
+
// Common base of the NemotronH classes below; declares nothing beyond what PreTrainedModel provides.
var NemotronHPreTrainedModel = class extends PreTrainedModel {
|
|
28671
|
+
};
|
|
28672
|
+
// Empty subclass; its name is what the "nemotron_h" entry of MODEL_MAPPING_NAMES_DECODER_ONLY refers to.
var NemotronHModel = class extends NemotronHPreTrainedModel {
|
|
28672
|
+
};
|
|
28674
|
+
// Empty subclass; all behavior is inherited from NemotronHPreTrainedModel.
var NemotronHForCausalLM = class extends NemotronHPreTrainedModel {
|
|
28674
|
+
};
|
|
28676
|
+
|
|
28113
28677
|
// src/models/neobert/modeling_neobert.js
|
|
28114
28678
|
var NeoBertPreTrainedModel = class extends PreTrainedModel {
|
|
28115
28679
|
};
|
|
@@ -28390,252 +28954,6 @@ var Qwen2MoeModel = class extends Qwen2MoePreTrainedModel {
|
|
|
28390
28954
|
var Qwen2MoeForCausalLM = class extends Qwen2MoePreTrainedModel {
|
|
28391
28955
|
};
|
|
28392
28956
|
|
|
28393
|
-
// src/models/qwen2_vl/modeling_qwen2_vl.js
|
|
28394
|
-
var Qwen2VLPreTrainedModel = class extends PreTrainedModel {
|
|
28395
|
-
forward_params = [
|
|
28396
|
-
// Text inputs
|
|
28397
|
-
"input_ids",
|
|
28398
|
-
"attention_mask",
|
|
28399
|
-
"position_ids",
|
|
28400
|
-
"past_key_values",
|
|
28401
|
-
// Vision inputs
|
|
28402
|
-
"pixel_values",
|
|
28403
|
-
"image_grid_thw"
|
|
28404
|
-
];
|
|
28405
|
-
};
|
|
28406
|
-
var Qwen2VLForConditionalGeneration = class extends Qwen2VLPreTrainedModel {
|
|
28407
|
-
// NOTE: This is used as the base class for all Qwen VL models and their CausalLM variants.
|
|
28408
|
-
// CausalLM variants (e.g., Qwen2VLForCausalLM) extend this class but load only
|
|
28409
|
-
// embed_tokens + decoder_model_merged (no vision_encoder) via MultimodalLanguageModelOnly type.
|
|
28410
|
-
image_grid_thw_name = "grid_thw";
|
|
28411
|
-
/**
|
|
28412
|
-
* Calculate the 3D rope index based on image and video's temporal, height and width in LLM.
|
|
28413
|
-
*
|
|
28414
|
-
* Explanation:
|
|
28415
|
-
* Each embedding sequence contains vision embedding and text embedding or just contains text embedding.
|
|
28416
|
-
*
|
|
28417
|
-
* For pure text embedding sequence, the rotary position embedding has no difference with mordern LLMs.
|
|
28418
|
-
* Examples:
|
|
28419
|
-
* input_ids: [T T T T T], here T is for text.
|
|
28420
|
-
* temporal position_ids: [0, 1, 2, 3, 4]
|
|
28421
|
-
* height position_ids: [0, 1, 2, 3, 4]
|
|
28422
|
-
* width position_ids: [0, 1, 2, 3, 4]
|
|
28423
|
-
*
|
|
28424
|
-
* For vision and text embedding sequence, we calculate 3D rotary position embedding for vision part
|
|
28425
|
-
* and 1D rotary position embeddin for text part.
|
|
28426
|
-
* Examples:
|
|
28427
|
-
* Assume we have a video input with 3 temporal patches, 2 height patches and 2 width patches.
|
|
28428
|
-
* input_ids: [V V V V V V V V V V V V T T T T T], here V is for vision.
|
|
28429
|
-
* vision temporal position_ids: [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]
|
|
28430
|
-
* vision height position_ids: [0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1]
|
|
28431
|
-
* vision width position_ids: [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]
|
|
28432
|
-
* text temporal position_ids: [3, 4, 5, 6, 7]
|
|
28433
|
-
* text height position_ids: [3, 4, 5, 6, 7]
|
|
28434
|
-
* text width position_ids: [3, 4, 5, 6, 7]
|
|
28435
|
-
* Here we calculate the text start position_ids as the max vision position_ids plus 1.
|
|
28436
|
-
*
|
|
28437
|
-
* @param {Tensor} input_ids Indices of input sequence tokens in the vocabulary. Tensor of shape `(batch_size, sequence_length)`.
|
|
28438
|
-
* @param {Tensor} image_grid_thw (Optional) The temporal, height and width of feature shape of each image in LLM. Tensor of shape `(num_images, 3)`.
|
|
28439
|
-
* @param {Tensor} video_grid_thw (Optional) The temporal, height and width of feature shape of each video in LLM. Tensor of shape `(num_videos, 3)`.
|
|
28440
|
-
* @param {Tensor} attention_mask (Optional) Mask to avoid performing attention on padding token indices. Tensor of shape `(batch_size, sequence_length)`. Mask values selected in `[0, 1]`:
|
|
28441
|
-
* - 1 for tokens that are **not masked**,
|
|
28442
|
-
* - 0 for tokens that are **masked**.
|
|
28443
|
-
* @returns {[Tensor, Tensor]} [position_ids, mrope_position_deltas] with:
|
|
28444
|
-
* - position_ids: Tensor of shape `(3, batch_size, sequence_length)`.
|
|
28445
|
-
* - mrope_position_deltas: Tensor of shape `(batch_size)`.
|
|
28446
|
-
*/
|
|
28447
|
-
get_rope_index(input_ids, image_grid_thw, video_grid_thw, attention_mask) {
|
|
28448
|
-
const { vision_config, image_token_id, video_token_id, vision_start_token_id } = this.config;
|
|
28449
|
-
const spatial_merge_size = vision_config.spatial_merge_size ?? 2;
|
|
28450
|
-
const mrope_position_deltas = [];
|
|
28451
|
-
if (image_grid_thw || video_grid_thw) {
|
|
28452
|
-
let total_input_ids = input_ids.tolist();
|
|
28453
|
-
if (!attention_mask) {
|
|
28454
|
-
attention_mask = ones_like(input_ids);
|
|
28455
|
-
}
|
|
28456
|
-
const attention_mask_list = attention_mask.tolist();
|
|
28457
|
-
const position_ids_list = Array.from(
|
|
28458
|
-
{ length: 3 },
|
|
28459
|
-
(_) => Array.from({ length: input_ids.dims[0] }, (_2) => Array.from({ length: input_ids.dims[1] }, (_3) => 1))
|
|
28460
|
-
);
|
|
28461
|
-
const image_grid_thw_list = image_grid_thw ? image_grid_thw.tolist() : [];
|
|
28462
|
-
const video_grid_thw_list = video_grid_thw ? video_grid_thw.tolist() : [];
|
|
28463
|
-
let image_index = 0;
|
|
28464
|
-
let video_index = 0;
|
|
28465
|
-
for (let i = 0; i < total_input_ids.length; ++i) {
|
|
28466
|
-
const ids = total_input_ids[i].filter((_, j) => attention_mask_list[i][j] == 1);
|
|
28467
|
-
const vision_start_indices = ids.reduce((acc, x, idx) => {
|
|
28468
|
-
if (x == vision_start_token_id) acc.push(idx);
|
|
28469
|
-
return acc;
|
|
28470
|
-
}, []);
|
|
28471
|
-
const vision_tokens = vision_start_indices.map((x) => ids[x + 1]);
|
|
28472
|
-
const image_nums = vision_tokens.filter((x) => x == image_token_id).length;
|
|
28473
|
-
const video_nums = vision_tokens.filter((x) => x == video_token_id).length;
|
|
28474
|
-
let llm_pos_ids_list = [];
|
|
28475
|
-
let st2 = 0;
|
|
28476
|
-
let remain_images = image_nums;
|
|
28477
|
-
let remain_videos = video_nums;
|
|
28478
|
-
for (let j = 0; j < vision_tokens.length; ++j) {
|
|
28479
|
-
const next_image_token = ids.findIndex((x, i2) => i2 > st2 && x == image_token_id);
|
|
28480
|
-
const next_video_token = ids.findIndex((x, i2) => i2 > st2 && x == video_token_id);
|
|
28481
|
-
const ed_image = remain_images > 0 && next_image_token !== -1 ? next_image_token : ids.length + 1;
|
|
28482
|
-
const ed_video = remain_videos > 0 && next_video_token !== -1 ? next_video_token : ids.length + 1;
|
|
28483
|
-
let ed;
|
|
28484
|
-
let t, h, w;
|
|
28485
|
-
if (ed_image < ed_video) {
|
|
28486
|
-
[t, h, w] = image_grid_thw_list[image_index];
|
|
28487
|
-
++image_index;
|
|
28488
|
-
--remain_images;
|
|
28489
|
-
ed = ed_image;
|
|
28490
|
-
} else {
|
|
28491
|
-
[t, h, w] = video_grid_thw_list[video_index];
|
|
28492
|
-
++video_index;
|
|
28493
|
-
--remain_videos;
|
|
28494
|
-
ed = ed_video;
|
|
28495
|
-
}
|
|
28496
|
-
const [llm_grid_t, llm_grid_h, llm_grid_w] = [
|
|
28497
|
-
Number(t),
|
|
28498
|
-
Math.floor(Number(h) / spatial_merge_size),
|
|
28499
|
-
Math.floor(Number(w) / spatial_merge_size)
|
|
28500
|
-
];
|
|
28501
|
-
const text_len = ed - st2;
|
|
28502
|
-
const st_idx = llm_pos_ids_list.length > 0 ? max(llm_pos_ids_list.at(-1))[0] + 1 : 0;
|
|
28503
|
-
llm_pos_ids_list.push(Array.from({ length: 3 * text_len }, (_, i2) => st_idx + i2 % text_len));
|
|
28504
|
-
const offset = text_len + st_idx;
|
|
28505
|
-
const grid_size = llm_grid_t * llm_grid_h * llm_grid_w;
|
|
28506
|
-
const t_index = Array.from(
|
|
28507
|
-
{ length: grid_size },
|
|
28508
|
-
(_, i2) => offset + Math.floor(i2 / (llm_grid_h * llm_grid_w))
|
|
28509
|
-
);
|
|
28510
|
-
const h_index = Array.from(
|
|
28511
|
-
{ length: grid_size },
|
|
28512
|
-
(_, i2) => offset + Math.floor(i2 / llm_grid_w) % llm_grid_h
|
|
28513
|
-
);
|
|
28514
|
-
const w_index = Array.from({ length: grid_size }, (_, i2) => offset + i2 % llm_grid_w);
|
|
28515
|
-
llm_pos_ids_list.push([t_index, h_index, w_index].flat());
|
|
28516
|
-
st2 = ed + grid_size;
|
|
28517
|
-
}
|
|
28518
|
-
if (st2 < ids.length) {
|
|
28519
|
-
const st_idx = llm_pos_ids_list.length > 0 ? max(llm_pos_ids_list.at(-1))[0] + 1 : 0;
|
|
28520
|
-
const text_len = ids.length - st2;
|
|
28521
|
-
llm_pos_ids_list.push(Array.from({ length: 3 * text_len }, (_, i2) => st_idx + i2 % text_len));
|
|
28522
|
-
}
|
|
28523
|
-
const num_items = llm_pos_ids_list.reduce((acc, x) => acc + x.length, 0);
|
|
28524
|
-
const llm_positions = new Array(num_items);
|
|
28525
|
-
let index = 0;
|
|
28526
|
-
for (let x = 0; x < 3; ++x) {
|
|
28527
|
-
for (let y = 0; y < llm_pos_ids_list.length; ++y) {
|
|
28528
|
-
const val = llm_pos_ids_list[y];
|
|
28529
|
-
const text_len = val.length / 3;
|
|
28530
|
-
for (let z = x * text_len; z < (x + 1) * text_len; ++z) {
|
|
28531
|
-
llm_positions[index++] = val[z];
|
|
28532
|
-
}
|
|
28533
|
-
}
|
|
28534
|
-
}
|
|
28535
|
-
let count2 = 0;
|
|
28536
|
-
const attn_mask = attention_mask_list[i];
|
|
28537
|
-
for (let y = 0; y < attn_mask.length; ++y) {
|
|
28538
|
-
if (attn_mask[y] == 1) {
|
|
28539
|
-
for (let x = 0; x < 3; ++x) {
|
|
28540
|
-
position_ids_list[x][i][y] = llm_positions[x * num_items / 3 + count2];
|
|
28541
|
-
}
|
|
28542
|
-
++count2;
|
|
28543
|
-
}
|
|
28544
|
-
}
|
|
28545
|
-
const max_llm_positions = max(llm_positions)[0];
|
|
28546
|
-
mrope_position_deltas.push(max_llm_positions + 1 - total_input_ids[i].length);
|
|
28547
|
-
}
|
|
28548
|
-
return [
|
|
28549
|
-
new Tensor3("int64", position_ids_list.flat(Infinity), [3, input_ids.dims[0], input_ids.dims[1]]),
|
|
28550
|
-
new Tensor3("int64", mrope_position_deltas, [mrope_position_deltas.length, 1])
|
|
28551
|
-
];
|
|
28552
|
-
} else {
|
|
28553
|
-
if (attention_mask) {
|
|
28554
|
-
const { data, dims } = cumsum_masked_fill(attention_mask);
|
|
28555
|
-
const position_ids = BigInt64Array.from({ length: 3 * data.length }, (_, i) => data[i % data.length]);
|
|
28556
|
-
const mrope_position_deltas2 = Array.from(
|
|
28557
|
-
{ length: dims[0] },
|
|
28558
|
-
(_, i) => max(data.subarray(dims[1] * i, dims[1] * (i + 1)))[0] + 1n + BigInt(dims[1])
|
|
28559
|
-
);
|
|
28560
|
-
return [
|
|
28561
|
-
new Tensor3("int64", position_ids, [3, ...dims]),
|
|
28562
|
-
new Tensor3("int64", mrope_position_deltas2, [mrope_position_deltas2.length, 1])
|
|
28563
|
-
];
|
|
28564
|
-
} else {
|
|
28565
|
-
const [batch_size, seq_length] = input_ids.dims;
|
|
28566
|
-
const position_ids = BigInt64Array.from(
|
|
28567
|
-
{ length: 3 * batch_size * seq_length },
|
|
28568
|
-
(_, i) => BigInt(Math.floor(i % seq_length / batch_size))
|
|
28569
|
-
);
|
|
28570
|
-
return [new Tensor3("int64", position_ids, [3, ...input_ids.dims]), zeros([batch_size, 1])];
|
|
28571
|
-
}
|
|
28572
|
-
}
|
|
28573
|
-
}
|
|
28574
|
-
async encode_image({ pixel_values, image_grid_thw }) {
|
|
28575
|
-
const features = (await sessionRun(this.sessions["vision_encoder"], {
|
|
28576
|
-
pixel_values,
|
|
28577
|
-
[this.image_grid_thw_name]: image_grid_thw
|
|
28578
|
-
})).image_features;
|
|
28579
|
-
return features;
|
|
28580
|
-
}
|
|
28581
|
-
_merge_input_ids_with_image_features(kwargs) {
|
|
28582
|
-
return default_merge_input_ids_with_image_features({
|
|
28583
|
-
// @ts-ignore
|
|
28584
|
-
image_token_id: this.config.image_token_id,
|
|
28585
|
-
...kwargs
|
|
28586
|
-
});
|
|
28587
|
-
}
|
|
28588
|
-
prepare_inputs_for_generation(input_ids, model_inputs, generation_config) {
|
|
28589
|
-
if (model_inputs.attention_mask && !model_inputs.position_ids) {
|
|
28590
|
-
if (!model_inputs.past_key_values) {
|
|
28591
|
-
[model_inputs.position_ids, model_inputs.rope_deltas] = this.get_rope_index(
|
|
28592
|
-
model_inputs.input_ids,
|
|
28593
|
-
model_inputs.image_grid_thw,
|
|
28594
|
-
model_inputs.video_grid_thw,
|
|
28595
|
-
model_inputs.attention_mask
|
|
28596
|
-
);
|
|
28597
|
-
} else {
|
|
28598
|
-
model_inputs.pixel_values = null;
|
|
28599
|
-
const past_length = model_inputs.past_key_values.get_seq_length();
|
|
28600
|
-
if (past_length < model_inputs.input_ids.dims[1]) {
|
|
28601
|
-
const [full_position_ids, rope_deltas] = this.get_rope_index(
|
|
28602
|
-
model_inputs.input_ids,
|
|
28603
|
-
model_inputs.image_grid_thw,
|
|
28604
|
-
model_inputs.video_grid_thw,
|
|
28605
|
-
model_inputs.attention_mask
|
|
28606
|
-
);
|
|
28607
|
-
model_inputs.rope_deltas = rope_deltas;
|
|
28608
|
-
model_inputs.position_ids = full_position_ids.slice(null, null, [past_length, null]);
|
|
28609
|
-
model_inputs.input_ids = model_inputs.input_ids.slice(null, [past_length, null]);
|
|
28610
|
-
} else {
|
|
28611
|
-
if (!model_inputs.rope_deltas) {
|
|
28612
|
-
[, model_inputs.rope_deltas] = this.get_rope_index(
|
|
28613
|
-
model_inputs.input_ids,
|
|
28614
|
-
model_inputs.image_grid_thw,
|
|
28615
|
-
model_inputs.video_grid_thw,
|
|
28616
|
-
model_inputs.attention_mask
|
|
28617
|
-
);
|
|
28618
|
-
}
|
|
28619
|
-
const delta = BigInt(past_length);
|
|
28620
|
-
const rope_deltas_list = model_inputs.rope_deltas.map((x) => delta + x);
|
|
28621
|
-
model_inputs.position_ids = stack([rope_deltas_list, rope_deltas_list, rope_deltas_list], 0);
|
|
28622
|
-
}
|
|
28623
|
-
}
|
|
28624
|
-
}
|
|
28625
|
-
return model_inputs;
|
|
28626
|
-
}
|
|
28627
|
-
};
|
|
28628
|
-
var Qwen2VLForCausalLM = class extends Qwen2VLForConditionalGeneration {
|
|
28629
|
-
};
|
|
28630
|
-
|
|
28631
|
-
// src/models/qwen2_5_vl/modeling_qwen2_5_vl.js
|
|
28632
|
-
var Qwen2_5_VLForConditionalGeneration = class extends Qwen2VLForConditionalGeneration {
|
|
28633
|
-
image_grid_thw_name = "image_grid_thw";
|
|
28634
|
-
};
|
|
28635
|
-
var Qwen2_5_VLForCausalLM = class extends Qwen2VLForCausalLM {
|
|
28636
|
-
image_grid_thw_name = "image_grid_thw";
|
|
28637
|
-
};
|
|
28638
|
-
|
|
28639
28957
|
// src/models/qwen3/modeling_qwen3.js
|
|
28640
28958
|
var Qwen3PreTrainedModel = class extends PreTrainedModel {
|
|
28641
28959
|
};
|
|
@@ -29081,6 +29399,14 @@ var SnacDecoderModel = class extends SnacPreTrainedModel {
|
|
|
29081
29399
|
}
|
|
29082
29400
|
};
|
|
29083
29401
|
|
|
29402
|
+
// src/models/solar_open/modeling_solar_open.js
|
|
29403
|
+
// Common base of the SolarOpen classes below; declares nothing beyond what PreTrainedModel provides.
var SolarOpenPreTrainedModel = class extends PreTrainedModel {
|
|
29404
|
+
};
|
|
29405
|
+
// Empty subclass; its name is what the "solar_open" entry of MODEL_MAPPING_NAMES_DECODER_ONLY refers to.
var SolarOpenModel = class extends SolarOpenPreTrainedModel {
|
|
29406
|
+
};
|
|
29407
|
+
// Empty subclass; all behavior is inherited from SolarOpenPreTrainedModel.
var SolarOpenForCausalLM = class extends SolarOpenPreTrainedModel {
|
|
29408
|
+
};
|
|
29409
|
+
|
|
29084
29410
|
// src/models/speecht5/modeling_speecht5.js
|
|
29085
29411
|
var SpeechT5PreTrainedModel = class extends PreTrainedModel {
|
|
29086
29412
|
};
|
|
@@ -30197,6 +30523,7 @@ var YoutuForCausalLM = class extends YoutuPreTrainedModel {
|
|
|
30197
30523
|
// src/models/registry.js
|
|
30198
30524
|
var MODEL_MAPPING_NAMES_ENCODER_ONLY = /* @__PURE__ */ new Map([
|
|
30199
30525
|
["bert", "BertModel"],
|
|
30526
|
+
["eurobert", "EuroBertModel"],
|
|
30200
30527
|
["neobert", "NeoBertModel"],
|
|
30201
30528
|
["modernbert", "ModernBertModel"],
|
|
30202
30529
|
["nomic_bert", "NomicBertModel"],
|
|
@@ -30328,6 +30655,7 @@ var MODEL_MAPPING_NAMES_DECODER_ONLY = /* @__PURE__ */ new Map([
|
|
|
30328
30655
|
["gemma3_text", "Gemma3Model"],
|
|
30329
30656
|
["helium", "HeliumModel"],
|
|
30330
30657
|
["glm", "GlmModel"],
|
|
30658
|
+
["glm_moe_dsa", "GlmMoeDsaModel"],
|
|
30331
30659
|
["openelm", "OpenELMModel"],
|
|
30332
30660
|
["qwen2", "Qwen2Model"],
|
|
30333
30661
|
["qwen2_moe", "Qwen2MoeModel"],
|
|
@@ -30339,12 +30667,16 @@ var MODEL_MAPPING_NAMES_DECODER_ONLY = /* @__PURE__ */ new Map([
|
|
|
30339
30667
|
["mpt", "MptModel"],
|
|
30340
30668
|
["opt", "OPTModel"],
|
|
30341
30669
|
["mistral", "MistralModel"],
|
|
30670
|
+
["mistral4", "Mistral4Model"],
|
|
30342
30671
|
["ministral", "MinistralModel"],
|
|
30343
30672
|
["ministral3", "Ministral3Model"],
|
|
30344
30673
|
["ernie4_5", "Ernie4_5ForCausalLM"],
|
|
30345
30674
|
["starcoder2", "Starcoder2Model"],
|
|
30675
|
+
["deepseek_v3", "DeepseekV3Model"],
|
|
30346
30676
|
["falcon", "FalconModel"],
|
|
30347
30677
|
["falcon_h1", "FalconH1Model"],
|
|
30678
|
+
["nemotron_h", "NemotronHModel"],
|
|
30679
|
+
["solar_open", "SolarOpenModel"],
|
|
30348
30680
|
["stablelm", "StableLmModel"],
|
|
30349
30681
|
["modernbert-decoder", "ModernBertDecoderModel"],
|
|
30350
30682
|
["hunyuan_v1_dense", "HunYuanDenseV1Model"],
|
|
@@ -30364,6 +30696,7 @@ var MODEL_FOR_TEXT_TO_WAVEFORM_MAPPING_NAMES = /* @__PURE__ */ new Map([
|
|
|
30364
30696
|
]);
|
|
30365
30697
|
var MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES = /* @__PURE__ */ new Map([
|
|
30366
30698
|
["bert", "BertForSequenceClassification"],
|
|
30699
|
+
["eurobert", "EuroBertForSequenceClassification"],
|
|
30367
30700
|
["neobert", "NeoBertForSequenceClassification"],
|
|
30368
30701
|
["modernbert", "ModernBertForSequenceClassification"],
|
|
30369
30702
|
["roformer", "RoFormerForSequenceClassification"],
|
|
@@ -30386,6 +30719,7 @@ var MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES = /* @__PURE__ */ new Map([
|
|
|
30386
30719
|
]);
|
|
30387
30720
|
var MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES = /* @__PURE__ */ new Map([
|
|
30388
30721
|
["bert", "BertForTokenClassification"],
|
|
30722
|
+
["eurobert", "EuroBertForTokenClassification"],
|
|
30389
30723
|
["neobert", "NeoBertForTokenClassification"],
|
|
30390
30724
|
["modernbert", "ModernBertForTokenClassification"],
|
|
30391
30725
|
["roformer", "RoFormerForTokenClassification"],
|
|
@@ -30448,6 +30782,7 @@ var MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = /* @__PURE__ */ new Map([
|
|
|
30448
30782
|
["gemma3", "Gemma3ForCausalLM"],
|
|
30449
30783
|
["helium", "HeliumForCausalLM"],
|
|
30450
30784
|
["glm", "GlmForCausalLM"],
|
|
30785
|
+
["glm_moe_dsa", "GlmMoeDsaForCausalLM"],
|
|
30451
30786
|
["openelm", "OpenELMForCausalLM"],
|
|
30452
30787
|
["qwen2", "Qwen2ForCausalLM"],
|
|
30453
30788
|
["qwen2_moe", "Qwen2MoeForCausalLM"],
|
|
@@ -30459,6 +30794,7 @@ var MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = /* @__PURE__ */ new Map([
|
|
|
30459
30794
|
["qwen3_vl", "Qwen3VLForCausalLM"],
|
|
30460
30795
|
["qwen3_vl_moe", "Qwen3VLMoeForCausalLM"],
|
|
30461
30796
|
["qwen3_5", "Qwen3_5ForCausalLM"],
|
|
30797
|
+
["qwen3_5_text", "Qwen3_5ForCausalLM"],
|
|
30462
30798
|
["qwen3_5_moe", "Qwen3_5MoeForCausalLM"],
|
|
30463
30799
|
["gemma3n", "Gemma3nForCausalLM"],
|
|
30464
30800
|
["phi", "PhiForCausalLM"],
|
|
@@ -30467,13 +30803,17 @@ var MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = /* @__PURE__ */ new Map([
|
|
|
30467
30803
|
["opt", "OPTForCausalLM"],
|
|
30468
30804
|
["mbart", "MBartForCausalLM"],
|
|
30469
30805
|
["mistral", "MistralForCausalLM"],
|
|
30806
|
+
["mistral4", "Mistral4ForCausalLM"],
|
|
30470
30807
|
["ministral", "MinistralForCausalLM"],
|
|
30471
30808
|
["ministral3", "Ministral3ForCausalLM"],
|
|
30472
30809
|
["ernie4_5", "Ernie4_5ForCausalLM"],
|
|
30473
30810
|
["starcoder2", "Starcoder2ForCausalLM"],
|
|
30811
|
+
["deepseek_v3", "DeepseekV3ForCausalLM"],
|
|
30474
30812
|
["falcon", "FalconForCausalLM"],
|
|
30475
30813
|
["falcon_h1", "FalconH1ForCausalLM"],
|
|
30814
|
+
["nemotron_h", "NemotronHForCausalLM"],
|
|
30476
30815
|
["trocr", "TrOCRForCausalLM"],
|
|
30816
|
+
["solar_open", "SolarOpenForCausalLM"],
|
|
30477
30817
|
["stablelm", "StableLmForCausalLM"],
|
|
30478
30818
|
["modernbert-decoder", "ModernBertDecoderForCausalLM"],
|
|
30479
30819
|
["hunyuan_v1_dense", "HunYuanDenseV1ForCausalLM"],
|
|
@@ -30484,6 +30824,7 @@ var MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = /* @__PURE__ */ new Map([
|
|
|
30484
30824
|
var MODEL_FOR_MULTIMODALITY_MAPPING_NAMES = /* @__PURE__ */ new Map([["multi_modality", "MultiModalityCausalLM"]]);
|
|
30485
30825
|
var MODEL_FOR_MASKED_LM_MAPPING_NAMES = /* @__PURE__ */ new Map([
|
|
30486
30826
|
["bert", "BertForMaskedLM"],
|
|
30827
|
+
["eurobert", "EuroBertForMaskedLM"],
|
|
30487
30828
|
["neobert", "NeoBertForMaskedLM"],
|
|
30488
30829
|
["modernbert", "ModernBertForMaskedLM"],
|
|
30489
30830
|
["roformer", "RoFormerForMaskedLM"],
|
|
@@ -30541,8 +30882,11 @@ var MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES = /* @__PURE__ */ new Map([
|
|
|
30541
30882
|
["smolvlm", "SmolVLMForConditionalGeneration"],
|
|
30542
30883
|
["paligemma", "PaliGemmaForConditionalGeneration"],
|
|
30543
30884
|
["llava_qwen2", "LlavaQwen2ForCausalLM"],
|
|
30885
|
+
["gemma3", "Gemma3ForConditionalGeneration"],
|
|
30544
30886
|
["gemma3n", "Gemma3nForConditionalGeneration"],
|
|
30545
|
-
["mistral3", "Mistral3ForConditionalGeneration"]
|
|
30887
|
+
["mistral3", "Mistral3ForConditionalGeneration"],
|
|
30888
|
+
["lighton_ocr", "LightOnOcrForConditionalGeneration"],
|
|
30889
|
+
["glm_ocr", "GlmOcrForConditionalGeneration"]
|
|
30546
30890
|
]);
|
|
30547
30891
|
var MODEL_FOR_AUDIO_TEXT_TO_TEXT_MAPPING_NAMES = /* @__PURE__ */ new Map([
|
|
30548
30892
|
["granite_speech", "GraniteSpeechForConditionalGeneration"],
|
|
@@ -30647,6 +30991,7 @@ var MODEL_FOR_TIME_SERIES_PREDICTION_MAPPING_NAMES = /* @__PURE__ */ new Map([
|
|
|
30647
30991
|
]);
|
|
30648
30992
|
var MODEL_FOR_IMAGE_TO_IMAGE_MAPPING_NAMES = /* @__PURE__ */ new Map([["swin2sr", "Swin2SRForImageSuperResolution"]]);
|
|
30649
30993
|
var MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES = /* @__PURE__ */ new Map([
|
|
30994
|
+
["chmv2", "CHMv2ForDepthEstimation"],
|
|
30650
30995
|
["dpt", "DPTForDepthEstimation"],
|
|
30651
30996
|
["depth_anything", "DepthAnythingForDepthEstimation"],
|
|
30652
30997
|
["glpn", "GLPNForDepthEstimation"],
|
|
@@ -30732,13 +31077,6 @@ var CUSTOM_MAPPING = [
|
|
|
30732
31077
|
],
|
|
30733
31078
|
["SupertonicForConditionalGeneration", SupertonicForConditionalGeneration, MODEL_TYPES.Supertonic],
|
|
30734
31079
|
["ChatterboxModel", ChatterboxModel, MODEL_TYPES.Chatterbox],
|
|
30735
|
-
["Qwen2VLForCausalLM", Qwen2VLForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
|
|
30736
|
-
["Qwen2_5_VLForCausalLM", Qwen2_5_VLForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
|
|
30737
|
-
["Qwen3VLForCausalLM", Qwen3VLForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
|
|
30738
|
-
["Qwen3VLMoeForCausalLM", Qwen3VLMoeForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
|
|
30739
|
-
["Qwen3_5ForCausalLM", Qwen3_5ForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
|
|
30740
|
-
["Qwen3_5MoeForCausalLM", Qwen3_5MoeForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
|
|
30741
|
-
["Gemma3nForCausalLM", Gemma3nForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
|
|
30742
31080
|
[
|
|
30743
31081
|
"VoxtralRealtimeForConditionalGeneration",
|
|
30744
31082
|
VoxtralRealtimeForConditionalGeneration,
|
|
@@ -32420,6 +32758,41 @@ var TASK_ALIASES = Object.freeze({
|
|
|
32420
32758
|
embeddings: "feature-extraction"
|
|
32421
32759
|
});
|
|
32422
32760
|
|
|
32761
|
+
// src/utils/model_registry/resolve_model_type.js
|
|
32762
|
+
function resolve_model_type(config, { warn = true } = {}) {
|
|
32763
|
+
const architectures = (
|
|
32764
|
+
/** @type {string[]} */
|
|
32765
|
+
config.architectures || []
|
|
32766
|
+
);
|
|
32767
|
+
for (const arch of architectures) {
|
|
32768
|
+
const mappedType = MODEL_TYPE_MAPPING.get(arch);
|
|
32769
|
+
if (mappedType !== void 0) {
|
|
32770
|
+
return mappedType;
|
|
32771
|
+
}
|
|
32772
|
+
}
|
|
32773
|
+
if (config.model_type) {
|
|
32774
|
+
const mappedType = MODEL_TYPE_MAPPING.get(config.model_type);
|
|
32775
|
+
if (mappedType !== void 0) {
|
|
32776
|
+
return mappedType;
|
|
32777
|
+
}
|
|
32778
|
+
for (const mapping of Object.values(MODEL_MAPPING_NAMES)) {
|
|
32779
|
+
if (mapping.has(config.model_type)) {
|
|
32780
|
+
const resolved = MODEL_TYPE_MAPPING.get(mapping.get(config.model_type));
|
|
32781
|
+
if (resolved !== void 0) {
|
|
32782
|
+
return resolved;
|
|
32783
|
+
}
|
|
32784
|
+
}
|
|
32785
|
+
}
|
|
32786
|
+
}
|
|
32787
|
+
if (warn) {
|
|
32788
|
+
const archList = architectures.length > 0 ? architectures.join(", ") : "(none)";
|
|
32789
|
+
logger.warn(
|
|
32790
|
+
`[resolve_model_type] Architecture(s) not found in MODEL_TYPE_MAPPING: [${archList}] for model type '${config.model_type}'. Falling back to EncoderOnly (single model.onnx file). If you encounter issues, please report at: ${GITHUB_ISSUE_URL}`
|
|
32791
|
+
);
|
|
32792
|
+
}
|
|
32793
|
+
return MODEL_TYPES.EncoderOnly;
|
|
32794
|
+
}
|
|
32795
|
+
|
|
32423
32796
|
// src/utils/model_registry/get_model_files.js
|
|
32424
32797
|
function get_config(modelId, { config = null, cache_dir = null, local_files_only = false, revision = "main" } = {}) {
|
|
32425
32798
|
if (config !== null) {
|
|
@@ -32442,43 +32815,7 @@ async function get_model_files(modelId, { config = null, dtype: overrideDtype =
|
|
|
32442
32815
|
const subfolder = "onnx";
|
|
32443
32816
|
const rawDevice = overrideDevice ?? custom_config.device;
|
|
32444
32817
|
let dtype = overrideDtype ?? custom_config.dtype;
|
|
32445
|
-
|
|
32446
|
-
const architectures = (
|
|
32447
|
-
/** @type {string[]} */
|
|
32448
|
-
config.architectures || []
|
|
32449
|
-
);
|
|
32450
|
-
let foundInMapping = false;
|
|
32451
|
-
for (const arch of architectures) {
|
|
32452
|
-
const mappedType = MODEL_TYPE_MAPPING.get(arch);
|
|
32453
|
-
if (mappedType !== void 0) {
|
|
32454
|
-
modelType = mappedType;
|
|
32455
|
-
foundInMapping = true;
|
|
32456
|
-
break;
|
|
32457
|
-
}
|
|
32458
|
-
}
|
|
32459
|
-
if (!foundInMapping && config.model_type) {
|
|
32460
|
-
const mappedType = MODEL_TYPE_MAPPING.get(config.model_type);
|
|
32461
|
-
if (mappedType !== void 0) {
|
|
32462
|
-
modelType = mappedType;
|
|
32463
|
-
foundInMapping = true;
|
|
32464
|
-
}
|
|
32465
|
-
if (!foundInMapping) {
|
|
32466
|
-
for (const mapping of Object.values(MODEL_MAPPING_NAMES)) {
|
|
32467
|
-
if (mapping.has(config.model_type)) {
|
|
32468
|
-
modelType = MODEL_TYPE_MAPPING.get(mapping.get(config.model_type));
|
|
32469
|
-
foundInMapping = true;
|
|
32470
|
-
break;
|
|
32471
|
-
}
|
|
32472
|
-
}
|
|
32473
|
-
}
|
|
32474
|
-
}
|
|
32475
|
-
if (!foundInMapping) {
|
|
32476
|
-
const archList = architectures.length > 0 ? architectures.join(", ") : "(none)";
|
|
32477
|
-
logger.warn(
|
|
32478
|
-
`[get_model_files] Architecture(s) not found in MODEL_TYPE_MAPPING: [${archList}] for model type '${config.model_type}'. Falling back to EncoderOnly (single model.onnx file). If you encounter issues, please report at: ${GITHUB_ISSUE_URL}`
|
|
32479
|
-
);
|
|
32480
|
-
modelType = MODEL_TYPES.EncoderOnly;
|
|
32481
|
-
}
|
|
32818
|
+
const modelType = resolve_model_type(config);
|
|
32482
32819
|
const add_model_file = (fileName, baseName = null) => {
|
|
32483
32820
|
baseName = baseName ?? fileName;
|
|
32484
32821
|
const selectedDevice = selectDevice(rawDevice, fileName);
|
|
@@ -33065,6 +33402,31 @@ async function clear_pipeline_cache(task, modelId, options = {}) {
|
|
|
33065
33402
|
return await clear_files_from_cache(modelId, files, options);
|
|
33066
33403
|
}
|
|
33067
33404
|
|
|
33405
|
+
// src/utils/model_registry/get_available_dtypes.js
|
|
33406
|
+
var CONCRETE_DTYPES = Object.keys(DEFAULT_DTYPE_SUFFIX_MAPPING);
|
|
33407
|
+
async function get_available_dtypes(modelId, { config = null, model_file_name = null, revision = "main", cache_dir = null, local_files_only = false } = {}) {
|
|
33408
|
+
config = await get_config(modelId, { config, cache_dir, local_files_only, revision });
|
|
33409
|
+
const subfolder = "onnx";
|
|
33410
|
+
const modelType = resolve_model_type(config);
|
|
33411
|
+
const { sessions } = getSessionsConfig(modelType, config, { model_file_name });
|
|
33412
|
+
const baseNames = Object.values(sessions);
|
|
33413
|
+
const metadataOptions = { revision, cache_dir, local_files_only };
|
|
33414
|
+
const probeResults = await Promise.all(
|
|
33415
|
+
CONCRETE_DTYPES.map(async (dtype) => {
|
|
33416
|
+
const suffix = DEFAULT_DTYPE_SUFFIX_MAPPING[dtype] ?? "";
|
|
33417
|
+
const allExist = await Promise.all(
|
|
33418
|
+
baseNames.map(async (baseName) => {
|
|
33419
|
+
const filename = `${subfolder}/${baseName}${suffix}.onnx`;
|
|
33420
|
+
const metadata = await get_file_metadata(modelId, filename, metadataOptions);
|
|
33421
|
+
return metadata.exists;
|
|
33422
|
+
})
|
|
33423
|
+
);
|
|
33424
|
+
return { dtype, available: allExist.every(Boolean) };
|
|
33425
|
+
})
|
|
33426
|
+
);
|
|
33427
|
+
return probeResults.filter((r) => r.available).map((r) => r.dtype);
|
|
33428
|
+
}
|
|
33429
|
+
|
|
33068
33430
|
// src/utils/model_registry/ModelRegistry.js
|
|
33069
33431
|
var ModelRegistry = class {
|
|
33070
33432
|
/**
|
|
@@ -33151,6 +33513,29 @@ var ModelRegistry = class {
|
|
|
33151
33513
|
static async get_processor_files(modelId) {
|
|
33152
33514
|
return get_processor_files(modelId);
|
|
33153
33515
|
}
|
|
33516
|
+
/**
|
|
33517
|
+
* Detects which quantization levels (dtypes) are available for a model
|
|
33518
|
+
* by checking which ONNX files exist on the hub or locally.
|
|
33519
|
+
*
|
|
33520
|
+
* A dtype is considered available if all required model session files
|
|
33521
|
+
* exist for that dtype.
|
|
33522
|
+
*
|
|
33523
|
+
* @param {string} modelId - The model id (e.g., "onnx-community/all-MiniLM-L6-v2-ONNX")
|
|
33524
|
+
* @param {Object} [options] - Optional parameters
|
|
33525
|
+
* @param {import('../../configs.js').PretrainedConfig} [options.config=null] - Pre-loaded config
|
|
33526
|
+
* @param {string} [options.model_file_name=null] - Override the model file name (excluding .onnx suffix)
|
|
33527
|
+
* @param {string} [options.revision='main'] - Model revision
|
|
33528
|
+
* @param {string} [options.cache_dir=null] - Custom cache directory
|
|
33529
|
+
* @param {boolean} [options.local_files_only=false] - Only check local files
|
|
33530
|
+
* @returns {Promise<string[]>} Array of available dtype strings (e.g., ['fp32', 'fp16', 'q4', 'q8'])
|
|
33531
|
+
*
|
|
33532
|
+
* @example
|
|
33533
|
+
* const dtypes = await ModelRegistry.get_available_dtypes('onnx-community/all-MiniLM-L6-v2-ONNX');
|
|
33534
|
+
* console.log(dtypes); // ['fp32', 'fp16', 'int8', 'uint8', 'q8', 'q4']
|
|
33535
|
+
*/
|
|
33536
|
+
static async get_available_dtypes(modelId, options = {}) {
|
|
33537
|
+
return get_available_dtypes(modelId, options);
|
|
33538
|
+
}
|
|
33154
33539
|
/**
|
|
33155
33540
|
* Quickly checks if a model is fully cached by verifying `config.json` is present,
|
|
33156
33541
|
* then confirming all required files are cached.
|
|
@@ -33385,6 +33770,9 @@ export {
|
|
|
33385
33770
|
BloomModel,
|
|
33386
33771
|
BloomPreTrainedModel,
|
|
33387
33772
|
BloomTokenizer,
|
|
33773
|
+
CHMv2ForDepthEstimation,
|
|
33774
|
+
CHMv2ImageProcessor,
|
|
33775
|
+
CHMv2PreTrainedModel,
|
|
33388
33776
|
CLIPFeatureExtractor,
|
|
33389
33777
|
CLIPImageProcessor,
|
|
33390
33778
|
CLIPModel,
|
|
@@ -33480,6 +33868,9 @@ export {
|
|
|
33480
33868
|
DebertaV2Tokenizer,
|
|
33481
33869
|
DecisionTransformerModel,
|
|
33482
33870
|
DecisionTransformerPreTrainedModel,
|
|
33871
|
+
DeepseekV3ForCausalLM,
|
|
33872
|
+
DeepseekV3Model,
|
|
33873
|
+
DeepseekV3PreTrainedModel,
|
|
33483
33874
|
DeiTFeatureExtractor,
|
|
33484
33875
|
DeiTForImageClassification,
|
|
33485
33876
|
DeiTImageProcessor,
|
|
@@ -33540,6 +33931,11 @@ export {
|
|
|
33540
33931
|
EsmModel,
|
|
33541
33932
|
EsmPreTrainedModel,
|
|
33542
33933
|
EsmTokenizer,
|
|
33934
|
+
EuroBertForMaskedLM,
|
|
33935
|
+
EuroBertForSequenceClassification,
|
|
33936
|
+
EuroBertForTokenClassification,
|
|
33937
|
+
EuroBertModel,
|
|
33938
|
+
EuroBertPreTrainedModel,
|
|
33543
33939
|
ExaoneForCausalLM,
|
|
33544
33940
|
ExaoneModel,
|
|
33545
33941
|
ExaonePreTrainedModel,
|
|
@@ -33586,8 +33982,11 @@ export {
|
|
|
33586
33982
|
Gemma2Model,
|
|
33587
33983
|
Gemma2PreTrainedModel,
|
|
33588
33984
|
Gemma3ForCausalLM,
|
|
33985
|
+
Gemma3ForConditionalGeneration,
|
|
33986
|
+
Gemma3ImageProcessor,
|
|
33589
33987
|
Gemma3Model,
|
|
33590
33988
|
Gemma3PreTrainedModel,
|
|
33989
|
+
Gemma3Processor,
|
|
33591
33990
|
Gemma3nAudioFeatureExtractor,
|
|
33592
33991
|
Gemma3nForCausalLM,
|
|
33593
33992
|
Gemma3nForConditionalGeneration,
|
|
@@ -33597,8 +33996,14 @@ export {
|
|
|
33597
33996
|
GemmaModel,
|
|
33598
33997
|
GemmaPreTrainedModel,
|
|
33599
33998
|
GemmaTokenizer,
|
|
33999
|
+
Glm46VImageProcessor,
|
|
34000
|
+
Glm46VProcessor,
|
|
33600
34001
|
GlmForCausalLM,
|
|
33601
34002
|
GlmModel,
|
|
34003
|
+
GlmMoeDsaForCausalLM,
|
|
34004
|
+
GlmMoeDsaModel,
|
|
34005
|
+
GlmMoeDsaPreTrainedModel,
|
|
34006
|
+
GlmOcrForConditionalGeneration,
|
|
33602
34007
|
GlmPreTrainedModel,
|
|
33603
34008
|
GptOssForCausalLM,
|
|
33604
34009
|
GptOssModel,
|
|
@@ -33664,6 +34069,7 @@ export {
|
|
|
33664
34069
|
Lfm2VlForConditionalGeneration,
|
|
33665
34070
|
Lfm2VlImageProcessor,
|
|
33666
34071
|
Lfm2VlProcessor,
|
|
34072
|
+
LightOnOcrForConditionalGeneration,
|
|
33667
34073
|
LiteWhisperForConditionalGeneration,
|
|
33668
34074
|
Llama4ForCausalLM,
|
|
33669
34075
|
Llama4PreTrainedModel,
|
|
@@ -33733,6 +34139,9 @@ export {
|
|
|
33733
34139
|
MimiPreTrainedModel,
|
|
33734
34140
|
MinLengthLogitsProcessor,
|
|
33735
34141
|
MinNewTokensLengthLogitsProcessor,
|
|
34142
|
+
Mistral4ForCausalLM,
|
|
34143
|
+
Mistral4Model,
|
|
34144
|
+
Mistral4PreTrainedModel,
|
|
33736
34145
|
MistralForCausalLM,
|
|
33737
34146
|
MistralModel,
|
|
33738
34147
|
MistralPreTrainedModel,
|
|
@@ -33804,6 +34213,9 @@ export {
|
|
|
33804
34213
|
NanoChatForCausalLM,
|
|
33805
34214
|
NanoChatModel,
|
|
33806
34215
|
NanoChatPreTrainedModel,
|
|
34216
|
+
NemotronHForCausalLM,
|
|
34217
|
+
NemotronHModel,
|
|
34218
|
+
NemotronHPreTrainedModel,
|
|
33807
34219
|
NeoBertForMaskedLM,
|
|
33808
34220
|
NeoBertForQuestionAnswering,
|
|
33809
34221
|
NeoBertForSequenceClassification,
|
|
@@ -33993,6 +34405,9 @@ export {
|
|
|
33993
34405
|
SnacFeatureExtractor,
|
|
33994
34406
|
SnacModel,
|
|
33995
34407
|
SnacPreTrainedModel,
|
|
34408
|
+
SolarOpenForCausalLM,
|
|
34409
|
+
SolarOpenModel,
|
|
34410
|
+
SolarOpenPreTrainedModel,
|
|
33996
34411
|
SpeechT5FeatureExtractor,
|
|
33997
34412
|
SpeechT5ForSpeechToText,
|
|
33998
34413
|
SpeechT5ForTextToSpeech,
|
|
@@ -34190,7 +34605,7 @@ export {
|
|
|
34190
34605
|
|
|
34191
34606
|
onnxruntime-web/dist/ort.webgpu.bundle.min.mjs:
|
|
34192
34607
|
(*!
|
|
34193
|
-
* ONNX Runtime Web v1.25.0-dev.
|
|
34608
|
+
* ONNX Runtime Web v1.25.0-dev.20260323-a99aad9d36
|
|
34194
34609
|
* Copyright (c) Microsoft Corporation. All rights reserved.
|
|
34195
34610
|
* Licensed under the MIT License.
|
|
34196
34611
|
*)
|