@huggingface/transformers 4.0.0-next.7 → 4.0.0-next.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. package/README.md +13 -2
  2. package/dist/ort-wasm-simd-threaded.jsep.mjs +26 -26
  3. package/dist/transformers.js +1002 -587
  4. package/dist/transformers.min.js +23 -19
  5. package/dist/transformers.node.cjs +1030 -585
  6. package/dist/transformers.node.min.cjs +21 -17
  7. package/dist/transformers.node.min.mjs +21 -17
  8. package/dist/transformers.node.mjs +1000 -585
  9. package/dist/transformers.web.js +887 -472
  10. package/dist/transformers.web.min.js +21 -17
  11. package/package.json +3 -3
  12. package/src/configs.js +28 -22
  13. package/src/env.js +1 -1
  14. package/src/image_processors_utils.js +25 -15
  15. package/src/models/chmv2/image_processing_chmv2.js +3 -0
  16. package/src/models/chmv2/modeling_chmv2.js +4 -0
  17. package/src/models/deepseek_v3/modeling_deepseek_v3.js +5 -0
  18. package/src/models/eurobert/modeling_eurobert.js +41 -0
  19. package/src/models/gemma3/image_processing_gemma3.js +3 -0
  20. package/src/models/gemma3/modeling_gemma3.js +4 -1
  21. package/src/models/gemma3/processing_gemma3.js +45 -0
  22. package/src/models/glm46v/image_processing_glm46v.js +12 -0
  23. package/src/models/glm46v/processing_glm46v.js +5 -0
  24. package/src/models/glm_moe_dsa/modeling_glm_moe_dsa.js +5 -0
  25. package/src/models/glm_ocr/modeling_glm_ocr.js +78 -0
  26. package/src/models/image_processors.js +3 -0
  27. package/src/models/lfm2_vl/image_processing_lfm2_vl.js +1 -1
  28. package/src/models/lighton_ocr/modeling_lighton_ocr.js +3 -0
  29. package/src/models/mistral4/modeling_mistral4.js +5 -0
  30. package/src/models/modeling_utils.js +48 -25
  31. package/src/models/models.js +10 -1
  32. package/src/models/nemotron_h/modeling_nemotron_h.js +5 -0
  33. package/src/models/processors.js +2 -0
  34. package/src/models/qwen2_vl/modeling_qwen2_vl.js +226 -168
  35. package/src/models/qwen2_vl/processing_qwen2_vl.js +5 -4
  36. package/src/models/registry.js +19 -8
  37. package/src/models/solar_open/modeling_solar_open.js +5 -0
  38. package/src/pipelines.js +1 -0
  39. package/src/utils/hub.js +4 -1
  40. package/src/utils/model_registry/ModelRegistry.js +36 -0
  41. package/src/utils/model_registry/get_available_dtypes.js +68 -0
  42. package/src/utils/model_registry/get_file_metadata.js +1 -0
  43. package/src/utils/model_registry/get_model_files.js +7 -60
  44. package/src/utils/model_registry/resolve_model_type.js +66 -0
  45. package/types/configs.d.ts.map +1 -1
  46. package/types/image_processors_utils.d.ts +3 -2
  47. package/types/image_processors_utils.d.ts.map +1 -1
  48. package/types/models/{ast/modeling_ast.d.ts → audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.d.ts} +1 -1
  49. package/types/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.d.ts.map +1 -0
  50. package/types/models/chmv2/image_processing_chmv2.d.ts +4 -0
  51. package/types/models/chmv2/image_processing_chmv2.d.ts.map +1 -0
  52. package/types/models/chmv2/modeling_chmv2.d.ts +6 -0
  53. package/types/models/chmv2/modeling_chmv2.d.ts.map +1 -0
  54. package/types/models/deepseek_v3/modeling_deepseek_v3.d.ts +8 -0
  55. package/types/models/deepseek_v3/modeling_deepseek_v3.d.ts.map +1 -0
  56. package/types/models/eurobert/modeling_eurobert.d.ts +36 -0
  57. package/types/models/eurobert/modeling_eurobert.d.ts.map +1 -0
  58. package/types/models/gemma3/image_processing_gemma3.d.ts +4 -0
  59. package/types/models/gemma3/image_processing_gemma3.d.ts.map +1 -0
  60. package/types/models/gemma3/modeling_gemma3.d.ts +4 -1
  61. package/types/models/gemma3/modeling_gemma3.d.ts.map +1 -1
  62. package/types/models/gemma3/processing_gemma3.d.ts +20 -0
  63. package/types/models/gemma3/processing_gemma3.d.ts.map +1 -0
  64. package/types/models/glm46v/image_processing_glm46v.d.ts +4 -0
  65. package/types/models/glm46v/image_processing_glm46v.d.ts.map +1 -0
  66. package/types/models/glm46v/processing_glm46v.d.ts +4 -0
  67. package/types/models/glm46v/processing_glm46v.d.ts.map +1 -0
  68. package/types/models/glm_moe_dsa/modeling_glm_moe_dsa.d.ts +8 -0
  69. package/types/models/glm_moe_dsa/modeling_glm_moe_dsa.d.ts.map +1 -0
  70. package/types/models/glm_ocr/modeling_glm_ocr.d.ts +26 -0
  71. package/types/models/glm_ocr/modeling_glm_ocr.d.ts.map +1 -0
  72. package/types/models/image_processors.d.ts +3 -0
  73. package/types/models/lighton_ocr/modeling_lighton_ocr.d.ts +4 -0
  74. package/types/models/lighton_ocr/modeling_lighton_ocr.d.ts.map +1 -0
  75. package/types/models/mistral4/modeling_mistral4.d.ts +8 -0
  76. package/types/models/mistral4/modeling_mistral4.d.ts.map +1 -0
  77. package/types/models/modeling_utils.d.ts +2 -3
  78. package/types/models/modeling_utils.d.ts.map +1 -1
  79. package/types/models/models.d.ts +10 -1
  80. package/types/models/nemotron_h/modeling_nemotron_h.d.ts +8 -0
  81. package/types/models/nemotron_h/modeling_nemotron_h.d.ts.map +1 -0
  82. package/types/models/processors.d.ts +2 -0
  83. package/types/models/qwen2_vl/modeling_qwen2_vl.d.ts +41 -6
  84. package/types/models/qwen2_vl/modeling_qwen2_vl.d.ts.map +1 -1
  85. package/types/models/qwen2_vl/processing_qwen2_vl.d.ts +1 -0
  86. package/types/models/qwen2_vl/processing_qwen2_vl.d.ts.map +1 -1
  87. package/types/models/registry.d.ts.map +1 -1
  88. package/types/models/solar_open/modeling_solar_open.d.ts +8 -0
  89. package/types/models/solar_open/modeling_solar_open.d.ts.map +1 -0
  90. package/types/pipelines.d.ts +1 -0
  91. package/types/pipelines.d.ts.map +1 -1
  92. package/types/utils/hub.d.ts.map +1 -1
  93. package/types/utils/model_registry/ModelRegistry.d.ts +27 -0
  94. package/types/utils/model_registry/ModelRegistry.d.ts.map +1 -1
  95. package/types/utils/model_registry/get_available_dtypes.d.ts +26 -0
  96. package/types/utils/model_registry/get_available_dtypes.d.ts.map +1 -0
  97. package/types/utils/model_registry/get_model_files.d.ts +25 -0
  98. package/types/utils/model_registry/get_model_files.d.ts.map +1 -1
  99. package/types/utils/model_registry/resolve_model_type.d.ts +24 -0
  100. package/types/utils/model_registry/resolve_model_type.d.ts.map +1 -0
  101. package/types/models/ast/modeling_ast.d.ts.map +0 -1
  102. /package/src/models/{ast/modeling_ast.js → audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.js} +0 -0
@@ -20,7 +20,7 @@ var node_path_default = {};
20
20
  var node_url_default = {};
21
21
 
22
22
  // src/env.js
23
- var VERSION = "4.0.0-next.7";
23
+ var VERSION = "4.0.0-next.9";
24
24
  var HAS_SELF = typeof self !== "undefined";
25
25
  var IS_FS_AVAILABLE = !isEmpty(node_fs_default);
26
26
  var IS_PATH_AVAILABLE = !isEmpty(node_path_default);
@@ -250,7 +250,7 @@ var logger = {
250
250
  }
251
251
  };
252
252
 
253
- // ../../node_modules/.pnpm/@huggingface+tokenizers@0.1.2/node_modules/@huggingface/tokenizers/dist/tokenizers.mjs
253
+ // ../../node_modules/.pnpm/@huggingface+tokenizers@0.1.3/node_modules/@huggingface/tokenizers/dist/tokenizers.mjs
254
254
  var DictionarySplitter = class {
255
255
  /**
256
256
  * @param dictionary The dictionary of words to use for splitting.
@@ -1906,10 +1906,10 @@ var BPE = class extends TokenizerModel_default {
1906
1906
  );
1907
1907
  if (byte_tokens.every((x) => this.tokens_to_ids.has(x))) {
1908
1908
  output_tokens.push(...byte_tokens);
1909
- } else {
1909
+ } else if (this.unk_token != null) {
1910
1910
  output_tokens.push(this.unk_token);
1911
1911
  }
1912
- } else {
1912
+ } else if (this.unk_token != null) {
1913
1913
  output_tokens.push(this.unk_token);
1914
1914
  }
1915
1915
  }
@@ -5754,14 +5754,14 @@ var Random = class {
5754
5754
  * @returns {number} A normally distributed random value.
5755
5755
  */
5756
5756
  gauss(mu = 0, sigma = 1) {
5757
- let z = this._gauss_next;
5757
+ let z2 = this._gauss_next;
5758
5758
  this._gauss_next = null;
5759
- if (z === null) {
5759
+ if (z2 === null) {
5760
5760
  const x2pi = this.random() * 2 * Math.PI, g2rad = Math.sqrt(-2 * Math.log(1 - this.random()));
5761
- z = Math.cos(x2pi) * g2rad;
5761
+ z2 = Math.cos(x2pi) * g2rad;
5762
5762
  this._gauss_next = Math.sin(x2pi) * g2rad;
5763
5763
  }
5764
- return mu + z * sigma;
5764
+ return mu + z2 * sigma;
5765
5765
  }
5766
5766
  /**
5767
5767
  * Shuffles an array in-place using the Fisher-Yates algorithm.
@@ -6515,13 +6515,15 @@ async function storeCachedResource(path_or_repo_id, filename, cache2, cacheKey,
6515
6515
  wrapped_progress
6516
6516
  );
6517
6517
  } else if (typeof response !== "string") {
6518
+ const headers = new Headers(response.headers);
6519
+ headers.set("content-length", result.byteLength.toString());
6518
6520
  await cache2.put(
6519
6521
  cacheKey,
6520
6522
  new Response(
6521
6523
  /** @type {any} */
6522
6524
  result,
6523
6525
  {
6524
- headers: response.headers
6526
+ headers
6525
6527
  }
6526
6528
  )
6527
6529
  ).catch((err) => {
@@ -7483,7 +7485,7 @@ __export(onnxruntime_node_exports, {
7483
7485
  });
7484
7486
  var onnxruntime_node_default = {};
7485
7487
 
7486
- // ../../node_modules/.pnpm/onnxruntime-web@1.25.0-dev.20260307-d626b568e0/node_modules/onnxruntime-web/dist/ort.webgpu.bundle.min.mjs
7488
+ // ../../node_modules/.pnpm/onnxruntime-web@1.25.0-dev.20260323-a99aad9d36/node_modules/onnxruntime-web/dist/ort.webgpu.bundle.min.mjs
7487
7489
  var ort_webgpu_bundle_min_exports = {};
7488
7490
  __export(ort_webgpu_bundle_min_exports, {
7489
7491
  InferenceSession: () => Jf,
@@ -8251,7 +8253,7 @@ async function ts(a = {}) {
8251
8253
  throw L(e = "Aborted(" + e + ")"), W = true, e = new WebAssembly.RuntimeError(e + ". Build with -sASSERTIONS for more info."), R?.(e), e;
8252
8254
  }
8253
8255
  function Ye() {
8254
- return { a: { f: Vs, J: js, k: Hs, p: Ys, l: qs, ta: Js, b: Xs, ca: Qs, Ka: Sn, s: Zs, da: Ln, _a: On, Ga: Bn, Ia: Mn, $a: Cn, Ya: Un, Ra: Dn, Xa: Pn, pa: _n, Ha: Rn, Yb: Nn, Za: kn, Fa: Wn, eb: Ks, Da: ti, Tb: ri, Rb: oi, Ca: si, M: ii, I: ui, Sb: fi, ka: yi, Ub: bi, Ua: wi, Wb: Ti, La: vi, Pb: Ei, la: Si, Ta: Ar, bb: Ai, U: Oi, n: Di, c: Er, sb: Pi, w: _i, L: Ri, z: Ni, j: ki, o: Yn, tb: Wi, G: Fi, T: Gi, h: $i, u: zi, m: Vi, i: ji, Oa: Hi, Pa: Yi, Qa: qi, Ma: Qn, Na: Zn, Qb: Kn, fb: Xi, db: Ki, Y: eu, rb: tu, ma: ru, cb: Qi, gb: nu, ab: ou, Xb: au, N: Ji, hb: su, X: iu, Vb: uu, ob: bu, C: wu, sa: gu, ra: Tu, qb: vu, W: Eu, v: Su, nb: Au, mb: Iu, lb: xu, pb: Lu, kb: Ou, jb: Bu, ib: Mu, Va: ao, Wa: so, Ja: br, ea: io, oa: uo, Sa: fo, na: co, Db: Gf, xa: Df, Eb: Ff, ya: Uf, F: Ef, e: ff, r: sf, x: af, D: gf, Ib: Bf, ba: Lf, B: df, za: Mf, $: Pf, ha: Of, Fb: kf, Gb: Nf, Ba: Sf, Aa: xf, Jb: Af, wa: Wf, aa: Cf, d: cf, A: lf, q: uf, Cb: $f, t: mf, y: Tf, H: pf, E: hf, K: vf, S: _f, ja: wf, _: Rf, Kb: bf, Lb: yf, P: If2, g: Uu, a: Fe, Ob: qe, Hb: Du, ia: Pu, O: _u, qa: Ru, Mb: Nu, Q: ku, zb: Wu, Ab: Fu, ua: Gu, fa: $u, R: zu, Ea: Vu, va: ju, Z: Hu, xb: Yu, Zb: qu, V: Ju, Bb: Xu, ub: Qu, vb: Ku, wb: ef, ga: tf, yb: rf, Nb: nf } };
8256
+ return { a: { f: Vs, J: js, k: Hs, p: Ys, l: qs, ta: Js, b: Xs, ca: Qs, Ka: Sn, q: Zs, da: Ln, _a: On, Ga: Bn, Ia: Mn, $a: Cn, Ya: Un, Ra: Dn, Xa: Pn, pa: _n, Ha: Rn, Yb: Nn, Za: kn, Fa: Wn, eb: Ks, Da: ti, Tb: ri, Rb: oi, Ca: si, M: ii, I: ui, Sb: fi, ka: yi, Ub: bi, Ua: wi, Wb: Ti, La: vi, Pb: Ei, la: Si, Ta: Ar, bb: Ai, U: Oi, n: Di, c: Er, sb: Pi, w: _i, L: Ri, z: Ni, j: ki, o: Yn, tb: Wi, G: Fi, T: Gi, h: $i, u: zi, m: Vi, i: ji, Oa: Hi, Pa: Yi, Qa: qi, Ma: Qn, Na: Zn, Qb: Kn, fb: Xi, db: Ki, Y: eu, rb: tu, ma: ru, cb: Qi, gb: nu, ab: ou, Xb: au, N: Ji, hb: su, X: iu, Vb: uu, ob: bu, C: wu, sa: gu, ra: Tu, qb: vu, W: Eu, v: Su, nb: Au, mb: Iu, lb: xu, pb: Lu, kb: Ou, jb: Bu, ib: Mu, Va: ao, Wa: so, Ja: br, ea: io, oa: uo, Sa: fo, na: co, Db: Gf, xa: Df, Eb: Ff, ya: Uf, F: Ef, e: ff, s: sf, x: af, D: gf, Ib: Bf, ba: Lf, B: lf, za: Mf, $: Pf, ha: Of, Fb: kf, Gb: Nf, Ba: Sf, Aa: xf, Jb: Af, wa: Wf, aa: Cf, d: uf, A: df, r: cf, Cb: $f, t: mf, y: Tf, H: pf, E: hf, K: vf, S: _f, ja: wf, _: Rf, Kb: bf, Lb: yf, P: If2, g: Uu, a: Fe, Ob: qe, Hb: Du, ia: Pu, O: _u, qa: Ru, Mb: Nu, Q: ku, zb: Wu, Ab: Fu, ua: Gu, fa: $u, R: zu, Ea: Vu, va: ju, Z: Hu, xb: Yu, Zb: qu, V: Ju, Bb: Xu, ub: Qu, vb: Ku, wb: ef, ga: tf, yb: rf, Nb: nf } };
8255
8257
  }
8256
8258
  async function bt() {
8257
8259
  function e(o, u) {
@@ -8314,14 +8316,14 @@ async function ts(a = {}) {
8314
8316
  gt.push(t), Je[e.Nc] = t, t.Nc = e.Nc;
8315
8317
  var n = { Oc: "run", he: e.ge, Wc: e.Wc, Nc: e.Nc };
8316
8318
  return t.postMessage(n, e.Yc), 0;
8317
- }, z = 0, V = (e, t, ...n) => {
8319
+ }, G = 0, V = (e, t, ...n) => {
8318
8320
  var o, u = 16 * n.length, c = P(), h = Ft(u), b = h >>> 3;
8319
8321
  for (o of n) typeof o == "bigint" ? ((p(), pe)[b++ >>> 0] = 1n, (p(), pe)[b++ >>> 0] = o) : ((p(), pe)[b++ >>> 0] = 0n, (p(), ae)[b++ >>> 0] = o);
8320
8322
  return e = Lo(e, 0, u, h, t), D(c), e;
8321
8323
  };
8322
8324
  function qe(e) {
8323
8325
  if (i) return V(0, 1, e);
8324
- if (S = e, !(0 < z)) {
8326
+ if (S = e, !(0 < G)) {
8325
8327
  for (var t of gt) Se(t);
8326
8328
  for (t of We) Se(t);
8327
8329
  We = [], gt = [], Je = {}, W = true;
@@ -8366,7 +8368,7 @@ async function ts(a = {}) {
8366
8368
  We.push(e);
8367
8369
  }
8368
8370
  var Fe, zs = (e, t) => {
8369
- z = 0, e = zr(e, t), 0 < z ? S = e : Fr(e);
8371
+ G = 0, e = zr(e, t), 0 < G ? S = e : Fr(e);
8370
8372
  }, Ct = [], Ut = 0, me = (e) => -9007199254740992 > e || 9007199254740992 < e ? NaN : Number(e);
8371
8373
  function Vs(e) {
8372
8374
  var t = new wr(e >>>= 0);
@@ -8718,7 +8720,7 @@ async function ts(a = {}) {
8718
8720
  }
8719
8721
  var he = (e) => {
8720
8722
  if (!W) try {
8721
- if (e(), !(0 < z)) try {
8723
+ if (e(), !(0 < G)) try {
8722
8724
  i ? Wt() && Fr(S) : br(S);
8723
8725
  } catch (t) {
8724
8726
  t instanceof wt || t == "unwind" || y(0, t);
@@ -8746,7 +8748,7 @@ async function ts(a = {}) {
8746
8748
  return (t ? Vr[t] : of[e])(...Ir);
8747
8749
  }
8748
8750
  var Ei = () => {
8749
- z = 0;
8751
+ G = 0;
8750
8752
  };
8751
8753
  function Si(e) {
8752
8754
  e >>>= 0, i ? postMessage({ Oc: "cleanupThread", ie: e }) : yn(Je[e]);
@@ -8766,7 +8768,7 @@ async function ts(a = {}) {
8766
8768
  try {
8767
8769
  return e(...n);
8768
8770
  } finally {
8769
- W || (_t.pop(), Me && Ge === 1 && _t.length === 0 && (Ge = 0, z += 1, Pt(wa), typeof Fibers < "u" && Fibers.De()));
8771
+ W || (_t.pop(), Me && Ge === 1 && _t.length === 0 && (Ge = 0, G += 1, Pt(wa), typeof Fibers < "u" && Fibers.De()));
8770
8772
  }
8771
8773
  };
8772
8774
  return jn.set(e, t), t;
@@ -8781,7 +8783,7 @@ async function ts(a = {}) {
8781
8783
  try {
8782
8784
  var c = (function() {
8783
8785
  var E = (p(), x)[Me + 8 >>> 2 >>> 0];
8784
- return E = Vn.get(E), E = jn.get(E), --z, E();
8786
+ return E = Vn.get(E), E = jn.get(E), --G, E();
8785
8787
  })();
8786
8788
  } catch (E) {
8787
8789
  c = E, u = true;
@@ -8972,7 +8974,7 @@ async function ts(a = {}) {
8972
8974
  return L(ct(e >>> 0, t >>> 0));
8973
8975
  }
8974
8976
  var ou = () => {
8975
- throw z += 1, "unwind";
8977
+ throw G += 1, "unwind";
8976
8978
  };
8977
8979
  function au() {
8978
8980
  return 4294901760;
@@ -9065,15 +9067,15 @@ async function ts(a = {}) {
9065
9067
  }
9066
9068
  (b = (p(), A)[c + 24 >>> 2 >>> 0]) && (b = { label: Ne(b + 4) }, e.defaultQueue = b), e.label = Ne(c + 4);
9067
9069
  }
9068
- z += 1, lt(t, h.requestDevice(e).then((B) => {
9069
- --z, he(() => {
9070
- ce[u >>> 0] = B.queue, ce[o >>> 0] = B, lt(n, B.lost.then((ue) => {
9070
+ G += 1, lt(t, h.requestDevice(e).then((B) => {
9071
+ --G, he(() => {
9072
+ ce[u >>> 0] = B.queue, ce[o >>> 0] = B, G += 1, lt(n, B.lost.then((ue) => {
9071
9073
  he(() => {
9072
9074
  B.onuncapturederror = () => {
9073
9075
  };
9074
9076
  var ye = P(), fe = Ce(ue.message);
9075
9077
  _r(n, yu[ue.reason], fe), D(ye);
9076
- });
9078
+ }), --G;
9077
9079
  })), B.onuncapturederror = (ue) => {
9078
9080
  var ye = 5;
9079
9081
  ue.error instanceof GPUValidationError ? ye = 2 : ue.error instanceof GPUOutOfMemoryError ? ye = 3 : ue.error instanceof GPUInternalError && (ye = 4);
@@ -9082,7 +9084,7 @@ async function ts(a = {}) {
9082
9084
  }, "adapterInfo" in B || (B.adapterInfo = h.info), kr(t, 1, o, 0);
9083
9085
  });
9084
9086
  }, (B) => {
9085
- --z, he(() => {
9087
+ --G, he(() => {
9086
9088
  var ue = P(), ye = Ce(B.message);
9087
9089
  kr(t, 3, o, ye), n && _r(n, 4, ye), D(ue);
9088
9090
  });
@@ -9125,12 +9127,12 @@ async function ts(a = {}) {
9125
9127
  function vu(e, t, n, o, u) {
9126
9128
  e >>>= 0, t = me(t), n = me(n), u >>>= 0;
9127
9129
  var c = O(e);
9128
- Re[e] = [], u == 4294967295 && (u = void 0), z += 1, lt(t, c.mapAsync(n, o >>> 0, u).then(() => {
9129
- --z, he(() => {
9130
+ Re[e] = [], u == 4294967295 && (u = void 0), G += 1, lt(t, c.mapAsync(n, o >>> 0, u).then(() => {
9131
+ --G, he(() => {
9130
9132
  Rr(t, 1, 0);
9131
9133
  });
9132
9134
  }, (h) => {
9133
- --z, he(() => {
9135
+ --G, he(() => {
9134
9136
  P();
9135
9137
  var b = Ce(h.message);
9136
9138
  Rr(t, h.name === "AbortError" ? 4 : h.name === "OperationError" ? 3 : 0, b), delete Re[e];
@@ -9159,12 +9161,12 @@ async function ts(a = {}) {
9159
9161
  return ce[n >>> 0] = u, o && (Re[n] = []), true;
9160
9162
  }
9161
9163
  function Iu(e, t, n, o) {
9162
- e >>>= 0, t = me(t), o >>>= 0, n = du(n >>> 0), e = O(e), z += 1, lt(t, e.createComputePipelineAsync(n).then((u) => {
9163
- --z, he(() => {
9164
+ e >>>= 0, t = me(t), o >>>= 0, n = du(n >>> 0), e = O(e), G += 1, lt(t, e.createComputePipelineAsync(n).then((u) => {
9165
+ --G, he(() => {
9164
9166
  ce[o >>> 0] = u, Pr(t, 1, o, 0);
9165
9167
  });
9166
9168
  }, (u) => {
9167
- --z, he(() => {
9169
+ --G, he(() => {
9168
9170
  var c = P(), h = Ce(u.message);
9169
9171
  Pr(t, u.reason === "validation" ? 3 : u.reason === "internal" ? 4 : 0, o, h), D(c);
9170
9172
  });
@@ -9179,15 +9181,15 @@ async function ts(a = {}) {
9179
9181
  (e = O(e)).onuncapturederror = null, e.destroy();
9180
9182
  };
9181
9183
  function Ou(e, t) {
9182
- t = me(t), e = O(e >>> 0), z += 1, lt(t, e.popErrorScope().then((n) => {
9183
- --z, he(() => {
9184
+ t = me(t), e = O(e >>> 0), G += 1, lt(t, e.popErrorScope().then((n) => {
9185
+ --G, he(() => {
9184
9186
  var o = 5;
9185
9187
  n ? n instanceof GPUValidationError ? o = 2 : n instanceof GPUOutOfMemoryError ? o = 3 : n instanceof GPUInternalError && (o = 4) : o = 1;
9186
9188
  var u = P(), c = n ? Ce(n.message) : 0;
9187
9189
  Nr(t, 1, o, c), D(u);
9188
9190
  });
9189
9191
  }, (n) => {
9190
- --z, he(() => {
9192
+ --G, he(() => {
9191
9193
  var o = P(), u = Ce(n.message);
9192
9194
  Nr(t, 1, 5, u), D(o);
9193
9195
  });
@@ -9198,8 +9200,8 @@ async function ts(a = {}) {
9198
9200
  var u = { featureLevel: pu[(p(), x)[n + 4 >>> 2 >>> 0]], powerPreference: mu[(p(), x)[n + 8 >>> 2 >>> 0]], forceFallbackAdapter: !!(p(), A)[n + 12 >>> 2 >>> 0] };
9199
9201
  (e = (p(), A)[n >>> 2 >>> 0]) !== 0 && (p(), u.Fe = !!(p(), A)[e + 8 >>> 2 >>> 0]);
9200
9202
  }
9201
- "gpu" in navigator ? (z += 1, lt(t, navigator.gpu.requestAdapter(u).then((c) => {
9202
- --z, he(() => {
9203
+ "gpu" in navigator ? (G += 1, lt(t, navigator.gpu.requestAdapter(u).then((c) => {
9204
+ --G, he(() => {
9203
9205
  if (c) ce[o >>> 0] = c, Et(t, 1, o, 0);
9204
9206
  else {
9205
9207
  var h = P(), b = Ce("WebGPU not available on this browser (requestAdapter returned null)");
@@ -9207,7 +9209,7 @@ async function ts(a = {}) {
9207
9209
  }
9208
9210
  });
9209
9211
  }, (c) => {
9210
- --z, he(() => {
9212
+ --G, he(() => {
9211
9213
  var h = P(), b = Ce(c.message);
9212
9214
  Et(t, 4, o, b), D(h);
9213
9215
  });
@@ -9438,7 +9440,7 @@ async function ts(a = {}) {
9438
9440
  Te(`invalid type for getValue: ${t}`);
9439
9441
  }
9440
9442
  }, r.UTF8ToString = ct, r.stringToUTF8 = Pe, r.lengthBytesUTF8 = _e;
9441
- var lo, po, Dr, Wt, xe, pt, mo, ho, yo, bo, wo, go, To, vo, Eo, So, Ao, Pr, _r, Rr, Nr, Et, kr, Io, Wr, xo, Lo, Oo, Fr, Bo, Mo, Gr, N, St, Co, D, Ft, P, Uo, $r, Do, Po, _o, zr, Ro, No, ko, Wo, Fo, Go, $o, zo, Vo, jo, Ho, Yo, qo, Jo, Xo, Qo, Zo, Ko, ea, ta, ra, na, oa, aa, sa, ia, ua, fa, ca, da, la, pa, ma, ha, ya, ba, wa, ga, Ta, ke, of = [qe, yr, En, Ln, On, Bn, Mn, Cn, Un, Dn, Pn, _n, Rn, Nn, kn, Wn, Qn, Zn, Kn, ao, so, io, uo, fo, co], Vr = { 925676: (e, t, n, o, u) => {
9443
+ var lo, po, Dr, Wt, xe, pt, mo, ho, yo, bo, wo, go, To, vo, Eo, So, Ao, Pr, _r, Rr, Nr, Et, kr, Io, Wr, xo, Lo, Oo, Fr, Bo, Mo, Gr, N, St, Co, D, Ft, P, Uo, $r, Do, Po, _o, zr, Ro, No, ko, Wo, Fo, Go, $o, zo, Vo, jo, Ho, Yo, qo, Jo, Xo, Qo, Zo, Ko, ea, ta, ra, na, oa, aa, sa, ia, ua, fa, ca, da, la, pa, ma, ha, ya, ba, wa, ga, Ta, ke, of = [qe, yr, En, Ln, On, Bn, Mn, Cn, Un, Dn, Pn, _n, Rn, Nn, kn, Wn, Qn, Zn, Kn, ao, so, io, uo, fo, co], Vr = { 937012: (e, t, n, o, u) => {
9442
9444
  if (r === void 0 || !r.Uc) return 1;
9443
9445
  if ((e = ct(Number(e >>> 0))).startsWith("./") && (e = e.substring(2)), !(e = r.Uc.get(e))) return 2;
9444
9446
  if (t = Number(t >>> 0), n = Number(n >>> 0), o = Number(o >>> 0), t + n > e.byteLength) return 3;
@@ -9458,11 +9460,11 @@ async function ts(a = {}) {
9458
9460
  } catch {
9459
9461
  return 4;
9460
9462
  }
9461
- }, 926500: (e, t, n) => {
9463
+ }, 937836: (e, t, n) => {
9462
9464
  r.Sd(e, (p(), J).subarray(t >>> 0, t + n >>> 0));
9463
- }, 926564: () => r.me(), 926606: (e) => {
9465
+ }, 937900: () => r.me(), 937942: (e) => {
9464
9466
  r.jd(e);
9465
- }, 926643: () => typeof wasmOffsetConverter < "u" };
9467
+ }, 937979: () => typeof wasmOffsetConverter < "u" };
9466
9468
  function af(e, t, n, o) {
9467
9469
  var u = P();
9468
9470
  try {
@@ -9481,12 +9483,12 @@ async function ts(a = {}) {
9481
9483
  N(1, 0);
9482
9484
  }
9483
9485
  }
9484
- function uf(e, t, n) {
9485
- var o = P();
9486
+ function uf(e) {
9487
+ var t = P();
9486
9488
  try {
9487
- _o(e, t, n);
9488
- } catch (u) {
9489
- if (D(o), u !== u + 0) throw u;
9489
+ Ro(e);
9490
+ } catch (n) {
9491
+ if (D(t), n !== n + 0) throw n;
9490
9492
  N(1, 0);
9491
9493
  }
9492
9494
  }
@@ -9499,25 +9501,16 @@ async function ts(a = {}) {
9499
9501
  N(1, 0);
9500
9502
  }
9501
9503
  }
9502
- function cf(e) {
9503
- var t = P();
9504
- try {
9505
- Ro(e);
9506
- } catch (n) {
9507
- if (D(t), n !== n + 0) throw n;
9508
- N(1, 0);
9509
- }
9510
- }
9511
- function df(e, t, n, o, u, c, h) {
9512
- var b = P();
9504
+ function cf(e, t, n) {
9505
+ var o = P();
9513
9506
  try {
9514
- return Wo(e, t, n, o, u, c, h);
9515
- } catch (E) {
9516
- if (D(b), E !== E + 0) throw E;
9507
+ _o(e, t, n);
9508
+ } catch (u) {
9509
+ if (D(o), u !== u + 0) throw u;
9517
9510
  N(1, 0);
9518
9511
  }
9519
9512
  }
9520
- function lf(e, t) {
9513
+ function df(e, t) {
9521
9514
  var n = P();
9522
9515
  try {
9523
9516
  Vo(e, t);
@@ -9526,6 +9519,15 @@ async function ts(a = {}) {
9526
9519
  N(1, 0);
9527
9520
  }
9528
9521
  }
9522
+ function lf(e, t, n, o, u, c, h) {
9523
+ var b = P();
9524
+ try {
9525
+ return Wo(e, t, n, o, u, c, h);
9526
+ } catch (E) {
9527
+ if (D(b), E !== E + 0) throw E;
9528
+ N(1, 0);
9529
+ }
9530
+ }
9529
9531
  function pf(e, t, n, o, u, c) {
9530
9532
  var h = P();
9531
9533
  try {
@@ -9955,7 +9957,7 @@ var nc;
9955
9957
  var oc;
9956
9958
  var ac;
9957
9959
  var qt;
9958
- var $;
9960
+ var z;
9959
9961
  var je = k(() => {
9960
9962
  "use strict";
9961
9963
  Yt();
@@ -10011,19 +10013,19 @@ var je = k(() => {
10011
10013
  rr = false, ds = true, H(M);
10012
10014
  });
10013
10015
  })), await Promise.race(C), S) throw new Error(`WebAssembly backend initializing failed due to timeout: ${r}ms`);
10014
- }, $ = () => {
10016
+ }, z = () => {
10015
10017
  if (nn && rn) return rn;
10016
10018
  throw new Error("WebAssembly is not initialized yet.");
10017
10019
  };
10018
10020
  });
10019
10021
  var be;
10020
10022
  var Lt;
10021
- var G;
10023
+ var $;
10022
10024
  var nr = k(() => {
10023
10025
  "use strict";
10024
10026
  je();
10025
10027
  be = (a, r) => {
10026
- let s = $(), f = s.lengthBytesUTF8(a) + 1, i = s._malloc(f);
10028
+ let s = z(), f = s.lengthBytesUTF8(a) + 1, i = s._malloc(f);
10027
10029
  return s.stringToUTF8(a, i, f), r.push(i), i;
10028
10030
  }, Lt = (a, r, s, f) => {
10029
10031
  if (typeof a == "object" && a !== null) {
@@ -10037,8 +10039,8 @@ var nr = k(() => {
10037
10039
  else if (typeof d == "boolean") f(l, d ? "1" : "0");
10038
10040
  else throw new Error(`Can't handle extra config type: ${typeof d}`);
10039
10041
  });
10040
- }, G = (a) => {
10041
- let r = $(), s = r.stackSave();
10042
+ }, $ = (a) => {
10043
+ let r = z(), s = r.stackSave();
10042
10044
  try {
10043
10045
  let f = r.PTR_SIZE, i = r.stackAlloc(2 * f);
10044
10046
  r._OrtGetLastError(i, i + f);
@@ -10055,7 +10057,7 @@ var ps = k(() => {
10055
10057
  je();
10056
10058
  nr();
10057
10059
  ls = (a) => {
10058
- let r = $(), s = 0, f = [], i = a || {};
10060
+ let r = z(), s = 0, f = [], i = a || {};
10059
10061
  try {
10060
10062
  if (a?.logSeverityLevel === void 0) i.logSeverityLevel = 2;
10061
10063
  else if (typeof a.logSeverityLevel != "number" || !Number.isInteger(a.logSeverityLevel) || a.logSeverityLevel < 0 || a.logSeverityLevel > 4) throw new Error(`log severity level is not valid: ${a.logSeverityLevel}`);
@@ -10063,9 +10065,9 @@ var ps = k(() => {
10063
10065
  else if (typeof a.logVerbosityLevel != "number" || !Number.isInteger(a.logVerbosityLevel)) throw new Error(`log verbosity level is not valid: ${a.logVerbosityLevel}`);
10064
10066
  a?.terminate === void 0 && (i.terminate = false);
10065
10067
  let d = 0;
10066
- return a?.tag !== void 0 && (d = be(a.tag, f)), s = r._OrtCreateRunOptions(i.logSeverityLevel, i.logVerbosityLevel, !!i.terminate, d), s === 0 && G("Can't create run options."), a?.extra !== void 0 && Lt(a.extra, "", /* @__PURE__ */ new WeakSet(), (l, m) => {
10068
+ return a?.tag !== void 0 && (d = be(a.tag, f)), s = r._OrtCreateRunOptions(i.logSeverityLevel, i.logVerbosityLevel, !!i.terminate, d), s === 0 && $("Can't create run options."), a?.extra !== void 0 && Lt(a.extra, "", /* @__PURE__ */ new WeakSet(), (l, m) => {
10067
10069
  let y = be(l, f), w = be(m, f);
10068
- r._OrtAddRunConfigEntry(s, y, w) !== 0 && G(`Can't set a run config entry: ${l} - ${m}.`);
10070
+ r._OrtAddRunConfigEntry(s, y, w) !== 0 && $(`Can't set a run config entry: ${l} - ${m}.`);
10069
10071
  }), [s, f];
10070
10072
  } catch (d) {
10071
10073
  throw s !== 0 && r._OrtReleaseRunOptions(s), f.forEach((l) => r._free(l)), d;
@@ -10113,7 +10115,7 @@ var hs = k(() => {
10113
10115
  r.use_ort_model_bytes_directly || (r.use_ort_model_bytes_directly = "1"), a.executionProviders && a.executionProviders.some((s) => (typeof s == "string" ? s : s.name) === "webgpu") && (a.enableMemPattern = false);
10114
10116
  }, on = (a, r, s, f) => {
10115
10117
  let i = be(r, f), d = be(s, f);
10116
- $()._OrtAddSessionConfigEntry(a, i, d) !== 0 && G(`Can't set a session config entry: ${r} - ${s}.`);
10118
+ z()._OrtAddSessionConfigEntry(a, i, d) !== 0 && $(`Can't set a session config entry: ${r} - ${s}.`);
10117
10119
  }, ot = (a, r, s, f) => {
10118
10120
  let i = be(r, f), d = be(s, f);
10119
10121
  a.push([i, d]);
@@ -10144,7 +10146,7 @@ var hs = k(() => {
10144
10146
  }
10145
10147
  S.validationMode && ot(l, "validationMode", S.validationMode, s);
10146
10148
  }
10147
- let v = $().webgpuRegisterDevice(g);
10149
+ let v = z().webgpuRegisterDevice(g);
10148
10150
  if (v) {
10149
10151
  let [S, C, R] = v;
10150
10152
  ot(l, "deviceId", S.toString(), s), ot(l, "webgpuInstance", C.toString(), s), ot(l, "webgpuDevice", R.toString(), s);
@@ -10159,13 +10161,13 @@ var hs = k(() => {
10159
10161
  }
10160
10162
  let m = be(d, s), y = l.length, w = 0, T = 0;
10161
10163
  if (y > 0) {
10162
- w = $()._malloc(y * $().PTR_SIZE), s.push(w), T = $()._malloc(y * $().PTR_SIZE), s.push(T);
10163
- for (let g = 0; g < y; g++) $().setValue(w + g * $().PTR_SIZE, l[g][0], "*"), $().setValue(T + g * $().PTR_SIZE, l[g][1], "*");
10164
+ w = z()._malloc(y * z().PTR_SIZE), s.push(w), T = z()._malloc(y * z().PTR_SIZE), s.push(T);
10165
+ for (let g = 0; g < y; g++) z().setValue(w + g * z().PTR_SIZE, l[g][0], "*"), z().setValue(T + g * z().PTR_SIZE, l[g][1], "*");
10164
10166
  }
10165
- await $()._OrtAppendExecutionProvider(a, m, w, T, y) !== 0 && G(`Can't append execution provider: ${d}.`);
10167
+ await z()._OrtAppendExecutionProvider(a, m, w, T, y) !== 0 && $(`Can't append execution provider: ${d}.`);
10166
10168
  }
10167
10169
  }, ms = async (a) => {
10168
- let r = $(), s = 0, f = [], i = a || {};
10170
+ let r = z(), s = 0, f = [], i = a || {};
10169
10171
  uc(i);
10170
10172
  try {
10171
10173
  let d = sc(i.graphOptimizationLevel ?? "all"), l = ic(i.executionMode ?? "sequential"), m = typeof i.logId == "string" ? be(i.logId, f) : 0, y = i.logSeverityLevel ?? 2;
@@ -10173,7 +10175,7 @@ var hs = k(() => {
10173
10175
  let w = i.logVerbosityLevel ?? 0;
10174
10176
  if (!Number.isInteger(w) || w < 0 || w > 4) throw new Error(`log verbosity level is not valid: ${w}`);
10175
10177
  let T = typeof i.optimizedModelFilePath == "string" ? be(i.optimizedModelFilePath, f) : 0;
10176
- if (s = r._OrtCreateSessionOptions(d, !!i.enableCpuMemArena, !!i.enableMemPattern, l, !!i.enableProfiling, 0, m, y, w, T), s === 0 && G("Can't create session options."), i.executionProviders && await fc(s, i, f), i.enableGraphCapture !== void 0) {
10178
+ if (s = r._OrtCreateSessionOptions(d, !!i.enableCpuMemArena, !!i.enableMemPattern, l, !!i.enableProfiling, 0, m, y, w, T), s === 0 && $("Can't create session options."), i.executionProviders && await fc(s, i, f), i.enableGraphCapture !== void 0) {
10177
10179
  if (typeof i.enableGraphCapture != "boolean") throw new Error(`enableGraphCapture must be a boolean value: ${i.enableGraphCapture}`);
10178
10180
  on(s, "enableGraphCapture", i.enableGraphCapture.toString(), f);
10179
10181
  }
@@ -10181,13 +10183,13 @@ var hs = k(() => {
10181
10183
  if (typeof g != "string") throw new Error(`free dimension override name must be a string: ${g}`);
10182
10184
  if (typeof v != "number" || !Number.isInteger(v) || v < 0) throw new Error(`free dimension override value must be a non-negative integer: ${v}`);
10183
10185
  let S = be(g, f);
10184
- r._OrtAddFreeDimensionOverride(s, S, v) !== 0 && G(`Can't set a free dimension override: ${g} - ${v}.`);
10186
+ r._OrtAddFreeDimensionOverride(s, S, v) !== 0 && $(`Can't set a free dimension override: ${g} - ${v}.`);
10185
10187
  }
10186
10188
  return i.extra !== void 0 && Lt(i.extra, "", /* @__PURE__ */ new WeakSet(), (g, v) => {
10187
10189
  on(s, g, v, f);
10188
10190
  }), [s, f];
10189
10191
  } catch (d) {
10190
- throw s !== 0 && r._OrtReleaseSessionOptions(s) !== 0 && G("Can't release session options."), f.forEach((l) => r._free(l)), d;
10192
+ throw s !== 0 && r._OrtReleaseSessionOptions(s) !== 0 && $("Can't release session options."), f.forEach((l) => r._free(l)), d;
10191
10193
  }
10192
10194
  };
10193
10195
  });
@@ -10757,7 +10759,7 @@ var Os = k(() => {
10757
10759
  return l ? l.push(d) : this.temporarySessionTensorIds.set(r, [d]), d;
10758
10760
  }
10759
10761
  uploadTensor(r, s) {
10760
- if (!$().shouldTransferToMLTensor) throw new Error("Trying to upload to a MLTensor while shouldTransferToMLTensor is false");
10762
+ if (!z().shouldTransferToMLTensor) throw new Error("Trying to upload to a MLTensor while shouldTransferToMLTensor is false");
10761
10763
  le("verbose", () => `[WebNN] uploadTensor {tensorId: ${r}, data: ${s.byteLength}}`), this.tensorManager.upload(r, s);
10762
10764
  }
10763
10765
  async downloadTensor(r, s) {
@@ -10863,11 +10865,11 @@ var Kr = k(() => {
10863
10865
  nr();
10864
10866
  sn();
10865
10867
  yc = (a, r) => {
10866
- $()._OrtInit(a, r) !== 0 && G("Can't initialize onnxruntime.");
10868
+ z()._OrtInit(a, r) !== 0 && $("Can't initialize onnxruntime.");
10867
10869
  }, Jt = async (a) => {
10868
10870
  yc(a.wasm.numThreads, Ot(a.logLevel));
10869
10871
  }, Xt = async (a, r) => {
10870
- $().asyncInit?.();
10872
+ z().asyncInit?.();
10871
10873
  let s = a.webgpu.adapter;
10872
10874
  if (r === "webgpu") {
10873
10875
  if (typeof navigator > "u" || !navigator.gpu) throw new Error("WebGPU is not supported in current environment");
@@ -10882,29 +10884,29 @@ var Kr = k(() => {
10882
10884
  }
10883
10885
  }
10884
10886
  if (r === "webnn" && (typeof navigator > "u" || !navigator.ml)) throw new Error("WebNN is not supported in current environment");
10885
- if (r === "webgpu" && $().webgpuInit((f) => {
10887
+ if (r === "webgpu" && z().webgpuInit((f) => {
10886
10888
  a.webgpu.device = f;
10887
10889
  }), r === "webnn") {
10888
10890
  let f = new (Os(), $t(Ls)).WebNNBackend(a);
10889
- $().webnnInit([f, () => f.reserveTensorId(), (i) => f.releaseTensorId(i), async (i, d, l, m, y) => f.ensureTensor(i, d, l, m, y), (i, d) => {
10891
+ z().webnnInit([f, () => f.reserveTensorId(), (i) => f.releaseTensorId(i), async (i, d, l, m, y) => f.ensureTensor(i, d, l, m, y), (i, d) => {
10890
10892
  f.uploadTensor(i, d);
10891
10893
  }, async (i, d) => f.downloadTensor(i, d), (i, d) => f.registerMLContext(i, d), !!a.trace]);
10892
10894
  }
10893
10895
  }, it = /* @__PURE__ */ new Map(), bc = (a) => {
10894
- let r = $(), s = r.stackSave();
10896
+ let r = z(), s = r.stackSave();
10895
10897
  try {
10896
10898
  let f = r.PTR_SIZE, i = r.stackAlloc(2 * f);
10897
- r._OrtGetInputOutputCount(a, i, i + f) !== 0 && G("Can't get session input/output count.");
10899
+ r._OrtGetInputOutputCount(a, i, i + f) !== 0 && $("Can't get session input/output count.");
10898
10900
  let l = f === 4 ? "i32" : "i64";
10899
10901
  return [Number(r.getValue(i, l)), Number(r.getValue(i + f, l))];
10900
10902
  } finally {
10901
10903
  r.stackRestore(s);
10902
10904
  }
10903
10905
  }, Bs = (a, r) => {
10904
- let s = $(), f = s.stackSave(), i = 0;
10906
+ let s = z(), f = s.stackSave(), i = 0;
10905
10907
  try {
10906
10908
  let d = s.PTR_SIZE, l = s.stackAlloc(2 * d);
10907
- s._OrtGetInputOutputMetadata(a, r, l, l + d) !== 0 && G("Can't get session input/output metadata.");
10909
+ s._OrtGetInputOutputMetadata(a, r, l, l + d) !== 0 && $("Can't get session input/output metadata.");
10908
10910
  let y = Number(s.getValue(l, "*"));
10909
10911
  i = Number(s.getValue(l + d, "*"));
10910
10912
  let w = s.HEAP32[i / 4];
@@ -10919,11 +10921,11 @@ var Kr = k(() => {
10919
10921
  s.stackRestore(f), i !== 0 && s._OrtFree(i);
10920
10922
  }
10921
10923
  }, xt = (a) => {
10922
- let r = $(), s = r._malloc(a.byteLength);
10924
+ let r = z(), s = r._malloc(a.byteLength);
10923
10925
  if (s === 0) throw new Error(`Can't create a session. failed to allocate a buffer of size ${a.byteLength}.`);
10924
10926
  return r.HEAPU8.set(a, s), [s, a.byteLength];
10925
10927
  }, Qt = async (a, r) => {
10926
- let s, f, i = $();
10928
+ let s, f, i = z();
10927
10929
  Array.isArray(a) ? [s, f] = a : a.buffer === i.HEAPU8.buffer ? [s, f] = [a.byteOffset, a.byteLength] : [s, f] = xt(a);
10928
10930
  let d = 0, l = 0, m = 0, y = [], w = [], T = [];
10929
10931
  try {
@@ -10944,17 +10946,17 @@ var Kr = k(() => {
10944
10946
  } else i.currentContext = await i.webnnCreateMLContext();
10945
10947
  break;
10946
10948
  }
10947
- d = await i._OrtCreateSession(s, f, l), i.webgpuOnCreateSession?.(d), d === 0 && G("Can't create a session."), i.jsepOnCreateSession?.(), i.currentContext && (i.webnnRegisterMLContext(d, i.currentContext), i.currentContext = void 0, i.shouldTransferToMLTensor = true);
10949
+ d = await i._OrtCreateSession(s, f, l), i.webgpuOnCreateSession?.(d), d === 0 && $("Can't create a session."), i.jsepOnCreateSession?.(), i.currentContext && (i.webnnRegisterMLContext(d, i.currentContext), i.currentContext = void 0, i.shouldTransferToMLTensor = true);
10948
10950
  let [g, v] = bc(d), S = !!r?.enableGraphCapture, C = [], R = [], H = [], U = [], M = [];
10949
10951
  for (let L = 0; L < g; L++) {
10950
10952
  let [W, oe, p] = Bs(d, L);
10951
- W === 0 && G("Can't get an input name."), w.push(W);
10953
+ W === 0 && $("Can't get an input name."), w.push(W);
10952
10954
  let ne = i.UTF8ToString(W);
10953
10955
  C.push(ne), H.push(oe === 0 ? { name: ne, isTensor: false } : { name: ne, isTensor: true, type: or(oe), shape: p });
10954
10956
  }
10955
10957
  for (let L = 0; L < v; L++) {
10956
10958
  let [W, oe, p] = Bs(d, L + g);
10957
- W === 0 && G("Can't get an output name."), T.push(W);
10959
+ W === 0 && $("Can't get an output name."), T.push(W);
10958
10960
  let ne = i.UTF8ToString(W);
10959
10961
  R.push(ne), U.push(oe === 0 ? { name: ne, isTensor: false } : { name: ne, isTensor: true, type: or(oe), shape: p });
10960
10962
  {
@@ -10973,23 +10975,23 @@ var Kr = k(() => {
10973
10975
  }
10974
10976
  }
10975
10977
  let Y = null;
10976
- return M.some((L) => L === "gpu-buffer" || L === "ml-tensor" || L === "ml-tensor-cpu-output") && (m = i._OrtCreateBinding(d), m === 0 && G("Can't create IO binding."), Y = { handle: m, outputPreferredLocations: M, outputPreferredLocationsEncoded: M.map((L) => L === "ml-tensor-cpu-output" ? "ml-tensor" : L).map((L) => an(L)) }), it.set(d, [d, w, T, Y, S, false]), [d, C, R, H, U];
10978
+ return M.some((L) => L === "gpu-buffer" || L === "ml-tensor" || L === "ml-tensor-cpu-output") && (m = i._OrtCreateBinding(d), m === 0 && $("Can't create IO binding."), Y = { handle: m, outputPreferredLocations: M, outputPreferredLocationsEncoded: M.map((L) => L === "ml-tensor-cpu-output" ? "ml-tensor" : L).map((L) => an(L)) }), it.set(d, [d, w, T, Y, S, false]), [d, C, R, H, U];
10977
10979
  } catch (g) {
10978
- throw w.forEach((v) => i._OrtFree(v)), T.forEach((v) => i._OrtFree(v)), m !== 0 && i._OrtReleaseBinding(m) !== 0 && G("Can't release IO binding."), d !== 0 && i._OrtReleaseSession(d) !== 0 && G("Can't release session."), g;
10980
+ throw w.forEach((v) => i._OrtFree(v)), T.forEach((v) => i._OrtFree(v)), m !== 0 && i._OrtReleaseBinding(m) !== 0 && $("Can't release IO binding."), d !== 0 && i._OrtReleaseSession(d) !== 0 && $("Can't release session."), g;
10979
10981
  } finally {
10980
- i._free(s), l !== 0 && i._OrtReleaseSessionOptions(l) !== 0 && G("Can't release session options."), y.forEach((g) => i._free(g)), i.unmountExternalData?.();
10982
+ i._free(s), l !== 0 && i._OrtReleaseSessionOptions(l) !== 0 && $("Can't release session options."), y.forEach((g) => i._free(g)), i.unmountExternalData?.();
10981
10983
  }
10982
10984
  }, Zt = (a) => {
10983
- let r = $(), s = it.get(a);
10985
+ let r = z(), s = it.get(a);
10984
10986
  if (!s) throw new Error(`cannot release session. invalid session id: ${a}`);
10985
10987
  let [f, i, d, l, m] = s;
10986
- l && (m && r._OrtClearBoundOutputs(l.handle) !== 0 && G("Can't clear bound outputs."), r._OrtReleaseBinding(l.handle) !== 0 && G("Can't release IO binding.")), r.jsepOnReleaseSession?.(a), r.webnnOnReleaseSession?.(a), r.webgpuOnReleaseSession?.(a), i.forEach((y) => r._OrtFree(y)), d.forEach((y) => r._OrtFree(y)), r._OrtReleaseSession(f) !== 0 && G("Can't release session."), it.delete(a);
10988
+ l && (m && r._OrtClearBoundOutputs(l.handle) !== 0 && $("Can't clear bound outputs."), r._OrtReleaseBinding(l.handle) !== 0 && $("Can't release IO binding.")), r.jsepOnReleaseSession?.(a), r.webnnOnReleaseSession?.(a), r.webgpuOnReleaseSession?.(a), i.forEach((y) => r._OrtFree(y)), d.forEach((y) => r._OrtFree(y)), r._OrtReleaseSession(f) !== 0 && $("Can't release session."), it.delete(a);
10987
10989
  }, Ms = async (a, r, s, f, i, d, l = false) => {
10988
10990
  if (!a) {
10989
10991
  r.push(0);
10990
10992
  return;
10991
10993
  }
10992
- let m = $(), y = m.PTR_SIZE, w = a[0], T = a[1], g = a[3], v = g, S, C;
10994
+ let m = z(), y = m.PTR_SIZE, w = a[0], T = a[1], g = a[3], v = g, S, C;
10993
10995
  if (w === "string" && (g === "gpu-buffer" || g === "ml-tensor")) throw new Error("String tensor is not supported on GPU.");
10994
10996
  if (l && g !== "gpu-buffer") throw new Error(`External buffer must be provided for input/output index ${d} when enableGraphCapture is true.`);
10995
10997
  if (g === "gpu-buffer") {
@@ -11033,12 +11035,12 @@ var Kr = k(() => {
11033
11035
  try {
11034
11036
  T.forEach((M, Y) => m.setValue(H + Y * y, M, y === 4 ? "i32" : "i64"));
11035
11037
  let U = m._OrtCreateTensor(He(w), S, C, H, T.length, an(v));
11036
- U === 0 && G(`Can't create tensor for input/output. session=${f}, index=${d}.`), r.push(U);
11038
+ U === 0 && $(`Can't create tensor for input/output. session=${f}, index=${d}.`), r.push(U);
11037
11039
  } finally {
11038
11040
  m.stackRestore(R);
11039
11041
  }
11040
11042
  }, Kt = async (a, r, s, f, i, d) => {
11041
- let l = $(), m = l.PTR_SIZE, y = it.get(a);
11043
+ let l = z(), m = l.PTR_SIZE, y = it.get(a);
11042
11044
  if (!y) throw new Error(`cannot run inference. invalid session id: ${a}`);
11043
11045
  let w = y[0], T = y[1], g = y[2], v = y[3], S = y[4], C = y[5], R = r.length, H = f.length, U = 0, M = [], Y = [], L = [], W = [], oe = [], p = l.stackSave(), ne = l.stackAlloc(R * m), X = l.stackAlloc(R * m), J = l.stackAlloc(H * m), Ue = l.stackAlloc(H * m);
11044
11046
  try {
@@ -11054,33 +11056,33 @@ var Kr = k(() => {
11054
11056
  $e("wasm bindInputsOutputs");
11055
11057
  for (let q = 0; q < R; q++) {
11056
11058
  let we = r[q];
11057
- await l._OrtBindInput(_, T[we], Y[q]) !== 0 && G(`Can't bind input[${q}] for session=${a}.`);
11059
+ await l._OrtBindInput(_, T[we], Y[q]) !== 0 && $(`Can't bind input[${q}] for session=${a}.`);
11058
11060
  }
11059
11061
  for (let q = 0; q < H; q++) {
11060
11062
  let we = f[q];
11061
- i[q]?.[3] ? (oe.push(L[q]), l._OrtBindOutput(_, g[we], L[q], 0) !== 0 && G(`Can't bind pre-allocated output[${q}] for session=${a}.`)) : l._OrtBindOutput(_, g[we], 0, pe[we]) !== 0 && G(`Can't bind output[${q}] to ${ae[q]} for session=${a}.`);
11063
+ i[q]?.[3] ? (oe.push(L[q]), l._OrtBindOutput(_, g[we], L[q], 0) !== 0 && $(`Can't bind pre-allocated output[${q}] for session=${a}.`)) : l._OrtBindOutput(_, g[we], 0, pe[we]) !== 0 && $(`Can't bind output[${q}] to ${ae[q]} for session=${a}.`);
11062
11064
  }
11063
11065
  ze("wasm bindInputsOutputs"), it.set(a, [w, T, g, v, S, true]);
11064
11066
  }
11065
11067
  l.jsepOnRunStart?.(w), l.webnnOnRunStart?.(w);
11066
11068
  let Q;
11067
- v ? Q = await l._OrtRunWithBinding(w, v.handle, H, J, U) : Q = await l._OrtRun(w, X, ne, R, Ue, H, J, U), Q !== 0 && G("failed to call OrtRun().");
11069
+ v ? Q = await l._OrtRunWithBinding(w, v.handle, H, J, U) : Q = await l._OrtRun(w, X, ne, R, Ue, H, J, U), Q !== 0 && $("failed to call OrtRun().");
11068
11070
  let x = [], A = [];
11069
11071
  $e("wasm ProcessOutputTensor");
11070
11072
  for (let _ = 0; _ < H; _++) {
11071
11073
  let ae = Number(l.getValue(J + _ * m, "*"));
11072
11074
  if (ae === L[_] || oe.includes(L[_])) {
11073
- x.push(i[_]), ae !== L[_] && l._OrtReleaseTensor(ae) !== 0 && G("Can't release tensor.");
11075
+ x.push(i[_]), ae !== L[_] && l._OrtReleaseTensor(ae) !== 0 && $("Can't release tensor.");
11074
11076
  continue;
11075
11077
  }
11076
11078
  let pe = l.stackSave(), q = l.stackAlloc(4 * m), we = false, re, se = 0;
11077
11079
  try {
11078
- l._OrtGetTensorData(ae, q, q + m, q + 2 * m, q + 3 * m) !== 0 && G(`Can't access output tensor data on index ${_}.`);
11080
+ l._OrtGetTensorData(ae, q, q + m, q + 2 * m, q + 3 * m) !== 0 && $(`Can't access output tensor data on index ${_}.`);
11079
11081
  let Te = m === 4 ? "i32" : "i64", Ye = Number(l.getValue(q, Te));
11080
11082
  se = l.getValue(q + m, "*");
11081
11083
  let bt = l.getValue(q + m * 2, "*"), wt = Number(l.getValue(q + m * 3, Te)), Se = [];
11082
11084
  for (let ee = 0; ee < wt; ee++) Se.push(Number(l.getValue(bt + ee * m, Te)));
11083
- l._OrtFree(bt) !== 0 && G("Can't free memory for tensor dims.");
11085
+ l._OrtFree(bt) !== 0 && $("Can't free memory for tensor dims.");
11084
11086
  let Ae = Se.reduce((ee, Z) => ee * Z, 1);
11085
11087
  re = or(Ye);
11086
11088
  let Oe = v?.outputPreferredLocations[f[_]];
@@ -11088,24 +11090,24 @@ var Kr = k(() => {
11088
11090
  if (Oe === "gpu-buffer" || Oe === "ml-tensor") throw new Error("String tensor is not supported on GPU.");
11089
11091
  let ee = [];
11090
11092
  for (let Z = 0; Z < Ae; Z++) {
11091
- let z = l.getValue(se + Z * m, "*"), V = l.getValue(se + (Z + 1) * m, "*"), qe = Z === Ae - 1 ? void 0 : V - z;
11092
- ee.push(l.UTF8ToString(z, qe));
11093
+ let G = l.getValue(se + Z * m, "*"), V = l.getValue(se + (Z + 1) * m, "*"), qe = Z === Ae - 1 ? void 0 : V - G;
11094
+ ee.push(l.UTF8ToString(G, qe));
11093
11095
  }
11094
11096
  x.push([re, Se, ee, "cpu"]);
11095
11097
  } else if (Oe === "gpu-buffer" && Ae > 0) {
11096
11098
  let ee = l.webgpuGetBuffer;
11097
11099
  if (!ee) throw new Error('preferredLocation "gpu-buffer" is not supported without using WebGPU.');
11098
- let Z = ee(se), z = mt(Ye, Ae);
11099
- if (z === void 0 || !ar(re)) throw new Error(`Unsupported data type: ${re}`);
11100
+ let Z = ee(se), G = mt(Ye, Ae);
11101
+ if (G === void 0 || !ar(re)) throw new Error(`Unsupported data type: ${re}`);
11100
11102
  we = true;
11101
11103
  {
11102
11104
  l.webgpuRegisterBuffer(Z, a, se);
11103
- let V = l.webgpuCreateDownloader(Z, z, a);
11105
+ let V = l.webgpuCreateDownloader(Z, G, a);
11104
11106
  x.push([re, Se, { gpuBuffer: Z, download: async () => {
11105
11107
  let qe = await V();
11106
11108
  return new (at(re))(qe);
11107
11109
  }, dispose: () => {
11108
- l._OrtReleaseTensor(ae) !== 0 && G("Can't release tensor.");
11110
+ l._OrtReleaseTensor(ae) !== 0 && $("Can't release tensor.");
11109
11111
  } }, "gpu-buffer"]);
11110
11112
  }
11111
11113
  } else if (Oe === "ml-tensor" && Ae > 0) {
@@ -11120,8 +11122,8 @@ var Kr = k(() => {
11120
11122
  } else if (Oe === "ml-tensor-cpu-output" && Ae > 0) {
11121
11123
  let ee = l.webnnCreateMLTensorDownloader(se, re)(), Z = x.length;
11122
11124
  we = true, A.push((async () => {
11123
- let z = [Z, await ee];
11124
- return l.webnnReleaseTensorId(se), l._OrtReleaseTensor(ae), z;
11125
+ let G = [Z, await ee];
11126
+ return l.webnnReleaseTensorId(se), l._OrtReleaseTensor(ae), G;
11125
11127
  })()), x.push([re, Se, [], "cpu"]);
11126
11128
  } else {
11127
11129
  let ee = at(re), Z = new ee(Ae);
@@ -11131,7 +11133,7 @@ var Kr = k(() => {
11131
11133
  l.stackRestore(pe), re === "string" && se && l._free(se), we || l._OrtReleaseTensor(ae);
11132
11134
  }
11133
11135
  }
11134
- v && !S && (l._OrtClearBoundOutputs(v.handle) !== 0 && G("Can't clear bound outputs."), it.set(a, [w, T, g, v, S, false]));
11136
+ v && !S && (l._OrtClearBoundOutputs(v.handle) !== 0 && $("Can't clear bound outputs."), it.set(a, [w, T, g, v, S, false]));
11135
11137
  for (let [_, ae] of await Promise.all(A)) x[_][2] = ae;
11136
11138
  return ze("wasm ProcessOutputTensor"), x;
11137
11139
  } finally {
@@ -11142,10 +11144,10 @@ var Kr = k(() => {
11142
11144
  }), Y.forEach((Q) => l._OrtReleaseTensor(Q)), L.forEach((Q) => l._OrtReleaseTensor(Q)), W.forEach((Q) => l._free(Q)), U !== 0 && l._OrtReleaseRunOptions(U), M.forEach((Q) => l._free(Q));
11143
11145
  }
11144
11146
  }, er = (a) => {
11145
- let r = $(), s = it.get(a);
11147
+ let r = z(), s = it.get(a);
11146
11148
  if (!s) throw new Error("invalid session id");
11147
11149
  let f = s[0], i = r._OrtEndProfiling(f);
11148
- i === 0 && G("Can't get an profile file name."), r._OrtFree(i);
11150
+ i === 0 && $("Can't get an profile file name."), r._OrtFree(i);
11149
11151
  }, tr = (a) => {
11150
11152
  let r = [];
11151
11153
  for (let s of a) {
@@ -11378,7 +11380,7 @@ var $s = k(() => {
11378
11380
  Ve();
11379
11381
  Ve();
11380
11382
  Ve();
11381
- var Xa = "1.25.0-dev.20260307-d626b568e0";
11383
+ var Xa = "1.25.0-dev.20260323-a99aad9d36";
11382
11384
  var Tl = Zr;
11383
11385
  {
11384
11386
  let a = ($s(), $t(Gs)).wasmBackend;
@@ -11529,10 +11531,10 @@ var tensorToDataURL = (tensor, options) => {
11529
11531
  for (let i = 0; i < height; i++) {
11530
11532
  for (let j = 0; j < width; j++) {
11531
11533
  const R = (tensor.data[rTensorPointer++] - normBias[0]) * normMean[0];
11532
- const G2 = (tensor.data[gTensorPointer++] - normBias[1]) * normMean[1];
11534
+ const G = (tensor.data[gTensorPointer++] - normBias[1]) * normMean[1];
11533
11535
  const B = (tensor.data[bTensorPointer++] - normBias[2]) * normMean[2];
11534
11536
  const A = aTensorPointer === -1 ? 255 : (tensor.data[aTensorPointer++] - normBias[3]) * normMean[3];
11535
- pixels2DContext.fillStyle = "rgba(" + R + "," + G2 + "," + B + "," + A + ")";
11537
+ pixels2DContext.fillStyle = "rgba(" + R + "," + G + "," + B + "," + A + ")";
11536
11538
  pixels2DContext.fillRect(j, i, 1, 1);
11537
11539
  }
11538
11540
  }
@@ -16497,7 +16499,9 @@ var processors_exports = {};
16497
16499
  __export(processors_exports, {
16498
16500
  ChatterboxProcessor: () => ChatterboxProcessor,
16499
16501
  Florence2Processor: () => Florence2Processor,
16502
+ Gemma3Processor: () => Gemma3Processor,
16500
16503
  Gemma3nProcessor: () => Gemma3nProcessor,
16504
+ Glm46VProcessor: () => Glm46VProcessor,
16501
16505
  GraniteSpeechProcessor: () => GraniteSpeechProcessor,
16502
16506
  GroundingDinoProcessor: () => GroundingDinoProcessor,
16503
16507
  Idefics3Processor: () => Idefics3Processor,
@@ -19011,26 +19015,29 @@ function compute_segments(mask_probs, pred_scores, pred_labels, mask_threshold,
19011
19015
  }
19012
19016
  return [segmentation, segments];
19013
19017
  }
19014
- function smart_resize(height, width, factor = 28, min_pixels = 56 * 56, max_pixels = 14 * 14 * 4 * 1280) {
19018
+ function smart_resize(height, width, factor = 28, min_pixels = 56 * 56, max_pixels = 14 * 14 * 4 * 1280, temporal_factor = 1) {
19015
19019
  if (height < factor || width < factor) {
19016
- throw new Error(`height:${height} or width:${width} must be larger than factor:${factor}`);
19017
- } else if (Math.max(height, width) / Math.min(height, width) > 200) {
19020
+ const scale = Math.max(factor / height, factor / width);
19021
+ height = Math.round(height * scale);
19022
+ width = Math.round(width * scale);
19023
+ }
19024
+ if (Math.max(height, width) / Math.min(height, width) > 200) {
19018
19025
  throw new Error(
19019
19026
  `absolute aspect ratio must be smaller than 200, got ${Math.max(height, width) / Math.min(height, width)}`
19020
19027
  );
19021
19028
  }
19022
19029
  let h_bar = Math.round(height / factor) * factor;
19023
19030
  let w_bar = Math.round(width / factor) * factor;
19024
- if (h_bar * w_bar > max_pixels) {
19025
- const beta = Math.sqrt(height * width / max_pixels);
19026
- h_bar = Math.floor(height / beta / factor) * factor;
19027
- w_bar = Math.floor(width / beta / factor) * factor;
19028
- } else if (h_bar * w_bar < min_pixels) {
19029
- const beta = Math.sqrt(min_pixels / (height * width));
19031
+ if (temporal_factor * h_bar * w_bar > max_pixels) {
19032
+ const beta = Math.sqrt(temporal_factor * height * width / max_pixels);
19033
+ h_bar = Math.max(factor, Math.floor(height / beta / factor) * factor);
19034
+ w_bar = Math.max(factor, Math.floor(width / beta / factor) * factor);
19035
+ } else if (temporal_factor * h_bar * w_bar < min_pixels) {
19036
+ const beta = Math.sqrt(min_pixels / (temporal_factor * height * width));
19030
19037
  h_bar = Math.ceil(height * beta / factor) * factor;
19031
19038
  w_bar = Math.ceil(width * beta / factor) * factor;
19032
19039
  }
19033
- return [h_bar, w_bar];
19040
+ return [w_bar, h_bar];
19034
19041
  }
19035
19042
  function post_process_panoptic_segmentation(outputs, threshold = 0.5, mask_threshold = 0.5, overlap_mask_area_threshold = 0.8, label_ids_to_fuse = null, target_sizes = null) {
19036
19043
  if (label_ids_to_fuse === null) {
@@ -19109,7 +19116,7 @@ var ImageProcessor = class extends Callable2 {
19109
19116
  this.do_pad = config.do_pad;
19110
19117
  this.min_pixels = config.min_pixels;
19111
19118
  this.max_pixels = config.max_pixels;
19112
- if (this.do_pad && !this.pad_size && this.size && this.size.width !== void 0 && this.size.height !== void 0) {
19119
+ if (this.do_pad && !this.pad_size && !this.size_divisibility && this.size && this.size.width !== void 0 && this.size.height !== void 0) {
19113
19120
  this.pad_size = this.size;
19114
19121
  }
19115
19122
  this.do_flip_channel_order = config.do_flip_channel_order ?? false;
@@ -19397,10 +19404,8 @@ var ImageProcessor = class extends Callable2 {
19397
19404
  const padded = this.pad_image(pixelData, [image.height, image.width, image.channels], this.pad_size);
19398
19405
  [pixelData, imgDims] = padded;
19399
19406
  } else if (this.size_divisibility) {
19400
- const [paddedWidth, paddedHeight] = enforce_size_divisibility(
19401
- [imgDims[1], imgDims[0]],
19402
- this.size_divisibility
19403
- );
19407
+ const paddedWidth = Math.ceil(imgDims[1] / this.size_divisibility) * this.size_divisibility;
19408
+ const paddedHeight = Math.ceil(imgDims[0] / this.size_divisibility) * this.size_divisibility;
19404
19409
  [pixelData, imgDims] = this.pad_image(pixelData, imgDims, { width: paddedWidth, height: paddedHeight });
19405
19410
  }
19406
19411
  }
@@ -19477,6 +19482,7 @@ var image_processors_exports = {};
19477
19482
  __export(image_processors_exports, {
19478
19483
  BeitFeatureExtractor: () => BeitFeatureExtractor,
19479
19484
  BitImageProcessor: () => BitImageProcessor,
19485
+ CHMv2ImageProcessor: () => CHMv2ImageProcessor,
19480
19486
  CLIPFeatureExtractor: () => CLIPFeatureExtractor,
19481
19487
  CLIPImageProcessor: () => CLIPImageProcessor,
19482
19488
  ChineseCLIPFeatureExtractor: () => ChineseCLIPFeatureExtractor,
@@ -19493,6 +19499,8 @@ __export(image_processors_exports, {
19493
19499
  DonutImageProcessor: () => DonutImageProcessor,
19494
19500
  EfficientNetImageProcessor: () => EfficientNetImageProcessor,
19495
19501
  GLPNFeatureExtractor: () => GLPNFeatureExtractor,
19502
+ Gemma3ImageProcessor: () => Gemma3ImageProcessor,
19503
+ Glm46VImageProcessor: () => Glm46VImageProcessor,
19496
19504
  GroundingDinoImageProcessor: () => GroundingDinoImageProcessor,
19497
19505
  Idefics3ImageProcessor: () => Idefics3ImageProcessor,
19498
19506
  ImageFeatureExtractor: () => ImageProcessor,
@@ -19553,6 +19561,10 @@ var BitImageProcessor = class extends ImageProcessor {
19553
19561
  var ChineseCLIPFeatureExtractor = class extends ImageProcessor {
19554
19562
  };
19555
19563
 
19564
+ // src/models/chmv2/image_processing_chmv2.js
19565
+ var CHMv2ImageProcessor = class extends ImageProcessor {
19566
+ };
19567
+
19556
19568
  // src/models/clip/image_processing_clip.js
19557
19569
  var CLIPImageProcessor = class extends ImageProcessor {
19558
19570
  };
@@ -19672,6 +19684,69 @@ var EfficientNetImageProcessor = class extends ImageProcessor {
19672
19684
  }
19673
19685
  };
19674
19686
 
19687
+ // src/models/gemma3/image_processing_gemma3.js
19688
+ var Gemma3ImageProcessor = class extends ImageProcessor {
19689
+ };
19690
+
19691
+ // src/models/qwen2_vl/image_processing_qwen2_vl.js
19692
+ var Qwen2VLImageProcessor = class extends ImageProcessor {
19693
+ constructor(config) {
19694
+ super(config);
19695
+ this.min_pixels = config.min_pixels ?? config.size?.shortest_edge;
19696
+ this.max_pixels = config.max_pixels ?? config.size?.longest_edge;
19697
+ this.patch_size = config.patch_size;
19698
+ this.merge_size = config.merge_size;
19699
+ }
19700
+ /** @type {ImageProcessor['get_resize_output_image_size']} */
19701
+ get_resize_output_image_size(image, size) {
19702
+ const factor = this.patch_size * this.merge_size;
19703
+ return smart_resize(image.height, image.width, factor, this.min_pixels, this.max_pixels);
19704
+ }
19705
+ async _call(images, ...args) {
19706
+ const { pixel_values, original_sizes, reshaped_input_sizes } = await super._call(images, ...args);
19707
+ let patches = pixel_values;
19708
+ const { temporal_patch_size, merge_size, patch_size } = this.config;
19709
+ if (patches.dims[0] === 1) {
19710
+ patches = cat(
19711
+ Array.from({ length: temporal_patch_size }, () => patches),
19712
+ 0
19713
+ );
19714
+ }
19715
+ const grid_t = patches.dims[0] / temporal_patch_size;
19716
+ const channel = patches.dims[1];
19717
+ const grid_h = Math.floor(patches.dims[2] / patch_size);
19718
+ const grid_w = Math.floor(patches.dims[3] / patch_size);
19719
+ const flatten_patches = patches.view(
19720
+ grid_t,
19721
+ temporal_patch_size,
19722
+ channel,
19723
+ Math.floor(grid_h / merge_size),
19724
+ merge_size,
19725
+ patch_size,
19726
+ Math.floor(grid_w / merge_size),
19727
+ merge_size,
19728
+ patch_size
19729
+ ).permute(0, 3, 6, 4, 7, 2, 1, 5, 8).view(grid_t * grid_h * grid_w, channel * temporal_patch_size * patch_size * patch_size);
19730
+ const image_grid_thw = new Tensor3("int64", [grid_t, grid_h, grid_w], [1, 3]);
19731
+ return {
19732
+ pixel_values: flatten_patches,
19733
+ image_grid_thw,
19734
+ original_sizes,
19735
+ reshaped_input_sizes
19736
+ };
19737
+ }
19738
+ };
19739
+
19740
+ // src/models/glm46v/image_processing_glm46v.js
19741
+ var Glm46VImageProcessor = class extends Qwen2VLImageProcessor {
19742
+ /** @type {Qwen2VLImageProcessor['get_resize_output_image_size']} */
19743
+ get_resize_output_image_size(image, size) {
19744
+ const factor = this.patch_size * this.merge_size;
19745
+ const temporal_factor = this.config.temporal_patch_size ?? 2;
19746
+ return smart_resize(image.height, image.width, factor, this.min_pixels, this.max_pixels, temporal_factor);
19747
+ }
19748
+ };
19749
+
19675
19750
  // src/models/glpn/image_processing_glpn.js
19676
19751
  var GLPNFeatureExtractor = class extends ImageProcessor {
19677
19752
  };
@@ -20065,7 +20140,7 @@ var Lfm2VlImageProcessor = class extends ImageProcessor {
20065
20140
  const img = pixel_values.unsqueeze_(0);
20066
20141
  const total_factor = this.encoder_patch_size * this.downsample_factor;
20067
20142
  const f2 = total_factor ** 2;
20068
- const [new_height, new_width] = smart_resize(
20143
+ const [new_width, new_height] = smart_resize(
20069
20144
  Math.max(total_factor, height),
20070
20145
  Math.max(total_factor, width),
20071
20146
  total_factor,
@@ -20355,55 +20430,6 @@ var PixtralImageProcessor = class extends ImageProcessor {
20355
20430
  var PvtImageProcessor = class extends ImageProcessor {
20356
20431
  };
20357
20432
 
20358
- // src/models/qwen2_vl/image_processing_qwen2_vl.js
20359
- var Qwen2VLImageProcessor = class extends ImageProcessor {
20360
- constructor(config) {
20361
- super(config);
20362
- this.min_pixels = config.min_pixels ?? config.size?.shortest_edge;
20363
- this.max_pixels = config.max_pixels ?? config.size?.longest_edge;
20364
- this.patch_size = config.patch_size;
20365
- this.merge_size = config.merge_size;
20366
- }
20367
- /** @type {ImageProcessor['get_resize_output_image_size']} */
20368
- get_resize_output_image_size(image, size) {
20369
- const factor = this.patch_size * this.merge_size;
20370
- return smart_resize(image.height, image.width, factor, this.min_pixels, this.max_pixels);
20371
- }
20372
- async _call(images, ...args) {
20373
- const { pixel_values, original_sizes, reshaped_input_sizes } = await super._call(images, ...args);
20374
- let patches = pixel_values;
20375
- const { temporal_patch_size, merge_size, patch_size } = this.config;
20376
- if (patches.dims[0] === 1) {
20377
- patches = cat(
20378
- Array.from({ length: temporal_patch_size }, () => patches),
20379
- 0
20380
- );
20381
- }
20382
- const grid_t = patches.dims[0] / temporal_patch_size;
20383
- const channel = patches.dims[1];
20384
- const grid_h = Math.floor(patches.dims[2] / patch_size);
20385
- const grid_w = Math.floor(patches.dims[3] / patch_size);
20386
- const flatten_patches = patches.view(
20387
- grid_t,
20388
- temporal_patch_size,
20389
- channel,
20390
- Math.floor(grid_h / merge_size),
20391
- merge_size,
20392
- patch_size,
20393
- Math.floor(grid_w / merge_size),
20394
- merge_size,
20395
- patch_size
20396
- ).permute(0, 3, 6, 4, 7, 2, 1, 5, 8).view(grid_t * grid_h * grid_w, channel * temporal_patch_size * patch_size * patch_size);
20397
- const image_grid_thw = new Tensor3("int64", [grid_t, grid_h, grid_w], [1, 3]);
20398
- return {
20399
- pixel_values: flatten_patches,
20400
- image_grid_thw,
20401
- original_sizes,
20402
- reshaped_input_sizes
20403
- };
20404
- }
20405
- };
20406
-
20407
20433
  // src/models/rt_detr/image_processing_rt_detr.js
20408
20434
  var RTDetrImageProcessor = class extends ImageProcessor {
20409
20435
  /** @type {typeof post_process_object_detection} */
@@ -20885,6 +20911,48 @@ var Florence2Processor = class extends Processor {
20885
20911
  }
20886
20912
  };
20887
20913
 
20914
+ // src/models/gemma3/processing_gemma3.js
20915
+ var Gemma3Processor = class extends Processor {
20916
+ static tokenizer_class = AutoTokenizer;
20917
+ static image_processor_class = AutoImageProcessor;
20918
+ static uses_processor_config = true;
20919
+ static uses_chat_template_file = true;
20920
+ constructor(config, components, chat_template) {
20921
+ super(config, components, chat_template);
20922
+ this.image_seq_length = this.config.image_seq_length;
20923
+ const { boi_token, image_token, eoi_token } = this.tokenizer.config;
20924
+ this.boi_token = boi_token;
20925
+ this.image_token = image_token;
20926
+ this.eoi_token = eoi_token;
20927
+ const image_tokens_expanded = image_token.repeat(this.image_seq_length);
20928
+ this.full_image_sequence = `
20929
+
20930
+ ${boi_token}${image_tokens_expanded}${eoi_token}
20931
+
20932
+ `;
20933
+ }
20934
+ /**
20935
+ * @param {string|string[]} text
20936
+ * @param {import('../../utils/image.js').RawImage|import('../../utils/image.js').RawImage[]} [images]
20937
+ * @param {Object} [options]
20938
+ */
20939
+ async _call(text, images = null, options = {}) {
20940
+ if (typeof text === "string") {
20941
+ text = [text];
20942
+ }
20943
+ let image_inputs;
20944
+ if (images) {
20945
+ image_inputs = await this.image_processor(images, options);
20946
+ text = text.map((prompt) => prompt.replaceAll(this.boi_token, this.full_image_sequence));
20947
+ }
20948
+ const text_inputs = this.tokenizer(text, options);
20949
+ return {
20950
+ ...text_inputs,
20951
+ ...image_inputs
20952
+ };
20953
+ }
20954
+ };
20955
+
20888
20956
  // src/models/gemma3n/processing_gemma3n.js
20889
20957
  var Gemma3nProcessor = class extends Processor {
20890
20958
  static image_processor_class = AutoImageProcessor;
@@ -20957,6 +21025,56 @@ ${boi_token}${image_tokens_expanded}${eoi_token}
20957
21025
  }
20958
21026
  };
20959
21027
 
21028
+ // src/models/qwen2_vl/processing_qwen2_vl.js
21029
+ var Qwen2VLProcessor = class extends Processor {
21030
+ static image_processor_class = AutoImageProcessor;
21031
+ static tokenizer_class = AutoTokenizer;
21032
+ static image_token = "<|image_pad|>";
21033
+ /**
21034
+ *
21035
+ * @param {string|string[]} text
21036
+ * @param {RawImage|RawImage[]} images
21037
+ * @param {...any} args
21038
+ * @returns {Promise<any>}
21039
+ */
21040
+ async _call(text, images = null, ...args) {
21041
+ if (!Array.isArray(text)) {
21042
+ text = [text];
21043
+ }
21044
+ let image_inputs, image_grid_thw;
21045
+ if (images) {
21046
+ image_inputs = await this.image_processor(images);
21047
+ image_grid_thw = image_inputs.image_grid_thw;
21048
+ }
21049
+ if (image_grid_thw) {
21050
+ let merge_length = this.image_processor.config.merge_size ** 2;
21051
+ let index = 0;
21052
+ const image_token = (
21053
+ /** @type {typeof Qwen2VLProcessor} */
21054
+ this.constructor.image_token
21055
+ );
21056
+ const image_grid_thw_list = image_grid_thw.tolist();
21057
+ text = text.map((t) => {
21058
+ while (t.includes(image_token)) {
21059
+ const prod = Number(image_grid_thw_list[index++].reduce((a, b) => a * b, 1n));
21060
+ t = t.replace(image_token, "<|placeholder|>".repeat(Math.floor(prod / merge_length)));
21061
+ }
21062
+ return t.replaceAll("<|placeholder|>", image_token);
21063
+ });
21064
+ }
21065
+ const text_inputs = this.tokenizer(text);
21066
+ return {
21067
+ ...text_inputs,
21068
+ ...image_inputs
21069
+ };
21070
+ }
21071
+ };
21072
+
21073
+ // src/models/glm46v/processing_glm46v.js
21074
+ var Glm46VProcessor = class extends Qwen2VLProcessor {
21075
+ static image_token = "<|image|>";
21076
+ };
21077
+
20960
21078
  // src/models/granite_speech/processing_granite_speech.js
20961
21079
  var GraniteSpeechProcessor = class extends Processor {
20962
21080
  static tokenizer_class = AutoTokenizer;
@@ -21687,47 +21805,6 @@ var PyAnnoteProcessor = class extends Processor {
21687
21805
  }
21688
21806
  };
21689
21807
 
21690
- // src/models/qwen2_vl/processing_qwen2_vl.js
21691
- var Qwen2VLProcessor = class extends Processor {
21692
- static image_processor_class = AutoImageProcessor;
21693
- static tokenizer_class = AutoTokenizer;
21694
- /**
21695
- *
21696
- * @param {string|string[]} text
21697
- * @param {RawImage|RawImage[]} images
21698
- * @param {...any} args
21699
- * @returns {Promise<any>}
21700
- */
21701
- async _call(text, images = null, ...args) {
21702
- if (!Array.isArray(text)) {
21703
- text = [text];
21704
- }
21705
- let image_inputs, image_grid_thw;
21706
- if (images) {
21707
- image_inputs = await this.image_processor(images);
21708
- image_grid_thw = image_inputs.image_grid_thw;
21709
- }
21710
- if (image_grid_thw) {
21711
- let merge_length = this.image_processor.config.merge_size ** 2;
21712
- let index = 0;
21713
- const image_grid_thw_list = image_grid_thw.tolist();
21714
- text = text.map((t) => {
21715
- while (t.includes("<|image_pad|>")) {
21716
- const prod = Number(image_grid_thw_list[index++].reduce((a, b) => a * b, 1n));
21717
- t = t.replace("<|image_pad|>", "<|placeholder|>".repeat(Math.floor(prod / merge_length)));
21718
- }
21719
- return t.replaceAll("<|placeholder|>", "<|image_pad|>");
21720
- });
21721
- }
21722
- const text_inputs = this.tokenizer(text);
21723
- return {
21724
- ...text_inputs,
21725
- ...image_inputs
21726
- // TODO: ...videos_inputs,
21727
- };
21728
- }
21729
- };
21730
-
21731
21808
  // src/models/qwen2_5_vl/processing_qwen2_5_vl.js
21732
21809
  var Qwen2_5_VLProcessor = class extends Qwen2VLProcessor {
21733
21810
  };
@@ -22071,6 +22148,8 @@ function getNormalizedConfig(config) {
22071
22148
  case "gemma3n":
22072
22149
  case "lfm2_vl":
22073
22150
  case "chatterbox":
22151
+ case "lighton_ocr":
22152
+ case "glm_ocr":
22074
22153
  case "mistral3":
22075
22154
  case "qwen2_5_vl":
22076
22155
  case "qwen3_vl":
@@ -22146,6 +22225,8 @@ function getNormalizedConfig(config) {
22146
22225
  mapping["dim_kv"] = "head_dim";
22147
22226
  break;
22148
22227
  case "qwen3":
22228
+ case "solar_open":
22229
+ case "glm_ocr_text":
22149
22230
  case "gemma":
22150
22231
  case "gemma2":
22151
22232
  case "vaultgemma":
@@ -22156,6 +22237,7 @@ function getNormalizedConfig(config) {
22156
22237
  case "ernie4_5":
22157
22238
  case "hunyuan_v1_dense":
22158
22239
  case "falcon_h1":
22240
+ case "nemotron_h":
22159
22241
  case "ministral":
22160
22242
  case "ministral3":
22161
22243
  mapping["num_heads"] = "num_key_value_heads";
@@ -22190,6 +22272,9 @@ function getNormalizedConfig(config) {
22190
22272
  mapping["num_attention_heads"] = "num_attention_heads";
22191
22273
  break;
22192
22274
  case "youtu":
22275
+ case "deepseek_v3":
22276
+ case "glm_moe_dsa":
22277
+ case "mistral4":
22193
22278
  mapping["num_heads"] = "num_key_value_heads";
22194
22279
  mapping["num_layers"] = "num_hidden_layers";
22195
22280
  mapping["dim_kv"] = "qk_head_dim";
@@ -22278,6 +22363,7 @@ function getCacheShapes(config, options) {
22278
22363
  if (!(config instanceof PretrainedConfig)) {
22279
22364
  config = new PretrainedConfig(config);
22280
22365
  }
22366
+ const batch_size = options?.batch_size ?? 1;
22281
22367
  if (["lfm2", "lfm2_moe"].includes(config.model_type)) {
22282
22368
  const pkv_prefix = options?.prefix ?? "past_key_values";
22283
22369
  const conv_prefix = pkv_prefix === "present" ? "present" : "past";
@@ -22287,7 +22373,6 @@ function getCacheShapes(config, options) {
22287
22373
  config
22288
22374
  );
22289
22375
  const head_dim = hidden_size / num_attention_heads;
22290
- const batch_size = options?.batch_size ?? 1;
22291
22376
  for (let i = 0; i < layer_types.length; ++i) {
22292
22377
  if (layer_types[i] === "full_attention") {
22293
22378
  for (const kv of ["key", "value"]) {
@@ -22300,31 +22385,26 @@ function getCacheShapes(config, options) {
22300
22385
  }
22301
22386
  }
22302
22387
  return cache_values;
22303
- } else if (["granitemoehybrid", "falcon_h1"].includes(config.model_type)) {
22388
+ } else if (["granitemoehybrid", "falcon_h1", "nemotron_h"].includes(config.model_type)) {
22304
22389
  const pkv_prefix = options?.prefix ?? "past_key_values";
22305
22390
  const conv_prefix = pkv_prefix === "present" ? "present" : "past";
22306
- const cache_values = {};
22307
- const {
22308
- layer_types,
22309
- num_hidden_layers,
22310
- num_attention_heads,
22311
- num_key_value_heads,
22312
- hidden_size,
22313
- mamba_d_conv,
22314
- mamba_n_heads,
22315
- mamba_d_head,
22316
- mamba_d_state,
22317
- mamba_n_groups,
22318
- mamba_expand,
22319
- mamba_d_ssm
22320
- } = (
22391
+ const c = (
22321
22392
  /** @type {any} */
22322
22393
  config
22323
22394
  );
22324
- const head_dim = hidden_size / num_attention_heads;
22325
- const batch_size = options?.batch_size ?? 1;
22326
- const conv_d_inner = (mamba_d_ssm ?? mamba_expand * hidden_size) + 2 * mamba_n_groups * mamba_d_state;
22327
- for (let i = 0; i < num_hidden_layers; ++i) {
22395
+ const layer_types = c.layer_types ?? c.layers_block_type;
22396
+ const num_layers = c.num_hidden_layers ?? layer_types?.length;
22397
+ const num_key_value_heads = c.num_key_value_heads;
22398
+ const head_dim = c.head_dim ?? c.hidden_size / c.num_attention_heads;
22399
+ const mamba_n_heads = c.mamba_n_heads ?? c.mamba_num_heads;
22400
+ const mamba_d_head = c.mamba_d_head ?? c.mamba_head_dim;
22401
+ const mamba_d_state = c.mamba_d_state ?? c.ssm_state_size;
22402
+ const mamba_n_groups = c.mamba_n_groups ?? c.n_groups;
22403
+ const mamba_d_conv = c.mamba_d_conv ?? c.conv_kernel;
22404
+ const mamba_d_ssm = c.mamba_d_ssm ?? (c.mamba_expand ? c.mamba_expand * c.hidden_size : mamba_n_heads * mamba_d_head);
22405
+ const conv_d_inner = mamba_d_ssm + 2 * mamba_n_groups * mamba_d_state;
22406
+ const cache_values = {};
22407
+ for (let i = 0; i < num_layers; ++i) {
22328
22408
  if (!layer_types || layer_types[i] === "mamba") {
22329
22409
  cache_values[`${conv_prefix}_conv.${i}`] = [batch_size, conv_d_inner, mamba_d_conv];
22330
22410
  cache_values[`${conv_prefix}_ssm.${i}`] = [batch_size, mamba_n_heads, mamba_d_head, mamba_d_state];
@@ -22358,7 +22438,6 @@ function getCacheShapes(config, options) {
22358
22438
  const key_dim = linear_key_head_dim * linear_num_key_heads;
22359
22439
  const value_dim = linear_value_head_dim * linear_num_value_heads;
22360
22440
  const final_head_dim = head_dim ?? hidden_size / num_attention_heads;
22361
- const batch_size = options?.batch_size ?? 1;
22362
22441
  for (let i = 0; i < layer_types.length; ++i) {
22363
22442
  if (layer_types[i] === "full_attention") {
22364
22443
  for (const kv of ["key", "value"]) {
@@ -24054,8 +24133,7 @@ var MODEL_TYPES = {
24054
24133
  ImageAudioTextToText: 13,
24055
24134
  Supertonic: 14,
24056
24135
  Chatterbox: 15,
24057
- MultimodalLanguageModelOnly: 16,
24058
- VoxtralRealtime: 17
24136
+ VoxtralRealtime: 16
24059
24137
  };
24060
24138
  var MODEL_TYPE_CONFIG = {
24061
24139
  [MODEL_TYPES.DecoderOnly]: {
@@ -24112,12 +24190,12 @@ var MODEL_TYPE_CONFIG = {
24112
24190
  can_generate: true,
24113
24191
  forward: image_text_to_text_forward,
24114
24192
  prepare_inputs: multimodal_text_to_text_prepare_inputs_for_generation,
24115
- sessions: (config) => {
24193
+ sessions: (config, options, textOnly) => {
24116
24194
  const s = {
24117
24195
  embed_tokens: "embed_tokens",
24118
- vision_encoder: "vision_encoder",
24119
24196
  decoder_model_merged: "decoder_model_merged"
24120
24197
  };
24198
+ if (!textOnly) s["vision_encoder"] = "vision_encoder";
24121
24199
  if (config.is_encoder_decoder) s["model"] = "encoder_model";
24122
24200
  return s;
24123
24201
  },
@@ -24139,12 +24217,17 @@ var MODEL_TYPE_CONFIG = {
24139
24217
  [MODEL_TYPES.ImageAudioTextToText]: {
24140
24218
  can_generate: true,
24141
24219
  prepare_inputs: multimodal_text_to_text_prepare_inputs_for_generation,
24142
- sessions: () => ({
24143
- embed_tokens: "embed_tokens",
24144
- audio_encoder: "audio_encoder",
24145
- vision_encoder: "vision_encoder",
24146
- decoder_model_merged: "decoder_model_merged"
24147
- }),
24220
+ sessions: (config, options, textOnly) => {
24221
+ const s = {
24222
+ embed_tokens: "embed_tokens",
24223
+ decoder_model_merged: "decoder_model_merged"
24224
+ };
24225
+ if (!textOnly) {
24226
+ s["audio_encoder"] = "audio_encoder";
24227
+ s["vision_encoder"] = "vision_encoder";
24228
+ }
24229
+ return s;
24230
+ },
24148
24231
  optional_configs: { generation_config: "generation_config.json" }
24149
24232
  },
24150
24233
  [MODEL_TYPES.Phi3V]: {
@@ -24195,14 +24278,6 @@ var MODEL_TYPE_CONFIG = {
24195
24278
  cache_sessions: { model: true },
24196
24279
  optional_configs: { generation_config: "generation_config.json" }
24197
24280
  },
24198
- [MODEL_TYPES.MultimodalLanguageModelOnly]: {
24199
- can_generate: true,
24200
- forward: image_text_to_text_forward,
24201
- prepare_inputs: multimodal_text_to_text_prepare_inputs_for_generation,
24202
- sessions: () => ({ embed_tokens: "embed_tokens", decoder_model_merged: "decoder_model_merged" }),
24203
- cache_sessions: { decoder_model_merged: true },
24204
- optional_configs: { generation_config: "generation_config.json" }
24205
- },
24206
24281
  [MODEL_TYPES.VoxtralRealtime]: {
24207
24282
  can_generate: true,
24208
24283
  prepare_inputs: decoder_prepare_inputs_for_generation,
@@ -24228,6 +24303,19 @@ function getSessionsConfig(modelType, config, options = {}) {
24228
24303
  optional_configs: typeConfig.optional_configs
24229
24304
  };
24230
24305
  }
24306
+ function resolveTypeConfig(modelName, config) {
24307
+ let modelType = MODEL_TYPE_MAPPING.get(modelName);
24308
+ let textOnly = false;
24309
+ const nativeArch = config?.architectures?.[0];
24310
+ if (nativeArch && nativeArch !== modelName && modelName?.endsWith("ForCausalLM") && nativeArch.endsWith("ForConditionalGeneration")) {
24311
+ const nativeType = MODEL_TYPE_MAPPING.get(nativeArch);
24312
+ if (nativeType !== void 0) {
24313
+ modelType = nativeType;
24314
+ textOnly = true;
24315
+ }
24316
+ }
24317
+ return { typeConfig: MODEL_TYPE_CONFIG[modelType] ?? MODEL_TYPE_CONFIG.default, textOnly, modelType };
24318
+ }
24231
24319
  var MODEL_TYPE_MAPPING = /* @__PURE__ */ new Map();
24232
24320
  var MODEL_NAME_TO_CLASS_MAPPING = /* @__PURE__ */ new Map();
24233
24321
  var MODEL_CLASS_TO_NAME_MAPPING = /* @__PURE__ */ new Map();
@@ -24247,8 +24335,7 @@ var PreTrainedModel = class extends Callable2 {
24247
24335
  this.sessions = sessions;
24248
24336
  this.configs = configs;
24249
24337
  const modelName = MODEL_CLASS_TO_NAME_MAPPING.get(this.constructor);
24250
- const modelType = MODEL_TYPE_MAPPING.get(modelName);
24251
- const typeConfig = MODEL_TYPE_CONFIG[modelType] ?? MODEL_TYPE_CONFIG.default;
24338
+ const { typeConfig } = resolveTypeConfig(modelName, config);
24252
24339
  this.can_generate = typeConfig.can_generate;
24253
24340
  this._forward = typeConfig.forward;
24254
24341
  this._prepare_inputs_for_generation = typeConfig.prepare_inputs;
@@ -24311,9 +24398,8 @@ var PreTrainedModel = class extends Callable2 {
24311
24398
  session_options
24312
24399
  };
24313
24400
  const modelName = MODEL_CLASS_TO_NAME_MAPPING.get(this);
24314
- const modelType = MODEL_TYPE_MAPPING.get(modelName);
24315
24401
  config = options.config = await AutoConfig.from_pretrained(pretrained_model_name_or_path, options);
24316
- const typeConfig = MODEL_TYPE_CONFIG[modelType] ?? MODEL_TYPE_CONFIG.default;
24402
+ const { typeConfig, textOnly, modelType } = resolveTypeConfig(modelName, config);
24317
24403
  if (modelType === void 0) {
24318
24404
  const type = modelName ?? config?.model_type;
24319
24405
  if (type !== "custom") {
@@ -24322,7 +24408,7 @@ var PreTrainedModel = class extends Callable2 {
24322
24408
  );
24323
24409
  }
24324
24410
  }
24325
- const sessions = typeConfig.sessions(config, options);
24411
+ const sessions = typeConfig.sessions(config, options, textOnly);
24326
24412
  const promises = [
24327
24413
  constructSessions(pretrained_model_name_or_path, sessions, options, typeConfig.cache_sessions)
24328
24414
  ];
@@ -24986,7 +25072,9 @@ async function generic_text_to_text_forward(self2, {
24986
25072
  "qwen3_5",
24987
25073
  "qwen3_5_text",
24988
25074
  "qwen3_5_moe",
24989
- "qwen3_5_moe_text"
25075
+ "qwen3_5_moe_text",
25076
+ "glm_ocr",
25077
+ "glm_ocr_text"
24990
25078
  ].includes(self2.config.model_type)
24991
25079
  ) {
24992
25080
  const { image_grid_thw, video_grid_thw } = kwargs;
@@ -25210,6 +25298,8 @@ __export(models_exports, {
25210
25298
  BloomForCausalLM: () => BloomForCausalLM,
25211
25299
  BloomModel: () => BloomModel,
25212
25300
  BloomPreTrainedModel: () => BloomPreTrainedModel,
25301
+ CHMv2ForDepthEstimation: () => CHMv2ForDepthEstimation,
25302
+ CHMv2PreTrainedModel: () => CHMv2PreTrainedModel,
25213
25303
  CLIPModel: () => CLIPModel,
25214
25304
  CLIPPreTrainedModel: () => CLIPPreTrainedModel,
25215
25305
  CLIPSegForImageSegmentation: () => CLIPSegForImageSegmentation,
@@ -25284,6 +25374,9 @@ __export(models_exports, {
25284
25374
  DebertaV2PreTrainedModel: () => DebertaV2PreTrainedModel,
25285
25375
  DecisionTransformerModel: () => DecisionTransformerModel,
25286
25376
  DecisionTransformerPreTrainedModel: () => DecisionTransformerPreTrainedModel,
25377
+ DeepseekV3ForCausalLM: () => DeepseekV3ForCausalLM,
25378
+ DeepseekV3Model: () => DeepseekV3Model,
25379
+ DeepseekV3PreTrainedModel: () => DeepseekV3PreTrainedModel,
25287
25380
  DeiTForImageClassification: () => DeiTForImageClassification,
25288
25381
  DeiTModel: () => DeiTModel,
25289
25382
  DeiTPreTrainedModel: () => DeiTPreTrainedModel,
@@ -25329,6 +25422,11 @@ __export(models_exports, {
25329
25422
  EsmForTokenClassification: () => EsmForTokenClassification,
25330
25423
  EsmModel: () => EsmModel,
25331
25424
  EsmPreTrainedModel: () => EsmPreTrainedModel,
25425
+ EuroBertForMaskedLM: () => EuroBertForMaskedLM,
25426
+ EuroBertForSequenceClassification: () => EuroBertForSequenceClassification,
25427
+ EuroBertForTokenClassification: () => EuroBertForTokenClassification,
25428
+ EuroBertModel: () => EuroBertModel,
25429
+ EuroBertPreTrainedModel: () => EuroBertPreTrainedModel,
25332
25430
  ExaoneForCausalLM: () => ExaoneForCausalLM,
25333
25431
  ExaoneModel: () => ExaoneModel,
25334
25432
  ExaonePreTrainedModel: () => ExaonePreTrainedModel,
@@ -25365,6 +25463,7 @@ __export(models_exports, {
25365
25463
  Gemma2Model: () => Gemma2Model,
25366
25464
  Gemma2PreTrainedModel: () => Gemma2PreTrainedModel,
25367
25465
  Gemma3ForCausalLM: () => Gemma3ForCausalLM,
25466
+ Gemma3ForConditionalGeneration: () => Gemma3ForConditionalGeneration,
25368
25467
  Gemma3Model: () => Gemma3Model,
25369
25468
  Gemma3PreTrainedModel: () => Gemma3PreTrainedModel,
25370
25469
  Gemma3nForCausalLM: () => Gemma3nForCausalLM,
@@ -25375,6 +25474,10 @@ __export(models_exports, {
25375
25474
  GemmaPreTrainedModel: () => GemmaPreTrainedModel,
25376
25475
  GlmForCausalLM: () => GlmForCausalLM,
25377
25476
  GlmModel: () => GlmModel,
25477
+ GlmMoeDsaForCausalLM: () => GlmMoeDsaForCausalLM,
25478
+ GlmMoeDsaModel: () => GlmMoeDsaModel,
25479
+ GlmMoeDsaPreTrainedModel: () => GlmMoeDsaPreTrainedModel,
25480
+ GlmOcrForConditionalGeneration: () => GlmOcrForConditionalGeneration,
25378
25481
  GlmPreTrainedModel: () => GlmPreTrainedModel,
25379
25482
  GptOssForCausalLM: () => GptOssForCausalLM,
25380
25483
  GptOssModel: () => GptOssModel,
@@ -25421,6 +25524,7 @@ __export(models_exports, {
25421
25524
  Lfm2MoePreTrainedModel: () => Lfm2MoePreTrainedModel,
25422
25525
  Lfm2PreTrainedModel: () => Lfm2PreTrainedModel,
25423
25526
  Lfm2VlForConditionalGeneration: () => Lfm2VlForConditionalGeneration,
25527
+ LightOnOcrForConditionalGeneration: () => LightOnOcrForConditionalGeneration,
25424
25528
  LiteWhisperForConditionalGeneration: () => LiteWhisperForConditionalGeneration,
25425
25529
  Llama4ForCausalLM: () => Llama4ForCausalLM,
25426
25530
  Llama4PreTrainedModel: () => Llama4PreTrainedModel,
@@ -25470,6 +25574,9 @@ __export(models_exports, {
25470
25574
  MimiEncoderOutput: () => MimiEncoderOutput,
25471
25575
  MimiModel: () => MimiModel,
25472
25576
  MimiPreTrainedModel: () => MimiPreTrainedModel,
25577
+ Mistral4ForCausalLM: () => Mistral4ForCausalLM,
25578
+ Mistral4Model: () => Mistral4Model,
25579
+ Mistral4PreTrainedModel: () => Mistral4PreTrainedModel,
25473
25580
  MistralForCausalLM: () => MistralForCausalLM,
25474
25581
  MistralModel: () => MistralModel,
25475
25582
  MistralPreTrainedModel: () => MistralPreTrainedModel,
@@ -25527,6 +25634,9 @@ __export(models_exports, {
25527
25634
  NanoChatForCausalLM: () => NanoChatForCausalLM,
25528
25635
  NanoChatModel: () => NanoChatModel,
25529
25636
  NanoChatPreTrainedModel: () => NanoChatPreTrainedModel,
25637
+ NemotronHForCausalLM: () => NemotronHForCausalLM,
25638
+ NemotronHModel: () => NemotronHModel,
25639
+ NemotronHPreTrainedModel: () => NemotronHPreTrainedModel,
25530
25640
  NeoBertForMaskedLM: () => NeoBertForMaskedLM,
25531
25641
  NeoBertForQuestionAnswering: () => NeoBertForQuestionAnswering,
25532
25642
  NeoBertForSequenceClassification: () => NeoBertForSequenceClassification,
@@ -25664,6 +25774,9 @@ __export(models_exports, {
25664
25774
  SnacEncoderModel: () => SnacEncoderModel,
25665
25775
  SnacModel: () => SnacModel,
25666
25776
  SnacPreTrainedModel: () => SnacPreTrainedModel,
25777
+ SolarOpenForCausalLM: () => SolarOpenForCausalLM,
25778
+ SolarOpenModel: () => SolarOpenModel,
25779
+ SolarOpenPreTrainedModel: () => SolarOpenPreTrainedModel,
25667
25780
  SpeechT5ForSpeechToText: () => SpeechT5ForSpeechToText,
25668
25781
  SpeechT5ForTextToSpeech: () => SpeechT5ForTextToSpeech,
25669
25782
  SpeechT5HifiGan: () => SpeechT5HifiGan,
@@ -25838,7 +25951,7 @@ var ArceeModel = class extends ArceePreTrainedModel {
25838
25951
  var ArceeForCausalLM = class extends ArceePreTrainedModel {
25839
25952
  };
25840
25953
 
25841
- // src/models/ast/modeling_ast.js
25954
+ // src/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.js
25842
25955
  var ASTPreTrainedModel = class extends PreTrainedModel {
25843
25956
  };
25844
25957
  var ASTModel = class extends ASTPreTrainedModel {
@@ -26173,6 +26286,12 @@ var ChineseCLIPPreTrainedModel = class extends PreTrainedModel {
26173
26286
  var ChineseCLIPModel = class extends ChineseCLIPPreTrainedModel {
26174
26287
  };
26175
26288
 
26289
+ // src/models/chmv2/modeling_chmv2.js
26290
+ var CHMv2PreTrainedModel = class extends PreTrainedModel {
26291
+ };
26292
+ var CHMv2ForDepthEstimation = class extends CHMv2PreTrainedModel {
26293
+ };
26294
+
26176
26295
  // src/models/clap/modeling_clap.js
26177
26296
  var ClapPreTrainedModel = class extends PreTrainedModel {
26178
26297
  };
@@ -26511,6 +26630,14 @@ var DebertaForQuestionAnswering = class extends DebertaPreTrainedModel {
26511
26630
  }
26512
26631
  };
26513
26632
 
26633
+ // src/models/deepseek_v3/modeling_deepseek_v3.js
26634
+ var DeepseekV3PreTrainedModel = class extends PreTrainedModel {
26635
+ };
26636
+ var DeepseekV3Model = class extends DeepseekV3PreTrainedModel {
26637
+ };
26638
+ var DeepseekV3ForCausalLM = class extends DeepseekV3PreTrainedModel {
26639
+ };
26640
+
26514
26641
  // src/models/deberta_v2/modeling_deberta_v2.js
26515
26642
  var DebertaV2PreTrainedModel = class extends PreTrainedModel {
26516
26643
  };
@@ -26859,6 +26986,45 @@ var EsmForTokenClassification = class extends EsmPreTrainedModel {
26859
26986
  }
26860
26987
  };
26861
26988
 
26989
+ // src/models/eurobert/modeling_eurobert.js
26990
+ var EuroBertPreTrainedModel = class extends PreTrainedModel {
26991
+ };
26992
+ var EuroBertModel = class extends EuroBertPreTrainedModel {
26993
+ };
26994
+ var EuroBertForMaskedLM = class extends EuroBertPreTrainedModel {
26995
+ /**
26996
+ * Calls the model on new inputs.
26997
+ *
26998
+ * @param {Object} model_inputs The inputs to the model.
26999
+ * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
27000
+ */
27001
+ async _call(model_inputs) {
27002
+ return new MaskedLMOutput(await super._call(model_inputs));
27003
+ }
27004
+ };
27005
+ var EuroBertForSequenceClassification = class extends EuroBertPreTrainedModel {
27006
+ /**
27007
+ * Calls the model on new inputs.
27008
+ *
27009
+ * @param {Object} model_inputs The inputs to the model.
27010
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
27011
+ */
27012
+ async _call(model_inputs) {
27013
+ return new SequenceClassifierOutput(await super._call(model_inputs));
27014
+ }
27015
+ };
27016
+ var EuroBertForTokenClassification = class extends EuroBertPreTrainedModel {
27017
+ /**
27018
+ * Calls the model on new inputs.
27019
+ *
27020
+ * @param {Object} model_inputs The inputs to the model.
27021
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
27022
+ */
27023
+ async _call(model_inputs) {
27024
+ return new TokenClassifierOutput(await super._call(model_inputs));
27025
+ }
27026
+ };
27027
+
26862
27028
  // src/models/exaone/modeling_exaone.js
26863
27029
  var ExaonePreTrainedModel = class extends PreTrainedModel {
26864
27030
  };
@@ -27016,12 +27182,35 @@ var Gemma2Model = class extends Gemma2PreTrainedModel {
27016
27182
  var Gemma2ForCausalLM = class extends Gemma2PreTrainedModel {
27017
27183
  };
27018
27184
 
27185
+ // src/models/llava/modeling_llava.js
27186
+ var LlavaPreTrainedModel = class extends PreTrainedModel {
27187
+ forward_params = ["input_ids", "attention_mask", "pixel_values", "position_ids", "past_key_values"];
27188
+ };
27189
+ var LlavaForConditionalGeneration = class extends LlavaPreTrainedModel {
27190
+ _merge_input_ids_with_image_features(kwargs) {
27191
+ const vision_hidden_size = kwargs.image_features.dims.at(-1);
27192
+ const reshaped_image_hidden_states = kwargs.image_features.view(-1, vision_hidden_size);
27193
+ return default_merge_input_ids_with_image_features({
27194
+ // @ts-ignore
27195
+ image_token_id: this.config.image_token_index ?? this.config.image_token_id,
27196
+ ...kwargs,
27197
+ image_features: reshaped_image_hidden_states
27198
+ });
27199
+ }
27200
+ };
27201
+ var Moondream1ForConditionalGeneration = class extends LlavaForConditionalGeneration {
27202
+ };
27203
+ var LlavaQwen2ForCausalLM = class extends LlavaForConditionalGeneration {
27204
+ };
27205
+
27019
27206
  // src/models/gemma3/modeling_gemma3.js
27020
27207
  var Gemma3PreTrainedModel = class extends PreTrainedModel {
27021
27208
  };
27022
27209
  var Gemma3Model = class extends Gemma3PreTrainedModel {
27023
27210
  };
27024
- var Gemma3ForCausalLM = class extends Gemma3PreTrainedModel {
27211
+ var Gemma3ForConditionalGeneration = class extends LlavaForConditionalGeneration {
27212
+ };
27213
+ var Gemma3ForCausalLM = class extends Gemma3ForConditionalGeneration {
27025
27214
  };
27026
27215
 
27027
27216
  // src/models/gemma3n/modeling_gemma3n.js
@@ -27134,6 +27323,382 @@ var GlmModel = class extends GlmPreTrainedModel {
27134
27323
  var GlmForCausalLM = class extends GlmPreTrainedModel {
27135
27324
  };
27136
27325
 
27326
+ // src/models/glm_moe_dsa/modeling_glm_moe_dsa.js
27327
+ var GlmMoeDsaPreTrainedModel = class extends PreTrainedModel {
27328
+ };
27329
+ var GlmMoeDsaModel = class extends GlmMoeDsaPreTrainedModel {
27330
+ };
27331
+ var GlmMoeDsaForCausalLM = class extends GlmMoeDsaPreTrainedModel {
27332
+ };
27333
+
27334
+ // src/models/qwen2_vl/modeling_qwen2_vl.js
27335
+ var Qwen2VLPreTrainedModel = class extends PreTrainedModel {
27336
+ forward_params = [
27337
+ // Text inputs
27338
+ "input_ids",
27339
+ "attention_mask",
27340
+ "position_ids",
27341
+ "past_key_values",
27342
+ // Vision inputs
27343
+ "pixel_values",
27344
+ "image_grid_thw"
27345
+ ];
27346
+ };
27347
+ var Qwen2VLForConditionalGeneration = class extends Qwen2VLPreTrainedModel {
27348
+ // NOTE: This is used as the base class for all Qwen VL models and their CausalLM variants.
27349
+ // CausalLM variants (e.g., Qwen2VLForCausalLM) extend this class but load only
27350
+ // embed_tokens + decoder_model_merged (no vision_encoder) via MultimodalLanguageModelOnly type.
27351
+ image_grid_thw_name = "grid_thw";
27352
+ /**
27353
+ * Compute text-only 3D rope position IDs (all 3 dims get the same 1D positions).
27354
+ * @param {Tensor} input_ids
27355
+ * @param {Tensor} attention_mask
27356
+ * @returns {[Tensor, Tensor]} [position_ids, mrope_position_deltas]
27357
+ */
27358
+ _get_text_only_rope_index(input_ids, attention_mask) {
27359
+ if (attention_mask) {
27360
+ const { data, dims } = cumsum_masked_fill(attention_mask);
27361
+ const position_ids = BigInt64Array.from({ length: 3 * data.length }, (_, i) => data[i % data.length]);
27362
+ const mrope_position_deltas = Array.from(
27363
+ { length: dims[0] },
27364
+ (_, i) => max(data.subarray(dims[1] * i, dims[1] * (i + 1)))[0] + 1n + BigInt(dims[1])
27365
+ );
27366
+ return [
27367
+ new Tensor3("int64", position_ids, [3, ...dims]),
27368
+ new Tensor3("int64", mrope_position_deltas, [mrope_position_deltas.length, 1])
27369
+ ];
27370
+ } else {
27371
+ const [batch_size, seq_length] = input_ids.dims;
27372
+ const position_ids = BigInt64Array.from(
27373
+ { length: 3 * batch_size * seq_length },
27374
+ (_, i) => BigInt(Math.floor(i % seq_length / batch_size))
27375
+ );
27376
+ return [new Tensor3("int64", position_ids, [3, ...input_ids.dims]), zeros([batch_size, 1])];
27377
+ }
27378
+ }
27379
+ /**
27380
+ * Reorder per-segment position ID lists from [seg1[t,h,w], seg2[t,h,w], ...] into
27381
+ * global [all_t, all_h, all_w] order, then write back into the position_ids array
27382
+ * respecting attention mask.
27383
+ * @param {number[][]} llm_pos_ids_list List of per-segment position arrays, each of length 3*seg_len
27384
+ * @param {number[]} attn_mask Attention mask for this batch element
27385
+ * @param {number[][][]} position_ids_list [3][batch][seq] output array to write into
27386
+ * @param {number} batch_idx Current batch index
27387
+ * @returns {number[]} Flat reordered positions of length total_len
27388
+ */
27389
+ _reorder_and_write_positions(llm_pos_ids_list, attn_mask, position_ids_list, batch_idx) {
27390
+ const total_len = llm_pos_ids_list.reduce((acc, x) => acc + x.length, 0);
27391
+ const llm_positions = new Array(total_len);
27392
+ let index = 0;
27393
+ for (let x = 0; x < 3; ++x) {
27394
+ for (const val of llm_pos_ids_list) {
27395
+ const seg_len = val.length / 3;
27396
+ for (let z2 = x * seg_len; z2 < (x + 1) * seg_len; ++z2) {
27397
+ llm_positions[index++] = val[z2];
27398
+ }
27399
+ }
27400
+ }
27401
+ let count2 = 0;
27402
+ for (let y = 0; y < attn_mask.length; ++y) {
27403
+ if (attn_mask[y] == 1) {
27404
+ for (let x = 0; x < 3; ++x) {
27405
+ position_ids_list[x][batch_idx][y] = llm_positions[x * total_len / 3 + count2];
27406
+ }
27407
+ ++count2;
27408
+ }
27409
+ }
27410
+ return llm_positions;
27411
+ }
27412
+ /**
27413
+ * Build per-batch position ID segments for multimodal rope.
27414
+ * Override this in subclasses to change how vision/text segments are identified and positioned.
27415
+ * @param {object} params
27416
+ * @param {any[]} params.filtered_ids - attention-masked token IDs for this batch element
27417
+ * @param {any[][]} params.image_grid_thw_list - all image grid dimensions
27418
+ * @param {any[][]} params.video_grid_thw_list - all video grid dimensions
27419
+ * @param {number} params.spatial_merge_size
27420
+ * @param {{image_index: number, video_index: number}} params.state - mutable counters shared across batches
27421
+ * @returns {number[][]} llm_pos_ids_list - segments of [t..., h..., w...] positions
27422
+ */
27423
+ _get_multimodal_rope_positions({
27424
+ filtered_ids,
27425
+ image_grid_thw_list,
27426
+ video_grid_thw_list,
27427
+ spatial_merge_size,
27428
+ state
27429
+ }) {
27430
+ const { image_token_id, video_token_id, vision_start_token_id } = this.config;
27431
+ const ids = filtered_ids;
27432
+ const vision_start_indices = ids.reduce((acc, x, idx) => {
27433
+ if (x == vision_start_token_id) acc.push(idx);
27434
+ return acc;
27435
+ }, []);
27436
+ const vision_tokens = vision_start_indices.map((x) => ids[x + 1]);
27437
+ const image_nums = vision_tokens.filter((x) => x == image_token_id).length;
27438
+ const video_nums = vision_tokens.filter((x) => x == video_token_id).length;
27439
+ const llm_pos_ids_list = [];
27440
+ let st2 = 0;
27441
+ let remain_images = image_nums;
27442
+ let remain_videos = video_nums;
27443
+ for (let j = 0; j < vision_tokens.length; ++j) {
27444
+ const next_image_token = ids.findIndex((x, i) => i > st2 && x == image_token_id);
27445
+ const next_video_token = ids.findIndex((x, i) => i > st2 && x == video_token_id);
27446
+ const ed_image = remain_images > 0 && next_image_token !== -1 ? next_image_token : ids.length + 1;
27447
+ const ed_video = remain_videos > 0 && next_video_token !== -1 ? next_video_token : ids.length + 1;
27448
+ let ed;
27449
+ let t, h, w;
27450
+ if (ed_image < ed_video) {
27451
+ [t, h, w] = image_grid_thw_list[state.image_index];
27452
+ ++state.image_index;
27453
+ --remain_images;
27454
+ ed = ed_image;
27455
+ } else {
27456
+ [t, h, w] = video_grid_thw_list[state.video_index];
27457
+ ++state.video_index;
27458
+ --remain_videos;
27459
+ ed = ed_video;
27460
+ }
27461
+ const [llm_grid_t, llm_grid_h, llm_grid_w] = [
27462
+ Number(t),
27463
+ Math.floor(Number(h) / spatial_merge_size),
27464
+ Math.floor(Number(w) / spatial_merge_size)
27465
+ ];
27466
+ const text_len = ed - st2;
27467
+ const st_idx = llm_pos_ids_list.length > 0 ? max(llm_pos_ids_list.at(-1))[0] + 1 : 0;
27468
+ llm_pos_ids_list.push(Array.from({ length: 3 * text_len }, (_, i) => st_idx + i % text_len));
27469
+ const offset = text_len + st_idx;
27470
+ const grid_size = llm_grid_t * llm_grid_h * llm_grid_w;
27471
+ const t_index = Array.from(
27472
+ { length: grid_size },
27473
+ (_, i) => offset + Math.floor(i / (llm_grid_h * llm_grid_w))
27474
+ );
27475
+ const h_index = Array.from(
27476
+ { length: grid_size },
27477
+ (_, i) => offset + Math.floor(i / llm_grid_w) % llm_grid_h
27478
+ );
27479
+ const w_index = Array.from({ length: grid_size }, (_, i) => offset + i % llm_grid_w);
27480
+ llm_pos_ids_list.push([t_index, h_index, w_index].flat());
27481
+ st2 = ed + grid_size;
27482
+ }
27483
+ if (st2 < ids.length) {
27484
+ const st_idx = llm_pos_ids_list.length > 0 ? max(llm_pos_ids_list.at(-1))[0] + 1 : 0;
27485
+ const text_len = ids.length - st2;
27486
+ llm_pos_ids_list.push(Array.from({ length: 3 * text_len }, (_, i) => st_idx + i % text_len));
27487
+ }
27488
+ return llm_pos_ids_list;
27489
+ }
27490
+ /**
27491
+ * Calculate the 3D rope index based on image and video's temporal, height and width in LLM.
27492
+ *
27493
+ * Explanation:
27494
+ * Each embedding sequence contains vision embedding and text embedding or just contains text embedding.
27495
+ *
27496
+ * For pure text embedding sequence, the rotary position embedding has no difference with mordern LLMs.
27497
+ * Examples:
27498
+ * input_ids: [T T T T T], here T is for text.
27499
+ * temporal position_ids: [0, 1, 2, 3, 4]
27500
+ * height position_ids: [0, 1, 2, 3, 4]
27501
+ * width position_ids: [0, 1, 2, 3, 4]
27502
+ *
27503
+ * For vision and text embedding sequence, we calculate 3D rotary position embedding for vision part
27504
+ * and 1D rotary position embeddin for text part.
27505
+ * Examples:
27506
+ * Assume we have a video input with 3 temporal patches, 2 height patches and 2 width patches.
27507
+ * input_ids: [V V V V V V V V V V V V T T T T T], here V is for vision.
27508
+ * vision temporal position_ids: [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]
27509
+ * vision height position_ids: [0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1]
27510
+ * vision width position_ids: [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]
27511
+ * text temporal position_ids: [3, 4, 5, 6, 7]
27512
+ * text height position_ids: [3, 4, 5, 6, 7]
27513
+ * text width position_ids: [3, 4, 5, 6, 7]
27514
+ * Here we calculate the text start position_ids as the max vision position_ids plus 1.
27515
+ *
27516
+ * @param {Tensor} input_ids Indices of input sequence tokens in the vocabulary. Tensor of shape `(batch_size, sequence_length)`.
27517
+ * @param {Tensor} image_grid_thw (Optional) The temporal, height and width of feature shape of each image in LLM. Tensor of shape `(num_images, 3)`.
27518
+ * @param {Tensor} video_grid_thw (Optional) The temporal, height and width of feature shape of each video in LLM. Tensor of shape `(num_videos, 3)`.
27519
+ * @param {Tensor} attention_mask (Optional) Mask to avoid performing attention on padding token indices. Tensor of shape `(batch_size, sequence_length)`.
27520
+ * @returns {[Tensor, Tensor]} [position_ids, mrope_position_deltas]
27521
+ */
27522
+ get_rope_index(input_ids, image_grid_thw, video_grid_thw, attention_mask) {
27523
+ const { vision_config } = this.config;
27524
+ const spatial_merge_size = vision_config.spatial_merge_size ?? 2;
27525
+ if (image_grid_thw || video_grid_thw) {
27526
+ const total_input_ids = input_ids.tolist();
27527
+ if (!attention_mask) {
27528
+ attention_mask = ones_like(input_ids);
27529
+ }
27530
+ const attention_mask_list = attention_mask.tolist();
27531
+ const position_ids_list = Array.from(
27532
+ { length: 3 },
27533
+ () => Array.from({ length: input_ids.dims[0] }, () => Array.from({ length: input_ids.dims[1] }, () => 0))
27534
+ );
27535
+ const image_grid_thw_list = image_grid_thw ? image_grid_thw.tolist() : [];
27536
+ const video_grid_thw_list = video_grid_thw ? video_grid_thw.tolist() : [];
27537
+ const state = { image_index: 0, video_index: 0 };
27538
+ const mrope_position_deltas = [];
27539
+ for (let i = 0; i < total_input_ids.length; ++i) {
27540
+ const filtered_ids = total_input_ids[i].filter((_, j) => attention_mask_list[i][j] == 1);
27541
+ const llm_pos_ids_list = this._get_multimodal_rope_positions({
27542
+ filtered_ids,
27543
+ image_grid_thw_list,
27544
+ video_grid_thw_list,
27545
+ spatial_merge_size,
27546
+ state
27547
+ });
27548
+ const llm_positions = this._reorder_and_write_positions(
27549
+ llm_pos_ids_list,
27550
+ attention_mask_list[i],
27551
+ position_ids_list,
27552
+ i
27553
+ );
27554
+ mrope_position_deltas.push(max(llm_positions)[0] + 1 - total_input_ids[i].length);
27555
+ }
27556
+ return [
27557
+ new Tensor3("int64", position_ids_list.flat(Infinity), [3, input_ids.dims[0], input_ids.dims[1]]),
27558
+ new Tensor3("int64", mrope_position_deltas, [mrope_position_deltas.length, 1])
27559
+ ];
27560
+ } else {
27561
+ return this._get_text_only_rope_index(input_ids, attention_mask);
27562
+ }
27563
+ }
27564
+ async encode_image({ pixel_values, image_grid_thw }) {
27565
+ const features = (await sessionRun(this.sessions["vision_encoder"], {
27566
+ pixel_values,
27567
+ [this.image_grid_thw_name]: image_grid_thw
27568
+ })).image_features;
27569
+ return features;
27570
+ }
27571
+ _merge_input_ids_with_image_features(kwargs) {
27572
+ return default_merge_input_ids_with_image_features({
27573
+ // @ts-ignore
27574
+ image_token_id: this.config.image_token_id,
27575
+ ...kwargs
27576
+ });
27577
+ }
27578
+ prepare_inputs_for_generation(input_ids, model_inputs, generation_config) {
27579
+ if (!model_inputs.attention_mask || model_inputs.position_ids) {
27580
+ return model_inputs;
27581
+ }
27582
+ const session = this.sessions["decoder_model_merged"] ?? this.sessions["model"];
27583
+ if (!session.inputNames.includes("position_ids")) {
27584
+ return model_inputs;
27585
+ }
27586
+ if (!model_inputs.past_key_values) {
27587
+ [model_inputs.position_ids, model_inputs.rope_deltas] = this.get_rope_index(
27588
+ model_inputs.input_ids,
27589
+ model_inputs.image_grid_thw,
27590
+ model_inputs.video_grid_thw,
27591
+ model_inputs.attention_mask
27592
+ );
27593
+ } else {
27594
+ model_inputs.pixel_values = null;
27595
+ const past_length = model_inputs.past_key_values.get_seq_length();
27596
+ if (past_length < model_inputs.input_ids.dims[1]) {
27597
+ const [full_position_ids, rope_deltas] = this.get_rope_index(
27598
+ model_inputs.input_ids,
27599
+ model_inputs.image_grid_thw,
27600
+ model_inputs.video_grid_thw,
27601
+ model_inputs.attention_mask
27602
+ );
27603
+ model_inputs.rope_deltas = rope_deltas;
27604
+ model_inputs.position_ids = full_position_ids.slice(null, null, [past_length, null]);
27605
+ model_inputs.input_ids = model_inputs.input_ids.slice(null, [past_length, null]);
27606
+ } else {
27607
+ if (!model_inputs.rope_deltas) {
27608
+ [, model_inputs.rope_deltas] = this.get_rope_index(
27609
+ model_inputs.input_ids,
27610
+ model_inputs.image_grid_thw,
27611
+ model_inputs.video_grid_thw,
27612
+ model_inputs.attention_mask
27613
+ );
27614
+ }
27615
+ const delta = BigInt(past_length);
27616
+ const rope_deltas_list = model_inputs.rope_deltas.map((x) => delta + x);
27617
+ model_inputs.position_ids = stack([rope_deltas_list, rope_deltas_list, rope_deltas_list], 0);
27618
+ }
27619
+ }
27620
+ return model_inputs;
27621
+ }
27622
+ };
27623
+ var Qwen2VLForCausalLM = class extends Qwen2VLForConditionalGeneration {
27624
+ };
27625
+
27626
+ // src/models/qwen2_5_vl/modeling_qwen2_5_vl.js
27627
+ var Qwen2_5_VLForConditionalGeneration = class extends Qwen2VLForConditionalGeneration {
27628
+ image_grid_thw_name = "image_grid_thw";
27629
+ };
27630
+ var Qwen2_5_VLForCausalLM = class extends Qwen2VLForCausalLM {
27631
+ image_grid_thw_name = "image_grid_thw";
27632
+ };
27633
+
27634
+ // src/models/glm_ocr/modeling_glm_ocr.js
27635
+ var GlmOcrForConditionalGeneration = class extends Qwen2_5_VLForConditionalGeneration {
27636
+ /**
27637
+ * Compute 3D positional indices for vision tokens.
27638
+ * Temporal is constant, height is repeat-interleaved, width tiles.
27639
+ * @param {number} start_position
27640
+ * @param {number[]} grid_thw [T, H, W]
27641
+ * @param {number} temp_merge_size
27642
+ * @param {number} spatial_merge_size
27643
+ * @returns {number[]} Flat array of length 3 * seq_len: [temporal..., height..., width...]
27644
+ */
27645
+ get_vision_position_ids(start_position, grid_thw, temp_merge_size, spatial_merge_size) {
27646
+ const llm_grid_t = Math.floor(grid_thw[0] / temp_merge_size);
27647
+ const llm_grid_h = Math.floor(grid_thw[1] / spatial_merge_size);
27648
+ const llm_grid_w = Math.floor(grid_thw[2] / spatial_merge_size);
27649
+ const seq_len = llm_grid_h * llm_grid_w * llm_grid_t;
27650
+ const t_pos = Array.from({ length: seq_len }, () => start_position);
27651
+ const h_pos = Array.from(
27652
+ { length: seq_len },
27653
+ (_, i) => start_position + Math.floor(i / (llm_grid_w * llm_grid_t))
27654
+ );
27655
+ const w_pos = Array.from({ length: seq_len }, (_, i) => start_position + i % llm_grid_w);
27656
+ return [...t_pos, ...h_pos, ...w_pos];
27657
+ }
27658
+ /**
27659
+ * GlmOcr uses mm_token_type_ids-style grouping (image tokens identified by image_token_id)
27660
+ * instead of vision_start_token_id scanning used by Qwen2VL.
27661
+ * After a vision segment, position advances by max(h, w) / spatial_merge_size.
27662
+ */
27663
+ _get_multimodal_rope_positions({
27664
+ filtered_ids,
27665
+ image_grid_thw_list,
27666
+ video_grid_thw_list,
27667
+ spatial_merge_size,
27668
+ state
27669
+ }) {
27670
+ const { image_token_id } = this.config;
27671
+ const groups = [];
27672
+ let group_start = 0;
27673
+ let current_type = filtered_ids[0] == image_token_id ? 1 : 0;
27674
+ for (let j = 1; j <= filtered_ids.length; ++j) {
27675
+ const t = j < filtered_ids.length ? filtered_ids[j] == image_token_id ? 1 : 0 : -1;
27676
+ if (t !== current_type) {
27677
+ groups.push([current_type, group_start, j]);
27678
+ group_start = j;
27679
+ current_type = t;
27680
+ }
27681
+ }
27682
+ let current_pos = 0;
27683
+ const llm_pos_ids_list = [];
27684
+ for (const [modality_type, start_idx, end_idx] of groups) {
27685
+ if (modality_type === 0) {
27686
+ const text_len = end_idx - start_idx;
27687
+ llm_pos_ids_list.push(Array.from({ length: 3 * text_len }, (_, i) => current_pos + i % text_len));
27688
+ current_pos += text_len;
27689
+ } else {
27690
+ const grid_thw = image_grid_thw_list[state.image_index++].map(Number);
27691
+ const temp_merge_size = grid_thw[0];
27692
+ llm_pos_ids_list.push(
27693
+ this.get_vision_position_ids(current_pos, grid_thw, temp_merge_size, spatial_merge_size)
27694
+ );
27695
+ current_pos += Math.max(grid_thw[1], grid_thw[2]) / spatial_merge_size;
27696
+ }
27697
+ }
27698
+ return llm_pos_ids_list;
27699
+ }
27700
+ };
27701
+
27137
27702
  // src/models/glpn/modeling_glpn.js
27138
27703
  var GLPNPreTrainedModel = class extends PreTrainedModel {
27139
27704
  };
@@ -27332,27 +27897,6 @@ var HunYuanDenseV1Model = class extends HunYuanDenseV1PreTrainedModel {
27332
27897
  var HunYuanDenseV1ForCausalLM = class extends HunYuanDenseV1PreTrainedModel {
27333
27898
  };
27334
27899
 
27335
- // src/models/llava/modeling_llava.js
27336
- var LlavaPreTrainedModel = class extends PreTrainedModel {
27337
- forward_params = ["input_ids", "attention_mask", "pixel_values", "position_ids", "past_key_values"];
27338
- };
27339
- var LlavaForConditionalGeneration = class extends LlavaPreTrainedModel {
27340
- _merge_input_ids_with_image_features(kwargs) {
27341
- const vision_hidden_size = kwargs.image_features.dims.at(-1);
27342
- const reshaped_image_hidden_states = kwargs.image_features.view(-1, vision_hidden_size);
27343
- return default_merge_input_ids_with_image_features({
27344
- // @ts-ignore
27345
- image_token_id: this.config.image_token_index ?? this.config.image_token_id,
27346
- ...kwargs,
27347
- image_features: reshaped_image_hidden_states
27348
- });
27349
- }
27350
- };
27351
- var Moondream1ForConditionalGeneration = class extends LlavaForConditionalGeneration {
27352
- };
27353
- var LlavaQwen2ForCausalLM = class extends LlavaForConditionalGeneration {
27354
- };
27355
-
27356
27900
  // src/models/idefics3/modeling_idefics3.js
27357
27901
  var Idefics3ForConditionalGeneration = class extends LlavaForConditionalGeneration {
27358
27902
  forward_params = [
@@ -27446,6 +27990,10 @@ var Lfm2Model = class extends Lfm2PreTrainedModel {
27446
27990
  var Lfm2ForCausalLM = class extends Lfm2PreTrainedModel {
27447
27991
  };
27448
27992
 
27993
+ // src/models/lighton_ocr/modeling_lighton_ocr.js
27994
+ var LightOnOcrForConditionalGeneration = class extends LlavaForConditionalGeneration {
27995
+ };
27996
+
27449
27997
  // src/models/lfm2_moe/modeling_lfm2_moe.js
27450
27998
  var Lfm2MoePreTrainedModel = class extends PreTrainedModel {
27451
27999
  };
@@ -27642,6 +28190,14 @@ var MistralModel = class extends MistralPreTrainedModel {
27642
28190
  var MistralForCausalLM = class extends MistralPreTrainedModel {
27643
28191
  };
27644
28192
 
28193
+ // src/models/mistral4/modeling_mistral4.js
28194
+ var Mistral4PreTrainedModel = class extends PreTrainedModel {
28195
+ };
28196
+ var Mistral4Model = class extends Mistral4PreTrainedModel {
28197
+ };
28198
+ var Mistral4ForCausalLM = class extends Mistral4PreTrainedModel {
28199
+ };
28200
+
27645
28201
  // src/models/mobilebert/modeling_mobilebert.js
27646
28202
  var MobileBertPreTrainedModel = class extends PreTrainedModel {
27647
28203
  };
@@ -28110,6 +28666,14 @@ var NanoChatModel = class extends NanoChatPreTrainedModel {
28110
28666
  var NanoChatForCausalLM = class extends NanoChatPreTrainedModel {
28111
28667
  };
28112
28668
 
28669
+ // src/models/nemotron_h/modeling_nemotron_h.js
28670
+ var NemotronHPreTrainedModel = class extends PreTrainedModel {
28671
+ };
28672
+ var NemotronHModel = class extends NemotronHPreTrainedModel {
28673
+ };
28674
+ var NemotronHForCausalLM = class extends NemotronHPreTrainedModel {
28675
+ };
28676
+
28113
28677
  // src/models/neobert/modeling_neobert.js
28114
28678
  var NeoBertPreTrainedModel = class extends PreTrainedModel {
28115
28679
  };
@@ -28390,252 +28954,6 @@ var Qwen2MoeModel = class extends Qwen2MoePreTrainedModel {
28390
28954
  var Qwen2MoeForCausalLM = class extends Qwen2MoePreTrainedModel {
28391
28955
  };
28392
28956
 
28393
- // src/models/qwen2_vl/modeling_qwen2_vl.js
28394
- var Qwen2VLPreTrainedModel = class extends PreTrainedModel {
28395
- forward_params = [
28396
- // Text inputs
28397
- "input_ids",
28398
- "attention_mask",
28399
- "position_ids",
28400
- "past_key_values",
28401
- // Vision inputs
28402
- "pixel_values",
28403
- "image_grid_thw"
28404
- ];
28405
- };
28406
- var Qwen2VLForConditionalGeneration = class extends Qwen2VLPreTrainedModel {
28407
- // NOTE: This is used as the base class for all Qwen VL models and their CausalLM variants.
28408
- // CausalLM variants (e.g., Qwen2VLForCausalLM) extend this class but load only
28409
- // embed_tokens + decoder_model_merged (no vision_encoder) via MultimodalLanguageModelOnly type.
28410
- image_grid_thw_name = "grid_thw";
28411
- /**
28412
- * Calculate the 3D rope index based on image and video's temporal, height and width in LLM.
28413
- *
28414
- * Explanation:
28415
- * Each embedding sequence contains vision embedding and text embedding or just contains text embedding.
28416
- *
28417
- * For pure text embedding sequence, the rotary position embedding has no difference with mordern LLMs.
28418
- * Examples:
28419
- * input_ids: [T T T T T], here T is for text.
28420
- * temporal position_ids: [0, 1, 2, 3, 4]
28421
- * height position_ids: [0, 1, 2, 3, 4]
28422
- * width position_ids: [0, 1, 2, 3, 4]
28423
- *
28424
- * For vision and text embedding sequence, we calculate 3D rotary position embedding for vision part
28425
- * and 1D rotary position embeddin for text part.
28426
- * Examples:
28427
- * Assume we have a video input with 3 temporal patches, 2 height patches and 2 width patches.
28428
- * input_ids: [V V V V V V V V V V V V T T T T T], here V is for vision.
28429
- * vision temporal position_ids: [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]
28430
- * vision height position_ids: [0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1]
28431
- * vision width position_ids: [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]
28432
- * text temporal position_ids: [3, 4, 5, 6, 7]
28433
- * text height position_ids: [3, 4, 5, 6, 7]
28434
- * text width position_ids: [3, 4, 5, 6, 7]
28435
- * Here we calculate the text start position_ids as the max vision position_ids plus 1.
28436
- *
28437
- * @param {Tensor} input_ids Indices of input sequence tokens in the vocabulary. Tensor of shape `(batch_size, sequence_length)`.
28438
- * @param {Tensor} image_grid_thw (Optional) The temporal, height and width of feature shape of each image in LLM. Tensor of shape `(num_images, 3)`.
28439
- * @param {Tensor} video_grid_thw (Optional) The temporal, height and width of feature shape of each video in LLM. Tensor of shape `(num_videos, 3)`.
28440
- * @param {Tensor} attention_mask (Optional) Mask to avoid performing attention on padding token indices. Tensor of shape `(batch_size, sequence_length)`. Mask values selected in `[0, 1]`:
28441
- * - 1 for tokens that are **not masked**,
28442
- * - 0 for tokens that are **masked**.
28443
- * @returns {[Tensor, Tensor]} [position_ids, mrope_position_deltas] with:
28444
- * - position_ids: Tensor of shape `(3, batch_size, sequence_length)`.
28445
- * - mrope_position_deltas: Tensor of shape `(batch_size)`.
28446
- */
28447
- get_rope_index(input_ids, image_grid_thw, video_grid_thw, attention_mask) {
28448
- const { vision_config, image_token_id, video_token_id, vision_start_token_id } = this.config;
28449
- const spatial_merge_size = vision_config.spatial_merge_size ?? 2;
28450
- const mrope_position_deltas = [];
28451
- if (image_grid_thw || video_grid_thw) {
28452
- let total_input_ids = input_ids.tolist();
28453
- if (!attention_mask) {
28454
- attention_mask = ones_like(input_ids);
28455
- }
28456
- const attention_mask_list = attention_mask.tolist();
28457
- const position_ids_list = Array.from(
28458
- { length: 3 },
28459
- (_) => Array.from({ length: input_ids.dims[0] }, (_2) => Array.from({ length: input_ids.dims[1] }, (_3) => 1))
28460
- );
28461
- const image_grid_thw_list = image_grid_thw ? image_grid_thw.tolist() : [];
28462
- const video_grid_thw_list = video_grid_thw ? video_grid_thw.tolist() : [];
28463
- let image_index = 0;
28464
- let video_index = 0;
28465
- for (let i = 0; i < total_input_ids.length; ++i) {
28466
- const ids = total_input_ids[i].filter((_, j) => attention_mask_list[i][j] == 1);
28467
- const vision_start_indices = ids.reduce((acc, x, idx) => {
28468
- if (x == vision_start_token_id) acc.push(idx);
28469
- return acc;
28470
- }, []);
28471
- const vision_tokens = vision_start_indices.map((x) => ids[x + 1]);
28472
- const image_nums = vision_tokens.filter((x) => x == image_token_id).length;
28473
- const video_nums = vision_tokens.filter((x) => x == video_token_id).length;
28474
- let llm_pos_ids_list = [];
28475
- let st2 = 0;
28476
- let remain_images = image_nums;
28477
- let remain_videos = video_nums;
28478
- for (let j = 0; j < vision_tokens.length; ++j) {
28479
- const next_image_token = ids.findIndex((x, i2) => i2 > st2 && x == image_token_id);
28480
- const next_video_token = ids.findIndex((x, i2) => i2 > st2 && x == video_token_id);
28481
- const ed_image = remain_images > 0 && next_image_token !== -1 ? next_image_token : ids.length + 1;
28482
- const ed_video = remain_videos > 0 && next_video_token !== -1 ? next_video_token : ids.length + 1;
28483
- let ed;
28484
- let t, h, w;
28485
- if (ed_image < ed_video) {
28486
- [t, h, w] = image_grid_thw_list[image_index];
28487
- ++image_index;
28488
- --remain_images;
28489
- ed = ed_image;
28490
- } else {
28491
- [t, h, w] = video_grid_thw_list[video_index];
28492
- ++video_index;
28493
- --remain_videos;
28494
- ed = ed_video;
28495
- }
28496
- const [llm_grid_t, llm_grid_h, llm_grid_w] = [
28497
- Number(t),
28498
- Math.floor(Number(h) / spatial_merge_size),
28499
- Math.floor(Number(w) / spatial_merge_size)
28500
- ];
28501
- const text_len = ed - st2;
28502
- const st_idx = llm_pos_ids_list.length > 0 ? max(llm_pos_ids_list.at(-1))[0] + 1 : 0;
28503
- llm_pos_ids_list.push(Array.from({ length: 3 * text_len }, (_, i2) => st_idx + i2 % text_len));
28504
- const offset = text_len + st_idx;
28505
- const grid_size = llm_grid_t * llm_grid_h * llm_grid_w;
28506
- const t_index = Array.from(
28507
- { length: grid_size },
28508
- (_, i2) => offset + Math.floor(i2 / (llm_grid_h * llm_grid_w))
28509
- );
28510
- const h_index = Array.from(
28511
- { length: grid_size },
28512
- (_, i2) => offset + Math.floor(i2 / llm_grid_w) % llm_grid_h
28513
- );
28514
- const w_index = Array.from({ length: grid_size }, (_, i2) => offset + i2 % llm_grid_w);
28515
- llm_pos_ids_list.push([t_index, h_index, w_index].flat());
28516
- st2 = ed + grid_size;
28517
- }
28518
- if (st2 < ids.length) {
28519
- const st_idx = llm_pos_ids_list.length > 0 ? max(llm_pos_ids_list.at(-1))[0] + 1 : 0;
28520
- const text_len = ids.length - st2;
28521
- llm_pos_ids_list.push(Array.from({ length: 3 * text_len }, (_, i2) => st_idx + i2 % text_len));
28522
- }
28523
- const num_items = llm_pos_ids_list.reduce((acc, x) => acc + x.length, 0);
28524
- const llm_positions = new Array(num_items);
28525
- let index = 0;
28526
- for (let x = 0; x < 3; ++x) {
28527
- for (let y = 0; y < llm_pos_ids_list.length; ++y) {
28528
- const val = llm_pos_ids_list[y];
28529
- const text_len = val.length / 3;
28530
- for (let z = x * text_len; z < (x + 1) * text_len; ++z) {
28531
- llm_positions[index++] = val[z];
28532
- }
28533
- }
28534
- }
28535
- let count2 = 0;
28536
- const attn_mask = attention_mask_list[i];
28537
- for (let y = 0; y < attn_mask.length; ++y) {
28538
- if (attn_mask[y] == 1) {
28539
- for (let x = 0; x < 3; ++x) {
28540
- position_ids_list[x][i][y] = llm_positions[x * num_items / 3 + count2];
28541
- }
28542
- ++count2;
28543
- }
28544
- }
28545
- const max_llm_positions = max(llm_positions)[0];
28546
- mrope_position_deltas.push(max_llm_positions + 1 - total_input_ids[i].length);
28547
- }
28548
- return [
28549
- new Tensor3("int64", position_ids_list.flat(Infinity), [3, input_ids.dims[0], input_ids.dims[1]]),
28550
- new Tensor3("int64", mrope_position_deltas, [mrope_position_deltas.length, 1])
28551
- ];
28552
- } else {
28553
- if (attention_mask) {
28554
- const { data, dims } = cumsum_masked_fill(attention_mask);
28555
- const position_ids = BigInt64Array.from({ length: 3 * data.length }, (_, i) => data[i % data.length]);
28556
- const mrope_position_deltas2 = Array.from(
28557
- { length: dims[0] },
28558
- (_, i) => max(data.subarray(dims[1] * i, dims[1] * (i + 1)))[0] + 1n + BigInt(dims[1])
28559
- );
28560
- return [
28561
- new Tensor3("int64", position_ids, [3, ...dims]),
28562
- new Tensor3("int64", mrope_position_deltas2, [mrope_position_deltas2.length, 1])
28563
- ];
28564
- } else {
28565
- const [batch_size, seq_length] = input_ids.dims;
28566
- const position_ids = BigInt64Array.from(
28567
- { length: 3 * batch_size * seq_length },
28568
- (_, i) => BigInt(Math.floor(i % seq_length / batch_size))
28569
- );
28570
- return [new Tensor3("int64", position_ids, [3, ...input_ids.dims]), zeros([batch_size, 1])];
28571
- }
28572
- }
28573
- }
28574
- async encode_image({ pixel_values, image_grid_thw }) {
28575
- const features = (await sessionRun(this.sessions["vision_encoder"], {
28576
- pixel_values,
28577
- [this.image_grid_thw_name]: image_grid_thw
28578
- })).image_features;
28579
- return features;
28580
- }
28581
- _merge_input_ids_with_image_features(kwargs) {
28582
- return default_merge_input_ids_with_image_features({
28583
- // @ts-ignore
28584
- image_token_id: this.config.image_token_id,
28585
- ...kwargs
28586
- });
28587
- }
28588
- prepare_inputs_for_generation(input_ids, model_inputs, generation_config) {
28589
- if (model_inputs.attention_mask && !model_inputs.position_ids) {
28590
- if (!model_inputs.past_key_values) {
28591
- [model_inputs.position_ids, model_inputs.rope_deltas] = this.get_rope_index(
28592
- model_inputs.input_ids,
28593
- model_inputs.image_grid_thw,
28594
- model_inputs.video_grid_thw,
28595
- model_inputs.attention_mask
28596
- );
28597
- } else {
28598
- model_inputs.pixel_values = null;
28599
- const past_length = model_inputs.past_key_values.get_seq_length();
28600
- if (past_length < model_inputs.input_ids.dims[1]) {
28601
- const [full_position_ids, rope_deltas] = this.get_rope_index(
28602
- model_inputs.input_ids,
28603
- model_inputs.image_grid_thw,
28604
- model_inputs.video_grid_thw,
28605
- model_inputs.attention_mask
28606
- );
28607
- model_inputs.rope_deltas = rope_deltas;
28608
- model_inputs.position_ids = full_position_ids.slice(null, null, [past_length, null]);
28609
- model_inputs.input_ids = model_inputs.input_ids.slice(null, [past_length, null]);
28610
- } else {
28611
- if (!model_inputs.rope_deltas) {
28612
- [, model_inputs.rope_deltas] = this.get_rope_index(
28613
- model_inputs.input_ids,
28614
- model_inputs.image_grid_thw,
28615
- model_inputs.video_grid_thw,
28616
- model_inputs.attention_mask
28617
- );
28618
- }
28619
- const delta = BigInt(past_length);
28620
- const rope_deltas_list = model_inputs.rope_deltas.map((x) => delta + x);
28621
- model_inputs.position_ids = stack([rope_deltas_list, rope_deltas_list, rope_deltas_list], 0);
28622
- }
28623
- }
28624
- }
28625
- return model_inputs;
28626
- }
28627
- };
28628
- var Qwen2VLForCausalLM = class extends Qwen2VLForConditionalGeneration {
28629
- };
28630
-
28631
- // src/models/qwen2_5_vl/modeling_qwen2_5_vl.js
28632
- var Qwen2_5_VLForConditionalGeneration = class extends Qwen2VLForConditionalGeneration {
28633
- image_grid_thw_name = "image_grid_thw";
28634
- };
28635
- var Qwen2_5_VLForCausalLM = class extends Qwen2VLForCausalLM {
28636
- image_grid_thw_name = "image_grid_thw";
28637
- };
28638
-
28639
28957
  // src/models/qwen3/modeling_qwen3.js
28640
28958
  var Qwen3PreTrainedModel = class extends PreTrainedModel {
28641
28959
  };
@@ -29081,6 +29399,14 @@ var SnacDecoderModel = class extends SnacPreTrainedModel {
29081
29399
  }
29082
29400
  };
29083
29401
 
29402
+ // src/models/solar_open/modeling_solar_open.js
29403
+ var SolarOpenPreTrainedModel = class extends PreTrainedModel {
29404
+ };
29405
+ var SolarOpenModel = class extends SolarOpenPreTrainedModel {
29406
+ };
29407
+ var SolarOpenForCausalLM = class extends SolarOpenPreTrainedModel {
29408
+ };
29409
+
29084
29410
  // src/models/speecht5/modeling_speecht5.js
29085
29411
  var SpeechT5PreTrainedModel = class extends PreTrainedModel {
29086
29412
  };
@@ -30197,6 +30523,7 @@ var YoutuForCausalLM = class extends YoutuPreTrainedModel {
30197
30523
  // src/models/registry.js
30198
30524
  var MODEL_MAPPING_NAMES_ENCODER_ONLY = /* @__PURE__ */ new Map([
30199
30525
  ["bert", "BertModel"],
30526
+ ["eurobert", "EuroBertModel"],
30200
30527
  ["neobert", "NeoBertModel"],
30201
30528
  ["modernbert", "ModernBertModel"],
30202
30529
  ["nomic_bert", "NomicBertModel"],
@@ -30328,6 +30655,7 @@ var MODEL_MAPPING_NAMES_DECODER_ONLY = /* @__PURE__ */ new Map([
30328
30655
  ["gemma3_text", "Gemma3Model"],
30329
30656
  ["helium", "HeliumModel"],
30330
30657
  ["glm", "GlmModel"],
30658
+ ["glm_moe_dsa", "GlmMoeDsaModel"],
30331
30659
  ["openelm", "OpenELMModel"],
30332
30660
  ["qwen2", "Qwen2Model"],
30333
30661
  ["qwen2_moe", "Qwen2MoeModel"],
@@ -30339,12 +30667,16 @@ var MODEL_MAPPING_NAMES_DECODER_ONLY = /* @__PURE__ */ new Map([
30339
30667
  ["mpt", "MptModel"],
30340
30668
  ["opt", "OPTModel"],
30341
30669
  ["mistral", "MistralModel"],
30670
+ ["mistral4", "Mistral4Model"],
30342
30671
  ["ministral", "MinistralModel"],
30343
30672
  ["ministral3", "Ministral3Model"],
30344
30673
  ["ernie4_5", "Ernie4_5ForCausalLM"],
30345
30674
  ["starcoder2", "Starcoder2Model"],
30675
+ ["deepseek_v3", "DeepseekV3Model"],
30346
30676
  ["falcon", "FalconModel"],
30347
30677
  ["falcon_h1", "FalconH1Model"],
30678
+ ["nemotron_h", "NemotronHModel"],
30679
+ ["solar_open", "SolarOpenModel"],
30348
30680
  ["stablelm", "StableLmModel"],
30349
30681
  ["modernbert-decoder", "ModernBertDecoderModel"],
30350
30682
  ["hunyuan_v1_dense", "HunYuanDenseV1Model"],
@@ -30364,6 +30696,7 @@ var MODEL_FOR_TEXT_TO_WAVEFORM_MAPPING_NAMES = /* @__PURE__ */ new Map([
30364
30696
  ]);
30365
30697
  var MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES = /* @__PURE__ */ new Map([
30366
30698
  ["bert", "BertForSequenceClassification"],
30699
+ ["eurobert", "EuroBertForSequenceClassification"],
30367
30700
  ["neobert", "NeoBertForSequenceClassification"],
30368
30701
  ["modernbert", "ModernBertForSequenceClassification"],
30369
30702
  ["roformer", "RoFormerForSequenceClassification"],
@@ -30386,6 +30719,7 @@ var MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES = /* @__PURE__ */ new Map([
30386
30719
  ]);
30387
30720
  var MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES = /* @__PURE__ */ new Map([
30388
30721
  ["bert", "BertForTokenClassification"],
30722
+ ["eurobert", "EuroBertForTokenClassification"],
30389
30723
  ["neobert", "NeoBertForTokenClassification"],
30390
30724
  ["modernbert", "ModernBertForTokenClassification"],
30391
30725
  ["roformer", "RoFormerForTokenClassification"],
@@ -30448,6 +30782,7 @@ var MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = /* @__PURE__ */ new Map([
30448
30782
  ["gemma3", "Gemma3ForCausalLM"],
30449
30783
  ["helium", "HeliumForCausalLM"],
30450
30784
  ["glm", "GlmForCausalLM"],
30785
+ ["glm_moe_dsa", "GlmMoeDsaForCausalLM"],
30451
30786
  ["openelm", "OpenELMForCausalLM"],
30452
30787
  ["qwen2", "Qwen2ForCausalLM"],
30453
30788
  ["qwen2_moe", "Qwen2MoeForCausalLM"],
@@ -30459,6 +30794,7 @@ var MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = /* @__PURE__ */ new Map([
30459
30794
  ["qwen3_vl", "Qwen3VLForCausalLM"],
30460
30795
  ["qwen3_vl_moe", "Qwen3VLMoeForCausalLM"],
30461
30796
  ["qwen3_5", "Qwen3_5ForCausalLM"],
30797
+ ["qwen3_5_text", "Qwen3_5ForCausalLM"],
30462
30798
  ["qwen3_5_moe", "Qwen3_5MoeForCausalLM"],
30463
30799
  ["gemma3n", "Gemma3nForCausalLM"],
30464
30800
  ["phi", "PhiForCausalLM"],
@@ -30467,13 +30803,17 @@ var MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = /* @__PURE__ */ new Map([
30467
30803
  ["opt", "OPTForCausalLM"],
30468
30804
  ["mbart", "MBartForCausalLM"],
30469
30805
  ["mistral", "MistralForCausalLM"],
30806
+ ["mistral4", "Mistral4ForCausalLM"],
30470
30807
  ["ministral", "MinistralForCausalLM"],
30471
30808
  ["ministral3", "Ministral3ForCausalLM"],
30472
30809
  ["ernie4_5", "Ernie4_5ForCausalLM"],
30473
30810
  ["starcoder2", "Starcoder2ForCausalLM"],
30811
+ ["deepseek_v3", "DeepseekV3ForCausalLM"],
30474
30812
  ["falcon", "FalconForCausalLM"],
30475
30813
  ["falcon_h1", "FalconH1ForCausalLM"],
30814
+ ["nemotron_h", "NemotronHForCausalLM"],
30476
30815
  ["trocr", "TrOCRForCausalLM"],
30816
+ ["solar_open", "SolarOpenForCausalLM"],
30477
30817
  ["stablelm", "StableLmForCausalLM"],
30478
30818
  ["modernbert-decoder", "ModernBertDecoderForCausalLM"],
30479
30819
  ["hunyuan_v1_dense", "HunYuanDenseV1ForCausalLM"],
@@ -30484,6 +30824,7 @@ var MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = /* @__PURE__ */ new Map([
30484
30824
  var MODEL_FOR_MULTIMODALITY_MAPPING_NAMES = /* @__PURE__ */ new Map([["multi_modality", "MultiModalityCausalLM"]]);
30485
30825
  var MODEL_FOR_MASKED_LM_MAPPING_NAMES = /* @__PURE__ */ new Map([
30486
30826
  ["bert", "BertForMaskedLM"],
30827
+ ["eurobert", "EuroBertForMaskedLM"],
30487
30828
  ["neobert", "NeoBertForMaskedLM"],
30488
30829
  ["modernbert", "ModernBertForMaskedLM"],
30489
30830
  ["roformer", "RoFormerForMaskedLM"],
@@ -30541,8 +30882,11 @@ var MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES = /* @__PURE__ */ new Map([
30541
30882
  ["smolvlm", "SmolVLMForConditionalGeneration"],
30542
30883
  ["paligemma", "PaliGemmaForConditionalGeneration"],
30543
30884
  ["llava_qwen2", "LlavaQwen2ForCausalLM"],
30885
+ ["gemma3", "Gemma3ForConditionalGeneration"],
30544
30886
  ["gemma3n", "Gemma3nForConditionalGeneration"],
30545
- ["mistral3", "Mistral3ForConditionalGeneration"]
30887
+ ["mistral3", "Mistral3ForConditionalGeneration"],
30888
+ ["lighton_ocr", "LightOnOcrForConditionalGeneration"],
30889
+ ["glm_ocr", "GlmOcrForConditionalGeneration"]
30546
30890
  ]);
30547
30891
  var MODEL_FOR_AUDIO_TEXT_TO_TEXT_MAPPING_NAMES = /* @__PURE__ */ new Map([
30548
30892
  ["granite_speech", "GraniteSpeechForConditionalGeneration"],
@@ -30647,6 +30991,7 @@ var MODEL_FOR_TIME_SERIES_PREDICTION_MAPPING_NAMES = /* @__PURE__ */ new Map([
30647
30991
  ]);
30648
30992
  var MODEL_FOR_IMAGE_TO_IMAGE_MAPPING_NAMES = /* @__PURE__ */ new Map([["swin2sr", "Swin2SRForImageSuperResolution"]]);
30649
30993
  var MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES = /* @__PURE__ */ new Map([
30994
+ ["chmv2", "CHMv2ForDepthEstimation"],
30650
30995
  ["dpt", "DPTForDepthEstimation"],
30651
30996
  ["depth_anything", "DepthAnythingForDepthEstimation"],
30652
30997
  ["glpn", "GLPNForDepthEstimation"],
@@ -30732,13 +31077,6 @@ var CUSTOM_MAPPING = [
30732
31077
  ],
30733
31078
  ["SupertonicForConditionalGeneration", SupertonicForConditionalGeneration, MODEL_TYPES.Supertonic],
30734
31079
  ["ChatterboxModel", ChatterboxModel, MODEL_TYPES.Chatterbox],
30735
- ["Qwen2VLForCausalLM", Qwen2VLForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
30736
- ["Qwen2_5_VLForCausalLM", Qwen2_5_VLForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
30737
- ["Qwen3VLForCausalLM", Qwen3VLForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
30738
- ["Qwen3VLMoeForCausalLM", Qwen3VLMoeForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
30739
- ["Qwen3_5ForCausalLM", Qwen3_5ForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
30740
- ["Qwen3_5MoeForCausalLM", Qwen3_5MoeForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
30741
- ["Gemma3nForCausalLM", Gemma3nForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
30742
31080
  [
30743
31081
  "VoxtralRealtimeForConditionalGeneration",
30744
31082
  VoxtralRealtimeForConditionalGeneration,
@@ -32420,6 +32758,41 @@ var TASK_ALIASES = Object.freeze({
32420
32758
  embeddings: "feature-extraction"
32421
32759
  });
32422
32760
 
32761
// src/utils/model_registry/resolve_model_type.js
/**
 * Resolve the MODEL_TYPES category for a model config.
 *
 * Lookup order:
 *   1. Each entry of `config.architectures` directly in MODEL_TYPE_MAPPING.
 *   2. `config.model_type` directly in MODEL_TYPE_MAPPING.
 *   3. `config.model_type` via the per-task name maps (MODEL_MAPPING_NAMES),
 *      mapping model_type -> class name -> MODEL_TYPE_MAPPING.
 * Falls back to MODEL_TYPES.EncoderOnly (optionally warning) when nothing matches.
 *
 * @param {Object} config - Loaded model config (reads `architectures`, `model_type`).
 * @param {Object} [options]
 * @param {boolean} [options.warn=true] - Emit a logger warning on fallback.
 * @returns {*} A MODEL_TYPES value.
 */
function resolve_model_type(config, { warn = true } = {}) {
  /** @type {string[]} */
  const archs = config.architectures || [];

  // 1) Direct architecture lookup.
  for (const arch of archs) {
    const byArch = MODEL_TYPE_MAPPING.get(arch);
    if (byArch !== void 0) {
      return byArch;
    }
  }

  // 2) + 3) Fall back to model_type, first directly, then via the name maps.
  if (config.model_type) {
    const byType = MODEL_TYPE_MAPPING.get(config.model_type);
    if (byType !== void 0) {
      return byType;
    }
    for (const names of Object.values(MODEL_MAPPING_NAMES)) {
      if (!names.has(config.model_type)) {
        continue;
      }
      const viaClassName = MODEL_TYPE_MAPPING.get(names.get(config.model_type));
      if (viaClassName !== void 0) {
        return viaClassName;
      }
      // Note: a name-map hit whose class is absent from MODEL_TYPE_MAPPING
      // does not stop the search — later maps may still resolve it.
    }
  }

  if (warn) {
    const archList = archs.length > 0 ? archs.join(", ") : "(none)";
    logger.warn(
      `[resolve_model_type] Architecture(s) not found in MODEL_TYPE_MAPPING: [${archList}] for model type '${config.model_type}'. Falling back to EncoderOnly (single model.onnx file). If you encounter issues, please report at: ${GITHUB_ISSUE_URL}`
    );
  }
  return MODEL_TYPES.EncoderOnly;
}
32795
+
32423
32796
  // src/utils/model_registry/get_model_files.js
32424
32797
  function get_config(modelId, { config = null, cache_dir = null, local_files_only = false, revision = "main" } = {}) {
32425
32798
  if (config !== null) {
@@ -32442,43 +32815,7 @@ async function get_model_files(modelId, { config = null, dtype: overrideDtype =
32442
32815
  const subfolder = "onnx";
32443
32816
  const rawDevice = overrideDevice ?? custom_config.device;
32444
32817
  let dtype = overrideDtype ?? custom_config.dtype;
32445
- let modelType;
32446
- const architectures = (
32447
- /** @type {string[]} */
32448
- config.architectures || []
32449
- );
32450
- let foundInMapping = false;
32451
- for (const arch of architectures) {
32452
- const mappedType = MODEL_TYPE_MAPPING.get(arch);
32453
- if (mappedType !== void 0) {
32454
- modelType = mappedType;
32455
- foundInMapping = true;
32456
- break;
32457
- }
32458
- }
32459
- if (!foundInMapping && config.model_type) {
32460
- const mappedType = MODEL_TYPE_MAPPING.get(config.model_type);
32461
- if (mappedType !== void 0) {
32462
- modelType = mappedType;
32463
- foundInMapping = true;
32464
- }
32465
- if (!foundInMapping) {
32466
- for (const mapping of Object.values(MODEL_MAPPING_NAMES)) {
32467
- if (mapping.has(config.model_type)) {
32468
- modelType = MODEL_TYPE_MAPPING.get(mapping.get(config.model_type));
32469
- foundInMapping = true;
32470
- break;
32471
- }
32472
- }
32473
- }
32474
- }
32475
- if (!foundInMapping) {
32476
- const archList = architectures.length > 0 ? architectures.join(", ") : "(none)";
32477
- logger.warn(
32478
- `[get_model_files] Architecture(s) not found in MODEL_TYPE_MAPPING: [${archList}] for model type '${config.model_type}'. Falling back to EncoderOnly (single model.onnx file). If you encounter issues, please report at: ${GITHUB_ISSUE_URL}`
32479
- );
32480
- modelType = MODEL_TYPES.EncoderOnly;
32481
- }
32818
+ const modelType = resolve_model_type(config);
32482
32819
  const add_model_file = (fileName, baseName = null) => {
32483
32820
  baseName = baseName ?? fileName;
32484
32821
  const selectedDevice = selectDevice(rawDevice, fileName);
@@ -33065,6 +33402,31 @@ async function clear_pipeline_cache(task, modelId, options = {}) {
33065
33402
  return await clear_files_from_cache(modelId, files, options);
33066
33403
  }
33067
33404
 
33405
// src/utils/model_registry/get_available_dtypes.js
var CONCRETE_DTYPES = Object.keys(DEFAULT_DTYPE_SUFFIX_MAPPING);
/**
 * Detect which dtypes (quantization levels) are available for a model by
 * probing for the corresponding ONNX files (remote hub or local cache).
 *
 * A dtype counts as available only when EVERY required session file exists
 * with that dtype's filename suffix under the `onnx/` subfolder.
 *
 * @param {string} modelId - Model id, e.g. "onnx-community/all-MiniLM-L6-v2-ONNX".
 * @param {Object} [options]
 * @param {Object} [options.config=null] - Pre-loaded config (skips fetching config.json).
 * @param {string} [options.model_file_name=null] - Override model file base name.
 * @param {string} [options.revision='main'] - Model revision.
 * @param {string} [options.cache_dir=null] - Custom cache directory.
 * @param {boolean} [options.local_files_only=false] - Only check local files.
 * @returns {Promise<string[]>} Dtype keys whose full file set exists.
 */
async function get_available_dtypes(modelId, { config = null, model_file_name = null, revision = "main", cache_dir = null, local_files_only = false } = {}) {
  config = await get_config(modelId, { config, cache_dir, local_files_only, revision });
  const subfolder = "onnx";
  const modelType = resolve_model_type(config);
  const { sessions } = getSessionsConfig(modelType, config, { model_file_name });
  const baseNames = Object.values(sessions);
  const metadataOptions = { revision, cache_dir, local_files_only };

  // True iff every session file exists for the given dtype suffix.
  const hasAllFiles = async (suffix) => {
    const checks = baseNames.map(async (baseName) => {
      const metadata = await get_file_metadata(modelId, `${subfolder}/${baseName}${suffix}.onnx`, metadataOptions);
      return metadata.exists;
    });
    return (await Promise.all(checks)).every(Boolean);
  };

  // Probe all dtypes concurrently, then keep only the fully-present ones.
  const availability = await Promise.all(
    CONCRETE_DTYPES.map((dtype) => hasAllFiles(DEFAULT_DTYPE_SUFFIX_MAPPING[dtype] ?? ""))
  );
  return CONCRETE_DTYPES.filter((_, i) => availability[i]);
}
33429
+
33068
33430
  // src/utils/model_registry/ModelRegistry.js
33069
33431
  var ModelRegistry = class {
33070
33432
  /**
@@ -33151,6 +33513,29 @@ var ModelRegistry = class {
33151
33513
  static async get_processor_files(modelId) {
33152
33514
  return get_processor_files(modelId);
33153
33515
  }
33516
+ /**
33517
+ * Detects which quantization levels (dtypes) are available for a model
33518
+ * by checking which ONNX files exist on the hub or locally.
33519
+ *
33520
+ * A dtype is considered available if all required model session files
33521
+ * exist for that dtype.
33522
+ *
33523
+ * @param {string} modelId - The model id (e.g., "onnx-community/all-MiniLM-L6-v2-ONNX")
33524
+ * @param {Object} [options] - Optional parameters
33525
+ * @param {import('../../configs.js').PretrainedConfig} [options.config=null] - Pre-loaded config
33526
+ * @param {string} [options.model_file_name=null] - Override the model file name (excluding .onnx suffix)
33527
+ * @param {string} [options.revision='main'] - Model revision
33528
+ * @param {string} [options.cache_dir=null] - Custom cache directory
33529
+ * @param {boolean} [options.local_files_only=false] - Only check local files
33530
+ * @returns {Promise<string[]>} Array of available dtype strings (e.g., ['fp32', 'fp16', 'q4', 'q8'])
33531
+ *
33532
+ * @example
33533
+ * const dtypes = await ModelRegistry.get_available_dtypes('onnx-community/all-MiniLM-L6-v2-ONNX');
33534
+ * console.log(dtypes); // ['fp32', 'fp16', 'int8', 'uint8', 'q8', 'q4']
33535
+ */
33536
+ static async get_available_dtypes(modelId, options = {}) {
33537
+ return get_available_dtypes(modelId, options);
33538
+ }
33154
33539
  /**
33155
33540
  * Quickly checks if a model is fully cached by verifying `config.json` is present,
33156
33541
  * then confirming all required files are cached.
@@ -33385,6 +33770,9 @@ export {
33385
33770
  BloomModel,
33386
33771
  BloomPreTrainedModel,
33387
33772
  BloomTokenizer,
33773
+ CHMv2ForDepthEstimation,
33774
+ CHMv2ImageProcessor,
33775
+ CHMv2PreTrainedModel,
33388
33776
  CLIPFeatureExtractor,
33389
33777
  CLIPImageProcessor,
33390
33778
  CLIPModel,
@@ -33480,6 +33868,9 @@ export {
33480
33868
  DebertaV2Tokenizer,
33481
33869
  DecisionTransformerModel,
33482
33870
  DecisionTransformerPreTrainedModel,
33871
+ DeepseekV3ForCausalLM,
33872
+ DeepseekV3Model,
33873
+ DeepseekV3PreTrainedModel,
33483
33874
  DeiTFeatureExtractor,
33484
33875
  DeiTForImageClassification,
33485
33876
  DeiTImageProcessor,
@@ -33540,6 +33931,11 @@ export {
33540
33931
  EsmModel,
33541
33932
  EsmPreTrainedModel,
33542
33933
  EsmTokenizer,
33934
+ EuroBertForMaskedLM,
33935
+ EuroBertForSequenceClassification,
33936
+ EuroBertForTokenClassification,
33937
+ EuroBertModel,
33938
+ EuroBertPreTrainedModel,
33543
33939
  ExaoneForCausalLM,
33544
33940
  ExaoneModel,
33545
33941
  ExaonePreTrainedModel,
@@ -33586,8 +33982,11 @@ export {
33586
33982
  Gemma2Model,
33587
33983
  Gemma2PreTrainedModel,
33588
33984
  Gemma3ForCausalLM,
33985
+ Gemma3ForConditionalGeneration,
33986
+ Gemma3ImageProcessor,
33589
33987
  Gemma3Model,
33590
33988
  Gemma3PreTrainedModel,
33989
+ Gemma3Processor,
33591
33990
  Gemma3nAudioFeatureExtractor,
33592
33991
  Gemma3nForCausalLM,
33593
33992
  Gemma3nForConditionalGeneration,
@@ -33597,8 +33996,14 @@ export {
33597
33996
  GemmaModel,
33598
33997
  GemmaPreTrainedModel,
33599
33998
  GemmaTokenizer,
33999
+ Glm46VImageProcessor,
34000
+ Glm46VProcessor,
33600
34001
  GlmForCausalLM,
33601
34002
  GlmModel,
34003
+ GlmMoeDsaForCausalLM,
34004
+ GlmMoeDsaModel,
34005
+ GlmMoeDsaPreTrainedModel,
34006
+ GlmOcrForConditionalGeneration,
33602
34007
  GlmPreTrainedModel,
33603
34008
  GptOssForCausalLM,
33604
34009
  GptOssModel,
@@ -33664,6 +34069,7 @@ export {
33664
34069
  Lfm2VlForConditionalGeneration,
33665
34070
  Lfm2VlImageProcessor,
33666
34071
  Lfm2VlProcessor,
34072
+ LightOnOcrForConditionalGeneration,
33667
34073
  LiteWhisperForConditionalGeneration,
33668
34074
  Llama4ForCausalLM,
33669
34075
  Llama4PreTrainedModel,
@@ -33733,6 +34139,9 @@ export {
33733
34139
  MimiPreTrainedModel,
33734
34140
  MinLengthLogitsProcessor,
33735
34141
  MinNewTokensLengthLogitsProcessor,
34142
+ Mistral4ForCausalLM,
34143
+ Mistral4Model,
34144
+ Mistral4PreTrainedModel,
33736
34145
  MistralForCausalLM,
33737
34146
  MistralModel,
33738
34147
  MistralPreTrainedModel,
@@ -33804,6 +34213,9 @@ export {
33804
34213
  NanoChatForCausalLM,
33805
34214
  NanoChatModel,
33806
34215
  NanoChatPreTrainedModel,
34216
+ NemotronHForCausalLM,
34217
+ NemotronHModel,
34218
+ NemotronHPreTrainedModel,
33807
34219
  NeoBertForMaskedLM,
33808
34220
  NeoBertForQuestionAnswering,
33809
34221
  NeoBertForSequenceClassification,
@@ -33993,6 +34405,9 @@ export {
33993
34405
  SnacFeatureExtractor,
33994
34406
  SnacModel,
33995
34407
  SnacPreTrainedModel,
34408
+ SolarOpenForCausalLM,
34409
+ SolarOpenModel,
34410
+ SolarOpenPreTrainedModel,
33996
34411
  SpeechT5FeatureExtractor,
33997
34412
  SpeechT5ForSpeechToText,
33998
34413
  SpeechT5ForTextToSpeech,
@@ -34190,7 +34605,7 @@ export {
34190
34605
 
34191
34606
  onnxruntime-web/dist/ort.webgpu.bundle.min.mjs:
34192
34607
  (*!
34193
- * ONNX Runtime Web v1.25.0-dev.20260307-d626b568e0
34608
+ * ONNX Runtime Web v1.25.0-dev.20260323-a99aad9d36
34194
34609
  * Copyright (c) Microsoft Corporation. All rights reserved.
34195
34610
  * Licensed under the MIT License.
34196
34611
  *)