@huggingface/transformers 3.0.0-alpha.5 → 3.0.0-alpha.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,28 +1,22 @@
1
- (()=>{var e,t,n={"onnxruntime-node":
1
+ (()=>{var e,t,n={fs:
2
+ /*!*********************!*\
3
+ !*** external "fs" ***!
4
+ \*********************/e=>{"use strict";e.exports=require("fs")},"onnxruntime-node":
2
5
  /*!***********************************!*\
3
6
  !*** external "onnxruntime-node" ***!
4
- \***********************************/e=>{"use strict";e.exports=require("onnxruntime-node")},sharp:
7
+ \***********************************/e=>{"use strict";e.exports=require("onnxruntime-node")},path:
8
+ /*!***********************!*\
9
+ !*** external "path" ***!
10
+ \***********************/e=>{"use strict";e.exports=require("path")},sharp:
5
11
  /*!************************!*\
6
12
  !*** external "sharp" ***!
7
- \************************/e=>{"use strict";e.exports=require("sharp")},"?9c66":
13
+ \************************/e=>{"use strict";e.exports=require("sharp")},url:
14
+ /*!**********************!*\
15
+ !*** external "url" ***!
16
+ \**********************/e=>{"use strict";e.exports=require("url")},"?9c66":
8
17
  /*!****************************************!*\
9
18
  !*** onnxruntime-web/webgpu (ignored) ***!
10
- \****************************************/()=>{},"?7a2c":
11
- /*!********************!*\
12
- !*** fs (ignored) ***!
13
- \********************/()=>{},"?a42a":
14
- /*!**********************!*\
15
- !*** path (ignored) ***!
16
- \**********************/()=>{},"?569f":
17
- /*!********************!*\
18
- !*** fs (ignored) ***!
19
- \********************/()=>{},"?3f59":
20
- /*!**********************!*\
21
- !*** path (ignored) ***!
22
- \**********************/()=>{},"?154a":
23
- /*!*********************!*\
24
- !*** url (ignored) ***!
25
- \*********************/()=>{},"./node_modules/@huggingface/jinja/dist/index.js":
19
+ \****************************************/()=>{},"./node_modules/@huggingface/jinja/dist/index.js":
26
20
  /*!*******************************************************!*\
27
21
  !*** ./node_modules/@huggingface/jinja/dist/index.js ***!
28
22
  \*******************************************************/(e,t,n)=>{"use strict";n.r(t),n.d(t,{Environment:()=>Y,Interpreter:()=>K,Template:()=>ee,parse:()=>B,tokenize:()=>d});var s=Object.freeze({Text:"Text",NumericLiteral:"NumericLiteral",BooleanLiteral:"BooleanLiteral",StringLiteral:"StringLiteral",Identifier:"Identifier",Equals:"Equals",OpenParen:"OpenParen",CloseParen:"CloseParen",OpenStatement:"OpenStatement",CloseStatement:"CloseStatement",OpenExpression:"OpenExpression",CloseExpression:"CloseExpression",OpenSquareBracket:"OpenSquareBracket",CloseSquareBracket:"CloseSquareBracket",OpenCurlyBracket:"OpenCurlyBracket",CloseCurlyBracket:"CloseCurlyBracket",Comma:"Comma",Dot:"Dot",Colon:"Colon",Pipe:"Pipe",CallOperator:"CallOperator",AdditiveBinaryOperator:"AdditiveBinaryOperator",MultiplicativeBinaryOperator:"MultiplicativeBinaryOperator",ComparisonBinaryOperator:"ComparisonBinaryOperator",UnaryOperator:"UnaryOperator",Set:"Set",If:"If",For:"For",In:"In",Is:"Is",NotIn:"NotIn",Else:"Else",EndIf:"EndIf",ElseIf:"ElseIf",EndFor:"EndFor",And:"And",Or:"Or",Not:"UnaryOperator",Macro:"Macro",EndMacro:"EndMacro"}),r=Object.freeze({set:s.Set,for:s.For,in:s.In,is:s.Is,if:s.If,else:s.Else,endif:s.EndIf,elif:s.ElseIf,endfor:s.EndFor,and:s.And,or:s.Or,not:s.Not,"not in":s.NotIn,macro:s.Macro,endmacro:s.EndMacro,true:s.BooleanLiteral,false:s.BooleanLiteral,True:s.BooleanLiteral,False:s.BooleanLiteral}),o=class{constructor(e,t){this.value=e,this.type=t}};function i(e){return/\w/.test(e)}function a(e){return/[0-9]/.test(e)}var 
l=[["{%",s.OpenStatement],["%}",s.CloseStatement],["{{",s.OpenExpression],["}}",s.CloseExpression],["(",s.OpenParen],[")",s.CloseParen],["{",s.OpenCurlyBracket],["}",s.CloseCurlyBracket],["[",s.OpenSquareBracket],["]",s.CloseSquareBracket],[",",s.Comma],[".",s.Dot],[":",s.Colon],["|",s.Pipe],["<=",s.ComparisonBinaryOperator],[">=",s.ComparisonBinaryOperator],["==",s.ComparisonBinaryOperator],["!=",s.ComparisonBinaryOperator],["<",s.ComparisonBinaryOperator],[">",s.ComparisonBinaryOperator],["+",s.AdditiveBinaryOperator],["-",s.AdditiveBinaryOperator],["*",s.MultiplicativeBinaryOperator],["/",s.MultiplicativeBinaryOperator],["%",s.MultiplicativeBinaryOperator],["=",s.Equals]],c=new Map([["n","\n"],["t","\t"],["r","\r"],["b","\b"],["f","\f"],["v","\v"],["'","'"],['"','"'],["\\","\\"]]);function d(e,t={}){const n=[],d=function(e,t={}){return e.endsWith("\n")&&(e=e.slice(0,-1)),e=e.replace(/{#.*?#}/gs,"{##}"),t.lstrip_blocks&&(e=e.replace(/^[ \t]*({[#%])/gm,"$1")),t.trim_blocks&&(e=e.replace(/([#%]})\n/g,"$1")),e.replace(/{##}/g,"").replace(/-%}\s*/g,"%}").replace(/\s*{%-/g,"{%").replace(/-}}\s*/g,"}}").replace(/\s*{{-/g,"{{")}(e,t);let u=0;const h=e=>{let t="";for(;e(d[u]);)if("\\"!==d[u]){if(t+=d[u++],u>=d.length)throw new SyntaxError("Unexpected end of input")}else{if(++u,u>=d.length)throw new SyntaxError("Unexpected end of input");const e=d[u++],n=c.get(e);if(void 0===n)throw new SyntaxError(`Unexpected escaped character: ${e}`);t+=n}return t};e:for(;u<d.length;){const e=n.at(-1)?.type;if(void 0===e||e===s.CloseStatement||e===s.CloseExpression){let e="";for(;u<d.length&&("{"!==d[u]||"%"!==d[u+1]&&"{"!==d[u+1]);)e+=d[u++];if(e.length>0){n.push(new o(e,s.Text));continue}}h((e=>/\s/.test(e)));const t=d[u];if("-"===t||"+"===t){const e=n.at(-1)?.type;if(e===s.Text||void 0===e)throw new SyntaxError(`Unexpected character: ${t}`);switch(e){case s.Identifier:case s.NumericLiteral:case s.BooleanLiteral:case s.StringLiteral:case s.CloseParen:case 
s.CloseSquareBracket:break;default:{++u;const e=h(a);n.push(new o(`${t}${e}`,e.length>0?s.NumericLiteral:s.UnaryOperator));continue}}}for(const[e,t]of l){if(d.slice(u,u+e.length)===e){n.push(new o(e,t)),u+=e.length;continue e}}if("'"!==t&&'"'!==t)if(a(t)){const e=h(a);n.push(new o(e,s.NumericLiteral))}else{if(!i(t))throw new SyntaxError(`Unexpected character: ${t}`);{const e=h(i),t=Object.hasOwn(r,e)?r[e]:s.Identifier;t===s.In&&n.at(-1)?.type===s.Not?(n.pop(),n.push(new o("not in",s.NotIn))):n.push(new o(e,t))}}else{++u;const e=h((e=>e!==t));n.push(new o(e,s.StringLiteral)),++u}}return n}var u=class{type="Statement"},h=class extends u{constructor(e){super(),this.body=e}type="Program"},p=class extends u{constructor(e,t,n){super(),this.test=e,this.body=t,this.alternate=n}type="If"},_=class extends u{constructor(e,t,n,s){super(),this.loopvar=e,this.iterable=t,this.body=n,this.defaultBlock=s}type="For"},m=class extends u{constructor(e,t){super(),this.assignee=e,this.value=t}type="Set"},f=class extends u{constructor(e,t,n){super(),this.name=e,this.args=t,this.body=n}type="Macro"},g=class extends u{type="Expression"},w=class extends g{constructor(e,t,n){super(),this.object=e,this.property=t,this.computed=n}type="MemberExpression"},M=class extends g{constructor(e,t){super(),this.callee=e,this.args=t}type="CallExpression"},b=class extends g{constructor(e){super(),this.value=e}type="Identifier"},y=class extends g{constructor(e){super(),this.value=e}type="Literal"},x=class extends y{type="NumericLiteral"},k=class extends y{type="StringLiteral"},T=class extends y{type="BooleanLiteral"},v=class extends y{type="ArrayLiteral"},C=class extends y{type="TupleLiteral"},F=class extends y{type="ObjectLiteral"},P=class extends g{constructor(e,t,n){super(),this.operator=e,this.left=t,this.right=n}type="BinaryExpression"},S=class extends g{constructor(e,t){super(),this.operand=e,this.filter=t}type="FilterExpression"},A=class extends 
g{constructor(e,t){super(),this.iterable=e,this.test=t}type="SelectExpression"},E=class extends g{constructor(e,t,n){super(),this.operand=e,this.negate=t,this.test=n}type="TestExpression"},z=class extends g{constructor(e,t){super(),this.operator=e,this.argument=t}type="UnaryExpression"},L=class extends g{constructor(e=void 0,t=void 0,n=void 0){super(),this.start=e,this.stop=t,this.step=n}type="SliceExpression"},I=class extends g{constructor(e,t){super(),this.key=e,this.value=t}type="KeywordArgumentExpression"};function B(e){const t=new h([]);let n=0;function r(t,s){const r=e[n++];if(!r||r.type!==t)throw new Error(`Parser Error: ${s}. ${r.type} !== ${t}.`);return r}function o(){switch(e[n].type){case s.Text:return new k(r(s.Text,"Expected text token").value);case s.OpenStatement:return function(){let t;switch(r(s.OpenStatement,"Expected opening statement token"),e[n].type){case s.Set:++n,t=l(),r(s.CloseStatement,"Expected closing statement token");break;case s.If:++n,t=c(),r(s.OpenStatement,"Expected {% token"),r(s.EndIf,"Expected endif token"),r(s.CloseStatement,"Expected %} token");break;case s.Macro:++n,t=function(){const e=$();if("Identifier"!==e.type)throw new SyntaxError("Expected identifier following macro statement");const t=D();r(s.CloseStatement,"Expected closing statement token");const n=[];for(;i(s.OpenStatement,s.EndMacro);)n.push(o());return new f(e,t,n)}(),r(s.OpenStatement,"Expected {% token"),r(s.EndMacro,"Expected endmacro token"),r(s.CloseStatement,"Expected %} token");break;case s.For:++n,t=function(){const e=d(!0);if(!(e instanceof b||e instanceof C))throw new SyntaxError(`Expected identifier/tuple for the loop variable, got ${e.type} instead`);r(s.In,"Expected `in` keyword following loop variable");const t=u();r(s.CloseStatement,"Expected closing statement token");const l=[];for(;i(s.OpenStatement,s.EndFor)&&i(s.OpenStatement,s.Else);)l.push(o());const c=[];if(a(s.OpenStatement,s.Else))for(++n,++n,r(s.CloseStatement,"Expected closing statement 
token");i(s.OpenStatement,s.EndFor);)c.push(o());return new _(e,t,l,c)}(),r(s.OpenStatement,"Expected {% token"),r(s.EndFor,"Expected endfor token"),r(s.CloseStatement,"Expected %} token");break;default:throw new SyntaxError(`Unknown statement type: ${e[n].type}`)}return t}();case s.OpenExpression:return function(){r(s.OpenExpression,"Expected opening expression token");const e=u();return r(s.CloseExpression,"Expected closing expression token"),e}();default:throw new SyntaxError(`Unexpected token type: ${e[n].type}`)}}function i(...t){return n+t.length<=e.length&&t.some(((t,s)=>t!==e[n+s].type))}function a(...t){return n+t.length<=e.length&&t.every(((t,s)=>t===e[n+s].type))}function l(){const e=u();if(a(s.Equals)){++n;const t=l();return new m(e,t)}return e}function c(){const t=u();r(s.CloseStatement,"Expected closing statement token");const i=[],l=[];for(;e[n]?.type!==s.OpenStatement||e[n+1]?.type!==s.ElseIf&&e[n+1]?.type!==s.Else&&e[n+1]?.type!==s.EndIf;)i.push(o());if(e[n]?.type===s.OpenStatement&&e[n+1]?.type!==s.EndIf)if(++n,a(s.ElseIf))r(s.ElseIf,"Expected elseif token"),l.push(c());else for(r(s.Else,"Expected else token"),r(s.CloseStatement,"Expected closing statement token");e[n]?.type!==s.OpenStatement||e[n+1]?.type!==s.EndIf;)l.push(o());return new p(t,i,l)}function d(e=!1){const t=e?$:u,r=[t()],o=a(s.Comma);for(;o&&(++n,r.push(t()),a(s.Comma)););return o?new C(r):r[0]}function u(){return function(){const e=g();if(a(s.If)){++n;const t=g();if(a(s.Else)){++n;const s=g();return new p(t,[e],[s])}return new A(e,t)}return e}()}function g(){let t=y();for(;a(s.Or);){const s=e[n];++n;const r=y();t=new P(s,t,r)}return t}function y(){let t=B();for(;a(s.And);){const s=e[n];++n;const r=B();t=new P(s,t,r)}return t}function B(){let t;for(;a(s.Not);){const s=e[n];++n;const r=B();t=new z(s,r)}return t??function(){let t=N();for(;a(s.ComparisonBinaryOperator)||a(s.In)||a(s.NotIn);){const s=e[n];++n;const r=N();t=new P(s,t,r)}return t}()}function N(){let 
t=R();for(;a(s.AdditiveBinaryOperator);){const s=e[n];++n;const r=R();t=new P(s,t,r)}return t}function O(){const t=function(){let t=$();for(;a(s.Dot)||a(s.OpenSquareBracket);){const o=e[n];let i;++n;const a=o.type!==s.Dot;if(a)i=V(),r(s.CloseSquareBracket,"Expected closing square bracket");else if(i=$(),"Identifier"!==i.type)throw new SyntaxError("Expected identifier following dot operator");t=new w(t,i,a)}return t}();return a(s.OpenParen)?j(t):t}function j(e){let t=new M(e,D());return a(s.OpenParen)&&(t=j(t)),t}function D(){r(s.OpenParen,"Expected opening parenthesis for arguments list");const e=function(){const e=[];for(;!a(s.CloseParen);){let t=u();if(a(s.Equals)){if(++n,!(t instanceof b))throw new SyntaxError("Expected identifier for keyword argument");const e=u();t=new I(t,e)}e.push(t),a(s.Comma)&&++n}return e}();return r(s.CloseParen,"Expected closing parenthesis for arguments list"),e}function V(){const e=[];let t=!1;for(;!a(s.CloseSquareBracket);)a(s.Colon)?(e.push(void 0),++n,t=!0):(e.push(u()),a(s.Colon)&&(++n,t=!0));if(0===e.length)throw new SyntaxError("Expected at least one argument for member/slice expression");if(t){if(e.length>3)throw new SyntaxError("Expected 0-3 arguments for slice expression");return new L(...e)}return e[0]}function R(){let t=G();for(;a(s.MultiplicativeBinaryOperator);){const s=e[n];++n;const r=G();t=new P(s,t,r)}return t}function G(){let e=function(){let e=O();for(;a(s.Pipe);){++n;let t=$();if(!(t instanceof b))throw new SyntaxError("Expected identifier for the filter");a(s.OpenParen)&&(t=j(t)),e=new S(e,t)}return e}();for(;a(s.Is);){++n;const t=a(s.Not);t&&++n;let r=$();if(r instanceof T&&(r=new b(r.value.toString())),!(r instanceof b))throw new SyntaxError("Expected identifier for the test");e=new E(e,t,r)}return e}function $(){const t=e[n];switch(t.type){case s.NumericLiteral:return++n,new x(Number(t.value));case s.StringLiteral:return++n,new k(t.value);case s.BooleanLiteral:return++n,new 
T("true"===t.value.toLowerCase());case s.Identifier:return++n,new b(t.value);case s.OpenParen:{++n;const t=d();if(e[n].type!==s.CloseParen)throw new SyntaxError(`Expected closing parenthesis, got ${e[n].type} instead`);return++n,t}case s.OpenSquareBracket:{++n;const e=[];for(;!a(s.CloseSquareBracket);)e.push(u()),a(s.Comma)&&++n;return++n,new v(e)}case s.OpenCurlyBracket:{++n;const e=new Map;for(;!a(s.CloseCurlyBracket);){const t=u();r(s.Colon,"Expected colon between key and value in object literal");const o=u();e.set(t,o),a(s.Comma)&&++n}return++n,new F(e)}default:throw new SyntaxError(`Unexpected token: ${t.type}`)}}for(;n<e.length;)t.body.push(o());return t}function N(e,t,n=1){void 0===t&&(t=e,e=0);const s=[];for(let r=e;r<t;r+=n)s.push(r);return s}function O(e,t,n,s=1){const r=Math.sign(s);r>=0?(t=(t??=0)<0?Math.max(e.length+t,0):Math.min(t,e.length),n=(n??=e.length)<0?Math.max(e.length+n,0):Math.min(n,e.length)):(t=(t??=e.length-1)<0?Math.max(e.length+t,-1):Math.min(t,e.length-1),n=(n??=-1)<-1?Math.max(e.length+n,-1):Math.min(n,e.length-1));const o=[];for(let i=t;r*i<r*n;i+=s)o.push(e[i]);return o}function j(e){return e.replace(/\b\w/g,(e=>e.toUpperCase()))}var D=class{type="RuntimeValue";value;builtins=new Map;constructor(e=void 0){this.value=e}__bool__(){return new G(!!this.value)}},V=class extends D{type="NumericValue"},R=class extends D{type="StringValue";builtins=new Map([["upper",new X((()=>new R(this.value.toUpperCase())))],["lower",new X((()=>new R(this.value.toLowerCase())))],["strip",new X((()=>new R(this.value.trim())))],["title",new X((()=>new R(j(this.value))))],["length",new V(this.value.length)]])},G=class extends D{type="BooleanValue"},$=class extends D{type="ObjectValue";__bool__(){return new G(this.value.size>0)}builtins=new Map([["get",new X((([e,t])=>{if(!(e instanceof R))throw new Error(`Object key must be a string: got ${e.type}`);return this.value.get(e.value)??t??new Q}))],["items",new X((()=>new 
U(Array.from(this.value.entries()).map((([e,t])=>new U([new R(e),t]))))))]])},q=class extends ${type="KeywordArgumentsValue"},U=class extends D{type="ArrayValue";builtins=new Map([["length",new V(this.value.length)]]);__bool__(){return new G(this.value.length>0)}},W=class extends U{type="TupleValue"},X=class extends D{type="FunctionValue"},Q=class extends D{type="NullValue"},H=class extends D{type="UndefinedValue"},Y=class{constructor(e){this.parent=e}variables=new Map([["namespace",new X((e=>{if(0===e.length)return new $(new Map);if(1!==e.length||!(e[0]instanceof $))throw new Error("`namespace` expects either zero arguments or a single object argument");return e[0]}))]]);tests=new Map([["boolean",e=>"BooleanValue"===e.type],["callable",e=>e instanceof X],["odd",e=>{if("NumericValue"!==e.type)throw new Error(`Cannot apply test "odd" to type: ${e.type}`);return e.value%2!=0}],["even",e=>{if("NumericValue"!==e.type)throw new Error(`Cannot apply test "even" to type: ${e.type}`);return e.value%2==0}],["false",e=>"BooleanValue"===e.type&&!e.value],["true",e=>"BooleanValue"===e.type&&e.value],["string",e=>"StringValue"===e.type],["number",e=>"NumericValue"===e.type],["integer",e=>"NumericValue"===e.type&&Number.isInteger(e.value)],["iterable",e=>e instanceof U||e instanceof R],["lower",e=>{const t=e.value;return"StringValue"===e.type&&t===t.toLowerCase()}],["upper",e=>{const t=e.value;return"StringValue"===e.type&&t===t.toUpperCase()}],["none",e=>"NullValue"===e.type],["defined",e=>"UndefinedValue"!==e.type],["undefined",e=>"UndefinedValue"===e.type],["equalto",(e,t)=>e.value===t.value],["eq",(e,t)=>e.value===t.value]]);set(e,t){return this.declareVariable(e,J(t))}declareVariable(e,t){if(this.variables.has(e))throw new SyntaxError(`Variable already declared: ${e}`);return this.variables.set(e,t),t}setVariable(e,t){return this.variables.set(e,t),t}resolve(e){if(this.variables.has(e))return this;if(this.parent)return this.parent.resolve(e);throw new Error(`Unknown 
variable: ${e}`)}lookupVariable(e){try{return this.resolve(e).variables.get(e)??new H}catch{return new H}}},K=class{global;constructor(e){this.global=e??new Y}run(e){return this.evaluate(e,this.global)}evaluateBinaryExpression(e,t){const n=this.evaluate(e.left,t);switch(e.operator.value){case"and":return n.__bool__().value?this.evaluate(e.right,t):n;case"or":return n.__bool__().value?n:this.evaluate(e.right,t)}const s=this.evaluate(e.right,t);switch(e.operator.value){case"==":return new G(n.value==s.value);case"!=":return new G(n.value!=s.value)}if(n instanceof H||s instanceof H)throw new Error("Cannot perform operation on undefined values");if(n instanceof Q||s instanceof Q)throw new Error("Cannot perform operation on null values");if(n instanceof V&&s instanceof V)switch(e.operator.value){case"+":return new V(n.value+s.value);case"-":return new V(n.value-s.value);case"*":return new V(n.value*s.value);case"/":return new V(n.value/s.value);case"%":return new V(n.value%s.value);case"<":return new G(n.value<s.value);case">":return new G(n.value>s.value);case">=":return new G(n.value>=s.value);case"<=":return new G(n.value<=s.value)}else if(n instanceof U&&s instanceof U){if("+"===e.operator.value)return new U(n.value.concat(s.value))}else if(s instanceof U){const t=void 0!==s.value.find((e=>e.value===n.value));switch(e.operator.value){case"in":return new G(t);case"not in":return new G(!t)}}if((n instanceof R||s instanceof R)&&"+"===e.operator.value)return new R(n.value.toString()+s.value.toString());if(n instanceof R&&s instanceof R)switch(e.operator.value){case"in":return new G(s.value.includes(n.value));case"not in":return new G(!s.value.includes(n.value))}if(n instanceof R&&s instanceof $)switch(e.operator.value){case"in":return new G(s.value.has(n.value));case"not in":return new G(!s.value.has(n.value))}throw new SyntaxError(`Unknown operator "${e.operator.value}" between ${n.type} and ${s.type}`)}evaluateArguments(e,t){const n=[],s=new Map;for(const r of 
e)if("KeywordArgumentExpression"===r.type){const e=r;s.set(e.key.value,this.evaluate(e.value,t))}else{if(s.size>0)throw new Error("Positional arguments must come before keyword arguments");n.push(this.evaluate(r,t))}return[n,s]}evaluateFilterExpression(e,t){const n=this.evaluate(e.operand,t);if("Identifier"===e.filter.type){const t=e.filter;if("tojson"===t.value)return new R(Z(n));if(n instanceof U)switch(t.value){case"list":return n;case"first":return n.value[0];case"last":return n.value[n.value.length-1];case"length":return new V(n.value.length);case"reverse":return new U(n.value.reverse());case"sort":return new U(n.value.sort(((e,t)=>{if(e.type!==t.type)throw new Error(`Cannot compare different types: ${e.type} and ${t.type}`);switch(e.type){case"NumericValue":return e.value-t.value;case"StringValue":return e.value.localeCompare(t.value);default:throw new Error(`Cannot compare type: ${e.type}`)}})));default:throw new Error(`Unknown ArrayValue filter: ${t.value}`)}else if(n instanceof R)switch(t.value){case"length":return new V(n.value.length);case"upper":return new R(n.value.toUpperCase());case"lower":return new R(n.value.toLowerCase());case"title":return new R(j(n.value));case"capitalize":return new R(n.value.charAt(0).toUpperCase()+n.value.slice(1));case"trim":return new R(n.value.trim());case"indent":return new R(n.value.split("\n").map(((e,t)=>0===t||0===e.length?e:" "+e)).join("\n"));case"string":return n;default:throw new Error(`Unknown StringValue filter: ${t.value}`)}else{if(n instanceof V){if("abs"===t.value)return new V(Math.abs(n.value));throw new Error(`Unknown NumericValue filter: ${t.value}`)}if(n instanceof $)switch(t.value){case"items":return new U(Array.from(n.value.entries()).map((([e,t])=>new U([new R(e),t]))));case"length":return new V(n.value.size);default:throw new Error(`Unknown ObjectValue filter: ${t.value}`)}}throw new Error(`Cannot apply filter "${t.value}" to type: ${n.type}`)}if("CallExpression"===e.filter.type){const 
s=e.filter;if("Identifier"!==s.callee.type)throw new Error(`Unknown filter: ${s.callee.type}`);const r=s.callee.value;if("tojson"===r){const[,e]=this.evaluateArguments(s.args,t),r=e.get("indent")??new Q;if(!(r instanceof V||r instanceof Q))throw new Error("If set, indent must be a number");return new R(Z(n,r.value))}if(n instanceof U){switch(r){case"selectattr":{if(n.value.some((e=>!(e instanceof $))))throw new Error("`selectattr` can only be applied to array of objects");if(s.args.some((e=>"StringLiteral"!==e.type)))throw new Error("arguments of `selectattr` must be strings");const[e,r,o]=s.args.map((e=>this.evaluate(e,t)));let i;if(r){const e=t.tests.get(r.value);if(!e)throw new Error(`Unknown test: ${r.value}`);i=e}else i=(...e)=>e[0].__bool__().value;const a=n.value.filter((t=>{const n=t.value.get(e.value);return!!n&&i(n,o)}));return new U(a)}case"map":{const[,e]=this.evaluateArguments(s.args,t);if(e.has("attribute")){const t=e.get("attribute");if(!(t instanceof R))throw new Error("attribute must be a string");const s=e.get("default"),r=n.value.map((e=>{if(!(e instanceof $))throw new Error("items in map must be an object");return e.value.get(t.value)??s??new H}));return new U(r)}throw new Error("`map` expressions without `attribute` set are not currently supported.")}}throw new Error(`Unknown ArrayValue filter: ${r}`)}if(n instanceof R){if("indent"===r){const[e,r]=this.evaluateArguments(s.args,t),o=e.at(0)??r.get("width")??new V(4);if(!(o instanceof V))throw new Error("width must be a number");const i=e.at(1)??r.get("first")??new G(!1),a=e.at(2)??r.get("blank")??new G(!1),l=n.value.split("\n"),c=" ".repeat(o.value),d=l.map(((e,t)=>!i.value&&0===t||!a.value&&0===e.length?e:c+e));return new R(d.join("\n"))}throw new Error(`Unknown StringValue filter: ${r}`)}throw new Error(`Cannot apply filter "${r}" to type: ${n.type}`)}throw new Error(`Unknown filter: ${e.filter.type}`)}evaluateTestExpression(e,t){const 
n=this.evaluate(e.operand,t),s=t.tests.get(e.test.value);if(!s)throw new Error(`Unknown test: ${e.test.value}`);const r=s(n);return new G(e.negate?!r:r)}evaluateUnaryExpression(e,t){const n=this.evaluate(e.argument,t);if("not"===e.operator.value)return new G(!n.value);throw new SyntaxError(`Unknown operator: ${e.operator.value}`)}evalProgram(e,t){return this.evaluateBlock(e.body,t)}evaluateBlock(e,t){let n="";for(const s of e){const e=this.evaluate(s,t);"NullValue"!==e.type&&"UndefinedValue"!==e.type&&(n+=e.value)}return new R(n)}evaluateIdentifier(e,t){return t.lookupVariable(e.value)}evaluateCallExpression(e,t){const[n,s]=this.evaluateArguments(e.args,t);s.size>0&&n.push(new q(s));const r=this.evaluate(e.callee,t);if("FunctionValue"!==r.type)throw new Error(`Cannot call something that is not a function: got ${r.type}`);return r.value(n,t)}evaluateSliceExpression(e,t,n){if(!(e instanceof U||e instanceof R))throw new Error("Slice object must be an array or string");const s=this.evaluate(t.start,n),r=this.evaluate(t.stop,n),o=this.evaluate(t.step,n);if(!(s instanceof V||s instanceof H))throw new Error("Slice start must be numeric or undefined");if(!(r instanceof V||r instanceof H))throw new Error("Slice stop must be numeric or undefined");if(!(o instanceof V||o instanceof H))throw new Error("Slice step must be numeric or undefined");return e instanceof U?new U(O(e.value,s.value,r.value,o.value)):new R(O(Array.from(e.value),s.value,r.value,o.value).join(""))}evaluateMemberExpression(e,t){const n=this.evaluate(e.object,t);let s,r;if(e.computed){if("SliceExpression"===e.property.type)return this.evaluateSliceExpression(n,e.property,t);s=this.evaluate(e.property,t)}else s=new R(e.property.value);if(n instanceof $){if(!(s instanceof R))throw new Error(`Cannot access property with non-string: got ${s.type}`);r=n.value.get(s.value)??n.builtins.get(s.value)}else if(n instanceof U||n instanceof R)if(s instanceof V)r=n.value.at(s.value),n instanceof R&&(r=new 
R(n.value.at(s.value)));else{if(!(s instanceof R))throw new Error(`Cannot access property with non-string/non-number: got ${s.type}`);r=n.builtins.get(s.value)}else{if(!(s instanceof R))throw new Error(`Cannot access property with non-string: got ${s.type}`);r=n.builtins.get(s.value)}return r instanceof D?r:new H}evaluateSet(e,t){const n=this.evaluate(e.value,t);if("Identifier"===e.assignee.type){const s=e.assignee.value;t.setVariable(s,n)}else{if("MemberExpression"!==e.assignee.type)throw new Error(`Invalid LHS inside assignment expression: ${JSON.stringify(e.assignee)}`);{const s=e.assignee,r=this.evaluate(s.object,t);if(!(r instanceof $))throw new Error("Cannot assign to member of non-object");if("Identifier"!==s.property.type)throw new Error("Cannot assign to member with non-identifier property");r.value.set(s.property.value,n)}}return new Q}evaluateIf(e,t){const n=this.evaluate(e.test,t);return this.evaluateBlock(n.__bool__().value?e.body:e.alternate,t)}evaluateFor(e,t){const n=new Y(t);let s,r;if("SelectExpression"===e.iterable.type){const t=e.iterable;r=this.evaluate(t.iterable,n),s=t.test}else r=this.evaluate(e.iterable,n);if(!(r instanceof U))throw new Error(`Expected iterable type in for loop: got ${r.type}`);const o=[],i=[];for(let t=0;t<r.value.length;++t){const a=new Y(n),l=r.value[t];let c;if("Identifier"===e.loopvar.type)c=t=>t.setVariable(e.loopvar.value,l);else{if("TupleLiteral"!==e.loopvar.type)throw new Error(`Invalid loop variable(s): ${e.loopvar.type}`);{const t=e.loopvar;if("ArrayValue"!==l.type)throw new Error(`Cannot unpack non-iterable type: ${l.type}`);const n=l;if(t.value.length!==n.value.length)throw new Error(`Too ${t.value.length>n.value.length?"few":"many"} items to unpack`);c=e=>{for(let s=0;s<t.value.length;++s){if("Identifier"!==t.value[s].type)throw new Error(`Cannot unpack non-identifier type: 
${t.value[s].type}`);e.setVariable(t.value[s].value,n.value[s])}}}}if(s){c(a);if(!this.evaluate(s,a).__bool__().value)continue}o.push(l),i.push(c)}let a="",l=!0;for(let t=0;t<o.length;++t){const s=new Map([["index",new V(t+1)],["index0",new V(t)],["revindex",new V(o.length-t)],["revindex0",new V(o.length-t-1)],["first",new G(0===t)],["last",new G(t===o.length-1)],["length",new V(o.length)],["previtem",t>0?o[t-1]:new H],["nextitem",t<o.length-1?o[t+1]:new H]]);n.setVariable("loop",new $(s)),i[t](n);a+=this.evaluateBlock(e.body,n).value,l=!1}if(l){a+=this.evaluateBlock(e.defaultBlock,n).value}return new R(a)}evaluateMacro(e,t){return t.setVariable(e.name.value,new X(((t,n)=>{const s=new Y(n);let r;t=t.slice(),"KeywordArgumentsValue"===t.at(-1)?.type&&(r=t.pop());for(let n=0;n<e.args.length;++n){const o=e.args[n],i=t[n];if("Identifier"===o.type){const e=o;if(!i)throw new Error(`Missing positional argument: ${e.value}`);s.setVariable(e.value,i)}else{if("KeywordArgumentExpression"!==o.type)throw new Error(`Unknown argument type: ${o.type}`);{const e=o,t=i??r?.value.get(e.key.value)??this.evaluate(e.value,s);s.setVariable(e.key.value,t)}}}return this.evaluateBlock(e.body,s)}))),new Q}evaluate(e,t){if(void 0===e)return new H;switch(e.type){case"Program":return this.evalProgram(e,t);case"Set":return this.evaluateSet(e,t);case"If":return this.evaluateIf(e,t);case"For":return this.evaluateFor(e,t);case"Macro":return this.evaluateMacro(e,t);case"NumericLiteral":return new V(Number(e.value));case"StringLiteral":return new R(e.value);case"BooleanLiteral":return new G(e.value);case"ArrayLiteral":return new U(e.value.map((e=>this.evaluate(e,t))));case"TupleLiteral":return new W(e.value.map((e=>this.evaluate(e,t))));case"ObjectLiteral":{const n=new Map;for(const[s,r]of e.value){const e=this.evaluate(s,t);if(!(e instanceof R))throw new Error(`Object keys must be strings: got ${e.type}`);n.set(e.value,this.evaluate(r,t))}return new $(n)}case"Identifier":return 
this.evaluateIdentifier(e,t);case"CallExpression":return this.evaluateCallExpression(e,t);case"MemberExpression":return this.evaluateMemberExpression(e,t);case"UnaryExpression":return this.evaluateUnaryExpression(e,t);case"BinaryExpression":return this.evaluateBinaryExpression(e,t);case"FilterExpression":return this.evaluateFilterExpression(e,t);case"TestExpression":return this.evaluateTestExpression(e,t);default:throw new SyntaxError(`Unknown node type: ${e.type}`)}}};function J(e){switch(typeof e){case"number":return new V(e);case"string":return new R(e);case"boolean":return new G(e);case"undefined":return new H;case"object":return null===e?new Q:Array.isArray(e)?new U(e.map(J)):new $(new Map(Object.entries(e).map((([e,t])=>[e,J(t)]))));case"function":return new X(((t,n)=>J(e(...t.map((e=>e.value)))??null)));default:throw new Error(`Cannot convert to runtime value: ${e}`)}}function Z(e,t,n){const s=n??0;switch(e.type){case"NullValue":case"UndefinedValue":return"null";case"NumericValue":case"StringValue":case"BooleanValue":return JSON.stringify(e.value);case"ArrayValue":case"ObjectValue":{const n=t?" ".repeat(t):"",r="\n"+n.repeat(s),o=r+n;if("ArrayValue"===e.type){const n=e.value.map((e=>Z(e,t,s+1)));return t?`[${o}${n.join(`,${o}`)}${r}]`:`[${n.join(", ")}]`}{const n=Array.from(e.value.entries()).map((([e,n])=>{const r=`"${e}": ${Z(n,t,s+1)}`;return t?`${o}${r}`:r}));return t?`{${n.join(",")}${r}}`:`{${n.join(", ")}}`}}default:throw new Error(`Cannot convert to JSON: ${e.type}`)}}var ee=class{parsed;constructor(e){const t=d(e,{lstrip_blocks:!0,trim_blocks:!0});this.parsed=B(t)}render(e){const t=new Y;t.set("false",!1),t.set("true",!0),t.set("raise_exception",(e=>{throw new Error(e)})),t.set("range",N);for(const[n,s]of Object.entries(e))t.set(n,s);return new K(t).run(this.parsed).value}}},"./node_modules/onnxruntime-common/dist/esm/backend-impl.js":
@@ -91,13 +85,13 @@
91
85
  \*************************************************************/(e,t,n)=>{"use strict";n.r(t),n.d(t,{version:()=>s});const s="1.18.0"},"./src/backends/onnx.js":
92
86
  /*!******************************!*\
93
87
  !*** ./src/backends/onnx.js ***!
94
- \******************************/(e,t,n)=>{"use strict";var s,r;n.r(t),n.d(t,{Tensor:()=>l.Tensor,createInferenceSession:()=>m,deviceToExecutionProviders:()=>p,isONNXProxy:()=>w,isONNXTensor:()=>f});var o=n(/*! ../env.js */"./src/env.js"),i=n(/*! onnxruntime-node */"onnxruntime-node"),a=n(/*! onnxruntime-web/webgpu */"?9c66"),l=n(/*! onnxruntime-common */"./node_modules/onnxruntime-common/dist/esm/index.js");const c=[];let d,u;o.apis.IS_NODE_ENV?(u=i??(s||(s=n.t(i,2))),c.push("cpu"),d=["cpu"]):(u=r||(r=n.t(a,2)),o.apis.IS_WEBGPU_AVAILABLE&&c.push("webgpu"),c.push("wasm"),d=["wasm"]);const h=u.InferenceSession;function p(e){let t=d;if(e){if(!c.includes(e))throw new Error(`Unsupported device: "${e}". Should be one of: ${c.join(", ")}.`);t=[e]}return t}let _=null;async function m(e,t){_&&await _;const n=h.create(e,t);return _??=n,await n}function f(e){return e instanceof u.Tensor}const g=u?.env;if(g?.wasm){g.wasm.wasmPaths=`https://cdn.jsdelivr.net/npm/@huggingface/transformers@${o.env.version}/dist/`,g.wasm.proxy=!o.apis.IS_WEBWORKER_ENV,"undefined"!=typeof crossOriginIsolated&&crossOriginIsolated||(g.wasm.numThreads=1);"undefined"!=typeof navigator&&/iP(hone|od|ad).+16_4.+AppleWebKit/.test(navigator.userAgent)&&(g.wasm.simd=!1)}function w(){return g?.wasm?.proxy}g?.webgpu&&(g.webgpu.powerPreference="high-performance"),o.env.backends.onnx=g},"./src/configs.js":
88
+ \******************************/(e,t,n)=>{"use strict";var s,r;n.r(t),n.d(t,{Tensor:()=>l.Tensor,createInferenceSession:()=>f,deviceToExecutionProviders:()=>_,isONNXProxy:()=>M,isONNXTensor:()=>g});var o=n(/*! ../env.js */"./src/env.js"),i=n(/*! onnxruntime-node */"onnxruntime-node"),a=n(/*! onnxruntime-web/webgpu */"?9c66"),l=n(/*! onnxruntime-common */"./node_modules/onnxruntime-common/dist/esm/index.js");const c=Object.freeze({auto:null,gpu:null,cpu:"cpu",wasm:"wasm",webgpu:"webgpu",cuda:"cuda",dml:"dml",webnn:{name:"webnn",deviceType:"cpu"},"webnn-npu":{name:"webnn",deviceType:"npu"},"webnn-gpu":{name:"webnn",deviceType:"gpu"},"webnn-cpu":{name:"webnn",deviceType:"cpu"}}),d=[];let u,h;if(o.apis.IS_NODE_ENV){switch(h=i??(s||(s=n.t(i,2))),process.platform){case"win32":d.push("dml");break;case"linux":"x64"===process.arch&&d.push("cuda")}d.push("cpu"),u=["cpu"]}else h=r||(r=n.t(a,2)),o.apis.IS_WEBNN_AVAILABLE&&d.push("webnn-npu","webnn-gpu","webnn-cpu","webnn"),o.apis.IS_WEBGPU_AVAILABLE&&d.push("webgpu"),d.push("wasm"),u=["wasm"];const p=h.InferenceSession;function _(e=null){if(!e)return u;switch(e){case"auto":return d;case"gpu":return d.filter((e=>["webgpu","cuda","dml","webnn-gpu"].includes(e)))}if(d.includes(e))return[c[e]??e];throw new Error(`Unsupported device: "${e}". Should be one of: ${d.join(", ")}.`)}let m=null;async function f(e,t){m&&await m;const n=p.create(e,t);return m??=n,await n}function g(e){return e instanceof h.Tensor}const w=h?.env;if(w?.wasm){w.wasm.wasmPaths=`https://cdn.jsdelivr.net/npm/@huggingface/transformers@${o.env.version}/dist/`,w.wasm.proxy=!o.apis.IS_WEBWORKER_ENV,"undefined"!=typeof crossOriginIsolated&&crossOriginIsolated||(w.wasm.numThreads=1);"undefined"!=typeof navigator&&/iP(hone|od|ad).+16_4.+AppleWebKit/.test(navigator.userAgent)&&(w.wasm.simd=!1)}function M(){return w?.wasm?.proxy}w?.webgpu&&(w.webgpu.powerPreference="high-performance"),o.env.backends.onnx=w},"./src/configs.js":
95
89
  /*!************************!*\
96
90
  !*** ./src/configs.js ***!
97
91
  \************************/(e,t,n)=>{"use strict";n.r(t),n.d(t,{AutoConfig:()=>l,PretrainedConfig:()=>a,getKeyValueShapes:()=>i});var s=n(/*! ./utils/core.js */"./src/utils/core.js"),r=n(/*! ./utils/hub.js */"./src/utils/hub.js");function o(e){const t={};let n={};switch(e.model_type){case"llava":case"paligemma":case"florence2":n=o(e.text_config);break;case"moondream1":n=o(e.phi_config);break;case"musicgen":n=o(e.decoder);break;case"gpt2":case"gptj":case"codegen":case"gpt_bigcode":t.num_heads="n_head",t.num_layers="n_layer",t.hidden_size="n_embd";break;case"gpt_neox":case"stablelm":case"opt":case"phi":case"phi3":case"falcon":t.num_heads="num_attention_heads",t.num_layers="num_hidden_layers",t.hidden_size="hidden_size";break;case"llama":case"cohere":case"mistral":case"starcoder2":case"qwen2":t.num_heads="num_key_value_heads",t.num_layers="num_hidden_layers",t.hidden_size="hidden_size",t.num_attention_heads="num_attention_heads";break;case"gemma":case"gemma2":t.num_heads="num_key_value_heads",t.num_layers="num_hidden_layers",t.dim_kv="head_dim";break;case"openelm":t.num_heads="num_kv_heads",t.num_layers="num_transformer_layers",t.dim_kv="head_dim";break;case"gpt_neo":case"donut-swin":t.num_heads="num_heads",t.num_layers="num_layers",t.hidden_size="hidden_size";break;case"bloom":t.num_heads="n_head",t.num_layers="n_layer",t.hidden_size="hidden_size";break;case"mpt":t.num_heads="n_heads",t.num_layers="n_layers",t.hidden_size="d_model";break;case"t5":case"mt5":case"longt5":t.num_decoder_layers="num_decoder_layers",t.num_decoder_heads="num_heads",t.decoder_dim_kv="d_kv",t.num_encoder_layers="num_layers",t.num_encoder_heads="num_heads",t.encoder_dim_kv="d_kv";break;case"bart":case"mbart":case"marian":case"whisper":case"m2m_100":case"blenderbot":case"blenderbot-small":case"florence2_language":t.num_decoder_layers="decoder_layers",t.num_decoder_heads="decoder_attention_heads",t.decoder_hidden_size="d_model",t.num_encoder_layers="encoder_layers",t.num_encoder_heads="encoder_a
ttention_heads",t.encoder_hidden_size="d_model";break;case"speecht5":t.num_decoder_layers="decoder_layers",t.num_decoder_heads="decoder_attention_heads",t.decoder_hidden_size="hidden_size",t.num_encoder_layers="encoder_layers",t.num_encoder_heads="encoder_attention_heads",t.encoder_hidden_size="hidden_size";break;case"trocr":t.num_encoder_layers=t.num_decoder_layers="decoder_layers",t.num_encoder_heads=t.num_decoder_heads="decoder_attention_heads",t.encoder_hidden_size=t.decoder_hidden_size="d_model";break;case"musicgen_decoder":t.num_encoder_layers=t.num_decoder_layers="num_hidden_layers",t.num_encoder_heads=t.num_decoder_heads="num_attention_heads",t.encoder_hidden_size=t.decoder_hidden_size="hidden_size";break;case"vision-encoder-decoder":const r=o(e.decoder),i="num_decoder_layers"in r,a=(0,s.pick)(e,["model_type","is_encoder_decoder"]);return i?(a.num_decoder_layers=r.num_decoder_layers,a.num_decoder_heads=r.num_decoder_heads,a.decoder_hidden_size=r.decoder_hidden_size,a.num_encoder_layers=r.num_encoder_layers,a.num_encoder_heads=r.num_encoder_heads,a.encoder_hidden_size=r.encoder_hidden_size):(a.num_layers=r.num_layers,a.num_heads=r.num_heads,a.hidden_size=r.hidden_size),a}const r={...n,...(0,s.pick)(e,["model_type","multi_query","is_encoder_decoder"])};for(const n in t)r[n]=e[t[n]];return r}function i(e,{prefix:t="past_key_values"}={}){const n={},s=e.normalized_config;if(s.is_encoder_decoder&&"num_encoder_heads"in s&&"num_decoder_heads"in s){const e=s.encoder_dim_kv??s.encoder_hidden_size/s.num_encoder_heads,r=s.decoder_dim_kv??s.decoder_hidden_size/s.num_decoder_heads,o=[1,s.num_encoder_heads,0,e],i=[1,s.num_decoder_heads,0,r];for(let e=0;e<s.num_decoder_layers;++e)n[`${t}.${e}.encoder.key`]=o,n[`${t}.${e}.encoder.value`]=o,n[`${t}.${e}.decoder.key`]=i,n[`${t}.${e}.decoder.value`]=i}else{const e=s.num_heads,r=s.num_layers,o=s.dim_kv??s.hidden_size/(s.num_attention_heads??e);if("falcon"===s.model_type){const s=[1*e,0,o];for(let 
e=0;e<r;++e)n[`${t}.${e}.key`]=s,n[`${t}.${e}.value`]=s}else if(s.multi_query){const s=[1*e,0,2*o];for(let e=0;e<r;++e)n[`${t}.${e}.key_value`]=s}else if("bloom"===s.model_type){const s=[1*e,o,0],i=[1*e,0,o];for(let e=0;e<r;++e)n[`${t}.${e}.key`]=s,n[`${t}.${e}.value`]=i}else if("openelm"===s.model_type)for(let s=0;s<r;++s){const r=[1,e[s],0,o];n[`${t}.${s}.key`]=r,n[`${t}.${s}.value`]=r}else{const s=[1,e,0,o];for(let e=0;e<r;++e)n[`${t}.${e}.key`]=s,n[`${t}.${e}.value`]=s}}return n}class a{max_position_embeddings;constructor(e){this.model_type=null,this.is_encoder_decoder=!1,Object.assign(this,e),this.normalized_config=o(this)}static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:s=null,local_files_only:o=!1,revision:i="main"}={}){!n||n instanceof a||(n=new a(n));const l=n??await async function(e,t){return await(0,r.getModelJSON)(e,"config.json",!0,t)}(e,{progress_callback:t,config:n,cache_dir:s,local_files_only:o,revision:i});return new this(l)}}class l{static async from_pretrained(...e){return a.from_pretrained(...e)}}},"./src/env.js":
98
92
  /*!********************!*\
99
93
  !*** ./src/env.js ***!
100
- \********************/(e,t,n)=>{"use strict";n.r(t),n.d(t,{apis:()=>_,env:()=>M});var s=n(/*! fs */"?569f"),r=n(/*! path */"?3f59"),o=n(/*! url */"?154a");const i="undefined"!=typeof self,a=i&&"DedicatedWorkerGlobalScope"===self.constructor.name,l=i&&"caches"in self,c="undefined"!=typeof navigator&&"gpu"in navigator,d="undefined"!=typeof process,u=d&&"node"===process?.release?.name,h=!b(s),p=!b(r),_=Object.freeze({IS_BROWSER_ENV:i,IS_WEBWORKER_ENV:a,IS_WEB_CACHE_AVAILABLE:l,IS_WEBGPU_AVAILABLE:c,IS_PROCESS_AVAILABLE:d,IS_NODE_ENV:u,IS_FS_AVAILABLE:h,IS_PATH_AVAILABLE:p}),m=h&&p,f=m?r.dirname(r.dirname(o.fileURLToPath("file:///workspaces/transformers.js/src/env.js"))):"./",g=m?r.join(f,"/.cache/"):null,w="/models/",M={version:"3.0.0-alpha.5",backends:{onnx:{},tfjs:{}},allowRemoteModels:!0,remoteHost:"https://huggingface.co/",remotePathTemplate:"{model}/resolve/{revision}/",allowLocalModels:!i,localModelPath:m?r.join(f,w):w,useFS:h,useBrowserCache:l,useFSCache:h,cacheDir:g,useCustomCache:!1,customCache:null};function b(e){return 0===Object.keys(e).length}},"./src/generation/configuration_utils.js":
94
+ \********************/(e,t,n)=>{"use strict";n.r(t),n.d(t,{apis:()=>m,env:()=>b});var s=n(/*! fs */"fs"),r=n(/*! path */"path"),o=n(/*! url */"url");const i="undefined"!=typeof self,a=i&&"DedicatedWorkerGlobalScope"===self.constructor.name,l=i&&"caches"in self,c="undefined"!=typeof navigator&&"gpu"in navigator,d="undefined"!=typeof navigator&&"ml"in navigator,u="undefined"!=typeof process,h=u&&"node"===process?.release?.name,p=!y(s),_=!y(r),m=Object.freeze({IS_BROWSER_ENV:i,IS_WEBWORKER_ENV:a,IS_WEB_CACHE_AVAILABLE:l,IS_WEBGPU_AVAILABLE:c,IS_WEBNN_AVAILABLE:d,IS_PROCESS_AVAILABLE:u,IS_NODE_ENV:h,IS_FS_AVAILABLE:p,IS_PATH_AVAILABLE:_}),f=p&&_,g=f?r.dirname(r.dirname(o.fileURLToPath("file:///workspaces/transformers.js/src/env.js"))):"./",w=f?r.join(g,"/.cache/"):null,M="/models/",b={version:"3.0.0-alpha.7",backends:{onnx:{},tfjs:{}},allowRemoteModels:!0,remoteHost:"https://huggingface.co/",remotePathTemplate:"{model}/resolve/{revision}/",allowLocalModels:!i,localModelPath:f?r.join(g,M):M,useFS:p,useBrowserCache:l,useFSCache:p,cacheDir:w,useCustomCache:!1,customCache:null};function y(e){return 0===Object.keys(e).length}},"./src/generation/configuration_utils.js":
101
95
  /*!***********************************************!*\
102
96
  !*** ./src/generation/configuration_utils.js ***!
103
97
  \***********************************************/(e,t,n)=>{"use strict";n.r(t),n.d(t,{GenerationConfig:()=>r});var s=n(/*! ../utils/core.js */"./src/utils/core.js");class r{max_length=20;max_new_tokens=null;min_length=0;min_new_tokens=null;early_stopping=!1;max_time=null;do_sample=!1;num_beams=1;num_beam_groups=1;penalty_alpha=null;use_cache=!0;temperature=1;top_k=50;top_p=1;typical_p=1;epsilon_cutoff=0;eta_cutoff=0;diversity_penalty=0;repetition_penalty=1;encoder_repetition_penalty=1;length_penalty=1;no_repeat_ngram_size=0;bad_words_ids=null;force_words_ids=null;renormalize_logits=!1;constraints=null;forced_bos_token_id=null;forced_eos_token_id=null;remove_invalid_values=!1;exponential_decay_length_penalty=null;suppress_tokens=null;begin_suppress_tokens=null;forced_decoder_ids=null;guidance_scale=null;num_return_sequences=1;output_attentions=!1;output_hidden_states=!1;output_scores=!1;return_dict_in_generate=!1;pad_token_id=null;bos_token_id=null;eos_token_id=null;encoder_no_repeat_ngram_size=0;decoder_start_token_id=null;generation_kwargs={};constructor(e){Object.assign(this,(0,s.pick)(e,Object.getOwnPropertyNames(this)))}}},"./src/generation/logits_process.js":
@@ -112,10 +106,10 @@
112
106
  \*********************************************/(e,t,n)=>{"use strict";n.r(t),n.d(t,{EosTokenCriteria:()=>a,InterruptableStoppingCriteria:()=>l,MaxLengthCriteria:()=>i,StoppingCriteria:()=>r,StoppingCriteriaList:()=>o});var s=n(/*! ../utils/generic.js */"./src/utils/generic.js");class r extends s.Callable{_call(e,t){throw Error("StoppingCriteria needs to be subclassed")}}class o extends s.Callable{constructor(){super(),this.criteria=[]}push(e){this.criteria.push(e)}extend(e){e instanceof o?e=e.criteria:e instanceof r&&(e=[e]),this.criteria.push(...e)}_call(e,t){const n=new Array(e.length).fill(!1);for(const s of this.criteria){const r=s(e,t);for(let e=0;e<n.length;++e)n[e]||=r[e]}return n}[Symbol.iterator](){return this.criteria.values()}}class i extends r{constructor(e,t=null){super(),this.max_length=e,this.max_position_embeddings=t}_call(e){return e.map((e=>e.length>=this.max_length))}}class a extends r{constructor(e){super(),Array.isArray(e)||(e=[e]),this.eos_token_id=e}_call(e,t){return e.map((e=>{const t=e.at(-1);return this.eos_token_id.some((e=>t==e))}))}}class l extends r{constructor(){super(),this.interrupted=!1}interrupt(){this.interrupted=!0}reset(){this.interrupted=!1}_call(e,t){return new Array(e.length).fill(this.interrupted)}}},"./src/generation/streamers.js":
113
107
  /*!*************************************!*\
114
108
  !*** ./src/generation/streamers.js ***!
115
- \*************************************/(e,t,n)=>{"use strict";n.r(t),n.d(t,{BaseStreamer:()=>i,TextStreamer:()=>l,WhisperTextStreamer:()=>c});var s=n(/*! ../utils/core.js */"./src/utils/core.js"),r=n(/*! ../tokenizers.js */"./src/tokenizers.js"),o=n(/*! ../env.js */"./src/env.js");class i{put(e){throw Error("Not implemented")}end(){throw Error("Not implemented")}}const a=o.apis.IS_PROCESS_AVAILABLE?e=>process.stdout.write(e):e=>console.log(e);class l extends i{constructor(e,{skip_prompt:t=!1,callback_function:n=null,token_callback_function:s=null,decode_kwargs:r={},...o}={}){super(),this.tokenizer=e,this.skip_prompt=t,this.callback_function=n??a,this.token_callback_function=s,this.decode_kwargs={...r,...o},this.token_cache=[],this.print_len=0,this.next_tokens_are_prompt=!0}put(e){if(e.length>1)throw Error("TextStreamer only supports batch size of 1");const t=e[0];if(this.token_callback_function?.(t),this.skip_prompt&&this.next_tokens_are_prompt)return void(this.next_tokens_are_prompt=!1);this.token_cache=(0,s.mergeArrays)(this.token_cache,t);const n=this.tokenizer.decode(this.token_cache,this.decode_kwargs);let o;n.endsWith("\n")?(o=n.slice(this.print_len),this.token_cache=[],this.print_len=0):n.length>0&&(0,r.is_chinese_char)(n.charCodeAt(n.length-1))?(o=n.slice(this.print_len),this.print_len+=o.length):(o=n.slice(this.print_len,n.lastIndexOf(" ")+1),this.print_len+=o.length),this.on_finalized_text(o,!1)}end(){let e;if(this.token_cache.length>0){e=this.tokenizer.decode(this.token_cache,this.decode_kwargs).slice(this.print_len),this.token_cache=[],this.print_len=0}else e="";this.next_tokens_are_prompt=!0,this.on_finalized_text(e,!0)}on_finalized_text(e,t){e.length>0&&this.callback_function?.(e),t&&this.callback_function===a&&o.apis.IS_PROCESS_AVAILABLE&&this.callback_function?.("\n")}}class c extends 
l{constructor(e,{skip_prompt:t=!1,callback_function:n=null,token_callback_function:s=null,on_chunk_start:r=null,on_chunk_end:o=null,on_finalize:i=null,time_precision:a=.02,skip_special_tokens:l=!0,decode_kwargs:c={}}={}){super(e,{skip_prompt:t,callback_function:n,token_callback_function:s,decode_kwargs:{skip_special_tokens:l,...c}}),this.timestamp_begin=e.timestamp_begin,this.on_chunk_start=r,this.on_chunk_end=o,this.on_finalize=i,this.time_precision=a,this.waiting_for_timestamp=!1}put(e){if(e.length>1)throw Error("WhisperTextStreamer only supports batch size of 1");const t=e[0];if(1===t.length){const n=Number(t[0])-this.timestamp_begin;if(n>=0){const t=n*this.time_precision;this.waiting_for_timestamp?this.on_chunk_end?.(t):this.on_chunk_start?.(t),this.waiting_for_timestamp=!this.waiting_for_timestamp,e=[[]]}}return super.put(e)}end(){super.end(),this.on_finalize?.()}}},"./src/models.js":
109
+ \*************************************/(e,t,n)=>{"use strict";n.r(t),n.d(t,{BaseStreamer:()=>i,TextStreamer:()=>l,WhisperTextStreamer:()=>c});var s=n(/*! ../utils/core.js */"./src/utils/core.js"),r=n(/*! ../tokenizers.js */"./src/tokenizers.js"),o=n(/*! ../env.js */"./src/env.js");class i{put(e){throw Error("Not implemented")}end(){throw Error("Not implemented")}}const a=o.apis.IS_PROCESS_AVAILABLE?e=>process.stdout.write(e):e=>console.log(e);class l extends i{constructor(e,{skip_prompt:t=!1,callback_function:n=null,token_callback_function:s=null,decode_kwargs:r={},...o}={}){super(),this.tokenizer=e,this.skip_prompt=t,this.callback_function=n??a,this.token_callback_function=s,this.decode_kwargs={...r,...o},this.token_cache=[],this.print_len=0,this.next_tokens_are_prompt=!0}put(e){if(e.length>1)throw Error("TextStreamer only supports batch size of 1");if(this.skip_prompt&&this.next_tokens_are_prompt)return void(this.next_tokens_are_prompt=!1);const t=e[0];this.token_callback_function?.(t),this.token_cache=(0,s.mergeArrays)(this.token_cache,t);const n=this.tokenizer.decode(this.token_cache,this.decode_kwargs);let o;n.endsWith("\n")?(o=n.slice(this.print_len),this.token_cache=[],this.print_len=0):n.length>0&&(0,r.is_chinese_char)(n.charCodeAt(n.length-1))?(o=n.slice(this.print_len),this.print_len+=o.length):(o=n.slice(this.print_len,n.lastIndexOf(" ")+1),this.print_len+=o.length),this.on_finalized_text(o,!1)}end(){let e;if(this.token_cache.length>0){e=this.tokenizer.decode(this.token_cache,this.decode_kwargs).slice(this.print_len),this.token_cache=[],this.print_len=0}else e="";this.next_tokens_are_prompt=!0,this.on_finalized_text(e,!0)}on_finalized_text(e,t){e.length>0&&this.callback_function?.(e),t&&this.callback_function===a&&o.apis.IS_PROCESS_AVAILABLE&&this.callback_function?.("\n")}}class c extends 
l{constructor(e,{skip_prompt:t=!1,callback_function:n=null,token_callback_function:s=null,on_chunk_start:r=null,on_chunk_end:o=null,on_finalize:i=null,time_precision:a=.02,skip_special_tokens:l=!0,decode_kwargs:c={}}={}){super(e,{skip_prompt:t,callback_function:n,token_callback_function:s,decode_kwargs:{skip_special_tokens:l,...c}}),this.timestamp_begin=e.timestamp_begin,this.on_chunk_start=r,this.on_chunk_end=o,this.on_finalize=i,this.time_precision=a,this.waiting_for_timestamp=!1}put(e){if(e.length>1)throw Error("WhisperTextStreamer only supports batch size of 1");const t=e[0];if(1===t.length){const n=Number(t[0])-this.timestamp_begin;if(n>=0){const t=n*this.time_precision;this.waiting_for_timestamp?this.on_chunk_end?.(t):this.on_chunk_start?.(t),this.waiting_for_timestamp=!this.waiting_for_timestamp,e=[[]]}}return super.put(e)}end(){super.end(),this.on_finalize?.()}}},"./src/models.js":
116
110
  /*!***********************!*\
117
111
  !*** ./src/models.js ***!
118
- \***********************/(e,t,n)=>{"use strict";n.r(t),n.d(t,{ASTForAudioClassification:()=>nn,ASTModel:()=>tn,ASTPreTrainedModel:()=>en,AlbertForMaskedLM:()=>ut,AlbertForQuestionAnswering:()=>dt,AlbertForSequenceClassification:()=>ct,AlbertModel:()=>lt,AlbertPreTrainedModel:()=>at,AutoModel:()=>na,AutoModelForAudioClassification:()=>ba,AutoModelForAudioFrameClassification:()=>xa,AutoModelForCTC:()=>Ma,AutoModelForCausalLM:()=>ca,AutoModelForDepthEstimation:()=>Ca,AutoModelForDocumentQuestionAnswering:()=>ka,AutoModelForImageClassification:()=>pa,AutoModelForImageFeatureExtraction:()=>Fa,AutoModelForImageMatting:()=>Ta,AutoModelForImageSegmentation:()=>_a,AutoModelForImageToImage:()=>va,AutoModelForMaskGeneration:()=>wa,AutoModelForMaskedLM:()=>da,AutoModelForObjectDetection:()=>fa,AutoModelForQuestionAnswering:()=>ua,AutoModelForSemanticSegmentation:()=>ma,AutoModelForSeq2SeqLM:()=>oa,AutoModelForSequenceClassification:()=>sa,AutoModelForSpeechSeq2Seq:()=>ia,AutoModelForTextToSpectrogram:()=>aa,AutoModelForTextToWaveform:()=>la,AutoModelForTokenClassification:()=>ra,AutoModelForVision2Seq:()=>ha,AutoModelForXVector:()=>ya,AutoModelForZeroShotObjectDetection:()=>ga,BartForConditionalGeneration:()=>kt,BartForSequenceClassification:()=>Tt,BartModel:()=>xt,BartPretrainedModel:()=>yt,BaseModelOutput:()=>$,BeitForImageClassification:()=>Us,BeitModel:()=>qs,BeitPreTrainedModel:()=>$s,BertForMaskedLM:()=>W,BertForQuestionAnswering:()=>H,BertForSequenceClassification:()=>X,BertForTokenClassification:()=>Q,BertModel:()=>U,BertPreTrainedModel:()=>q,BlenderbotForConditionalGeneration:()=>zt,BlenderbotModel:()=>Et,BlenderbotPreTrainedModel:()=>At,BlenderbotSmallForConditionalGeneration:()=>Bt,BlenderbotSmallModel:()=>It,BlenderbotSmallPreTrainedModel:()=>Lt,BloomForCausalLM:()=>fs,BloomModel:()=>ms,BloomPreTrainedModel:()=>_s,CLIPModel:()=>_n,CLIPPreTrainedModel:()=>pn,CLIPSegForImageSegmentation:()=>vn,CLIPSegModel:()=>Tn,CLIPSegPreTrainedModel:()=>kn,CLIPTextModelWithProjec
tion:()=>mn,CLIPVisionModelWithProjection:()=>fn,CamembertForMaskedLM:()=>we,CamembertForQuestionAnswering:()=>ye,CamembertForSequenceClassification:()=>Me,CamembertForTokenClassification:()=>be,CamembertModel:()=>ge,CamembertPreTrainedModel:()=>fe,CausalLMOutput:()=>Ia,CausalLMOutputWithPast:()=>Ba,ChineseCLIPModel:()=>xn,ChineseCLIPPreTrainedModel:()=>yn,ClapAudioModelWithProjection:()=>Ho,ClapModel:()=>Xo,ClapPreTrainedModel:()=>Wo,ClapTextModelWithProjection:()=>Qo,CodeGenForCausalLM:()=>$n,CodeGenModel:()=>Gn,CodeGenPreTrainedModel:()=>Rn,CohereForCausalLM:()=>Hn,CohereModel:()=>Qn,CoherePreTrainedModel:()=>Xn,ConvBertForMaskedLM:()=>ie,ConvBertForQuestionAnswering:()=>ce,ConvBertForSequenceClassification:()=>ae,ConvBertForTokenClassification:()=>le,ConvBertModel:()=>oe,ConvBertPreTrainedModel:()=>re,ConvNextForImageClassification:()=>Ar,ConvNextModel:()=>Sr,ConvNextPreTrainedModel:()=>Pr,ConvNextV2ForImageClassification:()=>Lr,ConvNextV2Model:()=>zr,ConvNextV2PreTrainedModel:()=>Er,DPTForDepthEstimation:()=>br,DPTModel:()=>Mr,DPTPreTrainedModel:()=>wr,DebertaForMaskedLM:()=>Te,DebertaForQuestionAnswering:()=>Fe,DebertaForSequenceClassification:()=>ve,DebertaForTokenClassification:()=>Ce,DebertaModel:()=>ke,DebertaPreTrainedModel:()=>xe,DebertaV2ForMaskedLM:()=>Ae,DebertaV2ForQuestionAnswering:()=>Le,DebertaV2ForSequenceClassification:()=>Ee,DebertaV2ForTokenClassification:()=>ze,DebertaV2Model:()=>Se,DebertaV2PreTrainedModel:()=>Pe,DeiTForImageClassification:()=>lr,DeiTModel:()=>ar,DeiTPreTrainedModel:()=>ir,DepthAnythingForDepthEstimation:()=>xr,DepthAnythingPreTrainedModel:()=>yr,DetrForObjectDetection:()=>Qs,DetrForSegmentation:()=>Hs,DetrModel:()=>Xs,DetrObjectDetectionOutput:()=>Ys,DetrPreTrainedModel:()=>Ws,DetrSegmentationOutput:()=>Ks,Dinov2ForImageClassification:()=>Nr,Dinov2Model:()=>Br,Dinov2PreTrainedModel:()=>Ir,DistilBertForMaskedLM:()=>De,DistilBertForQuestionAnswering:()=>je,DistilBertForSequenceClassification:()=>Ne,DistilBertForTokenClassific
ation:()=>Oe,DistilBertModel:()=>Be,DistilBertPreTrainedModel:()=>Ie,DonutSwinModel:()=>Fr,DonutSwinPreTrainedModel:()=>Cr,EfficientNetForImageClassification:()=>ai,EfficientNetModel:()=>ii,EfficientNetPreTrainedModel:()=>oi,ElectraForMaskedLM:()=>he,ElectraForQuestionAnswering:()=>me,ElectraForSequenceClassification:()=>pe,ElectraForTokenClassification:()=>_e,ElectraModel:()=>ue,ElectraPreTrainedModel:()=>de,EsmForMaskedLM:()=>Ge,EsmForSequenceClassification:()=>$e,EsmForTokenClassification:()=>qe,EsmModel:()=>Re,EsmPreTrainedModel:()=>Ve,FalconForCausalLM:()=>Uo,FalconModel:()=>qo,FalconPreTrainedModel:()=>$o,FastViTForImageClassification:()=>Ps,FastViTModel:()=>Fs,FastViTPreTrainedModel:()=>Cs,Florence2ForConditionalGeneration:()=>hn,Florence2PreTrainedModel:()=>un,GLPNForDepthEstimation:()=>vr,GLPNModel:()=>Tr,GLPNPreTrainedModel:()=>kr,GPT2LMHeadModel:()=>Pn,GPT2Model:()=>Fn,GPT2PreTrainedModel:()=>Cn,GPTBigCodeForCausalLM:()=>Vn,GPTBigCodeModel:()=>Dn,GPTBigCodePreTrainedModel:()=>jn,GPTJForCausalLM:()=>On,GPTJModel:()=>Nn,GPTJPreTrainedModel:()=>Bn,GPTNeoForCausalLM:()=>En,GPTNeoModel:()=>An,GPTNeoPreTrainedModel:()=>Sn,GPTNeoXForCausalLM:()=>In,GPTNeoXModel:()=>Ln,GPTNeoXPreTrainedModel:()=>zn,Gemma2ForCausalLM:()=>ts,Gemma2Model:()=>es,Gemma2PreTrainedModel:()=>Zn,GemmaForCausalLM:()=>Jn,GemmaModel:()=>Kn,GemmaPreTrainedModel:()=>Yn,HubertForCTC:()=>xo,HubertForSequenceClassification:()=>ko,HubertModel:()=>yo,HubertPreTrainedModel:()=>bo,ImageMattingOutput:()=>Na,LlamaForCausalLM:()=>Wn,LlamaModel:()=>Un,LlamaPreTrainedModel:()=>qn,LlavaForConditionalGeneration:()=>cn,LlavaPreTrainedModel:()=>ln,LongT5ForConditionalGeneration:()=>gt,LongT5Model:()=>ft,LongT5PreTrainedModel:()=>mt,M2M100ForConditionalGeneration:()=>Hr,M2M100Model:()=>Qr,M2M100PreTrainedModel:()=>Xr,MBartForCausalLM:()=>St,MBartForConditionalGeneration:()=>Ft,MBartForSequenceClassification:()=>Pt,MBartModel:()=>Ct,MBartPreTrainedModel:()=>vt,MPNetForMaskedLM:()=>Je,MPNetForQuestionAnswering:(
)=>tt,MPNetForSequenceClassification:()=>Ze,MPNetForTokenClassification:()=>et,MPNetModel:()=>Ke,MPNetPreTrainedModel:()=>Ye,MT5ForConditionalGeneration:()=>bt,MT5Model:()=>Mt,MT5PreTrainedModel:()=>wt,MarianMTModel:()=>Wr,MarianModel:()=>Ur,MarianPreTrainedModel:()=>qr,MaskedLMOutput:()=>za,MistralForCausalLM:()=>Do,MistralModel:()=>jo,MistralPreTrainedModel:()=>Oo,MobileBertForMaskedLM:()=>Xe,MobileBertForQuestionAnswering:()=>He,MobileBertForSequenceClassification:()=>Qe,MobileBertModel:()=>We,MobileBertPreTrainedModel:()=>Ue,MobileNetV1ForImageClassification:()=>_i,MobileNetV1Model:()=>pi,MobileNetV1PreTrainedModel:()=>hi,MobileNetV2ForImageClassification:()=>gi,MobileNetV2Model:()=>fi,MobileNetV2PreTrainedModel:()=>mi,MobileNetV3ForImageClassification:()=>bi,MobileNetV3Model:()=>Mi,MobileNetV3PreTrainedModel:()=>wi,MobileNetV4ForImageClassification:()=>ki,MobileNetV4Model:()=>xi,MobileNetV4PreTrainedModel:()=>yi,MobileViTForImageClassification:()=>Ls,MobileViTModel:()=>zs,MobileViTPreTrainedModel:()=>Es,MobileViTV2ForImageClassification:()=>Ns,MobileViTV2Model:()=>Bs,MobileViTV2PreTrainedModel:()=>Is,ModelOutput:()=>G,Moondream1ForConditionalGeneration:()=>dn,MptForCausalLM:()=>Ms,MptModel:()=>ws,MptPreTrainedModel:()=>gs,MusicgenForCausalLM:()=>di,MusicgenForConditionalGeneration:()=>ui,MusicgenModel:()=>ci,MusicgenPreTrainedModel:()=>li,NomicBertModel:()=>K,NomicBertPreTrainedModel:()=>Y,OPTForCausalLM:()=>xs,OPTModel:()=>ys,OPTPreTrainedModel:()=>bs,OpenELMForCausalLM:()=>rs,OpenELMModel:()=>ss,OpenELMPreTrainedModel:()=>ns,OwlViTForObjectDetection:()=>Ds,OwlViTModel:()=>js,OwlViTPreTrainedModel:()=>Os,Owlv2ForObjectDetection:()=>Gs,Owlv2Model:()=>Rs,Owlv2PreTrainedModel:()=>Vs,Phi3ForCausalLM:()=>ps,Phi3Model:()=>hs,Phi3PreTrainedModel:()=>us,PhiForCausalLM:()=>ds,PhiModel:()=>cs,PhiPreTrainedModel:()=>ls,PreTrainedModel:()=>R,PretrainedMixin:()=>Ti,PyAnnoteForAudioFrameClassification:()=>so,PyAnnoteModel:()=>no,PyAnnotePreTrainedModel:()=>to,QuestionAnswer
ingModelOutput:()=>La,Qwen2ForCausalLM:()=>as,Qwen2Model:()=>is,Qwen2PreTrainedModel:()=>os,RTDetrForObjectDetection:()=>er,RTDetrModel:()=>Zs,RTDetrObjectDetectionOutput:()=>tr,RTDetrPreTrainedModel:()=>Js,ResNetForImageClassification:()=>ur,ResNetModel:()=>dr,ResNetPreTrainedModel:()=>cr,RoFormerForMaskedLM:()=>ee,RoFormerForQuestionAnswering:()=>se,RoFormerForSequenceClassification:()=>te,RoFormerForTokenClassification:()=>ne,RoFormerModel:()=>Z,RoFormerPreTrainedModel:()=>J,RobertaForMaskedLM:()=>jt,RobertaForQuestionAnswering:()=>Rt,RobertaForSequenceClassification:()=>Dt,RobertaForTokenClassification:()=>Vt,RobertaModel:()=>Ot,RobertaPreTrainedModel:()=>Nt,SamImageSegmentationOutput:()=>$r,SamModel:()=>Gr,SamPreTrainedModel:()=>Rr,SegformerForImageClassification:()=>ei,SegformerForSemanticSegmentation:()=>ti,SegformerModel:()=>Zo,SegformerPreTrainedModel:()=>Jo,Seq2SeqLMOutput:()=>Pa,SequenceClassifierOutput:()=>Sa,SiglipModel:()=>wn,SiglipPreTrainedModel:()=>gn,SiglipTextModel:()=>Mn,SiglipVisionModel:()=>bn,SpeechT5ForSpeechToText:()=>zo,SpeechT5ForTextToSpeech:()=>Lo,SpeechT5HifiGan:()=>Io,SpeechT5Model:()=>Eo,SpeechT5PreTrainedModel:()=>Ao,SqueezeBertForMaskedLM:()=>rt,SqueezeBertForQuestionAnswering:()=>it,SqueezeBertForSequenceClassification:()=>ot,SqueezeBertModel:()=>st,SqueezeBertPreTrainedModel:()=>nt,StableLmForCausalLM:()=>ri,StableLmModel:()=>si,StableLmPreTrainedModel:()=>ni,Starcoder2ForCausalLM:()=>Go,Starcoder2Model:()=>Ro,Starcoder2PreTrainedModel:()=>Vo,Swin2SRForImageSuperResolution:()=>gr,Swin2SRModel:()=>fr,Swin2SRPreTrainedModel:()=>mr,SwinForImageClassification:()=>_r,SwinModel:()=>pr,SwinPreTrainedModel:()=>hr,T5ForConditionalGeneration:()=>_t,T5Model:()=>pt,T5PreTrainedModel:()=>ht,TableTransformerForObjectDetection:()=>rr,TableTransformerModel:()=>sr,TableTransformerObjectDetectionOutput:()=>or,TableTransformerPreTrainedModel:()=>nr,TokenClassifierOutput:()=>Ea,TrOCRForCausalLM:()=>No,TrOCRPreTrainedModel:()=>Bo,UniSpeechForCTC:()=>l
o,UniSpeechForSequenceClassification:()=>co,UniSpeechModel:()=>ao,UniSpeechPreTrainedModel:()=>io,UniSpeechSatForAudioFrameClassification:()=>mo,UniSpeechSatForCTC:()=>po,UniSpeechSatForSequenceClassification:()=>_o,UniSpeechSatModel:()=>ho,UniSpeechSatPreTrainedModel:()=>uo,ViTForImageClassification:()=>vs,ViTModel:()=>Ts,ViTPreTrainedModel:()=>ks,VisionEncoderDecoderModel:()=>an,VitMatteForImageMatting:()=>As,VitMattePreTrainedModel:()=>Ss,VitsModel:()=>Ko,VitsModelOutput:()=>Oa,VitsPreTrainedModel:()=>Yo,Wav2Vec2BertForCTC:()=>wo,Wav2Vec2BertForSequenceClassification:()=>Mo,Wav2Vec2BertModel:()=>go,Wav2Vec2BertPreTrainedModel:()=>fo,Wav2Vec2ForAudioFrameClassification:()=>eo,Wav2Vec2ForCTC:()=>Jr,Wav2Vec2ForSequenceClassification:()=>Zr,Wav2Vec2Model:()=>Kr,Wav2Vec2PreTrainedModel:()=>Yr,WavLMForAudioFrameClassification:()=>So,WavLMForCTC:()=>Co,WavLMForSequenceClassification:()=>Fo,WavLMForXVector:()=>Po,WavLMModel:()=>vo,WavLMPreTrainedModel:()=>To,WeSpeakerResNetModel:()=>oo,WeSpeakerResNetPreTrainedModel:()=>ro,WhisperForConditionalGeneration:()=>on,WhisperModel:()=>rn,WhisperPreTrainedModel:()=>sn,XLMForQuestionAnswering:()=>Xt,XLMForSequenceClassification:()=>Ut,XLMForTokenClassification:()=>Wt,XLMModel:()=>$t,XLMPreTrainedModel:()=>Gt,XLMRobertaForMaskedLM:()=>Yt,XLMRobertaForQuestionAnswering:()=>Zt,XLMRobertaForSequenceClassification:()=>Kt,XLMRobertaForTokenClassification:()=>Jt,XLMRobertaModel:()=>Ht,XLMRobertaPreTrainedModel:()=>Qt,XLMWithLMHeadModel:()=>qt,XVectorOutput:()=>Aa,YolosForObjectDetection:()=>Dr,YolosModel:()=>jr,YolosObjectDetectionOutput:()=>Vr,YolosPreTrainedModel:()=>Or});var s=n(/*! ./configs.js */"./src/configs.js"),r=n(/*! ./backends/onnx.js */"./src/backends/onnx.js"),o=n(/*! ./utils/dtypes.js */"./src/utils/dtypes.js"),i=n(/*! ./utils/generic.js */"./src/utils/generic.js"),a=n(/*! ./utils/core.js */"./src/utils/core.js"),l=n(/*! ./utils/hub.js */"./src/utils/hub.js"),c=n(/*! 
./generation/logits_process.js */"./src/generation/logits_process.js"),d=n(/*! ./generation/configuration_utils.js */"./src/generation/configuration_utils.js"),u=n(/*! ./utils/tensor.js */"./src/utils/tensor.js"),h=n(/*! ./utils/maths.js */"./src/utils/maths.js"),p=n(/*! ./generation/stopping_criteria.js */"./src/generation/stopping_criteria.js"),_=n(/*! ./generation/logits_sampler.js */"./src/generation/logits_sampler.js"),m=n(/*! ./env.js */"./src/env.js"),f=n(/*! ./models/whisper/generation_whisper.js */"./src/models/whisper/generation_whisper.js"),g=n(/*! ./models/whisper/common_whisper.js */"./src/models/whisper/common_whisper.js");const w=0,M=1,b=2,y=3,x=4,k=5,T=6,v=7,C=new Map,F=new Map,P=new Map;async function S(e,t,n){return Object.fromEntries(await Promise.all(Object.keys(t).map((async i=>{const{buffer:a,session_options:c}=await async function(e,t,n){let i=n.device;i&&"string"!=typeof i&&(i.hasOwnProperty(t)?i=i[t]:(console.warn(`device not specified for "${t}". Using the default device.`),i=null));const a=(0,r.deviceToExecutionProviders)(i);let c=n.dtype;if("string"!=typeof c&&(c&&c.hasOwnProperty(t)?c=c[t]:(c=o.DEFAULT_DEVICE_DTYPE_MAPPING[a[0]],console.warn(`dtype not specified for "${t}". Using the default dtype for this device (${c}).`))),!o.DEFAULT_DTYPE_SUFFIX_MAPPING.hasOwnProperty(c))throw new Error(`Invalid dtype: ${c}. 
Should be one of: ${Object.keys(o.DATA_TYPES).join(", ")}`);if(c===o.DATA_TYPES.fp16&&"webgpu"===i&&!await(0,o.isWebGpuFp16Supported)())throw new Error(`The device (${i}) does not support fp16.`);const d=o.DEFAULT_DTYPE_SUFFIX_MAPPING[c],u=`${n.subfolder??""}/${t}${d}.onnx`,h={...n.session_options}??{};h.executionProviders??=a;const p=(0,l.getModelFile)(e,u,!0,n);let _=[];if(n.use_external_data_format&&(!0===n.use_external_data_format||"object"==typeof n.use_external_data_format&&n.use_external_data_format.hasOwnProperty(t)&&!0===n.use_external_data_format[t])){if(m.apis.IS_NODE_ENV)throw new Error("External data format is not yet supported in Node.js");const s=`${t}${d}.onnx_data`,r=`${n.subfolder??""}/${s}`;_.push(new Promise((async(t,o)=>{const i=await(0,l.getModelFile)(e,r,!0,n);t({path:s,data:i})})))}else void 0!==h.externalData&&(_=h.externalData.map((async t=>{if("string"==typeof t.data){const s=await(0,l.getModelFile)(e,t.data,!0,n);return{...t,data:s}}return t})));if(_.length>0&&(h.externalData=await Promise.all(_)),"webgpu"===i){const e=(0,s.getKeyValueShapes)(n.config,{prefix:"present"});if(Object.keys(e).length>0&&!(0,r.isONNXProxy)()){const t={};for(const n in e)t[n]="gpu-buffer";h.preferredOutputLocation=t}}return{buffer:await p,session_options:h}}(e,t[i],n);return[i,await(0,r.createInferenceSession)(a,c)]}))))}async function A(e,t){const n=function(e,t){const n=Object.create(null),s=[];for(const o of e.inputNames){const e=t[o];e instanceof u.Tensor?n[o]=(0,r.isONNXProxy)()?e.clone():e:s.push(o)}if(s.length>0)throw new Error(`An error occurred during model execution: "Missing the following inputs: ${s.join(", ")}.`);const o=Object.keys(t).length,i=e.inputNames.length;if(o>i){let n=Object.keys(t).filter((t=>!e.inputNames.includes(t)));console.warn(`WARNING: Too many inputs were provided (${o} > ${i}). 
/**
 * Run a session after checking the candidate inputs against its declared input names.
 *
 * @param {Object} e Session exposing `inputNames` and an async `run(feeds)`.
 * @param {Object} t Candidate inputs by name; only `u.Tensor` instances count as provided.
 * @returns {Promise<Object>} The session outputs after being passed through `E`.
 * @throws {Error} When a declared input is missing. Errors raised by `run` are logged and rethrown.
 */
async function A(e, t) {
  const feeds = Object.create(null);
  const missing = [];
  for (const name of e.inputNames) {
    const value = t[name];
    if (value instanceof u.Tensor) {
      // A clone is passed when the ONNX proxy is active
      // (presumably because proxied buffers are transferred; confirm).
      feeds[name] = (0, r.isONNXProxy)() ? value.clone() : value;
    } else {
      missing.push(name);
    }
  }
  if (missing.length > 0) {
    // The closing quote was missing from this message.
    throw new Error(`An error occurred during model execution: "Missing the following inputs: ${missing.join(", ")}."`);
  }

  const provided = Object.keys(t);
  if (provided.length > e.inputNames.length) {
    const declared = new Set(e.inputNames);
    const ignored = provided.filter((name) => !declared.has(name));
    console.warn(`WARNING: Too many inputs were provided (${provided.length} > ${e.inputNames.length}). The following inputs will be ignored: "${ignored.join(", ")}".`);
  }

  try {
    const ortFeeds = Object.fromEntries(
      Object.entries(feeds).map(([name, tensor]) => [name, tensor.ort_tensor]),
    );
    return E(await e.run(ortFeeds));
  } catch (err) {
    console.error(`An error occurred during model execution: "${err}".`);
    console.error("Inputs given to model:", feeds);
    throw err;
  }
}

/**
 * Recursively replace, in place, every value recognised by `r.isONNXTensor` with a `u.Tensor`.
 * @param {Object} e Object to walk; it is mutated.
 * @returns {Object} The same object.
 */
function E(e) {
  for (const key in e) {
    const value = e[key];
    if ((0, r.isONNXTensor)(value)) {
      e[key] = new u.Tensor(value);
    } else if (typeof value === "object") {
      E(value);
    }
  }
  return e;
}

/**
 * Convert a list (or list of equal-length lists) of integers into an int64 `u.Tensor`.
 * @param {any} e A `u.Tensor` (returned as is), a flat array, or an array of arrays.
 * @returns {any} A tensor with dims `[1, length]` for flat input, `[rows, cols]` for nested input.
 * @throws {Error} If the input is empty or the nested rows differ in length.
 */
function z(e) {
  if (e instanceof u.Tensor) return e;
  if (e.length === 0) throw Error("items must be non-empty");

  if (!Array.isArray(e[0])) {
    return new u.Tensor("int64", BigInt64Array.from(e.map((v) => BigInt(v))), [1, e.length]);
  }
  const width = e[0].length;
  if (e.some((row) => row.length !== width)) {
    throw Error("Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' and/or 'truncation=True' to have batched tensors with the same length.");
  }
  return new u.Tensor("int64", BigInt64Array.from(e.flat().map((v) => BigInt(v))), [e.length, width]);
}

/**
 * Wrap a single boolean in a one-element "bool" tensor.
 * @param {boolean} e
 */
function L(e) {
  return new u.Tensor("bool", [e], [1]);
}

/**
 * Encoder-decoder forward pass: run the encoder unless `encoder_outputs` is supplied,
 * then hand over to `N` with the merged-decoder flag set.
 * @param {Object} e Model exposing `sessions.model` and `sessions.decoder_model_merged`.
 * @param {Object} t Model inputs.
 * @returns {Promise<Object>} Whatever `N` resolves to.
 */
async function I(e, t) {
  // `input_ids` is pulled out only so it does not travel to the decoder in the rest object.
  let { encoder_outputs, input_ids, decoder_input_ids, ...decoderFeeds } = t;
  if (!encoder_outputs) {
    const encoderInputs = (0, a.pick)(t, e.sessions.model.inputNames);
    encoder_outputs = (await B(e, encoderInputs)).last_hidden_state;
  }
  decoderFeeds.input_ids = decoder_input_ids;
  decoderFeeds.encoder_hidden_states = encoder_outputs;
  if (e.sessions.decoder_model_merged.inputNames.includes("encoder_attention_mask")) {
    decoderFeeds.encoder_attention_mask = t.attention_mask;
  }
  return await N(e, decoderFeeds, true);
}

/**
 * Encoder forward pass on `sessions.model`, filling in inputs the session declares but the
 * caller did not supply (`inputs_embeds` via `encode_text`, zero-filled `token_type_ids`).
 * @param {Object} e Model exposing `sessions.model` and, when needed, `encode_text`.
 * @param {Object} t Model inputs.
 * @returns {Promise<Object>} The outputs resolved by `A`.
 * @throws {Error} When embeddings are required but neither `inputs_embeds` nor `input_ids` is given.
 */
async function B(e, t) {
  const session = e.sessions.model;
  const feeds = (0, a.pick)(t, session.inputNames);

  if (session.inputNames.includes("inputs_embeds") && !feeds.inputs_embeds) {
    if (!t.input_ids) {
      throw new Error("Both `input_ids` and `inputs_embeds` are missing in the model inputs.");
    }
    feeds.inputs_embeds = await e.encode_text({ input_ids: t.input_ids });
  }
  // Only synthesise token types when `input_ids` exists to size them from; otherwise let `A`
  // report the missing inputs by name instead of failing here with a TypeError.
  if (session.inputNames.includes("token_type_ids") && !feeds.token_type_ids && feeds.input_ids) {
    feeds.token_type_ids = new u.Tensor(
      "int64",
      new BigInt64Array(feeds.input_ids.data.length),
      feeds.input_ids.dims,
    );
  }
  return await A(session, feeds);
}
/**
 * Decoder forward pass.
 *
 * @param {Object} e Model exposing `sessions` and `addPastKeyValues`.
 * @param {Object} t Model inputs; `past_key_values` is split off and handed to `addPastKeyValues`.
 * @param {boolean} [n=false] When true, `sessions.decoder_model_merged` is used instead of `sessions.model`.
 * @returns {Promise<Object>} Whatever `A` resolves to for the selected session.
 */
async function N(e, t, n = false) {
  /**
   * Derive position ids from the attention mask: within each row, masked-out slots (0n) get 1
   * and the remaining slots get the running sum of the mask values seen before them.
   * When a cache is present, only the trailing columns matching the new input length are kept.
   */
  function buildPositionIds(inputs, cache = null) {
    const { input_ids, inputs_embeds, attention_mask } = inputs;
    const [rows, cols] = attention_mask.dims;
    const values = new BigInt64Array(attention_mask.data.length);

    for (let row = 0; row < rows; ++row) {
      const offset = row * cols;
      let running = BigInt(0);
      for (let col = 0; col < cols; ++col) {
        const idx = offset + col;
        if (attention_mask.data[idx] === 0n) {
          values[idx] = BigInt(1);
        } else {
          values[idx] = running;
          running += attention_mask.data[idx];
        }
      }
    }

    let positionIds = new u.Tensor("int64", values, attention_mask.dims);
    if (cache) {
      const start = -(input_ids ?? inputs_embeds).dims.at(1);
      positionIds = positionIds.slice(null, [start, null]);
    }
    return positionIds;
  }

  const session = e.sessions[n ? "decoder_model_merged" : "model"];
  const { past_key_values: cache, ...feeds } = t;

  if (session.inputNames.includes("use_cache_branch")) {
    feeds.use_cache_branch = L(!!cache);
  }
  if (session.inputNames.includes("position_ids") && feeds.attention_mask && !feeds.position_ids) {
    feeds.position_ids = buildPositionIds(feeds, cache);
  }

  e.addPastKeyValues(feeds, cache);
  const selected = (0, a.pick)(feeds, session.inputNames);
  return await A(session, selected);
}

/**
 * Forward pass for models that combine text and image inputs before decoding.
 * Embeds the text (and merges image features) unless `inputs_embeds` is supplied, then calls `N`.
 *
 * @param {Object} e Model exposing `encode_text`, `encode_image` and `_merge_input_ids_with_image_features`.
 * @param {Object} inputs Named model inputs; unknown keys are accepted and dropped.
 * @returns {Promise<Object>} Whatever `N` resolves to.
 */
async function O(e, {
  input_ids = null,
  attention_mask = null,
  pixel_values = null,
  position_ids = null,
  inputs_embeds = null,
  past_key_values = null,
  generation_config = null,
  logits_processor = null,
  ...unused
}) {
  if (!inputs_embeds) {
    inputs_embeds = await e.encode_text({ input_ids });

    if (pixel_values && input_ids.dims[1] !== 1) {
      const image_features = await e.encode_image({ pixel_values });
      ({ inputs_embeds, attention_mask } = e._merge_input_ids_with_image_features({
        image_features,
        inputs_embeds,
        input_ids,
        attention_mask,
      }));
    } else if (past_key_values && pixel_values && input_ids.dims[1] === 1) {
      // Single new token with a cache: rebuild the mask as ones over the cached length
      // followed by the mask columns belonging to the new token.
      const newLength = input_ids.dims[1];
      const cachedLength = Object.values(past_key_values)[0].dims.at(-2);
      attention_mask = (0, u.cat)(
        [
          (0, u.ones)([input_ids.dims[0], cachedLength]),
          attention_mask.slice(null, [attention_mask.dims[1] - newLength, attention_mask.dims[1]]),
        ],
        1,
      );
    }
  }

  return await N(
    e,
    { inputs_embeds, past_key_values, attention_mask, position_ids, generation_config, logits_processor },
    true,
  );
}

/**
 * Trim `input_ids` (mutating `n`) so that only tokens not yet covered by the cache are fed.
 *
 * @param {Object} e Model; `config.image_token_index` and `config.num_image_tokens` are read.
 * @param {any} t Unused.
 * @param {Object} n Model inputs, modified in place and returned.
 * @param {any} s Unused.
 * @returns {Object} `n`.
 * @throws {Error} When image tokens are present but `num_image_tokens` is not configured.
 */
function j(e, t, n, s) {
  if (!n.past_key_values) return n;

  const cachedLength = Object.values(n.past_key_values)[0].dims.at(-2);
  const { input_ids, attention_mask } = n;

  // Mask longer than the ids: inputs are left untouched.
  if (attention_mask && attention_mask.dims[1] > input_ids.dims[1]) return n;

  if (cachedLength < input_ids.dims[1]) {
    n.input_ids = input_ids.slice(null, [cachedLength, null]);
    return n;
  }

  // Loose equality is deliberate: token data and the configured index may differ in numeric type.
  if (
    e.config.image_token_index != null &&
    input_ids.data.some((token) => token == e.config.image_token_index)
  ) {
    const imageTokens = e.config.num_image_tokens;
    if (!imageTokens) {
      throw new Error("`num_image_tokens` is missing in the model configuration.");
    }
    const keep = input_ids.dims[1] - (cachedLength - imageTokens);
    n.input_ids = input_ids.slice(null, [-keep, null]);
    n.attention_mask = (0, u.ones)([1, cachedLength + keep]);
  }
  return n;
}

/**
 * Build decoder inputs for generation: with a cache only the last token of each sequence is fed.
 *
 * @param {Object} e Unused.
 * @param {Array<Array<any>>} t Token ids generated so far, one array per sequence.
 * @param {Object} n Model inputs; copied, not mutated.
 * @param {any} s Unused.
 * @returns {Object} A copy of `n` with `decoder_input_ids` set to `z(...)` of the selected ids.
 */
function D(e, t, n, s) {
  const ids = n.past_key_values ? t.map((sequence) => [sequence.at(-1)]) : t;
  return { ...n, decoder_input_ids: z(ids) };
}
n.past_key_values&&(t=t.map((e=>[e.at(-1)]))),{...n,decoder_input_ids:z(t)}}function V(e,...t){return e.config.is_encoder_decoder?D(e,...t):j(e,...t)}class R extends i.Callable{main_input_name="input_ids";forward_params=["input_ids","attention_mask"];constructor(e,t){super(),this.config=e,this.sessions=t;const n=P.get(this.constructor),s=C.get(n);switch(this.can_generate=!1,this._forward=null,this._prepare_inputs_for_generation=null,s){case x:this.can_generate=!0,this._forward=N,this._prepare_inputs_for_generation=j;break;case b:case y:case v:this.can_generate=!0,this._forward=I,this._prepare_inputs_for_generation=D;break;case M:this._forward=I;break;case T:this.can_generate=!0,this._forward=O,this._prepare_inputs_for_generation=V;break;default:this._forward=B}this.can_generate&&this.forward_params.push("past_key_values"),this.custom_config=this.config["transformers.js_config"]??{}}async dispose(){const e=[];for(const t of Object.values(this.sessions))t?.handler?.dispose&&e.push(t.handler.dispose());return await Promise.all(e)}static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:r=null,local_files_only:o=!1,revision:i="main",model_file_name:a=null,subfolder:c="onnx",device:d=null,dtype:u=null,use_external_data_format:h=null,session_options:p={}}={}){let _={progress_callback:t,config:n,cache_dir:r,local_files_only:o,revision:i,model_file_name:a,subfolder:c,device:d,dtype:u,use_external_data_format:h,session_options:p};const m=P.get(this),f=C.get(m);let g;if(n=_.config=await s.AutoConfig.from_pretrained(e,_),f===x)g=await Promise.all([S(e,{model:_.model_file_name??"model"},_),(0,l.getModelJSON)(e,"generation_config.json",!1,_)]);else if(f===b||f===y)g=await Promise.all([S(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},_),(0,l.getModelJSON)(e,"generation_config.json",!1,_)]);else if(f===k)g=await Promise.all([S(e,{model:"vision_encoder",prompt_encoder_mask_decoder:"prompt_encoder_mask_decoder"},_)]);else 
if(f===M)g=await Promise.all([S(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},_)]);else if(f===T){const t={embed_tokens:"embed_tokens",vision_encoder:"vision_encoder",decoder_model_merged:"decoder_model_merged"};n.is_encoder_decoder&&(t.model="encoder_model"),g=await Promise.all([S(e,t,_),(0,l.getModelJSON)(e,"generation_config.json",!1,_)])}else f===v?g=await Promise.all([S(e,{model:"text_encoder",decoder_model_merged:"decoder_model_merged",encodec_decode:"encodec_decode"},_),(0,l.getModelJSON)(e,"generation_config.json",!1,_)]):(f!==w&&console.warn(`Model type for '${m??n?.model_type}' not found, assuming encoder-only architecture. Please report this at https://github.com/xenova/transformers.js/issues/new/choose.`),g=await Promise.all([S(e,{model:_.model_file_name??"model"},_)]));return new this(n,...g)}async _call(e){return await this.forward(e)}async forward(e){return await this._forward(this,e)}_get_logits_warper(e){const t=new c.LogitsProcessorList;return null!==e.temperature&&1!==e.temperature&&t.push(new c.TemperatureLogitsWarper(e.temperature)),null!==e.top_k&&0!==e.top_k&&t.push(new c.TopKLogitsWarper(e.top_k)),null!==e.top_p&&e.top_p<1&&t.push(new c.TopPLogitsWarper(e.top_p)),t}_get_logits_processor(e,t,n=null){const s=new c.LogitsProcessorList;if(null!==e.repetition_penalty&&1!==e.repetition_penalty&&s.push(new c.RepetitionPenaltyLogitsProcessor(e.repetition_penalty)),null!==e.no_repeat_ngram_size&&e.no_repeat_ngram_size>0&&s.push(new c.NoRepeatNGramLogitsProcessor(e.no_repeat_ngram_size)),null!==e.bad_words_ids&&s.push(new c.NoBadWordsLogitsProcessor(e.bad_words_ids,e.eos_token_id)),null!==e.min_length&&null!==e.eos_token_id&&e.min_length>0&&s.push(new c.MinLengthLogitsProcessor(e.min_length,e.eos_token_id)),null!==e.min_new_tokens&&null!==e.eos_token_id&&e.min_new_tokens>0&&s.push(new c.MinNewTokensLengthLogitsProcessor(t,e.min_new_tokens,e.eos_token_id)),null!==e.forced_bos_token_id&&s.push(new 
c.ForcedBOSTokenLogitsProcessor(e.forced_bos_token_id)),null!==e.forced_eos_token_id&&s.push(new c.ForcedEOSTokenLogitsProcessor(e.max_length,e.forced_eos_token_id)),null!==e.begin_suppress_tokens){const n=t>1||null===e.forced_bos_token_id?t:t+1;s.push(new c.SuppressTokensAtBeginLogitsProcessor(e.begin_suppress_tokens,n))}return null!==e.guidance_scale&&e.guidance_scale>1&&s.push(new c.ClassifierFreeGuidanceLogitsProcessor(e.guidance_scale)),null!==n&&s.extend(n),s}_prepare_generation_config(e,t,n=d.GenerationConfig){const s={...this.config};for(const e of["decoder","generator","text_config"])e in s&&Object.assign(s,s[e]);const r=new n(s);return"generation_config"in this&&Object.assign(r,this.generation_config),e&&Object.assign(r,e),t&&Object.assign(r,(0,a.pick)(t,Object.getOwnPropertyNames(r))),r}_get_stopping_criteria(e,t=null){const n=new p.StoppingCriteriaList;return null!==e.max_length&&n.push(new p.MaxLengthCriteria(e.max_length,this.config.max_position_embeddings??null)),null!==e.eos_token_id&&n.push(new p.EosTokenCriteria(e.eos_token_id)),t&&n.extend(t),n}_validate_model_class(){if(!this.can_generate){const e=[Ii,Oi,Li,Pi],t=P.get(this.constructor),n=new Set,s=this.config.model_type;for(const t of e){const e=t.get(s);e&&n.add(e[0])}let r=`The current model class (${t}) is not compatible with \`.generate()\`, as it doesn't have a language model head.`;throw n.size>0&&(r+=` Please use the following class instead: ${[...n].join(", ")}`),Error(r)}}prepare_inputs_for_generation(...e){return this._prepare_inputs_for_generation(this,...e)}_update_model_kwargs_for_generation({generated_input_ids:e,outputs:t,model_inputs:n,is_encoder_decoder:s}){return n.past_key_values=this.getPastKeyValues(t,n.past_key_values),n.input_ids=new u.Tensor("int64",e.flat(),[e.length,1]),s||(n.attention_mask=(0,u.cat)([n.attention_mask,(0,u.ones)([n.attention_mask.dims[0],1])],1)),n.position_ids=null,n}_prepare_model_inputs({inputs:e,bos_token_id:t,model_kwargs:n}){const 
s=(0,a.pick)(n,this.forward_params),r=this.main_input_name;if(r in s){if(e)throw new Error("`inputs`: {inputs}` were passed alongside {input_name} which is not allowed. Make sure to either pass {inputs} or {input_name}=...")}else s[r]=e;return{inputs_tensor:s[r],model_inputs:s,model_input_name:r}}async _prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:e,model_inputs:t,model_input_name:n,generation_config:s}){if(this.sessions.model.inputNames.includes("inputs_embeds")&&!t.inputs_embeds&&"_prepare_inputs_embeds"in this){const{input_ids:e,pixel_values:n,attention_mask:s,...r}=t,o=await this._prepare_inputs_embeds(t);t={...r,...(0,a.pick)(o,["inputs_embeds","attention_mask"])}}let{last_hidden_state:r}=await B(this,t);if(null!==s.guidance_scale&&s.guidance_scale>1)r=(0,u.cat)([r,(0,u.full_like)(r,0)],0),"attention_mask"in t&&(t.attention_mask=(0,u.cat)([t.attention_mask,(0,u.zeros_like)(t.attention_mask)],0));else if(t.decoder_input_ids){const e=z(t.decoder_input_ids).dims[0];if(e!==r.dims[0]){if(1!==r.dims[0])throw new Error(`The encoder outputs have a different batch size (${r.dims[0]}) than the decoder inputs (${e}).`);r=(0,u.cat)(Array.from({length:e},(()=>r)),0)}}return t.encoder_outputs=r,t}_prepare_decoder_input_ids_for_generation({batch_size:e,model_input_name:t,model_kwargs:n,decoder_start_token_id:s,bos_token_id:r,generation_config:o}){let{decoder_input_ids:i,...a}=n;if(i)Array.isArray(i[0])||(i=Array.from({length:e},(()=>i)));else if(s??=r,"musicgen"===this.config.model_type)i=Array.from({length:e*this.config.decoder.num_codebooks},(()=>[s]));else if(Array.isArray(s)){if(s.length!==e)throw new Error(`\`decoder_start_token_id\` expcted to have length ${e} but got ${s.length}`);i=s}else i=Array.from({length:e},(()=>[s]));return i=z(i),n.decoder_attention_mask=(0,u.ones_like)(i),{input_ids:i,model_inputs:a}}async 
generate({inputs:e=null,generation_config:t=null,logits_processor:n=null,stopping_criteria:s=null,streamer:r=null,...o}){this._validate_model_class(),t=this._prepare_generation_config(t,o);let{inputs_tensor:i,model_inputs:a,model_input_name:l}=this._prepare_model_inputs({inputs:e,model_kwargs:o});const c=this.config.is_encoder_decoder;let d;c&&("encoder_outputs"in a||(a=await this._prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:i,model_inputs:a,model_input_name:l,generation_config:t}))),c?({input_ids:d,model_inputs:a}=this._prepare_decoder_input_ids_for_generation({batch_size:a[l].dims.at(0),model_input_name:l,model_kwargs:a,decoder_start_token_id:t.decoder_start_token_id,bos_token_id:t.bos_token_id,generation_config:t})):d=a[l];let h=d.dims.at(-1);null!==t.max_new_tokens&&(t.max_length=h+t.max_new_tokens);const p=this._get_logits_processor(t,h,n),m=this._get_stopping_criteria(t,s),f=a[l].dims.at(0),g=_.LogitsSampler.getSampler(t),w=new Array(f).fill(0),M=d.tolist();r&&r.put(M);let b=null,y={};for(;;){a=this.prepare_inputs_for_generation(M,a,t);const e=await this.forward(a);if(t.output_attentions&&t.return_dict_in_generate){const t=this.getAttentions(e);for(const e in t)e in y||(y[e]=[]),y[e].push(t[e])}const n=p(M,e.logits.slice(null,-1,null)),s=[];for(let e=0;e<n.dims.at(0);++e){const t=n[e],r=await g(t);for(const[t,n]of r){const r=BigInt(t);w[e]+=n,M[e].push(r),s.push([r]);break}}r&&r.put(s);if(m(M).every((e=>e))){t.return_dict_in_generate&&(b=this.getPastKeyValues(e,a.past_key_values,!1));break}a=this._update_model_kwargs_for_generation({generated_input_ids:s,outputs:e,model_inputs:a,is_encoder_decoder:c})}r&&r.end();const x=new u.Tensor("int64",M.flat(),[M.length,M[0].length]);return t.return_dict_in_generate?{sequences:x,past_key_values:b,...y}:x}getPastKeyValues(e,t,n=!0){const s=Object.create(null);for(const r in e)if(r.startsWith("present")){const 
o=r.replace("present","past_key_values");if(t&&r.includes("encoder"))s[o]=t[o];else{if(n&&t){const e=t[o];"gpu-buffer"===e.location&&e.dispose()}s[o]=e[r]}}return s}getAttentions(e){const t={};for(const n of["cross_attentions","encoder_attentions","decoder_attentions"])for(const s in e)s.startsWith(n)&&(n in t||(t[n]=[]),t[n].push(e[s]));return t}addPastKeyValues(e,t){if(t)Object.assign(e,t);else{const t=this.custom_config.kv_cache_dtype??"float32",n="float16"===t?new Uint16Array:[],r=(0,s.getKeyValueShapes)(this.config);for(const s in r)e[s]=new u.Tensor(t,n,r[s])}}async encode_image({pixel_values:e}){const t=(await A(this.sessions.vision_encoder,{pixel_values:e})).image_features;return this.config.num_image_tokens||(console.warn(`The number of image tokens was not set in the model configuration. Setting it to the number of features detected by the vision encoder (${t.dims[1]}).`),this.config.num_image_tokens=t.dims[1]),t}async encode_text({input_ids:e}){return(await A(this.sessions.embed_tokens,{input_ids:e})).inputs_embeds}}class G{}class $ extends G{constructor({last_hidden_state:e,hidden_states:t=null,attentions:n=null}){super(),this.last_hidden_state=e,this.hidden_states=t,this.attentions=n}}class q extends R{}class U extends q{}class W extends q{async _call(e){return new za(await super._call(e))}}class X extends q{async _call(e){return new Sa(await super._call(e))}}class Q extends q{async _call(e){return new Ea(await super._call(e))}}class H extends q{async _call(e){return new La(await super._call(e))}}class Y extends R{}class K extends Y{}class J extends R{}class Z extends J{}class ee extends J{async _call(e){return new za(await super._call(e))}}class te extends J{async _call(e){return new Sa(await super._call(e))}}class ne extends J{async _call(e){return new Ea(await super._call(e))}}class se extends J{async _call(e){return new La(await super._call(e))}}class re extends R{}class oe extends re{}class ie extends re{async _call(e){return new za(await 
super._call(e))}}class ae extends re{async _call(e){return new Sa(await super._call(e))}}class le extends re{async _call(e){return new Ea(await super._call(e))}}class ce extends re{async _call(e){return new La(await super._call(e))}}class de extends R{}class ue extends de{}class he extends de{async _call(e){return new za(await super._call(e))}}class pe extends de{async _call(e){return new Sa(await super._call(e))}}class _e extends de{async _call(e){return new Ea(await super._call(e))}}class me extends de{async _call(e){return new La(await super._call(e))}}class fe extends R{}class ge extends fe{}class we extends fe{async _call(e){return new za(await super._call(e))}}class Me extends fe{async _call(e){return new Sa(await super._call(e))}}class be extends fe{async _call(e){return new Ea(await super._call(e))}}class ye extends fe{async _call(e){return new La(await super._call(e))}}class xe extends R{}class ke extends xe{}class Te extends xe{async _call(e){return new za(await super._call(e))}}class ve extends xe{async _call(e){return new Sa(await super._call(e))}}class Ce extends xe{async _call(e){return new Ea(await super._call(e))}}class Fe extends xe{async _call(e){return new La(await super._call(e))}}class Pe extends R{}class Se extends Pe{}class Ae extends Pe{async _call(e){return new za(await super._call(e))}}class Ee extends Pe{async _call(e){return new Sa(await super._call(e))}}class ze extends Pe{async _call(e){return new Ea(await super._call(e))}}class Le extends Pe{async _call(e){return new La(await super._call(e))}}class Ie extends R{}class Be extends Ie{}class Ne extends Ie{async _call(e){return new Sa(await super._call(e))}}class Oe extends Ie{async _call(e){return new Ea(await super._call(e))}}class je extends Ie{async _call(e){return new La(await super._call(e))}}class De extends Ie{async _call(e){return new za(await super._call(e))}}class Ve extends R{}class Re extends Ve{}class Ge extends Ve{async _call(e){return new za(await super._call(e))}}class $e 
extends Ve{async _call(e){return new Sa(await super._call(e))}}class qe extends Ve{async _call(e){return new Ea(await super._call(e))}}class Ue extends R{}class We extends Ue{}class Xe extends Ue{async _call(e){return new za(await super._call(e))}}class Qe extends Ue{async _call(e){return new Sa(await super._call(e))}}class He extends Ue{async _call(e){return new La(await super._call(e))}}class Ye extends R{}class Ke extends Ye{}class Je extends Ye{async _call(e){return new za(await super._call(e))}}class Ze extends Ye{async _call(e){return new Sa(await super._call(e))}}class et extends Ye{async _call(e){return new Ea(await super._call(e))}}class tt extends Ye{async _call(e){return new La(await super._call(e))}}class nt extends R{}class st extends nt{}class rt extends nt{async _call(e){return new za(await super._call(e))}}class ot extends nt{async _call(e){return new Sa(await super._call(e))}}class it extends nt{async _call(e){return new La(await super._call(e))}}class at extends R{}class lt extends at{}class ct extends at{async _call(e){return new Sa(await super._call(e))}}class dt extends at{async _call(e){return new La(await super._call(e))}}class ut extends at{async _call(e){return new za(await super._call(e))}}class ht extends R{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class pt extends ht{}class _t extends ht{}class mt extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class ft extends mt{}class gt extends mt{}class wt extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Mt extends wt{}class bt extends wt{}class yt extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class xt extends yt{}class kt extends yt{}class Tt extends yt{async _call(e){return new Sa(await super._call(e))}}class vt extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Ct extends 
vt{}class Ft extends vt{}class Pt extends vt{async _call(e){return new Sa(await super._call(e))}}class St extends vt{}class At extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Et extends At{}class zt extends At{}class Lt extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class It extends Lt{}class Bt extends Lt{}class Nt extends R{}class Ot extends Nt{}class jt extends Nt{async _call(e){return new za(await super._call(e))}}class Dt extends Nt{async _call(e){return new Sa(await super._call(e))}}class Vt extends Nt{async _call(e){return new Ea(await super._call(e))}}class Rt extends Nt{async _call(e){return new La(await super._call(e))}}class Gt extends R{}class $t extends Gt{}class qt extends Gt{async _call(e){return new za(await super._call(e))}}class Ut extends Gt{async _call(e){return new Sa(await super._call(e))}}class Wt extends Gt{async _call(e){return new Ea(await super._call(e))}}class Xt extends Gt{async _call(e){return new La(await super._call(e))}}class Qt extends R{}class Ht extends Qt{}class Yt extends Qt{async _call(e){return new za(await super._call(e))}}class Kt extends Qt{async _call(e){return new Sa(await super._call(e))}}class Jt extends Qt{async _call(e){return new Ea(await super._call(e))}}class Zt extends Qt{async _call(e){return new La(await super._call(e))}}class en extends R{}class tn extends en{}class nn extends en{}class sn extends R{requires_attention_mask=!1;main_input_name="input_features";forward_params=["input_features","attention_mask","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class rn extends sn{}class on extends sn{_prepare_generation_config(e,t){return super._prepare_generation_config(e,t,f.WhisperGenerationConfig)}_retrieve_init_tokens(e){const t=[e.decoder_start_token_id];let n=e.language;const s=e.task;if(e.is_multilingual){n||(console.warn("No language specified - defaulting to English (en)."),n="en");const 
r=`<|${(0,g.whisper_language_to_code)(n)}|>`;t.push(e.lang_to_id[r]),t.push(e.task_to_id[s??"transcribe"])}else if(n||s)throw new Error("Cannot specify `task` or `language` for an English-only model. If the model is intended to be multilingual, pass `is_multilingual=true` to generate, or update the generation config.");return!e.return_timestamps&&e.no_timestamps_token_id&&t.at(-1)!==e.no_timestamps_token_id?t.push(e.no_timestamps_token_id):e.return_timestamps&&t.at(-1)===e.no_timestamps_token_id&&(console.warn("<|notimestamps|> prompt token is removed from generation_config since `return_timestamps` is set to `true`."),t.pop()),t.filter((e=>null!=e))}async generate({inputs:e=null,generation_config:t=null,logits_processor:n=null,stopping_criteria:s=null,...r}){t=this._prepare_generation_config(t,r);const o=r.decoder_input_ids??this._retrieve_init_tokens(t);if(t.return_timestamps&&(n??=new c.LogitsProcessorList,n.push(new c.WhisperTimeStampLogitsProcessor(t,o))),t.begin_suppress_tokens&&(n??=new c.LogitsProcessorList,n.push(new c.SuppressTokensAtBeginLogitsProcessor(t.begin_suppress_tokens,o.length))),t.return_token_timestamps){if(!t.alignment_heads)throw new Error("Model generation config has no `alignment_heads`, token-level timestamps not available. See https://gist.github.com/hollance/42e32852f24243b748ae6bc1f985b13a on how to add this property to the generation config.");"translate"===t.task&&console.warn("Token-level timestamps may not be reliable for task 'translate'."),t.output_attentions=!0,t.return_dict_in_generate=!0}const i=await super.generate({inputs:e,generation_config:t,logits_processor:n,decoder_input_ids:o,...r});return t.return_token_timestamps&&(i.token_timestamps=this._extract_token_timestamps(i,t.alignment_heads,t.num_frames)),i}_extract_token_timestamps(e,t,n=null,s=.02){if(!e.cross_attentions)throw new Error("Model outputs must contain cross attentions to extract timestamps. 
This is most likely because the model was not exported with `output_attentions=True`.");null==n&&console.warn("`num_frames` has not been set, meaning the entire audio will be analyzed. This may lead to inaccurate token-level timestamps for short audios (< 30 seconds).");let r=this.config.median_filter_width;void 0===r&&(console.warn("Model config has no `median_filter_width`, using default value of 7."),r=7);const o=e.cross_attentions,i=Array.from({length:this.config.decoder_layers},((e,t)=>(0,u.cat)(o.map((e=>e[t])),2))),l=(0,u.stack)(t.map((([e,t])=>{if(e>=i.length)throw new Error(`Layer index ${e} is out of bounds for cross attentions (length ${i.length}).`);return n?i[e].slice(null,t,null,[0,n]):i[e].slice(null,t)}))).transpose(1,0,2,3),[c,d]=(0,u.std_mean)(l,-2,0,!0),p=l.clone();for(let e=0;e<p.dims[0];++e){const t=p[e];for(let n=0;n<t.dims[0];++n){const s=t[n],o=c[e][n][0].data,i=d[e][n][0].data;for(let e=0;e<s.dims[0];++e){let t=s[e].data;for(let e=0;e<t.length;++e)t[e]=(t[e]-i[e])/o[e];t.set((0,h.medianFilter)(t,r))}}}const _=[(0,u.mean)(p,1)],m=e.sequences.dims,f=new u.Tensor("float32",new Float32Array(m[0]*m[1]),m);for(let e=0;e<m[0];++e){const t=_[e].neg().squeeze_(0),[n,r]=(0,h.dynamic_time_warping)(t.tolist()),o=Array.from({length:n.length-1},((e,t)=>n[t+1]-n[t])),i=(0,a.mergeArrays)([1],o).map((e=>!!e)),l=[];for(let e=0;e<i.length;++e)i[e]&&l.push(r[e]*s);f[e].data.set(l,1)}return f}}class an extends R{main_input_name="pixel_values";forward_params=["pixel_values","input_ids","encoder_hidden_states","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class ln extends R{forward_params=["input_ids","pixel_values","attention_mask","position_ids","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class cn extends ln{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:n,attention_mask:s}){const 
r=this.config.image_token_index,o=n.tolist().map((e=>e.findIndex((e=>e==r)))),i=o.every((e=>-1===e)),a=o.every((e=>-1!==e));if(!i&&!a)throw new Error("Every input should contain either 0 or 1 image token.");if(i)return{inputs_embeds:e,attention_mask:s};const l=[],c=[];for(let n=0;n<o.length;++n){const r=o[n],i=e[n],a=t[n],d=s[n];l.push((0,u.cat)([i.slice([0,r]),a,i.slice([r+1,i.dims[0]])],0)),c.push((0,u.cat)([d.slice([0,r]),(0,u.ones)([a.dims[0]]),d.slice([r+1,d.dims[0]])],0))}return{inputs_embeds:(0,u.stack)(l,0),attention_mask:(0,u.stack)(c,0)}}}class dn extends cn{}class un extends R{forward_params=["input_ids","inputs_embeds","attention_mask","pixel_values","encoder_outputs","decoder_input_ids","decoder_inputs_embeds","decoder_attention_mask","past_key_values"];main_input_name="inputs_embeds";constructor(e,t,n){super(e,t),this.generation_config=n}}class hn extends un{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:n,attention_mask:s}){return{inputs_embeds:(0,u.cat)([t,e],1),attention_mask:(0,u.cat)([(0,u.ones)(t.dims.slice(0,2)),s],1)}}async _prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:n,attention_mask:s}){if(!e&&!t)throw new Error("Either `input_ids` or `pixel_values` should be provided.");let r,o;return e&&(r=await this.encode_text({input_ids:e})),t&&(o=await this.encode_image({pixel_values:t})),r&&o?({inputs_embeds:n,attention_mask:s}=this._merge_input_ids_with_image_features({inputs_embeds:r,image_features:o,input_ids:e,attention_mask:s})):n=r||o,{inputs_embeds:n,attention_mask:s}}async forward({input_ids:e,pixel_values:t,attention_mask:n,decoder_input_ids:s,decoder_attention_mask:r,encoder_outputs:o,past_key_values:i,inputs_embeds:a,decoder_inputs_embeds:l}){if(a||({inputs_embeds:a,attention_mask:n}=await this._prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:a,attention_mask:n})),!o){let{last_hidden_state:e}=await B(this,{inputs_embeds:a,attention_mask:n});o=e}if(!l){if(!s)throw new 
Error("Either `decoder_input_ids` or `decoder_inputs_embeds` should be provided.");l=await this.encode_text({input_ids:s})}const c={inputs_embeds:l,attention_mask:r,encoder_attention_mask:n,encoder_hidden_states:o,past_key_values:i};return await N(this,c,!0)}}class pn extends R{}class _n extends pn{}class mn extends pn{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class fn extends pn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class gn extends R{}class wn extends gn{}class Mn extends gn{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class bn extends pn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class yn extends R{}class xn extends yn{}class kn extends R{}class Tn extends kn{}class vn extends kn{}class Cn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Fn extends Cn{}class Pn extends Cn{}class Sn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class An extends Sn{}class En extends Sn{}class zn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Ln extends zn{}class In extends zn{}class Bn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Nn extends Bn{}class On extends Bn{}class jn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Dn extends jn{}class Vn extends jn{}class Rn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Gn extends Rn{}class $n extends Rn{}class qn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Un extends qn{}class Wn extends qn{}class Xn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Qn extends Xn{}class Hn extends Xn{}class Yn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Kn extends Yn{}class Jn extends 
Yn{}class Zn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class es extends Zn{}class ts extends Zn{}class ns extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class ss extends ns{}class rs extends ns{}class os extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class is extends os{}class as extends os{}class ls extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class cs extends ls{}class ds extends ls{}class us extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class hs extends us{}class ps extends us{}class _s extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class ms extends _s{}class fs extends _s{}class gs extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class ws extends gs{}class Ms extends gs{}class bs extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class ys extends bs{}class xs extends bs{}class ks extends R{}class Ts extends ks{}class vs extends ks{async _call(e){return new Sa(await super._call(e))}}class Cs extends R{}class Fs extends Cs{}class Ps extends Cs{async _call(e){return new Sa(await super._call(e))}}class Ss extends R{}class As extends Ss{async _call(e){return new Na(await super._call(e))}}class Es extends R{}class zs extends Es{}class Ls extends Es{async _call(e){return new Sa(await super._call(e))}}class Is extends R{}class Bs extends Is{}class Ns extends Is{async _call(e){return new Sa(await super._call(e))}}class Os extends R{}class js extends Os{}class Ds extends Os{}class Vs extends R{}class Rs extends Vs{}class Gs extends Vs{}class $s extends R{}class qs extends $s{}class Us extends $s{async _call(e){return new Sa(await super._call(e))}}class Ws extends R{}class Xs extends Ws{}class Qs extends Ws{async _call(e){return new Ys(await super._call(e))}}class Hs extends Ws{async _call(e){return new Ks(await super._call(e))}}class Ys extends 
G{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class Ks extends G{constructor({logits:e,pred_boxes:t,pred_masks:n}){super(),this.logits=e,this.pred_boxes=t,this.pred_masks=n}}class Js extends R{}class Zs extends Js{}class er extends Js{async _call(e){return new tr(await super._call(e))}}class tr extends G{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class nr extends R{}class sr extends nr{}class rr extends nr{async _call(e){return new or(await super._call(e))}}class or extends Ys{}class ir extends R{}class ar extends ir{}class lr extends ir{async _call(e){return new Sa(await super._call(e))}}class cr extends R{}class dr extends cr{}class ur extends cr{async _call(e){return new Sa(await super._call(e))}}class hr extends R{}class pr extends hr{}class _r extends hr{async _call(e){return new Sa(await super._call(e))}}class mr extends R{}class fr extends mr{}class gr extends mr{}class wr extends R{}class Mr extends wr{}class br extends wr{}class yr extends R{}class xr extends yr{}class kr extends R{}class Tr extends kr{}class vr extends kr{}class Cr extends R{}class Fr extends Cr{}class Pr extends R{}class Sr extends Pr{}class Ar extends Pr{async _call(e){return new Sa(await super._call(e))}}class Er extends R{}class zr extends Er{}class Lr extends Er{async _call(e){return new Sa(await super._call(e))}}class Ir extends R{}class Br extends Ir{}class Nr extends Ir{async _call(e){return new Sa(await super._call(e))}}class Or extends R{}class jr extends Or{}class Dr extends Or{async _call(e){return new Vr(await super._call(e))}}class Vr extends G{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class Rr extends R{}class Gr extends Rr{async get_image_embeddings({pixel_values:e}){return await B(this,{pixel_values:e})}async forward(e){if(e.image_embeddings&&e.image_positional_embeddings||(e={...e,...await this.get_image_embeddings(e)}),!e.input_labels&&e.input_points){const 
t=e.input_points.dims.slice(0,-1),n=t.reduce(((e,t)=>e*t),1);e.input_labels=new u.Tensor("int64",new BigInt64Array(n).fill(1n),t)}const t={image_embeddings:e.image_embeddings,image_positional_embeddings:e.image_positional_embeddings};return e.input_points&&(t.input_points=e.input_points),e.input_labels&&(t.input_labels=e.input_labels),e.input_boxes&&(t.input_boxes=e.input_boxes),await A(this.sessions.prompt_encoder_mask_decoder,t)}async _call(e){return new $r(await super._call(e))}}class $r extends G{constructor({iou_scores:e,pred_masks:t}){super(),this.iou_scores=e,this.pred_masks=t}}class qr extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Ur extends qr{}class Wr extends qr{}class Xr extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Qr extends Xr{}class Hr extends Xr{}class Yr extends R{}class Kr extends Yr{}class Jr extends Yr{async _call(e){return new Ia(await super._call(e))}}class Zr extends Yr{async _call(e){return new Sa(await super._call(e))}}class eo extends Yr{async _call(e){return new Ea(await super._call(e))}}class to extends R{}class no extends to{}class so extends to{async _call(e){return new Ea(await super._call(e))}}class ro extends R{}class oo extends ro{}class io extends R{}class ao extends io{}class lo extends io{async _call(e){return new Ia(await super._call(e))}}class co extends io{async _call(e){return new Sa(await super._call(e))}}class uo extends R{}class ho extends uo{}class po extends uo{async _call(e){return new Ia(await super._call(e))}}class _o extends uo{async _call(e){return new Sa(await super._call(e))}}class mo extends uo{async _call(e){return new Ea(await super._call(e))}}class fo extends R{}class go extends fo{}class wo extends fo{async _call(e){return new Ia(await super._call(e))}}class Mo extends fo{async _call(e){return new Sa(await super._call(e))}}class bo extends R{}class yo extends Yr{}class xo extends Yr{async _call(e){return new Ia(await super._call(e))}}class ko extends 
Yr{async _call(e){return new Sa(await super._call(e))}}class To extends R{}class vo extends To{}class Co extends To{async _call(e){return new Ia(await super._call(e))}}class Fo extends To{async _call(e){return new Sa(await super._call(e))}}class Po extends To{async _call(e){return new Aa(await super._call(e))}}class So extends To{async _call(e){return new Ea(await super._call(e))}}class Ao extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Eo extends Ao{}class zo extends Ao{}class Lo extends Ao{async generate_speech(e,t,{threshold:n=.5,minlenratio:s=0,maxlenratio:r=20,vocoder:o=null}={}){const i={input_ids:e},{encoder_outputs:a,encoder_attention_mask:l}=await B(this,i),c=a.dims[1]/this.config.reduction_factor,d=Math.floor(c*r),h=Math.floor(c*s),p=this.config.num_mel_bins;let _=[],m=null,f=null,g=0;for(;;){++g;const e=L(!!f);let s;s=f?f.output_sequence_out:new u.Tensor("float32",new Float32Array(p),[1,1,p]);let r={use_cache_branch:e,output_sequence:s,encoder_attention_mask:l,speaker_embeddings:t,encoder_hidden_states:a};this.addPastKeyValues(r,m),f=await A(this.sessions.decoder_model_merged,r),m=this.getPastKeyValues(f,m);const{prob:o,spectrum:i}=f;if(_.push(i),g>=h&&(Array.from(o.data).filter((e=>e>=n)).length>0||g>=d))break}const w=(0,u.cat)(_),{waveform:M}=await A(o.sessions.model,{spectrogram:w});return{spectrogram:w,waveform:M}}}class Io extends R{main_input_name="spectrogram"}class Bo extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class No extends Bo{}class Oo extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class jo extends Oo{}class Do extends Oo{}class Vo extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Ro extends Vo{}class Go extends Vo{}class $o extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class qo extends $o{}class Uo extends $o{}class Wo extends R{}class Xo extends Wo{}class Qo extends Wo{static async from_pretrained(e,t={}){return 
t.model_file_name??="text_model",super.from_pretrained(e,t)}}class Ho extends Wo{static async from_pretrained(e,t={}){return t.model_file_name??="audio_model",super.from_pretrained(e,t)}}class Yo extends R{}class Ko extends Yo{async _call(e){return new Oa(await super._call(e))}}class Jo extends R{}class Zo extends Jo{}class ei extends Jo{}class ti extends Jo{}class ni extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class si extends ni{}class ri extends ni{}class oi extends R{}class ii extends oi{}class ai extends oi{async _call(e){return new Sa(await super._call(e))}}class li extends R{}class ci extends li{}class di extends li{}class ui extends R{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}_apply_and_filter_by_delay_pattern_mask(e){const[t,n]=e.dims,s=this.config.decoder.num_codebooks,r=n-s;let o=0;for(let t=0;t<e.size;++t){if(e.data[t]===this.config.decoder.pad_token_id)continue;const i=t%n-Math.floor(t/n)%s;i>0&&i<=r&&(e.data[o++]=e.data[t])}const i=Math.floor(t/s),a=o/(i*s);return new u.Tensor(e.type,e.data.slice(0,o),[i,s,a])}prepare_inputs_for_generation(e,t,n){let s=structuredClone(e);for(let e=0;e<s.length;++e)for(let t=0;t<s[e].length;++t)e%this.config.decoder.num_codebooks>=t&&(s[e][t]=BigInt(this.config.decoder.pad_token_id));null!==n.guidance_scale&&n.guidance_scale>1&&(s=s.concat(s));return super.prepare_inputs_for_generation(s,t,n)}async generate(e){const t=await super.generate(e),n=this._apply_and_filter_by_delay_pattern_mask(t).unsqueeze_(0),{audio_values:s}=await A(this.sessions.encodec_decode,{audio_codes:n});return s}}class hi extends R{}class pi extends hi{}class _i extends hi{async _call(e){return new Sa(await super._call(e))}}class mi extends R{}class fi extends mi{}class gi extends mi{async _call(e){return new Sa(await super._call(e))}}class wi extends R{}class Mi extends wi{}class bi 
extends wi{async _call(e){return new Sa(await super._call(e))}}class yi extends R{}class xi extends yi{}class ki extends yi{async _call(e){return new Sa(await super._call(e))}}class Ti{static MODEL_CLASS_MAPPINGS=null;static BASE_IF_FAIL=!1;static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:r=null,local_files_only:o=!1,revision:i="main",model_file_name:a=null,subfolder:l="onnx",device:c=null,dtype:d=null,use_external_data_format:u=null,session_options:h={}}={}){let p={progress_callback:t,config:n,cache_dir:r,local_files_only:o,revision:i,model_file_name:a,subfolder:l,device:c,dtype:d,use_external_data_format:u,session_options:h};if(p.config=await s.AutoConfig.from_pretrained(e,p),!this.MODEL_CLASS_MAPPINGS)throw new Error("`MODEL_CLASS_MAPPINGS` not implemented for this type of `AutoClass`: "+this.name);for(let t of this.MODEL_CLASS_MAPPINGS){const n=t.get(p.config.model_type);if(n)return await n[1].from_pretrained(e,p)}if(this.BASE_IF_FAIL)return console.warn(`Unknown model class "${p.config.model_type}", attempting to construct from base class.`),await R.from_pretrained(e,p);throw Error(`Unsupported model type: ${p.config.model_type}`)}}const vi=new 
Map([["bert",["BertModel",U]],["nomic_bert",["NomicBertModel",K]],["roformer",["RoFormerModel",Z]],["electra",["ElectraModel",ue]],["esm",["EsmModel",Re]],["convbert",["ConvBertModel",oe]],["camembert",["CamembertModel",ge]],["deberta",["DebertaModel",ke]],["deberta-v2",["DebertaV2Model",Se]],["mpnet",["MPNetModel",Ke]],["albert",["AlbertModel",lt]],["distilbert",["DistilBertModel",Be]],["roberta",["RobertaModel",Ot]],["xlm",["XLMModel",$t]],["xlm-roberta",["XLMRobertaModel",Ht]],["clap",["ClapModel",Xo]],["clip",["CLIPModel",_n]],["clipseg",["CLIPSegModel",Tn]],["chinese_clip",["ChineseCLIPModel",xn]],["siglip",["SiglipModel",wn]],["mobilebert",["MobileBertModel",We]],["squeezebert",["SqueezeBertModel",st]],["wav2vec2",["Wav2Vec2Model",Kr]],["wav2vec2-bert",["Wav2Vec2BertModel",go]],["unispeech",["UniSpeechModel",ao]],["unispeech-sat",["UniSpeechSatModel",ho]],["hubert",["HubertModel",yo]],["wavlm",["WavLMModel",vo]],["audio-spectrogram-transformer",["ASTModel",tn]],["vits",["VitsModel",Ko]],["pyannote",["PyAnnoteModel",no]],["wespeaker-resnet",["WeSpeakerResNetModel",oo]],["detr",["DetrModel",Xs]],["rt_detr",["RTDetrModel",Zs]],["table-transformer",["TableTransformerModel",sr]],["vit",["ViTModel",Ts]],["fastvit",["FastViTModel",Fs]],["mobilevit",["MobileViTModel",zs]],["mobilevitv2",["MobileViTV2Model",Bs]],["owlvit",["OwlViTModel",js]],["owlv2",["Owlv2Model",Rs]],["beit",["BeitModel",qs]],["deit",["DeiTModel",ar]],["convnext",["ConvNextModel",Sr]],["convnextv2",["ConvNextV2Model",zr]],["dinov2",["Dinov2Model",Br]],["resnet",["ResNetModel",dr]],["swin",["SwinModel",pr]],["swin2sr",["Swin2SRModel",fr]],["donut-swin",["DonutSwinModel",Fr]],["yolos",["YolosModel",jr]],["dpt",["DPTModel",Mr]],["glpn",["GLPNModel",Tr]],["hifigan",["SpeechT5HifiGan",Io]],["efficientnet",["EfficientNetModel",ii]],["mobilenet_v1",["MobileNetV1Model",pi]],["mobilenet_v2",["MobileNetV2Model",fi]],["mobilenet_v3",["MobileNetV3Model",Mi]],["mobilenet_v4",["MobileNetV4Model",xi]]]),Ci=new 
Map([["t5",["T5Model",pt]],["longt5",["LongT5Model",ft]],["mt5",["MT5Model",Mt]],["bart",["BartModel",xt]],["mbart",["MBartModel",Ct]],["marian",["MarianModel",Ur]],["whisper",["WhisperModel",rn]],["m2m_100",["M2M100Model",Qr]],["blenderbot",["BlenderbotModel",Et]],["blenderbot-small",["BlenderbotSmallModel",It]]]),Fi=new Map([["bloom",["BloomModel",ms]],["gpt2",["GPT2Model",Fn]],["gptj",["GPTJModel",Nn]],["gpt_bigcode",["GPTBigCodeModel",Dn]],["gpt_neo",["GPTNeoModel",An]],["gpt_neox",["GPTNeoXModel",Ln]],["codegen",["CodeGenModel",Gn]],["llama",["LlamaModel",Un]],["cohere",["CohereModel",Qn]],["gemma",["GemmaModel",Kn]],["gemma2",["Gemma2Model",es]],["openelm",["OpenELMModel",ss]],["qwen2",["Qwen2Model",is]],["phi",["PhiModel",cs]],["phi3",["Phi3Model",hs]],["mpt",["MptModel",ws]],["opt",["OPTModel",ys]],["mistral",["MistralModel",jo]],["starcoder2",["Starcoder2Model",Ro]],["falcon",["FalconModel",qo]],["stablelm",["StableLmModel",si]]]),Pi=new Map([["speecht5",["SpeechT5ForSpeechToText",zo]],["whisper",["WhisperForConditionalGeneration",on]]]),Si=new Map([["speecht5",["SpeechT5ForTextToSpeech",Lo]]]),Ai=new Map([["vits",["VitsModel",Ko]],["musicgen",["MusicgenForConditionalGeneration",ui]]]),Ei=new 
Map([["bert",["BertForSequenceClassification",X]],["roformer",["RoFormerForSequenceClassification",te]],["electra",["ElectraForSequenceClassification",pe]],["esm",["EsmForSequenceClassification",$e]],["convbert",["ConvBertForSequenceClassification",ae]],["camembert",["CamembertForSequenceClassification",Me]],["deberta",["DebertaForSequenceClassification",ve]],["deberta-v2",["DebertaV2ForSequenceClassification",Ee]],["mpnet",["MPNetForSequenceClassification",Ze]],["albert",["AlbertForSequenceClassification",ct]],["distilbert",["DistilBertForSequenceClassification",Ne]],["roberta",["RobertaForSequenceClassification",Dt]],["xlm",["XLMForSequenceClassification",Ut]],["xlm-roberta",["XLMRobertaForSequenceClassification",Kt]],["bart",["BartForSequenceClassification",Tt]],["mbart",["MBartForSequenceClassification",Pt]],["mobilebert",["MobileBertForSequenceClassification",Qe]],["squeezebert",["SqueezeBertForSequenceClassification",ot]]]),zi=new Map([["bert",["BertForTokenClassification",Q]],["roformer",["RoFormerForTokenClassification",ne]],["electra",["ElectraForTokenClassification",_e]],["esm",["EsmForTokenClassification",qe]],["convbert",["ConvBertForTokenClassification",le]],["camembert",["CamembertForTokenClassification",be]],["deberta",["DebertaForTokenClassification",Ce]],["deberta-v2",["DebertaV2ForTokenClassification",ze]],["mpnet",["MPNetForTokenClassification",et]],["distilbert",["DistilBertForTokenClassification",Oe]],["roberta",["RobertaForTokenClassification",Vt]],["xlm",["XLMForTokenClassification",Wt]],["xlm-roberta",["XLMRobertaForTokenClassification",Jt]]]),Li=new 
Map([["t5",["T5ForConditionalGeneration",_t]],["longt5",["LongT5ForConditionalGeneration",gt]],["mt5",["MT5ForConditionalGeneration",bt]],["bart",["BartForConditionalGeneration",kt]],["mbart",["MBartForConditionalGeneration",Ft]],["marian",["MarianMTModel",Wr]],["m2m_100",["M2M100ForConditionalGeneration",Hr]],["blenderbot",["BlenderbotForConditionalGeneration",zt]],["blenderbot-small",["BlenderbotSmallForConditionalGeneration",Bt]]]),Ii=new Map([["bloom",["BloomForCausalLM",fs]],["gpt2",["GPT2LMHeadModel",Pn]],["gptj",["GPTJForCausalLM",On]],["gpt_bigcode",["GPTBigCodeForCausalLM",Vn]],["gpt_neo",["GPTNeoForCausalLM",En]],["gpt_neox",["GPTNeoXForCausalLM",In]],["codegen",["CodeGenForCausalLM",$n]],["llama",["LlamaForCausalLM",Wn]],["cohere",["CohereForCausalLM",Hn]],["gemma",["GemmaForCausalLM",Jn]],["gemma2",["Gemma2ForCausalLM",ts]],["openelm",["OpenELMForCausalLM",rs]],["qwen2",["Qwen2ForCausalLM",as]],["phi",["PhiForCausalLM",ds]],["phi3",["Phi3ForCausalLM",ps]],["mpt",["MptForCausalLM",Ms]],["opt",["OPTForCausalLM",xs]],["mbart",["MBartForCausalLM",St]],["mistral",["MistralForCausalLM",Do]],["starcoder2",["Starcoder2ForCausalLM",Go]],["falcon",["FalconForCausalLM",Uo]],["trocr",["TrOCRForCausalLM",No]],["stablelm",["StableLmForCausalLM",ri]]]),Bi=new Map([["bert",["BertForMaskedLM",W]],["roformer",["RoFormerForMaskedLM",ee]],["electra",["ElectraForMaskedLM",he]],["esm",["EsmForMaskedLM",Ge]],["convbert",["ConvBertForMaskedLM",ie]],["camembert",["CamembertForMaskedLM",we]],["deberta",["DebertaForMaskedLM",Te]],["deberta-v2",["DebertaV2ForMaskedLM",Ae]],["mpnet",["MPNetForMaskedLM",Je]],["albert",["AlbertForMaskedLM",ut]],["distilbert",["DistilBertForMaskedLM",De]],["roberta",["RobertaForMaskedLM",jt]],["xlm",["XLMWithLMHeadModel",qt]],["xlm-roberta",["XLMRobertaForMaskedLM",Yt]],["mobilebert",["MobileBertForMaskedLM",Xe]],["squeezebert",["SqueezeBertForMaskedLM",rt]]]),Ni=new 
Map([["bert",["BertForQuestionAnswering",H]],["roformer",["RoFormerForQuestionAnswering",se]],["electra",["ElectraForQuestionAnswering",me]],["convbert",["ConvBertForQuestionAnswering",ce]],["camembert",["CamembertForQuestionAnswering",ye]],["deberta",["DebertaForQuestionAnswering",Fe]],["deberta-v2",["DebertaV2ForQuestionAnswering",Le]],["mpnet",["MPNetForQuestionAnswering",tt]],["albert",["AlbertForQuestionAnswering",dt]],["distilbert",["DistilBertForQuestionAnswering",je]],["roberta",["RobertaForQuestionAnswering",Rt]],["xlm",["XLMForQuestionAnswering",Xt]],["xlm-roberta",["XLMRobertaForQuestionAnswering",Zt]],["mobilebert",["MobileBertForQuestionAnswering",He]],["squeezebert",["SqueezeBertForQuestionAnswering",it]]]),Oi=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",an]]]),ji=new Map([["llava",["LlavaForConditionalGeneration",cn]],["moondream1",["Moondream1ForConditionalGeneration",dn]],["florence2",["Florence2ForConditionalGeneration",hn]]]),Di=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",an]]]),Vi=new Map([["vit",["ViTForImageClassification",vs]],["fastvit",["FastViTForImageClassification",Ps]],["mobilevit",["MobileViTForImageClassification",Ls]],["mobilevitv2",["MobileViTV2ForImageClassification",Ns]],["beit",["BeitForImageClassification",Us]],["deit",["DeiTForImageClassification",lr]],["convnext",["ConvNextForImageClassification",Ar]],["convnextv2",["ConvNextV2ForImageClassification",Lr]],["dinov2",["Dinov2ForImageClassification",Nr]],["resnet",["ResNetForImageClassification",ur]],["swin",["SwinForImageClassification",_r]],["segformer",["SegformerForImageClassification",ei]],["efficientnet",["EfficientNetForImageClassification",ai]],["mobilenet_v1",["MobileNetV1ForImageClassification",_i]],["mobilenet_v2",["MobileNetV2ForImageClassification",gi]],["mobilenet_v3",["MobileNetV3ForImageClassification",bi]],["mobilenet_v4",["MobileNetV4ForImageClassification",ki]]]),Ri=new 
Map([["detr",["DetrForObjectDetection",Qs]],["rt_detr",["RTDetrForObjectDetection",er]],["table-transformer",["TableTransformerForObjectDetection",rr]],["yolos",["YolosForObjectDetection",Dr]]]),Gi=new Map([["owlvit",["OwlViTForObjectDetection",Ds]],["owlv2",["Owlv2ForObjectDetection",Gs]]]),$i=new Map([["detr",["DetrForSegmentation",Hs]],["clipseg",["CLIPSegForImageSegmentation",vn]]]),qi=new Map([["segformer",["SegformerForSemanticSegmentation",ti]]]),Ui=new Map([["sam",["SamModel",Gr]]]),Wi=new Map([["wav2vec2",["Wav2Vec2ForCTC",Jr]],["wav2vec2-bert",["Wav2Vec2BertForCTC",wo]],["unispeech",["UniSpeechForCTC",lo]],["unispeech-sat",["UniSpeechSatForCTC",po]],["wavlm",["WavLMForCTC",Co]],["hubert",["HubertForCTC",xo]]]),Xi=new Map([["wav2vec2",["Wav2Vec2ForSequenceClassification",Zr]],["wav2vec2-bert",["Wav2Vec2BertForSequenceClassification",Mo]],["unispeech",["UniSpeechForSequenceClassification",co]],["unispeech-sat",["UniSpeechSatForSequenceClassification",_o]],["wavlm",["WavLMForSequenceClassification",Fo]],["hubert",["HubertForSequenceClassification",ko]],["audio-spectrogram-transformer",["ASTForAudioClassification",nn]]]),Qi=new Map([["wavlm",["WavLMForXVector",Po]]]),Hi=new Map([["unispeech-sat",["UniSpeechSatForAudioFrameClassification",mo]],["wavlm",["WavLMForAudioFrameClassification",So]],["wav2vec2",["Wav2Vec2ForAudioFrameClassification",eo]],["pyannote",["PyAnnoteForAudioFrameClassification",so]]]),Yi=new Map([["vitmatte",["VitMatteForImageMatting",As]]]),Ki=new Map([["swin2sr",["Swin2SRForImageSuperResolution",gr]]]),Ji=new Map([["dpt",["DPTForDepthEstimation",br]],["depth_anything",["DepthAnythingForDepthEstimation",xr]],["glpn",["GLPNForDepthEstimation",vr]]]),Zi=new 
Map([["clip",["CLIPVisionModelWithProjection",fn]],["siglip",["SiglipVisionModel",bn]]]),ea=[[vi,w],[Ci,M],[Fi,x],[Ei,w],[zi,w],[Li,b],[Pi,b],[Ii,x],[Bi,w],[Ni,w],[Oi,y],[ji,T],[Vi,w],[$i,w],[qi,w],[Yi,w],[Ki,w],[Ji,w],[Ri,w],[Gi,w],[Ui,k],[Wi,w],[Xi,w],[Si,b],[Ai,w],[Qi,w],[Hi,w],[Zi,w]];for(const[e,t]of ea)for(const[n,s]of e.values())C.set(n,t),P.set(s,n),F.set(n,s);const ta=[["MusicgenForConditionalGeneration",ui,v],["CLIPTextModelWithProjection",mn,w],["SiglipTextModel",Mn,w],["ClapTextModelWithProjection",Qo,w],["ClapAudioModelWithProjection",Ho,w]];for(const[e,t,n]of ta)C.set(e,n),P.set(t,e),F.set(e,t);class na extends Ti{static MODEL_CLASS_MAPPINGS=ea.map((e=>e[0]));static BASE_IF_FAIL=!0}class sa extends Ti{static MODEL_CLASS_MAPPINGS=[Ei]}class ra extends Ti{static MODEL_CLASS_MAPPINGS=[zi]}class oa extends Ti{static MODEL_CLASS_MAPPINGS=[Li]}class ia extends Ti{static MODEL_CLASS_MAPPINGS=[Pi]}class aa extends Ti{static MODEL_CLASS_MAPPINGS=[Si]}class la extends Ti{static MODEL_CLASS_MAPPINGS=[Ai]}class ca extends Ti{static MODEL_CLASS_MAPPINGS=[Ii]}class da extends Ti{static MODEL_CLASS_MAPPINGS=[Bi]}class ua extends Ti{static MODEL_CLASS_MAPPINGS=[Ni]}class ha extends Ti{static MODEL_CLASS_MAPPINGS=[Oi]}class pa extends Ti{static MODEL_CLASS_MAPPINGS=[Vi]}class _a extends Ti{static MODEL_CLASS_MAPPINGS=[$i]}class ma extends Ti{static MODEL_CLASS_MAPPINGS=[qi]}class fa extends Ti{static MODEL_CLASS_MAPPINGS=[Ri]}class ga extends Ti{static MODEL_CLASS_MAPPINGS=[Gi]}class wa extends Ti{static MODEL_CLASS_MAPPINGS=[Ui]}class Ma extends Ti{static MODEL_CLASS_MAPPINGS=[Wi]}class ba extends Ti{static MODEL_CLASS_MAPPINGS=[Xi]}class ya extends Ti{static MODEL_CLASS_MAPPINGS=[Qi]}class xa extends Ti{static MODEL_CLASS_MAPPINGS=[Hi]}class ka extends Ti{static MODEL_CLASS_MAPPINGS=[Di]}class Ta extends Ti{static MODEL_CLASS_MAPPINGS=[Yi]}class va extends Ti{static MODEL_CLASS_MAPPINGS=[Ki]}class Ca extends Ti{static MODEL_CLASS_MAPPINGS=[Ji]}class Fa extends 
Ti{static MODEL_CLASS_MAPPINGS=[Zi]}class Pa extends G{constructor({logits:e,past_key_values:t,encoder_outputs:n,decoder_attentions:s=null,cross_attentions:r=null}){super(),this.logits=e,this.past_key_values=t,this.encoder_outputs=n,this.decoder_attentions=s,this.cross_attentions=r}}class Sa extends G{constructor({logits:e}){super(),this.logits=e}}class Aa extends G{constructor({logits:e,embeddings:t}){super(),this.logits=e,this.embeddings=t}}class Ea extends G{constructor({logits:e}){super(),this.logits=e}}class za extends G{constructor({logits:e}){super(),this.logits=e}}class La extends G{constructor({start_logits:e,end_logits:t}){super(),this.start_logits=e,this.end_logits=t}}class Ia extends G{constructor({logits:e}){super(),this.logits=e}}class Ba extends G{constructor({logits:e,past_key_values:t}){super(),this.logits=e,this.past_key_values=t}}class Na extends G{constructor({alphas:e}){super(),this.alphas=e}}class Oa extends G{constructor({waveform:e,spectrogram:t}){super(),this.waveform=e,this.spectrogram=t}}},"./src/models/whisper/common_whisper.js":
112
+ \***********************/(e,t,n)=>{"use strict";n.r(t),n.d(t,{ASTForAudioClassification:()=>nn,ASTModel:()=>tn,ASTPreTrainedModel:()=>en,AlbertForMaskedLM:()=>ut,AlbertForQuestionAnswering:()=>dt,AlbertForSequenceClassification:()=>ct,AlbertModel:()=>lt,AlbertPreTrainedModel:()=>at,AutoModel:()=>na,AutoModelForAudioClassification:()=>ba,AutoModelForAudioFrameClassification:()=>xa,AutoModelForCTC:()=>Ma,AutoModelForCausalLM:()=>ca,AutoModelForDepthEstimation:()=>Ca,AutoModelForDocumentQuestionAnswering:()=>ka,AutoModelForImageClassification:()=>pa,AutoModelForImageFeatureExtraction:()=>Fa,AutoModelForImageMatting:()=>Ta,AutoModelForImageSegmentation:()=>_a,AutoModelForImageToImage:()=>va,AutoModelForMaskGeneration:()=>wa,AutoModelForMaskedLM:()=>da,AutoModelForObjectDetection:()=>fa,AutoModelForQuestionAnswering:()=>ua,AutoModelForSemanticSegmentation:()=>ma,AutoModelForSeq2SeqLM:()=>oa,AutoModelForSequenceClassification:()=>sa,AutoModelForSpeechSeq2Seq:()=>ia,AutoModelForTextToSpectrogram:()=>aa,AutoModelForTextToWaveform:()=>la,AutoModelForTokenClassification:()=>ra,AutoModelForVision2Seq:()=>ha,AutoModelForXVector:()=>ya,AutoModelForZeroShotObjectDetection:()=>ga,BartForConditionalGeneration:()=>kt,BartForSequenceClassification:()=>Tt,BartModel:()=>xt,BartPretrainedModel:()=>yt,BaseModelOutput:()=>$,BeitForImageClassification:()=>Us,BeitModel:()=>qs,BeitPreTrainedModel:()=>$s,BertForMaskedLM:()=>W,BertForQuestionAnswering:()=>H,BertForSequenceClassification:()=>X,BertForTokenClassification:()=>Q,BertModel:()=>U,BertPreTrainedModel:()=>q,BlenderbotForConditionalGeneration:()=>zt,BlenderbotModel:()=>Et,BlenderbotPreTrainedModel:()=>At,BlenderbotSmallForConditionalGeneration:()=>Bt,BlenderbotSmallModel:()=>It,BlenderbotSmallPreTrainedModel:()=>Lt,BloomForCausalLM:()=>fs,BloomModel:()=>ms,BloomPreTrainedModel:()=>_s,CLIPModel:()=>_n,CLIPPreTrainedModel:()=>pn,CLIPSegForImageSegmentation:()=>vn,CLIPSegModel:()=>Tn,CLIPSegPreTrainedModel:()=>kn,CLIPTextModelWithProjec
tion:()=>mn,CLIPVisionModelWithProjection:()=>fn,CamembertForMaskedLM:()=>we,CamembertForQuestionAnswering:()=>ye,CamembertForSequenceClassification:()=>Me,CamembertForTokenClassification:()=>be,CamembertModel:()=>ge,CamembertPreTrainedModel:()=>fe,CausalLMOutput:()=>Ia,CausalLMOutputWithPast:()=>Ba,ChineseCLIPModel:()=>xn,ChineseCLIPPreTrainedModel:()=>yn,ClapAudioModelWithProjection:()=>Ho,ClapModel:()=>Xo,ClapPreTrainedModel:()=>Wo,ClapTextModelWithProjection:()=>Qo,CodeGenForCausalLM:()=>$n,CodeGenModel:()=>Gn,CodeGenPreTrainedModel:()=>Rn,CohereForCausalLM:()=>Hn,CohereModel:()=>Qn,CoherePreTrainedModel:()=>Xn,ConvBertForMaskedLM:()=>ie,ConvBertForQuestionAnswering:()=>ce,ConvBertForSequenceClassification:()=>ae,ConvBertForTokenClassification:()=>le,ConvBertModel:()=>oe,ConvBertPreTrainedModel:()=>re,ConvNextForImageClassification:()=>Ar,ConvNextModel:()=>Sr,ConvNextPreTrainedModel:()=>Pr,ConvNextV2ForImageClassification:()=>Lr,ConvNextV2Model:()=>zr,ConvNextV2PreTrainedModel:()=>Er,DPTForDepthEstimation:()=>br,DPTModel:()=>Mr,DPTPreTrainedModel:()=>wr,DebertaForMaskedLM:()=>Te,DebertaForQuestionAnswering:()=>Fe,DebertaForSequenceClassification:()=>ve,DebertaForTokenClassification:()=>Ce,DebertaModel:()=>ke,DebertaPreTrainedModel:()=>xe,DebertaV2ForMaskedLM:()=>Ae,DebertaV2ForQuestionAnswering:()=>Le,DebertaV2ForSequenceClassification:()=>Ee,DebertaV2ForTokenClassification:()=>ze,DebertaV2Model:()=>Se,DebertaV2PreTrainedModel:()=>Pe,DeiTForImageClassification:()=>lr,DeiTModel:()=>ar,DeiTPreTrainedModel:()=>ir,DepthAnythingForDepthEstimation:()=>xr,DepthAnythingPreTrainedModel:()=>yr,DetrForObjectDetection:()=>Qs,DetrForSegmentation:()=>Hs,DetrModel:()=>Xs,DetrObjectDetectionOutput:()=>Ys,DetrPreTrainedModel:()=>Ws,DetrSegmentationOutput:()=>Ks,Dinov2ForImageClassification:()=>Nr,Dinov2Model:()=>Br,Dinov2PreTrainedModel:()=>Ir,DistilBertForMaskedLM:()=>De,DistilBertForQuestionAnswering:()=>je,DistilBertForSequenceClassification:()=>Ne,DistilBertForTokenClassific
ation:()=>Oe,DistilBertModel:()=>Be,DistilBertPreTrainedModel:()=>Ie,DonutSwinModel:()=>Fr,DonutSwinPreTrainedModel:()=>Cr,EfficientNetForImageClassification:()=>ai,EfficientNetModel:()=>ii,EfficientNetPreTrainedModel:()=>oi,ElectraForMaskedLM:()=>he,ElectraForQuestionAnswering:()=>me,ElectraForSequenceClassification:()=>pe,ElectraForTokenClassification:()=>_e,ElectraModel:()=>ue,ElectraPreTrainedModel:()=>de,EsmForMaskedLM:()=>Ge,EsmForSequenceClassification:()=>$e,EsmForTokenClassification:()=>qe,EsmModel:()=>Re,EsmPreTrainedModel:()=>Ve,FalconForCausalLM:()=>Uo,FalconModel:()=>qo,FalconPreTrainedModel:()=>$o,FastViTForImageClassification:()=>Ps,FastViTModel:()=>Fs,FastViTPreTrainedModel:()=>Cs,Florence2ForConditionalGeneration:()=>hn,Florence2PreTrainedModel:()=>un,GLPNForDepthEstimation:()=>vr,GLPNModel:()=>Tr,GLPNPreTrainedModel:()=>kr,GPT2LMHeadModel:()=>Pn,GPT2Model:()=>Fn,GPT2PreTrainedModel:()=>Cn,GPTBigCodeForCausalLM:()=>Vn,GPTBigCodeModel:()=>Dn,GPTBigCodePreTrainedModel:()=>jn,GPTJForCausalLM:()=>On,GPTJModel:()=>Nn,GPTJPreTrainedModel:()=>Bn,GPTNeoForCausalLM:()=>En,GPTNeoModel:()=>An,GPTNeoPreTrainedModel:()=>Sn,GPTNeoXForCausalLM:()=>In,GPTNeoXModel:()=>Ln,GPTNeoXPreTrainedModel:()=>zn,Gemma2ForCausalLM:()=>ts,Gemma2Model:()=>es,Gemma2PreTrainedModel:()=>Zn,GemmaForCausalLM:()=>Jn,GemmaModel:()=>Kn,GemmaPreTrainedModel:()=>Yn,HubertForCTC:()=>xo,HubertForSequenceClassification:()=>ko,HubertModel:()=>yo,HubertPreTrainedModel:()=>bo,ImageMattingOutput:()=>Na,LlamaForCausalLM:()=>Wn,LlamaModel:()=>Un,LlamaPreTrainedModel:()=>qn,LlavaForConditionalGeneration:()=>cn,LlavaPreTrainedModel:()=>ln,LongT5ForConditionalGeneration:()=>gt,LongT5Model:()=>ft,LongT5PreTrainedModel:()=>mt,M2M100ForConditionalGeneration:()=>Hr,M2M100Model:()=>Qr,M2M100PreTrainedModel:()=>Xr,MBartForCausalLM:()=>St,MBartForConditionalGeneration:()=>Ft,MBartForSequenceClassification:()=>Pt,MBartModel:()=>Ct,MBartPreTrainedModel:()=>vt,MPNetForMaskedLM:()=>Je,MPNetForQuestionAnswering:(
)=>tt,MPNetForSequenceClassification:()=>Ze,MPNetForTokenClassification:()=>et,MPNetModel:()=>Ke,MPNetPreTrainedModel:()=>Ye,MT5ForConditionalGeneration:()=>bt,MT5Model:()=>Mt,MT5PreTrainedModel:()=>wt,MarianMTModel:()=>Wr,MarianModel:()=>Ur,MarianPreTrainedModel:()=>qr,MaskedLMOutput:()=>za,MistralForCausalLM:()=>Do,MistralModel:()=>jo,MistralPreTrainedModel:()=>Oo,MobileBertForMaskedLM:()=>Xe,MobileBertForQuestionAnswering:()=>He,MobileBertForSequenceClassification:()=>Qe,MobileBertModel:()=>We,MobileBertPreTrainedModel:()=>Ue,MobileNetV1ForImageClassification:()=>_i,MobileNetV1Model:()=>pi,MobileNetV1PreTrainedModel:()=>hi,MobileNetV2ForImageClassification:()=>gi,MobileNetV2Model:()=>fi,MobileNetV2PreTrainedModel:()=>mi,MobileNetV3ForImageClassification:()=>bi,MobileNetV3Model:()=>Mi,MobileNetV3PreTrainedModel:()=>wi,MobileNetV4ForImageClassification:()=>ki,MobileNetV4Model:()=>xi,MobileNetV4PreTrainedModel:()=>yi,MobileViTForImageClassification:()=>Ls,MobileViTModel:()=>zs,MobileViTPreTrainedModel:()=>Es,MobileViTV2ForImageClassification:()=>Ns,MobileViTV2Model:()=>Bs,MobileViTV2PreTrainedModel:()=>Is,ModelOutput:()=>G,Moondream1ForConditionalGeneration:()=>dn,MptForCausalLM:()=>Ms,MptModel:()=>ws,MptPreTrainedModel:()=>gs,MusicgenForCausalLM:()=>di,MusicgenForConditionalGeneration:()=>ui,MusicgenModel:()=>ci,MusicgenPreTrainedModel:()=>li,NomicBertModel:()=>K,NomicBertPreTrainedModel:()=>Y,OPTForCausalLM:()=>xs,OPTModel:()=>ys,OPTPreTrainedModel:()=>bs,OpenELMForCausalLM:()=>rs,OpenELMModel:()=>ss,OpenELMPreTrainedModel:()=>ns,OwlViTForObjectDetection:()=>Ds,OwlViTModel:()=>js,OwlViTPreTrainedModel:()=>Os,Owlv2ForObjectDetection:()=>Gs,Owlv2Model:()=>Rs,Owlv2PreTrainedModel:()=>Vs,Phi3ForCausalLM:()=>ps,Phi3Model:()=>hs,Phi3PreTrainedModel:()=>us,PhiForCausalLM:()=>ds,PhiModel:()=>cs,PhiPreTrainedModel:()=>ls,PreTrainedModel:()=>R,PretrainedMixin:()=>Ti,PyAnnoteForAudioFrameClassification:()=>so,PyAnnoteModel:()=>no,PyAnnotePreTrainedModel:()=>to,QuestionAnswer
ingModelOutput:()=>La,Qwen2ForCausalLM:()=>as,Qwen2Model:()=>is,Qwen2PreTrainedModel:()=>os,RTDetrForObjectDetection:()=>er,RTDetrModel:()=>Zs,RTDetrObjectDetectionOutput:()=>tr,RTDetrPreTrainedModel:()=>Js,ResNetForImageClassification:()=>ur,ResNetModel:()=>dr,ResNetPreTrainedModel:()=>cr,RoFormerForMaskedLM:()=>ee,RoFormerForQuestionAnswering:()=>se,RoFormerForSequenceClassification:()=>te,RoFormerForTokenClassification:()=>ne,RoFormerModel:()=>Z,RoFormerPreTrainedModel:()=>J,RobertaForMaskedLM:()=>jt,RobertaForQuestionAnswering:()=>Rt,RobertaForSequenceClassification:()=>Dt,RobertaForTokenClassification:()=>Vt,RobertaModel:()=>Ot,RobertaPreTrainedModel:()=>Nt,SamImageSegmentationOutput:()=>$r,SamModel:()=>Gr,SamPreTrainedModel:()=>Rr,SegformerForImageClassification:()=>ei,SegformerForSemanticSegmentation:()=>ti,SegformerModel:()=>Zo,SegformerPreTrainedModel:()=>Jo,Seq2SeqLMOutput:()=>Pa,SequenceClassifierOutput:()=>Sa,SiglipModel:()=>wn,SiglipPreTrainedModel:()=>gn,SiglipTextModel:()=>Mn,SiglipVisionModel:()=>bn,SpeechT5ForSpeechToText:()=>zo,SpeechT5ForTextToSpeech:()=>Lo,SpeechT5HifiGan:()=>Io,SpeechT5Model:()=>Eo,SpeechT5PreTrainedModel:()=>Ao,SqueezeBertForMaskedLM:()=>rt,SqueezeBertForQuestionAnswering:()=>it,SqueezeBertForSequenceClassification:()=>ot,SqueezeBertModel:()=>st,SqueezeBertPreTrainedModel:()=>nt,StableLmForCausalLM:()=>ri,StableLmModel:()=>si,StableLmPreTrainedModel:()=>ni,Starcoder2ForCausalLM:()=>Go,Starcoder2Model:()=>Ro,Starcoder2PreTrainedModel:()=>Vo,Swin2SRForImageSuperResolution:()=>gr,Swin2SRModel:()=>fr,Swin2SRPreTrainedModel:()=>mr,SwinForImageClassification:()=>_r,SwinModel:()=>pr,SwinPreTrainedModel:()=>hr,T5ForConditionalGeneration:()=>_t,T5Model:()=>pt,T5PreTrainedModel:()=>ht,TableTransformerForObjectDetection:()=>rr,TableTransformerModel:()=>sr,TableTransformerObjectDetectionOutput:()=>or,TableTransformerPreTrainedModel:()=>nr,TokenClassifierOutput:()=>Ea,TrOCRForCausalLM:()=>No,TrOCRPreTrainedModel:()=>Bo,UniSpeechForCTC:()=>l
o,UniSpeechForSequenceClassification:()=>co,UniSpeechModel:()=>ao,UniSpeechPreTrainedModel:()=>io,UniSpeechSatForAudioFrameClassification:()=>mo,UniSpeechSatForCTC:()=>po,UniSpeechSatForSequenceClassification:()=>_o,UniSpeechSatModel:()=>ho,UniSpeechSatPreTrainedModel:()=>uo,ViTForImageClassification:()=>vs,ViTModel:()=>Ts,ViTPreTrainedModel:()=>ks,VisionEncoderDecoderModel:()=>an,VitMatteForImageMatting:()=>As,VitMattePreTrainedModel:()=>Ss,VitsModel:()=>Ko,VitsModelOutput:()=>Oa,VitsPreTrainedModel:()=>Yo,Wav2Vec2BertForCTC:()=>wo,Wav2Vec2BertForSequenceClassification:()=>Mo,Wav2Vec2BertModel:()=>go,Wav2Vec2BertPreTrainedModel:()=>fo,Wav2Vec2ForAudioFrameClassification:()=>eo,Wav2Vec2ForCTC:()=>Jr,Wav2Vec2ForSequenceClassification:()=>Zr,Wav2Vec2Model:()=>Kr,Wav2Vec2PreTrainedModel:()=>Yr,WavLMForAudioFrameClassification:()=>So,WavLMForCTC:()=>Co,WavLMForSequenceClassification:()=>Fo,WavLMForXVector:()=>Po,WavLMModel:()=>vo,WavLMPreTrainedModel:()=>To,WeSpeakerResNetModel:()=>oo,WeSpeakerResNetPreTrainedModel:()=>ro,WhisperForConditionalGeneration:()=>on,WhisperModel:()=>rn,WhisperPreTrainedModel:()=>sn,XLMForQuestionAnswering:()=>Xt,XLMForSequenceClassification:()=>Ut,XLMForTokenClassification:()=>Wt,XLMModel:()=>$t,XLMPreTrainedModel:()=>Gt,XLMRobertaForMaskedLM:()=>Yt,XLMRobertaForQuestionAnswering:()=>Zt,XLMRobertaForSequenceClassification:()=>Kt,XLMRobertaForTokenClassification:()=>Jt,XLMRobertaModel:()=>Ht,XLMRobertaPreTrainedModel:()=>Qt,XLMWithLMHeadModel:()=>qt,XVectorOutput:()=>Aa,YolosForObjectDetection:()=>Dr,YolosModel:()=>jr,YolosObjectDetectionOutput:()=>Vr,YolosPreTrainedModel:()=>Or});var s=n(/*! ./configs.js */"./src/configs.js"),r=n(/*! ./backends/onnx.js */"./src/backends/onnx.js"),o=n(/*! ./utils/dtypes.js */"./src/utils/dtypes.js"),i=n(/*! ./utils/generic.js */"./src/utils/generic.js"),a=n(/*! ./utils/core.js */"./src/utils/core.js"),l=n(/*! ./utils/hub.js */"./src/utils/hub.js"),c=n(/*! 
./generation/logits_process.js */"./src/generation/logits_process.js"),d=n(/*! ./generation/configuration_utils.js */"./src/generation/configuration_utils.js"),u=n(/*! ./utils/tensor.js */"./src/utils/tensor.js"),h=n(/*! ./utils/maths.js */"./src/utils/maths.js"),p=n(/*! ./generation/stopping_criteria.js */"./src/generation/stopping_criteria.js"),_=n(/*! ./generation/logits_sampler.js */"./src/generation/logits_sampler.js"),m=n(/*! ./env.js */"./src/env.js"),f=n(/*! ./models/whisper/generation_whisper.js */"./src/models/whisper/generation_whisper.js"),g=n(/*! ./models/whisper/common_whisper.js */"./src/models/whisper/common_whisper.js");const w=0,M=1,b=2,y=3,x=4,k=5,T=6,v=7,C=new Map,F=new Map,P=new Map;async function S(e,t,n){return Object.fromEntries(await Promise.all(Object.keys(t).map((async i=>{const{buffer:a,session_options:c}=await async function(e,t,n){let i=n.device;i&&"string"!=typeof i&&(i.hasOwnProperty(t)?i=i[t]:(console.warn(`device not specified for "${t}". Using the default device.`),i=null));const a=i??(m.apis.IS_NODE_ENV?"cpu":"wasm"),c=(0,r.deviceToExecutionProviders)(a);let d=n.dtype;"string"!=typeof d&&(d&&d.hasOwnProperty(t)?d=d[t]:(d=o.DEFAULT_DEVICE_DTYPE_MAPPING[a]??o.DATA_TYPES.fp32,console.warn(`dtype not specified for "${t}". Using the default dtype (${d}) for this device (${a}).`)));const u=d;if(!o.DEFAULT_DTYPE_SUFFIX_MAPPING.hasOwnProperty(u))throw new Error(`Invalid dtype: ${u}. 
Should be one of: ${Object.keys(o.DATA_TYPES).join(", ")}`);if(u===o.DATA_TYPES.fp16&&"webgpu"===a&&!await(0,o.isWebGpuFp16Supported)())throw new Error(`The device (${a}) does not support fp16.`);const h=o.DEFAULT_DTYPE_SUFFIX_MAPPING[u],p=`${n.subfolder??""}/${t}${h}.onnx`,_={...n.session_options}??{};_.executionProviders??=c;const f=(0,l.getModelFile)(e,p,!0,n);let g=[];if(n.use_external_data_format&&(!0===n.use_external_data_format||"object"==typeof n.use_external_data_format&&n.use_external_data_format.hasOwnProperty(t)&&!0===n.use_external_data_format[t])){if(m.apis.IS_NODE_ENV)throw new Error("External data format is not yet supported in Node.js");const s=`${t}${h}.onnx_data`,r=`${n.subfolder??""}/${s}`;g.push(new Promise((async(t,o)=>{const i=await(0,l.getModelFile)(e,r,!0,n);t({path:s,data:i})})))}else void 0!==_.externalData&&(g=_.externalData.map((async t=>{if("string"==typeof t.data){const s=await(0,l.getModelFile)(e,t.data,!0,n);return{...t,data:s}}return t})));if(g.length>0&&(_.externalData=await Promise.all(g)),"webgpu"===a){const e=(0,s.getKeyValueShapes)(n.config,{prefix:"present"});if(Object.keys(e).length>0&&!(0,r.isONNXProxy)()){const t={};for(const n in e)t[n]="gpu-buffer";_.preferredOutputLocation=t}}return{buffer:await f,session_options:_}}(e,t[i],n);return[i,await(0,r.createInferenceSession)(a,c)]}))))}async function A(e,t){const n=function(e,t){const n=Object.create(null),s=[];for(const o of e.inputNames){const e=t[o];e instanceof u.Tensor?n[o]=(0,r.isONNXProxy)()?e.clone():e:s.push(o)}if(s.length>0)throw new Error(`An error occurred during model execution: "Missing the following inputs: ${s.join(", ")}.`);const o=Object.keys(t).length,i=e.inputNames.length;if(o>i){let n=Object.keys(t).filter((t=>!e.inputNames.includes(t)));console.warn(`WARNING: Too many inputs were provided (${o} > ${i}). 
The following inputs will be ignored: "${n.join(", ")}".`)}return n}(e,t);try{const t=Object.fromEntries(Object.entries(n).map((([e,t])=>[e,t.ort_tensor])));let s=await e.run(t);return s=E(s),s}catch(e){throw console.error(`An error occurred during model execution: "${e}".`),console.error("Inputs given to model:",n),e}}function E(e){for(let t in e)(0,r.isONNXTensor)(e[t])?e[t]=new u.Tensor(e[t]):"object"==typeof e[t]&&E(e[t]);return e}function z(e){if(e instanceof u.Tensor)return e;if(0===e.length)throw Error("items must be non-empty");if(Array.isArray(e[0])){if(e.some((t=>t.length!==e[0].length)))throw Error("Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' and/or 'truncation=True' to have batched tensors with the same length.");return new u.Tensor("int64",BigInt64Array.from(e.flat().map((e=>BigInt(e)))),[e.length,e[0].length])}return new u.Tensor("int64",BigInt64Array.from(e.map((e=>BigInt(e)))),[1,e.length])}function L(e){return new u.Tensor("bool",[e],[1])}async function I(e,t){let{encoder_outputs:n,input_ids:s,decoder_input_ids:r,...o}=t;if(!n){const s=(0,a.pick)(t,e.sessions.model.inputNames);n=(await B(e,s)).last_hidden_state}o.input_ids=r,o.encoder_hidden_states=n,e.sessions.decoder_model_merged.inputNames.includes("encoder_attention_mask")&&(o.encoder_attention_mask=t.attention_mask);return await N(e,o,!0)}async function B(e,t){const n=e.sessions.model,s=(0,a.pick)(t,n.inputNames);if(n.inputNames.includes("inputs_embeds")&&!s.inputs_embeds){if(!t.input_ids)throw new Error("Both `input_ids` and `inputs_embeds` are missing in the model inputs.");s.inputs_embeds=await e.encode_text({input_ids:t.input_ids})}return n.inputNames.includes("token_type_ids")&&!s.token_type_ids&&(s.token_type_ids=new u.Tensor("int64",new BigInt64Array(s.input_ids.data.length),s.input_ids.dims)),await A(n,s)}async function N(e,t,n=!1){const 
s=e.sessions[n?"decoder_model_merged":"model"],{past_key_values:r,...o}=t;s.inputNames.includes("use_cache_branch")&&(o.use_cache_branch=L(!!r)),s.inputNames.includes("position_ids")&&o.attention_mask&&!o.position_ids&&(o.position_ids=function(e,t=null){const{input_ids:n,inputs_embeds:s,attention_mask:r}=e,[o,i]=r.dims,a=new BigInt64Array(r.data.length);for(let e=0;e<o;++e){const t=e*i;let n=BigInt(0);for(let e=0;e<i;++e){const s=t+e;0n===r.data[s]?a[s]=BigInt(1):(a[s]=n,n+=r.data[s])}}let l=new u.Tensor("int64",a,r.dims);if(t){const e=-(n??s).dims.at(1);l=l.slice(null,[e,null])}return l}(o,r)),e.addPastKeyValues(o,r);const i=(0,a.pick)(o,s.inputNames);return await A(s,i)}async function O(e,{input_ids:t=null,attention_mask:n=null,pixel_values:s=null,position_ids:r=null,inputs_embeds:o=null,past_key_values:i=null,generation_config:a=null,logits_processor:l=null,...c}){if(!o)if(o=await e.encode_text({input_ids:t}),s&&1!==t.dims[1]){const r=await e.encode_image({pixel_values:s});({inputs_embeds:o,attention_mask:n}=e._merge_input_ids_with_image_features({image_features:r,inputs_embeds:o,input_ids:t,attention_mask:n}))}else if(i&&s&&1===t.dims[1]){const e=t.dims[1],s=Object.values(i)[0].dims.at(-2);n=(0,u.cat)([(0,u.ones)([t.dims[0],s]),n.slice(null,[n.dims[1]-e,n.dims[1]])],1)}return await N(e,{inputs_embeds:o,past_key_values:i,attention_mask:n,position_ids:r,generation_config:a,logits_processor:l},!0)}function j(e,t,n,s){if(n.past_key_values){const t=Object.values(n.past_key_values)[0].dims.at(-2),{input_ids:s,attention_mask:r}=n;if(r&&r.dims[1]>s.dims[1]);else if(t<s.dims[1])n.input_ids=s.slice(null,[t,null]);else if(null!=e.config.image_token_index&&s.data.some((t=>t==e.config.image_token_index))){const r=e.config.num_image_tokens;if(!r)throw new Error("`num_image_tokens` is missing in the model configuration.");const o=s.dims[1]-(t-r);n.input_ids=s.slice(null,[-o,null]),n.attention_mask=(0,u.ones)([1,t+o])}}return n}function D(e,t,n,s){return 
n.past_key_values&&(t=t.map((e=>[e.at(-1)]))),{...n,decoder_input_ids:z(t)}}function V(e,...t){return e.config.is_encoder_decoder?D(e,...t):j(e,...t)}class R extends i.Callable{main_input_name="input_ids";forward_params=["input_ids","attention_mask"];constructor(e,t){super(),this.config=e,this.sessions=t;const n=P.get(this.constructor),s=C.get(n);switch(this.can_generate=!1,this._forward=null,this._prepare_inputs_for_generation=null,s){case x:this.can_generate=!0,this._forward=N,this._prepare_inputs_for_generation=j;break;case b:case y:case v:this.can_generate=!0,this._forward=I,this._prepare_inputs_for_generation=D;break;case M:this._forward=I;break;case T:this.can_generate=!0,this._forward=O,this._prepare_inputs_for_generation=V;break;default:this._forward=B}this.can_generate&&this.forward_params.push("past_key_values"),this.custom_config=this.config["transformers.js_config"]??{}}async dispose(){const e=[];for(const t of Object.values(this.sessions))t?.handler?.dispose&&e.push(t.handler.dispose());return await Promise.all(e)}static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:r=null,local_files_only:o=!1,revision:i="main",model_file_name:a=null,subfolder:c="onnx",device:d=null,dtype:u=null,use_external_data_format:h=null,session_options:p={}}={}){let _={progress_callback:t,config:n,cache_dir:r,local_files_only:o,revision:i,model_file_name:a,subfolder:c,device:d,dtype:u,use_external_data_format:h,session_options:p};const m=P.get(this),f=C.get(m);let g;if(n=_.config=await s.AutoConfig.from_pretrained(e,_),f===x)g=await Promise.all([S(e,{model:_.model_file_name??"model"},_),(0,l.getModelJSON)(e,"generation_config.json",!1,_)]);else if(f===b||f===y)g=await Promise.all([S(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},_),(0,l.getModelJSON)(e,"generation_config.json",!1,_)]);else if(f===k)g=await Promise.all([S(e,{model:"vision_encoder",prompt_encoder_mask_decoder:"prompt_encoder_mask_decoder"},_)]);else 
if(f===M)g=await Promise.all([S(e,{model:"encoder_model",decoder_model_merged:"decoder_model_merged"},_)]);else if(f===T){const t={embed_tokens:"embed_tokens",vision_encoder:"vision_encoder",decoder_model_merged:"decoder_model_merged"};n.is_encoder_decoder&&(t.model="encoder_model"),g=await Promise.all([S(e,t,_),(0,l.getModelJSON)(e,"generation_config.json",!1,_)])}else f===v?g=await Promise.all([S(e,{model:"text_encoder",decoder_model_merged:"decoder_model_merged",encodec_decode:"encodec_decode"},_),(0,l.getModelJSON)(e,"generation_config.json",!1,_)]):(f!==w&&console.warn(`Model type for '${m??n?.model_type}' not found, assuming encoder-only architecture. Please report this at https://github.com/xenova/transformers.js/issues/new/choose.`),g=await Promise.all([S(e,{model:_.model_file_name??"model"},_)]));return new this(n,...g)}async _call(e){return await this.forward(e)}async forward(e){return await this._forward(this,e)}_get_logits_warper(e){const t=new c.LogitsProcessorList;return null!==e.temperature&&1!==e.temperature&&t.push(new c.TemperatureLogitsWarper(e.temperature)),null!==e.top_k&&0!==e.top_k&&t.push(new c.TopKLogitsWarper(e.top_k)),null!==e.top_p&&e.top_p<1&&t.push(new c.TopPLogitsWarper(e.top_p)),t}_get_logits_processor(e,t,n=null){const s=new c.LogitsProcessorList;if(null!==e.repetition_penalty&&1!==e.repetition_penalty&&s.push(new c.RepetitionPenaltyLogitsProcessor(e.repetition_penalty)),null!==e.no_repeat_ngram_size&&e.no_repeat_ngram_size>0&&s.push(new c.NoRepeatNGramLogitsProcessor(e.no_repeat_ngram_size)),null!==e.bad_words_ids&&s.push(new c.NoBadWordsLogitsProcessor(e.bad_words_ids,e.eos_token_id)),null!==e.min_length&&null!==e.eos_token_id&&e.min_length>0&&s.push(new c.MinLengthLogitsProcessor(e.min_length,e.eos_token_id)),null!==e.min_new_tokens&&null!==e.eos_token_id&&e.min_new_tokens>0&&s.push(new c.MinNewTokensLengthLogitsProcessor(t,e.min_new_tokens,e.eos_token_id)),null!==e.forced_bos_token_id&&s.push(new 
c.ForcedBOSTokenLogitsProcessor(e.forced_bos_token_id)),null!==e.forced_eos_token_id&&s.push(new c.ForcedEOSTokenLogitsProcessor(e.max_length,e.forced_eos_token_id)),null!==e.begin_suppress_tokens){const n=t>1||null===e.forced_bos_token_id?t:t+1;s.push(new c.SuppressTokensAtBeginLogitsProcessor(e.begin_suppress_tokens,n))}return null!==e.guidance_scale&&e.guidance_scale>1&&s.push(new c.ClassifierFreeGuidanceLogitsProcessor(e.guidance_scale)),null!==n&&s.extend(n),s}_prepare_generation_config(e,t,n=d.GenerationConfig){const s={...this.config};for(const e of["decoder","generator","text_config"])e in s&&Object.assign(s,s[e]);const r=new n(s);return"generation_config"in this&&Object.assign(r,this.generation_config),e&&Object.assign(r,e),t&&Object.assign(r,(0,a.pick)(t,Object.getOwnPropertyNames(r))),r}_get_stopping_criteria(e,t=null){const n=new p.StoppingCriteriaList;return null!==e.max_length&&n.push(new p.MaxLengthCriteria(e.max_length,this.config.max_position_embeddings??null)),null!==e.eos_token_id&&n.push(new p.EosTokenCriteria(e.eos_token_id)),t&&n.extend(t),n}_validate_model_class(){if(!this.can_generate){const e=[Ii,Oi,Li,Pi],t=P.get(this.constructor),n=new Set,s=this.config.model_type;for(const t of e){const e=t.get(s);e&&n.add(e[0])}let r=`The current model class (${t}) is not compatible with \`.generate()\`, as it doesn't have a language model head.`;throw n.size>0&&(r+=` Please use the following class instead: ${[...n].join(", ")}`),Error(r)}}prepare_inputs_for_generation(...e){return this._prepare_inputs_for_generation(this,...e)}_update_model_kwargs_for_generation({generated_input_ids:e,outputs:t,model_inputs:n,is_encoder_decoder:s}){return n.past_key_values=this.getPastKeyValues(t,n.past_key_values),n.input_ids=new u.Tensor("int64",e.flat(),[e.length,1]),s||(n.attention_mask=(0,u.cat)([n.attention_mask,(0,u.ones)([n.attention_mask.dims[0],1])],1)),n.position_ids=null,n}_prepare_model_inputs({inputs:e,bos_token_id:t,model_kwargs:n}){const 
s=(0,a.pick)(n,this.forward_params),r=this.main_input_name;if(r in s){if(e)throw new Error("`inputs`: {inputs}` were passed alongside {input_name} which is not allowed. Make sure to either pass {inputs} or {input_name}=...")}else s[r]=e;return{inputs_tensor:s[r],model_inputs:s,model_input_name:r}}async _prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:e,model_inputs:t,model_input_name:n,generation_config:s}){if(this.sessions.model.inputNames.includes("inputs_embeds")&&!t.inputs_embeds&&"_prepare_inputs_embeds"in this){const{input_ids:e,pixel_values:n,attention_mask:s,...r}=t,o=await this._prepare_inputs_embeds(t);t={...r,...(0,a.pick)(o,["inputs_embeds","attention_mask"])}}let{last_hidden_state:r}=await B(this,t);if(null!==s.guidance_scale&&s.guidance_scale>1)r=(0,u.cat)([r,(0,u.full_like)(r,0)],0),"attention_mask"in t&&(t.attention_mask=(0,u.cat)([t.attention_mask,(0,u.zeros_like)(t.attention_mask)],0));else if(t.decoder_input_ids){const e=z(t.decoder_input_ids).dims[0];if(e!==r.dims[0]){if(1!==r.dims[0])throw new Error(`The encoder outputs have a different batch size (${r.dims[0]}) than the decoder inputs (${e}).`);r=(0,u.cat)(Array.from({length:e},(()=>r)),0)}}return t.encoder_outputs=r,t}_prepare_decoder_input_ids_for_generation({batch_size:e,model_input_name:t,model_kwargs:n,decoder_start_token_id:s,bos_token_id:r,generation_config:o}){let{decoder_input_ids:i,...a}=n;if(i)Array.isArray(i[0])||(i=Array.from({length:e},(()=>i)));else if(s??=r,"musicgen"===this.config.model_type)i=Array.from({length:e*this.config.decoder.num_codebooks},(()=>[s]));else if(Array.isArray(s)){if(s.length!==e)throw new Error(`\`decoder_start_token_id\` expcted to have length ${e} but got ${s.length}`);i=s}else i=Array.from({length:e},(()=>[s]));return i=z(i),n.decoder_attention_mask=(0,u.ones_like)(i),{input_ids:i,model_inputs:a}}async 
generate({inputs:e=null,generation_config:t=null,logits_processor:n=null,stopping_criteria:s=null,streamer:r=null,...o}){this._validate_model_class(),t=this._prepare_generation_config(t,o);let{inputs_tensor:i,model_inputs:a,model_input_name:l}=this._prepare_model_inputs({inputs:e,model_kwargs:o});const c=this.config.is_encoder_decoder;let d;c&&("encoder_outputs"in a||(a=await this._prepare_encoder_decoder_kwargs_for_generation({inputs_tensor:i,model_inputs:a,model_input_name:l,generation_config:t}))),c?({input_ids:d,model_inputs:a}=this._prepare_decoder_input_ids_for_generation({batch_size:a[l].dims.at(0),model_input_name:l,model_kwargs:a,decoder_start_token_id:t.decoder_start_token_id,bos_token_id:t.bos_token_id,generation_config:t})):d=a[l];let h=d.dims.at(-1);null!==t.max_new_tokens&&(t.max_length=h+t.max_new_tokens);const p=this._get_logits_processor(t,h,n),m=this._get_stopping_criteria(t,s),f=a[l].dims.at(0),g=_.LogitsSampler.getSampler(t),w=new Array(f).fill(0),M=d.tolist();r&&r.put(M);let b=null,y={};for(;;){a=this.prepare_inputs_for_generation(M,a,t);const e=await this.forward(a);if(t.output_attentions&&t.return_dict_in_generate){const t=this.getAttentions(e);for(const e in t)e in y||(y[e]=[]),y[e].push(t[e])}const n=p(M,e.logits.slice(null,-1,null)),s=[];for(let e=0;e<n.dims.at(0);++e){const t=n[e],r=await g(t);for(const[t,n]of r){const r=BigInt(t);w[e]+=n,M[e].push(r),s.push([r]);break}}r&&r.put(s);if(m(M).every((e=>e))){t.return_dict_in_generate&&(b=this.getPastKeyValues(e,a.past_key_values,!1));break}a=this._update_model_kwargs_for_generation({generated_input_ids:s,outputs:e,model_inputs:a,is_encoder_decoder:c})}r&&r.end();const x=new u.Tensor("int64",M.flat(),[M.length,M[0].length]);return t.return_dict_in_generate?{sequences:x,past_key_values:b,...y}:x}getPastKeyValues(e,t,n=!0){const s=Object.create(null);for(const r in e)if(r.startsWith("present")){const 
o=r.replace("present","past_key_values");if(t&&r.includes("encoder"))s[o]=t[o];else{if(n&&t){const e=t[o];"gpu-buffer"===e.location&&e.dispose()}s[o]=e[r]}}return s}getAttentions(e){const t={};for(const n of["cross_attentions","encoder_attentions","decoder_attentions"])for(const s in e)s.startsWith(n)&&(n in t||(t[n]=[]),t[n].push(e[s]));return t}addPastKeyValues(e,t){if(t)Object.assign(e,t);else{const t=this.custom_config.kv_cache_dtype??"float32",n="float16"===t?new Uint16Array:[],r=(0,s.getKeyValueShapes)(this.config);for(const s in r)e[s]=new u.Tensor(t,n,r[s])}}async encode_image({pixel_values:e}){const t=(await A(this.sessions.vision_encoder,{pixel_values:e})).image_features;return this.config.num_image_tokens||(console.warn(`The number of image tokens was not set in the model configuration. Setting it to the number of features detected by the vision encoder (${t.dims[1]}).`),this.config.num_image_tokens=t.dims[1]),t}async encode_text({input_ids:e}){return(await A(this.sessions.embed_tokens,{input_ids:e})).inputs_embeds}}class G{}class $ extends G{constructor({last_hidden_state:e,hidden_states:t=null,attentions:n=null}){super(),this.last_hidden_state=e,this.hidden_states=t,this.attentions=n}}class q extends R{}class U extends q{}class W extends q{async _call(e){return new za(await super._call(e))}}class X extends q{async _call(e){return new Sa(await super._call(e))}}class Q extends q{async _call(e){return new Ea(await super._call(e))}}class H extends q{async _call(e){return new La(await super._call(e))}}class Y extends R{}class K extends Y{}class J extends R{}class Z extends J{}class ee extends J{async _call(e){return new za(await super._call(e))}}class te extends J{async _call(e){return new Sa(await super._call(e))}}class ne extends J{async _call(e){return new Ea(await super._call(e))}}class se extends J{async _call(e){return new La(await super._call(e))}}class re extends R{}class oe extends re{}class ie extends re{async _call(e){return new za(await 
super._call(e))}}class ae extends re{async _call(e){return new Sa(await super._call(e))}}class le extends re{async _call(e){return new Ea(await super._call(e))}}class ce extends re{async _call(e){return new La(await super._call(e))}}class de extends R{}class ue extends de{}class he extends de{async _call(e){return new za(await super._call(e))}}class pe extends de{async _call(e){return new Sa(await super._call(e))}}class _e extends de{async _call(e){return new Ea(await super._call(e))}}class me extends de{async _call(e){return new La(await super._call(e))}}class fe extends R{}class ge extends fe{}class we extends fe{async _call(e){return new za(await super._call(e))}}class Me extends fe{async _call(e){return new Sa(await super._call(e))}}class be extends fe{async _call(e){return new Ea(await super._call(e))}}class ye extends fe{async _call(e){return new La(await super._call(e))}}class xe extends R{}class ke extends xe{}class Te extends xe{async _call(e){return new za(await super._call(e))}}class ve extends xe{async _call(e){return new Sa(await super._call(e))}}class Ce extends xe{async _call(e){return new Ea(await super._call(e))}}class Fe extends xe{async _call(e){return new La(await super._call(e))}}class Pe extends R{}class Se extends Pe{}class Ae extends Pe{async _call(e){return new za(await super._call(e))}}class Ee extends Pe{async _call(e){return new Sa(await super._call(e))}}class ze extends Pe{async _call(e){return new Ea(await super._call(e))}}class Le extends Pe{async _call(e){return new La(await super._call(e))}}class Ie extends R{}class Be extends Ie{}class Ne extends Ie{async _call(e){return new Sa(await super._call(e))}}class Oe extends Ie{async _call(e){return new Ea(await super._call(e))}}class je extends Ie{async _call(e){return new La(await super._call(e))}}class De extends Ie{async _call(e){return new za(await super._call(e))}}class Ve extends R{}class Re extends Ve{}class Ge extends Ve{async _call(e){return new za(await super._call(e))}}class $e 
extends Ve{async _call(e){return new Sa(await super._call(e))}}class qe extends Ve{async _call(e){return new Ea(await super._call(e))}}class Ue extends R{}class We extends Ue{}class Xe extends Ue{async _call(e){return new za(await super._call(e))}}class Qe extends Ue{async _call(e){return new Sa(await super._call(e))}}class He extends Ue{async _call(e){return new La(await super._call(e))}}class Ye extends R{}class Ke extends Ye{}class Je extends Ye{async _call(e){return new za(await super._call(e))}}class Ze extends Ye{async _call(e){return new Sa(await super._call(e))}}class et extends Ye{async _call(e){return new Ea(await super._call(e))}}class tt extends Ye{async _call(e){return new La(await super._call(e))}}class nt extends R{}class st extends nt{}class rt extends nt{async _call(e){return new za(await super._call(e))}}class ot extends nt{async _call(e){return new Sa(await super._call(e))}}class it extends nt{async _call(e){return new La(await super._call(e))}}class at extends R{}class lt extends at{}class ct extends at{async _call(e){return new Sa(await super._call(e))}}class dt extends at{async _call(e){return new La(await super._call(e))}}class ut extends at{async _call(e){return new za(await super._call(e))}}class ht extends R{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class pt extends ht{}class _t extends ht{}class mt extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class ft extends mt{}class gt extends mt{}class wt extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Mt extends wt{}class bt extends wt{}class yt extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class xt extends yt{}class kt extends yt{}class Tt extends yt{async _call(e){return new Sa(await super._call(e))}}class vt extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Ct extends 
vt{}class Ft extends vt{}class Pt extends vt{async _call(e){return new Sa(await super._call(e))}}class St extends vt{}class At extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Et extends At{}class zt extends At{}class Lt extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class It extends Lt{}class Bt extends Lt{}class Nt extends R{}class Ot extends Nt{}class jt extends Nt{async _call(e){return new za(await super._call(e))}}class Dt extends Nt{async _call(e){return new Sa(await super._call(e))}}class Vt extends Nt{async _call(e){return new Ea(await super._call(e))}}class Rt extends Nt{async _call(e){return new La(await super._call(e))}}class Gt extends R{}class $t extends Gt{}class qt extends Gt{async _call(e){return new za(await super._call(e))}}class Ut extends Gt{async _call(e){return new Sa(await super._call(e))}}class Wt extends Gt{async _call(e){return new Ea(await super._call(e))}}class Xt extends Gt{async _call(e){return new La(await super._call(e))}}class Qt extends R{}class Ht extends Qt{}class Yt extends Qt{async _call(e){return new za(await super._call(e))}}class Kt extends Qt{async _call(e){return new Sa(await super._call(e))}}class Jt extends Qt{async _call(e){return new Ea(await super._call(e))}}class Zt extends Qt{async _call(e){return new La(await super._call(e))}}class en extends R{}class tn extends en{}class nn extends en{}class sn extends R{requires_attention_mask=!1;main_input_name="input_features";forward_params=["input_features","attention_mask","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class rn extends sn{}class on extends sn{_prepare_generation_config(e,t){return super._prepare_generation_config(e,t,f.WhisperGenerationConfig)}_retrieve_init_tokens(e){const t=[e.decoder_start_token_id];let n=e.language;const s=e.task;if(e.is_multilingual){n||(console.warn("No language specified - defaulting to English (en)."),n="en");const 
r=`<|${(0,g.whisper_language_to_code)(n)}|>`;t.push(e.lang_to_id[r]),t.push(e.task_to_id[s??"transcribe"])}else if(n||s)throw new Error("Cannot specify `task` or `language` for an English-only model. If the model is intended to be multilingual, pass `is_multilingual=true` to generate, or update the generation config.");return!e.return_timestamps&&e.no_timestamps_token_id&&t.at(-1)!==e.no_timestamps_token_id?t.push(e.no_timestamps_token_id):e.return_timestamps&&t.at(-1)===e.no_timestamps_token_id&&(console.warn("<|notimestamps|> prompt token is removed from generation_config since `return_timestamps` is set to `true`."),t.pop()),t.filter((e=>null!=e))}async generate({inputs:e=null,generation_config:t=null,logits_processor:n=null,stopping_criteria:s=null,...r}){t=this._prepare_generation_config(t,r);const o=r.decoder_input_ids??this._retrieve_init_tokens(t);if(t.return_timestamps&&(n??=new c.LogitsProcessorList,n.push(new c.WhisperTimeStampLogitsProcessor(t,o))),t.begin_suppress_tokens&&(n??=new c.LogitsProcessorList,n.push(new c.SuppressTokensAtBeginLogitsProcessor(t.begin_suppress_tokens,o.length))),t.return_token_timestamps){if(!t.alignment_heads)throw new Error("Model generation config has no `alignment_heads`, token-level timestamps not available. See https://gist.github.com/hollance/42e32852f24243b748ae6bc1f985b13a on how to add this property to the generation config.");"translate"===t.task&&console.warn("Token-level timestamps may not be reliable for task 'translate'."),t.output_attentions=!0,t.return_dict_in_generate=!0}const i=await super.generate({inputs:e,generation_config:t,logits_processor:n,decoder_input_ids:o,...r});return t.return_token_timestamps&&(i.token_timestamps=this._extract_token_timestamps(i,t.alignment_heads,t.num_frames)),i}_extract_token_timestamps(e,t,n=null,s=.02){if(!e.cross_attentions)throw new Error("Model outputs must contain cross attentions to extract timestamps. 
This is most likely because the model was not exported with `output_attentions=True`.");null==n&&console.warn("`num_frames` has not been set, meaning the entire audio will be analyzed. This may lead to inaccurate token-level timestamps for short audios (< 30 seconds).");let r=this.config.median_filter_width;void 0===r&&(console.warn("Model config has no `median_filter_width`, using default value of 7."),r=7);const o=e.cross_attentions,i=Array.from({length:this.config.decoder_layers},((e,t)=>(0,u.cat)(o.map((e=>e[t])),2))),l=(0,u.stack)(t.map((([e,t])=>{if(e>=i.length)throw new Error(`Layer index ${e} is out of bounds for cross attentions (length ${i.length}).`);return n?i[e].slice(null,t,null,[0,n]):i[e].slice(null,t)}))).transpose(1,0,2,3),[c,d]=(0,u.std_mean)(l,-2,0,!0),p=l.clone();for(let e=0;e<p.dims[0];++e){const t=p[e];for(let n=0;n<t.dims[0];++n){const s=t[n],o=c[e][n][0].data,i=d[e][n][0].data;for(let e=0;e<s.dims[0];++e){let t=s[e].data;for(let e=0;e<t.length;++e)t[e]=(t[e]-i[e])/o[e];t.set((0,h.medianFilter)(t,r))}}}const _=[(0,u.mean)(p,1)],m=e.sequences.dims,f=new u.Tensor("float32",new Float32Array(m[0]*m[1]),m);for(let e=0;e<m[0];++e){const t=_[e].neg().squeeze_(0),[n,r]=(0,h.dynamic_time_warping)(t.tolist()),o=Array.from({length:n.length-1},((e,t)=>n[t+1]-n[t])),i=(0,a.mergeArrays)([1],o).map((e=>!!e)),l=[];for(let e=0;e<i.length;++e)i[e]&&l.push(r[e]*s);f[e].data.set(l,1)}return f}}class an extends R{main_input_name="pixel_values";forward_params=["pixel_values","input_ids","encoder_hidden_states","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class ln extends R{forward_params=["input_ids","pixel_values","attention_mask","position_ids","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}}class cn extends ln{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:n,attention_mask:s}){const 
r=this.config.image_token_index,o=n.tolist().map((e=>e.findIndex((e=>e==r)))),i=o.every((e=>-1===e)),a=o.every((e=>-1!==e));if(!i&&!a)throw new Error("Every input should contain either 0 or 1 image token.");if(i)return{inputs_embeds:e,attention_mask:s};const l=[],c=[];for(let n=0;n<o.length;++n){const r=o[n],i=e[n],a=t[n],d=s[n];l.push((0,u.cat)([i.slice([0,r]),a,i.slice([r+1,i.dims[0]])],0)),c.push((0,u.cat)([d.slice([0,r]),(0,u.ones)([a.dims[0]]),d.slice([r+1,d.dims[0]])],0))}return{inputs_embeds:(0,u.stack)(l,0),attention_mask:(0,u.stack)(c,0)}}}class dn extends cn{}class un extends R{forward_params=["input_ids","inputs_embeds","attention_mask","pixel_values","encoder_outputs","decoder_input_ids","decoder_inputs_embeds","decoder_attention_mask","past_key_values"];main_input_name="inputs_embeds";constructor(e,t,n){super(e,t),this.generation_config=n}}class hn extends un{_merge_input_ids_with_image_features({inputs_embeds:e,image_features:t,input_ids:n,attention_mask:s}){return{inputs_embeds:(0,u.cat)([t,e],1),attention_mask:(0,u.cat)([(0,u.ones)(t.dims.slice(0,2)),s],1)}}async _prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:n,attention_mask:s}){if(!e&&!t)throw new Error("Either `input_ids` or `pixel_values` should be provided.");let r,o;return e&&(r=await this.encode_text({input_ids:e})),t&&(o=await this.encode_image({pixel_values:t})),r&&o?({inputs_embeds:n,attention_mask:s}=this._merge_input_ids_with_image_features({inputs_embeds:r,image_features:o,input_ids:e,attention_mask:s})):n=r||o,{inputs_embeds:n,attention_mask:s}}async forward({input_ids:e,pixel_values:t,attention_mask:n,decoder_input_ids:s,decoder_attention_mask:r,encoder_outputs:o,past_key_values:i,inputs_embeds:a,decoder_inputs_embeds:l}){if(a||({inputs_embeds:a,attention_mask:n}=await this._prepare_inputs_embeds({input_ids:e,pixel_values:t,inputs_embeds:a,attention_mask:n})),!o){let{last_hidden_state:e}=await B(this,{inputs_embeds:a,attention_mask:n});o=e}if(!l){if(!s)throw new 
Error("Either `decoder_input_ids` or `decoder_inputs_embeds` should be provided.");l=await this.encode_text({input_ids:s})}const c={inputs_embeds:l,attention_mask:r,encoder_attention_mask:n,encoder_hidden_states:o,past_key_values:i};return await N(this,c,!0)}}class pn extends R{}class _n extends pn{}class mn extends pn{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class fn extends pn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class gn extends R{}class wn extends gn{}class Mn extends gn{static async from_pretrained(e,t={}){return t.model_file_name??="text_model",super.from_pretrained(e,t)}}class bn extends pn{static async from_pretrained(e,t={}){return t.model_file_name??="vision_model",super.from_pretrained(e,t)}}class yn extends R{}class xn extends yn{}class kn extends R{}class Tn extends kn{}class vn extends kn{}class Cn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Fn extends Cn{}class Pn extends Cn{}class Sn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class An extends Sn{}class En extends Sn{}class zn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Ln extends zn{}class In extends zn{}class Bn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Nn extends Bn{}class On extends Bn{}class jn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Dn extends jn{}class Vn extends jn{}class Rn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Gn extends Rn{}class $n extends Rn{}class qn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Un extends qn{}class Wn extends qn{}class Xn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Qn extends Xn{}class Hn extends Xn{}class Yn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Kn extends Yn{}class Jn extends 
Yn{}class Zn extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class es extends Zn{}class ts extends Zn{}class ns extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class ss extends ns{}class rs extends ns{}class os extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class is extends os{}class as extends os{}class ls extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class cs extends ls{}class ds extends ls{}class us extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class hs extends us{}class ps extends us{}class _s extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class ms extends _s{}class fs extends _s{}class gs extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class ws extends gs{}class Ms extends gs{}class bs extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class ys extends bs{}class xs extends bs{}class ks extends R{}class Ts extends ks{}class vs extends ks{async _call(e){return new Sa(await super._call(e))}}class Cs extends R{}class Fs extends Cs{}class Ps extends Cs{async _call(e){return new Sa(await super._call(e))}}class Ss extends R{}class As extends Ss{async _call(e){return new Na(await super._call(e))}}class Es extends R{}class zs extends Es{}class Ls extends Es{async _call(e){return new Sa(await super._call(e))}}class Is extends R{}class Bs extends Is{}class Ns extends Is{async _call(e){return new Sa(await super._call(e))}}class Os extends R{}class js extends Os{}class Ds extends Os{}class Vs extends R{}class Rs extends Vs{}class Gs extends Vs{}class $s extends R{}class qs extends $s{}class Us extends $s{async _call(e){return new Sa(await super._call(e))}}class Ws extends R{}class Xs extends Ws{}class Qs extends Ws{async _call(e){return new Ys(await super._call(e))}}class Hs extends Ws{async _call(e){return new Ks(await super._call(e))}}class Ys extends 
G{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class Ks extends G{constructor({logits:e,pred_boxes:t,pred_masks:n}){super(),this.logits=e,this.pred_boxes=t,this.pred_masks=n}}class Js extends R{}class Zs extends Js{}class er extends Js{async _call(e){return new tr(await super._call(e))}}class tr extends G{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class nr extends R{}class sr extends nr{}class rr extends nr{async _call(e){return new or(await super._call(e))}}class or extends Ys{}class ir extends R{}class ar extends ir{}class lr extends ir{async _call(e){return new Sa(await super._call(e))}}class cr extends R{}class dr extends cr{}class ur extends cr{async _call(e){return new Sa(await super._call(e))}}class hr extends R{}class pr extends hr{}class _r extends hr{async _call(e){return new Sa(await super._call(e))}}class mr extends R{}class fr extends mr{}class gr extends mr{}class wr extends R{}class Mr extends wr{}class br extends wr{}class yr extends R{}class xr extends yr{}class kr extends R{}class Tr extends kr{}class vr extends kr{}class Cr extends R{}class Fr extends Cr{}class Pr extends R{}class Sr extends Pr{}class Ar extends Pr{async _call(e){return new Sa(await super._call(e))}}class Er extends R{}class zr extends Er{}class Lr extends Er{async _call(e){return new Sa(await super._call(e))}}class Ir extends R{}class Br extends Ir{}class Nr extends Ir{async _call(e){return new Sa(await super._call(e))}}class Or extends R{}class jr extends Or{}class Dr extends Or{async _call(e){return new Vr(await super._call(e))}}class Vr extends G{constructor({logits:e,pred_boxes:t}){super(),this.logits=e,this.pred_boxes=t}}class Rr extends R{}class Gr extends Rr{async get_image_embeddings({pixel_values:e}){return await B(this,{pixel_values:e})}async forward(e){if(e.image_embeddings&&e.image_positional_embeddings||(e={...e,...await this.get_image_embeddings(e)}),!e.input_labels&&e.input_points){const 
t=e.input_points.dims.slice(0,-1),n=t.reduce(((e,t)=>e*t),1);e.input_labels=new u.Tensor("int64",new BigInt64Array(n).fill(1n),t)}const t={image_embeddings:e.image_embeddings,image_positional_embeddings:e.image_positional_embeddings};return e.input_points&&(t.input_points=e.input_points),e.input_labels&&(t.input_labels=e.input_labels),e.input_boxes&&(t.input_boxes=e.input_boxes),await A(this.sessions.prompt_encoder_mask_decoder,t)}async _call(e){return new $r(await super._call(e))}}class $r extends G{constructor({iou_scores:e,pred_masks:t}){super(),this.iou_scores=e,this.pred_masks=t}}class qr extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Ur extends qr{}class Wr extends qr{}class Xr extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Qr extends Xr{}class Hr extends Xr{}class Yr extends R{}class Kr extends Yr{}class Jr extends Yr{async _call(e){return new Ia(await super._call(e))}}class Zr extends Yr{async _call(e){return new Sa(await super._call(e))}}class eo extends Yr{async _call(e){return new Ea(await super._call(e))}}class to extends R{}class no extends to{}class so extends to{async _call(e){return new Ea(await super._call(e))}}class ro extends R{}class oo extends ro{}class io extends R{}class ao extends io{}class lo extends io{async _call(e){return new Ia(await super._call(e))}}class co extends io{async _call(e){return new Sa(await super._call(e))}}class uo extends R{}class ho extends uo{}class po extends uo{async _call(e){return new Ia(await super._call(e))}}class _o extends uo{async _call(e){return new Sa(await super._call(e))}}class mo extends uo{async _call(e){return new Ea(await super._call(e))}}class fo extends R{}class go extends fo{}class wo extends fo{async _call(e){return new Ia(await super._call(e))}}class Mo extends fo{async _call(e){return new Sa(await super._call(e))}}class bo extends R{}class yo extends Yr{}class xo extends Yr{async _call(e){return new Ia(await super._call(e))}}class ko extends 
Yr{async _call(e){return new Sa(await super._call(e))}}class To extends R{}class vo extends To{}class Co extends To{async _call(e){return new Ia(await super._call(e))}}class Fo extends To{async _call(e){return new Sa(await super._call(e))}}class Po extends To{async _call(e){return new Aa(await super._call(e))}}class So extends To{async _call(e){return new Ea(await super._call(e))}}class Ao extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Eo extends Ao{}class zo extends Ao{}class Lo extends Ao{async generate_speech(e,t,{threshold:n=.5,minlenratio:s=0,maxlenratio:r=20,vocoder:o=null}={}){const i={input_ids:e},{encoder_outputs:a,encoder_attention_mask:l}=await B(this,i),c=a.dims[1]/this.config.reduction_factor,d=Math.floor(c*r),h=Math.floor(c*s),p=this.config.num_mel_bins;let _=[],m=null,f=null,g=0;for(;;){++g;const e=L(!!f);let s;s=f?f.output_sequence_out:new u.Tensor("float32",new Float32Array(p),[1,1,p]);let r={use_cache_branch:e,output_sequence:s,encoder_attention_mask:l,speaker_embeddings:t,encoder_hidden_states:a};this.addPastKeyValues(r,m),f=await A(this.sessions.decoder_model_merged,r),m=this.getPastKeyValues(f,m);const{prob:o,spectrum:i}=f;if(_.push(i),g>=h&&(Array.from(o.data).filter((e=>e>=n)).length>0||g>=d))break}const w=(0,u.cat)(_),{waveform:M}=await A(o.sessions.model,{spectrogram:w});return{spectrogram:w,waveform:M}}}class Io extends R{main_input_name="spectrogram"}class Bo extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class No extends Bo{}class Oo extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class jo extends Oo{}class Do extends Oo{}class Vo extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class Ro extends Vo{}class Go extends Vo{}class $o extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class qo extends $o{}class Uo extends $o{}class Wo extends R{}class Xo extends Wo{}class Qo extends Wo{static async from_pretrained(e,t={}){return 
t.model_file_name??="text_model",super.from_pretrained(e,t)}}class Ho extends Wo{static async from_pretrained(e,t={}){return t.model_file_name??="audio_model",super.from_pretrained(e,t)}}class Yo extends R{}class Ko extends Yo{async _call(e){return new Oa(await super._call(e))}}class Jo extends R{}class Zo extends Jo{}class ei extends Jo{}class ti extends Jo{}class ni extends R{constructor(e,t,n){super(e,t),this.generation_config=n}}class si extends ni{}class ri extends ni{}class oi extends R{}class ii extends oi{}class ai extends oi{async _call(e){return new Sa(await super._call(e))}}class li extends R{}class ci extends li{}class di extends li{}class ui extends R{forward_params=["input_ids","attention_mask","encoder_outputs","decoder_input_ids","decoder_attention_mask","past_key_values"];constructor(e,t,n){super(e,t),this.generation_config=n}_apply_and_filter_by_delay_pattern_mask(e){const[t,n]=e.dims,s=this.config.decoder.num_codebooks,r=n-s;let o=0;for(let t=0;t<e.size;++t){if(e.data[t]===this.config.decoder.pad_token_id)continue;const i=t%n-Math.floor(t/n)%s;i>0&&i<=r&&(e.data[o++]=e.data[t])}const i=Math.floor(t/s),a=o/(i*s);return new u.Tensor(e.type,e.data.slice(0,o),[i,s,a])}prepare_inputs_for_generation(e,t,n){let s=structuredClone(e);for(let e=0;e<s.length;++e)for(let t=0;t<s[e].length;++t)e%this.config.decoder.num_codebooks>=t&&(s[e][t]=BigInt(this.config.decoder.pad_token_id));null!==n.guidance_scale&&n.guidance_scale>1&&(s=s.concat(s));return super.prepare_inputs_for_generation(s,t,n)}async generate(e){const t=await super.generate(e),n=this._apply_and_filter_by_delay_pattern_mask(t).unsqueeze_(0),{audio_values:s}=await A(this.sessions.encodec_decode,{audio_codes:n});return s}}class hi extends R{}class pi extends hi{}class _i extends hi{async _call(e){return new Sa(await super._call(e))}}class mi extends R{}class fi extends mi{}class gi extends mi{async _call(e){return new Sa(await super._call(e))}}class wi extends R{}class Mi extends wi{}class bi 
extends wi{async _call(e){return new Sa(await super._call(e))}}class yi extends R{}class xi extends yi{}class ki extends yi{async _call(e){return new Sa(await super._call(e))}}class Ti{static MODEL_CLASS_MAPPINGS=null;static BASE_IF_FAIL=!1;static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:r=null,local_files_only:o=!1,revision:i="main",model_file_name:a=null,subfolder:l="onnx",device:c=null,dtype:d=null,use_external_data_format:u=null,session_options:h={}}={}){let p={progress_callback:t,config:n,cache_dir:r,local_files_only:o,revision:i,model_file_name:a,subfolder:l,device:c,dtype:d,use_external_data_format:u,session_options:h};if(p.config=await s.AutoConfig.from_pretrained(e,p),!this.MODEL_CLASS_MAPPINGS)throw new Error("`MODEL_CLASS_MAPPINGS` not implemented for this type of `AutoClass`: "+this.name);for(let t of this.MODEL_CLASS_MAPPINGS){const n=t.get(p.config.model_type);if(n)return await n[1].from_pretrained(e,p)}if(this.BASE_IF_FAIL)return console.warn(`Unknown model class "${p.config.model_type}", attempting to construct from base class.`),await R.from_pretrained(e,p);throw Error(`Unsupported model type: ${p.config.model_type}`)}}const vi=new 
Map([["bert",["BertModel",U]],["nomic_bert",["NomicBertModel",K]],["roformer",["RoFormerModel",Z]],["electra",["ElectraModel",ue]],["esm",["EsmModel",Re]],["convbert",["ConvBertModel",oe]],["camembert",["CamembertModel",ge]],["deberta",["DebertaModel",ke]],["deberta-v2",["DebertaV2Model",Se]],["mpnet",["MPNetModel",Ke]],["albert",["AlbertModel",lt]],["distilbert",["DistilBertModel",Be]],["roberta",["RobertaModel",Ot]],["xlm",["XLMModel",$t]],["xlm-roberta",["XLMRobertaModel",Ht]],["clap",["ClapModel",Xo]],["clip",["CLIPModel",_n]],["clipseg",["CLIPSegModel",Tn]],["chinese_clip",["ChineseCLIPModel",xn]],["siglip",["SiglipModel",wn]],["mobilebert",["MobileBertModel",We]],["squeezebert",["SqueezeBertModel",st]],["wav2vec2",["Wav2Vec2Model",Kr]],["wav2vec2-bert",["Wav2Vec2BertModel",go]],["unispeech",["UniSpeechModel",ao]],["unispeech-sat",["UniSpeechSatModel",ho]],["hubert",["HubertModel",yo]],["wavlm",["WavLMModel",vo]],["audio-spectrogram-transformer",["ASTModel",tn]],["vits",["VitsModel",Ko]],["pyannote",["PyAnnoteModel",no]],["wespeaker-resnet",["WeSpeakerResNetModel",oo]],["detr",["DetrModel",Xs]],["rt_detr",["RTDetrModel",Zs]],["table-transformer",["TableTransformerModel",sr]],["vit",["ViTModel",Ts]],["fastvit",["FastViTModel",Fs]],["mobilevit",["MobileViTModel",zs]],["mobilevitv2",["MobileViTV2Model",Bs]],["owlvit",["OwlViTModel",js]],["owlv2",["Owlv2Model",Rs]],["beit",["BeitModel",qs]],["deit",["DeiTModel",ar]],["convnext",["ConvNextModel",Sr]],["convnextv2",["ConvNextV2Model",zr]],["dinov2",["Dinov2Model",Br]],["resnet",["ResNetModel",dr]],["swin",["SwinModel",pr]],["swin2sr",["Swin2SRModel",fr]],["donut-swin",["DonutSwinModel",Fr]],["yolos",["YolosModel",jr]],["dpt",["DPTModel",Mr]],["glpn",["GLPNModel",Tr]],["hifigan",["SpeechT5HifiGan",Io]],["efficientnet",["EfficientNetModel",ii]],["mobilenet_v1",["MobileNetV1Model",pi]],["mobilenet_v2",["MobileNetV2Model",fi]],["mobilenet_v3",["MobileNetV3Model",Mi]],["mobilenet_v4",["MobileNetV4Model",xi]]]),Ci=new 
Map([["t5",["T5Model",pt]],["longt5",["LongT5Model",ft]],["mt5",["MT5Model",Mt]],["bart",["BartModel",xt]],["mbart",["MBartModel",Ct]],["marian",["MarianModel",Ur]],["whisper",["WhisperModel",rn]],["m2m_100",["M2M100Model",Qr]],["blenderbot",["BlenderbotModel",Et]],["blenderbot-small",["BlenderbotSmallModel",It]]]),Fi=new Map([["bloom",["BloomModel",ms]],["gpt2",["GPT2Model",Fn]],["gptj",["GPTJModel",Nn]],["gpt_bigcode",["GPTBigCodeModel",Dn]],["gpt_neo",["GPTNeoModel",An]],["gpt_neox",["GPTNeoXModel",Ln]],["codegen",["CodeGenModel",Gn]],["llama",["LlamaModel",Un]],["cohere",["CohereModel",Qn]],["gemma",["GemmaModel",Kn]],["gemma2",["Gemma2Model",es]],["openelm",["OpenELMModel",ss]],["qwen2",["Qwen2Model",is]],["phi",["PhiModel",cs]],["phi3",["Phi3Model",hs]],["mpt",["MptModel",ws]],["opt",["OPTModel",ys]],["mistral",["MistralModel",jo]],["starcoder2",["Starcoder2Model",Ro]],["falcon",["FalconModel",qo]],["stablelm",["StableLmModel",si]]]),Pi=new Map([["speecht5",["SpeechT5ForSpeechToText",zo]],["whisper",["WhisperForConditionalGeneration",on]]]),Si=new Map([["speecht5",["SpeechT5ForTextToSpeech",Lo]]]),Ai=new Map([["vits",["VitsModel",Ko]],["musicgen",["MusicgenForConditionalGeneration",ui]]]),Ei=new 
Map([["bert",["BertForSequenceClassification",X]],["roformer",["RoFormerForSequenceClassification",te]],["electra",["ElectraForSequenceClassification",pe]],["esm",["EsmForSequenceClassification",$e]],["convbert",["ConvBertForSequenceClassification",ae]],["camembert",["CamembertForSequenceClassification",Me]],["deberta",["DebertaForSequenceClassification",ve]],["deberta-v2",["DebertaV2ForSequenceClassification",Ee]],["mpnet",["MPNetForSequenceClassification",Ze]],["albert",["AlbertForSequenceClassification",ct]],["distilbert",["DistilBertForSequenceClassification",Ne]],["roberta",["RobertaForSequenceClassification",Dt]],["xlm",["XLMForSequenceClassification",Ut]],["xlm-roberta",["XLMRobertaForSequenceClassification",Kt]],["bart",["BartForSequenceClassification",Tt]],["mbart",["MBartForSequenceClassification",Pt]],["mobilebert",["MobileBertForSequenceClassification",Qe]],["squeezebert",["SqueezeBertForSequenceClassification",ot]]]),zi=new Map([["bert",["BertForTokenClassification",Q]],["roformer",["RoFormerForTokenClassification",ne]],["electra",["ElectraForTokenClassification",_e]],["esm",["EsmForTokenClassification",qe]],["convbert",["ConvBertForTokenClassification",le]],["camembert",["CamembertForTokenClassification",be]],["deberta",["DebertaForTokenClassification",Ce]],["deberta-v2",["DebertaV2ForTokenClassification",ze]],["mpnet",["MPNetForTokenClassification",et]],["distilbert",["DistilBertForTokenClassification",Oe]],["roberta",["RobertaForTokenClassification",Vt]],["xlm",["XLMForTokenClassification",Wt]],["xlm-roberta",["XLMRobertaForTokenClassification",Jt]]]),Li=new 
Map([["t5",["T5ForConditionalGeneration",_t]],["longt5",["LongT5ForConditionalGeneration",gt]],["mt5",["MT5ForConditionalGeneration",bt]],["bart",["BartForConditionalGeneration",kt]],["mbart",["MBartForConditionalGeneration",Ft]],["marian",["MarianMTModel",Wr]],["m2m_100",["M2M100ForConditionalGeneration",Hr]],["blenderbot",["BlenderbotForConditionalGeneration",zt]],["blenderbot-small",["BlenderbotSmallForConditionalGeneration",Bt]]]),Ii=new Map([["bloom",["BloomForCausalLM",fs]],["gpt2",["GPT2LMHeadModel",Pn]],["gptj",["GPTJForCausalLM",On]],["gpt_bigcode",["GPTBigCodeForCausalLM",Vn]],["gpt_neo",["GPTNeoForCausalLM",En]],["gpt_neox",["GPTNeoXForCausalLM",In]],["codegen",["CodeGenForCausalLM",$n]],["llama",["LlamaForCausalLM",Wn]],["cohere",["CohereForCausalLM",Hn]],["gemma",["GemmaForCausalLM",Jn]],["gemma2",["Gemma2ForCausalLM",ts]],["openelm",["OpenELMForCausalLM",rs]],["qwen2",["Qwen2ForCausalLM",as]],["phi",["PhiForCausalLM",ds]],["phi3",["Phi3ForCausalLM",ps]],["mpt",["MptForCausalLM",Ms]],["opt",["OPTForCausalLM",xs]],["mbart",["MBartForCausalLM",St]],["mistral",["MistralForCausalLM",Do]],["starcoder2",["Starcoder2ForCausalLM",Go]],["falcon",["FalconForCausalLM",Uo]],["trocr",["TrOCRForCausalLM",No]],["stablelm",["StableLmForCausalLM",ri]]]),Bi=new Map([["bert",["BertForMaskedLM",W]],["roformer",["RoFormerForMaskedLM",ee]],["electra",["ElectraForMaskedLM",he]],["esm",["EsmForMaskedLM",Ge]],["convbert",["ConvBertForMaskedLM",ie]],["camembert",["CamembertForMaskedLM",we]],["deberta",["DebertaForMaskedLM",Te]],["deberta-v2",["DebertaV2ForMaskedLM",Ae]],["mpnet",["MPNetForMaskedLM",Je]],["albert",["AlbertForMaskedLM",ut]],["distilbert",["DistilBertForMaskedLM",De]],["roberta",["RobertaForMaskedLM",jt]],["xlm",["XLMWithLMHeadModel",qt]],["xlm-roberta",["XLMRobertaForMaskedLM",Yt]],["mobilebert",["MobileBertForMaskedLM",Xe]],["squeezebert",["SqueezeBertForMaskedLM",rt]]]),Ni=new 
Map([["bert",["BertForQuestionAnswering",H]],["roformer",["RoFormerForQuestionAnswering",se]],["electra",["ElectraForQuestionAnswering",me]],["convbert",["ConvBertForQuestionAnswering",ce]],["camembert",["CamembertForQuestionAnswering",ye]],["deberta",["DebertaForQuestionAnswering",Fe]],["deberta-v2",["DebertaV2ForQuestionAnswering",Le]],["mpnet",["MPNetForQuestionAnswering",tt]],["albert",["AlbertForQuestionAnswering",dt]],["distilbert",["DistilBertForQuestionAnswering",je]],["roberta",["RobertaForQuestionAnswering",Rt]],["xlm",["XLMForQuestionAnswering",Xt]],["xlm-roberta",["XLMRobertaForQuestionAnswering",Zt]],["mobilebert",["MobileBertForQuestionAnswering",He]],["squeezebert",["SqueezeBertForQuestionAnswering",it]]]),Oi=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",an]]]),ji=new Map([["llava",["LlavaForConditionalGeneration",cn]],["moondream1",["Moondream1ForConditionalGeneration",dn]],["florence2",["Florence2ForConditionalGeneration",hn]]]),Di=new Map([["vision-encoder-decoder",["VisionEncoderDecoderModel",an]]]),Vi=new Map([["vit",["ViTForImageClassification",vs]],["fastvit",["FastViTForImageClassification",Ps]],["mobilevit",["MobileViTForImageClassification",Ls]],["mobilevitv2",["MobileViTV2ForImageClassification",Ns]],["beit",["BeitForImageClassification",Us]],["deit",["DeiTForImageClassification",lr]],["convnext",["ConvNextForImageClassification",Ar]],["convnextv2",["ConvNextV2ForImageClassification",Lr]],["dinov2",["Dinov2ForImageClassification",Nr]],["resnet",["ResNetForImageClassification",ur]],["swin",["SwinForImageClassification",_r]],["segformer",["SegformerForImageClassification",ei]],["efficientnet",["EfficientNetForImageClassification",ai]],["mobilenet_v1",["MobileNetV1ForImageClassification",_i]],["mobilenet_v2",["MobileNetV2ForImageClassification",gi]],["mobilenet_v3",["MobileNetV3ForImageClassification",bi]],["mobilenet_v4",["MobileNetV4ForImageClassification",ki]]]),Ri=new 
Map([["detr",["DetrForObjectDetection",Qs]],["rt_detr",["RTDetrForObjectDetection",er]],["table-transformer",["TableTransformerForObjectDetection",rr]],["yolos",["YolosForObjectDetection",Dr]]]),Gi=new Map([["owlvit",["OwlViTForObjectDetection",Ds]],["owlv2",["Owlv2ForObjectDetection",Gs]]]),$i=new Map([["detr",["DetrForSegmentation",Hs]],["clipseg",["CLIPSegForImageSegmentation",vn]]]),qi=new Map([["segformer",["SegformerForSemanticSegmentation",ti]]]),Ui=new Map([["sam",["SamModel",Gr]]]),Wi=new Map([["wav2vec2",["Wav2Vec2ForCTC",Jr]],["wav2vec2-bert",["Wav2Vec2BertForCTC",wo]],["unispeech",["UniSpeechForCTC",lo]],["unispeech-sat",["UniSpeechSatForCTC",po]],["wavlm",["WavLMForCTC",Co]],["hubert",["HubertForCTC",xo]]]),Xi=new Map([["wav2vec2",["Wav2Vec2ForSequenceClassification",Zr]],["wav2vec2-bert",["Wav2Vec2BertForSequenceClassification",Mo]],["unispeech",["UniSpeechForSequenceClassification",co]],["unispeech-sat",["UniSpeechSatForSequenceClassification",_o]],["wavlm",["WavLMForSequenceClassification",Fo]],["hubert",["HubertForSequenceClassification",ko]],["audio-spectrogram-transformer",["ASTForAudioClassification",nn]]]),Qi=new Map([["wavlm",["WavLMForXVector",Po]]]),Hi=new Map([["unispeech-sat",["UniSpeechSatForAudioFrameClassification",mo]],["wavlm",["WavLMForAudioFrameClassification",So]],["wav2vec2",["Wav2Vec2ForAudioFrameClassification",eo]],["pyannote",["PyAnnoteForAudioFrameClassification",so]]]),Yi=new Map([["vitmatte",["VitMatteForImageMatting",As]]]),Ki=new Map([["swin2sr",["Swin2SRForImageSuperResolution",gr]]]),Ji=new Map([["dpt",["DPTForDepthEstimation",br]],["depth_anything",["DepthAnythingForDepthEstimation",xr]],["glpn",["GLPNForDepthEstimation",vr]]]),Zi=new 
Map([["clip",["CLIPVisionModelWithProjection",fn]],["siglip",["SiglipVisionModel",bn]]]),ea=[[vi,w],[Ci,M],[Fi,x],[Ei,w],[zi,w],[Li,b],[Pi,b],[Ii,x],[Bi,w],[Ni,w],[Oi,y],[ji,T],[Vi,w],[$i,w],[qi,w],[Yi,w],[Ki,w],[Ji,w],[Ri,w],[Gi,w],[Ui,k],[Wi,w],[Xi,w],[Si,b],[Ai,w],[Qi,w],[Hi,w],[Zi,w]];for(const[e,t]of ea)for(const[n,s]of e.values())C.set(n,t),P.set(s,n),F.set(n,s);const ta=[["MusicgenForConditionalGeneration",ui,v],["CLIPTextModelWithProjection",mn,w],["SiglipTextModel",Mn,w],["ClapTextModelWithProjection",Qo,w],["ClapAudioModelWithProjection",Ho,w]];for(const[e,t,n]of ta)C.set(e,n),P.set(t,e),F.set(e,t);class na extends Ti{static MODEL_CLASS_MAPPINGS=ea.map((e=>e[0]));static BASE_IF_FAIL=!0}class sa extends Ti{static MODEL_CLASS_MAPPINGS=[Ei]}class ra extends Ti{static MODEL_CLASS_MAPPINGS=[zi]}class oa extends Ti{static MODEL_CLASS_MAPPINGS=[Li]}class ia extends Ti{static MODEL_CLASS_MAPPINGS=[Pi]}class aa extends Ti{static MODEL_CLASS_MAPPINGS=[Si]}class la extends Ti{static MODEL_CLASS_MAPPINGS=[Ai]}class ca extends Ti{static MODEL_CLASS_MAPPINGS=[Ii]}class da extends Ti{static MODEL_CLASS_MAPPINGS=[Bi]}class ua extends Ti{static MODEL_CLASS_MAPPINGS=[Ni]}class ha extends Ti{static MODEL_CLASS_MAPPINGS=[Oi]}class pa extends Ti{static MODEL_CLASS_MAPPINGS=[Vi]}class _a extends Ti{static MODEL_CLASS_MAPPINGS=[$i]}class ma extends Ti{static MODEL_CLASS_MAPPINGS=[qi]}class fa extends Ti{static MODEL_CLASS_MAPPINGS=[Ri]}class ga extends Ti{static MODEL_CLASS_MAPPINGS=[Gi]}class wa extends Ti{static MODEL_CLASS_MAPPINGS=[Ui]}class Ma extends Ti{static MODEL_CLASS_MAPPINGS=[Wi]}class ba extends Ti{static MODEL_CLASS_MAPPINGS=[Xi]}class ya extends Ti{static MODEL_CLASS_MAPPINGS=[Qi]}class xa extends Ti{static MODEL_CLASS_MAPPINGS=[Hi]}class ka extends Ti{static MODEL_CLASS_MAPPINGS=[Di]}class Ta extends Ti{static MODEL_CLASS_MAPPINGS=[Yi]}class va extends Ti{static MODEL_CLASS_MAPPINGS=[Ki]}class Ca extends Ti{static MODEL_CLASS_MAPPINGS=[Ji]}class Fa extends 
Ti{static MODEL_CLASS_MAPPINGS=[Zi]}class Pa extends G{constructor({logits:e,past_key_values:t,encoder_outputs:n,decoder_attentions:s=null,cross_attentions:r=null}){super(),this.logits=e,this.past_key_values=t,this.encoder_outputs=n,this.decoder_attentions=s,this.cross_attentions=r}}class Sa extends G{constructor({logits:e}){super(),this.logits=e}}class Aa extends G{constructor({logits:e,embeddings:t}){super(),this.logits=e,this.embeddings=t}}class Ea extends G{constructor({logits:e}){super(),this.logits=e}}class za extends G{constructor({logits:e}){super(),this.logits=e}}class La extends G{constructor({start_logits:e,end_logits:t}){super(),this.start_logits=e,this.end_logits=t}}class Ia extends G{constructor({logits:e}){super(),this.logits=e}}class Ba extends G{constructor({logits:e,past_key_values:t}){super(),this.logits=e,this.past_key_values=t}}class Na extends G{constructor({alphas:e}){super(),this.alphas=e}}class Oa extends G{constructor({waveform:e,spectrogram:t}){super(),this.waveform=e,this.spectrogram=t}}},"./src/models/whisper/common_whisper.js":
119
113
  /*!**********************************************!*\
120
114
  !*** ./src/models/whisper/common_whisper.js ***!
121
115
  \**********************************************/(e,t,n)=>{"use strict";n.r(t),n.d(t,{WHISPER_LANGUAGE_MAPPING:()=>r,WHISPER_TO_LANGUAGE_CODE_MAPPING:()=>o,whisper_language_to_code:()=>i});const s=[["en","english"],["zh","chinese"],["de","german"],["es","spanish"],["ru","russian"],["ko","korean"],["fr","french"],["ja","japanese"],["pt","portuguese"],["tr","turkish"],["pl","polish"],["ca","catalan"],["nl","dutch"],["ar","arabic"],["sv","swedish"],["it","italian"],["id","indonesian"],["hi","hindi"],["fi","finnish"],["vi","vietnamese"],["he","hebrew"],["uk","ukrainian"],["el","greek"],["ms","malay"],["cs","czech"],["ro","romanian"],["da","danish"],["hu","hungarian"],["ta","tamil"],["no","norwegian"],["th","thai"],["ur","urdu"],["hr","croatian"],["bg","bulgarian"],["lt","lithuanian"],["la","latin"],["mi","maori"],["ml","malayalam"],["cy","welsh"],["sk","slovak"],["te","telugu"],["fa","persian"],["lv","latvian"],["bn","bengali"],["sr","serbian"],["az","azerbaijani"],["sl","slovenian"],["kn","kannada"],["et","estonian"],["mk","macedonian"],["br","breton"],["eu","basque"],["is","icelandic"],["hy","armenian"],["ne","nepali"],["mn","mongolian"],["bs","bosnian"],["kk","kazakh"],["sq","albanian"],["sw","swahili"],["gl","galician"],["mr","marathi"],["pa","punjabi"],["si","sinhala"],["km","khmer"],["sn","shona"],["yo","yoruba"],["so","somali"],["af","afrikaans"],["oc","occitan"],["ka","georgian"],["be","belarusian"],["tg","tajik"],["sd","sindhi"],["gu","gujarati"],["am","amharic"],["yi","yiddish"],["lo","lao"],["uz","uzbek"],["fo","faroese"],["ht","haitian creole"],["ps","pashto"],["tk","turkmen"],["nn","nynorsk"],["mt","maltese"],["sa","sanskrit"],["lb","luxembourgish"],["my","myanmar"],["bo","tibetan"],["tl","tagalog"],["mg","malagasy"],["as","assamese"],["tt","tatar"],["haw","hawaiian"],["ln","lingala"],["ha","hausa"],["ba","bashkir"],["jw","javanese"],["su","sundanese"]],r=new Map(s),o=new 
Map([...s.map((([e,t])=>[t,e])),["burmese","my"],["valencian","ca"],["flemish","nl"],["haitian","ht"],["letzeburgesch","lb"],["pushto","ps"],["panjabi","pa"],["moldavian","ro"],["moldovan","ro"],["sinhalese","si"],["castilian","es"]]);function i(e){e=e.toLowerCase();let t=o.get(e);if(void 0===t){if(!r.has(e)){const t=2===e.length?r.keys():r.values();throw new Error(`Language "${e}" is not supported. Must be one of: ${JSON.stringify(t)}`)}t=e}return t}},"./src/models/whisper/generation_whisper.js":
@@ -148,16 +142,16 @@
148
142
  \**************************************/
/* Module factory exporting CharTrie (bound to `r`), PriorityQueue (`s`) and TokenLattice (`i`). */
(e,t,n)=>{"use strict";n.r(t),n.d(t,{CharTrie:()=>r,PriorityQueue:()=>s,TokenLattice:()=>i});
/* PriorityQueue: array-backed binary heap. `_comparator(a, b)` returning true means `a` ranks above `b`; the top-ranked item sits at index 0. */
class s{
/* e: comparator, defaulting to (a, b) => a > b; t: maximum number of stored items, defaulting to Infinity (1/0). */
constructor(e=((e,t)=>e>t),t=1/0){this._heap=[],this._comparator=e,this._maxSize=t}
/* Number of items currently stored. */
get size(){return this._heap.length}
/* True when nothing is stored. */
isEmpty(){return 0===this.size}
/* Returns the item at index 0 without removing it (undefined when empty). */
peek(){return this._heap[0]}
/* Variadic form of extend(); returns the size after insertion. */
push(...e){return this.extend(e)}
/* Inserts every item of the iterable `e`. Below `_maxSize` the item is appended and sifted up; at capacity the slot chosen by _smallest() is overwritten only if the comparator ranks the new item above the current occupant. Returns the resulting size. */
extend(e){for(const t of e)if(this.size<this._maxSize)this._heap.push(t),this._siftUp();else{const e=this._smallest();this._comparator(t,this._heap[e])&&(this._heap[e]=t,this._siftUpFrom(e))}return this.size}
/* Removes and returns the item at index 0: the root is swapped with the last element (when more than one item exists), the last slot is dropped, then the new root is sifted down. */
pop(){const e=this.peek(),t=this.size-1;return t>0&&this._swap(0,t),this._heap.pop(),this._siftDown(),e}
/* Overwrites the root with `e`, restores heap order, and returns the previous root. */
replace(e){const t=this.peek();return this._heap[0]=e,this._siftDown(),t}
/* Index arithmetic for the implicit binary tree stored in `_heap`. */
_parent(e){return(e+1>>>1)-1}_left(e){return 1+(e<<1)}_right(e){return e+1<<1}
/* Applies the comparator to the items at indices `e` and `t`. */
_greater(e,t){return this._comparator(this._heap[e],this._heap[t])}
/* Exchanges the items at indices `e` and `t`. */
_swap(e,t){const n=this._heap[e];this._heap[e]=this._heap[t],this._heap[t]=n}
/* Sifts up the last item. */
_siftUp(){this._siftUpFrom(this.size-1)}
/* Moves the item at index `e` towards the root while it outranks its parent. */
_siftUpFrom(e){for(;e>0&&this._greater(e,this._parent(e));)this._swap(e,this._parent(e)),e=this._parent(e)}
/* Starting at the root, repeatedly swaps the current item with its higher-ranked child while some existing child outranks it. */
_siftDown(){let e=0;for(;this._left(e)<this.size&&this._greater(this._left(e),e)||this._right(e)<this.size&&this._greater(this._right(e),e);){const t=this._right(e)<this.size&&this._greater(this._right(e),this._left(e))?this._right(e):this._left(e);this._swap(e,t),e=t}}
/* Returns 2**floor(log2(size)) - 1, i.e. the index of the first slot on the deepest level of the tree. NOTE(review): this is a fixed position, not a search for the lowest-ranked item - presumably a deliberate approximation; confirm. */
_smallest(){return 2**Math.floor(Math.log2(this.size))-1}}
/* CharTrie: trie keyed by the elements yielded when iterating each inserted value (characters, for strings). */
class r{
/* Starts with an empty root node. */
constructor(){this.root=o.default()}
/* Inserts every entry of the iterable `e`. */
extend(e){for(let t of e)this.push(t)}
/* Walks `e` element by element, creating missing child nodes, and marks the final node as a leaf. */
push(e){let t=this.root;for(let n of e){let e=t.children.get(n);void 0===e&&(e=o.default(),t.children.set(n,e)),t=e}t.isLeaf=!0}
/* Generator: follows `e` index by index from the root and yields each accumulated prefix whose node is marked as a leaf; stops once no matching child exists. */
*commonPrefixSearch(e){let t=this.root,n="";for(let s=0;s<e.length&&void 0!==t;++s){const r=e[s];n+=r,t=t.children.get(r),void 0!==t&&t.isLeaf&&(yield n)}}}
/* Trie node: `isLeaf` flag plus a `children` Map; default() builds a non-leaf node with no children. */
class o{constructor(e,t){this.isLeaf=e,this.children=t}static default(){return new o(!1,new Map)}}
/* TokenLattice: candidate token nodes over `sentence`, indexed by the position where they begin and where they end. */
class i{
/* e: sentence; t: BOS token id; n: EOS token id. Creates a BOS node (nodeId 0, pos 0) registered in endNodes[0] and an EOS node (nodeId 1, pos len) registered in beginNodes[len]; clones of both are stored in `nodes`. */
constructor(e,t,n){this.sentence=e,this.len=e.length,this.bosTokenId=t,this.eosTokenId=n,this.nodes=[],this.beginNodes=Array.from({length:this.len+1},(()=>[])),this.endNodes=Array.from({length:this.len+1},(()=>[]));const s=new a(this.bosTokenId,0,0,0,0),r=new a(this.eosTokenId,1,this.len,0,0);this.nodes.push(s.clone()),this.nodes.push(r.clone()),this.beginNodes[this.len].push(r),this.endNodes[0].push(s)}
/* Adds a node. e: start position; t: length; n: score; s: token id. The node id is the current length of `nodes`. */
insert(e,t,n,s){const r=this.nodes.length,o=new a(s,r,e,t,n);this.beginNodes[e].push(o),this.endNodes[e+t].push(o),this.nodes.push(o)}
/* Best-path search. For each position 0..len, every node beginning there is linked (via `prev`, stored as a clone) to the node ending there that maximises backtraceScore + score. Returns [] if some position has no beginning node or no predecessor is found. The result is rebuilt by following `prev` from the EOS node's predecessor until a node with prev === null, then reversed, so it contains neither the EOS node nor the terminating node. */
viterbi(){const e=this.len;let t=0;for(;t<=e;){if(0==this.beginNodes[t].length)return[];for(let e of this.beginNodes[t]){e.prev=null;let n=0,s=null;for(let r of this.endNodes[t]){const t=r.backtraceScore+e.score;(null===s||t>n)&&(s=r.clone(),n=t)}if(null===s)return[];e.prev=s,e.backtraceScore=n}++t}const n=[],s=this.beginNodes[e][0].prev;if(null===s)return[];let r=s.clone();for(;null!==r.prev;){n.push(r.clone());const e=r.clone();r=e.prev.clone()}return n.reverse(),n}
/* Slice of the sentence covered by node `e`. */
piece(e){return this.sentence.slice(e.pos,e.pos+e.length)}
/* Sentence pieces along the viterbi() path. */
tokens(){return this.viterbi().map((e=>this.piece(e)))}
/* Token ids along the viterbi() path. */
tokenIds(){return this.viterbi().map((e=>e.tokenId))}}
/* Lattice node. Constructor arguments in order: tokenId, nodeId, pos, length, score. `prev` and `backtraceScore` start as null and 0. */
class a{constructor(e,t,n,s,r){this.tokenId=e,this.nodeId=t,this.pos=n,this.length=s,this.score=r,this.prev=null,this.backtraceScore=0}
/* Copies every field; `prev` is copied by reference, not cloned. */
clone(){const e=new a(this.tokenId,this.nodeId,this.pos,this.length,this.score);return e.prev=this.prev,e.backtraceScore=this.backtraceScore,e}}},"./src/utils/devices.js":
149
143
  /*!******************************!*\
150
144
  !*** ./src/utils/devices.js ***!
151
- \******************************/(e,t,n)=>{"use strict";n.r(t),n.d(t,{DEVICE_TYPES:()=>s});const s=Object.freeze({cpu:"cpu",gpu:"gpu",wasm:"wasm",webgpu:"webgpu"})},"./src/utils/dtypes.js":
145
+ \******************************/ /* Module factory for ./src/utils/devices.js: exports DEVICE_TYPES (bound to `s`), a frozen object in which every key maps to the identical string value. */ (e,t,n)=>{"use strict";n.r(t),n.d(t,{DEVICE_TYPES:()=>s});const s=Object.freeze({auto:"auto",gpu:"gpu",cpu:"cpu",wasm:"wasm",webgpu:"webgpu",cuda:"cuda",dml:"dml",webnn:"webnn","webnn-npu":"webnn-npu","webnn-gpu":"webnn-gpu","webnn-cpu":"webnn-cpu"})},"./src/utils/dtypes.js":
152
146
  /*!*****************************!*\
153
147
  !*** ./src/utils/dtypes.js ***!
154
- \*****************************/(e,t,n)=>{"use strict";n.r(t),n.d(t,{DATA_TYPES:()=>i,DEFAULT_DEVICE_DTYPE_MAPPING:()=>a,DEFAULT_DTYPE_SUFFIX_MAPPING:()=>l,isWebGpuFp16Supported:()=>o});var s=n(/*! ../env.js */"./src/env.js"),r=n(/*! ./devices.js */"./src/utils/devices.js");const o=function(){let e;return async function(){if(void 0===e)if(s.apis.IS_WEBGPU_AVAILABLE)try{const t=await navigator.gpu.requestAdapter();e=t.features.has("shader-f16")}catch(t){e=!1}else e=!1;return e}}(),i=Object.freeze({fp32:"fp32",fp16:"fp16",q8:"q8",int8:"int8",uint8:"uint8",q4:"q4",bnb4:"bnb4",q4f16:"q4f16"}),a=Object.freeze({[r.DEVICE_TYPES.cpu]:i.q8,[r.DEVICE_TYPES.gpu]:i.fp32,[r.DEVICE_TYPES.wasm]:i.q8,[r.DEVICE_TYPES.webgpu]:i.fp32}),l=Object.freeze({[i.fp32]:"",[i.fp16]:"_fp16",[i.int8]:"_int8",[i.uint8]:"_uint8",[i.q8]:"_quantized",[i.q4]:"_q4",[i.q4f16]:"_q4f16",[i.bnb4]:"_bnb4"})},"./src/utils/generic.js":
148
+ \*****************************/(e,t,n)=>{"use strict";n.r(t),n.d(t,{DATA_TYPES:()=>i,DEFAULT_DEVICE_DTYPE_MAPPING:()=>a,DEFAULT_DTYPE_SUFFIX_MAPPING:()=>l,isWebGpuFp16Supported:()=>o});var s=n(/*! ../env.js */"./src/env.js"),r=n(/*! ./devices.js */"./src/utils/devices.js");const o=function(){let e;return async function(){if(void 0===e)if(s.apis.IS_WEBGPU_AVAILABLE)try{const t=await navigator.gpu.requestAdapter();e=t.features.has("shader-f16")}catch(t){e=!1}else e=!1;return e}}(),i=Object.freeze({fp32:"fp32",fp16:"fp16",q8:"q8",int8:"int8",uint8:"uint8",q4:"q4",bnb4:"bnb4",q4f16:"q4f16"}),a=Object.freeze({[r.DEVICE_TYPES.wasm]:i.q8}),l=Object.freeze({[i.fp32]:"",[i.fp16]:"_fp16",[i.int8]:"_int8",[i.uint8]:"_uint8",[i.q8]:"_quantized",[i.q4]:"_q4",[i.q4f16]:"_q4f16",[i.bnb4]:"_bnb4"})},"./src/utils/generic.js":
155
149
  /*!******************************!*\
156
150
  !*** ./src/utils/generic.js ***!
157
151
  \******************************/(e,t,n)=>{"use strict";n.r(t),n.d(t,{Callable:()=>s});const s=class{constructor(){let e=function(...t){return e._call(...t)};return Object.setPrototypeOf(e,new.target.prototype)}_call(...e){throw Error("Must implement _call method in subclass")}}},"./src/utils/hub.js":
158
152
  /*!**************************!*\
159
153
  !*** ./src/utils/hub.js ***!
160
- \**************************/(e,t,n)=>{"use strict";n.r(t),n.d(t,{getFile:()=>c,getModelFile:()=>h,getModelJSON:()=>p});var s=n(/*! fs */"?7a2c"),r=n(/*! path */"?a42a"),o=n(/*! ../env.js */"./src/env.js"),i=n(/*! ./core.js */"./src/utils/core.js");class a{_CONTENT_TYPE_MAP={txt:"text/plain",html:"text/html",css:"text/css",js:"text/javascript",json:"application/json",png:"image/png",jpg:"image/jpeg",jpeg:"image/jpeg",gif:"image/gif"};constructor(e){if(this.filePath=e,this.headers=new Headers,this.exists=s.existsSync(e),this.exists){this.status=200,this.statusText="OK";let t=s.statSync(e);this.headers.set("content-length",t.size.toString()),this.updateContentType();let n=this;this.body=new ReadableStream({start(e){n.arrayBuffer().then((t=>{e.enqueue(new Uint8Array(t)),e.close()}))}})}else this.status=404,this.statusText="Not Found",this.body=null}updateContentType(){const e=this.filePath.toString().split(".").pop().toLowerCase();this.headers.set("content-type",this._CONTENT_TYPE_MAP[e]??"application/octet-stream")}clone(){let e=new a(this.filePath);return e.exists=this.exists,e.status=this.status,e.statusText=this.statusText,e.headers=new Headers(this.headers),e}async arrayBuffer(){return(await s.promises.readFile(this.filePath)).buffer}async blob(){const e=await s.promises.readFile(this.filePath);return new Blob([e],{type:this.headers.get("content-type")})}async text(){return await s.promises.readFile(this.filePath,"utf8")}async json(){return JSON.parse(await this.text())}}function l(e,t=null,n=null){let s;try{s=new URL(e)}catch(e){return!1}return!(t&&!t.includes(s.protocol))&&!(n&&!n.includes(s.hostname))}async function c(e){if(o.env.useFS&&!l(e,["http:","https:","blob:"]))return new a(e);if("undefined"!=typeof process&&"node"===process?.release?.name){const t=!!process.env?.TESTING_REMOTELY,n=o.env.version,s=new Headers;s.set("User-Agent",`transformers.js/${n}; is_ci/${t};`);if(l(e,["http:","https:"],["huggingface.co","hf.co"])){const 
e=process.env?.HF_TOKEN??process.env?.HF_ACCESS_TOKEN;e&&s.set("Authorization",`Bearer ${e}`)}return fetch(e,{headers:s})}return fetch(e)}const d={400:"Bad request error occurred while trying to load file",401:"Unauthorized access to file",403:"Forbidden access to file",404:"Could not locate file",408:"Request timeout error occurred while trying to load file",500:"Internal server error error occurred while trying to load file",502:"Bad gateway error occurred while trying to load file",503:"Service unavailable error occurred while trying to load file",504:"Gateway timeout error occurred while trying to load file"};class u{constructor(e){this.path=e}async match(e){let t=r.join(this.path,e),n=new a(t);return n.exists?n:void 0}async put(e,t){const n=Buffer.from(await t.arrayBuffer());let o=r.join(this.path,e);try{await s.promises.mkdir(r.dirname(o),{recursive:!0}),await s.promises.writeFile(o,n)}catch(e){console.warn("An error occurred while writing the file to cache:",e)}}}async function h(e,t,n=!0,s={}){if(!o.env.allowLocalModels){if(s.local_files_only)throw Error("Invalid configuration detected: local models are disabled (`env.allowLocalModels=false`) but you have requested to only use local models (`local_files_only=true`).");if(!o.env.allowRemoteModels)throw Error("Invalid configuration detected: both local and remote models are disabled. 
Fix by setting `env.allowLocalModels` or `env.allowRemoteModels` to `true`.")}let r;if((0,i.dispatchCallback)(s.progress_callback,{status:"initiate",name:e,file:t}),!r&&o.env.useBrowserCache){if("undefined"==typeof caches)throw Error("Browser cache is not available in this environment.");try{r=await caches.open("transformers-cache")}catch(e){console.warn("An error occurred while opening the browser cache:",e)}}if(!r&&o.env.useFSCache&&(r=new u(s.cache_dir??o.env.cacheDir)),!r&&o.env.useCustomCache){if(!o.env.customCache)throw Error("`env.useCustomCache=true`, but `env.customCache` is not defined.");if(!o.env.customCache.match||!o.env.customCache.put)throw new Error("`env.customCache` must be an object which implements the `match` and `put` functions of the Web Cache API. For more information, see https://developer.mozilla.org/en-US/docs/Web/API/Cache");r=o.env.customCache}const a=s.revision??"main";let h,p,m=_(e,t),f=_(o.env.localModelPath,m),g=_(o.env.remoteHost,o.env.remotePathTemplate.replaceAll("{model}",e).replaceAll("{revision}",encodeURIComponent(a)),t),w="main"===a?m:_(e,a,t),M=r instanceof u?w:g,b=!1;r&&(p=await async function(e,...t){for(let n of t)try{let t=await e.match(n);if(t)return t}catch(e){continue}}(r,f,M));const y=void 0!==p;if(void 0===p){if(o.env.allowLocalModels){if(l(m,["http:","https:"])){if(s.local_files_only)throw new Error(`\`local_files_only=true\`, but attempted to load a remote file from: ${m}.`);if(!o.env.allowRemoteModels)throw new Error(`\`env.allowRemoteModels=false\`, but attempted to load a remote file from: ${m}.`)}else try{p=await c(f),h=f}catch(e){console.warn(`Unable to load from local path "${f}": "${e}"`)}}if(void 0===p||404===p.status){if(s.local_files_only||!o.env.allowRemoteModels){if(n)throw Error(`\`local_files_only=true\` or \`env.allowRemoteModels=false\` and file was not found locally at "${f}".`);return null}if(p=await c(g),200!==p.status)return function(e,t,n){if(!n)return null;const s=d[e]??`Error (${e}) 
occurred while trying to load file`;throw Error(`${s}: "${t}".`)}(p.status,g,n);h=M}b=r&&"undefined"!=typeof Response&&p instanceof Response&&200===p.status}(0,i.dispatchCallback)(s.progress_callback,{status:"download",name:e,file:t});const x={status:"progress",name:e,file:t};let k;return s.progress_callback?y&&"undefined"!=typeof navigator&&/firefox/i.test(navigator.userAgent)?(k=new Uint8Array(await p.arrayBuffer()),(0,i.dispatchCallback)(s.progress_callback,{...x,progress:100,loaded:k.length,total:k.length})):k=await async function(e,t){const n=e.headers.get("Content-Length");null===n&&console.warn("Unable to determine content-length from response headers. Will expand buffer when needed.");let s=parseInt(n??"0"),r=new Uint8Array(s),o=0;const i=e.body.getReader();async function a(){const{done:e,value:n}=await i.read();if(e)return;let l=o+n.length;if(l>s){s=l;let e=new Uint8Array(s);e.set(r),r=e}r.set(n,o),o=l;return t({progress:o/s*100,loaded:o,total:s}),a()}return await a(),r}(p,(e=>{(0,i.dispatchCallback)(s.progress_callback,{...x,...e})})):k=new Uint8Array(await p.arrayBuffer()),b&&h&&void 0===await r.match(h)&&await r.put(h,new Response(k,{headers:p.headers})).catch((e=>{console.warn(`Unable to add response to browser cache: ${e}.`)})),(0,i.dispatchCallback)(s.progress_callback,{status:"done",name:e,file:t}),k}async function p(e,t,n=!0,s={}){let r=await h(e,t,n,s);if(null===r)return{};let o=new TextDecoder("utf-8").decode(r);return JSON.parse(o)}function _(...e){return(e=e.map(((t,n)=>(n&&(t=t.replace(new RegExp("^/"),"")),n!==e.length-1&&(t=t.replace(new RegExp("/$"),"")),t)))).join("/")}},"./src/utils/image.js":
154
+ \**************************/(e,t,n)=>{"use strict";n.r(t),n.d(t,{getFile:()=>d,getModelFile:()=>p,getModelJSON:()=>_});var s=n(/*! fs */"fs"),r=n(/*! path */"path"),o=n(/*! ../env.js */"./src/env.js"),i=n(/*! ./core.js */"./src/utils/core.js");const a={txt:"text/plain",html:"text/html",css:"text/css",js:"text/javascript",json:"application/json",png:"image/png",jpg:"image/jpeg",jpeg:"image/jpeg",gif:"image/gif"};class l{constructor(e){if(this.filePath=e,this.headers=new Headers,this.exists=s.existsSync(e),this.exists){this.status=200,this.statusText="OK";let t=s.statSync(e);this.headers.set("content-length",t.size.toString()),this.updateContentType();let n=this;this.body=new ReadableStream({start(e){n.arrayBuffer().then((t=>{e.enqueue(new Uint8Array(t)),e.close()}))}})}else this.status=404,this.statusText="Not Found",this.body=null}updateContentType(){const e=this.filePath.toString().split(".").pop().toLowerCase();this.headers.set("content-type",a[e]??"application/octet-stream")}clone(){let e=new l(this.filePath);return e.exists=this.exists,e.status=this.status,e.statusText=this.statusText,e.headers=new Headers(this.headers),e}async arrayBuffer(){return(await s.promises.readFile(this.filePath)).buffer}async blob(){const e=await s.promises.readFile(this.filePath);return new Blob([e],{type:this.headers.get("content-type")})}async text(){return await s.promises.readFile(this.filePath,"utf8")}async json(){return JSON.parse(await this.text())}}function c(e,t=null,n=null){let s;try{s=new URL(e)}catch(e){return!1}return!(t&&!t.includes(s.protocol))&&!(n&&!n.includes(s.hostname))}async function d(e){if(o.env.useFS&&!c(e,["http:","https:","blob:"]))return new l(e);if("undefined"!=typeof process&&"node"===process?.release?.name){const t=!!process.env?.TESTING_REMOTELY,n=o.env.version,s=new Headers;s.set("User-Agent",`transformers.js/${n}; is_ci/${t};`);if(c(e,["http:","https:"],["huggingface.co","hf.co"])){const 
e=process.env?.HF_TOKEN??process.env?.HF_ACCESS_TOKEN;e&&s.set("Authorization",`Bearer ${e}`)}return fetch(e,{headers:s})}return fetch(e)}const u={400:"Bad request error occurred while trying to load file",401:"Unauthorized access to file",403:"Forbidden access to file",404:"Could not locate file",408:"Request timeout error occurred while trying to load file",500:"Internal server error error occurred while trying to load file",502:"Bad gateway error occurred while trying to load file",503:"Service unavailable error occurred while trying to load file",504:"Gateway timeout error occurred while trying to load file"};class h{constructor(e){this.path=e}async match(e){let t=r.join(this.path,e),n=new l(t);return n.exists?n:void 0}async put(e,t){const n=Buffer.from(await t.arrayBuffer());let o=r.join(this.path,e);try{await s.promises.mkdir(r.dirname(o),{recursive:!0}),await s.promises.writeFile(o,n)}catch(e){console.warn("An error occurred while writing the file to cache:",e)}}}async function p(e,t,n=!0,s={}){if(!o.env.allowLocalModels){if(s.local_files_only)throw Error("Invalid configuration detected: local models are disabled (`env.allowLocalModels=false`) but you have requested to only use local models (`local_files_only=true`).");if(!o.env.allowRemoteModels)throw Error("Invalid configuration detected: both local and remote models are disabled. 
Fix by setting `env.allowLocalModels` or `env.allowRemoteModels` to `true`.")}let r;if((0,i.dispatchCallback)(s.progress_callback,{status:"initiate",name:e,file:t}),!r&&o.env.useBrowserCache){if("undefined"==typeof caches)throw Error("Browser cache is not available in this environment.");try{r=await caches.open("transformers-cache")}catch(e){console.warn("An error occurred while opening the browser cache:",e)}}if(!r&&o.env.useFSCache&&(r=new h(s.cache_dir??o.env.cacheDir)),!r&&o.env.useCustomCache){if(!o.env.customCache)throw Error("`env.useCustomCache=true`, but `env.customCache` is not defined.");if(!o.env.customCache.match||!o.env.customCache.put)throw new Error("`env.customCache` must be an object which implements the `match` and `put` functions of the Web Cache API. For more information, see https://developer.mozilla.org/en-US/docs/Web/API/Cache");r=o.env.customCache}const a=s.revision??"main";let l,p,_=m(e,t),f=m(o.env.localModelPath,_),g=m(o.env.remoteHost,o.env.remotePathTemplate.replaceAll("{model}",e).replaceAll("{revision}",encodeURIComponent(a)),t),w="main"===a?_:m(e,a,t),M=r instanceof h?w:g,b=!1;r&&(p=await async function(e,...t){for(let n of t)try{let t=await e.match(n);if(t)return t}catch(e){continue}}(r,f,M));const y=void 0!==p;if(void 0===p){if(o.env.allowLocalModels){if(c(_,["http:","https:"])){if(s.local_files_only)throw new Error(`\`local_files_only=true\`, but attempted to load a remote file from: ${_}.`);if(!o.env.allowRemoteModels)throw new Error(`\`env.allowRemoteModels=false\`, but attempted to load a remote file from: ${_}.`)}else try{p=await d(f),l=f}catch(e){console.warn(`Unable to load from local path "${f}": "${e}"`)}}if(void 0===p||404===p.status){if(s.local_files_only||!o.env.allowRemoteModels){if(n)throw Error(`\`local_files_only=true\` or \`env.allowRemoteModels=false\` and file was not found locally at "${f}".`);return null}if(p=await d(g),200!==p.status)return function(e,t,n){if(!n)return null;const s=u[e]??`Error (${e}) 
occurred while trying to load file`;throw Error(`${s}: "${t}".`)}(p.status,g,n);l=M}b=r&&"undefined"!=typeof Response&&p instanceof Response&&200===p.status}(0,i.dispatchCallback)(s.progress_callback,{status:"download",name:e,file:t});const x={status:"progress",name:e,file:t};let k;return s.progress_callback?y&&"undefined"!=typeof navigator&&/firefox/i.test(navigator.userAgent)?(k=new Uint8Array(await p.arrayBuffer()),(0,i.dispatchCallback)(s.progress_callback,{...x,progress:100,loaded:k.length,total:k.length})):k=await async function(e,t){const n=e.headers.get("Content-Length");null===n&&console.warn("Unable to determine content-length from response headers. Will expand buffer when needed.");let s=parseInt(n??"0"),r=new Uint8Array(s),o=0;const i=e.body.getReader();async function a(){const{done:e,value:n}=await i.read();if(e)return;let l=o+n.length;if(l>s){s=l;let e=new Uint8Array(s);e.set(r),r=e}r.set(n,o),o=l;return t({progress:o/s*100,loaded:o,total:s}),a()}return await a(),r}(p,(e=>{(0,i.dispatchCallback)(s.progress_callback,{...x,...e})})):k=new Uint8Array(await p.arrayBuffer()),b&&l&&void 0===await r.match(l)&&await r.put(l,new Response(k,{headers:p.headers})).catch((e=>{console.warn(`Unable to add response to browser cache: ${e}.`)})),(0,i.dispatchCallback)(s.progress_callback,{status:"done",name:e,file:t}),k}async function _(e,t,n=!0,s={}){let r=await p(e,t,n,s);if(null===r)return{};let o=new TextDecoder("utf-8").decode(r);return JSON.parse(o)}function m(...e){return(e=e.map(((t,n)=>(n&&(t=t.replace(new RegExp("^/"),"")),n!==e.length-1&&(t=t.replace(new RegExp("/$"),"")),t)))).join("/")}},"./src/utils/image.js":
161
155
  /*!****************************!*\
162
156
  !*** ./src/utils/image.js ***!
163
157
  \****************************/(e,t,n)=>{"use strict";n.r(t),n.d(t,{RawImage:()=>_});var s=n(/*! ./hub.js */"./src/utils/hub.js"),r=n(/*! ../env.js */"./src/env.js"),o=n(/*! ./tensor.js */"./src/utils/tensor.js"),i=n(/*! sharp */"sharp");const a="undefined"!=typeof self,l=a&&"DedicatedWorkerGlobalScope"===self.constructor.name;let c,d,u;if(a)c=(e,t)=>{if(!self.OffscreenCanvas)throw new Error("OffscreenCanvas not supported by this browser.");return new self.OffscreenCanvas(e,t)},u=self.createImageBitmap,d=self.ImageData;else{if(!i)throw new Error("Unable to load image processing library.");u=async e=>{const t=(await e.metadata()).channels,{data:n,info:s}=await e.rotate().raw().toBuffer({resolveWithObject:!0}),r=new _(new Uint8ClampedArray(n),s.width,s.height,s.channels);return void 0!==t&&t!==s.channels&&r.convert(t),r}}const h={0:"nearest",1:"lanczos",2:"bilinear",3:"bicubic",4:"box",5:"hamming"},p=new Map([["png","image/png"],["jpg","image/jpeg"],["jpeg","image/jpeg"],["gif","image/gif"]]);class _{constructor(e,t,n,s){this.data=e,this.width=t,this.height=n,this.channels=s}get size(){return[this.width,this.height]}static async read(e){if(e instanceof _)return e;if("string"==typeof e||e instanceof URL)return await this.fromURL(e);throw new Error("Unsupported input type: "+typeof e)}static fromCanvas(e){if(!a)throw new Error("fromCanvas() is only supported in browser environments.");const t=e.getContext("2d").getImageData(0,0,e.width,e.height).data;return new _(t,e.width,e.height,4)}static async fromURL(e){const t=await(0,s.getFile)(e);if(200!==t.status)throw new Error(`Unable to read image from "${e}" (${t.status} ${t.statusText})`);const n=await t.blob();return this.fromBlob(n)}static async fromBlob(e){if(a){const t=await u(e),n=c(t.width,t.height).getContext("2d");return n.drawImage(t,0,0),new this(n.getImageData(0,0,t.width,t.height).data,t.width,t.height,4)}{const t=i(await e.arrayBuffer());return await u(t)}}static 
fromTensor(e,t="CHW"){if(3!==e.dims.length)throw new Error(`Tensor should have 3 dimensions, but has ${e.dims.length} dimensions.`);if("CHW"===t)e=e.transpose(1,2,0);else if("HWC"!==t)throw new Error(`Unsupported channel format: ${t}`);if(!(e.data instanceof Uint8ClampedArray||e.data instanceof Uint8Array))throw new Error(`Unsupported tensor type: ${e.type}`);switch(e.dims[2]){case 1:case 2:case 3:case 4:return new _(e.data,e.dims[1],e.dims[0],e.dims[2]);default:throw new Error(`Unsupported number of channels: ${e.dims[2]}`)}}grayscale(){if(1===this.channels)return this;const e=new Uint8ClampedArray(this.width*this.height*1);switch(this.channels){case 3:case 4:for(let t=0,n=0;t<this.data.length;t+=this.channels){const s=this.data[t],r=this.data[t+1],o=this.data[t+2];e[n++]=Math.round(.2989*s+.587*r+.114*o)}break;default:throw new Error(`Conversion failed due to unsupported number of channels: ${this.channels}`)}return this._update(e,this.width,this.height,1)}rgb(){if(3===this.channels)return this;const e=new Uint8ClampedArray(this.width*this.height*3);switch(this.channels){case 1:for(let t=0,n=0;t<this.data.length;++t)e[n++]=this.data[t],e[n++]=this.data[t],e[n++]=this.data[t];break;case 4:for(let t=0,n=0;t<this.data.length;t+=4)e[n++]=this.data[t],e[n++]=this.data[t+1],e[n++]=this.data[t+2];break;default:throw new Error(`Conversion failed due to unsupported number of channels: ${this.channels}`)}return this._update(e,this.width,this.height,3)}rgba(){if(4===this.channels)return this;const e=new Uint8ClampedArray(this.width*this.height*4);switch(this.channels){case 1:for(let t=0,n=0;t<this.data.length;++t)e[n++]=this.data[t],e[n++]=this.data[t],e[n++]=this.data[t],e[n++]=255;break;case 3:for(let t=0,n=0;t<this.data.length;t+=3)e[n++]=this.data[t],e[n++]=this.data[t+1],e[n++]=this.data[t+2],e[n++]=255;break;default:throw new Error(`Conversion failed due to unsupported number of channels: ${this.channels}`)}return this._update(e,this.width,this.height,4)}async 
resize(e,t,{resample:n=2}={}){let s=h[n]??n;if(a){const n=this.channels,s=this.toCanvas(),r=c(e,t).getContext("2d");r.drawImage(s,0,0,e,t);return new _(r.getImageData(0,0,e,t).data,e,t,4).convert(n)}{let n=this.toSharp();switch(s){case"box":case"hamming":"box"!==s&&"hamming"!==s||(console.warn(`Resampling method ${s} is not yet supported. Using bilinear instead.`),s="bilinear");case"nearest":case"bilinear":case"bicubic":n=n.affine([e/this.width,0,0,t/this.height],{interpolator:s});break;case"lanczos":n=n.resize({width:e,height:t,fit:"fill",kernel:"lanczos3"});break;default:throw new Error(`Resampling method ${s} is not supported.`)}return await u(n)}}async pad([e,t,n,s]){if(e=Math.max(e,0),t=Math.max(t,0),n=Math.max(n,0),s=Math.max(s,0),0===e&&0===t&&0===n&&0===s)return this;if(a){const r=this.channels,o=this.toCanvas(),i=this.width+e+t,a=this.height+n+s,l=c(i,a).getContext("2d");l.drawImage(o,0,0,this.width,this.height,e,n,i,a);return new _(l.getImageData(0,0,i,a).data,i,a,4).convert(r)}{const r=this.toSharp().extend({left:e,right:t,top:n,bottom:s});return await u(r)}}async crop([e,t,n,s]){if(e=Math.max(e,0),t=Math.max(t,0),n=Math.min(n,this.width-1),s=Math.min(s,this.height-1),0===e&&0===t&&n===this.width-1&&s===this.height-1)return this;const r=n-e+1,o=s-t+1;if(a){const n=this.channels,s=this.toCanvas(),i=c(r,o).getContext("2d");i.drawImage(s,e,t,r,o,0,0,r,o);return new _(i.getImageData(0,0,r,o).data,r,o,4).convert(n)}{const n=this.toSharp().extract({left:e,top:t,width:r,height:o});return await u(n)}}async center_crop(e,t){if(this.width===e&&this.height===t)return this;const n=(this.width-e)/2,s=(this.height-t)/2;if(a){const r=this.channels,o=this.toCanvas(),i=c(e,t).getContext("2d");let a=0,l=0,d=0,u=0;n>=0?a=n:d=-n,s>=0?l=s:u=-s,i.drawImage(o,a,l,e,t,d,u,e,t);return new _(i.getImageData(0,0,e,t).data,e,t,4).convert(r)}{let r=this.toSharp();if(n>=0&&s>=0)r=r.extract({left:Math.floor(n),top:Math.floor(s),width:e,height:t});else if(n<=0&&s<=0){const 
o=Math.floor(-s),i=Math.floor(-n);r=r.extend({top:o,left:i,right:e-this.width-i,bottom:t-this.height-o})}else{let o=[0,0],i=0;s<0?(o[0]=Math.floor(-s),o[1]=t-this.height-o[0]):i=Math.floor(s);let a=[0,0],l=0;n<0?(a[0]=Math.floor(-n),a[1]=e-this.width-a[0]):l=Math.floor(n),r=r.extend({top:o[0],bottom:o[1],left:a[0],right:a[1]}).extract({left:l,top:i,width:e,height:t})}return await u(r)}}async toBlob(e="image/png",t=1){if(!a)throw new Error("toBlob() is only supported in browser environments.");const n=this.toCanvas();return await n.convertToBlob({type:e,quality:t})}toTensor(e="CHW"){let t=new o.Tensor("uint8",new Uint8Array(this.data),[this.height,this.width,this.channels]);if("HWC"===e);else{if("CHW"!==e)throw new Error(`Unsupported channel format: ${e}`);t=t.permute(2,0,1)}return t}toCanvas(){if(!a)throw new Error("toCanvas() is only supported in browser environments.");const e=this.clone().rgba(),t=c(e.width,e.height),n=new d(e.data,e.width,e.height);return t.getContext("2d").putImageData(n,0,0),t}_update(e,t,n,s=null){return this.data=e,this.width=t,this.height=n,null!==s&&(this.channels=s),this}clone(){return new _(this.data.slice(),this.width,this.height,this.channels)}convert(e){if(this.channels===e)return this;switch(e){case 1:this.grayscale();break;case 3:this.rgb();break;case 4:this.rgba();break;default:throw new Error(`Conversion failed due to unsupported number of channels: ${this.channels}`)}return this}async save(e){if(!a){if(r.env.useFS){const t=this.toSharp();return await t.toFile(e)}throw new Error("Unable to save the image because filesystem is disabled in this environment.")}{if(l)throw new Error("Unable to save an image from a Web Worker.");const t=e.split(".").pop().toLowerCase(),n=p.get(t)??"image/png",s=await this.toBlob(n),r=URL.createObjectURL(s),o=document.createElement("a");o.href=r,o.download=e,o.click(),o.remove()}}toSharp(){if(a)throw new Error("toSharp() is only supported in server-side environments.");return 
i(this.data,{raw:{width:this.width,height:this.height,channels:this.channels}})}}},"./src/utils/maths.js":