@weavelogic/knowledge-graph-agent 0.6.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +70 -3
  3. package/dist/_virtual/__vite-browser-external.js +2 -2
  4. package/dist/_virtual/__vite-browser-external.js.map +1 -1
  5. package/dist/_virtual/index12.js +7 -0
  6. package/dist/_virtual/index12.js.map +1 -0
  7. package/dist/_virtual/ort-web.min.js +8 -0
  8. package/dist/_virtual/ort-web.min.js.map +1 -0
  9. package/dist/_virtual/ort-web.min2.js +5 -0
  10. package/dist/_virtual/ort-web.min2.js.map +1 -0
  11. package/dist/agents/base-agent.d.ts +63 -0
  12. package/dist/agents/base-agent.d.ts.map +1 -1
  13. package/dist/agents/base-agent.js +139 -0
  14. package/dist/agents/base-agent.js.map +1 -1
  15. package/dist/agents/coordinator-agent.d.ts +422 -0
  16. package/dist/agents/coordinator-agent.d.ts.map +1 -0
  17. package/dist/agents/documenter-agent.d.ts +298 -0
  18. package/dist/agents/documenter-agent.d.ts.map +1 -0
  19. package/dist/agents/index.d.ts +11 -1
  20. package/dist/agents/index.d.ts.map +1 -1
  21. package/dist/agents/index.js +4 -0
  22. package/dist/agents/index.js.map +1 -1
  23. package/dist/agents/mixins/index.d.ts +9 -0
  24. package/dist/agents/mixins/index.d.ts.map +1 -0
  25. package/dist/agents/mixins/trajectory-mixin.d.ts +112 -0
  26. package/dist/agents/mixins/trajectory-mixin.d.ts.map +1 -0
  27. package/dist/agents/optimizer-agent.d.ts +388 -0
  28. package/dist/agents/optimizer-agent.d.ts.map +1 -0
  29. package/dist/agents/planner-agent.d.ts +395 -0
  30. package/dist/agents/planner-agent.d.ts.map +1 -0
  31. package/dist/agents/registry.d.ts.map +1 -1
  32. package/dist/agents/registry.js +5 -0
  33. package/dist/agents/registry.js.map +1 -1
  34. package/dist/agents/reviewer-agent.d.ts +330 -0
  35. package/dist/agents/reviewer-agent.d.ts.map +1 -0
  36. package/dist/agents/types.d.ts +12 -1
  37. package/dist/agents/types.d.ts.map +1 -1
  38. package/dist/agents/types.js +1 -0
  39. package/dist/agents/types.js.map +1 -1
  40. package/dist/cli/commands/hive-mind/add-frontmatter.d.ts +102 -0
  41. package/dist/cli/commands/hive-mind/add-frontmatter.d.ts.map +1 -0
  42. package/dist/cli/commands/hive-mind/add-frontmatter.js +439 -0
  43. package/dist/cli/commands/hive-mind/add-frontmatter.js.map +1 -0
  44. package/dist/cli/commands/hive-mind/analyze-links.d.ts +80 -0
  45. package/dist/cli/commands/hive-mind/analyze-links.d.ts.map +1 -0
  46. package/dist/cli/commands/hive-mind/analyze-links.js +367 -0
  47. package/dist/cli/commands/hive-mind/analyze-links.js.map +1 -0
  48. package/dist/cli/commands/hive-mind/find-connections.d.ts +75 -0
  49. package/dist/cli/commands/hive-mind/find-connections.d.ts.map +1 -0
  50. package/dist/cli/commands/hive-mind/find-connections.js +347 -0
  51. package/dist/cli/commands/hive-mind/find-connections.js.map +1 -0
  52. package/dist/cli/commands/hive-mind/index.d.ts +37 -0
  53. package/dist/cli/commands/hive-mind/index.d.ts.map +1 -0
  54. package/dist/cli/commands/hive-mind/index.js +33 -0
  55. package/dist/cli/commands/hive-mind/index.js.map +1 -0
  56. package/dist/cli/commands/hive-mind/validate-names.d.ts +79 -0
  57. package/dist/cli/commands/hive-mind/validate-names.d.ts.map +1 -0
  58. package/dist/cli/commands/hive-mind/validate-names.js +353 -0
  59. package/dist/cli/commands/hive-mind/validate-names.js.map +1 -0
  60. package/dist/cli/commands/vector.js +2 -0
  61. package/dist/cli/commands/vector.js.map +1 -1
  62. package/dist/cli/index.d.ts.map +1 -1
  63. package/dist/cli/index.js +7 -0
  64. package/dist/cli/index.js.map +1 -1
  65. package/dist/equilibrium/agent-equilibrium.d.ts +194 -0
  66. package/dist/equilibrium/agent-equilibrium.d.ts.map +1 -0
  67. package/dist/equilibrium/agent-equilibrium.js +304 -0
  68. package/dist/equilibrium/agent-equilibrium.js.map +1 -0
  69. package/dist/equilibrium/graph-equilibrium.d.ts +177 -0
  70. package/dist/equilibrium/graph-equilibrium.d.ts.map +1 -0
  71. package/dist/equilibrium/index.d.ts +11 -0
  72. package/dist/equilibrium/index.d.ts.map +1 -0
  73. package/dist/equilibrium/memory-equilibrium.d.ts +153 -0
  74. package/dist/equilibrium/memory-equilibrium.d.ts.map +1 -0
  75. package/dist/graphql/resolvers/index.d.ts.map +1 -1
  76. package/dist/graphql/resolvers/queries.d.ts +11 -0
  77. package/dist/graphql/resolvers/queries.d.ts.map +1 -1
  78. package/dist/index.d.ts +2 -0
  79. package/dist/index.d.ts.map +1 -1
  80. package/dist/index.js +10 -4
  81. package/dist/index.js.map +1 -1
  82. package/dist/inference/index.d.ts +9 -0
  83. package/dist/inference/index.d.ts.map +1 -0
  84. package/dist/inference/model-selection.d.ts +131 -0
  85. package/dist/inference/model-selection.d.ts.map +1 -0
  86. package/dist/integrations/agentic-flow/adapters/agent-booster-adapter.d.ts +265 -0
  87. package/dist/integrations/agentic-flow/adapters/agent-booster-adapter.d.ts.map +1 -0
  88. package/dist/integrations/agentic-flow/adapters/agentdb-adapter.d.ts +197 -0
  89. package/dist/integrations/agentic-flow/adapters/agentdb-adapter.d.ts.map +1 -0
  90. package/dist/integrations/agentic-flow/adapters/agentdb-vector-store.d.ts +249 -0
  91. package/dist/integrations/agentic-flow/adapters/agentdb-vector-store.d.ts.map +1 -0
  92. package/dist/integrations/agentic-flow/adapters/base-adapter.d.ts +120 -0
  93. package/dist/integrations/agentic-flow/adapters/base-adapter.d.ts.map +1 -0
  94. package/dist/integrations/agentic-flow/adapters/federation-hub-adapter.d.ts +444 -0
  95. package/dist/integrations/agentic-flow/adapters/federation-hub-adapter.d.ts.map +1 -0
  96. package/dist/integrations/agentic-flow/adapters/index.d.ts +17 -0
  97. package/dist/integrations/agentic-flow/adapters/index.d.ts.map +1 -0
  98. package/dist/integrations/agentic-flow/adapters/model-router-adapter.d.ts +242 -0
  99. package/dist/integrations/agentic-flow/adapters/model-router-adapter.d.ts.map +1 -0
  100. package/dist/integrations/agentic-flow/adapters/quic-transport-adapter.d.ts +364 -0
  101. package/dist/integrations/agentic-flow/adapters/quic-transport-adapter.d.ts.map +1 -0
  102. package/dist/integrations/agentic-flow/adapters/reasoning-bank-adapter.d.ts +209 -0
  103. package/dist/integrations/agentic-flow/adapters/reasoning-bank-adapter.d.ts.map +1 -0
  104. package/dist/integrations/agentic-flow/benchmark/index.d.ts +9 -0
  105. package/dist/integrations/agentic-flow/benchmark/index.d.ts.map +1 -0
  106. package/dist/integrations/agentic-flow/benchmark/vector-benchmark.d.ts +253 -0
  107. package/dist/integrations/agentic-flow/benchmark/vector-benchmark.d.ts.map +1 -0
  108. package/dist/integrations/agentic-flow/config.d.ts +109 -0
  109. package/dist/integrations/agentic-flow/config.d.ts.map +1 -0
  110. package/dist/integrations/agentic-flow/feature-flags.d.ts +140 -0
  111. package/dist/integrations/agentic-flow/feature-flags.d.ts.map +1 -0
  112. package/dist/integrations/agentic-flow/index.d.ts +22 -0
  113. package/dist/integrations/agentic-flow/index.d.ts.map +1 -0
  114. package/dist/integrations/agentic-flow/migration/index.d.ts +9 -0
  115. package/dist/integrations/agentic-flow/migration/index.d.ts.map +1 -0
  116. package/dist/integrations/agentic-flow/migration/migrate-to-agentdb.d.ts +242 -0
  117. package/dist/integrations/agentic-flow/migration/migrate-to-agentdb.d.ts.map +1 -0
  118. package/dist/learning/index.d.ts +91 -0
  119. package/dist/learning/index.d.ts.map +1 -0
  120. package/dist/learning/learning-loop.d.ts +176 -0
  121. package/dist/learning/learning-loop.d.ts.map +1 -0
  122. package/dist/learning/services/ab-testing-framework.d.ts +135 -0
  123. package/dist/learning/services/ab-testing-framework.d.ts.map +1 -0
  124. package/dist/learning/services/agent-priming-service.d.ts +207 -0
  125. package/dist/learning/services/agent-priming-service.d.ts.map +1 -0
  126. package/dist/learning/services/daily-log-generator.d.ts +113 -0
  127. package/dist/learning/services/daily-log-generator.d.ts.map +1 -0
  128. package/dist/learning/services/index.d.ts +14 -0
  129. package/dist/learning/services/index.d.ts.map +1 -0
  130. package/dist/learning/services/memory-extraction-service.d.ts +87 -0
  131. package/dist/learning/services/memory-extraction-service.d.ts.map +1 -0
  132. package/dist/learning/services/task-completion-consumer.d.ts +162 -0
  133. package/dist/learning/services/task-completion-consumer.d.ts.map +1 -0
  134. package/dist/learning/services/trajectory-tracker.d.ts +174 -0
  135. package/dist/learning/services/trajectory-tracker.d.ts.map +1 -0
  136. package/dist/learning/types.d.ts +516 -0
  137. package/dist/learning/types.d.ts.map +1 -0
  138. package/dist/mcp/clients/claude-flow-memory-client.d.ts +259 -0
  139. package/dist/mcp/clients/claude-flow-memory-client.d.ts.map +1 -0
  140. package/dist/mcp/clients/claude-flow-memory-client.js +305 -0
  141. package/dist/mcp/clients/claude-flow-memory-client.js.map +1 -0
  142. package/dist/mcp/clients/index.d.ts +11 -0
  143. package/dist/mcp/clients/index.d.ts.map +1 -0
  144. package/dist/mcp/clients/mcp-client-adapter.d.ts +146 -0
  145. package/dist/mcp/clients/mcp-client-adapter.d.ts.map +1 -0
  146. package/dist/mcp/clients/mcp-client-adapter.js +372 -0
  147. package/dist/mcp/clients/mcp-client-adapter.js.map +1 -0
  148. package/dist/mcp/index.d.ts +10 -0
  149. package/dist/mcp/index.d.ts.map +1 -0
  150. package/dist/memory/vault-sync.d.ts +12 -0
  151. package/dist/memory/vault-sync.d.ts.map +1 -1
  152. package/dist/memory/vault-sync.js +94 -11
  153. package/dist/memory/vault-sync.js.map +1 -1
  154. package/dist/node_modules/@huggingface/jinja/dist/index.js +118 -0
  155. package/dist/node_modules/@huggingface/jinja/dist/index.js.map +1 -0
  156. package/dist/node_modules/@typescript-eslint/project-service/dist/index.js +1 -1
  157. package/dist/node_modules/@xenova/transformers/src/backends/onnx.js +24 -0
  158. package/dist/node_modules/@xenova/transformers/src/backends/onnx.js.map +1 -0
  159. package/dist/node_modules/@xenova/transformers/src/configs.js +52 -0
  160. package/dist/node_modules/@xenova/transformers/src/configs.js.map +1 -0
  161. package/dist/node_modules/@xenova/transformers/src/env.js +35 -0
  162. package/dist/node_modules/@xenova/transformers/src/env.js.map +1 -0
  163. package/dist/node_modules/@xenova/transformers/src/models.js +3852 -0
  164. package/dist/node_modules/@xenova/transformers/src/models.js.map +1 -0
  165. package/dist/node_modules/@xenova/transformers/src/tokenizers.js +144 -0
  166. package/dist/node_modules/@xenova/transformers/src/tokenizers.js.map +1 -0
  167. package/dist/node_modules/@xenova/transformers/src/utils/core.js +52 -0
  168. package/dist/node_modules/@xenova/transformers/src/utils/core.js.map +1 -0
  169. package/dist/node_modules/@xenova/transformers/src/utils/generation.js +623 -0
  170. package/dist/node_modules/@xenova/transformers/src/utils/generation.js.map +1 -0
  171. package/dist/node_modules/@xenova/transformers/src/utils/hub.js +395 -0
  172. package/dist/node_modules/@xenova/transformers/src/utils/hub.js.map +1 -0
  173. package/dist/node_modules/@xenova/transformers/src/utils/image.js +12 -0
  174. package/dist/node_modules/@xenova/transformers/src/utils/image.js.map +1 -0
  175. package/dist/node_modules/@xenova/transformers/src/utils/maths.js +89 -0
  176. package/dist/node_modules/@xenova/transformers/src/utils/maths.js.map +1 -0
  177. package/dist/node_modules/@xenova/transformers/src/utils/tensor.js +750 -0
  178. package/dist/node_modules/@xenova/transformers/src/utils/tensor.js.map +1 -0
  179. package/dist/node_modules/fdir/dist/index.js +13 -13
  180. package/dist/node_modules/fdir/dist/index.js.map +1 -1
  181. package/dist/node_modules/onnxruntime-common/dist/lib/backend-impl.js +67 -0
  182. package/dist/node_modules/onnxruntime-common/dist/lib/backend-impl.js.map +1 -0
  183. package/dist/node_modules/onnxruntime-common/dist/lib/env-impl.js +24 -0
  184. package/dist/node_modules/onnxruntime-common/dist/lib/env-impl.js.map +1 -0
  185. package/dist/node_modules/onnxruntime-common/dist/lib/env.js +6 -0
  186. package/dist/node_modules/onnxruntime-common/dist/lib/env.js.map +1 -0
  187. package/dist/node_modules/onnxruntime-common/dist/lib/index.js +11 -0
  188. package/dist/node_modules/onnxruntime-common/dist/lib/index.js.map +1 -0
  189. package/dist/node_modules/onnxruntime-common/dist/lib/inference-session-impl.js +162 -0
  190. package/dist/node_modules/onnxruntime-common/dist/lib/inference-session-impl.js.map +1 -0
  191. package/dist/node_modules/onnxruntime-common/dist/lib/inference-session.js +6 -0
  192. package/dist/node_modules/onnxruntime-common/dist/lib/inference-session.js.map +1 -0
  193. package/dist/node_modules/onnxruntime-common/dist/lib/tensor-impl.js +393 -0
  194. package/dist/node_modules/onnxruntime-common/dist/lib/tensor-impl.js.map +1 -0
  195. package/dist/node_modules/onnxruntime-common/dist/lib/tensor.js +6 -0
  196. package/dist/node_modules/onnxruntime-common/dist/lib/tensor.js.map +1 -0
  197. package/dist/node_modules/onnxruntime-web/dist/ort-web.min.js +12919 -0
  198. package/dist/node_modules/onnxruntime-web/dist/ort-web.min.js.map +1 -0
  199. package/dist/node_modules/tinyglobby/dist/index.js +14 -14
  200. package/dist/node_modules/tinyglobby/dist/index.js.map +1 -1
  201. package/dist/node_modules/typescript/lib/typescript.js +24 -24
  202. package/dist/node_modules/typescript/lib/typescript.js.map +1 -1
  203. package/dist/transport/agent-transport.d.ts +269 -0
  204. package/dist/transport/agent-transport.d.ts.map +1 -0
  205. package/dist/transport/index.d.ts +10 -0
  206. package/dist/transport/index.d.ts.map +1 -0
  207. package/dist/vector/index.d.ts +1 -1
  208. package/dist/vector/index.d.ts.map +1 -1
  209. package/dist/vector/services/embedding-service.d.ts +244 -0
  210. package/dist/vector/services/embedding-service.d.ts.map +1 -0
  211. package/dist/vector/services/embedding-service.js +10 -0
  212. package/dist/vector/services/embedding-service.js.map +1 -0
  213. package/dist/vector/services/hybrid-search.d.ts +320 -0
  214. package/dist/vector/services/hybrid-search.d.ts.map +1 -0
  215. package/dist/vector/services/hybrid-search.js +3 -0
  216. package/dist/vector/services/hybrid-search.js.map +1 -0
  217. package/dist/vector/services/index.d.ts +4 -0
  218. package/dist/vector/services/index.d.ts.map +1 -1
  219. package/package.json +10 -1
@@ -0,0 +1,3852 @@
1
+ import { AutoConfig } from "./configs.js";
2
+ import { mergeArrays, Callable, isTypedArray, isIntegralNumber } from "./utils/core.js";
3
+ import { getModelJSON, getModelFile } from "./utils/hub.js";
4
+ import { WhisperTimeStampLogitsProcessor, LogitsProcessorList, RepetitionPenaltyLogitsProcessor, NoRepeatNGramLogitsProcessor, NoBadWordsLogitsProcessor, MinLengthLogitsProcessor, MinNewTokensLengthLogitsProcessor, ForcedBOSTokenLogitsProcessor, ForcedEOSTokenLogitsProcessor, SuppressTokensAtBeginLogitsProcessor, ForceTokensLogitsProcessor, GenerationConfig, Sampler } from "./utils/generation.js";
5
+ import { cat, stack, std_mean, mean, Tensor, dynamicTimeWarping, ones_like } from "./utils/tensor.js";
6
+ import { ONNX, executionProviders } from "./backends/onnx.js";
7
+ import "./tokenizers.js";
8
+ import { medianFilter } from "./utils/maths.js";
9
+ import "./utils/image.js";
10
+ import "./env.js";
11
+ const { InferenceSession, Tensor: ONNXTensor, env } = ONNX;
12
+ const MODEL_TYPES = {
13
+ EncoderOnly: 0,
14
+ EncoderDecoder: 1,
15
+ Seq2Seq: 2,
16
+ Vision2Seq: 3,
17
+ DecoderOnly: 4,
18
+ MaskGeneration: 5
19
+ };
20
+ const MODEL_TYPE_MAPPING = /* @__PURE__ */ new Map();
21
+ const MODEL_NAME_TO_CLASS_MAPPING = /* @__PURE__ */ new Map();
22
+ const MODEL_CLASS_TO_NAME_MAPPING = /* @__PURE__ */ new Map();
23
+ async function constructSession(pretrained_model_name_or_path, fileName, options) {
24
+ let modelFileName = `onnx/${fileName}${options.quantized ? "_quantized" : ""}.onnx`;
25
+ let buffer = await getModelFile(pretrained_model_name_or_path, modelFileName, true, options);
26
+ try {
27
+ return await InferenceSession.create(buffer, {
28
+ executionProviders
29
+ });
30
+ } catch (err) {
31
+ if (executionProviders.length === 1 && executionProviders[0] === "wasm") {
32
+ throw err;
33
+ }
34
+ console.warn(err);
35
+ console.warn(
36
+ "Something went wrong during model construction (most likely a missing operation). Using `wasm` as a fallback. "
37
+ );
38
+ return await InferenceSession.create(buffer, {
39
+ executionProviders: ["wasm"]
40
+ });
41
+ }
42
+ }
43
+ function validateInputs(session, inputs) {
44
+ const checkedInputs = /* @__PURE__ */ Object.create(null);
45
+ const missingInputs = [];
46
+ for (const inputName of session.inputNames) {
47
+ const tensor = inputs[inputName];
48
+ if (!(tensor instanceof Tensor)) {
49
+ missingInputs.push(inputName);
50
+ continue;
51
+ }
52
+ checkedInputs[inputName] = env.wasm.proxy ? tensor.clone() : tensor;
53
+ }
54
+ if (missingInputs.length > 0) {
55
+ throw new Error(
56
+ `An error occurred during model execution: "Missing the following inputs: ${missingInputs.join(", ")}.`
57
+ );
58
+ }
59
+ const numInputsProvided = Object.keys(inputs).length;
60
+ const numInputsNeeded = session.inputNames.length;
61
+ if (numInputsProvided > numInputsNeeded) {
62
+ let ignored = Object.keys(inputs).filter((inputName) => !session.inputNames.includes(inputName));
63
+ console.warn(`WARNING: Too many inputs were provided (${numInputsProvided} > ${numInputsNeeded}). The following inputs will be ignored: "${ignored.join(", ")}".`);
64
+ }
65
+ return checkedInputs;
66
+ }
67
+ async function sessionRun(session, inputs) {
68
+ const checkedInputs = validateInputs(session, inputs);
69
+ try {
70
+ let output = await session.run(checkedInputs);
71
+ output = replaceTensors(output);
72
+ return output;
73
+ } catch (e) {
74
+ console.error(`An error occurred during model execution: "${e}".`);
75
+ console.error("Inputs given to model:", checkedInputs);
76
+ throw e;
77
+ }
78
+ }
79
+ function replaceTensors(obj) {
80
+ for (let prop in obj) {
81
+ if (obj[prop] instanceof ONNXTensor) {
82
+ obj[prop] = new Tensor(obj[prop]);
83
+ } else if (typeof obj[prop] === "object") {
84
+ replaceTensors(obj[prop]);
85
+ }
86
+ }
87
+ return obj;
88
+ }
89
+ function toI64Tensor(items) {
90
+ if (items instanceof Tensor) {
91
+ return items;
92
+ }
93
+ if (items.length === 0) {
94
+ throw Error("items must be non-empty");
95
+ }
96
+ if (Array.isArray(items[0])) {
97
+ if (items.some((x) => x.length !== items[0].length)) {
98
+ throw Error("Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' and/or 'truncation=True' to have batched tensors with the same length.");
99
+ }
100
+ return new Tensor(
101
+ "int64",
102
+ BigInt64Array.from(items.flat().map((x) => BigInt(x))),
103
+ [items.length, items[0].length]
104
+ );
105
+ } else {
106
+ return new Tensor(
107
+ "int64",
108
+ BigInt64Array.from(items.map((x) => BigInt(x))),
109
+ [1, items.length]
110
+ );
111
+ }
112
+ }
113
+ function prepareAttentionMask(self, tokens) {
114
+ let pad_token_id = self.config.pad_token_id ?? null;
115
+ let eos_token_id = self.config.eos_token_id ?? null;
116
+ if (isIntegralNumber(eos_token_id)) {
117
+ eos_token_id = [eos_token_id];
118
+ }
119
+ let is_pad_token_in_inputs = tokens.indexOf(pad_token_id) !== -1;
120
+ let is_pad_token_not_equal_to_eos_token_id = eos_token_id === null || !eos_token_id.includes(pad_token_id);
121
+ if (is_pad_token_in_inputs && is_pad_token_not_equal_to_eos_token_id) {
122
+ let data = BigInt64Array.from(
123
+ // Note: != so that int matches bigint
124
+ // @ts-ignore
125
+ tokens.data.map((x) => x != pad_token_id)
126
+ );
127
+ return new Tensor("int64", data, tokens.dims);
128
+ } else {
129
+ return ones_like(tokens);
130
+ }
131
+ }
132
+ function preparePositionIds(session, feeds, use_cache_branch) {
133
+ if (!session.inputNames.includes("position_ids")) return;
134
+ const data = new BigInt64Array(feeds.attention_mask.data.length);
135
+ for (let i = 0; i < feeds.attention_mask.dims[0]; ++i) {
136
+ let start = i * feeds.attention_mask.dims[1];
137
+ let sum = BigInt(0);
138
+ for (let j = 0; j < feeds.attention_mask.dims[1]; ++j) {
139
+ const index = start + j;
140
+ if (feeds.attention_mask.data[index] === 0n) {
141
+ data[index] = BigInt(1);
142
+ } else {
143
+ data[index] = sum;
144
+ sum += feeds.attention_mask.data[index];
145
+ }
146
+ }
147
+ }
148
+ feeds.position_ids = new Tensor("int64", data, feeds.attention_mask.dims);
149
+ if (use_cache_branch) {
150
+ feeds.position_ids = feeds.position_ids.slice(null, -1).unsqueeze_(-1);
151
+ }
152
+ }
153
+ function boolTensor(value) {
154
+ return new Tensor("bool", [value], [1]);
155
+ }
156
+ async function seq2seqForward(self, model_inputs) {
157
+ let { encoder_outputs, past_key_values } = model_inputs;
158
+ if (!encoder_outputs) {
159
+ encoder_outputs = (await encoderForward(self, model_inputs)).last_hidden_state;
160
+ }
161
+ let decoderFeeds = {
162
+ input_ids: model_inputs.decoder_input_ids,
163
+ encoder_hidden_states: encoder_outputs
164
+ };
165
+ const use_cache_branch = !!past_key_values;
166
+ if (self.decoder_merged_session.inputNames.includes("use_cache_branch")) {
167
+ decoderFeeds.use_cache_branch = boolTensor(use_cache_branch);
168
+ }
169
+ if (self.decoder_merged_session.inputNames.includes("encoder_attention_mask")) {
170
+ decoderFeeds.encoder_attention_mask = model_inputs.attention_mask;
171
+ }
172
+ preparePositionIds(self.decoder_merged_session, decoderFeeds, use_cache_branch);
173
+ self.addPastKeyValues(decoderFeeds, past_key_values);
174
+ const decoderResults = await sessionRun(self.decoder_merged_session, decoderFeeds);
175
+ let logits = decoderResults.logits;
176
+ past_key_values = self.getPastKeyValues(decoderResults, past_key_values);
177
+ const attns = self.getAttentions(decoderResults);
178
+ return new Seq2SeqLMOutput({ logits, past_key_values, encoder_outputs, ...attns });
179
+ }
180
+ function seq2seqStartBeams(self, inputTokenIds, generation_config, numOutputTokens) {
181
+ let beams = [];
182
+ let beamId = 0;
183
+ const requires_attention_mask = self.requires_attention_mask ?? true;
184
+ let decoder_input_ids = generation_config.decoder_input_ids ?? generation_config.decoder_start_token_id ?? generation_config.bos_token_id ?? generation_config.eos_token_id;
185
+ if (decoder_input_ids instanceof Tensor) {
186
+ decoder_input_ids = decoder_input_ids.tolist().flat();
187
+ } else if (!Array.isArray(decoder_input_ids)) {
188
+ decoder_input_ids = [decoder_input_ids];
189
+ }
190
+ for (let tokens of inputTokenIds) {
191
+ tokens.dims = [1, ...tokens.dims];
192
+ let start = {
193
+ inputs: tokens,
194
+ encoder_outputs: null,
195
+ prev_model_outputs: null,
196
+ output_token_ids: decoder_input_ids,
197
+ done: false,
198
+ score: 0,
199
+ id: beamId++
200
+ // assign unique id to beams
201
+ };
202
+ if (requires_attention_mask) {
203
+ start.attention_mask = prepareAttentionMask(self, tokens);
204
+ }
205
+ beams.push(start);
206
+ }
207
+ return beams;
208
+ }
209
+ async function seq2seqRunBeam(self, beam) {
210
+ const input_name = self.main_input_name;
211
+ let decoder_input_ids = beam.output_token_ids;
212
+ if (beam.prev_model_outputs) {
213
+ decoder_input_ids = decoder_input_ids.slice(-1);
214
+ }
215
+ let model_inputs = {
216
+ [input_name]: beam.inputs,
217
+ decoder_input_ids: toI64Tensor(decoder_input_ids),
218
+ encoder_outputs: beam.encoder_outputs,
219
+ past_key_values: beam.prev_model_outputs?.past_key_values
220
+ };
221
+ if (beam.attention_mask) {
222
+ model_inputs.attention_mask = beam.attention_mask;
223
+ }
224
+ let output = await self.forward(model_inputs);
225
+ beam.prev_model_outputs = output;
226
+ beam.encoder_outputs = output.encoder_outputs;
227
+ return output;
228
+ }
229
+ function seq2seqUpdatebeam(beam, newTokenId) {
230
+ beam.output_token_ids = [...beam.output_token_ids, newTokenId];
231
+ }
232
+ async function encoderForward(self, model_inputs) {
233
+ const encoderFeeds = /* @__PURE__ */ Object.create(null);
234
+ for (const key of self.session.inputNames) {
235
+ encoderFeeds[key] = model_inputs[key];
236
+ }
237
+ if (self.session.inputNames.includes("token_type_ids") && !encoderFeeds.token_type_ids) {
238
+ encoderFeeds.token_type_ids = new Tensor(
239
+ "int64",
240
+ new BigInt64Array(encoderFeeds.input_ids.data.length),
241
+ encoderFeeds.input_ids.dims
242
+ );
243
+ }
244
+ return await sessionRun(self.session, encoderFeeds);
245
+ }
246
+ async function decoderForward(self, model_inputs) {
247
+ let { input_ids, past_key_values, attention_mask } = model_inputs;
248
+ let decoderFeeds = {
249
+ input_ids,
250
+ attention_mask: attention_mask ?? prepareAttentionMask(self, input_ids)
251
+ };
252
+ const use_cache_branch = !!past_key_values;
253
+ if (self.session.inputNames.includes("use_cache_branch")) {
254
+ decoderFeeds.use_cache_branch = boolTensor(use_cache_branch);
255
+ }
256
+ preparePositionIds(self.session, decoderFeeds, use_cache_branch);
257
+ self.addPastKeyValues(decoderFeeds, past_key_values);
258
+ let decoderResults = await sessionRun(self.session, decoderFeeds);
259
+ let logits = decoderResults.logits;
260
+ past_key_values = self.getPastKeyValues(decoderResults, past_key_values);
261
+ return { logits, past_key_values };
262
+ }
263
+ function decoderStartBeams(self, inputTokenIds, generation_config, numOutputTokens, inputs_attention_mask) {
264
+ let beams = [];
265
+ let beamId = 0;
266
+ for (let tokens of inputTokenIds) {
267
+ let output_token_ids = tokens.tolist().map(Number);
268
+ tokens.dims = [1, ...tokens.dims];
269
+ let attn_mask;
270
+ if (inputs_attention_mask) {
271
+ attn_mask = inputs_attention_mask[beamId];
272
+ attn_mask.dims = [1, ...attn_mask.dims];
273
+ } else {
274
+ attn_mask = prepareAttentionMask(self, tokens);
275
+ }
276
+ let start = {
277
+ input: tokens,
278
+ model_input_ids: tokens,
279
+ attention_mask: attn_mask,
280
+ prev_model_outputs: null,
281
+ output_token_ids,
282
+ num_output_tokens: numOutputTokens,
283
+ done: false,
284
+ score: 0,
285
+ id: beamId++
286
+ // assign unique id to beams
287
+ };
288
+ beams.push(start);
289
+ }
290
+ return beams;
291
+ }
292
+ async function decoderRunBeam(self, beam) {
293
+ let attnMaskData = new BigInt64Array(beam.output_token_ids.length).fill(1n);
294
+ let model_inputs = {
295
+ input_ids: beam.model_input_ids,
296
+ attention_mask: new Tensor(
297
+ "int64",
298
+ attnMaskData,
299
+ [1, attnMaskData.length]
300
+ ),
301
+ past_key_values: beam.prev_model_outputs?.past_key_values
302
+ };
303
+ let output = await self.forward(model_inputs);
304
+ beam.prev_model_outputs = output;
305
+ return output;
306
+ }
307
+ function decoderUpdatebeam(beam, newTokenId) {
308
+ beam.output_token_ids = [...beam.output_token_ids, newTokenId];
309
+ beam.model_input_ids = new Tensor("int64", [BigInt(newTokenId)], [1, 1]);
310
+ }
311
+ class PreTrainedModel extends Callable {
312
+ main_input_name = "input_ids";
313
+ /**
314
+ * Creates a new instance of the `PreTrainedModel` class.
315
+ * @param {Object} config The model configuration.
316
+ * @param {any} session session for the model.
317
+ */
318
+ constructor(config, session) {
319
+ super();
320
+ this.config = config;
321
+ this.session = session;
322
+ const modelName = MODEL_CLASS_TO_NAME_MAPPING.get(this.constructor);
323
+ const modelType = MODEL_TYPE_MAPPING.get(modelName);
324
+ this.can_generate = false;
325
+ this._runBeam = null;
326
+ this._getStartBeams = null;
327
+ this._updateBeam = null;
328
+ this._forward = null;
329
+ if (modelType === MODEL_TYPES.DecoderOnly) {
330
+ this.can_generate = true;
331
+ this._runBeam = decoderRunBeam;
332
+ this._getStartBeams = decoderStartBeams;
333
+ this._updateBeam = decoderUpdatebeam;
334
+ this._forward = decoderForward;
335
+ } else if (modelType === MODEL_TYPES.Seq2Seq || modelType === MODEL_TYPES.Vision2Seq) {
336
+ this.can_generate = true;
337
+ this._runBeam = seq2seqRunBeam;
338
+ this._getStartBeams = seq2seqStartBeams;
339
+ this._updateBeam = seq2seqUpdatebeam;
340
+ this._forward = seq2seqForward;
341
+ } else if (modelType === MODEL_TYPES.EncoderDecoder) {
342
+ this._forward = encoderForward;
343
+ } else {
344
+ this._forward = encoderForward;
345
+ }
346
+ }
347
+ /**
348
+ * Disposes of all the ONNX sessions that were created during inference.
349
+ * @returns {Promise<unknown[]>} An array of promises, one for each ONNX session that is being disposed.
350
+ * @todo Use https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/FinalizationRegistry
351
+ */
352
+ async dispose() {
353
+ const promises = [];
354
+ for (let key of Object.keys(this)) {
355
+ const item = this[key];
356
+ if (item instanceof InferenceSession) {
357
+ promises.push(item.handler.dispose());
358
+ }
359
+ }
360
+ return await Promise.all(promises);
361
+ }
362
+ /**
363
+ * Instantiate one of the model classes of the library from a pretrained model.
364
+ *
365
+ * The model class to instantiate is selected based on the `model_type` property of the config object
366
+ * (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
367
+ *
368
+ * @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
369
+ * - A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.
370
+ * Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
371
+ * user or organization name, like `dbmdz/bert-base-german-cased`.
372
+ * - A path to a *directory* containing model weights, e.g., `./my_model_directory/`.
373
+ * @param {import('./utils/hub.js').PretrainedOptions} options Additional options for loading the model.
374
+ *
375
+ * @returns {Promise<PreTrainedModel>} A new instance of the `PreTrainedModel` class.
376
+ */
377
+ static async from_pretrained(pretrained_model_name_or_path, {
378
+ quantized = true,
379
+ progress_callback = null,
380
+ config = null,
381
+ cache_dir = null,
382
+ local_files_only = false,
383
+ revision = "main",
384
+ model_file_name = null
385
+ } = {}) {
386
+ let options = {
387
+ quantized,
388
+ progress_callback,
389
+ config,
390
+ cache_dir,
391
+ local_files_only,
392
+ revision,
393
+ model_file_name
394
+ };
395
+ const modelName = MODEL_CLASS_TO_NAME_MAPPING.get(this);
396
+ const modelType = MODEL_TYPE_MAPPING.get(modelName);
397
+ let info;
398
+ if (modelType === MODEL_TYPES.DecoderOnly) {
399
+ info = await Promise.all([
400
+ AutoConfig.from_pretrained(pretrained_model_name_or_path, options),
401
+ constructSession(pretrained_model_name_or_path, options.model_file_name ?? "decoder_model_merged", options),
402
+ getModelJSON(pretrained_model_name_or_path, "generation_config.json", false, options)
403
+ ]);
404
+ } else if (modelType === MODEL_TYPES.Seq2Seq || modelType === MODEL_TYPES.Vision2Seq) {
405
+ info = await Promise.all([
406
+ AutoConfig.from_pretrained(pretrained_model_name_or_path, options),
407
+ constructSession(pretrained_model_name_or_path, "encoder_model", options),
408
+ constructSession(pretrained_model_name_or_path, "decoder_model_merged", options),
409
+ getModelJSON(pretrained_model_name_or_path, "generation_config.json", false, options)
410
+ ]);
411
+ } else if (modelType === MODEL_TYPES.MaskGeneration) {
412
+ info = await Promise.all([
413
+ AutoConfig.from_pretrained(pretrained_model_name_or_path, options),
414
+ constructSession(pretrained_model_name_or_path, "vision_encoder", options),
415
+ constructSession(pretrained_model_name_or_path, "prompt_encoder_mask_decoder", options)
416
+ ]);
417
+ } else if (modelType === MODEL_TYPES.EncoderDecoder) {
418
+ info = await Promise.all([
419
+ AutoConfig.from_pretrained(pretrained_model_name_or_path, options),
420
+ constructSession(pretrained_model_name_or_path, "encoder_model", options),
421
+ constructSession(pretrained_model_name_or_path, "decoder_model_merged", options)
422
+ ]);
423
+ } else {
424
+ if (modelType !== MODEL_TYPES.EncoderOnly) {
425
+ console.warn(`Model type for '${modelName ?? config?.model_type}' not found, assuming encoder-only architecture. Please report this at https://github.com/xenova/transformers.js/issues/new/choose.`);
426
+ }
427
+ info = await Promise.all([
428
+ AutoConfig.from_pretrained(pretrained_model_name_or_path, options),
429
+ constructSession(pretrained_model_name_or_path, options.model_file_name ?? "model", options)
430
+ ]);
431
+ }
432
+ return new this(...info);
433
+ }
434
+ /**
435
+ * Runs the model with the provided inputs
436
+ * @param {Object} model_inputs Object containing input tensors
437
+ * @returns {Promise<Object>} Object containing output tensors
438
+ */
439
+ async _call(model_inputs) {
440
+ return await this.forward(model_inputs);
441
+ }
442
+ /**
443
+ * Forward method for a pretrained model. If not overridden by a subclass, the correct forward method
444
+ * will be chosen based on the model type.
445
+ * @param {Object} model_inputs The input data to the model in the format specified in the ONNX model.
446
+ * @returns {Promise<Object>} The output data from the model in the format specified in the ONNX model.
447
+ * @throws {Error} This method must be implemented in subclasses.
448
+ */
449
+ async forward(model_inputs) {
450
+ return await this._forward(this, model_inputs);
451
+ }
452
+ /**
453
+ * @param {import('./utils/generation.js').GenerationConfigType} generation_config
454
+ * @param {number} input_ids_seq_length The starting sequence length for the input ids.
455
+ * @returns {LogitsProcessorList}
456
+ * @private
457
+ */
458
+ _get_logits_processor(generation_config, input_ids_seq_length, logits_processor = null) {
459
+ const processors = new LogitsProcessorList();
460
+ if (generation_config.repetition_penalty !== null && generation_config.repetition_penalty !== 1) {
461
+ processors.push(new RepetitionPenaltyLogitsProcessor(generation_config.repetition_penalty));
462
+ }
463
+ if (generation_config.no_repeat_ngram_size !== null && generation_config.no_repeat_ngram_size > 0) {
464
+ processors.push(new NoRepeatNGramLogitsProcessor(generation_config.no_repeat_ngram_size));
465
+ }
466
+ if (generation_config.bad_words_ids !== null) {
467
+ processors.push(new NoBadWordsLogitsProcessor(generation_config.bad_words_ids, generation_config.eos_token_id));
468
+ }
469
+ if (generation_config.min_length !== null && generation_config.eos_token_id !== null && generation_config.min_length > 0) {
470
+ processors.push(new MinLengthLogitsProcessor(generation_config.min_length, generation_config.eos_token_id));
471
+ }
472
+ if (generation_config.min_new_tokens !== null && generation_config.eos_token_id !== null && generation_config.min_new_tokens > 0) {
473
+ processors.push(new MinNewTokensLengthLogitsProcessor(
474
+ input_ids_seq_length,
475
+ generation_config.min_new_tokens,
476
+ generation_config.eos_token_id
477
+ ));
478
+ }
479
+ if (generation_config.forced_bos_token_id !== null) {
480
+ processors.push(new ForcedBOSTokenLogitsProcessor(generation_config.forced_bos_token_id));
481
+ }
482
+ if (generation_config.forced_eos_token_id !== null) {
483
+ processors.push(new ForcedEOSTokenLogitsProcessor(
484
+ generation_config.max_length,
485
+ generation_config.forced_eos_token_id
486
+ ));
487
+ }
488
+ if (generation_config.begin_suppress_tokens !== null) {
489
+ let begin_index = input_ids_seq_length > 1 || generation_config.forced_bos_token_id === null ? input_ids_seq_length : input_ids_seq_length + 1;
490
+ if (generation_config.forced_decoder_ids !== null) {
491
+ begin_index += generation_config.forced_decoder_ids[generation_config.forced_decoder_ids.length - 1][0];
492
+ }
493
+ processors.push(new SuppressTokensAtBeginLogitsProcessor(generation_config.begin_suppress_tokens, begin_index));
494
+ }
495
+ if (generation_config.forced_decoder_ids !== null) {
496
+ processors.push(new ForceTokensLogitsProcessor(generation_config.forced_decoder_ids));
497
+ }
498
+ if (logits_processor !== null) {
499
+ processors.extend(logits_processor);
500
+ }
501
+ return processors;
502
+ }
503
+ /**
504
+ * This function merges multiple generation configs together to form a final generation config to be used by the model for text generation.
505
+ * It first creates an empty `GenerationConfig` object, then it applies the model's own `generation_config` property to it. Finally, if a `generation_config` object was passed in the arguments, it overwrites the corresponding properties in the final config with those of the passed config object.
506
+ * @param {import('./utils/generation.js').GenerationConfigType} generation_config A `GenerationConfig` object containing generation parameters.
507
+ * @returns {import('./utils/generation.js').GenerationConfigType} The final generation config object to be used by the model for text generation.
508
+ */
509
+ _get_generation_config(generation_config) {
510
+ let gen_config = new GenerationConfig(this.config);
511
+ if ("generation_config" in this) {
512
+ Object.assign(gen_config, this.generation_config);
513
+ }
514
+ if (generation_config !== null) {
515
+ Object.assign(gen_config, generation_config);
516
+ }
517
+ return gen_config;
518
+ }
519
+ /**
520
+ * @typedef {import('./utils/maths.js').TypedArray} TypedArray
521
+ */
522
+ /**
523
+ * @typedef {{ sequences: Tensor, decoder_attentions: Tensor, cross_attentions: Tensor }} EncoderDecoderOutput
524
+ * @typedef {Object} DecoderOutput
525
+ *
526
+ * Generates text based on the given inputs and generation configuration using the model.
527
+ * @param {Tensor|Array|TypedArray} inputs An array of input token IDs.
528
+ * @param {Object|GenerationConfig|null} generation_config The generation configuration to use. If null, default configuration will be used.
529
+ * @param {Object|null} logits_processor An optional logits processor to use. If null, a new LogitsProcessorList instance will be created.
530
+ * @param {Object} options options
531
+ * @param {Object} [options.inputs_attention_mask=null] An optional attention mask for the inputs.
532
+ * @returns {Promise<number[][]|EncoderDecoderOutput|DecoderOutput>} An array of generated output sequences, where each sequence is an array of token IDs.
533
+ * @throws {Error} Throws an error if the inputs array is empty.
534
+ */
535
+ async generate(inputs, generation_config = null, logits_processor = null, {
536
+ inputs_attention_mask = null
537
+ } = {}) {
538
+ if (!this.can_generate) {
539
+ const modelName = MODEL_CLASS_TO_NAME_MAPPING.get(this.constructor);
540
+ let errorMessage = `The current model class (${modelName}) is not compatible with \`.generate()\`, as it doesn't have a language model head.`;
541
+ const modelType = this.config.model_type;
542
+ const possibleInfo = MODEL_WITH_LM_HEAD_MAPPING_NAMES.get(modelType) ?? MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES.get(modelType) ?? MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES.get(modelType) ?? MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES.get(modelType);
543
+ if (possibleInfo) {
544
+ errorMessage += ` Please use the following class instead: '${possibleInfo[0]}'`;
545
+ }
546
+ throw Error(errorMessage);
547
+ }
548
+ if (!(inputs instanceof Tensor) && !isTypedArray(inputs) && !Array.isArray(inputs)) {
549
+ throw Error(`\`inputs\` must be a Tensor, TypedArray, or Array, but is "${inputs.constructor.name}".`);
550
+ }
551
+ let input_ids_seq_length;
552
+ if (this.config.is_encoder_decoder) {
553
+ input_ids_seq_length = 0;
554
+ } else {
555
+ input_ids_seq_length = inputs instanceof Tensor ? inputs.dims.at(-1) : inputs.length;
556
+ if (input_ids_seq_length === 0) {
557
+ throw Error("Must supply a non-empty array of input token ids.");
558
+ }
559
+ }
560
+ generation_config = this._get_generation_config(generation_config);
561
+ logits_processor = logits_processor ?? new LogitsProcessorList();
562
+ logits_processor = this._get_logits_processor(
563
+ generation_config,
564
+ input_ids_seq_length,
565
+ logits_processor
566
+ );
567
+ let eos_token_ids = generation_config.eos_token_id;
568
+ if (eos_token_ids !== null && !Array.isArray(eos_token_ids)) {
569
+ eos_token_ids = [eos_token_ids];
570
+ }
571
+ let numOutputTokens = 1;
572
+ const maxOutputTokens = numOutputTokens + (generation_config.max_new_tokens ?? Infinity);
573
+ const useMaxLength = Number.isInteger(generation_config.max_length) && (generation_config.max_new_tokens ?? null) === null;
574
+ let sampler = Sampler.getSampler(generation_config);
575
+ let beams = this.getStartBeams(inputs, generation_config, numOutputTokens, inputs_attention_mask);
576
+ while (beams.some((x) => !x.done) && numOutputTokens < maxOutputTokens) {
577
+ let newest_beams = [];
578
+ for (let beam of beams) {
579
+ if (beam.done) {
580
+ newest_beams.push(beam);
581
+ continue;
582
+ }
583
+ if (useMaxLength && beam.output_token_ids.length >= generation_config.max_length) {
584
+ beam.done = true;
585
+ newest_beams.push(beam);
586
+ continue;
587
+ }
588
+ let output = await this.runBeam(beam);
589
+ if (generation_config.output_attentions) {
590
+ this.addAttentionsToBeam(beam, output);
591
+ }
592
+ if (generation_config.output_scores) ;
593
+ let logits = output.logits.slice(null, -1, null);
594
+ logits_processor(beam.output_token_ids, logits);
595
+ let sampledTokens = sampler(logits);
596
+ for (let [newTokenId, logProb] of sampledTokens) {
597
+ let newBeam = { ...beam };
598
+ this.updateBeam(newBeam, newTokenId);
599
+ newBeam.score += logProb;
600
+ if (eos_token_ids && eos_token_ids.includes(newTokenId)) {
601
+ newBeam.done = true;
602
+ }
603
+ newest_beams.push(newBeam);
604
+ }
605
+ }
606
+ ++numOutputTokens;
607
+ newest_beams = this.groupBeams(newest_beams).map(
608
+ (group) => group.sort((a, b) => b.score - a.score).slice(0, generation_config.num_beams)
609
+ // remove outside beam width
610
+ );
611
+ beams = newest_beams.flat();
612
+ if (generation_config.callback_function) {
613
+ generation_config.callback_function(beams);
614
+ }
615
+ }
616
+ const groupedBeams = this.groupBeams(beams);
617
+ const getFlattened = (key) => groupedBeams.map(
618
+ (batch) => {
619
+ if (generation_config.num_return_sequences > 1) {
620
+ return batch.slice(0, generation_config.num_return_sequences).map((x) => x[key]);
621
+ } else {
622
+ return [batch[0][key]];
623
+ }
624
+ }
625
+ ).flat();
626
+ const sequences = getFlattened("output_token_ids");
627
+ if (generation_config.return_dict_in_generate) {
628
+ const decoder_attentions = getFlattened("decoder_attentions");
629
+ const cross_attentions = getFlattened("cross_attentions");
630
+ return {
631
+ sequences,
632
+ decoder_attentions,
633
+ cross_attentions
634
+ };
635
+ } else {
636
+ return sequences;
637
+ }
638
+ }
639
+ /**
640
+ * Helper function to add attentions to beam
641
+ * @param {Object} beam
642
+ * @param {Object} output
643
+ * @private
644
+ */
645
+ addAttentionsToBeam(beam, output) {
646
+ if (this.config.is_encoder_decoder) {
647
+ if (!output.cross_attentions || output.cross_attentions.length === 0) {
648
+ throw Error(
649
+ "`output_attentions` is true, but the model did not produce cross-attentions. This is most likely because the model was not exported with `output_attentions=True`."
650
+ );
651
+ }
652
+ if (!beam.cross_attentions) {
653
+ beam.cross_attentions = [];
654
+ }
655
+ beam.cross_attentions.push(output.cross_attentions);
656
+ }
657
+ if (!output.decoder_attentions || output.decoder_attentions.length === 0) {
658
+ throw Error(
659
+ "`output_attentions` is true, but the model did not produce decoder-attentions. This is most likely because the model was not exported with `output_attentions=True`."
660
+ );
661
+ }
662
+ if (!beam.decoder_attentions) {
663
+ beam.decoder_attentions = [];
664
+ }
665
+ beam.decoder_attentions.push(output.decoder_attentions);
666
+ }
667
+ /**
668
+ * Groups an array of beam objects by their ids.
669
+ *
670
+ * @param {Array} beams The array of beam objects to group.
671
+ * @returns {Array} An array of arrays, where each inner array contains beam objects with the same id.
672
+ */
673
+ groupBeams(beams) {
674
+ const groups = /* @__PURE__ */ Object.create(null);
675
+ for (const obj of beams) {
676
+ if (groups[obj.id] === void 0) {
677
+ groups[obj.id] = [obj];
678
+ } else {
679
+ groups[obj.id].push(obj);
680
+ }
681
+ }
682
+ return Object.values(groups);
683
+ }
684
+ /**
685
+ * Returns an object containing past key values from the given decoder results object.
686
+ *
687
+ * @param {Object} decoderResults The decoder results object.
688
+ * @param {Object} pastKeyValues The previous past key values.
689
+ * @returns {Object} An object containing past key values.
690
+ */
691
+ getPastKeyValues(decoderResults, pastKeyValues) {
692
+ const pkvs = /* @__PURE__ */ Object.create(null);
693
+ for (const name in decoderResults) {
694
+ if (name.startsWith("present")) {
695
+ let newName = name.replace("present", "past_key_values");
696
+ if (pastKeyValues && name.includes("encoder")) {
697
+ pkvs[newName] = pastKeyValues[newName];
698
+ } else {
699
+ pkvs[newName] = decoderResults[name];
700
+ }
701
+ }
702
+ }
703
+ return pkvs;
704
+ }
705
+ /**
706
+ * Returns an object containing attentions from the given decoder results object.
707
+ *
708
+ * @param {Object} decoderResults The decoder results object.
709
+ * @returns {Object} An object containing attentions.
710
+ */
711
+ getAttentions(decoderResults) {
712
+ const attns = /* @__PURE__ */ Object.create(null);
713
+ for (const attnName of ["cross_attentions", "decoder_attentions"]) {
714
+ const result = [];
715
+ for (const name in decoderResults) {
716
+ if (name.startsWith(attnName)) {
717
+ const index = name.split(".").pop();
718
+ result[index] = decoderResults[name];
719
+ }
720
+ }
721
+ attns[attnName] = result;
722
+ }
723
+ return attns;
724
+ }
725
+ /**
726
+ * Adds past key values to the decoder feeds object. If pastKeyValues is null, creates new tensors for past key values.
727
+ *
728
+ * @param {Object} decoderFeeds The decoder feeds object to add past key values to.
729
+ * @param {Object} pastKeyValues An object containing past key values.
730
+ */
731
+ addPastKeyValues(decoderFeeds, pastKeyValues) {
732
+ if (pastKeyValues) {
733
+ Object.assign(decoderFeeds, pastKeyValues);
734
+ } else {
735
+ const batch_size = 1;
736
+ if (this.config.is_encoder_decoder && (this.add_encoder_pkv ?? true)) {
737
+ let encoder_dims = [batch_size, this.num_encoder_heads, 0, this.encoder_dim_kv];
738
+ let decoder_dims = [batch_size, this.num_decoder_heads, 0, this.decoder_dim_kv];
739
+ for (let i = 0; i < this.num_decoder_layers; ++i) {
740
+ decoderFeeds[`past_key_values.${i}.encoder.key`] = new Tensor("float32", [], encoder_dims);
741
+ decoderFeeds[`past_key_values.${i}.encoder.value`] = new Tensor("float32", [], encoder_dims);
742
+ decoderFeeds[`past_key_values.${i}.decoder.key`] = new Tensor("float32", [], decoder_dims);
743
+ decoderFeeds[`past_key_values.${i}.decoder.value`] = new Tensor("float32", [], decoder_dims);
744
+ }
745
+ } else if (this.config.model_type === "falcon") {
746
+ let dims = [batch_size * this.num_heads, 0, this.dim_kv];
747
+ for (let i = 0; i < this.num_layers; ++i) {
748
+ decoderFeeds[`past_key_values.${i}.key`] = new Tensor("float32", [], dims);
749
+ decoderFeeds[`past_key_values.${i}.value`] = new Tensor("float32", [], dims);
750
+ }
751
+ } else if (this.config.multi_query) {
752
+ let dims = [batch_size * this.num_heads, 0, 2 * this.dim_kv];
753
+ for (let i = 0; i < this.num_layers; ++i) {
754
+ decoderFeeds[`past_key_values.${i}.key_value`] = new Tensor("float32", [], dims);
755
+ }
756
+ } else if (this.config.model_type === "bloom") {
757
+ let keyDims = [batch_size * this.num_heads, this.dim_kv, 0];
758
+ let valueDims = [batch_size * this.num_heads, 0, this.dim_kv];
759
+ for (let i = 0; i < this.num_layers; ++i) {
760
+ decoderFeeds[`past_key_values.${i}.key`] = new Tensor("float32", [], keyDims);
761
+ decoderFeeds[`past_key_values.${i}.value`] = new Tensor("float32", [], valueDims);
762
+ }
763
+ } else {
764
+ let dims = [batch_size, this.num_heads, 0, this.dim_kv];
765
+ for (let i = 0; i < this.num_layers; ++i) {
766
+ decoderFeeds[`past_key_values.${i}.key`] = new Tensor("float32", [], dims);
767
+ decoderFeeds[`past_key_values.${i}.value`] = new Tensor("float32", [], dims);
768
+ }
769
+ }
770
+ }
771
+ }
772
+ /**
773
+ * Initializes and returns the beam for text generation task
774
+ * @param {Tensor} inputTokenIds The input token ids.
775
+ * @param {Object} generation_config The generation config.
776
+ * @param {number} numOutputTokens The number of tokens to be generated.
777
+ * @param {Tensor} inputs_attention_mask Optional input attention mask.
778
+ * @returns {any} A Beam object representing the initialized beam.
779
+ * @private
780
+ */
781
+ getStartBeams(inputTokenIds, generation_config, numOutputTokens, inputs_attention_mask) {
782
+ return this._getStartBeams(this, inputTokenIds, generation_config, numOutputTokens, inputs_attention_mask);
783
+ }
784
+ /**
785
+ * Runs a single step of the beam search generation algorithm.
786
+ * @param {any} beam The current beam being generated.
787
+ * @returns {Promise<any>} The updated beam after a single generation step.
788
+ * @private
789
+ */
790
+ async runBeam(beam) {
791
+ return await this._runBeam(this, beam);
792
+ }
793
+ /**
794
+ * Update a beam with a new token ID.
795
+ * @param {Object} beam The beam to update.
796
+ * @param {number} newTokenId The new token ID to add to the beam's output.
797
+ * @private
798
+ */
799
+ updateBeam(beam, newTokenId) {
800
+ return this._updateBeam(beam, newTokenId);
801
+ }
802
+ }
803
+ class ModelOutput {
804
+ }
805
+ class BertPreTrainedModel extends PreTrainedModel {
806
+ }
807
+ class BertModel extends BertPreTrainedModel {
808
+ }
809
+ class BertForMaskedLM extends BertPreTrainedModel {
810
+ /**
811
+ * Calls the model on new inputs.
812
+ *
813
+ * @param {Object} model_inputs The inputs to the model.
814
+ * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
815
+ */
816
+ async _call(model_inputs) {
817
+ return new MaskedLMOutput(await super._call(model_inputs));
818
+ }
819
+ }
820
+ class BertForSequenceClassification extends BertPreTrainedModel {
821
+ /**
822
+ * Calls the model on new inputs.
823
+ *
824
+ * @param {Object} model_inputs The inputs to the model.
825
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
826
+ */
827
+ async _call(model_inputs) {
828
+ return new SequenceClassifierOutput(await super._call(model_inputs));
829
+ }
830
+ }
831
+ class BertForTokenClassification extends BertPreTrainedModel {
832
+ /**
833
+ * Calls the model on new inputs.
834
+ *
835
+ * @param {Object} model_inputs The inputs to the model.
836
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
837
+ */
838
+ async _call(model_inputs) {
839
+ return new TokenClassifierOutput(await super._call(model_inputs));
840
+ }
841
+ }
842
+ class BertForQuestionAnswering extends BertPreTrainedModel {
843
+ /**
844
+ * Calls the model on new inputs.
845
+ *
846
+ * @param {Object} model_inputs The inputs to the model.
847
+ * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
848
+ */
849
+ async _call(model_inputs) {
850
+ return new QuestionAnsweringModelOutput(await super._call(model_inputs));
851
+ }
852
+ }
853
+ class NomicBertPreTrainedModel extends PreTrainedModel {
854
+ }
855
+ class NomicBertModel extends NomicBertPreTrainedModel {
856
+ }
857
+ class RoFormerPreTrainedModel extends PreTrainedModel {
858
+ }
859
+ class RoFormerModel extends RoFormerPreTrainedModel {
860
+ }
861
+ class RoFormerForMaskedLM extends RoFormerPreTrainedModel {
862
+ /**
863
+ * Calls the model on new inputs.
864
+ *
865
+ * @param {Object} model_inputs The inputs to the model.
866
+ * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
867
+ */
868
+ async _call(model_inputs) {
869
+ return new MaskedLMOutput(await super._call(model_inputs));
870
+ }
871
+ }
872
+ class RoFormerForSequenceClassification extends RoFormerPreTrainedModel {
873
+ /**
874
+ * Calls the model on new inputs.
875
+ *
876
+ * @param {Object} model_inputs The inputs to the model.
877
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
878
+ */
879
+ async _call(model_inputs) {
880
+ return new SequenceClassifierOutput(await super._call(model_inputs));
881
+ }
882
+ }
883
+ class RoFormerForTokenClassification extends RoFormerPreTrainedModel {
884
+ /**
885
+ * Calls the model on new inputs.
886
+ *
887
+ * @param {Object} model_inputs The inputs to the model.
888
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
889
+ */
890
+ async _call(model_inputs) {
891
+ return new TokenClassifierOutput(await super._call(model_inputs));
892
+ }
893
+ }
894
+ class RoFormerForQuestionAnswering extends RoFormerPreTrainedModel {
895
+ /**
896
+ * Calls the model on new inputs.
897
+ *
898
+ * @param {Object} model_inputs The inputs to the model.
899
+ * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
900
+ */
901
+ async _call(model_inputs) {
902
+ return new QuestionAnsweringModelOutput(await super._call(model_inputs));
903
+ }
904
+ }
905
+ class ConvBertPreTrainedModel extends PreTrainedModel {
906
+ }
907
+ class ConvBertModel extends ConvBertPreTrainedModel {
908
+ }
909
+ class ConvBertForMaskedLM extends ConvBertPreTrainedModel {
910
+ /**
911
+ * Calls the model on new inputs.
912
+ *
913
+ * @param {Object} model_inputs The inputs to the model.
914
+ * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
915
+ */
916
+ async _call(model_inputs) {
917
+ return new MaskedLMOutput(await super._call(model_inputs));
918
+ }
919
+ }
920
+ class ConvBertForSequenceClassification extends ConvBertPreTrainedModel {
921
+ /**
922
+ * Calls the model on new inputs.
923
+ *
924
+ * @param {Object} model_inputs The inputs to the model.
925
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
926
+ */
927
+ async _call(model_inputs) {
928
+ return new SequenceClassifierOutput(await super._call(model_inputs));
929
+ }
930
+ }
931
+ class ConvBertForTokenClassification extends ConvBertPreTrainedModel {
932
+ /**
933
+ * Calls the model on new inputs.
934
+ *
935
+ * @param {Object} model_inputs The inputs to the model.
936
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
937
+ */
938
+ async _call(model_inputs) {
939
+ return new TokenClassifierOutput(await super._call(model_inputs));
940
+ }
941
+ }
942
+ class ConvBertForQuestionAnswering extends ConvBertPreTrainedModel {
943
+ /**
944
+ * Calls the model on new inputs.
945
+ *
946
+ * @param {Object} model_inputs The inputs to the model.
947
+ * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
948
+ */
949
+ async _call(model_inputs) {
950
+ return new QuestionAnsweringModelOutput(await super._call(model_inputs));
951
+ }
952
+ }
953
+ class ElectraPreTrainedModel extends PreTrainedModel {
954
+ }
955
+ class ElectraModel extends ElectraPreTrainedModel {
956
+ }
957
+ class ElectraForMaskedLM extends ElectraPreTrainedModel {
958
+ /**
959
+ * Calls the model on new inputs.
960
+ *
961
+ * @param {Object} model_inputs The inputs to the model.
962
+ * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
963
+ */
964
+ async _call(model_inputs) {
965
+ return new MaskedLMOutput(await super._call(model_inputs));
966
+ }
967
+ }
968
+ class ElectraForSequenceClassification extends ElectraPreTrainedModel {
969
+ /**
970
+ * Calls the model on new inputs.
971
+ *
972
+ * @param {Object} model_inputs The inputs to the model.
973
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
974
+ */
975
+ async _call(model_inputs) {
976
+ return new SequenceClassifierOutput(await super._call(model_inputs));
977
+ }
978
+ }
979
+ class ElectraForTokenClassification extends ElectraPreTrainedModel {
980
+ /**
981
+ * Calls the model on new inputs.
982
+ *
983
+ * @param {Object} model_inputs The inputs to the model.
984
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
985
+ */
986
+ async _call(model_inputs) {
987
+ return new TokenClassifierOutput(await super._call(model_inputs));
988
+ }
989
+ }
990
+ class ElectraForQuestionAnswering extends ElectraPreTrainedModel {
991
+ /**
992
+ * Calls the model on new inputs.
993
+ *
994
+ * @param {Object} model_inputs The inputs to the model.
995
+ * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
996
+ */
997
+ async _call(model_inputs) {
998
+ return new QuestionAnsweringModelOutput(await super._call(model_inputs));
999
+ }
1000
+ }
1001
+ class CamembertPreTrainedModel extends PreTrainedModel {
1002
+ }
1003
+ class CamembertModel extends CamembertPreTrainedModel {
1004
+ }
1005
+ class CamembertForMaskedLM extends CamembertPreTrainedModel {
1006
+ /**
1007
+ * Calls the model on new inputs.
1008
+ *
1009
+ * @param {Object} model_inputs The inputs to the model.
1010
+ * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
1011
+ */
1012
+ async _call(model_inputs) {
1013
+ return new MaskedLMOutput(await super._call(model_inputs));
1014
+ }
1015
+ }
1016
+ class CamembertForSequenceClassification extends CamembertPreTrainedModel {
1017
+ /**
1018
+ * Calls the model on new inputs.
1019
+ *
1020
+ * @param {Object} model_inputs The inputs to the model.
1021
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
1022
+ */
1023
+ async _call(model_inputs) {
1024
+ return new SequenceClassifierOutput(await super._call(model_inputs));
1025
+ }
1026
+ }
1027
+ class CamembertForTokenClassification extends CamembertPreTrainedModel {
1028
+ /**
1029
+ * Calls the model on new inputs.
1030
+ *
1031
+ * @param {Object} model_inputs The inputs to the model.
1032
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
1033
+ */
1034
+ async _call(model_inputs) {
1035
+ return new TokenClassifierOutput(await super._call(model_inputs));
1036
+ }
1037
+ }
1038
+ class CamembertForQuestionAnswering extends CamembertPreTrainedModel {
1039
+ /**
1040
+ * Calls the model on new inputs.
1041
+ *
1042
+ * @param {Object} model_inputs The inputs to the model.
1043
+ * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
1044
+ */
1045
+ async _call(model_inputs) {
1046
+ return new QuestionAnsweringModelOutput(await super._call(model_inputs));
1047
+ }
1048
+ }
1049
+ class DebertaPreTrainedModel extends PreTrainedModel {
1050
+ }
1051
+ class DebertaModel extends DebertaPreTrainedModel {
1052
+ }
1053
+ class DebertaForMaskedLM extends DebertaPreTrainedModel {
1054
+ /**
1055
+ * Calls the model on new inputs.
1056
+ *
1057
+ * @param {Object} model_inputs The inputs to the model.
1058
+ * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
1059
+ */
1060
+ async _call(model_inputs) {
1061
+ return new MaskedLMOutput(await super._call(model_inputs));
1062
+ }
1063
+ }
1064
+ class DebertaForSequenceClassification extends DebertaPreTrainedModel {
1065
+ /**
1066
+ * Calls the model on new inputs.
1067
+ *
1068
+ * @param {Object} model_inputs The inputs to the model.
1069
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
1070
+ */
1071
+ async _call(model_inputs) {
1072
+ return new SequenceClassifierOutput(await super._call(model_inputs));
1073
+ }
1074
+ }
1075
+ class DebertaForTokenClassification extends DebertaPreTrainedModel {
1076
+ /**
1077
+ * Calls the model on new inputs.
1078
+ *
1079
+ * @param {Object} model_inputs The inputs to the model.
1080
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
1081
+ */
1082
+ async _call(model_inputs) {
1083
+ return new TokenClassifierOutput(await super._call(model_inputs));
1084
+ }
1085
+ }
1086
+ class DebertaForQuestionAnswering extends DebertaPreTrainedModel {
1087
+ /**
1088
+ * Calls the model on new inputs.
1089
+ *
1090
+ * @param {Object} model_inputs The inputs to the model.
1091
+ * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
1092
+ */
1093
+ async _call(model_inputs) {
1094
+ return new QuestionAnsweringModelOutput(await super._call(model_inputs));
1095
+ }
1096
+ }
1097
+ class DebertaV2PreTrainedModel extends PreTrainedModel {
1098
+ }
1099
+ class DebertaV2Model extends DebertaV2PreTrainedModel {
1100
+ }
1101
+ class DebertaV2ForMaskedLM extends DebertaV2PreTrainedModel {
1102
+ /**
1103
+ * Calls the model on new inputs.
1104
+ *
1105
+ * @param {Object} model_inputs The inputs to the model.
1106
+ * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
1107
+ */
1108
+ async _call(model_inputs) {
1109
+ return new MaskedLMOutput(await super._call(model_inputs));
1110
+ }
1111
+ }
1112
+ class DebertaV2ForSequenceClassification extends DebertaV2PreTrainedModel {
1113
+ /**
1114
+ * Calls the model on new inputs.
1115
+ *
1116
+ * @param {Object} model_inputs The inputs to the model.
1117
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
1118
+ */
1119
+ async _call(model_inputs) {
1120
+ return new SequenceClassifierOutput(await super._call(model_inputs));
1121
+ }
1122
+ }
1123
+ class DebertaV2ForTokenClassification extends DebertaV2PreTrainedModel {
1124
+ /**
1125
+ * Calls the model on new inputs.
1126
+ *
1127
+ * @param {Object} model_inputs The inputs to the model.
1128
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
1129
+ */
1130
+ async _call(model_inputs) {
1131
+ return new TokenClassifierOutput(await super._call(model_inputs));
1132
+ }
1133
+ }
1134
+ class DebertaV2ForQuestionAnswering extends DebertaV2PreTrainedModel {
1135
+ /**
1136
+ * Calls the model on new inputs.
1137
+ *
1138
+ * @param {Object} model_inputs The inputs to the model.
1139
+ * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
1140
+ */
1141
+ async _call(model_inputs) {
1142
+ return new QuestionAnsweringModelOutput(await super._call(model_inputs));
1143
+ }
1144
+ }
1145
+ class DistilBertPreTrainedModel extends PreTrainedModel {
1146
+ }
1147
+ class DistilBertModel extends DistilBertPreTrainedModel {
1148
+ }
1149
+ class DistilBertForSequenceClassification extends DistilBertPreTrainedModel {
1150
+ /**
1151
+ * Calls the model on new inputs.
1152
+ *
1153
+ * @param {Object} model_inputs The inputs to the model.
1154
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
1155
+ */
1156
+ async _call(model_inputs) {
1157
+ return new SequenceClassifierOutput(await super._call(model_inputs));
1158
+ }
1159
+ }
1160
+ class DistilBertForTokenClassification extends DistilBertPreTrainedModel {
1161
+ /**
1162
+ * Calls the model on new inputs.
1163
+ *
1164
+ * @param {Object} model_inputs The inputs to the model.
1165
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
1166
+ */
1167
+ async _call(model_inputs) {
1168
+ return new TokenClassifierOutput(await super._call(model_inputs));
1169
+ }
1170
+ }
1171
+ class DistilBertForQuestionAnswering extends DistilBertPreTrainedModel {
1172
+ /**
1173
+ * Calls the model on new inputs.
1174
+ *
1175
+ * @param {Object} model_inputs The inputs to the model.
1176
+ * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
1177
+ */
1178
+ async _call(model_inputs) {
1179
+ return new QuestionAnsweringModelOutput(await super._call(model_inputs));
1180
+ }
1181
+ }
1182
+ class DistilBertForMaskedLM extends DistilBertPreTrainedModel {
1183
+ /**
1184
+ * Calls the model on new inputs.
1185
+ *
1186
+ * @param {Object} model_inputs The inputs to the model.
1187
+ * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
1188
+ */
1189
+ async _call(model_inputs) {
1190
+ return new MaskedLMOutput(await super._call(model_inputs));
1191
+ }
1192
+ }
1193
+ class EsmPreTrainedModel extends PreTrainedModel {
1194
+ }
1195
+ class EsmModel extends EsmPreTrainedModel {
1196
+ }
1197
+ class EsmForMaskedLM extends EsmPreTrainedModel {
1198
+ /**
1199
+ * Calls the model on new inputs.
1200
+ *
1201
+ * @param {Object} model_inputs The inputs to the model.
1202
+ * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
1203
+ */
1204
+ async _call(model_inputs) {
1205
+ return new MaskedLMOutput(await super._call(model_inputs));
1206
+ }
1207
+ }
1208
+ class EsmForSequenceClassification extends EsmPreTrainedModel {
1209
+ /**
1210
+ * Calls the model on new inputs.
1211
+ *
1212
+ * @param {Object} model_inputs The inputs to the model.
1213
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
1214
+ */
1215
+ async _call(model_inputs) {
1216
+ return new SequenceClassifierOutput(await super._call(model_inputs));
1217
+ }
1218
+ }
1219
+ class EsmForTokenClassification extends EsmPreTrainedModel {
1220
+ /**
1221
+ * Calls the model on new inputs.
1222
+ *
1223
+ * @param {Object} model_inputs The inputs to the model.
1224
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
1225
+ */
1226
+ async _call(model_inputs) {
1227
+ return new TokenClassifierOutput(await super._call(model_inputs));
1228
+ }
1229
+ }
1230
+ class MobileBertPreTrainedModel extends PreTrainedModel {
1231
+ }
1232
+ class MobileBertModel extends MobileBertPreTrainedModel {
1233
+ }
1234
+ class MobileBertForMaskedLM extends MobileBertPreTrainedModel {
1235
+ /**
1236
+ * Calls the model on new inputs.
1237
+ *
1238
+ * @param {Object} model_inputs The inputs to the model.
1239
+ * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
1240
+ */
1241
+ async _call(model_inputs) {
1242
+ return new MaskedLMOutput(await super._call(model_inputs));
1243
+ }
1244
+ }
1245
+ class MobileBertForSequenceClassification extends MobileBertPreTrainedModel {
1246
+ /**
1247
+ * Calls the model on new inputs.
1248
+ *
1249
+ * @param {Object} model_inputs The inputs to the model.
1250
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
1251
+ */
1252
+ async _call(model_inputs) {
1253
+ return new SequenceClassifierOutput(await super._call(model_inputs));
1254
+ }
1255
+ }
1256
+ class MobileBertForQuestionAnswering extends MobileBertPreTrainedModel {
1257
+ /**
1258
+ * Calls the model on new inputs.
1259
+ *
1260
+ * @param {Object} model_inputs The inputs to the model.
1261
+ * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
1262
+ */
1263
+ async _call(model_inputs) {
1264
+ return new QuestionAnsweringModelOutput(await super._call(model_inputs));
1265
+ }
1266
+ }
1267
+ class MPNetPreTrainedModel extends PreTrainedModel {
1268
+ }
1269
+ class MPNetModel extends MPNetPreTrainedModel {
1270
+ }
1271
+ class MPNetForMaskedLM extends MPNetPreTrainedModel {
1272
+ /**
1273
+ * Calls the model on new inputs.
1274
+ *
1275
+ * @param {Object} model_inputs The inputs to the model.
1276
+ * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
1277
+ */
1278
+ async _call(model_inputs) {
1279
+ return new MaskedLMOutput(await super._call(model_inputs));
1280
+ }
1281
+ }
1282
+ class MPNetForSequenceClassification extends MPNetPreTrainedModel {
1283
+ /**
1284
+ * Calls the model on new inputs.
1285
+ *
1286
+ * @param {Object} model_inputs The inputs to the model.
1287
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
1288
+ */
1289
+ async _call(model_inputs) {
1290
+ return new SequenceClassifierOutput(await super._call(model_inputs));
1291
+ }
1292
+ }
1293
+ class MPNetForTokenClassification extends MPNetPreTrainedModel {
1294
+ /**
1295
+ * Calls the model on new inputs.
1296
+ *
1297
+ * @param {Object} model_inputs The inputs to the model.
1298
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
1299
+ */
1300
+ async _call(model_inputs) {
1301
+ return new TokenClassifierOutput(await super._call(model_inputs));
1302
+ }
1303
+ }
1304
+ class MPNetForQuestionAnswering extends MPNetPreTrainedModel {
1305
+ /**
1306
+ * Calls the model on new inputs.
1307
+ *
1308
+ * @param {Object} model_inputs The inputs to the model.
1309
+ * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
1310
+ */
1311
+ async _call(model_inputs) {
1312
+ return new QuestionAnsweringModelOutput(await super._call(model_inputs));
1313
+ }
1314
+ }
1315
+ class SqueezeBertPreTrainedModel extends PreTrainedModel {
1316
+ }
1317
+ class SqueezeBertModel extends SqueezeBertPreTrainedModel {
1318
+ }
1319
+ class SqueezeBertForMaskedLM extends SqueezeBertPreTrainedModel {
1320
+ /**
1321
+ * Calls the model on new inputs.
1322
+ *
1323
+ * @param {Object} model_inputs The inputs to the model.
1324
+ * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
1325
+ */
1326
+ async _call(model_inputs) {
1327
+ return new MaskedLMOutput(await super._call(model_inputs));
1328
+ }
1329
+ }
1330
+ class SqueezeBertForSequenceClassification extends SqueezeBertPreTrainedModel {
1331
+ /**
1332
+ * Calls the model on new inputs.
1333
+ *
1334
+ * @param {Object} model_inputs The inputs to the model.
1335
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
1336
+ */
1337
+ async _call(model_inputs) {
1338
+ return new SequenceClassifierOutput(await super._call(model_inputs));
1339
+ }
1340
+ }
1341
+ class SqueezeBertForQuestionAnswering extends SqueezeBertPreTrainedModel {
1342
+ /**
1343
+ * Calls the model on new inputs.
1344
+ *
1345
+ * @param {Object} model_inputs The inputs to the model.
1346
+ * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
1347
+ */
1348
+ async _call(model_inputs) {
1349
+ return new QuestionAnsweringModelOutput(await super._call(model_inputs));
1350
+ }
1351
+ }
1352
+ class AlbertPreTrainedModel extends PreTrainedModel {
1353
+ }
1354
+ class AlbertModel extends AlbertPreTrainedModel {
1355
+ }
1356
+ class AlbertForSequenceClassification extends AlbertPreTrainedModel {
1357
+ /**
1358
+ * Calls the model on new inputs.
1359
+ *
1360
+ * @param {Object} model_inputs The inputs to the model.
1361
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
1362
+ */
1363
+ async _call(model_inputs) {
1364
+ return new SequenceClassifierOutput(await super._call(model_inputs));
1365
+ }
1366
+ }
1367
+ class AlbertForQuestionAnswering extends AlbertPreTrainedModel {
1368
+ /**
1369
+ * Calls the model on new inputs.
1370
+ *
1371
+ * @param {Object} model_inputs The inputs to the model.
1372
+ * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
1373
+ */
1374
+ async _call(model_inputs) {
1375
+ return new QuestionAnsweringModelOutput(await super._call(model_inputs));
1376
+ }
1377
+ }
1378
+ class AlbertForMaskedLM extends AlbertPreTrainedModel {
1379
+ /**
1380
+ * Calls the model on new inputs.
1381
+ *
1382
+ * @param {Object} model_inputs The inputs to the model.
1383
+ * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
1384
+ */
1385
+ async _call(model_inputs) {
1386
+ return new MaskedLMOutput(await super._call(model_inputs));
1387
+ }
1388
+ }
1389
+ class T5PreTrainedModel extends PreTrainedModel {
1390
+ }
1391
+ class T5Model extends T5PreTrainedModel {
1392
+ }
1393
+ class T5ForConditionalGeneration extends T5PreTrainedModel {
1394
+ /**
1395
+ * Creates a new instance of the `T5ForConditionalGeneration` class.
1396
+ * @param {Object} config The model configuration.
1397
+ * @param {any} session The ONNX session containing the encoder weights.
1398
+ * @param {any} decoder_merged_session The ONNX session containing the merged decoder weights.
1399
+ * @param {GenerationConfig} generation_config The generation configuration.
1400
+ */
1401
+ constructor(config, session, decoder_merged_session, generation_config) {
1402
+ super(config, session);
1403
+ this.decoder_merged_session = decoder_merged_session;
1404
+ this.generation_config = generation_config;
1405
+ this.num_decoder_layers = this.config.num_decoder_layers;
1406
+ this.num_decoder_heads = this.config.num_heads;
1407
+ this.decoder_dim_kv = this.config.d_kv;
1408
+ this.num_encoder_layers = this.config.num_layers;
1409
+ this.num_encoder_heads = this.config.num_heads;
1410
+ this.encoder_dim_kv = this.config.d_kv;
1411
+ }
1412
+ }
1413
+ class LongT5PreTrainedModel extends PreTrainedModel {
1414
+ }
1415
+ class LongT5Model extends LongT5PreTrainedModel {
1416
+ }
1417
+ class LongT5ForConditionalGeneration extends LongT5PreTrainedModel {
1418
+ /**
1419
+ * Creates a new instance of the `LongT5ForConditionalGeneration` class.
1420
+ * @param {Object} config The model configuration.
1421
+ * @param {any} session The ONNX session containing the encoder weights.
1422
+ * @param {any} decoder_merged_session The ONNX session containing the merged decoder weights.
1423
+ * @param {GenerationConfig} generation_config The generation configuration.
1424
+ */
1425
+ constructor(config, session, decoder_merged_session, generation_config) {
1426
+ super(config, session);
1427
+ this.decoder_merged_session = decoder_merged_session;
1428
+ this.generation_config = generation_config;
1429
+ this.num_decoder_layers = this.config.num_decoder_layers;
1430
+ this.num_decoder_heads = this.config.num_heads;
1431
+ this.decoder_dim_kv = this.config.d_kv;
1432
+ this.num_encoder_layers = this.config.num_layers;
1433
+ this.num_encoder_heads = this.config.num_heads;
1434
+ this.encoder_dim_kv = this.config.d_kv;
1435
+ }
1436
+ }
1437
+ class MT5PreTrainedModel extends PreTrainedModel {
1438
+ }
1439
+ class MT5Model extends MT5PreTrainedModel {
1440
+ }
1441
+ class MT5ForConditionalGeneration extends MT5PreTrainedModel {
1442
+ /**
1443
+ * Creates a new instance of the `MT5ForConditionalGeneration` class.
1444
+ * @param {any} config The model configuration.
1445
+ * @param {any} session The ONNX session containing the encoder weights.
1446
+ * @param {any} decoder_merged_session The ONNX session containing the merged decoder weights.
1447
+ * @param {GenerationConfig} generation_config The generation configuration.
1448
+ */
1449
+ constructor(config, session, decoder_merged_session, generation_config) {
1450
+ super(config, session);
1451
+ this.decoder_merged_session = decoder_merged_session;
1452
+ this.generation_config = generation_config;
1453
+ this.num_decoder_layers = this.config.num_decoder_layers;
1454
+ this.num_decoder_heads = this.config.num_heads;
1455
+ this.decoder_dim_kv = this.config.d_kv;
1456
+ this.num_encoder_layers = this.config.num_layers;
1457
+ this.num_encoder_heads = this.config.num_heads;
1458
+ this.encoder_dim_kv = this.config.d_kv;
1459
+ }
1460
+ }
1461
+ class BartPretrainedModel extends PreTrainedModel {
1462
+ }
1463
+ class BartModel extends BartPretrainedModel {
1464
+ }
1465
+ class BartForConditionalGeneration extends BartPretrainedModel {
1466
+ /**
1467
+ * Creates a new instance of the `BartForConditionalGeneration` class.
1468
+ * @param {Object} config The configuration object for the Bart model.
1469
+ * @param {Object} session The ONNX session used to execute the model.
1470
+ * @param {Object} decoder_merged_session The ONNX session used to execute the decoder.
1471
+ * @param {Object} generation_config The generation configuration object.
1472
+ */
1473
+ constructor(config, session, decoder_merged_session, generation_config) {
1474
+ super(config, session);
1475
+ this.decoder_merged_session = decoder_merged_session;
1476
+ this.generation_config = generation_config;
1477
+ this.num_decoder_layers = this.config.decoder_layers;
1478
+ this.num_decoder_heads = this.config.decoder_attention_heads;
1479
+ this.decoder_dim_kv = this.config.d_model / this.num_decoder_heads;
1480
+ this.num_encoder_layers = this.config.encoder_layers;
1481
+ this.num_encoder_heads = this.config.encoder_attention_heads;
1482
+ this.encoder_dim_kv = this.config.d_model / this.num_encoder_heads;
1483
+ }
1484
+ }
1485
+ class BartForSequenceClassification extends BartPretrainedModel {
1486
+ /**
1487
+ * Calls the model on new inputs.
1488
+ *
1489
+ * @param {Object} model_inputs The inputs to the model.
1490
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
1491
+ */
1492
+ async _call(model_inputs) {
1493
+ return new SequenceClassifierOutput(await super._call(model_inputs));
1494
+ }
1495
+ }
1496
+ class MBartPreTrainedModel extends PreTrainedModel {
1497
+ }
1498
+ class MBartModel extends MBartPreTrainedModel {
1499
+ }
1500
+ class MBartForConditionalGeneration extends MBartPreTrainedModel {
1501
+ /**
1502
+ * Creates a new instance of the `MBartForConditionalGeneration` class.
1503
+ * @param {Object} config The configuration object for the MBart model.
1504
+ * @param {Object} session The ONNX session used to execute the model.
1505
+ * @param {Object} decoder_merged_session The ONNX session used to execute the decoder.
1506
+ * @param {Object} generation_config The generation configuration object.
1507
+ */
1508
+ constructor(config, session, decoder_merged_session, generation_config) {
1509
+ super(config, session);
1510
+ this.decoder_merged_session = decoder_merged_session;
1511
+ this.generation_config = generation_config;
1512
+ this.num_decoder_layers = this.config.decoder_layers;
1513
+ this.num_decoder_heads = this.config.decoder_attention_heads;
1514
+ this.decoder_dim_kv = this.config.d_model / this.num_decoder_heads;
1515
+ this.num_encoder_layers = this.config.encoder_layers;
1516
+ this.num_encoder_heads = this.config.encoder_attention_heads;
1517
+ this.encoder_dim_kv = this.config.d_model / this.num_encoder_heads;
1518
+ }
1519
+ }
1520
+ class MBartForSequenceClassification extends MBartPreTrainedModel {
1521
+ /**
1522
+ * Calls the model on new inputs.
1523
+ *
1524
+ * @param {Object} model_inputs The inputs to the model.
1525
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
1526
+ */
1527
+ async _call(model_inputs) {
1528
+ return new SequenceClassifierOutput(await super._call(model_inputs));
1529
+ }
1530
+ }
1531
+ class MBartForCausalLM extends MBartPreTrainedModel {
1532
+ /**
1533
+ * Creates a new instance of the `MBartForCausalLM` class.
1534
+ * @param {Object} config Configuration object for the model.
1535
+ * @param {Object} decoder_merged_session ONNX Session object for the decoder.
1536
+ * @param {Object} generation_config Configuration object for the generation process.
1537
+ */
1538
+ constructor(config, decoder_merged_session, generation_config) {
1539
+ super(config, decoder_merged_session);
1540
+ this.generation_config = generation_config;
1541
+ this.num_decoder_layers = this.config.decoder_layers;
1542
+ this.num_decoder_heads = this.config.decoder_attention_heads;
1543
+ this.decoder_dim_kv = this.config.d_model / this.num_decoder_heads;
1544
+ this.num_encoder_layers = this.config.encoder_layers;
1545
+ this.num_encoder_heads = this.config.encoder_attention_heads;
1546
+ this.encoder_dim_kv = this.config.d_model / this.num_encoder_heads;
1547
+ }
1548
+ }
1549
+ class BlenderbotPreTrainedModel extends PreTrainedModel {
1550
+ }
1551
+ class BlenderbotModel extends BlenderbotPreTrainedModel {
1552
+ }
1553
+ class BlenderbotForConditionalGeneration extends BlenderbotPreTrainedModel {
1554
+ /**
1555
+ * Creates a new instance of the `BlenderbotForConditionalGeneration` class.
1556
+ * @param {any} config The model configuration.
1557
+ * @param {any} session The ONNX session containing the encoder weights.
1558
+ * @param {any} decoder_merged_session The ONNX session containing the merged decoder weights.
1559
+ * @param {GenerationConfig} generation_config The generation configuration.
1560
+ */
1561
+ constructor(config, session, decoder_merged_session, generation_config) {
1562
+ super(config, session);
1563
+ this.decoder_merged_session = decoder_merged_session;
1564
+ this.generation_config = generation_config;
1565
+ this.num_decoder_layers = this.config.decoder_layers;
1566
+ this.num_decoder_heads = this.config.decoder_attention_heads;
1567
+ this.decoder_dim_kv = this.config.d_model / this.num_decoder_heads;
1568
+ this.num_encoder_layers = this.config.encoder_layers;
1569
+ this.num_encoder_heads = this.config.encoder_attention_heads;
1570
+ this.encoder_dim_kv = this.config.d_model / this.num_encoder_heads;
1571
+ }
1572
+ }
1573
+ class BlenderbotSmallPreTrainedModel extends PreTrainedModel {
1574
+ }
1575
+ class BlenderbotSmallModel extends BlenderbotSmallPreTrainedModel {
1576
+ }
1577
+ class BlenderbotSmallForConditionalGeneration extends BlenderbotSmallPreTrainedModel {
1578
+ /**
1579
+ * Creates a new instance of the `BlenderbotSmallForConditionalGeneration` class.
1580
+ * @param {any} config The model configuration.
1581
+ * @param {any} session The ONNX session containing the encoder weights.
1582
+ * @param {any} decoder_merged_session The ONNX session containing the merged decoder weights.
1583
+ * @param {GenerationConfig} generation_config The generation configuration.
1584
+ */
1585
+ constructor(config, session, decoder_merged_session, generation_config) {
1586
+ super(config, session);
1587
+ this.decoder_merged_session = decoder_merged_session;
1588
+ this.generation_config = generation_config;
1589
+ this.num_decoder_layers = this.config.decoder_layers;
1590
+ this.num_decoder_heads = this.config.decoder_attention_heads;
1591
+ this.decoder_dim_kv = this.config.d_model / this.num_decoder_heads;
1592
+ this.num_encoder_layers = this.config.encoder_layers;
1593
+ this.num_encoder_heads = this.config.encoder_attention_heads;
1594
+ this.encoder_dim_kv = this.config.d_model / this.num_encoder_heads;
1595
+ }
1596
+ }
1597
+ class RobertaPreTrainedModel extends PreTrainedModel {
1598
+ }
1599
+ class RobertaModel extends RobertaPreTrainedModel {
1600
+ }
1601
+ class RobertaForMaskedLM extends RobertaPreTrainedModel {
1602
+ /**
1603
+ * Calls the model on new inputs.
1604
+ *
1605
+ * @param {Object} model_inputs The inputs to the model.
1606
+ * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
1607
+ */
1608
+ async _call(model_inputs) {
1609
+ return new MaskedLMOutput(await super._call(model_inputs));
1610
+ }
1611
+ }
1612
+ class RobertaForSequenceClassification extends RobertaPreTrainedModel {
1613
+ /**
1614
+ * Calls the model on new inputs.
1615
+ *
1616
+ * @param {Object} model_inputs The inputs to the model.
1617
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
1618
+ */
1619
+ async _call(model_inputs) {
1620
+ return new SequenceClassifierOutput(await super._call(model_inputs));
1621
+ }
1622
+ }
1623
+ class RobertaForTokenClassification extends RobertaPreTrainedModel {
1624
+ /**
1625
+ * Calls the model on new inputs.
1626
+ *
1627
+ * @param {Object} model_inputs The inputs to the model.
1628
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
1629
+ */
1630
+ async _call(model_inputs) {
1631
+ return new TokenClassifierOutput(await super._call(model_inputs));
1632
+ }
1633
+ }
1634
+ class RobertaForQuestionAnswering extends RobertaPreTrainedModel {
1635
+ /**
1636
+ * Calls the model on new inputs.
1637
+ *
1638
+ * @param {Object} model_inputs The inputs to the model.
1639
+ * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
1640
+ */
1641
+ async _call(model_inputs) {
1642
+ return new QuestionAnsweringModelOutput(await super._call(model_inputs));
1643
+ }
1644
+ }
1645
+ class XLMPreTrainedModel extends PreTrainedModel {
1646
+ }
1647
+ class XLMModel extends XLMPreTrainedModel {
1648
+ }
1649
+ class XLMWithLMHeadModel extends XLMPreTrainedModel {
1650
+ /**
1651
+ * Calls the model on new inputs.
1652
+ *
1653
+ * @param {Object} model_inputs The inputs to the model.
1654
+ * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
1655
+ */
1656
+ async _call(model_inputs) {
1657
+ return new MaskedLMOutput(await super._call(model_inputs));
1658
+ }
1659
+ }
1660
+ class XLMForSequenceClassification extends XLMPreTrainedModel {
1661
+ /**
1662
+ * Calls the model on new inputs.
1663
+ *
1664
+ * @param {Object} model_inputs The inputs to the model.
1665
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
1666
+ */
1667
+ async _call(model_inputs) {
1668
+ return new SequenceClassifierOutput(await super._call(model_inputs));
1669
+ }
1670
+ }
1671
+ class XLMForTokenClassification extends XLMPreTrainedModel {
1672
+ /**
1673
+ * Calls the model on new inputs.
1674
+ *
1675
+ * @param {Object} model_inputs The inputs to the model.
1676
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
1677
+ */
1678
+ async _call(model_inputs) {
1679
+ return new TokenClassifierOutput(await super._call(model_inputs));
1680
+ }
1681
+ }
1682
+ class XLMForQuestionAnswering extends XLMPreTrainedModel {
1683
+ /**
1684
+ * Calls the model on new inputs.
1685
+ *
1686
+ * @param {Object} model_inputs The inputs to the model.
1687
+ * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
1688
+ */
1689
+ async _call(model_inputs) {
1690
+ return new QuestionAnsweringModelOutput(await super._call(model_inputs));
1691
+ }
1692
+ }
1693
+ class XLMRobertaPreTrainedModel extends PreTrainedModel {
1694
+ }
1695
+ class XLMRobertaModel extends XLMRobertaPreTrainedModel {
1696
+ }
1697
+ class XLMRobertaForMaskedLM extends XLMRobertaPreTrainedModel {
1698
+ /**
1699
+ * Calls the model on new inputs.
1700
+ *
1701
+ * @param {Object} model_inputs The inputs to the model.
1702
+ * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
1703
+ */
1704
+ async _call(model_inputs) {
1705
+ return new MaskedLMOutput(await super._call(model_inputs));
1706
+ }
1707
+ }
1708
+ class XLMRobertaForSequenceClassification extends XLMRobertaPreTrainedModel {
1709
+ /**
1710
+ * Calls the model on new inputs.
1711
+ *
1712
+ * @param {Object} model_inputs The inputs to the model.
1713
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
1714
+ */
1715
+ async _call(model_inputs) {
1716
+ return new SequenceClassifierOutput(await super._call(model_inputs));
1717
+ }
1718
+ }
1719
+ class XLMRobertaForTokenClassification extends XLMRobertaPreTrainedModel {
1720
+ /**
1721
+ * Calls the model on new inputs.
1722
+ *
1723
+ * @param {Object} model_inputs The inputs to the model.
1724
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
1725
+ */
1726
+ async _call(model_inputs) {
1727
+ return new TokenClassifierOutput(await super._call(model_inputs));
1728
+ }
1729
+ }
1730
+ class XLMRobertaForQuestionAnswering extends XLMRobertaPreTrainedModel {
1731
+ /**
1732
+ * Calls the model on new inputs.
1733
+ *
1734
+ * @param {Object} model_inputs The inputs to the model.
1735
+ * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
1736
+ */
1737
+ async _call(model_inputs) {
1738
+ return new QuestionAnsweringModelOutput(await super._call(model_inputs));
1739
+ }
1740
+ }
1741
+ class ASTPreTrainedModel extends PreTrainedModel {
1742
+ }
1743
+ class ASTModel extends ASTPreTrainedModel {
1744
+ }
1745
+ class ASTForAudioClassification extends ASTPreTrainedModel {
1746
+ }
1747
+ class WhisperPreTrainedModel extends PreTrainedModel {
1748
+ }
1749
+ class WhisperModel extends WhisperPreTrainedModel {
1750
+ }
1751
+ class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
1752
+ requires_attention_mask = false;
1753
+ main_input_name = "input_features";
1754
+ /**
1755
+ * Creates a new instance of the `WhisperForConditionalGeneration` class.
1756
+ * @param {Object} config Configuration object for the model.
1757
+ * @param {Object} session ONNX Session object for the model.
1758
+ * @param {Object} decoder_merged_session ONNX Session object for the decoder.
1759
+ * @param {Object} generation_config Configuration object for the generation process.
1760
+ */
1761
+ constructor(config, session, decoder_merged_session, generation_config) {
1762
+ super(config, session);
1763
+ this.decoder_merged_session = decoder_merged_session;
1764
+ this.generation_config = generation_config;
1765
+ this.num_decoder_layers = this.config.decoder_layers;
1766
+ this.num_decoder_heads = this.config.decoder_attention_heads;
1767
+ this.decoder_dim_kv = this.config.d_model / this.num_decoder_heads;
1768
+ this.num_encoder_layers = this.config.encoder_layers;
1769
+ this.num_encoder_heads = this.config.encoder_attention_heads;
1770
+ this.encoder_dim_kv = this.config.d_model / this.num_encoder_heads;
1771
+ }
1772
+ /**
1773
+ * @typedef {Object} WhisperGenerationConfig
1774
+ * @extends GenerationConfig
1775
+ * @property {boolean} [return_timestamps=null] Whether to return the timestamps with the text. This enables the `WhisperTimestampsLogitsProcessor`.
1776
+ * @property {boolean} [return_token_timestamps=null] Whether to return token-level timestamps
1777
+ * with the text. This can be used with or without the `return_timestamps` option. To get word-level
1778
+ * timestamps, use the tokenizer to group the tokens into words.
1779
+ * @property {number} [num_frames=null] The number of audio frames available in this chunk. This is only used when generating word-level timestamps.
1780
+ */
1781
+ /**
1782
+ * Generates outputs based on input and generation configuration.
1783
+ * @param {Object} inputs Input data for the model.
1784
+ * @param {WhisperGenerationConfig} generation_config Configuration object for the generation process.
1785
+ * @param {Object} logits_processor Optional logits processor object.
1786
+ * @returns {Promise<Object>} Promise object represents the generated outputs.
1787
+ */
1788
+ async generate(inputs, generation_config = null, logits_processor = null) {
1789
+ generation_config = this._get_generation_config(generation_config);
1790
+ generation_config.return_timestamps ??= false;
1791
+ if (generation_config.return_timestamps) {
1792
+ logits_processor = [new WhisperTimeStampLogitsProcessor(generation_config)];
1793
+ }
1794
+ if (generation_config.return_token_timestamps) {
1795
+ generation_config.output_attentions = true;
1796
+ generation_config.return_dict_in_generate = true;
1797
+ if (generation_config.task === "translate") {
1798
+ console.warn("Token-level timestamps may not be reliable for task 'translate'.");
1799
+ }
1800
+ if (!generation_config.alignment_heads) {
1801
+ throw new Error(
1802
+ "Model generation config has no `alignment_heads`, token-level timestamps not available. See https://gist.github.com/hollance/42e32852f24243b748ae6bc1f985b13a on how to add this property to the generation config."
1803
+ );
1804
+ }
1805
+ }
1806
+ const outputs = await super.generate(inputs, generation_config, logits_processor);
1807
+ if (generation_config.return_token_timestamps && generation_config.alignment_heads) {
1808
+ outputs["token_timestamps"] = this._extract_token_timestamps(
1809
+ outputs,
1810
+ generation_config.alignment_heads,
1811
+ generation_config.num_frames
1812
+ );
1813
+ }
1814
+ return outputs;
1815
+ }
1816
+ /**
1817
+ * Calculates token-level timestamps using the encoder-decoder cross-attentions and
1818
+ * dynamic time-warping (DTW) to map each output token to a position in the input audio.
1819
+ * @param {Object} generate_outputs Outputs generated by the model
1820
+ * @param {Tensor[][][]} generate_outputs.cross_attentions The cross attentions output by the model
1821
+ * @param {Tensor[][][]} generate_outputs.decoder_attentions The decoder attentions output by the model
1822
+ * @param {number[][]} generate_outputs.sequences The sequences output by the model
1823
+ * @param {number[][]} alignment_heads Alignment heads of the model
1824
+ * @param {number} [num_frames=null] Number of frames in the input audio.
1825
+ * @param {number} [time_precision=0.02] Precision of the timestamps in seconds
1826
+ * @returns {Tensor} tensor containing the timestamps in seconds for each predicted token
1827
+ */
1828
+ _extract_token_timestamps(generate_outputs, alignment_heads, num_frames = null, time_precision = 0.02) {
1829
+ if (!generate_outputs.cross_attentions) {
1830
+ throw new Error(
1831
+ "Model outputs must contain cross attentions to extract timestamps. This is most likely because the model was not exported with `output_attentions=True`."
1832
+ );
1833
+ }
1834
+ let median_filter_width = this.config.median_filter_width;
1835
+ if (median_filter_width === void 0) {
1836
+ console.warn("Model config has no `median_filter_width`, using default value of 7.");
1837
+ median_filter_width = 7;
1838
+ }
1839
+ const batchedMatrices = generate_outputs.cross_attentions.map((batch) => {
1840
+ let cross_attentions = Array.from(
1841
+ { length: this.config.decoder_layers },
1842
+ (_, i) => cat(batch.map((x) => x[i]), 2)
1843
+ );
1844
+ let weights = stack(alignment_heads.map(([l, h]) => {
1845
+ return num_frames ? cross_attentions[l].slice(null, h, null, [0, num_frames]) : cross_attentions[l].slice(null, h);
1846
+ }));
1847
+ weights = weights.transpose(1, 0, 2, 3);
1848
+ let [std, calculatedMean] = std_mean(weights, -2, 0, true);
1849
+ let smoothedWeights = weights.clone();
1850
+ for (let a = 0; a < smoothedWeights.dims[0]; ++a) {
1851
+ let aTensor = smoothedWeights[a];
1852
+ for (let b = 0; b < aTensor.dims[0]; ++b) {
1853
+ let bTensor = aTensor[b];
1854
+ const stdTensor = std[a][b][0];
1855
+ const meanTensor = calculatedMean[a][b][0];
1856
+ for (let c = 0; c < bTensor.dims[0]; ++c) {
1857
+ let cTensor = bTensor[c];
1858
+ for (let d = 0; d < cTensor.data.length; ++d) {
1859
+ cTensor.data[d] = (cTensor.data[d] - meanTensor.data[d]) / stdTensor.data[d];
1860
+ }
1861
+ cTensor.data.set(medianFilter(cTensor.data, median_filter_width));
1862
+ }
1863
+ }
1864
+ }
1865
+ const matrix = mean(smoothedWeights, 1);
1866
+ return matrix;
1867
+ });
1868
+ const timestampsShape = [generate_outputs.sequences.length, generate_outputs.sequences[0].length];
1869
+ const timestamps = new Tensor(
1870
+ "float32",
1871
+ new Float32Array(timestampsShape[0] * timestampsShape[1]),
1872
+ timestampsShape
1873
+ );
1874
+ for (let batch_idx = 0; batch_idx < timestampsShape[0]; ++batch_idx) {
1875
+ const matrix = batchedMatrices[batch_idx].neg().squeeze_(0);
1876
+ let [text_indices, time_indices] = dynamicTimeWarping(matrix);
1877
+ let diffs = Array.from({ length: text_indices.length - 1 }, (v, i) => text_indices[i + 1] - text_indices[i]);
1878
+ let jumps = mergeArrays([1], diffs).map((x) => !!x);
1879
+ let jump_times = [];
1880
+ for (let i = 0; i < jumps.length; ++i) {
1881
+ if (jumps[i]) {
1882
+ jump_times.push(time_indices[i] * time_precision);
1883
+ }
1884
+ }
1885
+ timestamps[batch_idx].data.set(jump_times, 1);
1886
+ }
1887
+ return timestamps;
1888
+ }
1889
+ }
1890
+ class VisionEncoderDecoderModel extends PreTrainedModel {
1891
+ main_input_name = "pixel_values";
1892
+ /**
1893
+ * Creates a new instance of the `VisionEncoderDecoderModel` class.
1894
+ * @param {Object} config The configuration object specifying the hyperparameters and other model settings.
1895
+ * @param {Object} session The ONNX session containing the encoder model.
1896
+ * @param {any} decoder_merged_session The ONNX session containing the merged decoder model.
1897
+ * @param {Object} generation_config Configuration object for the generation process.
1898
+ */
1899
+ constructor(config, session, decoder_merged_session, generation_config) {
1900
+ super(config, session);
1901
+ this.decoder_merged_session = decoder_merged_session;
1902
+ this.generation_config = generation_config;
1903
+ const encoderConfig = this.config.encoder;
1904
+ const decoderConfig = this.config.decoder;
1905
+ const encoderModelType = encoderConfig.model_type;
1906
+ const encoderModel = MODEL_MAPPING_NAMES_ENCODER_ONLY.get(encoderModelType) ?? MODEL_MAPPING_NAMES_ENCODER_DECODER.get(encoderModelType);
1907
+ if (!encoderModel) {
1908
+ console.warn(`Model type for encoder '${encoderModelType}' not found, assuming encoder-only architecture. Please report this at https://github.com/xenova/transformers.js/issues/new/choose.`);
1909
+ }
1910
+ const decoderModel = MODEL_WITH_LM_HEAD_MAPPING_NAMES.get(decoderConfig.model_type);
1911
+ if (!decoderModel) {
1912
+ throw new Error(`Unable to construct \`VisionEncoderDecoder\` due to unsupported decoder: "${this.config.decoder.model_type}"`);
1913
+ }
1914
+ const decoderModelClass = decoderModel[1];
1915
+ const decoder = new decoderModelClass(decoderConfig, decoder_merged_session, generation_config);
1916
+ this.add_encoder_pkv = "num_decoder_layers" in decoder;
1917
+ if (this.add_encoder_pkv) {
1918
+ this.num_decoder_layers = decoder.num_decoder_layers;
1919
+ this.num_decoder_heads = decoder.num_decoder_heads;
1920
+ this.decoder_dim_kv = decoder.decoder_dim_kv;
1921
+ this.num_encoder_layers = decoder.num_encoder_layers;
1922
+ this.num_encoder_heads = decoder.num_encoder_heads;
1923
+ this.encoder_dim_kv = decoder.encoder_dim_kv;
1924
+ } else {
1925
+ this.num_layers = decoder.num_layers;
1926
+ this.num_heads = decoder.num_heads;
1927
+ this.dim_kv = decoder.dim_kv;
1928
+ }
1929
+ }
1930
+ }
1931
+ class CLIPPreTrainedModel extends PreTrainedModel {
1932
+ }
1933
+ class CLIPModel extends CLIPPreTrainedModel {
1934
+ }
1935
+ class CLIPTextModelWithProjection extends CLIPPreTrainedModel {
1936
+ /** @type {PreTrainedModel.from_pretrained} */
1937
+ static async from_pretrained(pretrained_model_name_or_path, options = {}) {
1938
+ options.model_file_name ??= "text_model";
1939
+ return super.from_pretrained(pretrained_model_name_or_path, options);
1940
+ }
1941
+ }
1942
+ class CLIPVisionModelWithProjection extends CLIPPreTrainedModel {
1943
+ /** @type {PreTrainedModel.from_pretrained} */
1944
+ static async from_pretrained(pretrained_model_name_or_path, options = {}) {
1945
+ options.model_file_name ??= "vision_model";
1946
+ return super.from_pretrained(pretrained_model_name_or_path, options);
1947
+ }
1948
+ }
1949
+ class SiglipPreTrainedModel extends PreTrainedModel {
1950
+ }
1951
+ class SiglipModel extends SiglipPreTrainedModel {
1952
+ }
1953
+ class SiglipTextModel extends SiglipPreTrainedModel {
1954
+ /** @type {PreTrainedModel.from_pretrained} */
1955
+ static async from_pretrained(pretrained_model_name_or_path, options = {}) {
1956
+ options.model_file_name ??= "text_model";
1957
+ return super.from_pretrained(pretrained_model_name_or_path, options);
1958
+ }
1959
+ }
1960
+ class SiglipVisionModel extends CLIPPreTrainedModel {
1961
+ /** @type {PreTrainedModel.from_pretrained} */
1962
+ static async from_pretrained(pretrained_model_name_or_path, options = {}) {
1963
+ options.model_file_name ??= "vision_model";
1964
+ return super.from_pretrained(pretrained_model_name_or_path, options);
1965
+ }
1966
+ }
1967
+ class ChineseCLIPPreTrainedModel extends PreTrainedModel {
1968
+ }
1969
+ class ChineseCLIPModel extends ChineseCLIPPreTrainedModel {
1970
+ }
1971
+ class CLIPSegPreTrainedModel extends PreTrainedModel {
1972
+ }
1973
+ class CLIPSegModel extends CLIPSegPreTrainedModel {
1974
+ }
1975
+ class CLIPSegForImageSegmentation extends CLIPSegPreTrainedModel {
1976
+ }
1977
+ class GPT2PreTrainedModel extends PreTrainedModel {
1978
+ /**
1979
+ * Creates a new instance of the `GPT2PreTrainedModel` class.
1980
+ * @param {Object} config The configuration of the model.
1981
+ * @param {any} session The ONNX session containing the model weights.
1982
+ * @param {GenerationConfig} generation_config The generation configuration.
1983
+ */
1984
+ constructor(config, session, generation_config) {
1985
+ super(config, session);
1986
+ this.generation_config = generation_config;
1987
+ this.config.pad_token_id = this.config.eos_token_id;
1988
+ this.num_heads = this.config.n_head;
1989
+ this.num_layers = this.config.n_layer;
1990
+ this.dim_kv = this.config.n_embd / this.num_heads;
1991
+ }
1992
+ }
1993
+ class GPT2Model extends GPT2PreTrainedModel {
1994
+ }
1995
+ class GPT2LMHeadModel extends GPT2PreTrainedModel {
1996
+ }
1997
+ class GPTNeoPreTrainedModel extends PreTrainedModel {
1998
+ /**
1999
+ * Creates a new instance of the `GPTNeoPreTrainedModel` class.
2000
+ * @param {Object} config The configuration of the model.
2001
+ * @param {any} session The ONNX session containing the model weights.
2002
+ * @param {GenerationConfig} generation_config The generation configuration.
2003
+ */
2004
+ constructor(config, session, generation_config) {
2005
+ super(config, session);
2006
+ this.generation_config = generation_config;
2007
+ this.config.pad_token_id = this.config.eos_token_id;
2008
+ this.num_heads = this.config.num_heads;
2009
+ this.num_layers = this.config.num_layers;
2010
+ this.dim_kv = this.config.hidden_size / this.num_heads;
2011
+ }
2012
+ }
2013
+ class GPTNeoModel extends GPTNeoPreTrainedModel {
2014
+ }
2015
+ class GPTNeoForCausalLM extends GPTNeoPreTrainedModel {
2016
+ }
2017
+ class GPTNeoXPreTrainedModel extends PreTrainedModel {
2018
+ /**
2019
+ * Creates a new instance of the `GPTNeoXPreTrainedModel` class.
2020
+ * @param {Object} config The configuration of the model.
2021
+ * @param {any} session The ONNX session containing the model weights.
2022
+ * @param {GenerationConfig} generation_config The generation configuration.
2023
+ */
2024
+ constructor(config, session, generation_config) {
2025
+ super(config, session);
2026
+ this.generation_config = generation_config;
2027
+ this.config.pad_token_id = this.config.eos_token_id;
2028
+ this.num_heads = this.config.num_attention_heads;
2029
+ this.num_layers = this.config.num_hidden_layers;
2030
+ this.dim_kv = this.config.hidden_size / this.num_heads;
2031
+ }
2032
+ }
2033
+ class GPTNeoXModel extends GPTNeoXPreTrainedModel {
2034
+ }
2035
+ class GPTNeoXForCausalLM extends GPTNeoXPreTrainedModel {
2036
+ }
2037
+ class GPTJPreTrainedModel extends PreTrainedModel {
2038
+ /**
2039
+ * Creates a new instance of the `GPTJPreTrainedModel` class.
2040
+ * @param {Object} config The configuration of the model.
2041
+ * @param {any} session The ONNX session containing the model weights.
2042
+ * @param {GenerationConfig} generation_config The generation configuration.
2043
+ */
2044
+ constructor(config, session, generation_config) {
2045
+ super(config, session);
2046
+ this.generation_config = generation_config;
2047
+ this.config.pad_token_id = this.config.eos_token_id;
2048
+ this.num_heads = this.config.n_head;
2049
+ this.num_layers = this.config.n_layer;
2050
+ this.dim_kv = this.config.n_embd / this.num_heads;
2051
+ }
2052
+ }
2053
+ class GPTJModel extends GPTJPreTrainedModel {
2054
+ }
2055
+ class GPTJForCausalLM extends GPTJPreTrainedModel {
2056
+ }
2057
+ class GPTBigCodePreTrainedModel extends PreTrainedModel {
2058
+ /**
2059
+ * Creates a new instance of the `GPTBigCodePreTrainedModel` class.
2060
+ * @param {Object} config The configuration of the model.
2061
+ * @param {any} session The ONNX session containing the model weights.
2062
+ * @param {GenerationConfig} generation_config The generation configuration.
2063
+ */
2064
+ constructor(config, session, generation_config) {
2065
+ super(config, session);
2066
+ this.generation_config = generation_config;
2067
+ this.config.pad_token_id = this.config.eos_token_id;
2068
+ this.num_heads = this.config.n_head;
2069
+ this.num_layers = this.config.n_layer;
2070
+ this.dim_kv = this.config.n_embd / this.num_heads;
2071
+ }
2072
+ }
2073
+ class GPTBigCodeModel extends GPTBigCodePreTrainedModel {
2074
+ }
2075
+ class GPTBigCodeForCausalLM extends GPTBigCodePreTrainedModel {
2076
+ }
2077
+ class CodeGenPreTrainedModel extends PreTrainedModel {
2078
+ /**
2079
+ * Creates a new instance of the `CodeGenPreTrainedModel` class.
2080
+ * @param {Object} config The model configuration object.
2081
+ * @param {Object} session The ONNX session object.
2082
+ * @param {GenerationConfig} generation_config The generation configuration.
2083
+ */
2084
+ constructor(config, session, generation_config) {
2085
+ super(config, session);
2086
+ this.generation_config = generation_config;
2087
+ this.config.pad_token_id = this.config.eos_token_id;
2088
+ this.num_heads = this.config.n_head;
2089
+ this.num_layers = this.config.n_layer;
2090
+ this.dim_kv = this.config.n_embd / this.num_heads;
2091
+ }
2092
+ }
2093
+ class CodeGenModel extends CodeGenPreTrainedModel {
2094
+ }
2095
+ class CodeGenForCausalLM extends CodeGenPreTrainedModel {
2096
+ }
2097
+ class LlamaPreTrainedModel extends PreTrainedModel {
2098
+ /**
2099
+ * Creates a new instance of the `LlamaPreTrainedModel` class.
2100
+ * @param {Object} config The model configuration object.
2101
+ * @param {Object} session The ONNX session object.
2102
+ * @param {GenerationConfig} generation_config The generation configuration.
2103
+ */
2104
+ constructor(config, session, generation_config) {
2105
+ super(config, session);
2106
+ this.generation_config = generation_config;
2107
+ this.config.pad_token_id = this.config.eos_token_id;
2108
+ this.num_heads = this.config.num_key_value_heads ?? this.config.num_attention_heads;
2109
+ this.num_layers = this.config.num_hidden_layers;
2110
+ this.dim_kv = this.config.hidden_size / this.config.num_attention_heads;
2111
+ }
2112
+ }
2113
+ class LlamaModel extends LlamaPreTrainedModel {
2114
+ }
2115
+ class LlamaForCausalLM extends LlamaPreTrainedModel {
2116
+ }
2117
+ class Qwen2PreTrainedModel extends PreTrainedModel {
2118
+ /**
2119
+ * Creates a new instance of the `Qwen2PreTrainedModel` class.
2120
+ * @param {Object} config The model configuration object.
2121
+ * @param {Object} session The ONNX session object.
2122
+ * @param {GenerationConfig} generation_config The generation configuration.
2123
+ */
2124
+ constructor(config, session, generation_config) {
2125
+ super(config, session);
2126
+ this.generation_config = generation_config;
2127
+ this.config.pad_token_id = this.config.eos_token_id;
2128
+ this.num_heads = this.config.num_key_value_heads ?? this.config.num_attention_heads;
2129
+ this.num_layers = this.config.num_hidden_layers;
2130
+ this.dim_kv = this.config.hidden_size / this.config.num_attention_heads;
2131
+ }
2132
+ }
2133
+ class Qwen2Model extends Qwen2PreTrainedModel {
2134
+ }
2135
+ class Qwen2ForCausalLM extends Qwen2PreTrainedModel {
2136
+ }
2137
+ class PhiPreTrainedModel extends PreTrainedModel {
2138
+ /**
2139
+ * Creates a new instance of the `PhiPreTrainedModel` class.
2140
+ * @param {Object} config The model configuration object.
2141
+ * @param {Object} session The ONNX session object.
2142
+ * @param {GenerationConfig} generation_config The generation configuration.
2143
+ */
2144
+ constructor(config, session, generation_config) {
2145
+ super(config, session);
2146
+ this.generation_config = generation_config;
2147
+ this.config.pad_token_id = this.config.eos_token_id;
2148
+ this.num_heads = this.config.num_attention_heads;
2149
+ this.num_layers = this.config.num_hidden_layers;
2150
+ this.dim_kv = this.config.hidden_size / this.num_heads;
2151
+ }
2152
+ }
2153
+ class PhiModel extends PhiPreTrainedModel {
2154
+ }
2155
+ class PhiForCausalLM extends PhiPreTrainedModel {
2156
+ }
2157
+ class BloomPreTrainedModel extends PreTrainedModel {
2158
+ /**
2159
+ * Creates a new instance of the `BloomPreTrainedModel` class.
2160
+ * @param {Object} config The configuration of the model.
2161
+ * @param {any} session The ONNX session containing the model weights.
2162
+ * @param {GenerationConfig} generation_config The generation configuration.
2163
+ */
2164
+ constructor(config, session, generation_config) {
2165
+ super(config, session);
2166
+ this.generation_config = generation_config;
2167
+ this.config.pad_token_id = this.config.eos_token_id;
2168
+ this.num_heads = this.config.n_head;
2169
+ this.num_layers = this.config.n_layer;
2170
+ this.dim_kv = this.config.hidden_size / this.num_heads;
2171
+ }
2172
+ }
2173
+ class BloomModel extends BloomPreTrainedModel {
2174
+ }
2175
+ class BloomForCausalLM extends BloomPreTrainedModel {
2176
+ }
2177
+ class MptPreTrainedModel extends PreTrainedModel {
2178
+ /**
2179
+ * Creates a new instance of the `MptPreTrainedModel` class.
2180
+ * @param {Object} config The model configuration object.
2181
+ * @param {Object} session The ONNX session object.
2182
+ * @param {GenerationConfig} generation_config The generation configuration.
2183
+ */
2184
+ constructor(config, session, generation_config) {
2185
+ super(config, session);
2186
+ this.generation_config = generation_config;
2187
+ this.config.pad_token_id = this.config.eos_token_id;
2188
+ this.num_heads = this.config.n_heads;
2189
+ this.num_layers = this.config.n_layers;
2190
+ this.dim_kv = this.config.d_model / this.num_heads;
2191
+ }
2192
+ }
2193
+ class MptModel extends MptPreTrainedModel {
2194
+ }
2195
+ class MptForCausalLM extends MptPreTrainedModel {
2196
+ }
2197
+ class OPTPreTrainedModel extends PreTrainedModel {
2198
+ /**
2199
+ * Creates a new instance of the `OPTPreTrainedModel` class.
2200
+ * @param {Object} config The model configuration object.
2201
+ * @param {Object} session The ONNX session object.
2202
+ * @param {GenerationConfig} generation_config The generation configuration.
2203
+ */
2204
+ constructor(config, session, generation_config) {
2205
+ super(config, session);
2206
+ this.generation_config = generation_config;
2207
+ this.config.pad_token_id = this.config.eos_token_id;
2208
+ this.num_heads = this.config.num_attention_heads;
2209
+ this.num_layers = this.config.num_hidden_layers;
2210
+ this.dim_kv = this.config.hidden_size / this.num_heads;
2211
+ }
2212
+ }
2213
+ class OPTModel extends OPTPreTrainedModel {
2214
+ }
2215
+ class OPTForCausalLM extends OPTPreTrainedModel {
2216
+ }
2217
+ class ViTPreTrainedModel extends PreTrainedModel {
2218
+ }
2219
+ class ViTModel extends ViTPreTrainedModel {
2220
+ }
2221
+ class ViTForImageClassification extends ViTPreTrainedModel {
2222
+ /**
2223
+ * @param {any} model_inputs
2224
+ */
2225
+ async _call(model_inputs) {
2226
+ return new SequenceClassifierOutput(await super._call(model_inputs));
2227
+ }
2228
+ }
2229
+ class FastViTPreTrainedModel extends PreTrainedModel {
2230
+ }
2231
+ class FastViTModel extends FastViTPreTrainedModel {
2232
+ }
2233
+ class FastViTForImageClassification extends FastViTPreTrainedModel {
2234
+ /**
2235
+ * @param {any} model_inputs
2236
+ */
2237
+ async _call(model_inputs) {
2238
+ return new SequenceClassifierOutput(await super._call(model_inputs));
2239
+ }
2240
+ }
2241
+ class VitMattePreTrainedModel extends PreTrainedModel {
2242
+ }
2243
+ class VitMatteForImageMatting extends VitMattePreTrainedModel {
2244
+ /**
2245
+ * @param {any} model_inputs
2246
+ */
2247
+ async _call(model_inputs) {
2248
+ return new ImageMattingOutput(await super._call(model_inputs));
2249
+ }
2250
+ }
2251
+ class MobileViTPreTrainedModel extends PreTrainedModel {
2252
+ }
2253
+ class MobileViTModel extends MobileViTPreTrainedModel {
2254
+ }
2255
+ class MobileViTForImageClassification extends MobileViTPreTrainedModel {
2256
+ /**
2257
+ * @param {any} model_inputs
2258
+ */
2259
+ async _call(model_inputs) {
2260
+ return new SequenceClassifierOutput(await super._call(model_inputs));
2261
+ }
2262
+ }
2263
+ class MobileViTV2PreTrainedModel extends PreTrainedModel {
2264
+ }
2265
+ class MobileViTV2Model extends MobileViTV2PreTrainedModel {
2266
+ }
2267
+ class MobileViTV2ForImageClassification extends MobileViTV2PreTrainedModel {
2268
+ /**
2269
+ * @param {any} model_inputs
2270
+ */
2271
+ async _call(model_inputs) {
2272
+ return new SequenceClassifierOutput(await super._call(model_inputs));
2273
+ }
2274
+ }
2275
+ class OwlViTPreTrainedModel extends PreTrainedModel {
2276
+ }
2277
+ class OwlViTModel extends OwlViTPreTrainedModel {
2278
+ }
2279
+ class OwlViTForObjectDetection extends OwlViTPreTrainedModel {
2280
+ }
2281
+ class Owlv2PreTrainedModel extends PreTrainedModel {
2282
+ }
2283
+ class Owlv2Model extends Owlv2PreTrainedModel {
2284
+ }
2285
+ class Owlv2ForObjectDetection extends Owlv2PreTrainedModel {
2286
+ }
2287
+ class BeitPreTrainedModel extends PreTrainedModel {
2288
+ }
2289
+ class BeitModel extends BeitPreTrainedModel {
2290
+ }
2291
+ class BeitForImageClassification extends BeitPreTrainedModel {
2292
+ /**
2293
+ * @param {any} model_inputs
2294
+ */
2295
+ async _call(model_inputs) {
2296
+ return new SequenceClassifierOutput(await super._call(model_inputs));
2297
+ }
2298
+ }
2299
+ class DetrPreTrainedModel extends PreTrainedModel {
2300
+ }
2301
+ class DetrModel extends DetrPreTrainedModel {
2302
+ }
2303
+ class DetrForObjectDetection extends DetrPreTrainedModel {
2304
+ /**
2305
+ * @param {any} model_inputs
2306
+ */
2307
+ async _call(model_inputs) {
2308
+ return new DetrObjectDetectionOutput(await super._call(model_inputs));
2309
+ }
2310
+ }
2311
+ class DetrForSegmentation extends DetrPreTrainedModel {
2312
+ /**
2313
+ * Runs the model with the provided inputs
2314
+ * @param {Object} model_inputs Model inputs
2315
+ * @returns {Promise<DetrSegmentationOutput>} Object containing segmentation outputs
2316
+ */
2317
+ async _call(model_inputs) {
2318
+ return new DetrSegmentationOutput(await super._call(model_inputs));
2319
+ }
2320
+ }
2321
+ class DetrObjectDetectionOutput extends ModelOutput {
2322
+ /**
2323
+ * @param {Object} output The output of the model.
2324
+ * @param {Tensor} output.logits Classification logits (including no-object) for all queries.
2325
+ * @param {Tensor} output.pred_boxes Normalized box coordinates for all queries, represented as (center_x, center_y, width, height).
2326
+ * These values are normalized in [0, 1], relative to the size of each individual image in the batch (disregarding possible padding).
2327
+ */
2328
+ constructor({ logits, pred_boxes }) {
2329
+ super();
2330
+ this.logits = logits;
2331
+ this.pred_boxes = pred_boxes;
2332
+ }
2333
+ }
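Because `pred_boxes` holds (center_x, center_y, width, height) values normalized to [0, 1], callers typically rescale them to pixel corners before drawing or post-processing. A minimal sketch of that conversion; the helper name and image size are illustrative, not part of this bundle:

// Convert one normalized (cx, cy, w, h) box to absolute (x_min, y_min, x_max, y_max) pixels.
function toPixelCorners([cx, cy, w, h], imageWidth, imageHeight) {
  return [
    (cx - w / 2) * imageWidth,
    (cy - h / 2) * imageHeight,
    (cx + w / 2) * imageWidth,
    (cy + h / 2) * imageHeight,
  ];
}

console.log(toPixelCorners([0.5, 0.5, 0.2, 0.4], 640, 480)); // [256, 144, 384, 336]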
2334
+ class DetrSegmentationOutput extends ModelOutput {
2335
+ /**
2336
+ * @param {Object} output The output of the model.
2337
+ * @param {Tensor} output.logits The output logits of the model.
2338
+ * @param {Tensor} output.pred_boxes Predicted boxes.
2339
+ * @param {Tensor} output.pred_masks Predicted masks.
2340
+ */
2341
+ constructor({ logits, pred_boxes, pred_masks }) {
2342
+ super();
2343
+ this.logits = logits;
2344
+ this.pred_boxes = pred_boxes;
2345
+ this.pred_masks = pred_masks;
2346
+ }
2347
+ }
2348
+ class TableTransformerPreTrainedModel extends PreTrainedModel {
2349
+ }
2350
+ class TableTransformerModel extends TableTransformerPreTrainedModel {
2351
+ }
2352
+ class TableTransformerForObjectDetection extends TableTransformerPreTrainedModel {
2353
+ /**
2354
+ * @param {any} model_inputs
2355
+ */
2356
+ async _call(model_inputs) {
2357
+ return new TableTransformerObjectDetectionOutput(await super._call(model_inputs));
2358
+ }
2359
+ }
2360
+ class TableTransformerObjectDetectionOutput extends DetrObjectDetectionOutput {
2361
+ }
2362
+ class DeiTPreTrainedModel extends PreTrainedModel {
2363
+ }
2364
+ class DeiTModel extends DeiTPreTrainedModel {
2365
+ }
2366
+ class DeiTForImageClassification extends DeiTPreTrainedModel {
2367
+ /**
2368
+ * @param {any} model_inputs
2369
+ */
2370
+ async _call(model_inputs) {
2371
+ return new SequenceClassifierOutput(await super._call(model_inputs));
2372
+ }
2373
+ }
2374
+ class ResNetPreTrainedModel extends PreTrainedModel {
2375
+ }
2376
+ class ResNetModel extends ResNetPreTrainedModel {
2377
+ }
2378
+ class ResNetForImageClassification extends ResNetPreTrainedModel {
2379
+ /**
2380
+ * @param {any} model_inputs
2381
+ */
2382
+ async _call(model_inputs) {
2383
+ return new SequenceClassifierOutput(await super._call(model_inputs));
2384
+ }
2385
+ }
2386
+ class SwinPreTrainedModel extends PreTrainedModel {
2387
+ }
2388
+ class SwinModel extends SwinPreTrainedModel {
2389
+ }
2390
+ class SwinForImageClassification extends SwinPreTrainedModel {
2391
+ /**
2392
+ * @param {any} model_inputs
2393
+ */
2394
+ async _call(model_inputs) {
2395
+ return new SequenceClassifierOutput(await super._call(model_inputs));
2396
+ }
2397
+ }
2398
+ class Swin2SRPreTrainedModel extends PreTrainedModel {
2399
+ }
2400
+ class Swin2SRModel extends Swin2SRPreTrainedModel {
2401
+ }
2402
+ class Swin2SRForImageSuperResolution extends Swin2SRPreTrainedModel {
2403
+ }
2404
+ class DPTPreTrainedModel extends PreTrainedModel {
2405
+ }
2406
+ class DPTModel extends DPTPreTrainedModel {
2407
+ }
2408
+ class DPTForDepthEstimation extends DPTPreTrainedModel {
2409
+ }
2410
+ class DepthAnythingPreTrainedModel extends PreTrainedModel {
2411
+ }
2412
+ class DepthAnythingForDepthEstimation extends DepthAnythingPreTrainedModel {
2413
+ }
2414
+ class GLPNPreTrainedModel extends PreTrainedModel {
2415
+ }
2416
+ class GLPNModel extends GLPNPreTrainedModel {
2417
+ }
2418
+ class GLPNForDepthEstimation extends GLPNPreTrainedModel {
2419
+ }
2420
+ class DonutSwinPreTrainedModel extends PreTrainedModel {
2421
+ }
2422
+ class DonutSwinModel extends DonutSwinPreTrainedModel {
2423
+ }
2424
+ class ConvNextPreTrainedModel extends PreTrainedModel {
2425
+ }
2426
+ class ConvNextModel extends ConvNextPreTrainedModel {
2427
+ }
2428
+ class ConvNextForImageClassification extends ConvNextPreTrainedModel {
2429
+ /**
2430
+ * @param {any} model_inputs
2431
+ */
2432
+ async _call(model_inputs) {
2433
+ return new SequenceClassifierOutput(await super._call(model_inputs));
2434
+ }
2435
+ }
2436
+ class ConvNextV2PreTrainedModel extends PreTrainedModel {
2437
+ }
2438
+ class ConvNextV2Model extends ConvNextV2PreTrainedModel {
2439
+ }
2440
+ class ConvNextV2ForImageClassification extends ConvNextV2PreTrainedModel {
2441
+ /**
2442
+ * @param {any} model_inputs
2443
+ */
2444
+ async _call(model_inputs) {
2445
+ return new SequenceClassifierOutput(await super._call(model_inputs));
2446
+ }
2447
+ }
2448
+ class Dinov2PreTrainedModel extends PreTrainedModel {
2449
+ }
2450
+ class Dinov2Model extends Dinov2PreTrainedModel {
2451
+ }
2452
+ class Dinov2ForImageClassification extends Dinov2PreTrainedModel {
2453
+ /**
2454
+ * @param {any} model_inputs
2455
+ */
2456
+ async _call(model_inputs) {
2457
+ return new SequenceClassifierOutput(await super._call(model_inputs));
2458
+ }
2459
+ }
2460
+ class YolosPreTrainedModel extends PreTrainedModel {
2461
+ }
2462
+ class YolosModel extends YolosPreTrainedModel {
2463
+ }
2464
+ class YolosForObjectDetection extends YolosPreTrainedModel {
2465
+ /**
2466
+ * @param {any} model_inputs
2467
+ */
2468
+ async _call(model_inputs) {
2469
+ return new YolosObjectDetectionOutput(await super._call(model_inputs));
2470
+ }
2471
+ }
2472
+ class YolosObjectDetectionOutput extends ModelOutput {
2473
+ /**
2474
+ * @param {Object} output The output of the model.
2475
+ * @param {Tensor} output.logits Classification logits (including no-object) for all queries.
2476
+ * @param {Tensor} output.pred_boxes Normalized box coordinates for all queries, represented as (center_x, center_y, width, height).
2477
+ * These values are normalized in [0, 1], relative to the size of each individual image in the batch (disregarding possible padding).
2478
+ */
2479
+ constructor({ logits, pred_boxes }) {
2480
+ super();
2481
+ this.logits = logits;
2482
+ this.pred_boxes = pred_boxes;
2483
+ }
2484
+ }
2485
+ class SamPreTrainedModel extends PreTrainedModel {
2486
+ }
2487
+ class SamModel extends SamPreTrainedModel {
2488
+ /**
2489
+ * Creates a new instance of the `SamModel` class.
2490
+ * @param {Object} config The configuration object specifying the hyperparameters and other model settings.
2491
+ * @param {Object} vision_encoder The ONNX session containing the vision encoder model.
2492
+ * @param {any} prompt_encoder_mask_decoder The ONNX session containing the prompt encoder and mask decoder model.
2493
+ */
2494
+ constructor(config, vision_encoder, prompt_encoder_mask_decoder) {
2495
+ super(config, vision_encoder);
2496
+ this.prompt_encoder_mask_decoder = prompt_encoder_mask_decoder;
2497
+ }
2498
+ /**
2499
+ * Compute image embeddings and positional image embeddings, given the pixel values of an image.
2500
+ * @param {Object} model_inputs Object containing the model inputs.
2501
+ * @param {Tensor} model_inputs.pixel_values Pixel values obtained using a `SamProcessor`.
2502
+ * @returns {Promise<{ image_embeddings: Tensor, image_positional_embeddings: Tensor }>} The image embeddings and positional image embeddings.
2503
+ */
2504
+ async get_image_embeddings({ pixel_values }) {
2505
+ return await encoderForward(this, { pixel_values });
2506
+ }
2507
+ /**
2508
+ * @typedef {Object} SamModelInputs Object containing the model inputs.
2509
+ * @property {Tensor} pixel_values Pixel values as a Tensor with shape `(batch_size, num_channels, height, width)`.
2510
+ * These can be obtained using a `SamProcessor`.
2511
+ * @property {Tensor} input_points Input 2D spatial points with shape `(batch_size, num_points, 2)`.
2512
+ * This is used by the prompt encoder to encode the prompt.
2513
+ * @property {Tensor} [input_labels] Input labels for the points, as a Tensor of shape `(batch_size, point_batch_size, num_points)`.
2514
+ * This is used by the prompt encoder to encode the prompt. There are 4 types of labels:
2515
+ * - `1`: the point is a point that contains the object of interest
2516
+ * - `0`: the point is a point that does not contain the object of interest
2517
+ * - `-1`: the point corresponds to the background
2518
+ * - `-10`: the point is a padding point, thus should be ignored by the prompt encoder
2519
+ * @property {Tensor} [image_embeddings] Image embeddings used by the mask decoder.
2520
+ * @property {Tensor} [image_positional_embeddings] Image positional embeddings used by the mask decoder.
2521
+ */
2522
+ /**
2523
+ * @param {SamModelInputs} model_inputs Object containing the model inputs.
2524
+ * @returns {Promise<Object>} The output of the model.
2525
+ */
2526
+ async forward(model_inputs) {
2527
+ if (!model_inputs.image_embeddings || !model_inputs.image_positional_embeddings) {
2528
+ model_inputs = {
2529
+ ...model_inputs,
2530
+ ...await this.get_image_embeddings(model_inputs)
2531
+ };
2532
+ }
2533
+ if (!model_inputs.input_labels) {
2534
+ const shape = model_inputs.input_points.dims.slice(0, -1);
2535
+ const numElements = shape.reduce((a, b) => a * b, 1);
2536
+ model_inputs.input_labels = new Tensor(
2537
+ "int64",
2538
+ new BigInt64Array(numElements).fill(1n),
2539
+ shape
2540
+ );
2541
+ }
2542
+ return await sessionRun(this.prompt_encoder_mask_decoder, {
2543
+ input_points: model_inputs.input_points,
2544
+ input_labels: model_inputs.input_labels,
2545
+ image_embeddings: model_inputs.image_embeddings,
2546
+ image_positional_embeddings: model_inputs.image_positional_embeddings
2547
+ });
2548
+ }
2549
+ /**
2550
+ * Runs the model with the provided inputs
2551
+ * @param {Object} model_inputs Model inputs
2552
+ * @returns {Promise<SamImageSegmentationOutput>} Object containing segmentation outputs
2553
+ */
2554
+ async _call(model_inputs) {
2555
+ return new SamImageSegmentationOutput(await super._call(model_inputs));
2556
+ }
2557
+ }
2558
+ class SamImageSegmentationOutput extends ModelOutput {
2559
+ /**
2560
+ * @param {Object} output The output of the model.
2561
+ * @param {Tensor} output.iou_scores The predicted IoU (intersection-over-union) scores of the masks.
2562
+ * @param {Tensor} output.pred_masks The predicted masks.
2563
+ */
2564
+ constructor({ iou_scores, pred_masks }) {
2565
+ super();
2566
+ this.iou_scores = iou_scores;
2567
+ this.pred_masks = pred_masks;
2568
+ }
2569
+ }
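Putting the SAM pieces together: image embeddings are computed once (either explicitly via `get_image_embeddings` or lazily inside `forward`), then the prompt encoder and mask decoder run per point prompt. A rough usage sketch, assuming transformers.js-style exports are reachable from this bundle; the import path, checkpoint id, and image URL are assumptions:

// Sketch only: entry point and checkpoint id are assumed, not taken from this diff.
import { SamModel, AutoProcessor, RawImage } from '@xenova/transformers';

const model = await SamModel.from_pretrained('Xenova/slimsam-77-uniform');
const processor = await AutoProcessor.from_pretrained('Xenova/slimsam-77-uniform');

const image = await RawImage.read('https://example.com/cat.jpg'); // placeholder URL
const input_points = [[[340, 250]]];                              // one (x, y) prompt point

// Embeddings are computed inside forward() when not supplied explicitly;
// input_labels default to 1 ("contains the object of interest").
const inputs = await processor(image, input_points);
const { iou_scores, pred_masks } = await model(inputs);           // SamImageSegmentationOutput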
2570
+ class MarianPreTrainedModel extends PreTrainedModel {
2571
+ }
2572
+ class MarianModel extends MarianPreTrainedModel {
2573
+ }
2574
+ class MarianMTModel extends MarianPreTrainedModel {
2575
+ /**
2576
+ * Creates a new instance of the `MarianMTModel` class.
2577
+ * @param {Object} config The model configuration object.
2578
+ * @param {Object} session The ONNX session object.
2579
+ * @param {any} decoder_merged_session The ONNX session for the merged decoder.
2580
+ * @param {any} generation_config The generation configuration.
2581
+ */
2582
+ constructor(config, session, decoder_merged_session, generation_config) {
2583
+ super(config, session);
2584
+ this.decoder_merged_session = decoder_merged_session;
2585
+ this.generation_config = generation_config;
2586
+ this.num_decoder_layers = this.config.decoder_layers;
2587
+ this.num_decoder_heads = this.config.decoder_attention_heads;
2588
+ this.decoder_dim_kv = this.config.d_model / this.num_decoder_heads;
2589
+ this.num_encoder_layers = this.config.encoder_layers;
2590
+ this.num_encoder_heads = this.config.encoder_attention_heads;
2591
+ this.encoder_dim_kv = this.config.d_model / this.num_encoder_heads;
2592
+ }
2593
+ }
2594
+ class M2M100PreTrainedModel extends PreTrainedModel {
2595
+ }
2596
+ class M2M100Model extends M2M100PreTrainedModel {
2597
+ }
2598
+ class M2M100ForConditionalGeneration extends M2M100PreTrainedModel {
2599
+ /**
2600
+ * Creates a new instance of the `M2M100ForConditionalGeneration` class.
2601
+ * @param {Object} config The model configuration object.
2602
+ * @param {Object} session The ONNX session object.
2603
+ * @param {any} decoder_merged_session The ONNX session for the merged decoder.
2604
+ * @param {any} generation_config The generation configuration.
2605
+ */
2606
+ constructor(config, session, decoder_merged_session, generation_config) {
2607
+ super(config, session);
2608
+ this.decoder_merged_session = decoder_merged_session;
2609
+ this.generation_config = generation_config;
2610
+ this.num_decoder_layers = this.config.decoder_layers;
2611
+ this.num_decoder_heads = this.config.decoder_attention_heads;
2612
+ this.decoder_dim_kv = this.config.d_model / this.num_decoder_heads;
2613
+ this.num_encoder_layers = this.config.encoder_layers;
2614
+ this.num_encoder_heads = this.config.encoder_attention_heads;
2615
+ this.encoder_dim_kv = this.config.d_model / this.num_encoder_heads;
2616
+ }
2617
+ }
2618
+ class Wav2Vec2PreTrainedModel extends PreTrainedModel {
2619
+ }
2620
+ class Wav2Vec2Model extends Wav2Vec2PreTrainedModel {
2621
+ }
2622
+ class Wav2Vec2ForCTC extends Wav2Vec2PreTrainedModel {
2623
+ /**
2624
+ * @param {Object} model_inputs
2625
+ * @param {Tensor} model_inputs.input_values Float values of input raw speech waveform.
2626
+ * @param {Tensor} model_inputs.attention_mask Mask to avoid performing convolution and attention on padding token indices. Mask values selected in [0, 1]
2627
+ */
2628
+ async _call(model_inputs) {
2629
+ return new CausalLMOutput(await super._call(model_inputs));
2630
+ }
2631
+ }
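The CTC heads above return frame-level logits; turning them into token ids is normally a greedy decode (argmax per frame, collapse repeats, drop the blank). A minimal sketch, independent of any tokenizer; the logits layout and blank id are stated assumptions:

// Greedy CTC decoding over logits laid out as [num_frames][vocab_size].
// Assumes blank token id 0, as in typical Wav2Vec2-style vocabularies.
function greedyCtcDecode(logits, blankId = 0) {
  const ids = [];
  let prev = -1;
  for (const frame of logits) {
    let best = 0;
    for (let i = 1; i < frame.length; ++i) if (frame[i] > frame[best]) best = i;
    if (best !== prev && best !== blankId) ids.push(best); // collapse repeats, drop blanks
    prev = best;
  }
  return ids; // token ids, mapped back to characters by the tokenizer
}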
2632
+ class Wav2Vec2ForSequenceClassification extends Wav2Vec2PreTrainedModel {
2633
+ /**
2634
+ * Calls the model on new inputs.
2635
+ * @param {Object} model_inputs The inputs to the model.
2636
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
2637
+ */
2638
+ async _call(model_inputs) {
2639
+ return new SequenceClassifierOutput(await super._call(model_inputs));
2640
+ }
2641
+ }
2642
+ class Wav2Vec2ForAudioFrameClassification extends Wav2Vec2PreTrainedModel {
2643
+ /**
2644
+ * Calls the model on new inputs.
2645
+ * @param {Object} model_inputs The inputs to the model.
2646
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for audio frame classification.
2647
+ */
2648
+ async _call(model_inputs) {
2649
+ return new TokenClassifierOutput(await super._call(model_inputs));
2650
+ }
2651
+ }
2652
+ class UniSpeechPreTrainedModel extends PreTrainedModel {
2653
+ }
2654
+ class UniSpeechModel extends UniSpeechPreTrainedModel {
2655
+ }
2656
+ class UniSpeechForCTC extends UniSpeechPreTrainedModel {
2657
+ /**
2658
+ * @param {Object} model_inputs
2659
+ * @param {Tensor} model_inputs.input_values Float values of input raw speech waveform.
2660
+ * @param {Tensor} model_inputs.attention_mask Mask to avoid performing convolution and attention on padding token indices. Mask values selected in [0, 1]
2661
+ */
2662
+ async _call(model_inputs) {
2663
+ return new CausalLMOutput(await super._call(model_inputs));
2664
+ }
2665
+ }
2666
+ class UniSpeechForSequenceClassification extends UniSpeechPreTrainedModel {
2667
+ /**
2668
+ * Calls the model on new inputs.
2669
+ * @param {Object} model_inputs The inputs to the model.
2670
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
2671
+ */
2672
+ async _call(model_inputs) {
2673
+ return new SequenceClassifierOutput(await super._call(model_inputs));
2674
+ }
2675
+ }
2676
+ class UniSpeechSatPreTrainedModel extends PreTrainedModel {
2677
+ }
2678
+ class UniSpeechSatModel extends UniSpeechSatPreTrainedModel {
2679
+ }
2680
+ class UniSpeechSatForCTC extends UniSpeechSatPreTrainedModel {
2681
+ /**
2682
+ * @param {Object} model_inputs
2683
+ * @param {Tensor} model_inputs.input_values Float values of input raw speech waveform.
2684
+ * @param {Tensor} model_inputs.attention_mask Mask to avoid performing convolution and attention on padding token indices. Mask values selected in [0, 1]
2685
+ */
2686
+ async _call(model_inputs) {
2687
+ return new CausalLMOutput(await super._call(model_inputs));
2688
+ }
2689
+ }
2690
+ class UniSpeechSatForSequenceClassification extends UniSpeechSatPreTrainedModel {
2691
+ /**
2692
+ * Calls the model on new inputs.
2693
+ * @param {Object} model_inputs The inputs to the model.
2694
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
2695
+ */
2696
+ async _call(model_inputs) {
2697
+ return new SequenceClassifierOutput(await super._call(model_inputs));
2698
+ }
2699
+ }
2700
+ class UniSpeechSatForAudioFrameClassification extends UniSpeechSatPreTrainedModel {
2701
+ /**
2702
+ * Calls the model on new inputs.
2703
+ * @param {Object} model_inputs The inputs to the model.
2704
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for audio frame classification.
2705
+ */
2706
+ async _call(model_inputs) {
2707
+ return new TokenClassifierOutput(await super._call(model_inputs));
2708
+ }
2709
+ }
2710
+ class Wav2Vec2BertPreTrainedModel extends PreTrainedModel {
2711
+ }
2712
+ class Wav2Vec2BertModel extends Wav2Vec2BertPreTrainedModel {
2713
+ }
2714
+ class Wav2Vec2BertForCTC extends Wav2Vec2BertPreTrainedModel {
2715
+ /**
2716
+ * @param {Object} model_inputs
2717
+ * @param {Tensor} model_inputs.input_features Float values of input mel-spectrogram.
2718
+ * @param {Tensor} model_inputs.attention_mask Mask to avoid performing convolution and attention on padding token indices. Mask values selected in [0, 1]
2719
+ */
2720
+ async _call(model_inputs) {
2721
+ return new CausalLMOutput(await super._call(model_inputs));
2722
+ }
2723
+ }
2724
+ class Wav2Vec2BertForSequenceClassification extends Wav2Vec2BertPreTrainedModel {
2725
+ /**
2726
+ * Calls the model on new inputs.
2727
+ * @param {Object} model_inputs The inputs to the model.
2728
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
2729
+ */
2730
+ async _call(model_inputs) {
2731
+ return new SequenceClassifierOutput(await super._call(model_inputs));
2732
+ }
2733
+ }
2734
+ class HubertModel extends Wav2Vec2PreTrainedModel {
2735
+ }
2736
+ class HubertForCTC extends Wav2Vec2PreTrainedModel {
2737
+ /**
2738
+ * @param {Object} model_inputs
2739
+ * @param {Tensor} model_inputs.input_values Float values of input raw speech waveform.
2740
+ * @param {Tensor} model_inputs.attention_mask Mask to avoid performing convolution and attention on padding token indices. Mask values selected in [0, 1]
2741
+ */
2742
+ async _call(model_inputs) {
2743
+ return new CausalLMOutput(await super._call(model_inputs));
2744
+ }
2745
+ }
2746
+ class HubertForSequenceClassification extends Wav2Vec2PreTrainedModel {
2747
+ /**
2748
+ * Calls the model on new inputs.
2749
+ * @param {Object} model_inputs The inputs to the model.
2750
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
2751
+ */
2752
+ async _call(model_inputs) {
2753
+ return new SequenceClassifierOutput(await super._call(model_inputs));
2754
+ }
2755
+ }
2756
+ class WavLMPreTrainedModel extends PreTrainedModel {
2757
+ }
2758
+ class WavLMModel extends WavLMPreTrainedModel {
2759
+ }
2760
+ class WavLMForCTC extends WavLMPreTrainedModel {
2761
+ /**
2762
+ * @param {Object} model_inputs
2763
+ * @param {Tensor} model_inputs.input_values Float values of input raw speech waveform.
2764
+ * @param {Tensor} model_inputs.attention_mask Mask to avoid performing convolution and attention on padding token indices. Mask values selected in [0, 1]
2765
+ */
2766
+ async _call(model_inputs) {
2767
+ return new CausalLMOutput(await super._call(model_inputs));
2768
+ }
2769
+ }
2770
+ class WavLMForSequenceClassification extends WavLMPreTrainedModel {
2771
+ /**
2772
+ * Calls the model on new inputs.
2773
+ * @param {Object} model_inputs The inputs to the model.
2774
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
2775
+ */
2776
+ async _call(model_inputs) {
2777
+ return new SequenceClassifierOutput(await super._call(model_inputs));
2778
+ }
2779
+ }
2780
+ class WavLMForXVector extends WavLMPreTrainedModel {
2781
+ /**
2782
+ * Calls the model on new inputs.
2783
+ * @param {Object} model_inputs The inputs to the model.
2784
+ * @returns {Promise<XVectorOutput>} An object containing the model's output logits and speaker embeddings.
2785
+ */
2786
+ async _call(model_inputs) {
2787
+ return new XVectorOutput(await super._call(model_inputs));
2788
+ }
2789
+ }
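The `embeddings` field of `XVectorOutput` is typically compared with cosine similarity for speaker verification. A small sketch over plain arrays; the variable names and threshold are illustrative:

// Cosine similarity between two speaker embeddings (plain or typed arrays).
function cosineSimilarity(a, b) {
  let dot = 0, na = 0, nb = 0;
  for (let i = 0; i < a.length; ++i) {
    dot += a[i] * b[i];
    na += a[i] * a[i];
    nb += b[i] * b[i];
  }
  return dot / (Math.sqrt(na) * Math.sqrt(nb));
}

// e.g. cosineSimilarity(out1.embeddings.data, out2.embeddings.data) > 0.85,
// where out1/out2 are hypothetical XVectorOutput results and 0.85 is an illustrative threshold.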
2790
+ class WavLMForAudioFrameClassification extends WavLMPreTrainedModel {
2791
+ /**
2792
+ * Calls the model on new inputs.
2793
+ * @param {Object} model_inputs The inputs to the model.
2794
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for audio frame classification.
2795
+ */
2796
+ async _call(model_inputs) {
2797
+ return new TokenClassifierOutput(await super._call(model_inputs));
2798
+ }
2799
+ }
2800
+ class SpeechT5PreTrainedModel extends PreTrainedModel {
2801
+ }
2802
+ class SpeechT5ForSpeechToText extends SpeechT5PreTrainedModel {
2803
+ }
2804
+ class SpeechT5ForTextToSpeech extends SpeechT5PreTrainedModel {
2805
+ /**
2806
+ * Creates a new instance of the `SpeechT5ForTextToSpeech` class.
2807
+ * @param {Object} config The model configuration.
2808
+ * @param {any} session The ONNX session for the model.
2809
+ * @param {any} decoder_merged_session The ONNX session for the merged decoder.
2810
+ * @param {GenerationConfig} generation_config The generation configuration.
2811
+ */
2812
+ constructor(config, session, decoder_merged_session, generation_config) {
2813
+ super(config, session);
2814
+ this.decoder_merged_session = decoder_merged_session;
2815
+ this.generation_config = generation_config;
2816
+ this.num_decoder_layers = this.config.decoder_layers;
2817
+ this.num_decoder_heads = this.config.decoder_attention_heads;
2818
+ this.decoder_dim_kv = this.config.hidden_size / this.num_decoder_heads;
2819
+ this.num_encoder_layers = this.config.encoder_layers;
2820
+ this.num_encoder_heads = this.config.encoder_attention_heads;
2821
+ this.encoder_dim_kv = this.config.hidden_size / this.num_encoder_heads;
2822
+ }
2823
+ /**
2824
+ * @typedef {Object} SpeechOutput
2825
+ * @property {Tensor} [spectrogram] The predicted log-mel spectrogram of shape
2826
+ * `(output_sequence_length, config.num_mel_bins)`. Returned when no `vocoder` is provided
2827
+ * @property {Tensor} [waveform] The predicted waveform of shape `(num_frames,)`. Returned when a `vocoder` is provided.
2828
+ * @property {Tensor} [cross_attentions] The outputs of the decoder's cross-attention layers of shape
2829
+ * `(config.decoder_layers, config.decoder_attention_heads, output_sequence_length, input_sequence_length)`. returned when `output_cross_attentions` is `true`.
2830
+ */
2831
+ /**
2832
+ * Converts a sequence of input tokens into a sequence of mel spectrograms, which are subsequently turned into a speech waveform using a vocoder.
2833
+ * @param {Tensor} input_values Indices of input sequence tokens in the vocabulary.
2834
+ * @param {Tensor} speaker_embeddings Tensor containing the speaker embeddings.
2835
+ * @param {Object} options Optional parameters for generating speech.
2836
+ * @param {number} [options.threshold=0.5] The generated sequence ends when the predicted stop token probability exceeds this value.
2837
+ * @param {number} [options.minlenratio=0.0] Used to calculate the minimum required length for the output sequence.
2838
+ * @param {number} [options.maxlenratio=20.0] Used to calculate the maximum allowed length for the output sequence.
2839
+ * @param {Object} [options.vocoder=null] The vocoder that converts the mel spectrogram into a speech waveform. If `null`, the output is the mel spectrogram.
2840
+ * @param {boolean} [options.output_cross_attentions=false] Whether or not to return the attentions tensors of the decoder's cross-attention layers.
2841
+ * @returns {Promise<SpeechOutput>} A promise which resolves to an object containing the spectrogram, waveform, and cross-attention tensors.
2842
+ */
2843
+ async generate_speech(input_values, speaker_embeddings, {
2844
+ threshold = 0.5,
2845
+ minlenratio = 0,
2846
+ maxlenratio = 20,
2847
+ vocoder = null
2848
+ // output_cross_attentions = false, // TODO add
2849
+ } = {}) {
2850
+ const model_inputs = {
2851
+ input_ids: input_values
2852
+ };
2853
+ const { encoder_outputs, encoder_attention_mask } = await encoderForward(this, model_inputs);
2854
+ const r = encoder_outputs.dims[1] / this.config.reduction_factor;
2855
+ const maxlen = Math.floor(r * maxlenratio);
2856
+ const minlen = Math.floor(r * minlenratio);
2857
+ const num_mel_bins = this.config.num_mel_bins;
2858
+ let spectrogramParts = [];
2859
+ let past_key_values = null;
2860
+ let decoder_outputs = null;
2861
+ let idx = 0;
2862
+ while (true) {
2863
+ ++idx;
2864
+ const use_cache_branch = boolTensor(!!decoder_outputs);
2865
+ let output_sequence;
2866
+ if (decoder_outputs) {
2867
+ output_sequence = decoder_outputs.output_sequence_out;
2868
+ } else {
2869
+ output_sequence = new Tensor(
2870
+ "float32",
2871
+ new Float32Array(num_mel_bins),
2872
+ [1, 1, num_mel_bins]
2873
+ );
2874
+ }
2875
+ let decoderFeeds = {
2876
+ use_cache_branch,
2877
+ output_sequence,
2878
+ encoder_attention_mask,
2879
+ speaker_embeddings,
2880
+ encoder_hidden_states: encoder_outputs
2881
+ };
2882
+ this.addPastKeyValues(decoderFeeds, past_key_values);
2883
+ decoder_outputs = await sessionRun(this.decoder_merged_session, decoderFeeds);
2884
+ past_key_values = this.getPastKeyValues(decoder_outputs, past_key_values);
2885
+ const { prob, spectrum } = decoder_outputs;
2886
+ spectrogramParts.push(spectrum);
2887
+ if (idx >= minlen && // Finished when stop token or maximum length is reached.
2888
+ (Array.from(prob.data).filter((p) => p >= threshold).length > 0 || idx >= maxlen)) {
2889
+ break;
2890
+ }
2891
+ }
2892
+ const spectrogram = cat(spectrogramParts);
2893
+ const { waveform } = await sessionRun(vocoder.session, { spectrogram });
2894
+ return {
2895
+ spectrogram,
2896
+ waveform
2897
+ // cross_attentions: null, // TODO add
2898
+ };
2899
+ }
2900
+ }
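The `generate_speech` loop above runs the decoder until the stop probability crosses `threshold` (or `maxlenratio` is hit), concatenates the spectrogram frames, and feeds them through the vocoder. A usage sketch, assuming transformers.js-style exports and example checkpoint ids (both assumptions, not taken from this diff):

// Sketch only: entry point and checkpoint ids are assumed.
import { AutoTokenizer, SpeechT5ForTextToSpeech, SpeechT5HifiGan, Tensor } from '@xenova/transformers';

const tokenizer = await AutoTokenizer.from_pretrained('Xenova/speecht5_tts');
const model = await SpeechT5ForTextToSpeech.from_pretrained('Xenova/speecht5_tts');
const vocoder = await SpeechT5HifiGan.from_pretrained('Xenova/speecht5_hifigan');

const { input_ids } = tokenizer('Hello from the knowledge graph.');
// 512-dim speaker embedding; in practice this is loaded from a speaker-embedding file,
// an all-zeros vector is used here only to keep the sketch self-contained.
const speaker_embeddings = new Tensor('float32', new Float32Array(512), [1, 512]);

const { waveform } = await model.generate_speech(input_ids, speaker_embeddings, { vocoder });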
2901
+ class SpeechT5HifiGan extends PreTrainedModel {
2902
+ main_input_name = "spectrogram";
2903
+ }
2904
+ class TrOCRPreTrainedModel extends PreTrainedModel {
2905
+ /**
2906
+ * Creates a new instance of the `TrOCRPreTrainedModel` class.
2907
+ * @param {Object} config The configuration of the model.
2908
+ * @param {any} session The ONNX session containing the model weights.
2909
+ * @param {GenerationConfig} generation_config The generation configuration.
2910
+ */
2911
+ constructor(config, session, generation_config) {
2912
+ super(config, session);
2913
+ this.generation_config = generation_config;
2914
+ this.config.pad_token_id = this.config.eos_token_id;
2915
+ this.num_encoder_layers = this.num_decoder_layers = this.config.decoder_layers;
2916
+ this.num_encoder_heads = this.num_decoder_heads = this.config.decoder_attention_heads;
2917
+ this.encoder_dim_kv = this.decoder_dim_kv = this.config.d_model / this.num_decoder_heads;
2918
+ }
2919
+ }
2920
+ class TrOCRForCausalLM extends TrOCRPreTrainedModel {
2921
+ }
2922
+ class MistralPreTrainedModel extends PreTrainedModel {
2923
+ /**
2924
+ * Creates a new instance of the `MistralPreTrainedModel` class.
2925
+ * @param {Object} config The configuration of the model.
2926
+ * @param {any} session The ONNX session containing the model weights.
2927
+ * @param {GenerationConfig} generation_config The generation configuration.
2928
+ */
2929
+ constructor(config, session, generation_config) {
2930
+ super(config, session);
2931
+ this.generation_config = generation_config;
2932
+ this.config.pad_token_id = this.config.eos_token_id;
2933
+ this.num_heads = this.config.num_key_value_heads;
2934
+ this.num_layers = this.config.num_hidden_layers;
2935
+ this.dim_kv = this.config.hidden_size / this.config.num_attention_heads;
2936
+ }
2937
+ }
2938
+ class MistralModel extends MistralPreTrainedModel {
2939
+ }
2940
+ class MistralForCausalLM extends MistralPreTrainedModel {
2941
+ }
2942
+ class Starcoder2PreTrainedModel extends PreTrainedModel {
2943
+ /**
2944
+ * Creates a new instance of the `Starcoder2PreTrainedModel` class.
2945
+ * @param {Object} config The configuration of the model.
2946
+ * @param {any} session The ONNX session containing the model weights.
2947
+ * @param {GenerationConfig} generation_config The generation configuration.
2948
+ */
2949
+ constructor(config, session, generation_config) {
2950
+ super(config, session);
2951
+ this.generation_config = generation_config;
2952
+ this.config.pad_token_id = this.config.eos_token_id;
2953
+ this.num_heads = this.config.num_key_value_heads;
2954
+ this.num_layers = this.config.num_hidden_layers;
2955
+ this.dim_kv = this.config.hidden_size / this.config.num_attention_heads;
2956
+ }
2957
+ }
2958
+ class Starcoder2Model extends Starcoder2PreTrainedModel {
2959
+ }
2960
+ class Starcoder2ForCausalLM extends Starcoder2PreTrainedModel {
2961
+ }
2962
+ class FalconPreTrainedModel extends PreTrainedModel {
2963
+ /**
2964
+ * Creates a new instance of the `FalconPreTrainedModel` class.
2965
+ * @param {Object} config The configuration of the model.
2966
+ * @param {any} session The ONNX session containing the model weights.
2967
+ * @param {GenerationConfig} generation_config The generation configuration.
2968
+ */
2969
+ constructor(config, session, generation_config) {
2970
+ super(config, session);
2971
+ this.generation_config = generation_config;
2972
+ this.config.pad_token_id = this.config.eos_token_id;
2973
+ this.num_heads = this.config.num_attention_heads;
2974
+ this.num_layers = this.config.num_hidden_layers;
2975
+ this.dim_kv = this.config.hidden_size / this.config.num_attention_heads;
2976
+ }
2977
+ }
2978
+ class FalconModel extends FalconPreTrainedModel {
2979
+ }
2980
+ class FalconForCausalLM extends FalconPreTrainedModel {
2981
+ }
2982
+ class ClapPreTrainedModel extends PreTrainedModel {
2983
+ }
2984
+ class ClapModel extends ClapPreTrainedModel {
2985
+ }
2986
+ class ClapTextModelWithProjection extends ClapPreTrainedModel {
2987
+ /** @type {PreTrainedModel.from_pretrained} */
2988
+ static async from_pretrained(pretrained_model_name_or_path, options = {}) {
2989
+ options.model_file_name ??= "text_model";
2990
+ return super.from_pretrained(pretrained_model_name_or_path, options);
2991
+ }
2992
+ }
2993
+ class ClapAudioModelWithProjection extends ClapPreTrainedModel {
2994
+ /** @type {PreTrainedModel.from_pretrained} */
2995
+ static async from_pretrained(pretrained_model_name_or_path, options = {}) {
2996
+ options.model_file_name ??= "audio_model";
2997
+ return super.from_pretrained(pretrained_model_name_or_path, options);
2998
+ }
2999
+ }
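The two `from_pretrained` overrides above only default `model_file_name` to the text or audio tower, so both halves of a CLAP checkpoint can be loaded from the same repository. A sketch, with an assumed entry point and checkpoint id:

// Sketch only: entry point and checkpoint id are assumed.
import { ClapTextModelWithProjection, ClapAudioModelWithProjection, AutoTokenizer } from '@xenova/transformers';

const repo = 'Xenova/clap-htsat-unfused';
const textModel = await ClapTextModelWithProjection.from_pretrained(repo);   // resolves the "text_model" ONNX file
const audioModel = await ClapAudioModelWithProjection.from_pretrained(repo); // resolves the "audio_model" ONNX file

const tokenizer = await AutoTokenizer.from_pretrained(repo);
const inputs = tokenizer(['a dog barking'], { padding: true });
const textOutputs = await textModel(inputs); // contains the projected text embeddings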
3000
+ class VitsPreTrainedModel extends PreTrainedModel {
3001
+ }
3002
+ class VitsModel extends VitsPreTrainedModel {
3003
+ /**
3004
+ * Calls the model on new inputs.
3005
+ * @param {Object} model_inputs The inputs to the model.
3006
+ * @returns {Promise<VitsModelOutput>} The outputs for the VITS model.
3007
+ */
3008
+ async _call(model_inputs) {
3009
+ return new VitsModelOutput(await super._call(model_inputs));
3010
+ }
3011
+ }
3012
+ class SegformerPreTrainedModel extends PreTrainedModel {
3013
+ }
3014
+ class SegformerForImageClassification extends SegformerPreTrainedModel {
3015
+ }
3016
+ class SegformerForSemanticSegmentation extends SegformerPreTrainedModel {
3017
+ }
3018
+ class StableLmPreTrainedModel extends PreTrainedModel {
3019
+ /**
3020
+ * Creates a new instance of the `StableLmPreTrainedModel` class.
3021
+ * @param {Object} config The configuration of the model.
3022
+ * @param {any} session The ONNX session containing the model weights.
3023
+ * @param {GenerationConfig} generation_config The generation configuration.
3024
+ */
3025
+ constructor(config, session, generation_config) {
3026
+ super(config, session);
3027
+ this.generation_config = generation_config;
3028
+ this.config.pad_token_id = this.config.eos_token_id;
3029
+ this.num_heads = this.config.num_attention_heads;
3030
+ this.num_layers = this.config.num_hidden_layers;
3031
+ this.dim_kv = this.config.hidden_size / this.num_heads;
3032
+ }
3033
+ }
3034
+ class StableLmForCausalLM extends StableLmPreTrainedModel {
3035
+ }
3036
+ class EfficientNetPreTrainedModel extends PreTrainedModel {
3037
+ }
3038
+ class EfficientNetModel extends EfficientNetPreTrainedModel {
3039
+ }
3040
+ class EfficientNetForImageClassification extends EfficientNetPreTrainedModel {
3041
+ /**
3042
+ * @param {any} model_inputs
3043
+ */
3044
+ async _call(model_inputs) {
3045
+ return new SequenceClassifierOutput(await super._call(model_inputs));
3046
+ }
3047
+ }
3048
+ class PretrainedMixin {
3049
+ /**
3050
+ * Mapping from model type to model class.
3051
+ * @type {Map<string, Object>[]}
3052
+ */
3053
+ static MODEL_CLASS_MAPPINGS = null;
3054
+ /**
3055
+ * Whether to attempt to instantiate the base class (`PreTrainedModel`) if
3056
+ * the model type is not found in the mapping.
3057
+ */
3058
+ static BASE_IF_FAIL = false;
3059
+ /** @type {PreTrainedModel.from_pretrained} */
3060
+ static async from_pretrained(pretrained_model_name_or_path, {
3061
+ quantized = true,
3062
+ progress_callback = null,
3063
+ config = null,
3064
+ cache_dir = null,
3065
+ local_files_only = false,
3066
+ revision = "main",
3067
+ model_file_name = null
3068
+ } = {}) {
3069
+ let options = {
3070
+ quantized,
3071
+ progress_callback,
3072
+ config,
3073
+ cache_dir,
3074
+ local_files_only,
3075
+ revision,
3076
+ model_file_name
3077
+ };
3078
+ config = await AutoConfig.from_pretrained(pretrained_model_name_or_path, options);
3079
+ if (!options.config) {
3080
+ options.config = config;
3081
+ }
3082
+ if (!this.MODEL_CLASS_MAPPINGS) {
3083
+ throw new Error("`MODEL_CLASS_MAPPINGS` not implemented for this type of `AutoClass`: " + this.name);
3084
+ }
3085
+ for (let MODEL_CLASS_MAPPING of this.MODEL_CLASS_MAPPINGS) {
3086
+ const modelInfo = MODEL_CLASS_MAPPING.get(config.model_type);
3087
+ if (!modelInfo) {
3088
+ continue;
3089
+ }
3090
+ return await modelInfo[1].from_pretrained(pretrained_model_name_or_path, options);
3091
+ }
3092
+ if (this.BASE_IF_FAIL) {
3093
+ console.warn(`Unknown model class "${config.model_type}", attempting to construct from base class.`);
3094
+ return await PreTrainedModel.from_pretrained(pretrained_model_name_or_path, options);
3095
+ } else {
3096
+ throw Error(`Unsupported model type: ${config.model_type}`);
3097
+ }
3098
+ }
3099
+ }
3100
+ const MODEL_MAPPING_NAMES_ENCODER_ONLY = /* @__PURE__ */ new Map([
3101
+ ["bert", ["BertModel", BertModel]],
3102
+ ["nomic_bert", ["NomicBertModel", NomicBertModel]],
3103
+ ["roformer", ["RoFormerModel", RoFormerModel]],
3104
+ ["electra", ["ElectraModel", ElectraModel]],
3105
+ ["esm", ["EsmModel", EsmModel]],
3106
+ ["convbert", ["ConvBertModel", ConvBertModel]],
3107
+ ["camembert", ["CamembertModel", CamembertModel]],
3108
+ ["deberta", ["DebertaModel", DebertaModel]],
3109
+ ["deberta-v2", ["DebertaV2Model", DebertaV2Model]],
3110
+ ["mpnet", ["MPNetModel", MPNetModel]],
3111
+ ["albert", ["AlbertModel", AlbertModel]],
3112
+ ["distilbert", ["DistilBertModel", DistilBertModel]],
3113
+ ["roberta", ["RobertaModel", RobertaModel]],
3114
+ ["xlm", ["XLMModel", XLMModel]],
3115
+ ["xlm-roberta", ["XLMRobertaModel", XLMRobertaModel]],
3116
+ ["clap", ["ClapModel", ClapModel]],
3117
+ ["clip", ["CLIPModel", CLIPModel]],
3118
+ ["clipseg", ["CLIPSegModel", CLIPSegModel]],
3119
+ ["chinese_clip", ["ChineseCLIPModel", ChineseCLIPModel]],
3120
+ ["siglip", ["SiglipModel", SiglipModel]],
3121
+ ["mobilebert", ["MobileBertModel", MobileBertModel]],
3122
+ ["squeezebert", ["SqueezeBertModel", SqueezeBertModel]],
3123
+ ["wav2vec2", ["Wav2Vec2Model", Wav2Vec2Model]],
3124
+ ["wav2vec2-bert", ["Wav2Vec2BertModel", Wav2Vec2BertModel]],
3125
+ ["unispeech", ["UniSpeechModel", UniSpeechModel]],
3126
+ ["unispeech-sat", ["UniSpeechSatModel", UniSpeechSatModel]],
3127
+ ["hubert", ["HubertModel", HubertModel]],
3128
+ ["wavlm", ["WavLMModel", WavLMModel]],
3129
+ ["audio-spectrogram-transformer", ["ASTModel", ASTModel]],
3130
+ ["vits", ["VitsModel", VitsModel]],
3131
+ ["detr", ["DetrModel", DetrModel]],
3132
+ ["table-transformer", ["TableTransformerModel", TableTransformerModel]],
3133
+ ["vit", ["ViTModel", ViTModel]],
3134
+ ["fastvit", ["FastViTModel", FastViTModel]],
3135
+ ["mobilevit", ["MobileViTModel", MobileViTModel]],
3136
+ ["mobilevitv2", ["MobileViTV2Model", MobileViTV2Model]],
3137
+ ["owlvit", ["OwlViTModel", OwlViTModel]],
3138
+ ["owlv2", ["Owlv2Model", Owlv2Model]],
3139
+ ["beit", ["BeitModel", BeitModel]],
3140
+ ["deit", ["DeiTModel", DeiTModel]],
3141
+ ["convnext", ["ConvNextModel", ConvNextModel]],
3142
+ ["convnextv2", ["ConvNextV2Model", ConvNextV2Model]],
3143
+ ["dinov2", ["Dinov2Model", Dinov2Model]],
3144
+ ["resnet", ["ResNetModel", ResNetModel]],
3145
+ ["swin", ["SwinModel", SwinModel]],
3146
+ ["swin2sr", ["Swin2SRModel", Swin2SRModel]],
3147
+ ["donut-swin", ["DonutSwinModel", DonutSwinModel]],
3148
+ ["yolos", ["YolosModel", YolosModel]],
3149
+ ["dpt", ["DPTModel", DPTModel]],
3150
+ ["glpn", ["GLPNModel", GLPNModel]],
3151
+ ["hifigan", ["SpeechT5HifiGan", SpeechT5HifiGan]],
3152
+ ["efficientnet", ["EfficientNetModel", EfficientNetModel]]
3153
+ ]);
3154
+ const MODEL_MAPPING_NAMES_ENCODER_DECODER = /* @__PURE__ */ new Map([
3155
+ ["t5", ["T5Model", T5Model]],
3156
+ ["longt5", ["LongT5Model", LongT5Model]],
3157
+ ["mt5", ["MT5Model", MT5Model]],
3158
+ ["bart", ["BartModel", BartModel]],
3159
+ ["mbart", ["MBartModel", MBartModel]],
3160
+ ["marian", ["MarianModel", MarianModel]],
3161
+ ["whisper", ["WhisperModel", WhisperModel]],
3162
+ ["m2m_100", ["M2M100Model", M2M100Model]],
3163
+ ["blenderbot", ["BlenderbotModel", BlenderbotModel]],
3164
+ ["blenderbot-small", ["BlenderbotSmallModel", BlenderbotSmallModel]]
3165
+ ]);
3166
+ const MODEL_MAPPING_NAMES_DECODER_ONLY = /* @__PURE__ */ new Map([
3167
+ ["bloom", ["BloomModel", BloomModel]],
3168
+ ["gpt2", ["GPT2Model", GPT2Model]],
3169
+ ["gptj", ["GPTJModel", GPTJModel]],
3170
+ ["gpt_bigcode", ["GPTBigCodeModel", GPTBigCodeModel]],
3171
+ ["gpt_neo", ["GPTNeoModel", GPTNeoModel]],
3172
+ ["gpt_neox", ["GPTNeoXModel", GPTNeoXModel]],
3173
+ ["codegen", ["CodeGenModel", CodeGenModel]],
3174
+ ["llama", ["LlamaModel", LlamaModel]],
3175
+ ["qwen2", ["Qwen2Model", Qwen2Model]],
3176
+ ["phi", ["PhiModel", PhiModel]],
3177
+ ["mpt", ["MptModel", MptModel]],
3178
+ ["opt", ["OPTModel", OPTModel]],
3179
+ ["mistral", ["MistralModel", MistralModel]],
3180
+ ["starcoder2", ["Starcoder2Model", Starcoder2Model]],
3181
+ ["falcon", ["FalconModel", FalconModel]]
3182
+ ]);
3183
+ const MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES = /* @__PURE__ */ new Map([
3184
+ ["speecht5", ["SpeechT5ForSpeechToText", SpeechT5ForSpeechToText]],
3185
+ ["whisper", ["WhisperForConditionalGeneration", WhisperForConditionalGeneration]]
3186
+ ]);
3187
+ const MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPING_NAMES = /* @__PURE__ */ new Map([
3188
+ ["speecht5", ["SpeechT5ForTextToSpeech", SpeechT5ForTextToSpeech]]
3189
+ ]);
3190
+ const MODEL_FOR_TEXT_TO_WAVEFORM_MAPPING_NAMES = /* @__PURE__ */ new Map([
3191
+ ["vits", ["VitsModel", VitsModel]]
3192
+ ]);
3193
+ const MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES = /* @__PURE__ */ new Map([
3194
+ ["bert", ["BertForSequenceClassification", BertForSequenceClassification]],
3195
+ ["roformer", ["RoFormerForSequenceClassification", RoFormerForSequenceClassification]],
3196
+ ["electra", ["ElectraForSequenceClassification", ElectraForSequenceClassification]],
3197
+ ["esm", ["EsmForSequenceClassification", EsmForSequenceClassification]],
3198
+ ["convbert", ["ConvBertForSequenceClassification", ConvBertForSequenceClassification]],
3199
+ ["camembert", ["CamembertForSequenceClassification", CamembertForSequenceClassification]],
3200
+ ["deberta", ["DebertaForSequenceClassification", DebertaForSequenceClassification]],
3201
+ ["deberta-v2", ["DebertaV2ForSequenceClassification", DebertaV2ForSequenceClassification]],
3202
+ ["mpnet", ["MPNetForSequenceClassification", MPNetForSequenceClassification]],
3203
+ ["albert", ["AlbertForSequenceClassification", AlbertForSequenceClassification]],
3204
+ ["distilbert", ["DistilBertForSequenceClassification", DistilBertForSequenceClassification]],
3205
+ ["roberta", ["RobertaForSequenceClassification", RobertaForSequenceClassification]],
3206
+ ["xlm", ["XLMForSequenceClassification", XLMForSequenceClassification]],
3207
+ ["xlm-roberta", ["XLMRobertaForSequenceClassification", XLMRobertaForSequenceClassification]],
3208
+ ["bart", ["BartForSequenceClassification", BartForSequenceClassification]],
3209
+ ["mbart", ["MBartForSequenceClassification", MBartForSequenceClassification]],
3210
+ ["mobilebert", ["MobileBertForSequenceClassification", MobileBertForSequenceClassification]],
3211
+ ["squeezebert", ["SqueezeBertForSequenceClassification", SqueezeBertForSequenceClassification]]
3212
+ ]);
3213
+ const MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES = /* @__PURE__ */ new Map([
3214
+ ["bert", ["BertForTokenClassification", BertForTokenClassification]],
3215
+ ["roformer", ["RoFormerForTokenClassification", RoFormerForTokenClassification]],
3216
+ ["electra", ["ElectraForTokenClassification", ElectraForTokenClassification]],
3217
+ ["esm", ["EsmForTokenClassification", EsmForTokenClassification]],
3218
+ ["convbert", ["ConvBertForTokenClassification", ConvBertForTokenClassification]],
3219
+ ["camembert", ["CamembertForTokenClassification", CamembertForTokenClassification]],
3220
+ ["deberta", ["DebertaForTokenClassification", DebertaForTokenClassification]],
3221
+ ["deberta-v2", ["DebertaV2ForTokenClassification", DebertaV2ForTokenClassification]],
3222
+ ["mpnet", ["MPNetForTokenClassification", MPNetForTokenClassification]],
3223
+ ["distilbert", ["DistilBertForTokenClassification", DistilBertForTokenClassification]],
3224
+ ["roberta", ["RobertaForTokenClassification", RobertaForTokenClassification]],
3225
+ ["xlm", ["XLMForTokenClassification", XLMForTokenClassification]],
3226
+ ["xlm-roberta", ["XLMRobertaForTokenClassification", XLMRobertaForTokenClassification]]
3227
+ ]);
3228
+ const MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES = /* @__PURE__ */ new Map([
3229
+ ["t5", ["T5ForConditionalGeneration", T5ForConditionalGeneration]],
3230
+ ["longt5", ["LongT5ForConditionalGeneration", LongT5ForConditionalGeneration]],
3231
+ ["mt5", ["MT5ForConditionalGeneration", MT5ForConditionalGeneration]],
3232
+ ["bart", ["BartForConditionalGeneration", BartForConditionalGeneration]],
3233
+ ["mbart", ["MBartForConditionalGeneration", MBartForConditionalGeneration]],
3234
+ ["marian", ["MarianMTModel", MarianMTModel]],
3235
+ ["m2m_100", ["M2M100ForConditionalGeneration", M2M100ForConditionalGeneration]],
3236
+ ["blenderbot", ["BlenderbotForConditionalGeneration", BlenderbotForConditionalGeneration]],
3237
+ ["blenderbot-small", ["BlenderbotSmallForConditionalGeneration", BlenderbotSmallForConditionalGeneration]]
3238
+ ]);
3239
+ const MODEL_WITH_LM_HEAD_MAPPING_NAMES = /* @__PURE__ */ new Map([
3240
+ ["bloom", ["BloomForCausalLM", BloomForCausalLM]],
3241
+ ["gpt2", ["GPT2LMHeadModel", GPT2LMHeadModel]],
3242
+ ["gptj", ["GPTJForCausalLM", GPTJForCausalLM]],
3243
+ ["gpt_bigcode", ["GPTBigCodeForCausalLM", GPTBigCodeForCausalLM]],
3244
+ ["gpt_neo", ["GPTNeoForCausalLM", GPTNeoForCausalLM]],
3245
+ ["gpt_neox", ["GPTNeoXForCausalLM", GPTNeoXForCausalLM]],
3246
+ ["codegen", ["CodeGenForCausalLM", CodeGenForCausalLM]],
3247
+ ["llama", ["LlamaForCausalLM", LlamaForCausalLM]],
3248
+ ["qwen2", ["Qwen2ForCausalLM", Qwen2ForCausalLM]],
3249
+ ["phi", ["PhiForCausalLM", PhiForCausalLM]],
3250
+ ["mpt", ["MptForCausalLM", MptForCausalLM]],
3251
+ ["opt", ["OPTForCausalLM", OPTForCausalLM]],
3252
+ ["mbart", ["MBartForCausalLM", MBartForCausalLM]],
3253
+ ["mistral", ["MistralForCausalLM", MistralForCausalLM]],
3254
+ ["starcoder2", ["Starcoder2ForCausalLM", Starcoder2ForCausalLM]],
3255
+ ["falcon", ["FalconForCausalLM", FalconForCausalLM]],
3256
+ ["trocr", ["TrOCRForCausalLM", TrOCRForCausalLM]],
3257
+ ["stablelm", ["StableLmForCausalLM", StableLmForCausalLM]]
3258
+ ]);
3259
+ const MODEL_FOR_MASKED_LM_MAPPING_NAMES = /* @__PURE__ */ new Map([
3260
+ ["bert", ["BertForMaskedLM", BertForMaskedLM]],
3261
+ ["roformer", ["RoFormerForMaskedLM", RoFormerForMaskedLM]],
3262
+ ["electra", ["ElectraForMaskedLM", ElectraForMaskedLM]],
3263
+ ["esm", ["EsmForMaskedLM", EsmForMaskedLM]],
3264
+ ["convbert", ["ConvBertForMaskedLM", ConvBertForMaskedLM]],
3265
+ ["camembert", ["CamembertForMaskedLM", CamembertForMaskedLM]],
3266
+ ["deberta", ["DebertaForMaskedLM", DebertaForMaskedLM]],
3267
+ ["deberta-v2", ["DebertaV2ForMaskedLM", DebertaV2ForMaskedLM]],
3268
+ ["mpnet", ["MPNetForMaskedLM", MPNetForMaskedLM]],
3269
+ ["albert", ["AlbertForMaskedLM", AlbertForMaskedLM]],
3270
+ ["distilbert", ["DistilBertForMaskedLM", DistilBertForMaskedLM]],
3271
+ ["roberta", ["RobertaForMaskedLM", RobertaForMaskedLM]],
3272
+ ["xlm", ["XLMWithLMHeadModel", XLMWithLMHeadModel]],
3273
+ ["xlm-roberta", ["XLMRobertaForMaskedLM", XLMRobertaForMaskedLM]],
3274
+ ["mobilebert", ["MobileBertForMaskedLM", MobileBertForMaskedLM]],
3275
+ ["squeezebert", ["SqueezeBertForMaskedLM", SqueezeBertForMaskedLM]]
3276
+ ]);
3277
+ const MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES = /* @__PURE__ */ new Map([
3278
+ ["bert", ["BertForQuestionAnswering", BertForQuestionAnswering]],
3279
+ ["roformer", ["RoFormerForQuestionAnswering", RoFormerForQuestionAnswering]],
3280
+ ["electra", ["ElectraForQuestionAnswering", ElectraForQuestionAnswering]],
3281
+ ["convbert", ["ConvBertForQuestionAnswering", ConvBertForQuestionAnswering]],
3282
+ ["camembert", ["CamembertForQuestionAnswering", CamembertForQuestionAnswering]],
3283
+ ["deberta", ["DebertaForQuestionAnswering", DebertaForQuestionAnswering]],
3284
+ ["deberta-v2", ["DebertaV2ForQuestionAnswering", DebertaV2ForQuestionAnswering]],
3285
+ ["mpnet", ["MPNetForQuestionAnswering", MPNetForQuestionAnswering]],
3286
+ ["albert", ["AlbertForQuestionAnswering", AlbertForQuestionAnswering]],
3287
+ ["distilbert", ["DistilBertForQuestionAnswering", DistilBertForQuestionAnswering]],
3288
+ ["roberta", ["RobertaForQuestionAnswering", RobertaForQuestionAnswering]],
3289
+ ["xlm", ["XLMForQuestionAnswering", XLMForQuestionAnswering]],
3290
+ ["xlm-roberta", ["XLMRobertaForQuestionAnswering", XLMRobertaForQuestionAnswering]],
3291
+ ["mobilebert", ["MobileBertForQuestionAnswering", MobileBertForQuestionAnswering]],
3292
+ ["squeezebert", ["SqueezeBertForQuestionAnswering", SqueezeBertForQuestionAnswering]]
3293
+ ]);
3294
+ const MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES = /* @__PURE__ */ new Map([
3295
+ ["vision-encoder-decoder", ["VisionEncoderDecoderModel", VisionEncoderDecoderModel]]
3296
+ ]);
3297
+ const MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES = /* @__PURE__ */ new Map([
3298
+ ["vit", ["ViTForImageClassification", ViTForImageClassification]],
3299
+ ["fastvit", ["FastViTForImageClassification", FastViTForImageClassification]],
3300
+ ["mobilevit", ["MobileViTForImageClassification", MobileViTForImageClassification]],
3301
+ ["mobilevitv2", ["MobileViTV2ForImageClassification", MobileViTV2ForImageClassification]],
3302
+ ["beit", ["BeitForImageClassification", BeitForImageClassification]],
3303
+ ["deit", ["DeiTForImageClassification", DeiTForImageClassification]],
3304
+ ["convnext", ["ConvNextForImageClassification", ConvNextForImageClassification]],
3305
+ ["convnextv2", ["ConvNextV2ForImageClassification", ConvNextV2ForImageClassification]],
3306
+ ["dinov2", ["Dinov2ForImageClassification", Dinov2ForImageClassification]],
3307
+ ["resnet", ["ResNetForImageClassification", ResNetForImageClassification]],
3308
+ ["swin", ["SwinForImageClassification", SwinForImageClassification]],
3309
+ ["segformer", ["SegformerForImageClassification", SegformerForImageClassification]],
3310
+ ["efficientnet", ["EfficientNetForImageClassification", EfficientNetForImageClassification]]
3311
+ ]);
3312
+ const MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES = /* @__PURE__ */ new Map([
3313
+ ["detr", ["DetrForObjectDetection", DetrForObjectDetection]],
3314
+ ["table-transformer", ["TableTransformerForObjectDetection", TableTransformerForObjectDetection]],
3315
+ ["yolos", ["YolosForObjectDetection", YolosForObjectDetection]]
3316
+ ]);
3317
+ const MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING_NAMES = /* @__PURE__ */ new Map([
3318
+ ["owlvit", ["OwlViTForObjectDetection", OwlViTForObjectDetection]],
3319
+ ["owlv2", ["Owlv2ForObjectDetection", Owlv2ForObjectDetection]]
3320
+ ]);
3321
+ const MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES = /* @__PURE__ */ new Map([
3322
+ ["detr", ["DetrForSegmentation", DetrForSegmentation]],
3323
+ ["clipseg", ["CLIPSegForImageSegmentation", CLIPSegForImageSegmentation]]
3324
+ ]);
3325
+ const MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES = /* @__PURE__ */ new Map([
3326
+ ["segformer", ["SegformerForSemanticSegmentation", SegformerForSemanticSegmentation]]
3327
+ ]);
3328
+ const MODEL_FOR_MASK_GENERATION_MAPPING_NAMES = /* @__PURE__ */ new Map([
3329
+ ["sam", ["SamModel", SamModel]]
3330
+ ]);
3331
+ const MODEL_FOR_CTC_MAPPING_NAMES = /* @__PURE__ */ new Map([
3332
+ ["wav2vec2", ["Wav2Vec2ForCTC", Wav2Vec2ForCTC]],
3333
+ ["wav2vec2-bert", ["Wav2Vec2BertForCTC", Wav2Vec2BertForCTC]],
3334
+ ["unispeech", ["UniSpeechForCTC", UniSpeechForCTC]],
3335
+ ["unispeech-sat", ["UniSpeechSatForCTC", UniSpeechSatForCTC]],
3336
+ ["wavlm", ["WavLMForCTC", WavLMForCTC]],
3337
+ ["hubert", ["HubertForCTC", HubertForCTC]]
3338
+ ]);
3339
+ const MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES = /* @__PURE__ */ new Map([
3340
+ ["wav2vec2", ["Wav2Vec2ForSequenceClassification", Wav2Vec2ForSequenceClassification]],
3341
+ ["wav2vec2-bert", ["Wav2Vec2BertForSequenceClassification", Wav2Vec2BertForSequenceClassification]],
3342
+ ["unispeech", ["UniSpeechForSequenceClassification", UniSpeechForSequenceClassification]],
3343
+ ["unispeech-sat", ["UniSpeechSatForSequenceClassification", UniSpeechSatForSequenceClassification]],
3344
+ ["wavlm", ["WavLMForSequenceClassification", WavLMForSequenceClassification]],
3345
+ ["hubert", ["HubertForSequenceClassification", HubertForSequenceClassification]],
3346
+ ["audio-spectrogram-transformer", ["ASTForAudioClassification", ASTForAudioClassification]]
3347
+ ]);
3348
+ const MODEL_FOR_AUDIO_XVECTOR_MAPPING_NAMES = /* @__PURE__ */ new Map([
3349
+ ["wavlm", ["WavLMForXVector", WavLMForXVector]]
3350
+ ]);
3351
+ const MODEL_FOR_AUDIO_FRAME_CLASSIFICATION_MAPPING_NAMES = /* @__PURE__ */ new Map([
3352
+ ["unispeech-sat", ["UniSpeechSatForAudioFrameClassification", UniSpeechSatForAudioFrameClassification]],
3353
+ ["wavlm", ["WavLMForAudioFrameClassification", WavLMForAudioFrameClassification]],
3354
+ ["wav2vec2", ["Wav2Vec2ForAudioFrameClassification", Wav2Vec2ForAudioFrameClassification]]
3355
+ ]);
3356
+ const MODEL_FOR_IMAGE_MATTING_MAPPING_NAMES = /* @__PURE__ */ new Map([
3357
+ ["vitmatte", ["VitMatteForImageMatting", VitMatteForImageMatting]]
3358
+ ]);
3359
+ const MODEL_FOR_IMAGE_TO_IMAGE_MAPPING_NAMES = /* @__PURE__ */ new Map([
3360
+ ["swin2sr", ["Swin2SRForImageSuperResolution", Swin2SRForImageSuperResolution]]
3361
+ ]);
3362
+ const MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES = /* @__PURE__ */ new Map([
3363
+ ["dpt", ["DPTForDepthEstimation", DPTForDepthEstimation]],
3364
+ ["depth_anything", ["DepthAnythingForDepthEstimation", DepthAnythingForDepthEstimation]],
3365
+ ["glpn", ["GLPNForDepthEstimation", GLPNForDepthEstimation]]
3366
+ ]);
3367
+ const MODEL_FOR_IMAGE_FEATURE_EXTRACTION_MAPPING_NAMES = /* @__PURE__ */ new Map([
3368
+ ["clip", ["CLIPVisionModelWithProjection", CLIPVisionModelWithProjection]],
3369
+ ["siglip", ["SiglipVisionModel", SiglipVisionModel]]
3370
+ ]);
+ const MODEL_CLASS_TYPE_MAPPING = [
+ [MODEL_MAPPING_NAMES_ENCODER_ONLY, MODEL_TYPES.EncoderOnly],
+ [MODEL_MAPPING_NAMES_ENCODER_DECODER, MODEL_TYPES.EncoderDecoder],
+ [MODEL_MAPPING_NAMES_DECODER_ONLY, MODEL_TYPES.DecoderOnly],
+ [MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
+ [MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
+ [MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES, MODEL_TYPES.Seq2Seq],
+ [MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES, MODEL_TYPES.Seq2Seq],
+ [MODEL_WITH_LM_HEAD_MAPPING_NAMES, MODEL_TYPES.DecoderOnly],
+ [MODEL_FOR_MASKED_LM_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
+ [MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
+ [MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES, MODEL_TYPES.Vision2Seq],
+ [MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
+ [MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
+ [MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
+ [MODEL_FOR_IMAGE_MATTING_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
+ [MODEL_FOR_IMAGE_TO_IMAGE_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
+ [MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
+ [MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
+ [MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
+ [MODEL_FOR_MASK_GENERATION_MAPPING_NAMES, MODEL_TYPES.MaskGeneration],
+ [MODEL_FOR_CTC_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
+ [MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
+ [MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPING_NAMES, MODEL_TYPES.Seq2Seq],
+ [MODEL_FOR_TEXT_TO_WAVEFORM_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
+ [MODEL_FOR_AUDIO_XVECTOR_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
+ [MODEL_FOR_AUDIO_FRAME_CLASSIFICATION_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
+ // Custom:
+ [MODEL_FOR_IMAGE_FEATURE_EXTRACTION_MAPPING_NAMES, MODEL_TYPES.EncoderOnly]
+ ];
+ for (const [mappings, type] of MODEL_CLASS_TYPE_MAPPING) {
+ for (const [name, model] of mappings.values()) {
+ MODEL_TYPE_MAPPING.set(name, type);
+ MODEL_CLASS_TO_NAME_MAPPING.set(model, name);
+ MODEL_NAME_TO_CLASS_MAPPING.set(name, model);
+ }
+ }
+ const CUSTOM_MAPPING = [
+ ["CLIPTextModelWithProjection", CLIPTextModelWithProjection, MODEL_TYPES.EncoderOnly],
+ ["SiglipTextModel", SiglipTextModel, MODEL_TYPES.EncoderOnly],
+ ["ClapTextModelWithProjection", ClapTextModelWithProjection, MODEL_TYPES.EncoderOnly],
+ ["ClapAudioModelWithProjection", ClapAudioModelWithProjection, MODEL_TYPES.EncoderOnly]
+ ];
+ for (const [name, model, type] of CUSTOM_MAPPING) {
+ MODEL_TYPE_MAPPING.set(name, type);
+ MODEL_CLASS_TO_NAME_MAPPING.set(model, name);
+ MODEL_NAME_TO_CLASS_MAPPING.set(name, model);
+ }
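After the two registration loops run, the same information is reachable from three directions: class name to task type (`MODEL_TYPE_MAPPING`), class to name (`MODEL_CLASS_TO_NAME_MAPPING`), and name to class (`MODEL_NAME_TO_CLASS_MAPPING`). These maps stay module-internal in the bundle; the lines below only sketch the state they end up in:

```js
// Illustrative reads against the registries populated above (not exported from models.js).
MODEL_TYPE_MAPPING.get("WavLMForXVector");                    // MODEL_TYPES.EncoderOnly
MODEL_NAME_TO_CLASS_MAPPING.get("WavLMForXVector");           // the WavLMForXVector class
MODEL_CLASS_TO_NAME_MAPPING.get(CLIPTextModelWithProjection); // "CLIPTextModelWithProjection"
```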
+ class AutoModel extends PretrainedMixin {
+ /** @type {Map<string, Object>[]} */
+ // @ts-ignore
+ static MODEL_CLASS_MAPPINGS = MODEL_CLASS_TYPE_MAPPING.map((x) => x[0]);
+ static BASE_IF_FAIL = true;
+ }
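As in upstream Transformers.js, `AutoModel` scans `MODEL_CLASS_MAPPINGS` for the checkpoint's `config.model_type` and falls back to the base architecture when `BASE_IF_FAIL` is set. A hedged usage sketch, assuming the `from_pretrained` factory inherited from `PretrainedMixin`; the import path and model id are illustrative:

```js
import { AutoModel } from "./models.js"; // path is illustrative for this bundled file

// Resolves the concrete class (e.g. BertModel) from the checkpoint's config.model_type.
const model = await AutoModel.from_pretrained("Xenova/bert-base-uncased");
```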
+ class Seq2SeqLMOutput extends ModelOutput {
+ /**
+ * @param {Object} output The output of the model.
+ * @param {Tensor} output.logits The output logits of the model.
+ * @param {Tensor} output.past_key_values A tensor of key/value pairs that represents the previous state of the model.
+ * @param {Tensor} output.encoder_outputs The output of the encoder in a sequence-to-sequence model.
+ * @param {Tensor} [output.decoder_attentions] Attention weights of the decoder, after the attention softmax, used to compute the weighted average in the self-attention heads.
+ * @param {Tensor} [output.cross_attentions] Attention weights of the decoder's cross-attention layer, after the attention softmax, used to compute the weighted average in the cross-attention heads.
+ */
+ constructor({ logits, past_key_values, encoder_outputs, decoder_attentions = null, cross_attentions = null }) {
+ super();
+ this.logits = logits;
+ this.past_key_values = past_key_values;
+ this.encoder_outputs = encoder_outputs;
+ this.decoder_attentions = decoder_attentions;
+ this.cross_attentions = cross_attentions;
+ }
+ }
+ class SequenceClassifierOutput extends ModelOutput {
+ /**
+ * @param {Object} output The output of the model.
+ * @param {Tensor} output.logits Classification (or regression if config.num_labels==1) scores (before SoftMax).
+ */
+ constructor({ logits }) {
+ super();
+ this.logits = logits;
+ }
+ }
+ class XVectorOutput extends ModelOutput {
+ /**
+ * @param {Object} output The output of the model.
+ * @param {Tensor} output.logits Classification hidden states before AMSoftmax, of shape `(batch_size, config.xvector_output_dim)`.
+ * @param {Tensor} output.embeddings Utterance embeddings used for vector similarity-based retrieval, of shape `(batch_size, config.xvector_output_dim)`.
+ */
+ constructor({ logits, embeddings }) {
+ super();
+ this.logits = logits;
+ this.embeddings = embeddings;
+ }
+ }
+ class TokenClassifierOutput extends ModelOutput {
+ /**
+ * @param {Object} output The output of the model.
+ * @param {Tensor} output.logits Classification scores (before SoftMax).
+ */
+ constructor({ logits }) {
+ super();
+ this.logits = logits;
+ }
+ }
+ class MaskedLMOutput extends ModelOutput {
+ /**
+ * @param {Object} output The output of the model.
+ * @param {Tensor} output.logits Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
+ */
+ constructor({ logits }) {
+ super();
+ this.logits = logits;
+ }
+ }
+ class QuestionAnsweringModelOutput extends ModelOutput {
+ /**
+ * @param {Object} output The output of the model.
+ * @param {Tensor} output.start_logits Span-start scores (before SoftMax).
+ * @param {Tensor} output.end_logits Span-end scores (before SoftMax).
+ */
+ constructor({ start_logits, end_logits }) {
+ super();
+ this.start_logits = start_logits;
+ this.end_logits = end_logits;
+ }
+ }
+ class CausalLMOutput extends ModelOutput {
+ /**
+ * @param {Object} output The output of the model.
+ * @param {Tensor} output.logits Prediction scores of the language modeling head (scores for each vocabulary token before softmax).
+ */
+ constructor({ logits }) {
+ super();
+ this.logits = logits;
+ }
+ }
+ class ImageMattingOutput extends ModelOutput {
+ /**
+ * @param {Object} output The output of the model.
+ * @param {Tensor} output.alphas Estimated alpha values, of shape `(batch_size, num_channels, height, width)`.
+ */
+ constructor({ alphas }) {
+ super();
+ this.alphas = alphas;
+ }
+ }
+ class VitsModelOutput extends ModelOutput {
+ /**
+ * @param {Object} output The output of the model.
+ * @param {Tensor} output.waveform The final audio waveform predicted by the model, of shape `(batch_size, sequence_length)`.
+ * @param {Tensor} output.spectrogram The log-mel spectrogram predicted at the output of the flow model.
+ * This spectrogram is passed to the Hi-Fi GAN decoder model to obtain the final audio waveform.
+ */
+ constructor({ waveform, spectrogram }) {
+ super();
+ this.waveform = waveform;
+ this.spectrogram = spectrogram;
+ }
+ }
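All of these `*Output` classes are thin containers that just name the tensors a forward pass returns; the constructors assign their fields verbatim. A small sketch, using plain placeholder objects where real code would pass `Tensor` instances:

```js
// The output classes store their named fields as-is; tensor-like values pass through unchanged.
const start_logits = { dims: [1, 128], data: new Float32Array(128) }; // placeholder for a Tensor
const end_logits = { dims: [1, 128], data: new Float32Array(128) };
const qa = new QuestionAnsweringModelOutput({ start_logits, end_logits });
console.log(qa.start_logits.dims); // [1, 128]
```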
+ export {
+ ASTForAudioClassification,
+ ASTModel,
+ ASTPreTrainedModel,
+ AlbertForMaskedLM,
+ AlbertForQuestionAnswering,
+ AlbertForSequenceClassification,
+ AlbertModel,
+ AlbertPreTrainedModel,
+ AutoModel,
+ BartForConditionalGeneration,
+ BartForSequenceClassification,
+ BartModel,
+ BartPretrainedModel,
+ BeitForImageClassification,
+ BeitModel,
+ BeitPreTrainedModel,
+ BertForMaskedLM,
+ BertForQuestionAnswering,
+ BertForSequenceClassification,
+ BertForTokenClassification,
+ BertModel,
+ BertPreTrainedModel,
+ BlenderbotForConditionalGeneration,
+ BlenderbotModel,
+ BlenderbotPreTrainedModel,
+ BlenderbotSmallForConditionalGeneration,
+ BlenderbotSmallModel,
+ BlenderbotSmallPreTrainedModel,
+ BloomForCausalLM,
+ BloomModel,
+ BloomPreTrainedModel,
+ CLIPModel,
+ CLIPPreTrainedModel,
+ CLIPSegForImageSegmentation,
+ CLIPSegModel,
+ CLIPSegPreTrainedModel,
+ CLIPTextModelWithProjection,
+ CLIPVisionModelWithProjection,
+ CamembertForMaskedLM,
+ CamembertForQuestionAnswering,
+ CamembertForSequenceClassification,
+ CamembertForTokenClassification,
+ CamembertModel,
+ CamembertPreTrainedModel,
+ CausalLMOutput,
+ ChineseCLIPModel,
+ ChineseCLIPPreTrainedModel,
+ ClapAudioModelWithProjection,
+ ClapModel,
+ ClapPreTrainedModel,
+ ClapTextModelWithProjection,
+ CodeGenForCausalLM,
+ CodeGenModel,
+ CodeGenPreTrainedModel,
+ ConvBertForMaskedLM,
+ ConvBertForQuestionAnswering,
+ ConvBertForSequenceClassification,
+ ConvBertForTokenClassification,
+ ConvBertModel,
+ ConvBertPreTrainedModel,
+ ConvNextForImageClassification,
+ ConvNextModel,
+ ConvNextPreTrainedModel,
+ ConvNextV2ForImageClassification,
+ ConvNextV2Model,
+ ConvNextV2PreTrainedModel,
+ DPTForDepthEstimation,
+ DPTModel,
+ DPTPreTrainedModel,
+ DebertaForMaskedLM,
+ DebertaForQuestionAnswering,
+ DebertaForSequenceClassification,
+ DebertaForTokenClassification,
+ DebertaModel,
+ DebertaPreTrainedModel,
+ DebertaV2ForMaskedLM,
+ DebertaV2ForQuestionAnswering,
+ DebertaV2ForSequenceClassification,
+ DebertaV2ForTokenClassification,
+ DebertaV2Model,
+ DebertaV2PreTrainedModel,
+ DeiTForImageClassification,
+ DeiTModel,
+ DeiTPreTrainedModel,
+ DepthAnythingForDepthEstimation,
+ DepthAnythingPreTrainedModel,
+ DetrForObjectDetection,
+ DetrForSegmentation,
+ DetrModel,
+ DetrObjectDetectionOutput,
+ DetrPreTrainedModel,
+ DetrSegmentationOutput,
+ Dinov2ForImageClassification,
+ Dinov2Model,
+ Dinov2PreTrainedModel,
+ DistilBertForMaskedLM,
+ DistilBertForQuestionAnswering,
+ DistilBertForSequenceClassification,
+ DistilBertForTokenClassification,
+ DistilBertModel,
+ DistilBertPreTrainedModel,
+ DonutSwinModel,
+ DonutSwinPreTrainedModel,
+ EfficientNetForImageClassification,
+ EfficientNetModel,
+ EfficientNetPreTrainedModel,
+ ElectraForMaskedLM,
+ ElectraForQuestionAnswering,
+ ElectraForSequenceClassification,
+ ElectraForTokenClassification,
+ ElectraModel,
+ ElectraPreTrainedModel,
+ EsmForMaskedLM,
+ EsmForSequenceClassification,
+ EsmForTokenClassification,
+ EsmModel,
+ EsmPreTrainedModel,
+ FalconForCausalLM,
+ FalconModel,
+ FalconPreTrainedModel,
+ FastViTForImageClassification,
+ FastViTModel,
+ FastViTPreTrainedModel,
+ GLPNForDepthEstimation,
+ GLPNModel,
+ GLPNPreTrainedModel,
+ GPT2LMHeadModel,
+ GPT2Model,
+ GPT2PreTrainedModel,
+ GPTBigCodeForCausalLM,
+ GPTBigCodeModel,
+ GPTBigCodePreTrainedModel,
+ GPTJForCausalLM,
+ GPTJModel,
+ GPTJPreTrainedModel,
+ GPTNeoForCausalLM,
+ GPTNeoModel,
+ GPTNeoPreTrainedModel,
+ GPTNeoXForCausalLM,
+ GPTNeoXModel,
+ GPTNeoXPreTrainedModel,
+ HubertForCTC,
+ HubertForSequenceClassification,
+ HubertModel,
+ ImageMattingOutput,
+ LlamaForCausalLM,
+ LlamaModel,
+ LlamaPreTrainedModel,
+ LongT5ForConditionalGeneration,
+ LongT5Model,
+ LongT5PreTrainedModel,
+ M2M100ForConditionalGeneration,
+ M2M100Model,
+ M2M100PreTrainedModel,
+ MBartForCausalLM,
+ MBartForConditionalGeneration,
+ MBartForSequenceClassification,
+ MBartModel,
+ MBartPreTrainedModel,
+ MPNetForMaskedLM,
+ MPNetForQuestionAnswering,
+ MPNetForSequenceClassification,
+ MPNetForTokenClassification,
+ MPNetModel,
+ MPNetPreTrainedModel,
+ MT5ForConditionalGeneration,
+ MT5Model,
+ MT5PreTrainedModel,
+ MarianMTModel,
+ MarianModel,
+ MarianPreTrainedModel,
+ MaskedLMOutput,
+ MistralForCausalLM,
+ MistralModel,
+ MistralPreTrainedModel,
+ MobileBertForMaskedLM,
+ MobileBertForQuestionAnswering,
+ MobileBertForSequenceClassification,
+ MobileBertModel,
+ MobileBertPreTrainedModel,
+ MobileViTForImageClassification,
+ MobileViTModel,
+ MobileViTPreTrainedModel,
+ MobileViTV2ForImageClassification,
+ MobileViTV2Model,
+ MobileViTV2PreTrainedModel,
+ ModelOutput,
+ MptForCausalLM,
+ MptModel,
+ MptPreTrainedModel,
+ NomicBertModel,
+ NomicBertPreTrainedModel,
+ OPTForCausalLM,
+ OPTModel,
+ OPTPreTrainedModel,
+ OwlViTForObjectDetection,
+ OwlViTModel,
+ OwlViTPreTrainedModel,
+ Owlv2ForObjectDetection,
+ Owlv2Model,
+ Owlv2PreTrainedModel,
+ PhiForCausalLM,
+ PhiModel,
+ PhiPreTrainedModel,
+ PreTrainedModel,
+ PretrainedMixin,
+ QuestionAnsweringModelOutput,
+ Qwen2ForCausalLM,
+ Qwen2Model,
+ Qwen2PreTrainedModel,
+ ResNetForImageClassification,
+ ResNetModel,
+ ResNetPreTrainedModel,
+ RoFormerForMaskedLM,
+ RoFormerForQuestionAnswering,
+ RoFormerForSequenceClassification,
+ RoFormerForTokenClassification,
+ RoFormerModel,
+ RoFormerPreTrainedModel,
+ RobertaForMaskedLM,
+ RobertaForQuestionAnswering,
+ RobertaForSequenceClassification,
+ RobertaForTokenClassification,
+ RobertaModel,
+ RobertaPreTrainedModel,
+ SamImageSegmentationOutput,
+ SamModel,
+ SamPreTrainedModel,
+ SegformerForImageClassification,
+ SegformerForSemanticSegmentation,
+ SegformerPreTrainedModel,
+ Seq2SeqLMOutput,
+ SequenceClassifierOutput,
+ SiglipModel,
+ SiglipPreTrainedModel,
+ SiglipTextModel,
+ SiglipVisionModel,
+ SpeechT5ForSpeechToText,
+ SpeechT5ForTextToSpeech,
+ SpeechT5HifiGan,
+ SpeechT5PreTrainedModel,
+ SqueezeBertForMaskedLM,
+ SqueezeBertForQuestionAnswering,
+ SqueezeBertForSequenceClassification,
+ SqueezeBertModel,
+ SqueezeBertPreTrainedModel,
+ StableLmForCausalLM,
+ StableLmPreTrainedModel,
+ Starcoder2ForCausalLM,
+ Starcoder2Model,
+ Starcoder2PreTrainedModel,
+ Swin2SRForImageSuperResolution,
+ Swin2SRModel,
+ Swin2SRPreTrainedModel,
+ SwinForImageClassification,
+ SwinModel,
+ SwinPreTrainedModel,
+ T5ForConditionalGeneration,
+ T5Model,
+ T5PreTrainedModel,
+ TableTransformerForObjectDetection,
+ TableTransformerModel,
+ TableTransformerObjectDetectionOutput,
+ TableTransformerPreTrainedModel,
+ TokenClassifierOutput,
+ TrOCRForCausalLM,
+ TrOCRPreTrainedModel,
+ UniSpeechForCTC,
+ UniSpeechForSequenceClassification,
+ UniSpeechModel,
+ UniSpeechPreTrainedModel,
+ UniSpeechSatForAudioFrameClassification,
+ UniSpeechSatForCTC,
+ UniSpeechSatForSequenceClassification,
+ UniSpeechSatModel,
+ UniSpeechSatPreTrainedModel,
+ ViTForImageClassification,
+ ViTModel,
+ ViTPreTrainedModel,
+ VisionEncoderDecoderModel,
+ VitMatteForImageMatting,
+ VitMattePreTrainedModel,
+ VitsModel,
+ VitsModelOutput,
+ VitsPreTrainedModel,
+ Wav2Vec2BertForCTC,
+ Wav2Vec2BertForSequenceClassification,
+ Wav2Vec2BertModel,
+ Wav2Vec2BertPreTrainedModel,
+ Wav2Vec2ForAudioFrameClassification,
+ Wav2Vec2ForCTC,
+ Wav2Vec2ForSequenceClassification,
+ Wav2Vec2Model,
+ Wav2Vec2PreTrainedModel,
+ WavLMForAudioFrameClassification,
+ WavLMForCTC,
+ WavLMForSequenceClassification,
+ WavLMForXVector,
+ WavLMModel,
+ WavLMPreTrainedModel,
+ WhisperForConditionalGeneration,
+ WhisperModel,
+ WhisperPreTrainedModel,
+ XLMForQuestionAnswering,
+ XLMForSequenceClassification,
+ XLMForTokenClassification,
+ XLMModel,
+ XLMPreTrainedModel,
+ XLMRobertaForMaskedLM,
+ XLMRobertaForQuestionAnswering,
+ XLMRobertaForSequenceClassification,
+ XLMRobertaForTokenClassification,
+ XLMRobertaModel,
+ XLMRobertaPreTrainedModel,
+ XLMWithLMHeadModel,
+ XVectorOutput,
+ YolosForObjectDetection,
+ YolosModel,
+ YolosObjectDetectionOutput,
+ YolosPreTrainedModel
+ };
+ //# sourceMappingURL=models.js.map