web-llm-runner 0.1.14 → 0.1.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/index.js CHANGED
@@ -12814,7 +12814,7 @@ class EnvImpl {
  /**
   * Represent a set of flags as a global singleton.
   */
- const env$2 = new EnvImpl();
+ const env$3 = new EnvImpl();
 
  // Copyright (c) Microsoft Corporation. All rights reserved.
  // Licensed under the MIT License.
@@ -13513,7 +13513,7 @@ var lib = /*#__PURE__*/Object.freeze({
  __proto__: null,
  InferenceSession: InferenceSession$1,
  Tensor: Tensor$1,
- env: env$2,
+ env: env$3,
  registerBackend: registerBackend
  });
 
@@ -13673,7 +13673,7 @@ if (onnx_env?.wasm) {
   * @property {Object} customCache The custom cache to use. Defaults to `null`. Note: this must be an object which
   * implements the `match` and `put` functions of the Web Cache API. For more information, see https://developer.mozilla.org/en-US/docs/Web/API/Cache
   */
- const env$1 = {
+ const env$2 = {
  version: VERSION,
 
  remoteHost: 'https://huggingface.co/',
@@ -13862,12 +13862,12 @@ function isValidUrl(string, protocols = null, validHosts = null) {
  */
  async function getFile(urlOrPath) {
 
- if (env$1.useFS && !isValidUrl(urlOrPath, ['http:', 'https:', 'blob:'])) {
+ if (env$2.useFS && !isValidUrl(urlOrPath, ['http:', 'https:', 'blob:'])) {
  return new FileResponse(urlOrPath);
 
  } else if (typeof process !== 'undefined' && process?.release?.name === 'node') {
  const IS_CI = !!process.env?.TESTING_REMOTELY;
- const version = env$1.version;
+ const version = env$2.version;
 
  const headers = new Headers();
  headers.set('User-Agent', `transformers.js/${version}; is_ci/${IS_CI};`);
@@ -14014,7 +14014,7 @@ async function tryCache(cache, ...names) {
  */
  async function getModelFile(path_or_repo_id, filename, fatal = true, options = {}) {
 
- if (!env$1.allowLocalModels) {
+ if (!env$2.allowLocalModels) {
  // User has disabled local models, so we just make sure other settings are correct.
 
  if (options.local_files_only) {
@@ -14032,7 +14032,7 @@ async function getModelFile(path_or_repo_id, filename, fatal = true, options = {
  // First, check if a caching backend is available
  // If no caching mechanism is available, the file will be downloaded every time
  let cache;
- if (!cache && env$1.useBrowserCache) {
+ if (!cache && env$2.useBrowserCache) {
  if (typeof caches === 'undefined') {
  throw Error('Browser cache is not available in this environment.')
  }
@@ -14048,21 +14048,21 @@ async function getModelFile(path_or_repo_id, filename, fatal = true, options = {
  }
  }
 
- if (!cache && env$1.useFSCache) {
+ if (!cache && env$2.useFSCache) {
  // TODO throw error if not available
 
  // If `cache_dir` is not specified, use the default cache directory
- cache = new FileCache(options.cache_dir ?? env$1.cacheDir);
+ cache = new FileCache(options.cache_dir ?? env$2.cacheDir);
  }
 
  const revision = options.revision ?? 'main';
 
  let requestURL = pathJoin(path_or_repo_id, filename);
- let localPath = pathJoin(env$1.localModelPath, requestURL);
+ let localPath = pathJoin(env$2.localModelPath, requestURL);
 
  let remoteURL = pathJoin(
- env$1.remoteHost,
- env$1.remotePathTemplate
+ env$2.remoteHost,
+ env$2.remotePathTemplate
  .replaceAll('{model}', path_or_repo_id)
  .replaceAll('{revision}', encodeURIComponent(revision)),
  filename
@@ -14096,7 +14096,7 @@ async function getModelFile(path_or_repo_id, filename, fatal = true, options = {
  if (response === undefined) {
  // Caching not available, or file is not cached, so we perform the request
 
- if (env$1.allowLocalModels) {
+ if (env$2.allowLocalModels) {
  // Accessing local models is enabled, so we try to get the file locally.
  // If request is a valid HTTP URL, we skip the local file check. Otherwise, we try to get the file locally.
  const isURL = isValidUrl(requestURL, ['http:', 'https:']);
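
Note on the renames above: the `env$1` → `env$2` (and, earlier, `env$2` → `env$3`) changes are mechanical. Rollup-style bundlers de-duplicate colliding top-level identifiers by suffixing them, and the new bare `env` binding introduced later in this diff (`const { pipeline, env } = Transformers;`) shifts every existing suffix up by one. A minimal illustration of the scheme (names illustrative, not taken from the bundle):

    // Several inlined modules each declare `env`; the bundler keeps one
    // bare name and suffixes the rest, so a new `env` reshuffles them all.
    const env = {};    // newest binding claims the bare name
    const env$1 = {};  // previously `env`
    const env$2 = {};  // previously `env$1`
    const env$3 = {};  // previously `env$2`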
@@ -23790,7 +23790,7 @@ class BeamSearchSampler extends Sampler {
  }
  }
 
- const { InferenceSession, Tensor: ONNXTensor, env } = ONNX;
+ const { InferenceSession, Tensor: ONNXTensor, env: env$1 } = ONNX;
 
  /** @typedef {import('onnxruntime-web').InferenceSession} InferenceSession */
 
@@ -23877,7 +23877,7 @@ function validateInputs(session, inputs) {
  // NOTE: When `env.wasm.proxy` is true, the tensor is moved across the Worker
  // boundary, transferring ownership to the worker and invalidating the tensor.
  // So, in this case, we simply sacrifice a clone for it.
- checkedInputs[inputName] = env.wasm.proxy ? tensor.clone() : tensor;
+ checkedInputs[inputName] = env$1.wasm.proxy ? tensor.clone() : tensor;
  }
  if (missingInputs.length > 0) {
  throw new Error(
@@ -30562,7 +30562,7 @@ class RawImage {
  // Clean up: remove the anchor element from the DOM
  downloadLink.remove();
 
- } else if (!env$1.useFS) {
+ } else if (!env$2.useFS) {
  throw new Error('Unable to save the image because filesystem is disabled in this environment.')
 
  } else {
@@ -36561,7 +36561,7 @@ const TASK_ALIASES = Object.freeze({
   * @returns {Promise<AllTasks[T]>} A Pipeline object for the specified task.
   * @throws {Error} If an unsupported pipeline is requested.
   */
- async function pipeline(
+ async function pipeline$1(
  task,
  model = null,
  {
@@ -36679,6 +36679,511 @@ async function loadItems(mapping, model, pretrainedOptions) {
  return result;
  }
 
+ /**
+ * @file Entry point for the Transformers.js library. Only the exports from this file
+ * are available to the end user, and are grouped as follows:
+ *
+ * 1. [Pipelines](./pipelines)
+ * 2. [Environment variables](./env)
+ * 3. [Models](./models)
+ * 4. [Tokenizers](./tokenizers)
+ * 5. [Processors](./processors)
+ *
+ * @module transformers
+ */
+
+ var Transformers = /*#__PURE__*/Object.freeze({
+ __proto__: null,
+ ASTFeatureExtractor: ASTFeatureExtractor,
+ ASTForAudioClassification: ASTForAudioClassification,
+ ASTModel: ASTModel,
+ ASTPreTrainedModel: ASTPreTrainedModel,
+ AlbertForMaskedLM: AlbertForMaskedLM,
+ AlbertForQuestionAnswering: AlbertForQuestionAnswering,
+ AlbertForSequenceClassification: AlbertForSequenceClassification,
+ AlbertModel: AlbertModel,
+ AlbertPreTrainedModel: AlbertPreTrainedModel,
+ AlbertTokenizer: AlbertTokenizer,
+ AudioClassificationPipeline: AudioClassificationPipeline,
+ AutoConfig: AutoConfig,
+ AutoModel: AutoModel,
+ AutoModelForAudioClassification: AutoModelForAudioClassification,
+ AutoModelForCTC: AutoModelForCTC,
+ AutoModelForCausalLM: AutoModelForCausalLM,
+ AutoModelForDepthEstimation: AutoModelForDepthEstimation,
+ AutoModelForDocumentQuestionAnswering: AutoModelForDocumentQuestionAnswering,
+ AutoModelForImageClassification: AutoModelForImageClassification,
+ AutoModelForImageFeatureExtraction: AutoModelForImageFeatureExtraction,
+ AutoModelForImageSegmentation: AutoModelForImageSegmentation,
+ AutoModelForImageToImage: AutoModelForImageToImage,
+ AutoModelForMaskedLM: AutoModelForMaskedLM,
+ AutoModelForObjectDetection: AutoModelForObjectDetection,
+ AutoModelForQuestionAnswering: AutoModelForQuestionAnswering,
+ AutoModelForSemanticSegmentation: AutoModelForSemanticSegmentation,
+ AutoModelForSeq2SeqLM: AutoModelForSeq2SeqLM,
+ AutoModelForSequenceClassification: AutoModelForSequenceClassification,
+ AutoModelForSpeechSeq2Seq: AutoModelForSpeechSeq2Seq,
+ AutoModelForTextToSpectrogram: AutoModelForTextToSpectrogram,
+ AutoModelForTextToWaveform: AutoModelForTextToWaveform,
+ AutoModelForTokenClassification: AutoModelForTokenClassification,
+ AutoModelForVision2Seq: AutoModelForVision2Seq,
+ AutoModelForZeroShotObjectDetection: AutoModelForZeroShotObjectDetection,
+ AutoProcessor: AutoProcessor,
+ AutoTokenizer: AutoTokenizer,
+ AutomaticSpeechRecognitionPipeline: AutomaticSpeechRecognitionPipeline,
+ BartForConditionalGeneration: BartForConditionalGeneration,
+ BartForSequenceClassification: BartForSequenceClassification,
+ BartModel: BartModel,
+ BartPretrainedModel: BartPretrainedModel,
+ BartTokenizer: BartTokenizer,
+ BeitFeatureExtractor: BeitFeatureExtractor,
+ BeitForImageClassification: BeitForImageClassification,
+ BeitModel: BeitModel,
+ BeitPreTrainedModel: BeitPreTrainedModel,
+ BertForMaskedLM: BertForMaskedLM,
+ BertForQuestionAnswering: BertForQuestionAnswering,
+ BertForSequenceClassification: BertForSequenceClassification,
+ BertForTokenClassification: BertForTokenClassification,
+ BertModel: BertModel,
+ BertPreTrainedModel: BertPreTrainedModel,
+ BertTokenizer: BertTokenizer,
+ BitImageProcessor: BitImageProcessor,
+ BlenderbotForConditionalGeneration: BlenderbotForConditionalGeneration,
+ BlenderbotModel: BlenderbotModel,
+ BlenderbotPreTrainedModel: BlenderbotPreTrainedModel,
+ BlenderbotSmallForConditionalGeneration: BlenderbotSmallForConditionalGeneration,
+ BlenderbotSmallModel: BlenderbotSmallModel,
+ BlenderbotSmallPreTrainedModel: BlenderbotSmallPreTrainedModel,
+ BlenderbotSmallTokenizer: BlenderbotSmallTokenizer,
+ BlenderbotTokenizer: BlenderbotTokenizer,
+ BloomForCausalLM: BloomForCausalLM,
+ BloomModel: BloomModel,
+ BloomPreTrainedModel: BloomPreTrainedModel,
+ BloomTokenizer: BloomTokenizer,
+ CLIPFeatureExtractor: CLIPFeatureExtractor,
+ CLIPModel: CLIPModel,
+ CLIPPreTrainedModel: CLIPPreTrainedModel,
+ CLIPSegForImageSegmentation: CLIPSegForImageSegmentation,
+ CLIPSegModel: CLIPSegModel,
+ CLIPSegPreTrainedModel: CLIPSegPreTrainedModel,
+ CLIPTextModelWithProjection: CLIPTextModelWithProjection,
+ CLIPTokenizer: CLIPTokenizer,
+ CLIPVisionModelWithProjection: CLIPVisionModelWithProjection,
+ CamembertForMaskedLM: CamembertForMaskedLM,
+ CamembertForQuestionAnswering: CamembertForQuestionAnswering,
+ CamembertForSequenceClassification: CamembertForSequenceClassification,
+ CamembertForTokenClassification: CamembertForTokenClassification,
+ CamembertModel: CamembertModel,
+ CamembertPreTrainedModel: CamembertPreTrainedModel,
+ CamembertTokenizer: CamembertTokenizer,
+ CausalLMOutput: CausalLMOutput,
+ ChineseCLIPFeatureExtractor: ChineseCLIPFeatureExtractor,
+ ChineseCLIPModel: ChineseCLIPModel,
+ ChineseCLIPPreTrainedModel: ChineseCLIPPreTrainedModel,
+ ClapAudioModelWithProjection: ClapAudioModelWithProjection,
+ ClapFeatureExtractor: ClapFeatureExtractor,
+ ClapModel: ClapModel,
+ ClapPreTrainedModel: ClapPreTrainedModel,
+ ClapTextModelWithProjection: ClapTextModelWithProjection,
+ CodeGenForCausalLM: CodeGenForCausalLM,
+ CodeGenModel: CodeGenModel,
+ CodeGenPreTrainedModel: CodeGenPreTrainedModel,
+ CodeGenTokenizer: CodeGenTokenizer,
+ CodeLlamaTokenizer: CodeLlamaTokenizer,
+ CohereTokenizer: CohereTokenizer,
+ ConvBertForMaskedLM: ConvBertForMaskedLM,
+ ConvBertForQuestionAnswering: ConvBertForQuestionAnswering,
+ ConvBertForSequenceClassification: ConvBertForSequenceClassification,
+ ConvBertForTokenClassification: ConvBertForTokenClassification,
+ ConvBertModel: ConvBertModel,
+ ConvBertPreTrainedModel: ConvBertPreTrainedModel,
+ ConvBertTokenizer: ConvBertTokenizer,
+ ConvNextFeatureExtractor: ConvNextFeatureExtractor,
+ ConvNextForImageClassification: ConvNextForImageClassification,
+ ConvNextImageProcessor: ConvNextImageProcessor,
+ ConvNextModel: ConvNextModel,
+ ConvNextPreTrainedModel: ConvNextPreTrainedModel,
+ ConvNextV2ForImageClassification: ConvNextV2ForImageClassification,
+ ConvNextV2Model: ConvNextV2Model,
+ ConvNextV2PreTrainedModel: ConvNextV2PreTrainedModel,
+ DPTFeatureExtractor: DPTFeatureExtractor,
+ DPTForDepthEstimation: DPTForDepthEstimation,
+ DPTImageProcessor: DPTImageProcessor,
+ DPTModel: DPTModel,
+ DPTPreTrainedModel: DPTPreTrainedModel,
+ DebertaForMaskedLM: DebertaForMaskedLM,
+ DebertaForQuestionAnswering: DebertaForQuestionAnswering,
+ DebertaForSequenceClassification: DebertaForSequenceClassification,
+ DebertaForTokenClassification: DebertaForTokenClassification,
+ DebertaModel: DebertaModel,
+ DebertaPreTrainedModel: DebertaPreTrainedModel,
+ DebertaTokenizer: DebertaTokenizer,
+ DebertaV2ForMaskedLM: DebertaV2ForMaskedLM,
+ DebertaV2ForQuestionAnswering: DebertaV2ForQuestionAnswering,
+ DebertaV2ForSequenceClassification: DebertaV2ForSequenceClassification,
+ DebertaV2ForTokenClassification: DebertaV2ForTokenClassification,
+ DebertaV2Model: DebertaV2Model,
+ DebertaV2PreTrainedModel: DebertaV2PreTrainedModel,
+ DebertaV2Tokenizer: DebertaV2Tokenizer,
+ DeiTFeatureExtractor: DeiTFeatureExtractor,
+ DeiTForImageClassification: DeiTForImageClassification,
+ DeiTModel: DeiTModel,
+ DeiTPreTrainedModel: DeiTPreTrainedModel,
+ DepthAnythingForDepthEstimation: DepthAnythingForDepthEstimation,
+ DepthAnythingPreTrainedModel: DepthAnythingPreTrainedModel,
+ DepthEstimationPipeline: DepthEstimationPipeline,
+ DetrFeatureExtractor: DetrFeatureExtractor,
+ DetrForObjectDetection: DetrForObjectDetection,
+ DetrForSegmentation: DetrForSegmentation,
+ DetrModel: DetrModel,
+ DetrObjectDetectionOutput: DetrObjectDetectionOutput,
+ DetrPreTrainedModel: DetrPreTrainedModel,
+ DetrSegmentationOutput: DetrSegmentationOutput,
+ Dinov2ForImageClassification: Dinov2ForImageClassification,
+ Dinov2Model: Dinov2Model,
+ Dinov2PreTrainedModel: Dinov2PreTrainedModel,
+ DistilBertForMaskedLM: DistilBertForMaskedLM,
+ DistilBertForQuestionAnswering: DistilBertForQuestionAnswering,
+ DistilBertForSequenceClassification: DistilBertForSequenceClassification,
+ DistilBertForTokenClassification: DistilBertForTokenClassification,
+ DistilBertModel: DistilBertModel,
+ DistilBertPreTrainedModel: DistilBertPreTrainedModel,
+ DistilBertTokenizer: DistilBertTokenizer,
+ DocumentQuestionAnsweringPipeline: DocumentQuestionAnsweringPipeline,
+ DonutFeatureExtractor: DonutFeatureExtractor,
+ DonutSwinModel: DonutSwinModel,
+ DonutSwinPreTrainedModel: DonutSwinPreTrainedModel,
+ EfficientNetForImageClassification: EfficientNetForImageClassification,
+ EfficientNetImageProcessor: EfficientNetImageProcessor,
+ EfficientNetModel: EfficientNetModel,
+ EfficientNetPreTrainedModel: EfficientNetPreTrainedModel,
+ ElectraForMaskedLM: ElectraForMaskedLM,
+ ElectraForQuestionAnswering: ElectraForQuestionAnswering,
+ ElectraForSequenceClassification: ElectraForSequenceClassification,
+ ElectraForTokenClassification: ElectraForTokenClassification,
+ ElectraModel: ElectraModel,
+ ElectraPreTrainedModel: ElectraPreTrainedModel,
+ ElectraTokenizer: ElectraTokenizer,
+ EsmForMaskedLM: EsmForMaskedLM,
+ EsmForSequenceClassification: EsmForSequenceClassification,
+ EsmForTokenClassification: EsmForTokenClassification,
+ EsmModel: EsmModel,
+ EsmPreTrainedModel: EsmPreTrainedModel,
+ EsmTokenizer: EsmTokenizer,
+ FFT: FFT,
+ FalconForCausalLM: FalconForCausalLM,
+ FalconModel: FalconModel,
+ FalconPreTrainedModel: FalconPreTrainedModel,
+ FalconTokenizer: FalconTokenizer,
+ FastViTForImageClassification: FastViTForImageClassification,
+ FastViTModel: FastViTModel,
+ FastViTPreTrainedModel: FastViTPreTrainedModel,
+ FeatureExtractionPipeline: FeatureExtractionPipeline,
+ FeatureExtractor: FeatureExtractor,
+ FillMaskPipeline: FillMaskPipeline,
+ GLPNFeatureExtractor: GLPNFeatureExtractor,
+ GLPNForDepthEstimation: GLPNForDepthEstimation,
+ GLPNModel: GLPNModel,
+ GLPNPreTrainedModel: GLPNPreTrainedModel,
+ GPT2LMHeadModel: GPT2LMHeadModel,
+ GPT2Model: GPT2Model,
+ GPT2PreTrainedModel: GPT2PreTrainedModel,
+ GPT2Tokenizer: GPT2Tokenizer,
+ GPTBigCodeForCausalLM: GPTBigCodeForCausalLM,
+ GPTBigCodeModel: GPTBigCodeModel,
+ GPTBigCodePreTrainedModel: GPTBigCodePreTrainedModel,
+ GPTJForCausalLM: GPTJForCausalLM,
+ GPTJModel: GPTJModel,
+ GPTJPreTrainedModel: GPTJPreTrainedModel,
+ GPTNeoForCausalLM: GPTNeoForCausalLM,
+ GPTNeoModel: GPTNeoModel,
+ GPTNeoPreTrainedModel: GPTNeoPreTrainedModel,
+ GPTNeoXForCausalLM: GPTNeoXForCausalLM,
+ GPTNeoXModel: GPTNeoXModel,
+ GPTNeoXPreTrainedModel: GPTNeoXPreTrainedModel,
+ GPTNeoXTokenizer: GPTNeoXTokenizer,
+ GemmaTokenizer: GemmaTokenizer,
+ Grok1Tokenizer: Grok1Tokenizer,
+ HerbertTokenizer: HerbertTokenizer,
+ HubertForCTC: HubertForCTC,
+ HubertForSequenceClassification: HubertForSequenceClassification,
+ HubertModel: HubertModel,
+ ImageClassificationPipeline: ImageClassificationPipeline,
+ ImageFeatureExtractionPipeline: ImageFeatureExtractionPipeline,
+ ImageFeatureExtractor: ImageFeatureExtractor,
+ ImageMattingOutput: ImageMattingOutput,
+ ImageSegmentationPipeline: ImageSegmentationPipeline,
+ ImageToImagePipeline: ImageToImagePipeline,
+ ImageToTextPipeline: ImageToTextPipeline,
+ LlamaForCausalLM: LlamaForCausalLM,
+ LlamaModel: LlamaModel,
+ LlamaPreTrainedModel: LlamaPreTrainedModel,
+ LlamaTokenizer: LlamaTokenizer,
+ LongT5ForConditionalGeneration: LongT5ForConditionalGeneration,
+ LongT5Model: LongT5Model,
+ LongT5PreTrainedModel: LongT5PreTrainedModel,
+ M2M100ForConditionalGeneration: M2M100ForConditionalGeneration,
+ M2M100Model: M2M100Model,
+ M2M100PreTrainedModel: M2M100PreTrainedModel,
+ M2M100Tokenizer: M2M100Tokenizer,
+ MBart50Tokenizer: MBart50Tokenizer,
+ MBartForCausalLM: MBartForCausalLM,
+ MBartForConditionalGeneration: MBartForConditionalGeneration,
+ MBartForSequenceClassification: MBartForSequenceClassification,
+ MBartModel: MBartModel,
+ MBartPreTrainedModel: MBartPreTrainedModel,
+ MBartTokenizer: MBartTokenizer,
+ MPNetForMaskedLM: MPNetForMaskedLM,
+ MPNetForQuestionAnswering: MPNetForQuestionAnswering,
+ MPNetForSequenceClassification: MPNetForSequenceClassification,
+ MPNetForTokenClassification: MPNetForTokenClassification,
+ MPNetModel: MPNetModel,
+ MPNetPreTrainedModel: MPNetPreTrainedModel,
+ MPNetTokenizer: MPNetTokenizer,
+ MT5ForConditionalGeneration: MT5ForConditionalGeneration,
+ MT5Model: MT5Model,
+ MT5PreTrainedModel: MT5PreTrainedModel,
+ MarianMTModel: MarianMTModel,
+ MarianModel: MarianModel,
+ MarianPreTrainedModel: MarianPreTrainedModel,
+ MarianTokenizer: MarianTokenizer,
+ MaskedLMOutput: MaskedLMOutput,
+ MistralForCausalLM: MistralForCausalLM,
+ MistralModel: MistralModel,
+ MistralPreTrainedModel: MistralPreTrainedModel,
+ MobileBertForMaskedLM: MobileBertForMaskedLM,
+ MobileBertForQuestionAnswering: MobileBertForQuestionAnswering,
+ MobileBertForSequenceClassification: MobileBertForSequenceClassification,
+ MobileBertModel: MobileBertModel,
+ MobileBertPreTrainedModel: MobileBertPreTrainedModel,
+ MobileBertTokenizer: MobileBertTokenizer,
+ MobileViTFeatureExtractor: MobileViTFeatureExtractor,
+ MobileViTForImageClassification: MobileViTForImageClassification,
+ MobileViTImageProcessor: MobileViTImageProcessor,
+ MobileViTModel: MobileViTModel,
+ MobileViTPreTrainedModel: MobileViTPreTrainedModel,
+ MobileViTV2ForImageClassification: MobileViTV2ForImageClassification,
+ MobileViTV2Model: MobileViTV2Model,
+ MobileViTV2PreTrainedModel: MobileViTV2PreTrainedModel,
+ ModelOutput: ModelOutput,
+ MptForCausalLM: MptForCausalLM,
+ MptModel: MptModel,
+ MptPreTrainedModel: MptPreTrainedModel,
+ NllbTokenizer: NllbTokenizer,
+ NomicBertModel: NomicBertModel,
+ NomicBertPreTrainedModel: NomicBertPreTrainedModel,
+ NougatImageProcessor: NougatImageProcessor,
+ NougatTokenizer: NougatTokenizer,
+ OPTForCausalLM: OPTForCausalLM,
+ OPTModel: OPTModel,
+ OPTPreTrainedModel: OPTPreTrainedModel,
+ ObjectDetectionPipeline: ObjectDetectionPipeline,
+ OwlViTFeatureExtractor: OwlViTFeatureExtractor,
+ OwlViTForObjectDetection: OwlViTForObjectDetection,
+ OwlViTModel: OwlViTModel,
+ OwlViTPreTrainedModel: OwlViTPreTrainedModel,
+ OwlViTProcessor: OwlViTProcessor,
+ Owlv2ForObjectDetection: Owlv2ForObjectDetection,
+ Owlv2ImageProcessor: Owlv2ImageProcessor,
+ Owlv2Model: Owlv2Model,
+ Owlv2PreTrainedModel: Owlv2PreTrainedModel,
+ PhiForCausalLM: PhiForCausalLM,
+ PhiModel: PhiModel,
+ PhiPreTrainedModel: PhiPreTrainedModel,
+ Pipeline: Pipeline,
+ PreTrainedModel: PreTrainedModel,
+ PreTrainedTokenizer: PreTrainedTokenizer,
+ PretrainedConfig: PretrainedConfig,
+ PretrainedMixin: PretrainedMixin,
+ Processor: Processor,
+ QuestionAnsweringModelOutput: QuestionAnsweringModelOutput,
+ QuestionAnsweringPipeline: QuestionAnsweringPipeline,
+ Qwen2ForCausalLM: Qwen2ForCausalLM,
+ Qwen2Model: Qwen2Model,
+ Qwen2PreTrainedModel: Qwen2PreTrainedModel,
+ Qwen2Tokenizer: Qwen2Tokenizer,
+ RawImage: RawImage,
+ ResNetForImageClassification: ResNetForImageClassification,
+ ResNetModel: ResNetModel,
+ ResNetPreTrainedModel: ResNetPreTrainedModel,
+ RoFormerForMaskedLM: RoFormerForMaskedLM,
+ RoFormerForQuestionAnswering: RoFormerForQuestionAnswering,
+ RoFormerForSequenceClassification: RoFormerForSequenceClassification,
+ RoFormerForTokenClassification: RoFormerForTokenClassification,
+ RoFormerModel: RoFormerModel,
+ RoFormerPreTrainedModel: RoFormerPreTrainedModel,
+ RoFormerTokenizer: RoFormerTokenizer,
+ RobertaForMaskedLM: RobertaForMaskedLM,
+ RobertaForQuestionAnswering: RobertaForQuestionAnswering,
+ RobertaForSequenceClassification: RobertaForSequenceClassification,
+ RobertaForTokenClassification: RobertaForTokenClassification,
+ RobertaModel: RobertaModel,
+ RobertaPreTrainedModel: RobertaPreTrainedModel,
+ RobertaTokenizer: RobertaTokenizer,
+ SamImageProcessor: SamImageProcessor,
+ SamImageSegmentationOutput: SamImageSegmentationOutput,
+ SamModel: SamModel,
+ SamPreTrainedModel: SamPreTrainedModel,
+ SamProcessor: SamProcessor,
+ SeamlessM4TFeatureExtractor: SeamlessM4TFeatureExtractor,
+ SegformerFeatureExtractor: SegformerFeatureExtractor,
+ SegformerForImageClassification: SegformerForImageClassification,
+ SegformerForSemanticSegmentation: SegformerForSemanticSegmentation,
+ SegformerPreTrainedModel: SegformerPreTrainedModel,
+ Seq2SeqLMOutput: Seq2SeqLMOutput,
+ SequenceClassifierOutput: SequenceClassifierOutput,
+ SiglipImageProcessor: SiglipImageProcessor,
+ SiglipModel: SiglipModel,
+ SiglipPreTrainedModel: SiglipPreTrainedModel,
+ SiglipTextModel: SiglipTextModel,
+ SiglipTokenizer: SiglipTokenizer,
+ SiglipVisionModel: SiglipVisionModel,
+ SpeechT5FeatureExtractor: SpeechT5FeatureExtractor,
+ SpeechT5ForSpeechToText: SpeechT5ForSpeechToText,
+ SpeechT5ForTextToSpeech: SpeechT5ForTextToSpeech,
+ SpeechT5HifiGan: SpeechT5HifiGan,
+ SpeechT5PreTrainedModel: SpeechT5PreTrainedModel,
+ SpeechT5Processor: SpeechT5Processor,
+ SpeechT5Tokenizer: SpeechT5Tokenizer,
+ SqueezeBertForMaskedLM: SqueezeBertForMaskedLM,
+ SqueezeBertForQuestionAnswering: SqueezeBertForQuestionAnswering,
+ SqueezeBertForSequenceClassification: SqueezeBertForSequenceClassification,
+ SqueezeBertModel: SqueezeBertModel,
+ SqueezeBertPreTrainedModel: SqueezeBertPreTrainedModel,
+ SqueezeBertTokenizer: SqueezeBertTokenizer,
+ StableLmForCausalLM: StableLmForCausalLM,
+ StableLmPreTrainedModel: StableLmPreTrainedModel,
+ Starcoder2ForCausalLM: Starcoder2ForCausalLM,
+ Starcoder2Model: Starcoder2Model,
+ Starcoder2PreTrainedModel: Starcoder2PreTrainedModel,
+ SummarizationPipeline: SummarizationPipeline,
+ Swin2SRForImageSuperResolution: Swin2SRForImageSuperResolution,
+ Swin2SRImageProcessor: Swin2SRImageProcessor,
+ Swin2SRModel: Swin2SRModel,
+ Swin2SRPreTrainedModel: Swin2SRPreTrainedModel,
+ SwinForImageClassification: SwinForImageClassification,
+ SwinModel: SwinModel,
+ SwinPreTrainedModel: SwinPreTrainedModel,
+ T5ForConditionalGeneration: T5ForConditionalGeneration,
+ T5Model: T5Model,
+ T5PreTrainedModel: T5PreTrainedModel,
+ T5Tokenizer: T5Tokenizer,
+ TableTransformerForObjectDetection: TableTransformerForObjectDetection,
+ TableTransformerModel: TableTransformerModel,
+ TableTransformerObjectDetectionOutput: TableTransformerObjectDetectionOutput,
+ TableTransformerPreTrainedModel: TableTransformerPreTrainedModel,
+ Tensor: Tensor,
+ Text2TextGenerationPipeline: Text2TextGenerationPipeline,
+ TextClassificationPipeline: TextClassificationPipeline,
+ TextGenerationPipeline: TextGenerationPipeline,
+ TextToAudioPipeline: TextToAudioPipeline,
+ TokenClassificationPipeline: TokenClassificationPipeline,
+ TokenClassifierOutput: TokenClassifierOutput,
+ TokenizerModel: TokenizerModel,
+ TrOCRForCausalLM: TrOCRForCausalLM,
+ TrOCRPreTrainedModel: TrOCRPreTrainedModel,
+ TranslationPipeline: TranslationPipeline,
+ UniSpeechForCTC: UniSpeechForCTC,
+ UniSpeechForSequenceClassification: UniSpeechForSequenceClassification,
+ UniSpeechModel: UniSpeechModel,
+ UniSpeechPreTrainedModel: UniSpeechPreTrainedModel,
+ UniSpeechSatForAudioFrameClassification: UniSpeechSatForAudioFrameClassification,
+ UniSpeechSatForCTC: UniSpeechSatForCTC,
+ UniSpeechSatForSequenceClassification: UniSpeechSatForSequenceClassification,
+ UniSpeechSatModel: UniSpeechSatModel,
+ UniSpeechSatPreTrainedModel: UniSpeechSatPreTrainedModel,
+ ViTFeatureExtractor: ViTFeatureExtractor,
+ ViTForImageClassification: ViTForImageClassification,
+ ViTImageProcessor: ViTImageProcessor,
+ ViTModel: ViTModel,
+ ViTPreTrainedModel: ViTPreTrainedModel,
+ VisionEncoderDecoderModel: VisionEncoderDecoderModel,
+ VitMatteForImageMatting: VitMatteForImageMatting,
+ VitMatteImageProcessor: VitMatteImageProcessor,
+ VitMattePreTrainedModel: VitMattePreTrainedModel,
+ VitsModel: VitsModel,
+ VitsModelOutput: VitsModelOutput,
+ VitsPreTrainedModel: VitsPreTrainedModel,
+ VitsTokenizer: VitsTokenizer,
+ Wav2Vec2BertForCTC: Wav2Vec2BertForCTC,
+ Wav2Vec2BertForSequenceClassification: Wav2Vec2BertForSequenceClassification,
+ Wav2Vec2BertModel: Wav2Vec2BertModel,
+ Wav2Vec2BertPreTrainedModel: Wav2Vec2BertPreTrainedModel,
+ Wav2Vec2CTCTokenizer: Wav2Vec2CTCTokenizer,
+ Wav2Vec2FeatureExtractor: Wav2Vec2FeatureExtractor,
+ Wav2Vec2ForAudioFrameClassification: Wav2Vec2ForAudioFrameClassification,
+ Wav2Vec2ForCTC: Wav2Vec2ForCTC,
+ Wav2Vec2ForSequenceClassification: Wav2Vec2ForSequenceClassification,
+ Wav2Vec2Model: Wav2Vec2Model,
+ Wav2Vec2PreTrainedModel: Wav2Vec2PreTrainedModel,
+ Wav2Vec2ProcessorWithLM: Wav2Vec2ProcessorWithLM,
+ WavLMForAudioFrameClassification: WavLMForAudioFrameClassification,
+ WavLMForCTC: WavLMForCTC,
+ WavLMForSequenceClassification: WavLMForSequenceClassification,
+ WavLMForXVector: WavLMForXVector,
+ WavLMModel: WavLMModel,
+ WavLMPreTrainedModel: WavLMPreTrainedModel,
+ WhisperFeatureExtractor: WhisperFeatureExtractor,
+ WhisperForConditionalGeneration: WhisperForConditionalGeneration,
+ WhisperModel: WhisperModel,
+ WhisperPreTrainedModel: WhisperPreTrainedModel,
+ WhisperProcessor: WhisperProcessor,
+ WhisperTokenizer: WhisperTokenizer,
+ XLMForQuestionAnswering: XLMForQuestionAnswering,
+ XLMForSequenceClassification: XLMForSequenceClassification,
+ XLMForTokenClassification: XLMForTokenClassification,
+ XLMModel: XLMModel,
+ XLMPreTrainedModel: XLMPreTrainedModel,
+ XLMRobertaForMaskedLM: XLMRobertaForMaskedLM,
+ XLMRobertaForQuestionAnswering: XLMRobertaForQuestionAnswering,
+ XLMRobertaForSequenceClassification: XLMRobertaForSequenceClassification,
+ XLMRobertaForTokenClassification: XLMRobertaForTokenClassification,
+ XLMRobertaModel: XLMRobertaModel,
+ XLMRobertaPreTrainedModel: XLMRobertaPreTrainedModel,
+ XLMRobertaTokenizer: XLMRobertaTokenizer,
+ XLMTokenizer: XLMTokenizer,
+ XLMWithLMHeadModel: XLMWithLMHeadModel,
+ XVectorOutput: XVectorOutput,
+ YolosFeatureExtractor: YolosFeatureExtractor,
+ YolosForObjectDetection: YolosForObjectDetection,
+ YolosModel: YolosModel,
+ YolosObjectDetectionOutput: YolosObjectDetectionOutput,
+ YolosPreTrainedModel: YolosPreTrainedModel,
+ ZeroShotAudioClassificationPipeline: ZeroShotAudioClassificationPipeline,
+ ZeroShotClassificationPipeline: ZeroShotClassificationPipeline,
+ ZeroShotImageClassificationPipeline: ZeroShotImageClassificationPipeline,
+ ZeroShotObjectDetectionPipeline: ZeroShotObjectDetectionPipeline,
+ bankers_round: bankers_round,
+ cat: cat,
+ dynamicTimeWarping: dynamicTimeWarping,
+ env: env$2,
+ getTopItems: getTopItems,
+ hanning: hanning,
+ interpolate: interpolate,
+ interpolate_data: interpolate_data,
+ log_softmax: log_softmax,
+ max: max,
+ mean: mean,
+ mean_pooling: mean_pooling,
+ medianFilter: medianFilter,
+ mel_filter_bank: mel_filter_bank,
+ min: min,
+ ones: ones,
+ ones_like: ones_like,
+ permute: permute,
+ permute_data: permute_data,
+ pipeline: pipeline$1,
+ quantize_embeddings: quantize_embeddings,
+ read_audio: read_audio,
+ round: round,
+ softmax: softmax,
+ spectrogram: spectrogram,
+ stack: stack,
+ std_mean: std_mean,
+ window_function: window_function
+ });
+
+ const { pipeline, env } = Transformers;
  /**
   * ONNXEngine implements MLCEngineInterface using Transformers.js (ONNX Runtime Web).
   * This provides a CPU/WASM fallback for browsers that do not support WebGPU.
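
This added block is the bundler's rendering of the Transformers.js entry point: the library's full public surface is frozen into a namespace object, and the runner then destructures `pipeline` and `env` from it, which is what displaces the bare `env` name and causes the renames earlier in this diff. Un-bundled, the new wiring is roughly equivalent to this sketch (assuming the inlined library follows the @xenova/transformers v2 API; the model id is illustrative):

    // import { pipeline, env } from '@xenova/transformers';
    env.allowLocalModels = false;  // resolve models remotely, never from local paths
    env.useBrowserCache = true;    // persist downloaded files via the Web Cache API
    const generator = await pipeline('text-generation', 'Xenova/gpt2');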
@@ -36688,6 +37193,7 @@ class ONNXEngine {
  modelId = null;
  appConfig;
  initProgressCallback;
+ repoId = null;
  // APIs
  chat;
  completions;
@@ -36698,8 +37204,8 @@ class ONNXEngine {
  this.embeddings = new Embeddings(this);
  this.appConfig = prebuiltAppConfig;
  // Default env settings for browser
- env$1.allowLocalModels = false;
- env$1.useBrowserCache = true;
+ env.allowLocalModels = false;
+ env.useBrowserCache = true;
  }
  setInitProgressCallback(initProgressCallback) {
  this.initProgressCallback = initProgressCallback;
@@ -36719,6 +37225,7 @@ class ONNXEngine {
  const { findModelRecord } = await Promise.resolve().then(function () { return support; });
  const record = findModelRecord(id, this.appConfig);
  repoId = record.onnx_id || id;
+ this.repoId = repoId;
  }
  catch (e) {
  log.warn(`Model record not found for ${id}, using raw ID for ONNX.`);
@@ -36739,8 +37246,9 @@ class ONNXEngine {
  }
  try {
  // For T5 models, text2text-generation is the standard task in transformers.js
- const task = repoId.toLowerCase().includes("t5") ? "text2text-generation" : "text-generation";
- this.generator = await pipeline(task, repoId, {
+ const currentRepoId = this.repoId || id;
+ const task = currentRepoId.toLowerCase().includes("t5") ? "text2text-generation" : "text-generation";
+ this.generator = await pipeline(task, currentRepoId, {
  progress_callback: (p) => {
  if (this.initProgressCallback && (p.status === 'progress' || p.status === 'downloading')) {
  const pctValue = (typeof p.progress === 'number') ? p.progress : 0;
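
`loadModel` now records the resolved repo id on the instance (`this.repoId`) so later calls can recover it, and derives the pipeline task from that resolved id instead of a local variable. The selection rule, pulled out for clarity (a sketch of the logic above, not a helper that exists in the bundle):

    // T5-family models are encoder-decoder, so transformers.js runs them
    // under 'text2text-generation'; everything else is treated as decoder-only.
    function taskFor(repoId) {
      return repoId.toLowerCase().includes('t5')
        ? 'text2text-generation'
        : 'text-generation';
    }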
@@ -36812,28 +37320,138 @@ class ONNXEngine {
  };
  }
  async *asyncGenerateStreaming(prompt, request) {
- // Current simple implementation yields only a single chunk.
- // In future iterations, we can integrate the Transformers.js TextStreamer
- const result = await this.generateNonStreaming(prompt, request);
- const content = result.choices[0].message.content;
+ if (!this.generator)
+ throw new Error("ONNX model not loaded.");
+ const model = this.modelId;
+ const created = Math.floor(Date.now() / 1000);
+ const id = crypto.randomUUID();
+ const queue = [];
+ let isDone = false;
+ const streamer = new undefined(this.generator.tokenizer, {
+ skip_prompt: true,
+ callback_function: (text) => {
+ queue.push(text);
+ },
+ });
+ // Run generation in the background
+ (this.repoId || "").toLowerCase().includes("t5") ? "text2text-generation" : "text-generation";
+ this.generator(prompt, {
+ max_new_tokens: request.max_tokens || 256,
+ temperature: request.temperature || 0.7,
+ top_p: request.top_p || 1.0,
+ do_sample: (request.temperature ?? 1.0) > 0,
+ repetition_penalty: request.repetition_penalty || 1.1,
+ streamer,
+ }).finally(() => {
+ isDone = true;
+ });
+ while (!isDone || queue.length > 0) {
+ if (queue.length > 0) {
+ const content = queue.shift();
+ yield {
+ id,
+ choices: [{
+ delta: { content },
+ finish_reason: null,
+ index: 0,
+ }],
+ model,
+ object: 'chat.completion.chunk',
+ created,
+ };
+ }
+ else {
+ await new Promise(r => setTimeout(r, 10));
+ }
+ }
  yield {
- id: result.id,
+ id,
  choices: [{
- delta: { role: 'assistant', content: content },
+ delta: {},
  finish_reason: 'stop',
  index: 0,
- logprobs: null
  }],
- model: result.model,
+ model,
  object: 'chat.completion.chunk',
- created: result.created
+ created,
  };
  }
- async completion(_request) {
- throw new Error("Generic completion not yet implemented in ONNXEngine fallback.");
+ async completion(request) {
+ if (!this.generator)
+ throw new Error("ONNX model not loaded.");
+ const prompt = typeof request.prompt === 'string' ? request.prompt : (Array.isArray(request.prompt) ? request.prompt[0] : "");
+ if (request.stream) {
+ return this.asyncGenerateStreamingCompletion(prompt, request);
+ }
+ else {
+ const result = await this.generator(prompt, {
+ max_new_tokens: request.max_tokens || 256,
+ temperature: request.temperature || 0.7,
+ top_p: request.top_p || 1.0,
+ do_sample: (request.temperature ?? 1.0) > 0,
+ repetition_penalty: request.repetition_penalty || 1.1,
+ });
+ const fullText = result[0].generated_text;
+ const text = fullText.startsWith(prompt) ? fullText.slice(prompt.length) : fullText;
+ return {
+ id: crypto.randomUUID(),
+ choices: [{
+ text,
+ finish_reason: 'stop',
+ index: 0,
+ logprobs: null
+ }],
+ model: this.modelId,
+ object: 'text_completion',
+ created: Math.floor(Date.now() / 1000),
+ usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }
+ };
+ }
+ }
+ async *asyncGenerateStreamingCompletion(prompt, request) {
+ const id = crypto.randomUUID();
+ const created = Math.floor(Date.now() / 1000);
+ const model = this.modelId;
+ const queue = [];
+ let isDone = false;
+ const streamer = new undefined(this.generator.tokenizer, {
+ skip_prompt: true,
+ callback_function: (text) => { queue.push(text); },
+ });
+ this.generator(prompt, {
+ max_new_tokens: request.max_tokens || 256,
+ temperature: request.temperature || 0.7,
+ streamer,
+ }).finally(() => { isDone = true; });
+ while (!isDone || queue.length > 0) {
+ if (queue.length > 0) {
+ yield {
+ id,
+ choices: [{ text: queue.shift(), finish_reason: null, index: 0 }],
+ model,
+ object: 'text_completion',
+ created,
+ };
+ }
+ else {
+ await new Promise(r => setTimeout(r, 10));
+ }
+ }
  }
- async embedding(_request) {
- throw new Error("Embeddings not yet implemented in ONNXEngine fallback.");
+ async embedding(request) {
+ const input = Array.isArray(request.input) ? request.input : [request.input];
+ const extractor = await pipeline('feature-extraction', this.modelId);
+ const results = await Promise.all(input.map(text => extractor(text, { pooling: 'mean', normalize: true })));
+ return {
+ object: 'list',
+ data: results.map((res, i) => ({
+ object: 'embedding',
+ index: i,
+ embedding: Array.from(res.data)
+ })),
+ model: this.modelId,
+ usage: { prompt_tokens: 0, total_tokens: 0, extra: {} }
+ };
  }
  async runtimeStatsText() {
  return "Backend: ONNX Runtime (WASM/CPU Fallback)";
@@ -38034,7 +38652,28 @@ class WebLLM {
  return list.map((m) => m.model_id);
  }
  async local_model_available(model_id) {
- return await hasModelInCache(model_id);
+ const isMLCCached = await hasModelInCache(model_id);
+ if (isMLCCached)
+ return true;
+ // Check ONNX cache fallback
+ const record = prebuiltAppConfig.model_list.find(m => m.model_id === model_id);
+ if (record && record.onnx_id) {
+ return await this.hasONNXInCache(record.onnx_id);
+ }
+ return false;
+ }
+ async hasONNXInCache(onnx_id) {
+ if (typeof caches === 'undefined')
+ return false;
+ try {
+ const cache = await caches.open('transformers-cache');
+ const url = `https://huggingface.co/${onnx_id}/resolve/main/config.json`;
+ const match = await cache.match(url);
+ return !!match;
+ }
+ catch (e) {
+ return false;
+ }
  }
  async download_model(model_id, progressCallback) {
  // Initial feedback
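
`hasONNXInCache` leans on two defaults of the bundled Transformers.js: browser caching writes to a Web Cache named `transformers-cache`, and files are fetched from `https://huggingface.co/{model}/resolve/{revision}/{file}` (the `remoteHost` and `remotePathTemplate` seen earlier in this diff). Probing for the model's `config.json` is a cheap presence test, but it can report a false negative if the model was cached through a custom `env.customCache` or a non-default remote host.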
@@ -38055,6 +38694,14 @@ class WebLLM {
  return this.downloadProgress[model_id] || "No progress available.";
  }
  async delete_model(model_id) {
+ const record = prebuiltAppConfig.model_list.find(m => m.model_id === model_id);
+ if (record && record.onnx_id) {
+ // For ONNX, we currently clear the whole transformers-cache for simplicity
+ // as individual file deletion is complex without a full manifest.
+ if (typeof caches !== 'undefined') {
+ await caches.delete('transformers-cache');
+ }
+ }
  await deleteModelAllInfoInCache(model_id);
  }
  // chat endpoints (Stateful)
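
Note that the ONNX branch of `delete_model` is deliberately coarse: `caches.delete('transformers-cache')` drops the entire shared cache, so deleting one ONNX model also evicts every other cached ONNX model, as the inline comment acknowledges.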