web-llm-runner 0.1.14 → 0.1.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/index.js CHANGED
@@ -12814,7 +12814,7 @@ class EnvImpl {
  /**
   * Represent a set of flags as a global singleton.
   */
- const env$2 = new EnvImpl();
+ const env$3 = new EnvImpl();
 
  // Copyright (c) Microsoft Corporation. All rights reserved.
  // Licensed under the MIT License.
@@ -13513,7 +13513,7 @@ var lib = /*#__PURE__*/Object.freeze({
  __proto__: null,
  InferenceSession: InferenceSession$1,
  Tensor: Tensor$1,
- env: env$2,
+ env: env$3,
  registerBackend: registerBackend
  });
 
@@ -13673,7 +13673,7 @@ if (onnx_env?.wasm) {
   * @property {Object} customCache The custom cache to use. Defaults to `null`. Note: this must be an object which
   * implements the `match` and `put` functions of the Web Cache API. For more information, see https://developer.mozilla.org/en-US/docs/Web/API/Cache
   */
- const env$1 = {
+ const env$2 = {
  version: VERSION,
 
  remoteHost: 'https://huggingface.co/',
@@ -13862,12 +13862,12 @@ function isValidUrl(string, protocols = null, validHosts = null) {
  */
  async function getFile(urlOrPath) {
 
- if (env$1.useFS && !isValidUrl(urlOrPath, ['http:', 'https:', 'blob:'])) {
+ if (env$2.useFS && !isValidUrl(urlOrPath, ['http:', 'https:', 'blob:'])) {
  return new FileResponse(urlOrPath);
 
  } else if (typeof process !== 'undefined' && process?.release?.name === 'node') {
  const IS_CI = !!process.env?.TESTING_REMOTELY;
- const version = env$1.version;
+ const version = env$2.version;
 
  const headers = new Headers();
  headers.set('User-Agent', `transformers.js/${version}; is_ci/${IS_CI};`);
@@ -14014,7 +14014,7 @@ async function tryCache(cache, ...names) {
  */
  async function getModelFile(path_or_repo_id, filename, fatal = true, options = {}) {
 
- if (!env$1.allowLocalModels) {
+ if (!env$2.allowLocalModels) {
  // User has disabled local models, so we just make sure other settings are correct.
 
  if (options.local_files_only) {
@@ -14032,7 +14032,7 @@ async function getModelFile(path_or_repo_id, filename, fatal = true, options = {
  // First, check if a caching backend is available
  // If no caching mechanism is available, the file will be downloaded every time
  let cache;
- if (!cache && env$1.useBrowserCache) {
+ if (!cache && env$2.useBrowserCache) {
  if (typeof caches === 'undefined') {
  throw Error('Browser cache is not available in this environment.')
  }
@@ -14048,21 +14048,21 @@ async function getModelFile(path_or_repo_id, filename, fatal = true, options = {
  }
  }
 
- if (!cache && env$1.useFSCache) {
+ if (!cache && env$2.useFSCache) {
  // TODO throw error if not available
 
  // If `cache_dir` is not specified, use the default cache directory
- cache = new FileCache(options.cache_dir ?? env$1.cacheDir);
+ cache = new FileCache(options.cache_dir ?? env$2.cacheDir);
  }
 
  const revision = options.revision ?? 'main';
 
  let requestURL = pathJoin(path_or_repo_id, filename);
- let localPath = pathJoin(env$1.localModelPath, requestURL);
+ let localPath = pathJoin(env$2.localModelPath, requestURL);
 
  let remoteURL = pathJoin(
- env$1.remoteHost,
- env$1.remotePathTemplate
+ env$2.remoteHost,
+ env$2.remotePathTemplate
  .replaceAll('{model}', path_or_repo_id)
  .replaceAll('{revision}', encodeURIComponent(revision)),
  filename
@@ -14096,7 +14096,7 @@ async function getModelFile(path_or_repo_id, filename, fatal = true, options = {
  if (response === undefined) {
  // Caching not available, or file is not cached, so we perform the request
 
- if (env$1.allowLocalModels) {
+ if (env$2.allowLocalModels) {
  // Accessing local models is enabled, so we try to get the file locally.
  // If request is a valid HTTP URL, we skip the local file check. Otherwise, we try to get the file locally.
  const isURL = isValidUrl(requestURL, ['http:', 'https:']);
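
Note on the renames above: the `env$1` → `env$2` (and, earlier, `env$2` → `env$3`) changes are mechanical. Rollup-style bundlers de-duplicate colliding top-level identifiers by suffixing them, and the new bare `env` binding introduced later in this diff (`const { pipeline, env } = Transformers;`) shifts every existing suffix up by one. A minimal illustration of the scheme (names illustrative, not taken from the bundle):

    // Several inlined modules each declare `env`; the bundler keeps one
    // bare name and suffixes the rest, so a new `env` reshuffles them all.
    const env = {};    // newest binding claims the bare name
    const env$1 = {};  // previously `env`
    const env$2 = {};  // previously `env$1`
    const env$3 = {};  // previously `env$2`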
@@ -23790,7 +23790,7 @@ class BeamSearchSampler extends Sampler {
  }
  }
 
- const { InferenceSession, Tensor: ONNXTensor, env } = ONNX;
+ const { InferenceSession, Tensor: ONNXTensor, env: env$1 } = ONNX;
 
  /** @typedef {import('onnxruntime-web').InferenceSession} InferenceSession */
 
@@ -23877,7 +23877,7 @@ function validateInputs(session, inputs) {
  // NOTE: When `env.wasm.proxy` is true, the tensor is moved across the Worker
  // boundary, transferring ownership to the worker and invalidating the tensor.
  // So, in this case, we simply sacrifice a clone for it.
- checkedInputs[inputName] = env.wasm.proxy ? tensor.clone() : tensor;
+ checkedInputs[inputName] = env$1.wasm.proxy ? tensor.clone() : tensor;
  }
  if (missingInputs.length > 0) {
  throw new Error(
@@ -30562,7 +30562,7 @@ class RawImage {
  // Clean up: remove the anchor element from the DOM
  downloadLink.remove();
 
- } else if (!env$1.useFS) {
+ } else if (!env$2.useFS) {
  throw new Error('Unable to save the image because filesystem is disabled in this environment.')
 
  } else {
@@ -36561,7 +36561,7 @@ const TASK_ALIASES = Object.freeze({
   * @returns {Promise<AllTasks[T]>} A Pipeline object for the specified task.
   * @throws {Error} If an unsupported pipeline is requested.
   */
- async function pipeline(
+ async function pipeline$1(
  task,
  model = null,
  {
@@ -36679,6 +36679,511 @@ async function loadItems(mapping, model, pretrainedOptions) {
  return result;
  }
 
+ /**
+ * @file Entry point for the Transformers.js library. Only the exports from this file
+ * are available to the end user, and are grouped as follows:
+ *
+ * 1. [Pipelines](./pipelines)
+ * 2. [Environment variables](./env)
+ * 3. [Models](./models)
+ * 4. [Tokenizers](./tokenizers)
+ * 5. [Processors](./processors)
+ *
+ * @module transformers
+ */
+
+ var Transformers = /*#__PURE__*/Object.freeze({
+ __proto__: null,
+ ASTFeatureExtractor: ASTFeatureExtractor,
+ ASTForAudioClassification: ASTForAudioClassification,
+ ASTModel: ASTModel,
+ ASTPreTrainedModel: ASTPreTrainedModel,
+ AlbertForMaskedLM: AlbertForMaskedLM,
+ AlbertForQuestionAnswering: AlbertForQuestionAnswering,
+ AlbertForSequenceClassification: AlbertForSequenceClassification,
+ AlbertModel: AlbertModel,
+ AlbertPreTrainedModel: AlbertPreTrainedModel,
+ AlbertTokenizer: AlbertTokenizer,
+ AudioClassificationPipeline: AudioClassificationPipeline,
+ AutoConfig: AutoConfig,
+ AutoModel: AutoModel,
+ AutoModelForAudioClassification: AutoModelForAudioClassification,
+ AutoModelForCTC: AutoModelForCTC,
+ AutoModelForCausalLM: AutoModelForCausalLM,
+ AutoModelForDepthEstimation: AutoModelForDepthEstimation,
+ AutoModelForDocumentQuestionAnswering: AutoModelForDocumentQuestionAnswering,
+ AutoModelForImageClassification: AutoModelForImageClassification,
+ AutoModelForImageFeatureExtraction: AutoModelForImageFeatureExtraction,
+ AutoModelForImageSegmentation: AutoModelForImageSegmentation,
+ AutoModelForImageToImage: AutoModelForImageToImage,
+ AutoModelForMaskedLM: AutoModelForMaskedLM,
+ AutoModelForObjectDetection: AutoModelForObjectDetection,
+ AutoModelForQuestionAnswering: AutoModelForQuestionAnswering,
+ AutoModelForSemanticSegmentation: AutoModelForSemanticSegmentation,
+ AutoModelForSeq2SeqLM: AutoModelForSeq2SeqLM,
+ AutoModelForSequenceClassification: AutoModelForSequenceClassification,
+ AutoModelForSpeechSeq2Seq: AutoModelForSpeechSeq2Seq,
+ AutoModelForTextToSpectrogram: AutoModelForTextToSpectrogram,
+ AutoModelForTextToWaveform: AutoModelForTextToWaveform,
+ AutoModelForTokenClassification: AutoModelForTokenClassification,
+ AutoModelForVision2Seq: AutoModelForVision2Seq,
+ AutoModelForZeroShotObjectDetection: AutoModelForZeroShotObjectDetection,
+ AutoProcessor: AutoProcessor,
+ AutoTokenizer: AutoTokenizer,
+ AutomaticSpeechRecognitionPipeline: AutomaticSpeechRecognitionPipeline,
+ BartForConditionalGeneration: BartForConditionalGeneration,
+ BartForSequenceClassification: BartForSequenceClassification,
+ BartModel: BartModel,
+ BartPretrainedModel: BartPretrainedModel,
+ BartTokenizer: BartTokenizer,
+ BeitFeatureExtractor: BeitFeatureExtractor,
+ BeitForImageClassification: BeitForImageClassification,
+ BeitModel: BeitModel,
+ BeitPreTrainedModel: BeitPreTrainedModel,
+ BertForMaskedLM: BertForMaskedLM,
+ BertForQuestionAnswering: BertForQuestionAnswering,
+ BertForSequenceClassification: BertForSequenceClassification,
+ BertForTokenClassification: BertForTokenClassification,
+ BertModel: BertModel,
+ BertPreTrainedModel: BertPreTrainedModel,
+ BertTokenizer: BertTokenizer,
+ BitImageProcessor: BitImageProcessor,
+ BlenderbotForConditionalGeneration: BlenderbotForConditionalGeneration,
+ BlenderbotModel: BlenderbotModel,
+ BlenderbotPreTrainedModel: BlenderbotPreTrainedModel,
+ BlenderbotSmallForConditionalGeneration: BlenderbotSmallForConditionalGeneration,
+ BlenderbotSmallModel: BlenderbotSmallModel,
+ BlenderbotSmallPreTrainedModel: BlenderbotSmallPreTrainedModel,
+ BlenderbotSmallTokenizer: BlenderbotSmallTokenizer,
+ BlenderbotTokenizer: BlenderbotTokenizer,
+ BloomForCausalLM: BloomForCausalLM,
+ BloomModel: BloomModel,
+ BloomPreTrainedModel: BloomPreTrainedModel,
+ BloomTokenizer: BloomTokenizer,
+ CLIPFeatureExtractor: CLIPFeatureExtractor,
+ CLIPModel: CLIPModel,
+ CLIPPreTrainedModel: CLIPPreTrainedModel,
+ CLIPSegForImageSegmentation: CLIPSegForImageSegmentation,
+ CLIPSegModel: CLIPSegModel,
+ CLIPSegPreTrainedModel: CLIPSegPreTrainedModel,
+ CLIPTextModelWithProjection: CLIPTextModelWithProjection,
+ CLIPTokenizer: CLIPTokenizer,
+ CLIPVisionModelWithProjection: CLIPVisionModelWithProjection,
+ CamembertForMaskedLM: CamembertForMaskedLM,
+ CamembertForQuestionAnswering: CamembertForQuestionAnswering,
+ CamembertForSequenceClassification: CamembertForSequenceClassification,
+ CamembertForTokenClassification: CamembertForTokenClassification,
+ CamembertModel: CamembertModel,
+ CamembertPreTrainedModel: CamembertPreTrainedModel,
+ CamembertTokenizer: CamembertTokenizer,
+ CausalLMOutput: CausalLMOutput,
+ ChineseCLIPFeatureExtractor: ChineseCLIPFeatureExtractor,
+ ChineseCLIPModel: ChineseCLIPModel,
+ ChineseCLIPPreTrainedModel: ChineseCLIPPreTrainedModel,
+ ClapAudioModelWithProjection: ClapAudioModelWithProjection,
+ ClapFeatureExtractor: ClapFeatureExtractor,
+ ClapModel: ClapModel,
+ ClapPreTrainedModel: ClapPreTrainedModel,
+ ClapTextModelWithProjection: ClapTextModelWithProjection,
+ CodeGenForCausalLM: CodeGenForCausalLM,
+ CodeGenModel: CodeGenModel,
+ CodeGenPreTrainedModel: CodeGenPreTrainedModel,
+ CodeGenTokenizer: CodeGenTokenizer,
+ CodeLlamaTokenizer: CodeLlamaTokenizer,
+ CohereTokenizer: CohereTokenizer,
+ ConvBertForMaskedLM: ConvBertForMaskedLM,
+ ConvBertForQuestionAnswering: ConvBertForQuestionAnswering,
+ ConvBertForSequenceClassification: ConvBertForSequenceClassification,
+ ConvBertForTokenClassification: ConvBertForTokenClassification,
+ ConvBertModel: ConvBertModel,
+ ConvBertPreTrainedModel: ConvBertPreTrainedModel,
+ ConvBertTokenizer: ConvBertTokenizer,
+ ConvNextFeatureExtractor: ConvNextFeatureExtractor,
+ ConvNextForImageClassification: ConvNextForImageClassification,
+ ConvNextImageProcessor: ConvNextImageProcessor,
+ ConvNextModel: ConvNextModel,
+ ConvNextPreTrainedModel: ConvNextPreTrainedModel,
+ ConvNextV2ForImageClassification: ConvNextV2ForImageClassification,
+ ConvNextV2Model: ConvNextV2Model,
+ ConvNextV2PreTrainedModel: ConvNextV2PreTrainedModel,
+ DPTFeatureExtractor: DPTFeatureExtractor,
+ DPTForDepthEstimation: DPTForDepthEstimation,
+ DPTImageProcessor: DPTImageProcessor,
+ DPTModel: DPTModel,
+ DPTPreTrainedModel: DPTPreTrainedModel,
+ DebertaForMaskedLM: DebertaForMaskedLM,
+ DebertaForQuestionAnswering: DebertaForQuestionAnswering,
+ DebertaForSequenceClassification: DebertaForSequenceClassification,
+ DebertaForTokenClassification: DebertaForTokenClassification,
+ DebertaModel: DebertaModel,
+ DebertaPreTrainedModel: DebertaPreTrainedModel,
+ DebertaTokenizer: DebertaTokenizer,
+ DebertaV2ForMaskedLM: DebertaV2ForMaskedLM,
+ DebertaV2ForQuestionAnswering: DebertaV2ForQuestionAnswering,
+ DebertaV2ForSequenceClassification: DebertaV2ForSequenceClassification,
+ DebertaV2ForTokenClassification: DebertaV2ForTokenClassification,
+ DebertaV2Model: DebertaV2Model,
+ DebertaV2PreTrainedModel: DebertaV2PreTrainedModel,
+ DebertaV2Tokenizer: DebertaV2Tokenizer,
+ DeiTFeatureExtractor: DeiTFeatureExtractor,
+ DeiTForImageClassification: DeiTForImageClassification,
+ DeiTModel: DeiTModel,
+ DeiTPreTrainedModel: DeiTPreTrainedModel,
+ DepthAnythingForDepthEstimation: DepthAnythingForDepthEstimation,
+ DepthAnythingPreTrainedModel: DepthAnythingPreTrainedModel,
+ DepthEstimationPipeline: DepthEstimationPipeline,
+ DetrFeatureExtractor: DetrFeatureExtractor,
+ DetrForObjectDetection: DetrForObjectDetection,
+ DetrForSegmentation: DetrForSegmentation,
+ DetrModel: DetrModel,
+ DetrObjectDetectionOutput: DetrObjectDetectionOutput,
+ DetrPreTrainedModel: DetrPreTrainedModel,
+ DetrSegmentationOutput: DetrSegmentationOutput,
+ Dinov2ForImageClassification: Dinov2ForImageClassification,
+ Dinov2Model: Dinov2Model,
+ Dinov2PreTrainedModel: Dinov2PreTrainedModel,
+ DistilBertForMaskedLM: DistilBertForMaskedLM,
+ DistilBertForQuestionAnswering: DistilBertForQuestionAnswering,
+ DistilBertForSequenceClassification: DistilBertForSequenceClassification,
+ DistilBertForTokenClassification: DistilBertForTokenClassification,
+ DistilBertModel: DistilBertModel,
+ DistilBertPreTrainedModel: DistilBertPreTrainedModel,
+ DistilBertTokenizer: DistilBertTokenizer,
+ DocumentQuestionAnsweringPipeline: DocumentQuestionAnsweringPipeline,
+ DonutFeatureExtractor: DonutFeatureExtractor,
+ DonutSwinModel: DonutSwinModel,
+ DonutSwinPreTrainedModel: DonutSwinPreTrainedModel,
+ EfficientNetForImageClassification: EfficientNetForImageClassification,
+ EfficientNetImageProcessor: EfficientNetImageProcessor,
+ EfficientNetModel: EfficientNetModel,
+ EfficientNetPreTrainedModel: EfficientNetPreTrainedModel,
+ ElectraForMaskedLM: ElectraForMaskedLM,
+ ElectraForQuestionAnswering: ElectraForQuestionAnswering,
+ ElectraForSequenceClassification: ElectraForSequenceClassification,
+ ElectraForTokenClassification: ElectraForTokenClassification,
+ ElectraModel: ElectraModel,
+ ElectraPreTrainedModel: ElectraPreTrainedModel,
+ ElectraTokenizer: ElectraTokenizer,
+ EsmForMaskedLM: EsmForMaskedLM,
+ EsmForSequenceClassification: EsmForSequenceClassification,
+ EsmForTokenClassification: EsmForTokenClassification,
+ EsmModel: EsmModel,
+ EsmPreTrainedModel: EsmPreTrainedModel,
+ EsmTokenizer: EsmTokenizer,
+ FFT: FFT,
+ FalconForCausalLM: FalconForCausalLM,
+ FalconModel: FalconModel,
+ FalconPreTrainedModel: FalconPreTrainedModel,
+ FalconTokenizer: FalconTokenizer,
+ FastViTForImageClassification: FastViTForImageClassification,
+ FastViTModel: FastViTModel,
+ FastViTPreTrainedModel: FastViTPreTrainedModel,
+ FeatureExtractionPipeline: FeatureExtractionPipeline,
+ FeatureExtractor: FeatureExtractor,
+ FillMaskPipeline: FillMaskPipeline,
+ GLPNFeatureExtractor: GLPNFeatureExtractor,
+ GLPNForDepthEstimation: GLPNForDepthEstimation,
+ GLPNModel: GLPNModel,
+ GLPNPreTrainedModel: GLPNPreTrainedModel,
+ GPT2LMHeadModel: GPT2LMHeadModel,
+ GPT2Model: GPT2Model,
+ GPT2PreTrainedModel: GPT2PreTrainedModel,
+ GPT2Tokenizer: GPT2Tokenizer,
+ GPTBigCodeForCausalLM: GPTBigCodeForCausalLM,
+ GPTBigCodeModel: GPTBigCodeModel,
+ GPTBigCodePreTrainedModel: GPTBigCodePreTrainedModel,
+ GPTJForCausalLM: GPTJForCausalLM,
+ GPTJModel: GPTJModel,
+ GPTJPreTrainedModel: GPTJPreTrainedModel,
+ GPTNeoForCausalLM: GPTNeoForCausalLM,
+ GPTNeoModel: GPTNeoModel,
+ GPTNeoPreTrainedModel: GPTNeoPreTrainedModel,
+ GPTNeoXForCausalLM: GPTNeoXForCausalLM,
+ GPTNeoXModel: GPTNeoXModel,
+ GPTNeoXPreTrainedModel: GPTNeoXPreTrainedModel,
+ GPTNeoXTokenizer: GPTNeoXTokenizer,
+ GemmaTokenizer: GemmaTokenizer,
+ Grok1Tokenizer: Grok1Tokenizer,
+ HerbertTokenizer: HerbertTokenizer,
+ HubertForCTC: HubertForCTC,
+ HubertForSequenceClassification: HubertForSequenceClassification,
+ HubertModel: HubertModel,
+ ImageClassificationPipeline: ImageClassificationPipeline,
+ ImageFeatureExtractionPipeline: ImageFeatureExtractionPipeline,
+ ImageFeatureExtractor: ImageFeatureExtractor,
+ ImageMattingOutput: ImageMattingOutput,
+ ImageSegmentationPipeline: ImageSegmentationPipeline,
+ ImageToImagePipeline: ImageToImagePipeline,
+ ImageToTextPipeline: ImageToTextPipeline,
+ LlamaForCausalLM: LlamaForCausalLM,
+ LlamaModel: LlamaModel,
+ LlamaPreTrainedModel: LlamaPreTrainedModel,
+ LlamaTokenizer: LlamaTokenizer,
+ LongT5ForConditionalGeneration: LongT5ForConditionalGeneration,
+ LongT5Model: LongT5Model,
+ LongT5PreTrainedModel: LongT5PreTrainedModel,
+ M2M100ForConditionalGeneration: M2M100ForConditionalGeneration,
+ M2M100Model: M2M100Model,
+ M2M100PreTrainedModel: M2M100PreTrainedModel,
+ M2M100Tokenizer: M2M100Tokenizer,
+ MBart50Tokenizer: MBart50Tokenizer,
+ MBartForCausalLM: MBartForCausalLM,
+ MBartForConditionalGeneration: MBartForConditionalGeneration,
+ MBartForSequenceClassification: MBartForSequenceClassification,
+ MBartModel: MBartModel,
+ MBartPreTrainedModel: MBartPreTrainedModel,
+ MBartTokenizer: MBartTokenizer,
+ MPNetForMaskedLM: MPNetForMaskedLM,
+ MPNetForQuestionAnswering: MPNetForQuestionAnswering,
+ MPNetForSequenceClassification: MPNetForSequenceClassification,
+ MPNetForTokenClassification: MPNetForTokenClassification,
+ MPNetModel: MPNetModel,
+ MPNetPreTrainedModel: MPNetPreTrainedModel,
+ MPNetTokenizer: MPNetTokenizer,
+ MT5ForConditionalGeneration: MT5ForConditionalGeneration,
+ MT5Model: MT5Model,
+ MT5PreTrainedModel: MT5PreTrainedModel,
+ MarianMTModel: MarianMTModel,
+ MarianModel: MarianModel,
+ MarianPreTrainedModel: MarianPreTrainedModel,
+ MarianTokenizer: MarianTokenizer,
+ MaskedLMOutput: MaskedLMOutput,
+ MistralForCausalLM: MistralForCausalLM,
+ MistralModel: MistralModel,
+ MistralPreTrainedModel: MistralPreTrainedModel,
+ MobileBertForMaskedLM: MobileBertForMaskedLM,
+ MobileBertForQuestionAnswering: MobileBertForQuestionAnswering,
+ MobileBertForSequenceClassification: MobileBertForSequenceClassification,
+ MobileBertModel: MobileBertModel,
+ MobileBertPreTrainedModel: MobileBertPreTrainedModel,
+ MobileBertTokenizer: MobileBertTokenizer,
+ MobileViTFeatureExtractor: MobileViTFeatureExtractor,
+ MobileViTForImageClassification: MobileViTForImageClassification,
+ MobileViTImageProcessor: MobileViTImageProcessor,
+ MobileViTModel: MobileViTModel,
+ MobileViTPreTrainedModel: MobileViTPreTrainedModel,
+ MobileViTV2ForImageClassification: MobileViTV2ForImageClassification,
+ MobileViTV2Model: MobileViTV2Model,
+ MobileViTV2PreTrainedModel: MobileViTV2PreTrainedModel,
+ ModelOutput: ModelOutput,
+ MptForCausalLM: MptForCausalLM,
+ MptModel: MptModel,
+ MptPreTrainedModel: MptPreTrainedModel,
+ NllbTokenizer: NllbTokenizer,
+ NomicBertModel: NomicBertModel,
+ NomicBertPreTrainedModel: NomicBertPreTrainedModel,
+ NougatImageProcessor: NougatImageProcessor,
+ NougatTokenizer: NougatTokenizer,
+ OPTForCausalLM: OPTForCausalLM,
+ OPTModel: OPTModel,
+ OPTPreTrainedModel: OPTPreTrainedModel,
+ ObjectDetectionPipeline: ObjectDetectionPipeline,
+ OwlViTFeatureExtractor: OwlViTFeatureExtractor,
+ OwlViTForObjectDetection: OwlViTForObjectDetection,
+ OwlViTModel: OwlViTModel,
+ OwlViTPreTrainedModel: OwlViTPreTrainedModel,
+ OwlViTProcessor: OwlViTProcessor,
+ Owlv2ForObjectDetection: Owlv2ForObjectDetection,
+ Owlv2ImageProcessor: Owlv2ImageProcessor,
+ Owlv2Model: Owlv2Model,
+ Owlv2PreTrainedModel: Owlv2PreTrainedModel,
+ PhiForCausalLM: PhiForCausalLM,
+ PhiModel: PhiModel,
+ PhiPreTrainedModel: PhiPreTrainedModel,
+ Pipeline: Pipeline,
+ PreTrainedModel: PreTrainedModel,
+ PreTrainedTokenizer: PreTrainedTokenizer,
+ PretrainedConfig: PretrainedConfig,
+ PretrainedMixin: PretrainedMixin,
+ Processor: Processor,
+ QuestionAnsweringModelOutput: QuestionAnsweringModelOutput,
+ QuestionAnsweringPipeline: QuestionAnsweringPipeline,
+ Qwen2ForCausalLM: Qwen2ForCausalLM,
+ Qwen2Model: Qwen2Model,
+ Qwen2PreTrainedModel: Qwen2PreTrainedModel,
+ Qwen2Tokenizer: Qwen2Tokenizer,
+ RawImage: RawImage,
+ ResNetForImageClassification: ResNetForImageClassification,
+ ResNetModel: ResNetModel,
+ ResNetPreTrainedModel: ResNetPreTrainedModel,
+ RoFormerForMaskedLM: RoFormerForMaskedLM,
+ RoFormerForQuestionAnswering: RoFormerForQuestionAnswering,
+ RoFormerForSequenceClassification: RoFormerForSequenceClassification,
+ RoFormerForTokenClassification: RoFormerForTokenClassification,
+ RoFormerModel: RoFormerModel,
+ RoFormerPreTrainedModel: RoFormerPreTrainedModel,
+ RoFormerTokenizer: RoFormerTokenizer,
+ RobertaForMaskedLM: RobertaForMaskedLM,
+ RobertaForQuestionAnswering: RobertaForQuestionAnswering,
+ RobertaForSequenceClassification: RobertaForSequenceClassification,
+ RobertaForTokenClassification: RobertaForTokenClassification,
+ RobertaModel: RobertaModel,
+ RobertaPreTrainedModel: RobertaPreTrainedModel,
+ RobertaTokenizer: RobertaTokenizer,
+ SamImageProcessor: SamImageProcessor,
+ SamImageSegmentationOutput: SamImageSegmentationOutput,
+ SamModel: SamModel,
+ SamPreTrainedModel: SamPreTrainedModel,
+ SamProcessor: SamProcessor,
+ SeamlessM4TFeatureExtractor: SeamlessM4TFeatureExtractor,
+ SegformerFeatureExtractor: SegformerFeatureExtractor,
+ SegformerForImageClassification: SegformerForImageClassification,
+ SegformerForSemanticSegmentation: SegformerForSemanticSegmentation,
+ SegformerPreTrainedModel: SegformerPreTrainedModel,
+ Seq2SeqLMOutput: Seq2SeqLMOutput,
+ SequenceClassifierOutput: SequenceClassifierOutput,
+ SiglipImageProcessor: SiglipImageProcessor,
+ SiglipModel: SiglipModel,
+ SiglipPreTrainedModel: SiglipPreTrainedModel,
+ SiglipTextModel: SiglipTextModel,
+ SiglipTokenizer: SiglipTokenizer,
+ SiglipVisionModel: SiglipVisionModel,
+ SpeechT5FeatureExtractor: SpeechT5FeatureExtractor,
+ SpeechT5ForSpeechToText: SpeechT5ForSpeechToText,
+ SpeechT5ForTextToSpeech: SpeechT5ForTextToSpeech,
+ SpeechT5HifiGan: SpeechT5HifiGan,
+ SpeechT5PreTrainedModel: SpeechT5PreTrainedModel,
+ SpeechT5Processor: SpeechT5Processor,
+ SpeechT5Tokenizer: SpeechT5Tokenizer,
+ SqueezeBertForMaskedLM: SqueezeBertForMaskedLM,
+ SqueezeBertForQuestionAnswering: SqueezeBertForQuestionAnswering,
+ SqueezeBertForSequenceClassification: SqueezeBertForSequenceClassification,
+ SqueezeBertModel: SqueezeBertModel,
+ SqueezeBertPreTrainedModel: SqueezeBertPreTrainedModel,
+ SqueezeBertTokenizer: SqueezeBertTokenizer,
+ StableLmForCausalLM: StableLmForCausalLM,
+ StableLmPreTrainedModel: StableLmPreTrainedModel,
+ Starcoder2ForCausalLM: Starcoder2ForCausalLM,
+ Starcoder2Model: Starcoder2Model,
+ Starcoder2PreTrainedModel: Starcoder2PreTrainedModel,
+ SummarizationPipeline: SummarizationPipeline,
+ Swin2SRForImageSuperResolution: Swin2SRForImageSuperResolution,
+ Swin2SRImageProcessor: Swin2SRImageProcessor,
+ Swin2SRModel: Swin2SRModel,
+ Swin2SRPreTrainedModel: Swin2SRPreTrainedModel,
+ SwinForImageClassification: SwinForImageClassification,
+ SwinModel: SwinModel,
+ SwinPreTrainedModel: SwinPreTrainedModel,
+ T5ForConditionalGeneration: T5ForConditionalGeneration,
+ T5Model: T5Model,
+ T5PreTrainedModel: T5PreTrainedModel,
+ T5Tokenizer: T5Tokenizer,
+ TableTransformerForObjectDetection: TableTransformerForObjectDetection,
+ TableTransformerModel: TableTransformerModel,
+ TableTransformerObjectDetectionOutput: TableTransformerObjectDetectionOutput,
+ TableTransformerPreTrainedModel: TableTransformerPreTrainedModel,
+ Tensor: Tensor,
+ Text2TextGenerationPipeline: Text2TextGenerationPipeline,
+ TextClassificationPipeline: TextClassificationPipeline,
+ TextGenerationPipeline: TextGenerationPipeline,
+ TextToAudioPipeline: TextToAudioPipeline,
+ TokenClassificationPipeline: TokenClassificationPipeline,
+ TokenClassifierOutput: TokenClassifierOutput,
+ TokenizerModel: TokenizerModel,
+ TrOCRForCausalLM: TrOCRForCausalLM,
+ TrOCRPreTrainedModel: TrOCRPreTrainedModel,
+ TranslationPipeline: TranslationPipeline,
+ UniSpeechForCTC: UniSpeechForCTC,
+ UniSpeechForSequenceClassification: UniSpeechForSequenceClassification,
+ UniSpeechModel: UniSpeechModel,
+ UniSpeechPreTrainedModel: UniSpeechPreTrainedModel,
+ UniSpeechSatForAudioFrameClassification: UniSpeechSatForAudioFrameClassification,
+ UniSpeechSatForCTC: UniSpeechSatForCTC,
+ UniSpeechSatForSequenceClassification: UniSpeechSatForSequenceClassification,
+ UniSpeechSatModel: UniSpeechSatModel,
+ UniSpeechSatPreTrainedModel: UniSpeechSatPreTrainedModel,
+ ViTFeatureExtractor: ViTFeatureExtractor,
+ ViTForImageClassification: ViTForImageClassification,
+ ViTImageProcessor: ViTImageProcessor,
+ ViTModel: ViTModel,
+ ViTPreTrainedModel: ViTPreTrainedModel,
+ VisionEncoderDecoderModel: VisionEncoderDecoderModel,
+ VitMatteForImageMatting: VitMatteForImageMatting,
+ VitMatteImageProcessor: VitMatteImageProcessor,
+ VitMattePreTrainedModel: VitMattePreTrainedModel,
+ VitsModel: VitsModel,
+ VitsModelOutput: VitsModelOutput,
+ VitsPreTrainedModel: VitsPreTrainedModel,
+ VitsTokenizer: VitsTokenizer,
+ Wav2Vec2BertForCTC: Wav2Vec2BertForCTC,
+ Wav2Vec2BertForSequenceClassification: Wav2Vec2BertForSequenceClassification,
+ Wav2Vec2BertModel: Wav2Vec2BertModel,
+ Wav2Vec2BertPreTrainedModel: Wav2Vec2BertPreTrainedModel,
+ Wav2Vec2CTCTokenizer: Wav2Vec2CTCTokenizer,
+ Wav2Vec2FeatureExtractor: Wav2Vec2FeatureExtractor,
+ Wav2Vec2ForAudioFrameClassification: Wav2Vec2ForAudioFrameClassification,
+ Wav2Vec2ForCTC: Wav2Vec2ForCTC,
+ Wav2Vec2ForSequenceClassification: Wav2Vec2ForSequenceClassification,
+ Wav2Vec2Model: Wav2Vec2Model,
+ Wav2Vec2PreTrainedModel: Wav2Vec2PreTrainedModel,
+ Wav2Vec2ProcessorWithLM: Wav2Vec2ProcessorWithLM,
+ WavLMForAudioFrameClassification: WavLMForAudioFrameClassification,
+ WavLMForCTC: WavLMForCTC,
+ WavLMForSequenceClassification: WavLMForSequenceClassification,
+ WavLMForXVector: WavLMForXVector,
+ WavLMModel: WavLMModel,
+ WavLMPreTrainedModel: WavLMPreTrainedModel,
+ WhisperFeatureExtractor: WhisperFeatureExtractor,
+ WhisperForConditionalGeneration: WhisperForConditionalGeneration,
+ WhisperModel: WhisperModel,
+ WhisperPreTrainedModel: WhisperPreTrainedModel,
+ WhisperProcessor: WhisperProcessor,
+ WhisperTokenizer: WhisperTokenizer,
+ XLMForQuestionAnswering: XLMForQuestionAnswering,
+ XLMForSequenceClassification: XLMForSequenceClassification,
+ XLMForTokenClassification: XLMForTokenClassification,
+ XLMModel: XLMModel,
+ XLMPreTrainedModel: XLMPreTrainedModel,
+ XLMRobertaForMaskedLM: XLMRobertaForMaskedLM,
+ XLMRobertaForQuestionAnswering: XLMRobertaForQuestionAnswering,
+ XLMRobertaForSequenceClassification: XLMRobertaForSequenceClassification,
+ XLMRobertaForTokenClassification: XLMRobertaForTokenClassification,
+ XLMRobertaModel: XLMRobertaModel,
+ XLMRobertaPreTrainedModel: XLMRobertaPreTrainedModel,
+ XLMRobertaTokenizer: XLMRobertaTokenizer,
+ XLMTokenizer: XLMTokenizer,
+ XLMWithLMHeadModel: XLMWithLMHeadModel,
+ XVectorOutput: XVectorOutput,
+ YolosFeatureExtractor: YolosFeatureExtractor,
+ YolosForObjectDetection: YolosForObjectDetection,
+ YolosModel: YolosModel,
+ YolosObjectDetectionOutput: YolosObjectDetectionOutput,
+ YolosPreTrainedModel: YolosPreTrainedModel,
+ ZeroShotAudioClassificationPipeline: ZeroShotAudioClassificationPipeline,
+ ZeroShotClassificationPipeline: ZeroShotClassificationPipeline,
+ ZeroShotImageClassificationPipeline: ZeroShotImageClassificationPipeline,
+ ZeroShotObjectDetectionPipeline: ZeroShotObjectDetectionPipeline,
+ bankers_round: bankers_round,
+ cat: cat,
+ dynamicTimeWarping: dynamicTimeWarping,
+ env: env$2,
+ getTopItems: getTopItems,
+ hanning: hanning,
+ interpolate: interpolate,
+ interpolate_data: interpolate_data,
+ log_softmax: log_softmax,
+ max: max,
+ mean: mean,
+ mean_pooling: mean_pooling,
+ medianFilter: medianFilter,
+ mel_filter_bank: mel_filter_bank,
+ min: min,
+ ones: ones,
+ ones_like: ones_like,
+ permute: permute,
+ permute_data: permute_data,
+ pipeline: pipeline$1,
+ quantize_embeddings: quantize_embeddings,
+ read_audio: read_audio,
+ round: round,
+ softmax: softmax,
+ spectrogram: spectrogram,
+ stack: stack,
+ std_mean: std_mean,
+ window_function: window_function
+ });
+
+ const { pipeline, env } = Transformers;
  /**
   * ONNXEngine implements MLCEngineInterface using Transformers.js (ONNX Runtime Web).
   * This provides a CPU/WASM fallback for browsers that do not support WebGPU.
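
This added block is the bundler's rendering of the Transformers.js entry point: the library's full public surface is frozen into a namespace object, and the runner then destructures `pipeline` and `env` from it, which is what displaces the bare `env` name and causes the renames earlier in this diff. Un-bundled, the new wiring is roughly equivalent to this sketch (assuming the inlined library follows the @xenova/transformers v2 API; the model id is illustrative):

    // import { pipeline, env } from '@xenova/transformers';
    env.allowLocalModels = false;  // resolve models remotely, never from local paths
    env.useBrowserCache = true;    // persist downloaded files via the Web Cache API
    const generator = await pipeline('text-generation', 'Xenova/gpt2');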
@@ -36688,6 +37193,7 @@ class ONNXEngine {
  modelId = null;
  appConfig;
  initProgressCallback;
+ repoId = null;
  // APIs
  chat;
  completions;
@@ -36698,8 +37204,8 @@ class ONNXEngine {
  this.embeddings = new Embeddings(this);
  this.appConfig = prebuiltAppConfig;
  // Default env settings for browser
- env$1.allowLocalModels = false;
- env$1.useBrowserCache = true;
+ env.allowLocalModels = false;
+ env.useBrowserCache = true;
  }
  setInitProgressCallback(initProgressCallback) {
  this.initProgressCallback = initProgressCallback;
@@ -36719,6 +37225,7 @@ class ONNXEngine {
  const { findModelRecord } = await Promise.resolve().then(function () { return support; });
  const record = findModelRecord(id, this.appConfig);
  repoId = record.onnx_id || id;
+ this.repoId = repoId;
  }
  catch (e) {
  log.warn(`Model record not found for ${id}, using raw ID for ONNX.`);
@@ -36739,8 +37246,9 @@ class ONNXEngine {
  }
  try {
  // For T5 models, text2text-generation is the standard task in transformers.js
- const task = repoId.toLowerCase().includes("t5") ? "text2text-generation" : "text-generation";
- this.generator = await pipeline(task, repoId, {
+ const currentRepoId = this.repoId || id;
+ const task = currentRepoId.toLowerCase().includes("t5") ? "text2text-generation" : "text-generation";
+ this.generator = await pipeline(task, currentRepoId, {
  progress_callback: (p) => {
  if (this.initProgressCallback && (p.status === 'progress' || p.status === 'downloading')) {
  const pctValue = (typeof p.progress === 'number') ? p.progress : 0;
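
`loadModel` now records the resolved repo id on the instance (`this.repoId`) so later calls can recover it, and derives the pipeline task from that resolved id instead of a local variable. The selection rule, pulled out for clarity (a sketch of the logic above, not a helper that exists in the bundle):

    // T5-family models are encoder-decoder, so transformers.js runs them
    // under 'text2text-generation'; everything else is treated as decoder-only.
    function taskFor(repoId) {
      return repoId.toLowerCase().includes('t5')
        ? 'text2text-generation'
        : 'text-generation';
    }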
@@ -36812,28 +37320,138 @@ class ONNXEngine {
  };
  }
  async *asyncGenerateStreaming(prompt, request) {
- // Current simple implementation yields only a single chunk.
- // In future iterations, we can integrate the Transformers.js TextStreamer
- const result = await this.generateNonStreaming(prompt, request);
- const content = result.choices[0].message.content;
+ if (!this.generator)
+ throw new Error("ONNX model not loaded.");
+ const model = this.modelId;
+ const created = Math.floor(Date.now() / 1000);
+ const id = crypto.randomUUID();
+ const queue = [];
+ let isDone = false;
+ const streamer = new undefined(this.generator.tokenizer, {
+ skip_prompt: true,
+ callback_function: (text) => {
+ queue.push(text);
+ },
+ });
+ // Run generation in the background
+ (this.repoId || "").toLowerCase().includes("t5") ? "text2text-generation" : "text-generation";
+ this.generator(prompt, {
+ max_new_tokens: request.max_tokens || 256,
+ temperature: request.temperature || 0.7,
+ top_p: request.top_p || 1.0,
+ do_sample: (request.temperature ?? 1.0) > 0,
+ repetition_penalty: request.repetition_penalty || 1.1,
+ streamer,
+ }).finally(() => {
+ isDone = true;
+ });
+ while (!isDone || queue.length > 0) {
+ if (queue.length > 0) {
+ const content = queue.shift();
+ yield {
+ id,
+ choices: [{
+ delta: { content },
+ finish_reason: null,
+ index: 0,
+ }],
+ model,
+ object: 'chat.completion.chunk',
+ created,
+ };
+ }
+ else {
+ await new Promise(r => setTimeout(r, 10));
+ }
+ }
  yield {
- id: result.id,
+ id,
  choices: [{
- delta: { role: 'assistant', content: content },
+ delta: {},
  finish_reason: 'stop',
  index: 0,
- logprobs: null
  }],
- model: result.model,
+ model,
  object: 'chat.completion.chunk',
- created: result.created
+ created,
  };
  }
- async completion(_request) {
- throw new Error("Generic completion not yet implemented in ONNXEngine fallback.");
+ async completion(request) {
+ if (!this.generator)
+ throw new Error("ONNX model not loaded.");
+ const prompt = typeof request.prompt === 'string' ? request.prompt : (Array.isArray(request.prompt) ? request.prompt[0] : "");
+ if (request.stream) {
+ return this.asyncGenerateStreamingCompletion(prompt, request);
+ }
+ else {
+ const result = await this.generator(prompt, {
+ max_new_tokens: request.max_tokens || 256,
+ temperature: request.temperature || 0.7,
+ top_p: request.top_p || 1.0,
+ do_sample: (request.temperature ?? 1.0) > 0,
+ repetition_penalty: request.repetition_penalty || 1.1,
+ });
+ const fullText = result[0].generated_text;
+ const text = fullText.startsWith(prompt) ? fullText.slice(prompt.length) : fullText;
+ return {
+ id: crypto.randomUUID(),
+ choices: [{
+ text,
+ finish_reason: 'stop',
+ index: 0,
+ logprobs: null
+ }],
+ model: this.modelId,
+ object: 'text_completion',
+ created: Math.floor(Date.now() / 1000),
+ usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }
+ };
+ }
+ }
+ async *asyncGenerateStreamingCompletion(prompt, request) {
+ const id = crypto.randomUUID();
+ const created = Math.floor(Date.now() / 1000);
+ const model = this.modelId;
+ const queue = [];
+ let isDone = false;
+ const streamer = new undefined(this.generator.tokenizer, {
+ skip_prompt: true,
+ callback_function: (text) => { queue.push(text); },
+ });
+ this.generator(prompt, {
+ max_new_tokens: request.max_tokens || 256,
+ temperature: request.temperature || 0.7,
+ streamer,
+ }).finally(() => { isDone = true; });
+ while (!isDone || queue.length > 0) {
+ if (queue.length > 0) {
+ yield {
+ id,
+ choices: [{ text: queue.shift(), finish_reason: null, index: 0 }],
+ model,
+ object: 'text_completion',
+ created,
+ };
+ }
+ else {
+ await new Promise(r => setTimeout(r, 10));
+ }
+ }
  }
- async embedding(_request) {
- throw new Error("Embeddings not yet implemented in ONNXEngine fallback.");
+ async embedding(request) {
+ const input = Array.isArray(request.input) ? request.input : [request.input];
+ const extractor = await pipeline('feature-extraction', this.modelId);
+ const results = await Promise.all(input.map(text => extractor(text, { pooling: 'mean', normalize: true })));
+ return {
+ object: 'list',
+ data: results.map((res, i) => ({
+ object: 'embedding',
+ index: i,
+ embedding: Array.from(res.data)
+ })),
+ model: this.modelId,
+ usage: { prompt_tokens: 0, total_tokens: 0, extra: {} }
+ };
  }
  async runtimeStatsText() {
  return "Backend: ONNX Runtime (WASM/CPU Fallback)";
@@ -38034,7 +38652,28 @@ class WebLLM {
  return list.map((m) => m.model_id);
  }
  async local_model_available(model_id) {
- return await hasModelInCache(model_id);
+ const isMLCCached = await hasModelInCache(model_id);
+ if (isMLCCached)
+ return true;
+ // Check ONNX cache fallback
+ const record = prebuiltAppConfig.model_list.find(m => m.model_id === model_id);
+ if (record && record.onnx_id) {
+ return await this.hasONNXInCache(record.onnx_id);
+ }
+ return false;
+ }
+ async hasONNXInCache(onnx_id) {
+ if (typeof caches === 'undefined')
+ return false;
+ try {
+ const cache = await caches.open('transformers-cache');
+ const url = `https://huggingface.co/${onnx_id}/resolve/main/config.json`;
+ const match = await cache.match(url);
+ return !!match;
+ }
+ catch (e) {
+ return false;
+ }
  }
  async download_model(model_id, progressCallback) {
  // Initial feedback
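
`hasONNXInCache` leans on two defaults of the bundled Transformers.js: browser caching writes to a Web Cache named `transformers-cache`, and files are fetched from `https://huggingface.co/{model}/resolve/{revision}/{file}` (the `remoteHost` and `remotePathTemplate` seen earlier in this diff). Probing for the model's `config.json` is a cheap presence test, but it can report a false negative if the model was cached through a custom `env.customCache` or a non-default remote host.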
@@ -38055,6 +38694,14 @@ class WebLLM {
  return this.downloadProgress[model_id] || "No progress available.";
  }
  async delete_model(model_id) {
+ const record = prebuiltAppConfig.model_list.find(m => m.model_id === model_id);
+ if (record && record.onnx_id) {
+ // For ONNX, we currently clear the whole transformers-cache for simplicity
+ // as individual file deletion is complex without a full manifest.
+ if (typeof caches !== 'undefined') {
+ await caches.delete('transformers-cache');
+ }
+ }
  await deleteModelAllInfoInCache(model_id);
  }
  // chat endpoints (Stateful)
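
Note that the ONNX branch of `delete_model` is deliberately coarse: `caches.delete('transformers-cache')` drops the entire shared cache, so deleting one ONNX model also evicts every other cached ONNX model, as the inline comment acknowledges.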