@omote/core 0.1.2 → 0.2.0

This diff reflects the changes between publicly released package versions as they appear in their respective registries. It is provided for informational purposes only.
package/dist/index.js CHANGED
@@ -5982,7 +5982,7 @@ async function load_video(src, { num_frames = null, fps = null } = {}) {
5982
5982
  video.remove();
5983
5983
  return new RawVideo(frames, duration);
5984
5984
  }
5985
- var ONNX_WEB, import_meta, __defProp2, __export2, emptyObj, node_fs_default, emptyObj2, node_path_default, emptyObj3, node_url_default, VERSION, IS_PROCESS_AVAILABLE, IS_NODE_ENV, IS_FS_AVAILABLE, IS_PATH_AVAILABLE, IS_DENO_RUNTIME, IS_BUN_RUNTIME, IS_BROWSER_ENV, IS_WEBWORKER_ENV, IS_WEB_CACHE_AVAILABLE, IS_WEBGPU_AVAILABLE, IS_WEBNN_AVAILABLE, isSafari, IS_SAFARI, apis, RUNNING_LOCALLY, dirname__, DEFAULT_CACHE_DIR, DEFAULT_LOCAL_MODEL_PATH, localModelPath, env2, Callable, CONTENT_TYPE_MAP, FileResponse, FileCache, ERROR_MAPPING, MAX_EXTERNAL_DATA_CHUNKS, REPO_ID_REGEX, P2FFT, NP2FFT, FFT, uint16_to_float32, onnxruntime_node_exports, noop, emptyObj4, onnxruntime_node_default, Readable, pipeline, createWriteStream, createReadStream, DEVICE_TO_EXECUTION_PROVIDER_MAPPING, LOG_LEVELS, DEFAULT_LOG_LEVEL, supportedDevices, defaultDevices, ONNX, ORT_SYMBOL, InferenceSession2, IS_WEB_ENV, webInitChain, wasmLoadPromise, webInferenceChain, ONNX_ENV, wrap, _a, TensorOpRegistry, DataTypeMap, Tensor22, arrayToIndexTensor, PriorityQueue, CharTrie, CharTrieNode, TokenLattice, TokenLatticeNode, DictionarySplitter, LRUCache, TOKEN_TYPES, Token, ORDERED_MAPPING_TABLE, ESCAPE_CHARACTERS, Statement, Program, If, For, Break, Continue, SetStatement, Macro, Comment, Expression, MemberExpression, CallExpression, Identifier, Literal, IntegerLiteral, FloatLiteral, StringLiteral, ArrayLiteral, TupleLiteral, ObjectLiteral, BinaryExpression, FilterExpression, FilterStatement, SelectExpression, TestExpression, UnaryExpression, SliceExpression, KeywordArgumentExpression, SpreadExpression, CallStatement, Ternary, BreakControl, ContinueControl, RuntimeValue, IntegerValue, FloatValue, StringValue, BooleanValue, ObjectValue, KeywordArgumentsValue, ArrayValue, TupleValue, FunctionValue, NullValue, UndefinedValue, Environment, Interpreter, NEWLINE, OPEN_STATEMENT, CLOSE_STATEMENT, Template, WHISPER_LANGUAGES, WHISPER_LANGUAGE_MAPPING, WHISPER_TO_LANGUAGE_CODE_MAPPING, PUNCTUATION_REGEX, PUNCTUATION_ONLY_REGEX, BLOOM_SPLIT_CHARS, PROBLEMATIC_REGEX_MAP, AddedToken, TokenizerModel, WordPieceTokenizer, Unigram, BYTES_TO_UNICODE, UNICODE_TO_BYTES, BPE, LegacyTokenizerModel, Normalizer, Replace, UnicodeNormalizer, NFC, NFD, NFKC, NFKD, StripNormalizer, StripAccents, Lowercase, Prepend, NormalizerSequence, BertNormalizer, PreTokenizer, BertPreTokenizer, ByteLevelPreTokenizer, SplitPreTokenizer, PunctuationPreTokenizer, DigitsPreTokenizer, PostProcessor, BertProcessing, RobertaProcessing, TemplateProcessing, ByteLevelPostProcessor, PostProcessorSequence, Decoder, ReplaceDecoder, ByteFallback, FuseDecoder, StripDecoder, WordPieceDecoder, ByteLevelDecoder, CTCDecoder, DecoderSequence, BPEDecoder, VitsDecoder, MetaspacePreTokenizer, MetaspaceDecoder, Precompiled, PreTokenizerSequence, WhitespacePreTokenizer, WhitespaceSplit, ReplacePreTokenizer, FixedLengthPreTokenizer, SPECIAL_TOKEN_ATTRIBUTES, PreTrainedTokenizer, TokenizersBackend, BertTokenizer, AlbertTokenizer, MobileBertTokenizer, SqueezeBertTokenizer, DebertaTokenizer, DebertaV2Tokenizer, HerbertTokenizer, ConvBertTokenizer, RoFormerTokenizer, DistilBertTokenizer, CamembertTokenizer, XLMTokenizer, ElectraTokenizer, T5Tokenizer, GPT2Tokenizer, BartTokenizer, MBartTokenizer, MBart50Tokenizer, RobertaTokenizer, BloomTokenizer, SPIECE_UNDERLINE, LlamaTokenizer, CodeLlamaTokenizer, XLMRobertaTokenizer, MPNetTokenizer, FalconTokenizer, GPTNeoXTokenizer, EsmTokenizer, Qwen2Tokenizer, GemmaTokenizer, Grok1Tokenizer, NllbTokenizer, M2M100Tokenizer, WhisperTokenizer, CodeGenTokenizer, 
CLIPTokenizer, SiglipTokenizer, MarianTokenizer, Wav2Vec2CTCTokenizer, BlenderbotTokenizer, BlenderbotSmallTokenizer, SpeechT5Tokenizer, NougatTokenizer, VitsTokenizer, CohereTokenizer, MgpstrTokenizer, Ernie4_5_Tokenizer, _a2, AutoTokenizer, GITHUB_ISSUE_URL, FEATURE_EXTRACTOR_NAME, IMAGE_PROCESSOR_NAME, PROCESSOR_NAME, CHAT_TEMPLATE_NAME, _a3, Processor, processors_exports, FeatureExtractor, feature_extractors_exports, noop2, Readable2, noop3, pipeline2, HERTZ_TO_MEL_MAPPING, MEL_TO_HERTZ_MAPPING, RawAudio, ASTFeatureExtractor, EncodecFeatureExtractor, ChatterboxFeatureExtractor, ClapFeatureExtractor, DacFeatureExtractor, Gemma3nAudioFeatureExtractor, MoonshineFeatureExtractor, EPSILON, ParakeetFeatureExtractor, PyAnnoteFeatureExtractor, SeamlessM4TFeatureExtractor, SnacFeatureExtractor, SpeechT5FeatureExtractor, Wav2Vec2FeatureExtractor, WeSpeakerFeatureExtractor, WhisperFeatureExtractor, emptyObj5, sharp_default, createCanvasFunction, ImageDataClass, loadImageFunction, IS_BROWSER_OR_WEBWORKER, RESAMPLING_MAPPING, CONTENT_TYPE_MAP2, RawImage, load_image, ImageProcessor, AutoFeatureExtractor, _a4, ChatterboxProcessor, image_processors_exports, BeitFeatureExtractor, BitImageProcessor, ChineseCLIPFeatureExtractor, CLIPImageProcessor, CLIPFeatureExtractor, ConvNextImageProcessor, ConvNextFeatureExtractor, DeiTImageProcessor, DeiTFeatureExtractor, DetrImageProcessor, DetrFeatureExtractor, DINOv3ViTImageProcessor, DonutImageProcessor, DonutFeatureExtractor, DPTImageProcessor, DPTFeatureExtractor, EfficientNetImageProcessor, GLPNFeatureExtractor, GroundingDinoImageProcessor, Idefics3ImageProcessor, VLMImageProcessor, JinaCLIPImageProcessor, LlavaOnevisionImageProcessor, MaskFormerImageProcessor, MaskFormerFeatureExtractor, Mask2FormerImageProcessor, MobileNetV1ImageProcessor, MobileNetV1FeatureExtractor, MobileNetV2ImageProcessor, MobileNetV2FeatureExtractor, MobileNetV3ImageProcessor, MobileNetV3FeatureExtractor, MobileNetV4ImageProcessor, MobileNetV4FeatureExtractor, MobileViTImageProcessor, MobileViTFeatureExtractor, NougatImageProcessor, OwlViTImageProcessor, OwlViTFeatureExtractor, Owlv2ImageProcessor, IMAGE_SIZE, SLICE_AXES, ceil, floor, sqrt, Phi3VImageProcessor, PvtImageProcessor, Qwen2VLImageProcessor, RTDetrImageProcessor, SamImageProcessor, SegformerImageProcessor, SegformerFeatureExtractor, SiglipImageProcessor, Swin2SRImageProcessor, ViTImageProcessor, ViTFeatureExtractor, VitMatteImageProcessor, VitPoseImageProcessor, YolosImageProcessor, YolosFeatureExtractor, AutoImageProcessor, _a5, Florence2Processor, _a6, Gemma3nProcessor, _a7, GroundingDinoProcessor, _a8, Idefics3Processor, _a9, VLChatProcessor, _a10, JinaCLIPProcessor, _a11, LlavaProcessor, DECODE_TYPE_MAPPING, _a12, MgpstrProcessor, _a13, MoonshineProcessor, _a14, OwlViTProcessor, IMAGE_TOKEN, IMAGE_TOKEN_PATTERN, _a15, Phi3VProcessor, IMAGE_TOKEN2, _a16, PaliGemmaProcessor, _a17, PyAnnoteProcessor, _a18, Qwen2VLProcessor, _a19, SamProcessor, Sam2Processor, Sam2VideoProcessor, _a20, SpeechT5Processor, _a21, UltravoxProcessor, AUDIO_TOKEN, BEGIN_AUDIO_TOKEN, NUM_AUDIO_TOKENS, _a22, VoxtralProcessor, _a23, Wav2Vec2Processor, _a24, Wav2Vec2ProcessorWithLM, _a25, WhisperProcessor, AutoProcessor, PretrainedConfig, AutoConfig, DEVICE_TYPES, isWebGpuFp16Supported, DATA_TYPES, DEFAULT_DEVICE_DTYPE_MAPPING, DEFAULT_DTYPE_SUFFIX_MAPPING, LogitsProcessor, LogitsWarper, LogitsProcessorList, ForcedBOSTokenLogitsProcessor, ForcedEOSTokenLogitsProcessor, SuppressTokensAtBeginLogitsProcessor, WhisperTimeStampLogitsProcessor, 
NoRepeatNGramLogitsProcessor, RepetitionPenaltyLogitsProcessor, MinLengthLogitsProcessor, MinNewTokensLengthLogitsProcessor, NoBadWordsLogitsProcessor, ClassifierFreeGuidanceLogitsProcessor, TemperatureLogitsWarper, TopPLogitsWarper, TopKLogitsWarper, GenerationConfig, StoppingCriteria, StoppingCriteriaList, MaxLengthCriteria, EosTokenCriteria, InterruptableStoppingCriteria, LogitsSampler, GreedySampler, MultinomialSampler, BeamSearchSampler, WhisperGenerationConfig, MODEL_TYPES, MODEL_TYPE_MAPPING, MODEL_NAME_TO_CLASS_MAPPING, MODEL_CLASS_TO_NAME_MAPPING, PreTrainedModel, ModelOutput, BaseModelOutput, BertPreTrainedModel, BertModel, BertForMaskedLM, BertForSequenceClassification, BertForTokenClassification, BertForQuestionAnswering, NeoBertPreTrainedModel, NeoBertModel, NeoBertForMaskedLM, NeoBertForSequenceClassification, NeoBertForTokenClassification, NeoBertForQuestionAnswering, ModernBertPreTrainedModel, ModernBertModel, ModernBertForMaskedLM, ModernBertForSequenceClassification, ModernBertForTokenClassification, ModernBertDecoderPreTrainedModel, ModernBertDecoderModel, ModernBertDecoderForCausalLM, NomicBertPreTrainedModel, NomicBertModel, RoFormerPreTrainedModel, RoFormerModel, RoFormerForMaskedLM, RoFormerForSequenceClassification, RoFormerForTokenClassification, RoFormerForQuestionAnswering, ConvBertPreTrainedModel, ConvBertModel, ConvBertForMaskedLM, ConvBertForSequenceClassification, ConvBertForTokenClassification, ConvBertForQuestionAnswering, ElectraPreTrainedModel, ElectraModel, ElectraForMaskedLM, ElectraForSequenceClassification, ElectraForTokenClassification, ElectraForQuestionAnswering, CamembertPreTrainedModel, CamembertModel, CamembertForMaskedLM, CamembertForSequenceClassification, CamembertForTokenClassification, CamembertForQuestionAnswering, DebertaPreTrainedModel, DebertaModel, DebertaForMaskedLM, DebertaForSequenceClassification, DebertaForTokenClassification, DebertaForQuestionAnswering, DebertaV2PreTrainedModel, DebertaV2Model, DebertaV2ForMaskedLM, DebertaV2ForSequenceClassification, DebertaV2ForTokenClassification, DebertaV2ForQuestionAnswering, DistilBertPreTrainedModel, DistilBertModel, DistilBertForSequenceClassification, DistilBertForTokenClassification, DistilBertForQuestionAnswering, DistilBertForMaskedLM, EsmPreTrainedModel, EsmModel, EsmForMaskedLM, EsmForSequenceClassification, EsmForTokenClassification, MobileBertPreTrainedModel, MobileBertModel, MobileBertForMaskedLM, MobileBertForSequenceClassification, MobileBertForQuestionAnswering, MPNetPreTrainedModel, MPNetModel, MPNetForMaskedLM, MPNetForSequenceClassification, MPNetForTokenClassification, MPNetForQuestionAnswering, SqueezeBertPreTrainedModel, SqueezeBertModel, SqueezeBertForMaskedLM, SqueezeBertForSequenceClassification, SqueezeBertForQuestionAnswering, AlbertPreTrainedModel, AlbertModel, AlbertForSequenceClassification, AlbertForQuestionAnswering, AlbertForMaskedLM, T5PreTrainedModel, T5Model, T5ForConditionalGeneration, LongT5PreTrainedModel, LongT5Model, LongT5ForConditionalGeneration, MT5PreTrainedModel, MT5Model, MT5ForConditionalGeneration, BartPretrainedModel, BartModel, BartForConditionalGeneration, BartForSequenceClassification, MBartPreTrainedModel, MBartModel, MBartForConditionalGeneration, MBartForSequenceClassification, MBartForCausalLM, BlenderbotPreTrainedModel, BlenderbotModel, BlenderbotForConditionalGeneration, BlenderbotSmallPreTrainedModel, BlenderbotSmallModel, BlenderbotSmallForConditionalGeneration, RobertaPreTrainedModel, RobertaModel, RobertaForMaskedLM, 
RobertaForSequenceClassification, RobertaForTokenClassification, RobertaForQuestionAnswering, XLMPreTrainedModel, XLMModel, XLMWithLMHeadModel, XLMForSequenceClassification, XLMForTokenClassification, XLMForQuestionAnswering, XLMRobertaPreTrainedModel, XLMRobertaModel, XLMRobertaForMaskedLM, XLMRobertaForSequenceClassification, XLMRobertaForTokenClassification, XLMRobertaForQuestionAnswering, ASTPreTrainedModel, ASTModel, ASTForAudioClassification, WhisperPreTrainedModel, WhisperModel, WhisperForConditionalGeneration, LiteWhisperForConditionalGeneration, MoonshinePreTrainedModel, MoonshineModel, MoonshineForConditionalGeneration, VisionEncoderDecoderModel, LlavaPreTrainedModel, LlavaForConditionalGeneration, LlavaOnevisionForConditionalGeneration, Moondream1ForConditionalGeneration, Florence2PreTrainedModel, Florence2ForConditionalGeneration, PaliGemmaPreTrainedModel, PaliGemmaForConditionalGeneration, LlavaQwen2ForCausalLM, Gemma3nPreTrainedModel, Gemma3nForConditionalGeneration, Idefics3PreTrainedModel, Idefics3ForConditionalGeneration, SmolVLMForConditionalGeneration, Phi3VPreTrainedModel, Phi3VForCausalLM, CLIPPreTrainedModel, CLIPModel, CLIPTextModel, CLIPTextModelWithProjection, CLIPVisionModel, CLIPVisionModelWithProjection, SiglipPreTrainedModel, SiglipModel, SiglipTextModel, SiglipVisionModel, ChineseCLIPPreTrainedModel, ChineseCLIPModel, JinaCLIPPreTrainedModel, JinaCLIPModel, JinaCLIPTextModel, JinaCLIPVisionModel, CLIPSegPreTrainedModel, CLIPSegModel, CLIPSegForImageSegmentation, GPT2PreTrainedModel, GPT2Model, GPT2LMHeadModel, GptOssPreTrainedModel, GptOssModel, GptOssForCausalLM, JAISPreTrainedModel, JAISModel, JAISLMHeadModel, GPTNeoPreTrainedModel, GPTNeoModel, GPTNeoForCausalLM, GPTNeoXPreTrainedModel, GPTNeoXModel, GPTNeoXForCausalLM, GPTJPreTrainedModel, GPTJModel, GPTJForCausalLM, GPTBigCodePreTrainedModel, GPTBigCodeModel, GPTBigCodeForCausalLM, CodeGenPreTrainedModel, CodeGenModel, CodeGenForCausalLM, LlamaPreTrainedModel, LlamaModel, LlamaForCausalLM, Llama4PreTrainedModel, Llama4ForCausalLM, NanoChatPreTrainedModel, NanoChatModel, NanoChatForCausalLM, ApertusPreTrainedModel, ApertusModel, ApertusForCausalLM, ArceePreTrainedModel, ArceeModel, ArceeForCausalLM, Lfm2PreTrainedModel, Lfm2Model, Lfm2ForCausalLM, SmolLM3PreTrainedModel, SmolLM3Model, SmolLM3ForCausalLM, HeliumPreTrainedModel, HeliumModel, HeliumForCausalLM, GlmPreTrainedModel, GlmModel, GlmForCausalLM, ExaonePreTrainedModel, ExaoneModel, ExaoneForCausalLM, MobileLLMPreTrainedModel, MobileLLMModel, MobileLLMForCausalLM, OlmoPreTrainedModel, OlmoModel, OlmoForCausalLM, Olmo2PreTrainedModel, Olmo2Model, Olmo2ForCausalLM, Olmo3PreTrainedModel, Olmo3Model, Olmo3ForCausalLM, GranitePreTrainedModel, GraniteModel, GraniteForCausalLM, GraniteMoeHybridPreTrainedModel, GraniteMoeHybridModel, GraniteMoeHybridForCausalLM, CoherePreTrainedModel, CohereModel, CohereForCausalLM, GemmaPreTrainedModel, GemmaModel, GemmaForCausalLM, Gemma2PreTrainedModel, Gemma2Model, Gemma2ForCausalLM, VaultGemmaPreTrainedModel, VaultGemmaModel, VaultGemmaForCausalLM, Gemma3PreTrainedModel, Gemma3Model, Gemma3ForCausalLM, OpenELMPreTrainedModel, OpenELMModel, OpenELMForCausalLM, Qwen2PreTrainedModel, Qwen2Model, Qwen2ForCausalLM, Qwen3PreTrainedModel, Qwen3Model, Qwen3ForCausalLM, Qwen2VLPreTrainedModel, Qwen2VLForConditionalGeneration, PhiPreTrainedModel, PhiModel, PhiForCausalLM, Phi3PreTrainedModel, Phi3Model, Phi3ForCausalLM, BloomPreTrainedModel, BloomModel, BloomForCausalLM, MptPreTrainedModel, MptModel, MptForCausalLM, 
OPTPreTrainedModel, OPTModel, OPTForCausalLM, ViTPreTrainedModel, ViTModel, ViTForImageClassification, IJepaPreTrainedModel, IJepaModel, IJepaForImageClassification, VitPosePreTrainedModel, VitPoseForPoseEstimation, PvtPreTrainedModel, PvtModel, PvtForImageClassification, ViTMAEPreTrainedModel, ViTMAEModel, ViTMSNPreTrainedModel, ViTMSNModel, ViTMSNForImageClassification, GroupViTPreTrainedModel, GroupViTModel, FastViTPreTrainedModel, FastViTModel, FastViTForImageClassification, VitMattePreTrainedModel, VitMatteForImageMatting, MobileViTPreTrainedModel, MobileViTModel, MobileViTForImageClassification, MobileViTV2PreTrainedModel, MobileViTV2Model, MobileViTV2ForImageClassification, OwlViTPreTrainedModel, OwlViTModel, OwlViTForObjectDetection, Owlv2PreTrainedModel, Owlv2Model, Owlv2ForObjectDetection, BeitPreTrainedModel, BeitModel, BeitForImageClassification, DetrPreTrainedModel, DetrModel, DetrForObjectDetection, DetrForSegmentation, DetrObjectDetectionOutput, DetrSegmentationOutput, RTDetrPreTrainedModel, RTDetrModel, RTDetrForObjectDetection, RTDetrObjectDetectionOutput, RTDetrV2PreTrainedModel, RTDetrV2Model, RTDetrV2ForObjectDetection, RTDetrV2ObjectDetectionOutput, RFDetrPreTrainedModel, RFDetrModel, RFDetrForObjectDetection, RFDetrObjectDetectionOutput, DFinePreTrainedModel, DFineModel, DFineForObjectDetection, TableTransformerPreTrainedModel, TableTransformerModel, TableTransformerForObjectDetection, TableTransformerObjectDetectionOutput, DeiTPreTrainedModel, DeiTModel, DeiTForImageClassification, HieraPreTrainedModel, HieraModel, HieraForImageClassification, ResNetPreTrainedModel, ResNetModel, ResNetForImageClassification, SwinPreTrainedModel, SwinModel, SwinForImageClassification, SwinForSemanticSegmentation, Swin2SRPreTrainedModel, Swin2SRModel, Swin2SRForImageSuperResolution, DPTPreTrainedModel, DPTModel, DPTForDepthEstimation, DepthAnythingPreTrainedModel, DepthAnythingForDepthEstimation, SapiensPreTrainedModel, SapiensForSemanticSegmentation, SapiensForDepthEstimation, SapiensForNormalEstimation, DepthProPreTrainedModel, DepthProForDepthEstimation, Metric3DPreTrainedModel, Metric3DForDepthEstimation, Metric3Dv2PreTrainedModel, Metric3Dv2ForDepthEstimation, MaskFormerPreTrainedModel, MaskFormerModel, MaskFormerForInstanceSegmentation, GLPNPreTrainedModel, GLPNModel, GLPNForDepthEstimation, DonutSwinPreTrainedModel, DonutSwinModel, ConvNextPreTrainedModel, ConvNextModel, ConvNextForImageClassification, ConvNextV2PreTrainedModel, ConvNextV2Model, ConvNextV2ForImageClassification, Dinov2PreTrainedModel, Dinov2Model, Dinov2ForImageClassification, Dinov2WithRegistersPreTrainedModel, Dinov2WithRegistersModel, Dinov2WithRegistersForImageClassification, DINOv3ViTPreTrainedModel, DINOv3ViTModel, DINOv3ConvNextPreTrainedModel, DINOv3ConvNextModel, GroundingDinoPreTrainedModel, GroundingDinoForObjectDetection, YolosPreTrainedModel, YolosModel, YolosForObjectDetection, YolosObjectDetectionOutput, SamPreTrainedModel, SamModel, SamImageSegmentationOutput, Sam2ImageSegmentationOutput, Sam2PreTrainedModel, Sam2Model, EdgeTamModel, Sam3TrackerModel, MarianPreTrainedModel, MarianModel, MarianMTModel, M2M100PreTrainedModel, M2M100Model, M2M100ForConditionalGeneration, Wav2Vec2PreTrainedModel, Wav2Vec2Model, Wav2Vec2ForCTC, Wav2Vec2ForSequenceClassification, Wav2Vec2ForAudioFrameClassification, ParakeetPreTrainedModel, ParakeetForCTC, PyAnnotePreTrainedModel, PyAnnoteModel, PyAnnoteForAudioFrameClassification, WeSpeakerResNetPreTrainedModel, WeSpeakerResNetModel, UniSpeechPreTrainedModel, 
UniSpeechModel, UniSpeechForCTC, UniSpeechForSequenceClassification, UniSpeechSatPreTrainedModel, UniSpeechSatModel, UniSpeechSatForCTC, UniSpeechSatForSequenceClassification, UniSpeechSatForAudioFrameClassification, Wav2Vec2BertPreTrainedModel, Wav2Vec2BertModel, Wav2Vec2BertForCTC, Wav2Vec2BertForSequenceClassification, HubertPreTrainedModel, HubertModel, HubertForCTC, HubertForSequenceClassification, WavLMPreTrainedModel, WavLMModel, WavLMForCTC, WavLMForSequenceClassification, WavLMForXVector, WavLMForAudioFrameClassification, StyleTextToSpeech2PreTrainedModel, StyleTextToSpeech2Model, SpeechT5PreTrainedModel, SpeechT5Model, SpeechT5ForSpeechToText, SpeechT5ForTextToSpeech, SpeechT5HifiGan, SupertonicPreTrainedModel, SupertonicForConditionalGeneration, TrOCRPreTrainedModel, TrOCRForCausalLM, MistralPreTrainedModel, MistralModel, MistralForCausalLM, Ernie4_5_PretrainedModel, Ernie4_5_Model, Ernie4_5_ForCausalLM, Starcoder2PreTrainedModel, Starcoder2Model, Starcoder2ForCausalLM, FalconPreTrainedModel, FalconModel, FalconForCausalLM, ClapPreTrainedModel, ClapModel, ClapTextModelWithProjection, ClapAudioModelWithProjection, VitsPreTrainedModel, VitsModel, SegformerPreTrainedModel, SegformerModel, SegformerForImageClassification, SegformerForSemanticSegmentation, StableLmPreTrainedModel, StableLmModel, StableLmForCausalLM, EfficientNetPreTrainedModel, EfficientNetModel, EfficientNetForImageClassification, MusicgenPreTrainedModel, MusicgenModel, MusicgenForCausalLM, MusicgenForConditionalGeneration, MobileNetV1PreTrainedModel, MobileNetV1Model, MobileNetV1ForImageClassification, MobileNetV1ForSemanticSegmentation, MobileNetV2PreTrainedModel, MobileNetV2Model, MobileNetV2ForImageClassification, MobileNetV2ForSemanticSegmentation, MobileNetV3PreTrainedModel, MobileNetV3Model, MobileNetV3ForImageClassification, MobileNetV3ForSemanticSegmentation, MobileNetV4PreTrainedModel, MobileNetV4Model, MobileNetV4ForImageClassification, MobileNetV4ForSemanticSegmentation, DecisionTransformerPreTrainedModel, DecisionTransformerModel, MultiModalityPreTrainedModel, MultiModalityCausalLM, MgpstrModelOutput, MgpstrPreTrainedModel, MgpstrForSceneTextRecognition, PatchTSTPreTrainedModel, PatchTSTModel, PatchTSTForPrediction, PatchTSMixerPreTrainedModel, PatchTSMixerModel, PatchTSMixerForPrediction, UltravoxPreTrainedModel, UltravoxModel, VoxtralForConditionalGeneration, MimiPreTrainedModel, MimiEncoderOutput, MimiDecoderOutput, MimiModel, MimiEncoderModel, MimiDecoderModel, DacPreTrainedModel, DacEncoderOutput, DacDecoderOutput, DacModel, DacEncoderModel, DacDecoderModel, SnacPreTrainedModel, SnacModel, SnacEncoderModel, SnacDecoderModel, ChatterboxPreTrainedModel, ChatterboxModel, _a26, PretrainedMixin, MODEL_MAPPING_NAMES_ENCODER_ONLY, MODEL_MAPPING_NAMES_ENCODER_DECODER, MODEL_MAPPING_NAMES_AUTO_ENCODER, MODEL_MAPPING_NAMES_DECODER_ONLY, MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES, MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPING_NAMES, MODEL_FOR_TEXT_TO_WAVEFORM_MAPPING_NAMES, MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES, MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES, MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES, MODEL_FOR_CAUSAL_LM_MAPPING_NAMES, MODEL_FOR_MULTIMODALITY_MAPPING_NAMES, MODEL_FOR_MASKED_LM_MAPPING_NAMES, MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES, MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES, MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES, MODEL_FOR_AUDIO_TEXT_TO_TEXT_MAPPING_NAMES, MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES, MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES, 
MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES, MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING_NAMES, MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES, MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES, MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING_NAMES, MODEL_FOR_MASK_GENERATION_MAPPING_NAMES, MODEL_FOR_CTC_MAPPING_NAMES, MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES, MODEL_FOR_AUDIO_XVECTOR_MAPPING_NAMES, MODEL_FOR_AUDIO_FRAME_CLASSIFICATION_MAPPING_NAMES, MODEL_FOR_IMAGE_MATTING_MAPPING_NAMES, MODEL_FOR_TIME_SERIES_PREDICTION_MAPPING_NAMES, MODEL_FOR_IMAGE_TO_IMAGE_MAPPING_NAMES, MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES, MODEL_FOR_NORMAL_ESTIMATION_MAPPING_NAMES, MODEL_FOR_POSE_ESTIMATION_MAPPING_NAMES, MODEL_FOR_IMAGE_FEATURE_EXTRACTION_MAPPING_NAMES, MODEL_CLASS_TYPE_MAPPING, CUSTOM_MAPPING, CUSTOM_ARCHITECTURES, _a27, AutoModel, _a28, AutoModelForSequenceClassification, _a29, AutoModelForTokenClassification, _a30, AutoModelForSeq2SeqLM, _a31, AutoModelForSpeechSeq2Seq, _a32, AutoModelForTextToSpectrogram, _a33, AutoModelForTextToWaveform, _a34, AutoModelForCausalLM, _a35, AutoModelForMaskedLM, _a36, AutoModelForQuestionAnswering, _a37, AutoModelForVision2Seq, _a38, AutoModelForImageClassification, _a39, AutoModelForImageSegmentation, _a40, AutoModelForSemanticSegmentation, _a41, AutoModelForUniversalSegmentation, _a42, AutoModelForObjectDetection, _a43, AutoModelForZeroShotObjectDetection, _a44, AutoModelForMaskGeneration, _a45, AutoModelForCTC, _a46, AutoModelForAudioClassification, _a47, AutoModelForXVector, _a48, AutoModelForAudioFrameClassification, _a49, AutoModelForDocumentQuestionAnswering, _a50, AutoModelForImageMatting, _a51, AutoModelForImageToImage, _a52, AutoModelForDepthEstimation, _a53, AutoModelForNormalEstimation, _a54, AutoModelForPoseEstimation, _a55, AutoModelForImageFeatureExtraction, _a56, AutoModelForImageTextToText, _a57, AutoModelForAudioTextToText, Seq2SeqLMOutput, SequenceClassifierOutput, XVectorOutput, TokenClassifierOutput, MaskedLMOutput, QuestionAnsweringModelOutput, CausalLMOutput, CausalLMOutputWithPast, ImageMattingOutput, VitsModelOutput, Pipeline, TextClassificationPipeline, TokenClassificationPipeline, QuestionAnsweringPipeline, FillMaskPipeline, Text2TextGenerationPipeline, SummarizationPipeline, TranslationPipeline, TextGenerationPipeline, ZeroShotClassificationPipeline, AudioClassificationPipeline, ZeroShotAudioClassificationPipeline, AutomaticSpeechRecognitionPipeline, TextToAudioPipeline, ImageToTextPipeline, ImageClassificationPipeline, ImageSegmentationPipeline, BackgroundRemovalPipeline, ZeroShotImageClassificationPipeline, ObjectDetectionPipeline, ZeroShotObjectDetectionPipeline, DocumentQuestionAnsweringPipeline, ImageToImagePipeline, DepthEstimationPipeline, FeatureExtractionPipeline, ImageFeatureExtractionPipeline, SUPPORTED_TASKS, TASK_ALIASES, RawVideoFrame, RawVideo, BaseStreamer, stdout_write, TextStreamer, WhisperTextStreamer;
5985
+ var ONNX_WEB, import_meta, __defProp2, __export2, emptyObj, node_fs_default, emptyObj2, node_path_default, emptyObj3, node_url_default, VERSION, IS_PROCESS_AVAILABLE, IS_NODE_ENV, IS_FS_AVAILABLE, IS_PATH_AVAILABLE, IS_DENO_RUNTIME, IS_BUN_RUNTIME, IS_BROWSER_ENV, IS_WEBWORKER_ENV, IS_WEB_CACHE_AVAILABLE, IS_WEBGPU_AVAILABLE, IS_WEBNN_AVAILABLE, isSafari2, IS_SAFARI, apis, RUNNING_LOCALLY, dirname__, DEFAULT_CACHE_DIR, DEFAULT_LOCAL_MODEL_PATH, localModelPath, env2, Callable, CONTENT_TYPE_MAP, FileResponse, FileCache, ERROR_MAPPING, MAX_EXTERNAL_DATA_CHUNKS, REPO_ID_REGEX, P2FFT, NP2FFT, FFT, uint16_to_float32, onnxruntime_node_exports, noop, emptyObj4, onnxruntime_node_default, Readable, pipeline, createWriteStream, createReadStream, DEVICE_TO_EXECUTION_PROVIDER_MAPPING, LOG_LEVELS, DEFAULT_LOG_LEVEL, supportedDevices, defaultDevices, ONNX, ORT_SYMBOL, InferenceSession2, IS_WEB_ENV, webInitChain, wasmLoadPromise, webInferenceChain, ONNX_ENV, wrap, _a, TensorOpRegistry, DataTypeMap, Tensor22, arrayToIndexTensor, PriorityQueue, CharTrie, CharTrieNode, TokenLattice, TokenLatticeNode, DictionarySplitter, LRUCache, TOKEN_TYPES, Token, ORDERED_MAPPING_TABLE, ESCAPE_CHARACTERS, Statement, Program, If, For, Break, Continue, SetStatement, Macro, Comment, Expression, MemberExpression, CallExpression, Identifier, Literal, IntegerLiteral, FloatLiteral, StringLiteral, ArrayLiteral, TupleLiteral, ObjectLiteral, BinaryExpression, FilterExpression, FilterStatement, SelectExpression, TestExpression, UnaryExpression, SliceExpression, KeywordArgumentExpression, SpreadExpression, CallStatement, Ternary, BreakControl, ContinueControl, RuntimeValue, IntegerValue, FloatValue, StringValue, BooleanValue, ObjectValue, KeywordArgumentsValue, ArrayValue, TupleValue, FunctionValue, NullValue, UndefinedValue, Environment, Interpreter, NEWLINE, OPEN_STATEMENT, CLOSE_STATEMENT, Template, WHISPER_LANGUAGES, WHISPER_LANGUAGE_MAPPING, WHISPER_TO_LANGUAGE_CODE_MAPPING, PUNCTUATION_REGEX, PUNCTUATION_ONLY_REGEX, BLOOM_SPLIT_CHARS, PROBLEMATIC_REGEX_MAP, AddedToken, TokenizerModel, WordPieceTokenizer, Unigram, BYTES_TO_UNICODE, UNICODE_TO_BYTES, BPE, LegacyTokenizerModel, Normalizer, Replace, UnicodeNormalizer, NFC, NFD, NFKC, NFKD, StripNormalizer, StripAccents, Lowercase, Prepend, NormalizerSequence, BertNormalizer, PreTokenizer, BertPreTokenizer, ByteLevelPreTokenizer, SplitPreTokenizer, PunctuationPreTokenizer, DigitsPreTokenizer, PostProcessor, BertProcessing, RobertaProcessing, TemplateProcessing, ByteLevelPostProcessor, PostProcessorSequence, Decoder, ReplaceDecoder, ByteFallback, FuseDecoder, StripDecoder, WordPieceDecoder, ByteLevelDecoder, CTCDecoder, DecoderSequence, BPEDecoder, VitsDecoder, MetaspacePreTokenizer, MetaspaceDecoder, Precompiled, PreTokenizerSequence, WhitespacePreTokenizer, WhitespaceSplit, ReplacePreTokenizer, FixedLengthPreTokenizer, SPECIAL_TOKEN_ATTRIBUTES, PreTrainedTokenizer, TokenizersBackend, BertTokenizer, AlbertTokenizer, MobileBertTokenizer, SqueezeBertTokenizer, DebertaTokenizer, DebertaV2Tokenizer, HerbertTokenizer, ConvBertTokenizer, RoFormerTokenizer, DistilBertTokenizer, CamembertTokenizer, XLMTokenizer, ElectraTokenizer, T5Tokenizer, GPT2Tokenizer, BartTokenizer, MBartTokenizer, MBart50Tokenizer, RobertaTokenizer, BloomTokenizer, SPIECE_UNDERLINE, LlamaTokenizer, CodeLlamaTokenizer, XLMRobertaTokenizer, MPNetTokenizer, FalconTokenizer, GPTNeoXTokenizer, EsmTokenizer, Qwen2Tokenizer, GemmaTokenizer, Grok1Tokenizer, NllbTokenizer, M2M100Tokenizer, WhisperTokenizer, CodeGenTokenizer, 
CLIPTokenizer, SiglipTokenizer, MarianTokenizer, Wav2Vec2CTCTokenizer, BlenderbotTokenizer, BlenderbotSmallTokenizer, SpeechT5Tokenizer, NougatTokenizer, VitsTokenizer, CohereTokenizer, MgpstrTokenizer, Ernie4_5_Tokenizer, _a2, AutoTokenizer, GITHUB_ISSUE_URL, FEATURE_EXTRACTOR_NAME, IMAGE_PROCESSOR_NAME, PROCESSOR_NAME, CHAT_TEMPLATE_NAME, _a3, Processor, processors_exports, FeatureExtractor, feature_extractors_exports, noop2, Readable2, noop3, pipeline2, HERTZ_TO_MEL_MAPPING, MEL_TO_HERTZ_MAPPING, RawAudio, ASTFeatureExtractor, EncodecFeatureExtractor, ChatterboxFeatureExtractor, ClapFeatureExtractor, DacFeatureExtractor, Gemma3nAudioFeatureExtractor, MoonshineFeatureExtractor, EPSILON, ParakeetFeatureExtractor, PyAnnoteFeatureExtractor, SeamlessM4TFeatureExtractor, SnacFeatureExtractor, SpeechT5FeatureExtractor, Wav2Vec2FeatureExtractor, WeSpeakerFeatureExtractor, WhisperFeatureExtractor, emptyObj5, sharp_default, createCanvasFunction, ImageDataClass, loadImageFunction, IS_BROWSER_OR_WEBWORKER, RESAMPLING_MAPPING, CONTENT_TYPE_MAP2, RawImage, load_image, ImageProcessor, AutoFeatureExtractor, _a4, ChatterboxProcessor, image_processors_exports, BeitFeatureExtractor, BitImageProcessor, ChineseCLIPFeatureExtractor, CLIPImageProcessor, CLIPFeatureExtractor, ConvNextImageProcessor, ConvNextFeatureExtractor, DeiTImageProcessor, DeiTFeatureExtractor, DetrImageProcessor, DetrFeatureExtractor, DINOv3ViTImageProcessor, DonutImageProcessor, DonutFeatureExtractor, DPTImageProcessor, DPTFeatureExtractor, EfficientNetImageProcessor, GLPNFeatureExtractor, GroundingDinoImageProcessor, Idefics3ImageProcessor, VLMImageProcessor, JinaCLIPImageProcessor, LlavaOnevisionImageProcessor, MaskFormerImageProcessor, MaskFormerFeatureExtractor, Mask2FormerImageProcessor, MobileNetV1ImageProcessor, MobileNetV1FeatureExtractor, MobileNetV2ImageProcessor, MobileNetV2FeatureExtractor, MobileNetV3ImageProcessor, MobileNetV3FeatureExtractor, MobileNetV4ImageProcessor, MobileNetV4FeatureExtractor, MobileViTImageProcessor, MobileViTFeatureExtractor, NougatImageProcessor, OwlViTImageProcessor, OwlViTFeatureExtractor, Owlv2ImageProcessor, IMAGE_SIZE, SLICE_AXES, ceil, floor, sqrt, Phi3VImageProcessor, PvtImageProcessor, Qwen2VLImageProcessor, RTDetrImageProcessor, SamImageProcessor, SegformerImageProcessor, SegformerFeatureExtractor, SiglipImageProcessor, Swin2SRImageProcessor, ViTImageProcessor, ViTFeatureExtractor, VitMatteImageProcessor, VitPoseImageProcessor, YolosImageProcessor, YolosFeatureExtractor, AutoImageProcessor, _a5, Florence2Processor, _a6, Gemma3nProcessor, _a7, GroundingDinoProcessor, _a8, Idefics3Processor, _a9, VLChatProcessor, _a10, JinaCLIPProcessor, _a11, LlavaProcessor, DECODE_TYPE_MAPPING, _a12, MgpstrProcessor, _a13, MoonshineProcessor, _a14, OwlViTProcessor, IMAGE_TOKEN, IMAGE_TOKEN_PATTERN, _a15, Phi3VProcessor, IMAGE_TOKEN2, _a16, PaliGemmaProcessor, _a17, PyAnnoteProcessor, _a18, Qwen2VLProcessor, _a19, SamProcessor, Sam2Processor, Sam2VideoProcessor, _a20, SpeechT5Processor, _a21, UltravoxProcessor, AUDIO_TOKEN, BEGIN_AUDIO_TOKEN, NUM_AUDIO_TOKENS, _a22, VoxtralProcessor, _a23, Wav2Vec2Processor, _a24, Wav2Vec2ProcessorWithLM, _a25, WhisperProcessor, AutoProcessor, PretrainedConfig, AutoConfig, DEVICE_TYPES, isWebGpuFp16Supported, DATA_TYPES, DEFAULT_DEVICE_DTYPE_MAPPING, DEFAULT_DTYPE_SUFFIX_MAPPING, LogitsProcessor, LogitsWarper, LogitsProcessorList, ForcedBOSTokenLogitsProcessor, ForcedEOSTokenLogitsProcessor, SuppressTokensAtBeginLogitsProcessor, WhisperTimeStampLogitsProcessor, 
NoRepeatNGramLogitsProcessor, RepetitionPenaltyLogitsProcessor, MinLengthLogitsProcessor, MinNewTokensLengthLogitsProcessor, NoBadWordsLogitsProcessor, ClassifierFreeGuidanceLogitsProcessor, TemperatureLogitsWarper, TopPLogitsWarper, TopKLogitsWarper, GenerationConfig, StoppingCriteria, StoppingCriteriaList, MaxLengthCriteria, EosTokenCriteria, InterruptableStoppingCriteria, LogitsSampler, GreedySampler, MultinomialSampler, BeamSearchSampler, WhisperGenerationConfig, MODEL_TYPES, MODEL_TYPE_MAPPING, MODEL_NAME_TO_CLASS_MAPPING, MODEL_CLASS_TO_NAME_MAPPING, PreTrainedModel, ModelOutput, BaseModelOutput, BertPreTrainedModel, BertModel, BertForMaskedLM, BertForSequenceClassification, BertForTokenClassification, BertForQuestionAnswering, NeoBertPreTrainedModel, NeoBertModel, NeoBertForMaskedLM, NeoBertForSequenceClassification, NeoBertForTokenClassification, NeoBertForQuestionAnswering, ModernBertPreTrainedModel, ModernBertModel, ModernBertForMaskedLM, ModernBertForSequenceClassification, ModernBertForTokenClassification, ModernBertDecoderPreTrainedModel, ModernBertDecoderModel, ModernBertDecoderForCausalLM, NomicBertPreTrainedModel, NomicBertModel, RoFormerPreTrainedModel, RoFormerModel, RoFormerForMaskedLM, RoFormerForSequenceClassification, RoFormerForTokenClassification, RoFormerForQuestionAnswering, ConvBertPreTrainedModel, ConvBertModel, ConvBertForMaskedLM, ConvBertForSequenceClassification, ConvBertForTokenClassification, ConvBertForQuestionAnswering, ElectraPreTrainedModel, ElectraModel, ElectraForMaskedLM, ElectraForSequenceClassification, ElectraForTokenClassification, ElectraForQuestionAnswering, CamembertPreTrainedModel, CamembertModel, CamembertForMaskedLM, CamembertForSequenceClassification, CamembertForTokenClassification, CamembertForQuestionAnswering, DebertaPreTrainedModel, DebertaModel, DebertaForMaskedLM, DebertaForSequenceClassification, DebertaForTokenClassification, DebertaForQuestionAnswering, DebertaV2PreTrainedModel, DebertaV2Model, DebertaV2ForMaskedLM, DebertaV2ForSequenceClassification, DebertaV2ForTokenClassification, DebertaV2ForQuestionAnswering, DistilBertPreTrainedModel, DistilBertModel, DistilBertForSequenceClassification, DistilBertForTokenClassification, DistilBertForQuestionAnswering, DistilBertForMaskedLM, EsmPreTrainedModel, EsmModel, EsmForMaskedLM, EsmForSequenceClassification, EsmForTokenClassification, MobileBertPreTrainedModel, MobileBertModel, MobileBertForMaskedLM, MobileBertForSequenceClassification, MobileBertForQuestionAnswering, MPNetPreTrainedModel, MPNetModel, MPNetForMaskedLM, MPNetForSequenceClassification, MPNetForTokenClassification, MPNetForQuestionAnswering, SqueezeBertPreTrainedModel, SqueezeBertModel, SqueezeBertForMaskedLM, SqueezeBertForSequenceClassification, SqueezeBertForQuestionAnswering, AlbertPreTrainedModel, AlbertModel, AlbertForSequenceClassification, AlbertForQuestionAnswering, AlbertForMaskedLM, T5PreTrainedModel, T5Model, T5ForConditionalGeneration, LongT5PreTrainedModel, LongT5Model, LongT5ForConditionalGeneration, MT5PreTrainedModel, MT5Model, MT5ForConditionalGeneration, BartPretrainedModel, BartModel, BartForConditionalGeneration, BartForSequenceClassification, MBartPreTrainedModel, MBartModel, MBartForConditionalGeneration, MBartForSequenceClassification, MBartForCausalLM, BlenderbotPreTrainedModel, BlenderbotModel, BlenderbotForConditionalGeneration, BlenderbotSmallPreTrainedModel, BlenderbotSmallModel, BlenderbotSmallForConditionalGeneration, RobertaPreTrainedModel, RobertaModel, RobertaForMaskedLM, 
RobertaForSequenceClassification, RobertaForTokenClassification, RobertaForQuestionAnswering, XLMPreTrainedModel, XLMModel, XLMWithLMHeadModel, XLMForSequenceClassification, XLMForTokenClassification, XLMForQuestionAnswering, XLMRobertaPreTrainedModel, XLMRobertaModel, XLMRobertaForMaskedLM, XLMRobertaForSequenceClassification, XLMRobertaForTokenClassification, XLMRobertaForQuestionAnswering, ASTPreTrainedModel, ASTModel, ASTForAudioClassification, WhisperPreTrainedModel, WhisperModel, WhisperForConditionalGeneration, LiteWhisperForConditionalGeneration, MoonshinePreTrainedModel, MoonshineModel, MoonshineForConditionalGeneration, VisionEncoderDecoderModel, LlavaPreTrainedModel, LlavaForConditionalGeneration, LlavaOnevisionForConditionalGeneration, Moondream1ForConditionalGeneration, Florence2PreTrainedModel, Florence2ForConditionalGeneration, PaliGemmaPreTrainedModel, PaliGemmaForConditionalGeneration, LlavaQwen2ForCausalLM, Gemma3nPreTrainedModel, Gemma3nForConditionalGeneration, Idefics3PreTrainedModel, Idefics3ForConditionalGeneration, SmolVLMForConditionalGeneration, Phi3VPreTrainedModel, Phi3VForCausalLM, CLIPPreTrainedModel, CLIPModel, CLIPTextModel, CLIPTextModelWithProjection, CLIPVisionModel, CLIPVisionModelWithProjection, SiglipPreTrainedModel, SiglipModel, SiglipTextModel, SiglipVisionModel, ChineseCLIPPreTrainedModel, ChineseCLIPModel, JinaCLIPPreTrainedModel, JinaCLIPModel, JinaCLIPTextModel, JinaCLIPVisionModel, CLIPSegPreTrainedModel, CLIPSegModel, CLIPSegForImageSegmentation, GPT2PreTrainedModel, GPT2Model, GPT2LMHeadModel, GptOssPreTrainedModel, GptOssModel, GptOssForCausalLM, JAISPreTrainedModel, JAISModel, JAISLMHeadModel, GPTNeoPreTrainedModel, GPTNeoModel, GPTNeoForCausalLM, GPTNeoXPreTrainedModel, GPTNeoXModel, GPTNeoXForCausalLM, GPTJPreTrainedModel, GPTJModel, GPTJForCausalLM, GPTBigCodePreTrainedModel, GPTBigCodeModel, GPTBigCodeForCausalLM, CodeGenPreTrainedModel, CodeGenModel, CodeGenForCausalLM, LlamaPreTrainedModel, LlamaModel, LlamaForCausalLM, Llama4PreTrainedModel, Llama4ForCausalLM, NanoChatPreTrainedModel, NanoChatModel, NanoChatForCausalLM, ApertusPreTrainedModel, ApertusModel, ApertusForCausalLM, ArceePreTrainedModel, ArceeModel, ArceeForCausalLM, Lfm2PreTrainedModel, Lfm2Model, Lfm2ForCausalLM, SmolLM3PreTrainedModel, SmolLM3Model, SmolLM3ForCausalLM, HeliumPreTrainedModel, HeliumModel, HeliumForCausalLM, GlmPreTrainedModel, GlmModel, GlmForCausalLM, ExaonePreTrainedModel, ExaoneModel, ExaoneForCausalLM, MobileLLMPreTrainedModel, MobileLLMModel, MobileLLMForCausalLM, OlmoPreTrainedModel, OlmoModel, OlmoForCausalLM, Olmo2PreTrainedModel, Olmo2Model, Olmo2ForCausalLM, Olmo3PreTrainedModel, Olmo3Model, Olmo3ForCausalLM, GranitePreTrainedModel, GraniteModel, GraniteForCausalLM, GraniteMoeHybridPreTrainedModel, GraniteMoeHybridModel, GraniteMoeHybridForCausalLM, CoherePreTrainedModel, CohereModel, CohereForCausalLM, GemmaPreTrainedModel, GemmaModel, GemmaForCausalLM, Gemma2PreTrainedModel, Gemma2Model, Gemma2ForCausalLM, VaultGemmaPreTrainedModel, VaultGemmaModel, VaultGemmaForCausalLM, Gemma3PreTrainedModel, Gemma3Model, Gemma3ForCausalLM, OpenELMPreTrainedModel, OpenELMModel, OpenELMForCausalLM, Qwen2PreTrainedModel, Qwen2Model, Qwen2ForCausalLM, Qwen3PreTrainedModel, Qwen3Model, Qwen3ForCausalLM, Qwen2VLPreTrainedModel, Qwen2VLForConditionalGeneration, PhiPreTrainedModel, PhiModel, PhiForCausalLM, Phi3PreTrainedModel, Phi3Model, Phi3ForCausalLM, BloomPreTrainedModel, BloomModel, BloomForCausalLM, MptPreTrainedModel, MptModel, MptForCausalLM, 
OPTPreTrainedModel, OPTModel, OPTForCausalLM, ViTPreTrainedModel, ViTModel, ViTForImageClassification, IJepaPreTrainedModel, IJepaModel, IJepaForImageClassification, VitPosePreTrainedModel, VitPoseForPoseEstimation, PvtPreTrainedModel, PvtModel, PvtForImageClassification, ViTMAEPreTrainedModel, ViTMAEModel, ViTMSNPreTrainedModel, ViTMSNModel, ViTMSNForImageClassification, GroupViTPreTrainedModel, GroupViTModel, FastViTPreTrainedModel, FastViTModel, FastViTForImageClassification, VitMattePreTrainedModel, VitMatteForImageMatting, MobileViTPreTrainedModel, MobileViTModel, MobileViTForImageClassification, MobileViTV2PreTrainedModel, MobileViTV2Model, MobileViTV2ForImageClassification, OwlViTPreTrainedModel, OwlViTModel, OwlViTForObjectDetection, Owlv2PreTrainedModel, Owlv2Model, Owlv2ForObjectDetection, BeitPreTrainedModel, BeitModel, BeitForImageClassification, DetrPreTrainedModel, DetrModel, DetrForObjectDetection, DetrForSegmentation, DetrObjectDetectionOutput, DetrSegmentationOutput, RTDetrPreTrainedModel, RTDetrModel, RTDetrForObjectDetection, RTDetrObjectDetectionOutput, RTDetrV2PreTrainedModel, RTDetrV2Model, RTDetrV2ForObjectDetection, RTDetrV2ObjectDetectionOutput, RFDetrPreTrainedModel, RFDetrModel, RFDetrForObjectDetection, RFDetrObjectDetectionOutput, DFinePreTrainedModel, DFineModel, DFineForObjectDetection, TableTransformerPreTrainedModel, TableTransformerModel, TableTransformerForObjectDetection, TableTransformerObjectDetectionOutput, DeiTPreTrainedModel, DeiTModel, DeiTForImageClassification, HieraPreTrainedModel, HieraModel, HieraForImageClassification, ResNetPreTrainedModel, ResNetModel, ResNetForImageClassification, SwinPreTrainedModel, SwinModel, SwinForImageClassification, SwinForSemanticSegmentation, Swin2SRPreTrainedModel, Swin2SRModel, Swin2SRForImageSuperResolution, DPTPreTrainedModel, DPTModel, DPTForDepthEstimation, DepthAnythingPreTrainedModel, DepthAnythingForDepthEstimation, SapiensPreTrainedModel, SapiensForSemanticSegmentation, SapiensForDepthEstimation, SapiensForNormalEstimation, DepthProPreTrainedModel, DepthProForDepthEstimation, Metric3DPreTrainedModel, Metric3DForDepthEstimation, Metric3Dv2PreTrainedModel, Metric3Dv2ForDepthEstimation, MaskFormerPreTrainedModel, MaskFormerModel, MaskFormerForInstanceSegmentation, GLPNPreTrainedModel, GLPNModel, GLPNForDepthEstimation, DonutSwinPreTrainedModel, DonutSwinModel, ConvNextPreTrainedModel, ConvNextModel, ConvNextForImageClassification, ConvNextV2PreTrainedModel, ConvNextV2Model, ConvNextV2ForImageClassification, Dinov2PreTrainedModel, Dinov2Model, Dinov2ForImageClassification, Dinov2WithRegistersPreTrainedModel, Dinov2WithRegistersModel, Dinov2WithRegistersForImageClassification, DINOv3ViTPreTrainedModel, DINOv3ViTModel, DINOv3ConvNextPreTrainedModel, DINOv3ConvNextModel, GroundingDinoPreTrainedModel, GroundingDinoForObjectDetection, YolosPreTrainedModel, YolosModel, YolosForObjectDetection, YolosObjectDetectionOutput, SamPreTrainedModel, SamModel, SamImageSegmentationOutput, Sam2ImageSegmentationOutput, Sam2PreTrainedModel, Sam2Model, EdgeTamModel, Sam3TrackerModel, MarianPreTrainedModel, MarianModel, MarianMTModel, M2M100PreTrainedModel, M2M100Model, M2M100ForConditionalGeneration, Wav2Vec2PreTrainedModel, Wav2Vec2Model, Wav2Vec2ForCTC, Wav2Vec2ForSequenceClassification, Wav2Vec2ForAudioFrameClassification, ParakeetPreTrainedModel, ParakeetForCTC, PyAnnotePreTrainedModel, PyAnnoteModel, PyAnnoteForAudioFrameClassification, WeSpeakerResNetPreTrainedModel, WeSpeakerResNetModel, UniSpeechPreTrainedModel, 
UniSpeechModel, UniSpeechForCTC, UniSpeechForSequenceClassification, UniSpeechSatPreTrainedModel, UniSpeechSatModel, UniSpeechSatForCTC, UniSpeechSatForSequenceClassification, UniSpeechSatForAudioFrameClassification, Wav2Vec2BertPreTrainedModel, Wav2Vec2BertModel, Wav2Vec2BertForCTC, Wav2Vec2BertForSequenceClassification, HubertPreTrainedModel, HubertModel, HubertForCTC, HubertForSequenceClassification, WavLMPreTrainedModel, WavLMModel, WavLMForCTC, WavLMForSequenceClassification, WavLMForXVector, WavLMForAudioFrameClassification, StyleTextToSpeech2PreTrainedModel, StyleTextToSpeech2Model, SpeechT5PreTrainedModel, SpeechT5Model, SpeechT5ForSpeechToText, SpeechT5ForTextToSpeech, SpeechT5HifiGan, SupertonicPreTrainedModel, SupertonicForConditionalGeneration, TrOCRPreTrainedModel, TrOCRForCausalLM, MistralPreTrainedModel, MistralModel, MistralForCausalLM, Ernie4_5_PretrainedModel, Ernie4_5_Model, Ernie4_5_ForCausalLM, Starcoder2PreTrainedModel, Starcoder2Model, Starcoder2ForCausalLM, FalconPreTrainedModel, FalconModel, FalconForCausalLM, ClapPreTrainedModel, ClapModel, ClapTextModelWithProjection, ClapAudioModelWithProjection, VitsPreTrainedModel, VitsModel, SegformerPreTrainedModel, SegformerModel, SegformerForImageClassification, SegformerForSemanticSegmentation, StableLmPreTrainedModel, StableLmModel, StableLmForCausalLM, EfficientNetPreTrainedModel, EfficientNetModel, EfficientNetForImageClassification, MusicgenPreTrainedModel, MusicgenModel, MusicgenForCausalLM, MusicgenForConditionalGeneration, MobileNetV1PreTrainedModel, MobileNetV1Model, MobileNetV1ForImageClassification, MobileNetV1ForSemanticSegmentation, MobileNetV2PreTrainedModel, MobileNetV2Model, MobileNetV2ForImageClassification, MobileNetV2ForSemanticSegmentation, MobileNetV3PreTrainedModel, MobileNetV3Model, MobileNetV3ForImageClassification, MobileNetV3ForSemanticSegmentation, MobileNetV4PreTrainedModel, MobileNetV4Model, MobileNetV4ForImageClassification, MobileNetV4ForSemanticSegmentation, DecisionTransformerPreTrainedModel, DecisionTransformerModel, MultiModalityPreTrainedModel, MultiModalityCausalLM, MgpstrModelOutput, MgpstrPreTrainedModel, MgpstrForSceneTextRecognition, PatchTSTPreTrainedModel, PatchTSTModel, PatchTSTForPrediction, PatchTSMixerPreTrainedModel, PatchTSMixerModel, PatchTSMixerForPrediction, UltravoxPreTrainedModel, UltravoxModel, VoxtralForConditionalGeneration, MimiPreTrainedModel, MimiEncoderOutput, MimiDecoderOutput, MimiModel, MimiEncoderModel, MimiDecoderModel, DacPreTrainedModel, DacEncoderOutput, DacDecoderOutput, DacModel, DacEncoderModel, DacDecoderModel, SnacPreTrainedModel, SnacModel, SnacEncoderModel, SnacDecoderModel, ChatterboxPreTrainedModel, ChatterboxModel, _a26, PretrainedMixin, MODEL_MAPPING_NAMES_ENCODER_ONLY, MODEL_MAPPING_NAMES_ENCODER_DECODER, MODEL_MAPPING_NAMES_AUTO_ENCODER, MODEL_MAPPING_NAMES_DECODER_ONLY, MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES, MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPING_NAMES, MODEL_FOR_TEXT_TO_WAVEFORM_MAPPING_NAMES, MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES, MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES, MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES, MODEL_FOR_CAUSAL_LM_MAPPING_NAMES, MODEL_FOR_MULTIMODALITY_MAPPING_NAMES, MODEL_FOR_MASKED_LM_MAPPING_NAMES, MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES, MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES, MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES, MODEL_FOR_AUDIO_TEXT_TO_TEXT_MAPPING_NAMES, MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES, MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES, 
MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES, MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING_NAMES, MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES, MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES, MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING_NAMES, MODEL_FOR_MASK_GENERATION_MAPPING_NAMES, MODEL_FOR_CTC_MAPPING_NAMES, MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES, MODEL_FOR_AUDIO_XVECTOR_MAPPING_NAMES, MODEL_FOR_AUDIO_FRAME_CLASSIFICATION_MAPPING_NAMES, MODEL_FOR_IMAGE_MATTING_MAPPING_NAMES, MODEL_FOR_TIME_SERIES_PREDICTION_MAPPING_NAMES, MODEL_FOR_IMAGE_TO_IMAGE_MAPPING_NAMES, MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES, MODEL_FOR_NORMAL_ESTIMATION_MAPPING_NAMES, MODEL_FOR_POSE_ESTIMATION_MAPPING_NAMES, MODEL_FOR_IMAGE_FEATURE_EXTRACTION_MAPPING_NAMES, MODEL_CLASS_TYPE_MAPPING, CUSTOM_MAPPING, CUSTOM_ARCHITECTURES, _a27, AutoModel, _a28, AutoModelForSequenceClassification, _a29, AutoModelForTokenClassification, _a30, AutoModelForSeq2SeqLM, _a31, AutoModelForSpeechSeq2Seq, _a32, AutoModelForTextToSpectrogram, _a33, AutoModelForTextToWaveform, _a34, AutoModelForCausalLM, _a35, AutoModelForMaskedLM, _a36, AutoModelForQuestionAnswering, _a37, AutoModelForVision2Seq, _a38, AutoModelForImageClassification, _a39, AutoModelForImageSegmentation, _a40, AutoModelForSemanticSegmentation, _a41, AutoModelForUniversalSegmentation, _a42, AutoModelForObjectDetection, _a43, AutoModelForZeroShotObjectDetection, _a44, AutoModelForMaskGeneration, _a45, AutoModelForCTC, _a46, AutoModelForAudioClassification, _a47, AutoModelForXVector, _a48, AutoModelForAudioFrameClassification, _a49, AutoModelForDocumentQuestionAnswering, _a50, AutoModelForImageMatting, _a51, AutoModelForImageToImage, _a52, AutoModelForDepthEstimation, _a53, AutoModelForNormalEstimation, _a54, AutoModelForPoseEstimation, _a55, AutoModelForImageFeatureExtraction, _a56, AutoModelForImageTextToText, _a57, AutoModelForAudioTextToText, Seq2SeqLMOutput, SequenceClassifierOutput, XVectorOutput, TokenClassifierOutput, MaskedLMOutput, QuestionAnsweringModelOutput, CausalLMOutput, CausalLMOutputWithPast, ImageMattingOutput, VitsModelOutput, Pipeline, TextClassificationPipeline, TokenClassificationPipeline, QuestionAnsweringPipeline, FillMaskPipeline, Text2TextGenerationPipeline, SummarizationPipeline, TranslationPipeline, TextGenerationPipeline, ZeroShotClassificationPipeline, AudioClassificationPipeline, ZeroShotAudioClassificationPipeline, AutomaticSpeechRecognitionPipeline, TextToAudioPipeline, ImageToTextPipeline, ImageClassificationPipeline, ImageSegmentationPipeline, BackgroundRemovalPipeline, ZeroShotImageClassificationPipeline, ObjectDetectionPipeline, ZeroShotObjectDetectionPipeline, DocumentQuestionAnsweringPipeline, ImageToImagePipeline, DepthEstimationPipeline, FeatureExtractionPipeline, ImageFeatureExtractionPipeline, SUPPORTED_TASKS, TASK_ALIASES, RawVideoFrame, RawVideo, BaseStreamer, stdout_write, TextStreamer, WhisperTextStreamer;
5986
5986
  var init_transformers_web = __esm({
5987
5987
  "node_modules/@huggingface/transformers/dist/transformers.web.js"() {
5988
5988
  "use strict";
@@ -6014,7 +6014,7 @@ var init_transformers_web = __esm({
6014
6014
  IS_WEB_CACHE_AVAILABLE = typeof self !== "undefined" && "caches" in self;
6015
6015
  IS_WEBGPU_AVAILABLE = IS_NODE_ENV || typeof navigator !== "undefined" && "gpu" in navigator;
6016
6016
  IS_WEBNN_AVAILABLE = typeof navigator !== "undefined" && "ml" in navigator;
6017
- isSafari = () => {
6017
+ isSafari2 = () => {
6018
6018
  if (typeof navigator === "undefined") {
6019
6019
  return false;
6020
6020
  }
@@ -6024,7 +6024,7 @@ var init_transformers_web = __esm({
6024
6024
  const notOtherBrowser = !userAgent.match(/CriOS|FxiOS|EdgiOS|OPiOS|mercury|brave/i) && !userAgent.includes("Chrome") && !userAgent.includes("Android");
6025
6025
  return isAppleVendor && notOtherBrowser;
6026
6026
  };
6027
- IS_SAFARI = isSafari();
6027
+ IS_SAFARI = isSafari2();
6028
6028
  apis = Object.freeze({
6029
6029
  /** Whether we are running in a browser environment (and not a web worker) */
6030
6030
  IS_BROWSER_ENV,
@@ -26505,6 +26505,7 @@ ${boi_token}${image_tokens_expanded}${eoi_token}
26505
26505
  // src/index.ts
26506
26506
  var index_exports = {};
26507
26507
  __export(index_exports, {
26508
+ ARKIT_BLENDSHAPES: () => ARKIT_BLENDSHAPES,
26508
26509
  AgentCoreAdapter: () => AgentCoreAdapter,
26509
26510
  AnimationGraph: () => AnimationGraph,
26510
26511
  AudioChunkCoalescer: () => AudioChunkCoalescer,
@@ -26540,6 +26541,8 @@ __export(index_exports, {
26540
26541
  SileroVADWorker: () => SileroVADWorker,
26541
26542
  SyncedAudioPipeline: () => SyncedAudioPipeline,
26542
26543
  TenantManager: () => TenantManager,
26544
+ WAV2ARKIT_BLENDSHAPES: () => WAV2ARKIT_BLENDSHAPES,
26545
+ Wav2ArkitCpuInference: () => Wav2ArkitCpuInference,
26543
26546
  Wav2Vec2Inference: () => Wav2Vec2Inference,
26544
26547
  WhisperInference: () => WhisperInference,
26545
26548
  blendEmotions: () => blendEmotions,
@@ -26551,6 +26554,7 @@ __export(index_exports, {
26551
26554
  configureLogging: () => configureLogging,
26552
26555
  configureTelemetry: () => configureTelemetry,
26553
26556
  createEmotionVector: () => createEmotionVector,
26557
+ createLipSync: () => createLipSync,
26554
26558
  createLogger: () => createLogger,
26555
26559
  createSessionWithFallback: () => createSessionWithFallback,
26556
26560
  createSileroVAD: () => createSileroVAD,
@@ -26575,6 +26579,7 @@ __export(index_exports, {
26575
26579
  isIOSSafari: () => isIOSSafari,
26576
26580
  isMobile: () => isMobile,
26577
26581
  isOnnxRuntimeLoaded: () => isOnnxRuntimeLoaded,
26582
+ isSafari: () => isSafari,
26578
26583
  isSpeechRecognitionAvailable: () => isSpeechRecognitionAvailable,
26579
26584
  isWebGPUAvailable: () => isWebGPUAvailable,
26580
26585
  lerpEmotion: () => lerpEmotion,
@@ -26583,15 +26588,18 @@ __export(index_exports, {
26583
26588
  nukeBrowserCaches: () => nukeBrowserCaches,
26584
26589
  parseHuggingFaceUrl: () => parseHuggingFaceUrl,
26585
26590
  preloadModels: () => preloadModels,
26591
+ remapWav2ArkitToLam: () => remapWav2ArkitToLam,
26586
26592
  resetLoggingConfig: () => resetLoggingConfig,
26587
26593
  resolveBackend: () => resolveBackend,
26588
26594
  scanForInvalidCaches: () => scanForInvalidCaches,
26589
26595
  setLogLevel: () => setLogLevel,
26590
26596
  setLoggingEnabled: () => setLoggingEnabled,
26591
26597
  shouldEnableWasmProxy: () => shouldEnableWasmProxy,
26598
+ shouldUseCpuLipSync: () => shouldUseCpuLipSync,
26592
26599
  shouldUseNativeASR: () => shouldUseNativeASR,
26593
26600
  shouldUseServerLipSync: () => shouldUseServerLipSync,
26594
26601
  supportsVADWorker: () => supportsVADWorker,
26602
+ symmetrizeBlendshapes: () => symmetrizeBlendshapes,
26595
26603
  validateCachedResponse: () => validateCachedResponse
26596
26604
  });
26597
26605
  module.exports = __toCommonJS(index_exports);
@@ -26832,6 +26840,19 @@ var AudioScheduler = class {
26832
26840
  async initialize() {
26833
26841
  console.log("[AudioScheduler] Ready for lazy initialization");
26834
26842
  }
26843
+ /**
26844
+ * Eagerly create and warm up the AudioContext
26845
+ *
26846
+ * Call this when a playback session starts (e.g., when AI response begins).
26847
+ * The AudioContext needs time to initialize the audio hardware — on Windows
26848
+ * this can take 50-100ms. By warming up early (before audio data arrives),
26849
+ * the context is fully ready when schedule() is first called.
26850
+ *
26851
+ * Must be called after a user gesture (click/tap) for autoplay policy.
26852
+ */
26853
+ async warmup() {
26854
+ await this.ensureContext();
26855
+ }
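A minimal caller-side sketch of the warm-up flow described above, assuming an already-constructed AudioScheduler; `scheduler`, `startButton`, and `beginStreamingResponse` are placeholder names, not part of the package:

    // Warm the AudioContext from a user-gesture handler, before any audio
    // chunks arrive, so the first schedule() call is not delayed by hardware init.
    startButton.addEventListener("click", async () => {
      await scheduler.warmup();   // eagerly creates/resumes the AudioContext
      beginStreamingResponse();   // incoming chunks can now be scheduled without a startup gap
    });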
26835
26856
  /**
26836
26857
  * Ensure AudioContext is created and ready
26837
26858
  * Called lazily on first schedule() - requires user gesture
@@ -26862,7 +26883,7 @@ var AudioScheduler = class {
26862
26883
  const ctx = await this.ensureContext();
26863
26884
  const channels = this.options.channels ?? 1;
26864
26885
  if (!this.isPlaying) {
26865
- this.nextPlayTime = ctx.currentTime;
26886
+ this.nextPlayTime = ctx.currentTime + 0.05;
26866
26887
  this.isPlaying = true;
26867
26888
  }
26868
26889
  const audioBuffer = ctx.createBuffer(channels, audioData.length, ctx.sampleRate);
@@ -26936,8 +26957,19 @@ var AudioScheduler = class {
26936
26957
  }
26937
26958
  /**
26938
26959
  * Reset scheduler state for new playback session
26960
+ * Stops any orphaned sources that weren't cleaned up by cancelAll()
26939
26961
  */
26940
26962
  reset() {
26963
+ if (this.context) {
26964
+ const now = this.context.currentTime;
26965
+ for (const { source, gainNode } of this.scheduledSources) {
26966
+ try {
26967
+ gainNode.gain.setValueAtTime(0, now);
26968
+ source.stop(now);
26969
+ } catch {
26970
+ }
26971
+ }
26972
+ }
26941
26973
  this.nextPlayTime = 0;
26942
26974
  this.isPlaying = false;
26943
26975
  this.scheduledSources = [];
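Zeroing the gain before calling stop() is the usual way to silence an in-flight AudioBufferSourceNode without an audible click. A speculative sketch of how one scheduledSources entry is presumably tracked, inferred only from the { source, gainNode } shape the teardown loop destructures; `ctx`, `audioBuffer`, and `startTime` are placeholders, since the scheduling side is outside this hunk:

    // Hypothetical bookkeeping for one scheduled buffer
    const source = ctx.createBufferSource();
    const gainNode = ctx.createGain();
    source.buffer = audioBuffer;
    source.connect(gainNode).connect(ctx.destination);
    source.start(startTime);
    this.scheduledSources.push({ source, gainNode });
    source.onended = () => {
      // drop finished entries so reset() only touches sources that are still live
      this.scheduledSources = this.scheduledSources.filter((e) => e.source !== source);
    };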
@@ -27065,7 +27097,7 @@ var LAMPipeline = class {
27065
27097
  newBuffer.set(this.buffer, 0);
27066
27098
  newBuffer.set(samples, this.buffer.length);
27067
27099
  this.buffer = newBuffer;
27068
- if (this.buffer.length >= this.REQUIRED_SAMPLES) {
27100
+ while (this.buffer.length >= this.REQUIRED_SAMPLES) {
27069
27101
  await this.processBuffer(lam);
27070
27102
  }
27071
27103
  }
@@ -27218,12 +27250,20 @@ var LAMPipeline = class {
27218
27250
  };
27219
27251
 
27220
27252
  // src/audio/SyncedAudioPipeline.ts
27253
+ function pcm16ToFloat32(buffer) {
27254
+ const byteLen = buffer.byteLength & ~1;
27255
+ const int16 = byteLen === buffer.byteLength ? new Int16Array(buffer) : new Int16Array(buffer, 0, byteLen / 2);
27256
+ const float32 = new Float32Array(int16.length);
27257
+ for (let i = 0; i < int16.length; i++) {
27258
+ float32[i] = int16[i] / 32768;
27259
+ }
27260
+ return float32;
27261
+ }
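A short note on the helper above: it reinterprets the buffer as platform-endian (in practice little-endian) Int16 samples and maps each sample s to s / 32768, so the output lies in [-1, 1); the `byteLength & ~1` guard drops a trailing odd byte instead of letting the Int16Array constructor throw when a chunk is split mid-sample. A minimal usage sketch, with `chunk` standing in for any ArrayBuffer of 16-bit PCM:

    // e.g. a binary network frame carrying Int16 PCM
    const float32 = pcm16ToFloat32(chunk); // Float32Array in [-1, 1)
    // -32768 -> -1.0, 0 -> 0.0, 32767 -> ~0.99997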
27221
27262
  var SyncedAudioPipeline = class extends EventEmitter {
27222
27263
  constructor(options) {
27223
27264
  super();
27224
27265
  this.options = options;
27225
- this.waitingForFirstLAM = false;
27226
- this.bufferedChunks = [];
27266
+ this.playbackStarted = false;
27227
27267
  this.monitorInterval = null;
27228
27268
  this.frameAnimationId = null;
27229
27269
  const sampleRate = options.sampleRate ?? 16e3;
@@ -27234,11 +27274,6 @@ var SyncedAudioPipeline = class extends EventEmitter {
27234
27274
  });
27235
27275
  this.lamPipeline = new LAMPipeline({
27236
27276
  sampleRate,
27237
- onInference: (frameCount) => {
27238
- if (this.waitingForFirstLAM) {
27239
- this.onFirstLAMComplete();
27240
- }
27241
- },
27242
27277
  onError: (error) => {
27243
27278
  this.emit("error", error);
27244
27279
  }
@@ -27254,25 +27289,24 @@ var SyncedAudioPipeline = class extends EventEmitter {
27254
27289
  * Start a new playback session
27255
27290
  *
27256
27291
  * Resets all state and prepares for incoming audio chunks.
27257
- * Enables wait-for-first-LAM synchronization.
27292
+ * Audio will be scheduled immediately as chunks arrive (no buffering).
27258
27293
  */
27259
27294
  start() {
27295
+ this.stopMonitoring();
27260
27296
  this.scheduler.reset();
27261
27297
  this.coalescer.reset();
27262
27298
  this.lamPipeline.reset();
27263
- this.bufferedChunks = [];
27264
- this.waitingForFirstLAM = true;
27299
+ this.playbackStarted = false;
27300
+ this.scheduler.warmup();
27265
27301
  this.startFrameLoop();
27266
27302
  this.startMonitoring();
27267
27303
  }
27268
27304
  /**
27269
27305
  * Receive audio chunk from network
27270
27306
  *
27271
- * Implements wait-for-first-LAM pattern:
27272
- * - Chunks are coalesced into optimal buffers
27273
- * - Buffers are sent to LAM for processing
27274
- * - Audio scheduling waits until first LAM completes
27275
- * - Then all buffered audio is scheduled together with LAM frames
27307
+ * Audio-first design: schedules audio immediately while LAM runs in the background.
27308
+ * This prevents LAM inference (50-300ms) from blocking audio scheduling,
27309
+ * which caused audible stuttering with continuous audio streams.
27276
27310
  *
27277
27311
  * @param chunk - Uint8Array containing Int16 PCM audio
27278
27312
  */
@@ -27281,51 +27315,15 @@ var SyncedAudioPipeline = class extends EventEmitter {
27281
27315
  if (!combined) {
27282
27316
  return;
27283
27317
  }
27284
- const int16 = new Int16Array(combined);
27285
- const float32 = new Float32Array(int16.length);
27286
- for (let i = 0; i < int16.length; i++) {
27287
- float32[i] = int16[i] / 32768;
27288
- }
27289
- if (this.waitingForFirstLAM) {
27290
- this.bufferedChunks.push(combined);
27291
- const estimatedTime = this.scheduler.getCurrentTime();
27292
- await this.lamPipeline.push(float32, estimatedTime, this.options.lam);
27293
- } else {
27294
- const scheduleTime = await this.scheduler.schedule(float32);
27295
- await this.lamPipeline.push(float32, scheduleTime, this.options.lam);
27296
- }
27297
- }
27298
- /**
27299
- * Handle first LAM inference completion
27300
- *
27301
- * This is the critical synchronization point:
27302
- * - LAM frames are now ready in the queue
27303
- * - Schedule all buffered audio chunks
27304
- * - Adjust LAM frame timestamps to match actual schedule time
27305
- * - Audio and LAM start playing together, perfectly synchronized
27306
- */
27307
- async onFirstLAMComplete() {
27308
- this.waitingForFirstLAM = false;
27309
- const beforeSchedule = this.scheduler.getCurrentTime();
27310
- let actualStartTime = beforeSchedule;
27311
- for (let i = 0; i < this.bufferedChunks.length; i++) {
27312
- const buffer = this.bufferedChunks[i];
27313
- const int16 = new Int16Array(buffer);
27314
- const float32 = new Float32Array(int16.length);
27315
- for (let j = 0; j < int16.length; j++) {
27316
- float32[j] = int16[j] / 32768;
27317
- }
27318
- const scheduleTime = await this.scheduler.schedule(float32);
27319
- if (i === 0) {
27320
- actualStartTime = scheduleTime;
27321
- }
27322
- }
27323
- const timeOffset = actualStartTime - beforeSchedule;
27324
- if (timeOffset !== 0) {
27325
- this.lamPipeline.adjustTimestamps(timeOffset);
27318
+ const float32 = pcm16ToFloat32(combined);
27319
+ const scheduleTime = await this.scheduler.schedule(float32);
27320
+ if (!this.playbackStarted) {
27321
+ this.playbackStarted = true;
27322
+ this.emit("playback_start", scheduleTime);
27326
27323
  }
27327
- this.bufferedChunks = [];
27328
- this.emit("playback_start", actualStartTime);
27324
+ this.lamPipeline.push(float32, scheduleTime, this.options.lam).catch((err) => {
27325
+ this.emit("error", err);
27326
+ });
27329
27327
  }
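// Usage sketch of the audio-first flow. Names are assumptions: the chunk
// handler is shown here as receiveChunk(), and `socket` stands in for any
// transport delivering Uint8Array chunks of 16-bit PCM.
pipeline.on("playback_start", (startTime) => console.log("audio starts at", startTime));
pipeline.on("error", (err) => console.error("background LAM inference failed", err));
pipeline.start();
socket.onmessage = (event) => {
  pipeline.receiveChunk(new Uint8Array(event.data));
};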
27330
27328
  /**
27331
27329
  * End of audio stream
@@ -27357,10 +27355,9 @@ var SyncedAudioPipeline = class extends EventEmitter {
27357
27355
  async stop(fadeOutMs = 50) {
27358
27356
  this.stopMonitoring();
27359
27357
  await this.scheduler.cancelAll(fadeOutMs);
27360
- this.bufferedChunks = [];
27361
27358
  this.coalescer.reset();
27362
27359
  this.lamPipeline.reset();
27363
- this.waitingForFirstLAM = false;
27360
+ this.playbackStarted = false;
27364
27361
  this.emit("playback_complete", void 0);
27365
27362
  }
27366
27363
  /**
@@ -27417,8 +27414,7 @@ var SyncedAudioPipeline = class extends EventEmitter {
27417
27414
  */
27418
27415
  getState() {
27419
27416
  return {
27420
- waitingForFirstLAM: this.waitingForFirstLAM,
27421
- bufferedChunks: this.bufferedChunks.length,
27417
+ playbackStarted: this.playbackStarted,
27422
27418
  coalescerFill: this.coalescer.fillLevel,
27423
27419
  lamFill: this.lamPipeline.fillLevel,
27424
27420
  queuedFrames: this.lamPipeline.queuedFrameCount,
@@ -27434,7 +27430,6 @@ var SyncedAudioPipeline = class extends EventEmitter {
27434
27430
  this.scheduler.dispose();
27435
27431
  this.coalescer.reset();
27436
27432
  this.lamPipeline.reset();
27437
- this.bufferedChunks = [];
27438
27433
  }
27439
27434
  };
27440
27435
 
@@ -28876,12 +28871,12 @@ var Logger = class _Logger {
28876
28871
  };
28877
28872
  var loggerCache = /* @__PURE__ */ new Map();
28878
28873
  function createLogger(module2) {
28879
- let logger12 = loggerCache.get(module2);
28880
- if (!logger12) {
28881
- logger12 = new Logger(module2);
28882
- loggerCache.set(module2, logger12);
28874
+ let logger13 = loggerCache.get(module2);
28875
+ if (!logger13) {
28876
+ logger13 = new Logger(module2);
28877
+ loggerCache.set(module2, logger13);
28883
28878
  }
28884
- return logger12;
28879
+ return logger13;
28885
28880
  }
28886
28881
  var noopLogger = {
28887
28882
  module: "noop",
@@ -28925,7 +28920,7 @@ function hasWebGPUApi() {
28925
28920
  return "gpu" in navigator && navigator.gpu !== void 0;
28926
28921
  }
28927
28922
  function getRecommendedBackend() {
28928
- if (isIOS()) {
28923
+ if (isSafari() || isIOS()) {
28929
28924
  return "wasm";
28930
28925
  }
28931
28926
  return "webgpu";
@@ -28969,6 +28964,14 @@ function shouldEnableWasmProxy() {
28969
28964
  }
28970
28965
  return true;
28971
28966
  }
28967
+ function isSafari() {
28968
+ if (typeof navigator === "undefined") return false;
28969
+ const ua = navigator.userAgent.toLowerCase();
28970
+ return /safari/.test(ua) && !/chrome|crios|fxios|chromium|edg/.test(ua);
28971
+ }
28972
+ function shouldUseCpuLipSync() {
28973
+ return isSafari();
28974
+ }
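// Usage sketch: both helpers feed startup configuration. Per the code above,
// getRecommendedBackend() returns "wasm" on Safari (including iOS) and
// "webgpu" elsewhere, and shouldUseCpuLipSync() gates the lighter CPU
// lip-sync model on Safari.
const backend = getRecommendedBackend();
const lipSyncMode = shouldUseCpuLipSync() ? "cpu" : "gpu";
console.log({ backend, lipSyncMode });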
28972
28975
  function isSpeechRecognitionAvailable() {
28973
28976
  if (typeof window === "undefined") return false;
28974
28977
  return "SpeechRecognition" in window || "webkitSpeechRecognition" in window;
@@ -29013,13 +29016,13 @@ async function isWebGPUAvailable() {
29013
29016
  return false;
29014
29017
  }
29015
29018
  }
29016
- function configureWasm(ort2) {
29017
- ort2.env.wasm.wasmPaths = WASM_CDN_PATH;
29019
+ function configureWasm(ort) {
29020
+ ort.env.wasm.wasmPaths = WASM_CDN_PATH;
29018
29021
  const numThreads = getOptimalWasmThreads();
29019
29022
  const enableProxy = shouldEnableWasmProxy();
29020
- ort2.env.wasm.numThreads = numThreads;
29021
- ort2.env.wasm.simd = true;
29022
- ort2.env.wasm.proxy = enableProxy;
29023
+ ort.env.wasm.numThreads = numThreads;
29024
+ ort.env.wasm.simd = true;
29025
+ ort.env.wasm.proxy = enableProxy;
29023
29026
  logger.info("WASM configured", {
29024
29027
  numThreads,
29025
29028
  simd: true,
@@ -29067,8 +29070,8 @@ async function getOnnxRuntimeForPreference(preference = "auto") {
29067
29070
  webgpuAvailable,
29068
29071
  resolvedBackend: backend
29069
29072
  });
29070
- const ort2 = await getOnnxRuntime(backend);
29071
- return { ort: ort2, backend };
29073
+ const ort = await getOnnxRuntime(backend);
29074
+ return { ort, backend };
29072
29075
  }
29073
29076
  function getSessionOptions(backend) {
29074
29077
  if (backend === "webgpu") {
@@ -29089,12 +29092,12 @@ function getSessionOptions(backend) {
29089
29092
  };
29090
29093
  }
29091
29094
  async function createSessionWithFallback(modelBuffer, preferredBackend) {
29092
- const ort2 = await getOnnxRuntime(preferredBackend);
29095
+ const ort = await getOnnxRuntime(preferredBackend);
29093
29096
  const modelData = new Uint8Array(modelBuffer);
29094
29097
  if (preferredBackend === "webgpu") {
29095
29098
  try {
29096
29099
  const options2 = getSessionOptions("webgpu");
29097
- const session2 = await ort2.InferenceSession.create(modelData, options2);
29100
+ const session2 = await ort.InferenceSession.create(modelData, options2);
29098
29101
  logger.info("Session created with WebGPU backend");
29099
29102
  return { session: session2, backend: "webgpu" };
29100
29103
  } catch (err) {
@@ -29104,7 +29107,7 @@ async function createSessionWithFallback(modelBuffer, preferredBackend) {
29104
29107
  }
29105
29108
  }
29106
29109
  const options = getSessionOptions("wasm");
29107
- const session = await ort2.InferenceSession.create(modelData, options);
29110
+ const session = await ort.InferenceSession.create(modelData, options);
29108
29111
  logger.info("Session created with WASM backend");
29109
29112
  return { session, backend: "wasm" };
29110
29113
  }
@@ -29115,8 +29118,7 @@ function isOnnxRuntimeLoaded() {
29115
29118
  return ortInstance !== null;
29116
29119
  }
29117
29120
 
29118
- // src/inference/Wav2Vec2Inference.ts
29119
- var logger2 = createLogger("Wav2Vec2");
29121
+ // src/inference/blendshapeUtils.ts
29120
29122
  var LAM_BLENDSHAPES = [
29121
29123
  "browDownLeft",
29122
29124
  "browDownRight",
@@ -29171,40 +29173,7 @@ var LAM_BLENDSHAPES = [
29171
29173
  "noseSneerRight",
29172
29174
  "tongueOut"
29173
29175
  ];
29174
- var CTC_VOCAB = [
29175
- "<pad>",
29176
- "<s>",
29177
- "</s>",
29178
- "<unk>",
29179
- "|",
29180
- "E",
29181
- "T",
29182
- "A",
29183
- "O",
29184
- "N",
29185
- "I",
29186
- "H",
29187
- "S",
29188
- "R",
29189
- "D",
29190
- "L",
29191
- "U",
29192
- "M",
29193
- "W",
29194
- "C",
29195
- "F",
29196
- "G",
29197
- "Y",
29198
- "P",
29199
- "B",
29200
- "V",
29201
- "K",
29202
- "'",
29203
- "X",
29204
- "J",
29205
- "Q",
29206
- "Z"
29207
- ];
29176
+ var ARKIT_BLENDSHAPES = LAM_BLENDSHAPES;
29208
29177
  var ARKIT_SYMMETRIC_PAIRS = [
29209
29178
  ["jawLeft", "jawRight"],
29210
29179
  ["mouthLeft", "mouthRight"],
@@ -29240,6 +29209,107 @@ function symmetrizeBlendshapes(frame) {
29240
29209
  }
29241
29210
  return result;
29242
29211
  }
29212
+ var WAV2ARKIT_BLENDSHAPES = [
29213
+ "browDownLeft",
29214
+ "browDownRight",
29215
+ "browInnerUp",
29216
+ "browOuterUpLeft",
29217
+ "browOuterUpRight",
29218
+ "cheekPuff",
29219
+ "cheekSquintLeft",
29220
+ "cheekSquintRight",
29221
+ "eyeBlinkLeft",
29222
+ "eyeBlinkRight",
29223
+ "eyeLookDownLeft",
29224
+ "eyeLookDownRight",
29225
+ "eyeLookInLeft",
29226
+ "eyeLookInRight",
29227
+ "eyeLookOutLeft",
29228
+ "eyeLookOutRight",
29229
+ "eyeLookUpLeft",
29230
+ "eyeLookUpRight",
29231
+ "eyeSquintLeft",
29232
+ "eyeSquintRight",
29233
+ "eyeWideLeft",
29234
+ "eyeWideRight",
29235
+ "jawForward",
29236
+ "jawLeft",
29237
+ "jawOpen",
29238
+ "mouthFrownLeft",
29239
+ "mouthFrownRight",
29240
+ "mouthFunnel",
29241
+ "mouthLeft",
29242
+ "mouthLowerDownLeft",
29243
+ "mouthLowerDownRight",
29244
+ "mouthPressLeft",
29245
+ "mouthPressRight",
29246
+ "mouthPucker",
29247
+ "mouthRight",
29248
+ "mouthRollLower",
29249
+ "mouthRollUpper",
29250
+ "mouthShrugLower",
29251
+ "mouthShrugUpper",
29252
+ "mouthSmileLeft",
29253
+ "mouthSmileRight",
29254
+ "mouthStretchLeft",
29255
+ "mouthStretchRight",
29256
+ "mouthUpperUpLeft",
29257
+ "mouthUpperUpRight",
29258
+ "noseSneerLeft",
29259
+ "noseSneerRight",
29260
+ "tongueOut",
29261
+ "mouthClose",
29262
+ "mouthDimpleLeft",
29263
+ "mouthDimpleRight",
29264
+ "jawRight"
29265
+ ];
29266
+ var REMAP_WAV2ARKIT_TO_LAM = WAV2ARKIT_BLENDSHAPES.map(
29267
+ (name) => LAM_BLENDSHAPES.indexOf(name)
29268
+ );
29269
+ function remapWav2ArkitToLam(frame) {
29270
+ const result = new Float32Array(52);
29271
+ for (let i = 0; i < 52; i++) {
29272
+ result[REMAP_WAV2ARKIT_TO_LAM[i]] = frame[i];
29273
+ }
29274
+ return result;
29275
+ }
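// REMAP_WAV2ARKIT_TO_LAM precomputes, for each model output index, the target
// index in LAM_BLENDSHAPES, and remapWav2ArkitToLam() scatters a frame into
// that order. A tiny illustration of the same scatter-remap with made-up names:
const SRC_ORDER = ["jawOpen", "mouthSmileLeft", "browInnerUp"];
const DST_ORDER = ["browInnerUp", "jawOpen", "mouthSmileLeft"];
const REMAP = SRC_ORDER.map((name) => DST_ORDER.indexOf(name)); // [1, 2, 0]
function scatter(frame) {
  const out = new Float32Array(DST_ORDER.length);
  for (let i = 0; i < frame.length; i++) out[REMAP[i]] = frame[i];
  return out;
}
console.log(Array.from(scatter(new Float32Array([0.9, 0.2, 0.1])))); // [0.1, 0.9, 0.2]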
29276
+
29277
+ // src/inference/Wav2Vec2Inference.ts
29278
+ var logger2 = createLogger("Wav2Vec2");
29279
+ var CTC_VOCAB = [
29280
+ "<pad>",
29281
+ "<s>",
29282
+ "</s>",
29283
+ "<unk>",
29284
+ "|",
29285
+ "E",
29286
+ "T",
29287
+ "A",
29288
+ "O",
29289
+ "N",
29290
+ "I",
29291
+ "H",
29292
+ "S",
29293
+ "R",
29294
+ "D",
29295
+ "L",
29296
+ "U",
29297
+ "M",
29298
+ "W",
29299
+ "C",
29300
+ "F",
29301
+ "G",
29302
+ "Y",
29303
+ "P",
29304
+ "B",
29305
+ "V",
29306
+ "K",
29307
+ "'",
29308
+ "X",
29309
+ "J",
29310
+ "Q",
29311
+ "Z"
29312
+ ];
29243
29313
  var Wav2Vec2Inference = class {
29244
29314
  constructor(config) {
29245
29315
  this.session = null;
@@ -29276,8 +29346,8 @@ var Wav2Vec2Inference = class {
29276
29346
  });
29277
29347
  try {
29278
29348
  logger2.info("Loading ONNX Runtime...", { preference: this.config.backend || "auto" });
29279
- const { ort: ort2, backend } = await getOnnxRuntimeForPreference(this.config.backend || "auto");
29280
- this.ort = ort2;
29349
+ const { ort, backend } = await getOnnxRuntimeForPreference(this.config.backend || "auto");
29350
+ this.ort = ort;
29281
29351
  this._backend = backend;
29282
29352
  logger2.info("ONNX Runtime loaded", { backend: this._backend });
29283
29353
  const cache = getModelCache();
@@ -29478,6 +29548,7 @@ var Wav2Vec2Inference = class {
29478
29548
  blendshapes,
29479
29549
  asrLogits,
29480
29550
  text,
29551
+ numFrames: numA2EFrames,
29481
29552
  numA2EFrames,
29482
29553
  numASRFrames,
29483
29554
  inferenceTimeMs
@@ -29845,8 +29916,293 @@ var WhisperInference = class _WhisperInference {
29845
29916
  }
29846
29917
  };
29847
29918
 
29919
+ // src/inference/Wav2ArkitCpuInference.ts
29920
+ var logger5 = createLogger("Wav2ArkitCpu");
29921
+ var Wav2ArkitCpuInference = class {
29922
+ constructor(config) {
29923
+ this.session = null;
29924
+ this.ort = null;
29925
+ this._backend = "wasm";
29926
+ this.isLoading = false;
29927
+ // Inference queue for handling concurrent calls
29928
+ this.inferenceQueue = Promise.resolve();
29929
+ this.config = config;
29930
+ }
29931
+ get backend() {
29932
+ return this.session ? this._backend : null;
29933
+ }
29934
+ get isLoaded() {
29935
+ return this.session !== null;
29936
+ }
29937
+ /**
29938
+ * Load the ONNX model
29939
+ */
29940
+ async load() {
29941
+ if (this.isLoading) {
29942
+ throw new Error("Model is already loading");
29943
+ }
29944
+ if (this.session) {
29945
+ throw new Error("Model already loaded. Call dispose() first.");
29946
+ }
29947
+ this.isLoading = true;
29948
+ const startTime = performance.now();
29949
+ const telemetry = getTelemetry();
29950
+ const span = telemetry?.startSpan("Wav2ArkitCpu.load", {
29951
+ "model.url": this.config.modelUrl,
29952
+ "model.backend_requested": this.config.backend || "wasm"
29953
+ });
29954
+ try {
29955
+ const preference = this.config.backend || "wasm";
29956
+ logger5.info("Loading ONNX Runtime...", { preference });
29957
+ const { ort, backend } = await getOnnxRuntimeForPreference(preference);
29958
+ this.ort = ort;
29959
+ this._backend = backend;
29960
+ logger5.info("ONNX Runtime loaded", { backend: this._backend });
29961
+ const cache = getModelCache();
29962
+ const modelUrl = this.config.modelUrl;
29963
+ const isCached = await cache.has(modelUrl);
29964
+ let modelBuffer;
29965
+ if (isCached) {
29966
+ logger5.debug("Loading model from cache", { modelUrl });
29967
+ modelBuffer = await cache.get(modelUrl);
29968
+ if (!modelBuffer) {
29969
+ logger5.warn("Cache corruption detected, clearing and retrying", { modelUrl });
29970
+ await cache.delete(modelUrl);
29971
+ modelBuffer = await fetchWithCache(modelUrl);
29972
+ }
29973
+ } else {
29974
+ logger5.debug("Fetching and caching model", { modelUrl });
29975
+ modelBuffer = await fetchWithCache(modelUrl);
29976
+ }
29977
+ if (!modelBuffer) {
29978
+ throw new Error(`Failed to load model: ${modelUrl}`);
29979
+ }
29980
+ logger5.debug("Creating ONNX session", {
29981
+ size: formatBytes(modelBuffer.byteLength),
29982
+ backend: this._backend
29983
+ });
29984
+ const sessionOptions = getSessionOptions(this._backend);
29985
+ const modelData = new Uint8Array(modelBuffer);
29986
+ this.session = await this.ort.InferenceSession.create(modelData, sessionOptions);
29987
+ const loadTimeMs = performance.now() - startTime;
29988
+ logger5.info("Model loaded successfully", {
29989
+ backend: this._backend,
29990
+ loadTimeMs: Math.round(loadTimeMs),
29991
+ inputs: this.session.inputNames,
29992
+ outputs: this.session.outputNames
29993
+ });
29994
+ span?.setAttributes({
29995
+ "model.backend": this._backend,
29996
+ "model.load_time_ms": loadTimeMs,
29997
+ "model.cached": isCached
29998
+ });
29999
+ span?.end();
30000
+ telemetry?.recordHistogram("omote.model.load_time", loadTimeMs, {
30001
+ model: "wav2arkit_cpu",
30002
+ backend: this._backend
30003
+ });
30004
+ logger5.debug("Running warmup inference");
30005
+ const warmupStart = performance.now();
30006
+ const silentAudio = new Float32Array(16e3);
30007
+ await this.infer(silentAudio);
30008
+ const warmupTimeMs = performance.now() - warmupStart;
30009
+ logger5.info("Warmup inference complete", {
30010
+ warmupTimeMs: Math.round(warmupTimeMs),
30011
+ backend: this._backend
30012
+ });
30013
+ telemetry?.recordHistogram("omote.model.warmup_time", warmupTimeMs, {
30014
+ model: "wav2arkit_cpu",
30015
+ backend: this._backend
30016
+ });
30017
+ return {
30018
+ backend: this._backend,
30019
+ loadTimeMs,
30020
+ inputNames: [...this.session.inputNames],
30021
+ outputNames: [...this.session.outputNames]
30022
+ };
30023
+ } catch (error) {
30024
+ span?.endWithError(error instanceof Error ? error : new Error(String(error)));
30025
+ telemetry?.incrementCounter("omote.errors.total", 1, {
30026
+ model: "wav2arkit_cpu",
30027
+ error_type: "load_failed"
30028
+ });
30029
+ throw error;
30030
+ } finally {
30031
+ this.isLoading = false;
30032
+ }
30033
+ }
30034
+ /**
30035
+ * Run inference on raw audio
30036
+ *
30037
+ * Accepts variable-length audio (not fixed to 16000 samples).
30038
+ * Output frames = ceil(30 * numSamples / 16000).
30039
+ *
30040
+ * @param audioSamples - Float32Array of raw audio at 16kHz
30041
+ * @param _identityIndex - Ignored (identity 11 is baked into the model)
30042
+ */
30043
+ async infer(audioSamples, _identityIndex) {
30044
+ if (!this.session) {
30045
+ throw new Error("Model not loaded. Call load() first.");
30046
+ }
30047
+ const audioCopy = new Float32Array(audioSamples);
30048
+ const feeds = {
30049
+ "audio_waveform": new this.ort.Tensor("float32", audioCopy, [1, audioCopy.length])
30050
+ };
30051
+ return this.queueInference(feeds, audioCopy.length);
30052
+ }
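// Quick arithmetic check of the frame-count formula from the doc comment above
// (frames = ceil(30 * numSamples / 16000), i.e. 30 fps output at 16 kHz input):
const framesFor = (numSamples) => Math.ceil((30 * numSamples) / 16000);
console.log(framesFor(16000)); // 30 frames for 1s of audio
console.log(framesFor(8000));  // 15 frames for 0.5s
console.log(framesFor(5333));  // 10 frames for ~333ms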
30053
+ /**
30054
+ * Queue inference to serialize ONNX session calls
30055
+ */
30056
+ queueInference(feeds, inputSamples) {
30057
+ return new Promise((resolve, reject) => {
30058
+ this.inferenceQueue = this.inferenceQueue.then(async () => {
30059
+ const telemetry = getTelemetry();
30060
+ const span = telemetry?.startSpan("Wav2ArkitCpu.infer", {
30061
+ "inference.backend": this._backend,
30062
+ "inference.input_samples": inputSamples
30063
+ });
30064
+ try {
30065
+ const startTime = performance.now();
30066
+ const results = await this.session.run(feeds);
30067
+ const inferenceTimeMs = performance.now() - startTime;
30068
+ const blendshapeOutput = results["blendshapes"];
30069
+ if (!blendshapeOutput) {
30070
+ throw new Error("Missing blendshapes output from model");
30071
+ }
30072
+ const blendshapeData = blendshapeOutput.data;
30073
+ const numFrames = blendshapeOutput.dims[1];
30074
+ const numBlendshapes = blendshapeOutput.dims[2];
30075
+ const blendshapes = [];
30076
+ for (let f = 0; f < numFrames; f++) {
30077
+ const rawFrame = blendshapeData.slice(f * numBlendshapes, (f + 1) * numBlendshapes);
30078
+ const remapped = remapWav2ArkitToLam(rawFrame);
30079
+ blendshapes.push(symmetrizeBlendshapes(remapped));
30080
+ }
30081
+ logger5.trace("Inference completed", {
30082
+ inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
30083
+ numFrames,
30084
+ inputSamples
30085
+ });
30086
+ span?.setAttributes({
30087
+ "inference.duration_ms": inferenceTimeMs,
30088
+ "inference.frames": numFrames
30089
+ });
30090
+ span?.end();
30091
+ telemetry?.recordHistogram("omote.inference.latency", inferenceTimeMs, {
30092
+ model: "wav2arkit_cpu",
30093
+ backend: this._backend
30094
+ });
30095
+ telemetry?.incrementCounter("omote.inference.total", 1, {
30096
+ model: "wav2arkit_cpu",
30097
+ backend: this._backend,
30098
+ status: "success"
30099
+ });
30100
+ resolve({
30101
+ blendshapes,
30102
+ numFrames,
30103
+ inferenceTimeMs
30104
+ });
30105
+ } catch (err) {
30106
+ span?.endWithError(err instanceof Error ? err : new Error(String(err)));
30107
+ telemetry?.incrementCounter("omote.inference.total", 1, {
30108
+ model: "wav2arkit_cpu",
30109
+ backend: this._backend,
30110
+ status: "error"
30111
+ });
30112
+ reject(err);
30113
+ }
30114
+ });
30115
+ });
30116
+ }
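// queueInference() serializes calls by chaining them onto one promise, so the
// ONNX session never runs two inferences concurrently. The same pattern as a
// generic helper (a sketch, not part of the bundle):
let queue = Promise.resolve();
function enqueue(task) {
  const result = queue.then(() => task());
  queue = result.catch(() => {}); // swallow so one failure doesn't stall later tasks
  return result;                  // the caller still observes the task's value or error
}
// e.g. enqueue(() => session.run(feeds))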
30117
+ /**
30118
+ * Dispose of the model and free resources
30119
+ */
30120
+ async dispose() {
30121
+ if (this.session) {
30122
+ await this.session.release();
30123
+ this.session = null;
30124
+ }
30125
+ }
30126
+ };
30127
+
30128
+ // src/inference/createLipSync.ts
30129
+ var logger6 = createLogger("createLipSync");
30130
+ function createLipSync(config) {
30131
+ const mode = config.mode ?? "auto";
30132
+ const fallbackOnError = config.fallbackOnError ?? true;
30133
+ let useCpu;
30134
+ if (mode === "cpu") {
30135
+ useCpu = true;
30136
+ logger6.info("Forcing CPU lip sync model (wav2arkit_cpu)");
30137
+ } else if (mode === "gpu") {
30138
+ useCpu = false;
30139
+ logger6.info("Forcing GPU lip sync model (Wav2Vec2)");
30140
+ } else {
30141
+ useCpu = isSafari();
30142
+ logger6.info("Auto-detected lip sync model", {
30143
+ useCpu,
30144
+ isSafari: isSafari()
30145
+ });
30146
+ }
30147
+ if (useCpu) {
30148
+ logger6.info("Creating Wav2ArkitCpuInference (1.8MB, WASM)");
30149
+ return new Wav2ArkitCpuInference({
30150
+ modelUrl: config.cpuModelUrl
30151
+ });
30152
+ }
30153
+ const gpuInstance = new Wav2Vec2Inference({
30154
+ modelUrl: config.gpuModelUrl,
30155
+ backend: config.gpuBackend ?? "auto",
30156
+ numIdentityClasses: config.numIdentityClasses
30157
+ });
30158
+ if (fallbackOnError) {
30159
+ logger6.info("Creating Wav2Vec2Inference with CPU fallback");
30160
+ return new LipSyncWithFallback(gpuInstance, config);
30161
+ }
30162
+ logger6.info("Creating Wav2Vec2Inference (no fallback)");
30163
+ return gpuInstance;
30164
+ }
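// Usage sketch for createLipSync(); the model URLs below are placeholders.
// "auto" picks the small WASM model on Safari and the Wav2Vec2 GPU model,
// wrapped with CPU fallback, everywhere else.
const lipSync = createLipSync({
  mode: "auto",
  cpuModelUrl: "https://example.com/models/wav2arkit_cpu.onnx",
  gpuModelUrl: "https://example.com/models/wav2vec2.onnx",
  fallbackOnError: true
});
await lipSync.load();
const { blendshapes, numFrames } = await lipSync.infer(new Float32Array(16000));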
30165
+ var LipSyncWithFallback = class {
30166
+ constructor(gpuInstance, config) {
30167
+ this.hasFallenBack = false;
30168
+ this.implementation = gpuInstance;
30169
+ this.config = config;
30170
+ }
30171
+ get backend() {
30172
+ return this.implementation.backend;
30173
+ }
30174
+ get isLoaded() {
30175
+ return this.implementation.isLoaded;
30176
+ }
30177
+ async load() {
30178
+ try {
30179
+ return await this.implementation.load();
30180
+ } catch (error) {
30181
+ logger6.warn("GPU model load failed, falling back to CPU model", {
30182
+ error: error instanceof Error ? error.message : String(error)
30183
+ });
30184
+ try {
30185
+ await this.implementation.dispose();
30186
+ } catch {
30187
+ }
30188
+ this.implementation = new Wav2ArkitCpuInference({
30189
+ modelUrl: this.config.cpuModelUrl
30190
+ });
30191
+ this.hasFallenBack = true;
30192
+ logger6.info("Fallback to Wav2ArkitCpuInference successful");
30193
+ return await this.implementation.load();
30194
+ }
30195
+ }
30196
+ async infer(audioSamples, identityIndex) {
30197
+ return this.implementation.infer(audioSamples, identityIndex);
30198
+ }
30199
+ async dispose() {
30200
+ return this.implementation.dispose();
30201
+ }
30202
+ };
30203
+
29848
30204
  // src/inference/SileroVADInference.ts
29849
- var logger5 = createLogger("SileroVAD");
30205
+ var logger7 = createLogger("SileroVAD");
29850
30206
  var SileroVADInference = class {
29851
30207
  constructor(config) {
29852
30208
  this.session = null;
@@ -29918,32 +30274,32 @@ var SileroVADInference = class {
29918
30274
  "model.sample_rate": this.config.sampleRate
29919
30275
  });
29920
30276
  try {
29921
- logger5.info("Loading ONNX Runtime...", { preference: this.config.backend });
29922
- const { ort: ort2, backend } = await getOnnxRuntimeForPreference(this.config.backend);
29923
- this.ort = ort2;
30277
+ logger7.info("Loading ONNX Runtime...", { preference: this.config.backend });
30278
+ const { ort, backend } = await getOnnxRuntimeForPreference(this.config.backend);
30279
+ this.ort = ort;
29924
30280
  this._backend = backend;
29925
- logger5.info("ONNX Runtime loaded", { backend: this._backend });
30281
+ logger7.info("ONNX Runtime loaded", { backend: this._backend });
29926
30282
  const cache = getModelCache();
29927
30283
  const modelUrl = this.config.modelUrl;
29928
30284
  const isCached = await cache.has(modelUrl);
29929
30285
  let modelBuffer;
29930
30286
  if (isCached) {
29931
- logger5.debug("Loading model from cache", { modelUrl });
30287
+ logger7.debug("Loading model from cache", { modelUrl });
29932
30288
  modelBuffer = await cache.get(modelUrl);
29933
30289
  } else {
29934
- logger5.debug("Fetching and caching model", { modelUrl });
30290
+ logger7.debug("Fetching and caching model", { modelUrl });
29935
30291
  modelBuffer = await fetchWithCache(modelUrl);
29936
30292
  }
29937
- logger5.debug("Creating ONNX session", {
30293
+ logger7.debug("Creating ONNX session", {
29938
30294
  size: formatBytes(modelBuffer.byteLength),
29939
30295
  backend: this._backend
29940
30296
  });
29941
30297
  const sessionOptions = getSessionOptions(this._backend);
29942
30298
  const modelData = new Uint8Array(modelBuffer);
29943
- this.session = await ort2.InferenceSession.create(modelData, sessionOptions);
30299
+ this.session = await ort.InferenceSession.create(modelData, sessionOptions);
29944
30300
  this.reset();
29945
30301
  const loadTimeMs = performance.now() - startTime;
29946
- logger5.info("Model loaded successfully", {
30302
+ logger7.info("Model loaded successfully", {
29947
30303
  backend: this._backend,
29948
30304
  loadTimeMs: Math.round(loadTimeMs),
29949
30305
  sampleRate: this.config.sampleRate,
@@ -30096,7 +30452,7 @@ var SileroVADInference = class {
30096
30452
  this.preSpeechBuffer.shift();
30097
30453
  }
30098
30454
  }
30099
- logger5.trace("Skipping VAD inference - audio too quiet", {
30455
+ logger7.trace("Skipping VAD inference - audio too quiet", {
30100
30456
  rms: Math.round(rms * 1e4) / 1e4,
30101
30457
  threshold: MIN_ENERGY_THRESHOLD
30102
30458
  });
@@ -30150,7 +30506,7 @@ var SileroVADInference = class {
30150
30506
  if (isSpeech && !this.wasSpeaking) {
30151
30507
  preSpeechChunks = [...this.preSpeechBuffer];
30152
30508
  this.preSpeechBuffer = [];
30153
- logger5.debug("Speech started with pre-speech buffer", {
30509
+ logger7.debug("Speech started with pre-speech buffer", {
30154
30510
  preSpeechChunks: preSpeechChunks.length,
30155
30511
  durationMs: Math.round(preSpeechChunks.length * this.getChunkDurationMs())
30156
30512
  });
@@ -30163,7 +30519,7 @@ var SileroVADInference = class {
30163
30519
  this.preSpeechBuffer = [];
30164
30520
  }
30165
30521
  this.wasSpeaking = isSpeech;
30166
- logger5.trace("VAD inference completed", {
30522
+ logger7.trace("VAD inference completed", {
30167
30523
  probability: Math.round(probability * 1e3) / 1e3,
30168
30524
  isSpeech,
30169
30525
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100
@@ -30219,7 +30575,7 @@ var SileroVADInference = class {
30219
30575
  SileroVADInference.isWebGPUAvailable = isWebGPUAvailable;
30220
30576
 
30221
30577
  // src/inference/SileroVADWorker.ts
30222
- var logger6 = createLogger("SileroVADWorker");
30578
+ var logger8 = createLogger("SileroVADWorker");
30223
30579
  var WASM_CDN_PATH2 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
30224
30580
  var LOAD_TIMEOUT_MS = 1e4;
30225
30581
  var INFERENCE_TIMEOUT_MS = 1e3;
@@ -30482,7 +30838,7 @@ var SileroVADWorker = class {
30482
30838
  this.handleWorkerMessage(event.data);
30483
30839
  };
30484
30840
  worker.onerror = (error) => {
30485
- logger6.error("Worker error", { error: error.message });
30841
+ logger8.error("Worker error", { error: error.message });
30486
30842
  for (const [, resolver] of this.pendingResolvers) {
30487
30843
  resolver.reject(new Error(`Worker error: ${error.message}`));
30488
30844
  }
@@ -30558,9 +30914,9 @@ var SileroVADWorker = class {
30558
30914
  "model.sample_rate": this.config.sampleRate
30559
30915
  });
30560
30916
  try {
30561
- logger6.info("Creating VAD worker...");
30917
+ logger8.info("Creating VAD worker...");
30562
30918
  this.worker = this.createWorker();
30563
- logger6.info("Loading model in worker...", {
30919
+ logger8.info("Loading model in worker...", {
30564
30920
  modelUrl: this.config.modelUrl,
30565
30921
  sampleRate: this.config.sampleRate
30566
30922
  });
@@ -30576,7 +30932,7 @@ var SileroVADWorker = class {
30576
30932
  );
30577
30933
  this._isLoaded = true;
30578
30934
  const loadTimeMs = performance.now() - startTime;
30579
- logger6.info("VAD worker loaded successfully", {
30935
+ logger8.info("VAD worker loaded successfully", {
30580
30936
  backend: "wasm",
30581
30937
  loadTimeMs: Math.round(loadTimeMs),
30582
30938
  workerLoadTimeMs: Math.round(result.loadTimeMs),
@@ -30683,7 +31039,7 @@ var SileroVADWorker = class {
30683
31039
  if (isSpeech && !this.wasSpeaking) {
30684
31040
  preSpeechChunks = [...this.preSpeechBuffer];
30685
31041
  this.preSpeechBuffer = [];
30686
- logger6.debug("Speech started with pre-speech buffer", {
31042
+ logger8.debug("Speech started with pre-speech buffer", {
30687
31043
  preSpeechChunks: preSpeechChunks.length,
30688
31044
  durationMs: Math.round(preSpeechChunks.length * this.getChunkDurationMs())
30689
31045
  });
@@ -30696,7 +31052,7 @@ var SileroVADWorker = class {
30696
31052
  this.preSpeechBuffer = [];
30697
31053
  }
30698
31054
  this.wasSpeaking = isSpeech;
30699
- logger6.trace("VAD worker inference completed", {
31055
+ logger8.trace("VAD worker inference completed", {
30700
31056
  probability: Math.round(result.probability * 1e3) / 1e3,
30701
31057
  isSpeech,
30702
31058
  inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
@@ -30764,18 +31120,18 @@ var SileroVADWorker = class {
30764
31120
  };
30765
31121
 
30766
31122
  // src/inference/createSileroVAD.ts
30767
- var logger7 = createLogger("createSileroVAD");
31123
+ var logger9 = createLogger("createSileroVAD");
30768
31124
  function supportsVADWorker() {
30769
31125
  if (typeof Worker === "undefined") {
30770
- logger7.debug("Worker not supported: Worker constructor undefined");
31126
+ logger9.debug("Worker not supported: Worker constructor undefined");
30771
31127
  return false;
30772
31128
  }
30773
31129
  if (typeof URL === "undefined" || typeof URL.createObjectURL === "undefined") {
30774
- logger7.debug("Worker not supported: URL.createObjectURL unavailable");
31130
+ logger9.debug("Worker not supported: URL.createObjectURL unavailable");
30775
31131
  return false;
30776
31132
  }
30777
31133
  if (typeof Blob === "undefined") {
30778
- logger7.debug("Worker not supported: Blob constructor unavailable");
31134
+ logger9.debug("Worker not supported: Blob constructor unavailable");
30779
31135
  return false;
30780
31136
  }
30781
31137
  return true;
@@ -30785,19 +31141,19 @@ function createSileroVAD(config) {
30785
31141
  let useWorker;
30786
31142
  if (config.useWorker !== void 0) {
30787
31143
  useWorker = config.useWorker;
30788
- logger7.debug("Worker preference explicitly set", { useWorker });
31144
+ logger9.debug("Worker preference explicitly set", { useWorker });
30789
31145
  } else {
30790
31146
  const workerSupported = supportsVADWorker();
30791
31147
  const onMobile = isMobile();
30792
31148
  useWorker = workerSupported && !onMobile;
30793
- logger7.debug("Auto-detected Worker preference", {
31149
+ logger9.debug("Auto-detected Worker preference", {
30794
31150
  useWorker,
30795
31151
  workerSupported,
30796
31152
  onMobile
30797
31153
  });
30798
31154
  }
30799
31155
  if (useWorker) {
30800
- logger7.info("Creating SileroVADWorker (off-main-thread)");
31156
+ logger9.info("Creating SileroVADWorker (off-main-thread)");
30801
31157
  const worker = new SileroVADWorker({
30802
31158
  modelUrl: config.modelUrl,
30803
31159
  sampleRate: config.sampleRate,
@@ -30809,7 +31165,7 @@ function createSileroVAD(config) {
30809
31165
  }
30810
31166
  return worker;
30811
31167
  }
30812
- logger7.info("Creating SileroVADInference (main thread)");
31168
+ logger9.info("Creating SileroVADInference (main thread)");
30813
31169
  return new SileroVADInference(config);
30814
31170
  }
30815
31171
  var VADWorkerWithFallback = class {
@@ -30835,7 +31191,7 @@ var VADWorkerWithFallback = class {
30835
31191
  try {
30836
31192
  return await this.implementation.load();
30837
31193
  } catch (error) {
30838
- logger7.warn("Worker load failed, falling back to main thread", {
31194
+ logger9.warn("Worker load failed, falling back to main thread", {
30839
31195
  error: error instanceof Error ? error.message : String(error)
30840
31196
  });
30841
31197
  try {
@@ -30844,7 +31200,7 @@ var VADWorkerWithFallback = class {
30844
31200
  }
30845
31201
  this.implementation = new SileroVADInference(this.config);
30846
31202
  this.hasFallenBack = true;
30847
- logger7.info("Fallback to SileroVADInference successful");
31203
+ logger9.info("Fallback to SileroVADInference successful");
30848
31204
  return await this.implementation.load();
30849
31205
  }
30850
31206
  }
@@ -30866,7 +31222,7 @@ var VADWorkerWithFallback = class {
30866
31222
  };
30867
31223
 
30868
31224
  // src/inference/Emotion2VecInference.ts
30869
- var logger8 = createLogger("Emotion2Vec");
31225
+ var logger10 = createLogger("Emotion2Vec");
30870
31226
  var EMOTION2VEC_LABELS = ["neutral", "happy", "angry", "sad"];
30871
31227
  var Emotion2VecInference = class {
30872
31228
  constructor(config) {
@@ -30908,36 +31264,36 @@ var Emotion2VecInference = class {
30908
31264
  "model.backend_requested": this.config.backend
30909
31265
  });
30910
31266
  try {
30911
- logger8.info("Loading ONNX Runtime...", { preference: this.config.backend });
30912
- const { ort: ort2, backend } = await getOnnxRuntimeForPreference(this.config.backend);
30913
- this.ort = ort2;
31267
+ logger10.info("Loading ONNX Runtime...", { preference: this.config.backend });
31268
+ const { ort, backend } = await getOnnxRuntimeForPreference(this.config.backend);
31269
+ this.ort = ort;
30914
31270
  this._backend = backend;
30915
- logger8.info("ONNX Runtime loaded", { backend: this._backend });
30916
- logger8.info("Checking model cache...");
31271
+ logger10.info("ONNX Runtime loaded", { backend: this._backend });
31272
+ logger10.info("Checking model cache...");
30917
31273
  const cache = getModelCache();
30918
31274
  const modelUrl = this.config.modelUrl;
30919
31275
  const isCached = await cache.has(modelUrl);
30920
- logger8.info("Cache check complete", { modelUrl, isCached });
31276
+ logger10.info("Cache check complete", { modelUrl, isCached });
30921
31277
  let modelBuffer;
30922
31278
  if (isCached) {
30923
- logger8.info("Loading model from cache...", { modelUrl });
31279
+ logger10.info("Loading model from cache...", { modelUrl });
30924
31280
  modelBuffer = await cache.get(modelUrl);
30925
- logger8.info("Model loaded from cache", { size: formatBytes(modelBuffer.byteLength) });
31281
+ logger10.info("Model loaded from cache", { size: formatBytes(modelBuffer.byteLength) });
30926
31282
  } else {
30927
- logger8.info("Fetching model (not cached)...", { modelUrl });
31283
+ logger10.info("Fetching model (not cached)...", { modelUrl });
30928
31284
  modelBuffer = await fetchWithCache(modelUrl);
30929
- logger8.info("Model fetched and cached", { size: formatBytes(modelBuffer.byteLength) });
31285
+ logger10.info("Model fetched and cached", { size: formatBytes(modelBuffer.byteLength) });
30930
31286
  }
30931
- logger8.info("Creating ONNX session (this may take a while for large models)...");
30932
- logger8.debug("Creating ONNX session", {
31287
+ logger10.info("Creating ONNX session (this may take a while for large models)...");
31288
+ logger10.debug("Creating ONNX session", {
30933
31289
  size: formatBytes(modelBuffer.byteLength),
30934
31290
  backend: this._backend
30935
31291
  });
30936
31292
  const sessionOptions = getSessionOptions(this._backend);
30937
31293
  const modelData = new Uint8Array(modelBuffer);
30938
- this.session = await ort2.InferenceSession.create(modelData, sessionOptions);
31294
+ this.session = await ort.InferenceSession.create(modelData, sessionOptions);
30939
31295
  const loadTimeMs = performance.now() - startTime;
30940
- logger8.info("Model loaded successfully", {
31296
+ logger10.info("Model loaded successfully", {
30941
31297
  backend: this._backend,
30942
31298
  loadTimeMs: Math.round(loadTimeMs),
30943
31299
  sampleRate: this.config.sampleRate,
@@ -31049,7 +31405,7 @@ var Emotion2VecInference = class {
31049
31405
  });
31050
31406
  }
31051
31407
  const inferenceTimeMs = performance.now() - startTime;
31052
- logger8.debug("Emotion inference completed", {
31408
+ logger10.debug("Emotion inference completed", {
31053
31409
  numFrames,
31054
31410
  dominant: dominant.emotion,
31055
31411
  confidence: Math.round(dominant.confidence * 100),
@@ -31125,15 +31481,8 @@ var Emotion2VecInference = class {
31125
31481
  */
31126
31482
  Emotion2VecInference.isWebGPUAvailable = isWebGPUAvailable;
31127
31483
 
31128
- // src/inference/ChatterboxTurboInference.ts
31129
- var import_webgpu = __toESM(require("onnxruntime-web/webgpu"));
31130
- init_transformers_web();
31131
- var logger9 = createLogger("ChatterboxTurbo");
31132
- env2.allowLocalModels = true;
31133
- import_webgpu.default.env.wasm.wasmPaths = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
31134
-
31135
31484
  // src/inference/SafariSpeechRecognition.ts
31136
- var logger10 = createLogger("SafariSpeech");
31485
+ var logger11 = createLogger("SafariSpeech");
31137
31486
  var SafariSpeechRecognition = class _SafariSpeechRecognition {
31138
31487
  constructor(config = {}) {
31139
31488
  this.recognition = null;
@@ -31152,7 +31501,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
31152
31501
  interimResults: config.interimResults ?? true,
31153
31502
  maxAlternatives: config.maxAlternatives ?? 1
31154
31503
  };
31155
- logger10.debug("SafariSpeechRecognition created", {
31504
+ logger11.debug("SafariSpeechRecognition created", {
31156
31505
  language: this.config.language,
31157
31506
  continuous: this.config.continuous
31158
31507
  });
@@ -31213,7 +31562,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
31213
31562
  */
31214
31563
  async start() {
31215
31564
  if (this.isListening) {
31216
- logger10.warn("Already listening");
31565
+ logger11.warn("Already listening");
31217
31566
  return;
31218
31567
  }
31219
31568
  if (!_SafariSpeechRecognition.isAvailable()) {
@@ -31243,7 +31592,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
31243
31592
  this.isListening = true;
31244
31593
  this.startTime = performance.now();
31245
31594
  this.accumulatedText = "";
31246
- logger10.info("Speech recognition started", {
31595
+ logger11.info("Speech recognition started", {
31247
31596
  language: this.config.language
31248
31597
  });
31249
31598
  span?.end();
@@ -31258,7 +31607,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
31258
31607
  */
31259
31608
  async stop() {
31260
31609
  if (!this.isListening || !this.recognition) {
31261
- logger10.warn("Not currently listening");
31610
+ logger11.warn("Not currently listening");
31262
31611
  return {
31263
31612
  text: this.accumulatedText,
31264
31613
  language: this.config.language,
@@ -31287,7 +31636,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
31287
31636
  if (this.recognition && this.isListening) {
31288
31637
  this.recognition.abort();
31289
31638
  this.isListening = false;
31290
- logger10.info("Speech recognition aborted");
31639
+ logger11.info("Speech recognition aborted");
31291
31640
  }
31292
31641
  }
31293
31642
  /**
@@ -31318,7 +31667,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
31318
31667
  this.isListening = false;
31319
31668
  this.resultCallbacks = [];
31320
31669
  this.errorCallbacks = [];
31321
- logger10.debug("SafariSpeechRecognition disposed");
31670
+ logger11.debug("SafariSpeechRecognition disposed");
31322
31671
  }
31323
31672
  /**
31324
31673
  * Set up event handlers for the recognition instance
@@ -31346,7 +31695,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
31346
31695
  confidence: alternative.confidence
31347
31696
  };
31348
31697
  this.emitResult(speechResult);
31349
- logger10.trace("Speech result", {
31698
+ logger11.trace("Speech result", {
31350
31699
  text: text.substring(0, 50),
31351
31700
  isFinal,
31352
31701
  confidence: alternative.confidence
@@ -31356,12 +31705,12 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
31356
31705
  span?.end();
31357
31706
  } catch (error) {
31358
31707
  span?.endWithError(error instanceof Error ? error : new Error(String(error)));
31359
- logger10.error("Error processing speech result", { error });
31708
+ logger11.error("Error processing speech result", { error });
31360
31709
  }
31361
31710
  };
31362
31711
  this.recognition.onerror = (event) => {
31363
31712
  const error = new Error(`Speech recognition error: ${event.error} - ${event.message}`);
31364
- logger10.error("Speech recognition error", { error: event.error, message: event.message });
31713
+ logger11.error("Speech recognition error", { error: event.error, message: event.message });
31365
31714
  this.emitError(error);
31366
31715
  if (this.stopRejecter) {
31367
31716
  this.stopRejecter(error);
@@ -31371,7 +31720,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
31371
31720
  };
31372
31721
  this.recognition.onend = () => {
31373
31722
  this.isListening = false;
31374
- logger10.info("Speech recognition ended", {
31723
+ logger11.info("Speech recognition ended", {
31375
31724
  totalText: this.accumulatedText.length,
31376
31725
  durationMs: performance.now() - this.startTime
31377
31726
  });
@@ -31388,13 +31737,13 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
31388
31737
  }
31389
31738
  };
31390
31739
  this.recognition.onstart = () => {
31391
- logger10.debug("Speech recognition started by browser");
31740
+ logger11.debug("Speech recognition started by browser");
31392
31741
  };
31393
31742
  this.recognition.onspeechstart = () => {
31394
- logger10.debug("Speech detected");
31743
+ logger11.debug("Speech detected");
31395
31744
  };
31396
31745
  this.recognition.onspeechend = () => {
31397
- logger10.debug("Speech ended");
31746
+ logger11.debug("Speech ended");
31398
31747
  };
31399
31748
  }
31400
31749
  /**
@@ -31405,7 +31754,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
31405
31754
  try {
31406
31755
  callback(result);
31407
31756
  } catch (error) {
31408
- logger10.error("Error in result callback", { error });
31757
+ logger11.error("Error in result callback", { error });
31409
31758
  }
31410
31759
  }
31411
31760
  }
@@ -31417,7 +31766,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
31417
31766
  try {
31418
31767
  callback(error);
31419
31768
  } catch (callbackError) {
31420
- logger10.error("Error in error callback", { error: callbackError });
31769
+ logger11.error("Error in error callback", { error: callbackError });
31421
31770
  }
31422
31771
  }
31423
31772
  }
@@ -32840,12 +33189,12 @@ async function isHuggingFaceCDNReachable(testUrl = HF_CDN_TEST_URL) {
32840
33189
  }
32841
33190
 
32842
33191
  // src/utils/transformersCacheClear.ts
32843
- var logger11 = createLogger("TransformersCache");
33192
+ var logger12 = createLogger("TransformersCache");
32844
33193
  async function clearTransformersCache(options) {
32845
33194
  const verbose = options?.verbose ?? true;
32846
33195
  const additionalPatterns = options?.additionalPatterns ?? [];
32847
33196
  if (!("caches" in window)) {
32848
- logger11.warn("Cache API not available in this environment");
33197
+ logger12.warn("Cache API not available in this environment");
32849
33198
  return [];
32850
33199
  }
32851
33200
  try {
@@ -32863,18 +33212,18 @@ async function clearTransformersCache(options) {
32863
33212
  );
32864
33213
  if (shouldDelete) {
32865
33214
  if (verbose) {
32866
- logger11.info("Deleting cache", { cacheName });
33215
+ logger12.info("Deleting cache", { cacheName });
32867
33216
  }
32868
33217
  const deleted = await caches.delete(cacheName);
32869
33218
  if (deleted) {
32870
33219
  deletedCaches.push(cacheName);
32871
33220
  } else if (verbose) {
32872
- logger11.warn("Failed to delete cache", { cacheName });
33221
+ logger12.warn("Failed to delete cache", { cacheName });
32873
33222
  }
32874
33223
  }
32875
33224
  }
32876
33225
  if (verbose) {
32877
- logger11.info("Cache clearing complete", {
33226
+ logger12.info("Cache clearing complete", {
32878
33227
  totalCaches: cacheNames.length,
32879
33228
  deletedCount: deletedCaches.length,
32880
33229
  deletedCaches
@@ -32882,35 +33231,35 @@ async function clearTransformersCache(options) {
32882
33231
  }
32883
33232
  return deletedCaches;
32884
33233
  } catch (error) {
32885
- logger11.error("Error clearing caches", { error });
33234
+ logger12.error("Error clearing caches", { error });
32886
33235
  throw error;
32887
33236
  }
32888
33237
  }
32889
33238
  async function clearSpecificCache(cacheName) {
32890
33239
  if (!("caches" in window)) {
32891
- logger11.warn("Cache API not available in this environment");
33240
+ logger12.warn("Cache API not available in this environment");
32892
33241
  return false;
32893
33242
  }
32894
33243
  try {
32895
33244
  const deleted = await caches.delete(cacheName);
32896
- logger11.info("Cache deletion attempt", { cacheName, deleted });
33245
+ logger12.info("Cache deletion attempt", { cacheName, deleted });
32897
33246
  return deleted;
32898
33247
  } catch (error) {
32899
- logger11.error("Error deleting cache", { cacheName, error });
33248
+ logger12.error("Error deleting cache", { cacheName, error });
32900
33249
  return false;
32901
33250
  }
32902
33251
  }
32903
33252
  async function listCaches() {
32904
33253
  if (!("caches" in window)) {
32905
- logger11.warn("Cache API not available in this environment");
33254
+ logger12.warn("Cache API not available in this environment");
32906
33255
  return [];
32907
33256
  }
32908
33257
  try {
32909
33258
  const cacheNames = await caches.keys();
32910
- logger11.debug("Available caches", { cacheNames });
33259
+ logger12.debug("Available caches", { cacheNames });
32911
33260
  return cacheNames;
32912
33261
  } catch (error) {
32913
- logger11.error("Error listing caches", { error });
33262
+ logger12.error("Error listing caches", { error });
32914
33263
  return [];
32915
33264
  }
32916
33265
  }
@@ -32952,7 +33301,7 @@ async function validateCachedResponse(cacheName, requestUrl) {
32952
33301
  reason: valid ? "Valid response" : `Invalid: status=${response.status}, contentType=${contentType}, isHtml=${isHtml || looksLikeHtml}`
32953
33302
  };
32954
33303
  } catch (error) {
32955
- logger11.error("Error validating cached response", { cacheName, requestUrl, error });
33304
+ logger12.error("Error validating cached response", { cacheName, requestUrl, error });
32956
33305
  return {
32957
33306
  exists: false,
32958
33307
  valid: false,
@@ -32989,7 +33338,7 @@ async function scanForInvalidCaches() {
32989
33338
  }
32990
33339
  }
32991
33340
  }
32992
- logger11.info("Cache scan complete", {
33341
+ logger12.info("Cache scan complete", {
32993
33342
  totalCaches: cacheNames.length,
32994
33343
  scannedEntries,
32995
33344
  invalidCount: invalidEntries.length
@@ -33000,13 +33349,13 @@ async function scanForInvalidCaches() {
33000
33349
  invalidEntries
33001
33350
  };
33002
33351
  } catch (error) {
33003
- logger11.error("Error scanning caches", { error });
33352
+ logger12.error("Error scanning caches", { error });
33004
33353
  throw error;
33005
33354
  }
33006
33355
  }
33007
33356
  async function nukeBrowserCaches(preventRecreation = false) {
33008
33357
  if (!("caches" in window)) {
33009
- logger11.warn("Cache API not available in this environment");
33358
+ logger12.warn("Cache API not available in this environment");
33010
33359
  return 0;
33011
33360
  }
33012
33361
  try {
@@ -33018,17 +33367,17 @@ async function nukeBrowserCaches(preventRecreation = false) {
33018
33367
  deletedCount++;
33019
33368
  }
33020
33369
  }
33021
- logger11.info("All browser caches cleared", {
33370
+ logger12.info("All browser caches cleared", {
33022
33371
  totalDeleted: deletedCount
33023
33372
  });
33024
33373
  if (preventRecreation) {
33025
33374
  const { env: env3 } = await Promise.resolve().then(() => (init_transformers_web(), transformers_web_exports));
33026
33375
  env3.useBrowserCache = false;
33027
- logger11.warn("Browser cache creation disabled (env.useBrowserCache = false)");
33376
+ logger12.warn("Browser cache creation disabled (env.useBrowserCache = false)");
33028
33377
  }
33029
33378
  return deletedCount;
33030
33379
  } catch (error) {
33031
- logger11.error("Error nuking caches", { error });
33380
+ logger12.error("Error nuking caches", { error });
33032
33381
  throw error;
33033
33382
  }
33034
33383
  }