ipex-llm 2.2.0b20250223__py3-none-manylinux2010_x86_64.whl → 2.2.0b20250225__py3-none-manylinux2010_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ipex_llm/transformers/convert.py +14 -1
- ipex_llm/transformers/models/common.py +27 -0
- ipex_llm/transformers/models/deepseek.py +303 -0
- ipex_llm/transformers/models/minicpm3.py +25 -3
- ipex_llm/transformers/xgrammar.py +47 -0
- {ipex_llm-2.2.0b20250223.dist-info → ipex_llm-2.2.0b20250225.dist-info}/METADATA +19 -19
- {ipex_llm-2.2.0b20250223.dist-info → ipex_llm-2.2.0b20250225.dist-info}/RECORD +13 -11
- {ipex_llm-2.2.0b20250223.data → ipex_llm-2.2.0b20250225.data}/scripts/ipex-llm-init +0 -0
- {ipex_llm-2.2.0b20250223.data → ipex_llm-2.2.0b20250225.data}/scripts/llm-chat +0 -0
- {ipex_llm-2.2.0b20250223.data → ipex_llm-2.2.0b20250225.data}/scripts/llm-cli +0 -0
- {ipex_llm-2.2.0b20250223.dist-info → ipex_llm-2.2.0b20250225.dist-info}/WHEEL +0 -0
- {ipex_llm-2.2.0b20250223.dist-info → ipex_llm-2.2.0b20250225.dist-info}/entry_points.txt +0 -0
- {ipex_llm-2.2.0b20250223.dist-info → ipex_llm-2.2.0b20250225.dist-info}/top_level.txt +0 -0
ipex_llm/transformers/convert.py
CHANGED
```diff
@@ -1070,7 +1070,9 @@ def _optimize_pre(model, qtype=None):
         model.apply(pre_register_inv_freq)
     elif model.config.model_type == "multi_modality":
         _optimize_pre(model.language_model)
-
+    elif model.config.model_type == "deepseek_v3" and model.config.hidden_size == 2048:
+        from ipex_llm.transformers.models.deepseek import padding_mla_v_hd
+        model.apply(padding_mla_v_hd)
     return model
 
 
@@ -2023,6 +2025,17 @@ def _optimize_post(model):
 
         # llm
         _optimize_post(model.language_model)
+    elif model.config.model_type == "deepseek_v3" and model.config.hidden_size == 2048:
+        modeling_module_name = model.__class__.__module__
+        module = importlib.import_module(modeling_module_name)
+        from ipex_llm.transformers.models.common import rms_norm_forward
+        from ipex_llm.transformers.models.deepseek import deepseek_model_forward
+        from ipex_llm.transformers.models.deepseek import deepseek_attention_forward
+        from ipex_llm.transformers.models.deepseek import deepseek_moe_forward
+        convert_forward(model, module.DeepseekV3RMSNorm, rms_norm_forward)
+        convert_forward(model, module.DeepseekV3Model, deepseek_model_forward)
+        convert_forward(model, module.DeepseekV3Attention, deepseek_attention_forward)
+        convert_forward(model, module.DeepseekV3MoE, deepseek_moe_forward)
 
     return model
 
```
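For readers skimming the hunks above: `_optimize_pre` patches weights in place (here, padding the MLA value heads), while `_optimize_post` swaps the `forward` methods of matching module classes for IPEX-LLM's optimized implementations. A minimal sketch of that kind of class-based forward rebinding, assuming only standard PyTorch (the `replace_forward` helper below is hypothetical; the real `convert_forward` is defined elsewhere in convert.py and may differ in details):

```python
import types
import torch

def replace_forward(model: torch.nn.Module, target_class: type, new_forward) -> None:
    # Walk all submodules and rebind `forward` on instances of target_class,
    # so e.g. every DeepseekV3Attention instance starts routing through the
    # patched code without reloading or copying any weights.
    for module in model.modules():
        if isinstance(module, target_class):
            module.forward = types.MethodType(new_forward, module)
```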
ipex_llm/transformers/models/common.py
CHANGED
```diff
@@ -95,6 +95,33 @@ def padding_attention_hd_base(module: torch.nn.Module, attention_class,
     module.old_head_dim = old_head_dim
 
 
+def padding_mla_v_hd_base(module: torch.nn.Module, attention_class):
+    if (
+        isinstance(attention_class, str) and module.__class__.__name__ == attention_class
+        or not isinstance(attention_class, str) and isinstance(module, attention_class)
+    ):
+        k_head_dim = module.q_head_dim
+        v_head_dim = module.v_head_dim
+        if v_head_dim < k_head_dim:
+            kv_b_proj = module.kv_b_proj
+            w = kv_b_proj.weight.data.view(module.num_heads,
+                                           module.qk_nope_head_dim + module.v_head_dim,
+                                           module.kv_lora_rank)
+            k_w, v_w = w.split([module.qk_nope_head_dim, module.v_head_dim], dim=1)
+            new_v_w = torch.zeros([module.num_heads, k_head_dim, module.kv_lora_rank],
+                                  dtype=v_w.dtype, device=v_w.device)
+            new_v_w[:, :v_head_dim, :] = v_w
+            new_w = torch.cat([k_w, new_v_w], dim=1).view(-1, module.kv_lora_rank)
+
+            new_kv_b_proj = torch.nn.Linear(0, 0, bias=False,
+                                            dtype=new_w.dtype, device=new_w.device)
+            new_kv_b_proj.in_features = new_w.size(1)
+            new_kv_b_proj.out_features = new_w.size(0)
+            new_kv_b_proj.weight = torch.nn.Parameter(new_w, False)
+
+            module.kv_b_proj = new_kv_b_proj
+
+
 def padding_states_hd(states: torch.Tensor, old_head_dim: int, new_head_dim: int):
     bsz, num_heads, seq_len, head_dim = states.size()
     if head_dim == old_head_dim and old_head_dim < new_head_dim:
```
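For intuition about what `padding_mla_v_hd_base` does to the fused `kv_b_proj` weight, here is a standalone sketch. The sizes are illustrative, chosen to match the MLA layout where `q_head_dim = qk_nope_head_dim + qk_rope_head_dim` exceeds `v_head_dim`:

```python
import torch

num_heads, kv_lora_rank = 16, 512
qk_nope_head_dim, qk_rope_head_dim, v_head_dim = 128, 64, 128
q_head_dim = qk_nope_head_dim + qk_rope_head_dim  # 192 > v_head_dim (128)

# kv_b_proj maps the rank-512 compressed KV to per-head K-nope and V rows.
w = torch.randn(num_heads * (qk_nope_head_dim + v_head_dim), kv_lora_rank)
w = w.view(num_heads, qk_nope_head_dim + v_head_dim, kv_lora_rank)
k_w, v_w = w.split([qk_nope_head_dim, v_head_dim], dim=1)

# Zero-pad the V rows from v_head_dim up to q_head_dim so that K and V
# share one head dimension and a single fused SDPA call can serve both.
new_v_w = torch.zeros(num_heads, q_head_dim, kv_lora_rank)
new_v_w[:, :v_head_dim, :] = v_w
new_w = torch.cat([k_w, new_v_w], dim=1).view(-1, kv_lora_rank)
print(new_w.shape)  # torch.Size([5120, 512]) == 16 * (128 + 192) rows
```

This is also why `deepseek_attention_forward` below splits the kv projection into `[qk_nope_head_dim, q_head_dim]` and slices `attn_output` back to `self.v_head_dim` after attention: the zero padding buys a uniform head size at the cost of a few dead rows.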
ipex_llm/transformers/models/deepseek.py
ADDED
```diff
@@ -0,0 +1,303 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Some parts of this file is adapted from
+# https://huggingface.co/deepseek-ai/DeepSeek-R1/blob/main/modeling_deepseek.py
+# which is licensed under Apache License 2.0:
+#
+# https://github.com/OpenBMB/MiniCPM/blob/main/LICENSE
+#
+
+import torch
+import warnings
+
+from typing import Optional, Tuple, List, Union
+from transformers.cache_utils import Cache
+from transformers.modeling_outputs import BaseModelOutputWithPast
+from transformers.modeling_attn_mask_utils import _prepare_4d_causal_attention_mask
+
+from ipex_llm.utils.common.log4Error import invalidInputError
+from ipex_llm.transformers.kv import DynamicNormalCache
+from ipex_llm.transformers.models.common import padding_mla_v_hd_base
+from ipex_llm.transformers.models.common import scaled_dot_product_attention
+from ipex_llm.transformers.models.utils import rotate_half
+
+
+def padding_mla_v_hd(module: torch.nn.Module):
+    padding_mla_v_hd_base(module, "DeepseekV3Attention")
+
+
+def deepseek_model_forward(
+    self,
+    input_ids: torch.LongTensor = None,
+    attention_mask: Optional[torch.Tensor] = None,
+    position_ids: Optional[torch.LongTensor] = None,
+    past_key_values: Optional[List[torch.FloatTensor]] = None,
+    inputs_embeds: Optional[torch.FloatTensor] = None,
+    use_cache: Optional[bool] = None,
+    output_attentions: Optional[bool] = None,
+    output_hidden_states: Optional[bool] = None,
+    return_dict: Optional[bool] = None,
+) -> Union[Tuple, BaseModelOutputWithPast]:
+    output_attentions = (
+        output_attentions if output_attentions is not None
+        else self.config.output_attentions
+    )
+    output_hidden_states = (
+        output_hidden_states if output_hidden_states is not None
+        else self.config.output_hidden_states
+    )
+
+    use_cache = use_cache if use_cache is not None else self.config.use_cache
+
+    return_dict = (
+        return_dict if return_dict is not None else self.config.use_return_dict
+    )
+
+    # retrieve input_ids and inputs_embeds
+    invalidInputError((input_ids is None) ^ (inputs_embeds is None),
+                      "You cannot specify both input_ids and inputs_embeds at the same time, "
+                      "and must specify either one")
+
+    if inputs_embeds is None:
+        inputs_embeds = self.embed_tokens(input_ids)
+
+    batch_size, seq_length = inputs_embeds.shape[:2]
+
+    # IPEX-LLM OPT start: kv cache
+    past_key_values_length = 0
+    use_cache = True if inputs_embeds.device.type == "xpu" else use_cache
+    if use_cache:
+        if not isinstance(past_key_values, DynamicNormalCache):
+            past_key_values = DynamicNormalCache.from_legacy_cache(past_key_values)
+        past_key_values_length = past_key_values.get_usable_length(seq_length)
+    # IPEX-LLM OPT end: kv cache
+
+    if position_ids is None:
+        position_ids = torch.arange(
+            past_key_values_length,
+            seq_length + past_key_values_length,
+            dtype=torch.long,
+            device=inputs_embeds.device,
+        )
+        position_ids = position_ids.unsqueeze(0)
+
+    # IPEX-LLM OPT start: fuse rope
+    if inputs_embeds.device.type == "xpu" and position_ids is not None:
+        cos, sin = self.layers[0].self_attn.rotary_emb(inputs_embeds,
+                                                       seq_length + past_key_values_length)
+        cos = cos[position_ids[0]].contiguous()
+        sin = sin[position_ids[0]].contiguous()
+        position_embeddings = (cos, sin)
+    else:
+        position_embeddings = None
+    # IPEX-LLM OPT end: fuse rope
+
+    # 4d mask is passed through the layers
+    attention_mask = _prepare_4d_causal_attention_mask(
+        attention_mask,
+        (batch_size, seq_length),
+        inputs_embeds,
+        past_key_values_length,
+    )
+
+    # embed positions
+    hidden_states = inputs_embeds
+
+    # decoder layers
+    all_hidden_states = () if output_hidden_states else None
+    all_self_attns = () if output_attentions else None
+    next_decoder_cache = None
+
+    for decoder_layer in self.layers:
+        if output_hidden_states:
+            all_hidden_states += (hidden_states,)
+
+        layer_outputs = decoder_layer(
+            hidden_states,
+            attention_mask=attention_mask,
+            position_ids=position_ids,
+            past_key_value=past_key_values,
+            output_attentions=output_attentions,
+            use_cache=use_cache,
+            position_embeddings=position_embeddings,
+        )
+
+        hidden_states = layer_outputs[0]
+
+        if use_cache:
+            next_decoder_cache = layer_outputs[2 if output_attentions else 1]
+
+        if output_attentions:
+            all_self_attns += (layer_outputs[1],)
+
+    hidden_states = self.norm(hidden_states)
+
+    # add hidden states from the last decoder layer
+    if output_hidden_states:
+        all_hidden_states += (hidden_states,)
+
+    next_cache = next_decoder_cache
+    if not return_dict:
+        return tuple(
+            v
+            for v in [hidden_states, next_cache, all_hidden_states, all_self_attns]
+            if v is not None
+        )
+    return BaseModelOutputWithPast(
+        last_hidden_state=hidden_states,
+        past_key_values=next_cache,
+        hidden_states=all_hidden_states,
+        attentions=all_self_attns,
+    )
+
+
+def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1):
+    cos = cos[position_ids].unsqueeze(unsqueeze_dim)
+    sin = sin[position_ids].unsqueeze(unsqueeze_dim)
+
+    b, h, s, d = q.shape
+    q = q.view(b, h, s, d // 2, 2).transpose(4, 3).reshape(b, h, s, d)
+
+    b, h, s, d = k.shape
+    k = k.view(b, h, s, d // 2, 2).transpose(4, 3).reshape(b, h, s, d)
+
+    q_embed = (q * cos) + (rotate_half(q) * sin)
+    k_embed = (k * cos) + (rotate_half(k) * sin)
+    return q_embed, k_embed
+
+
+def deepseek_attention_forward(
+    self,
+    hidden_states: torch.Tensor,
+    attention_mask: Optional[torch.Tensor] = None,
+    position_ids: Optional[torch.LongTensor] = None,
+    past_key_value: Optional[Cache] = None,
+    output_attentions: bool = False,
+    use_cache: bool = False,
+    **kwargs,
+) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
+    if "padding_mask" in kwargs:
+        warnings.warn(
+            "Passing `padding_mask` is deprecated and will be removed in v4.37. "
+            "Please make sure use `attention_mask` instead.`"
+        )
+
+    bsz, q_len, _ = hidden_states.size()
+
+    if self.q_lora_rank is None:
+        q = self.q_proj(hidden_states)
+    else:
+        q = self.q_b_proj(self.q_a_layernorm(self.q_a_proj(hidden_states)))
+    q = q.view(bsz, q_len, self.num_heads, self.q_head_dim).transpose(1, 2)
+
+    compressed_kv = self.kv_a_proj_with_mqa(hidden_states)
+    compressed_kv, k_pe = torch.split(
+        compressed_kv, [self.kv_lora_rank, self.qk_rope_head_dim], dim=-1
+    )
+    k_pe = k_pe.view(bsz, q_len, 1, self.qk_rope_head_dim).transpose(1, 2)
+    kv = (
+        self.kv_b_proj(self.kv_a_layernorm(compressed_kv))
+        .view(bsz, q_len, self.num_heads, self.qk_nope_head_dim + self.q_head_dim)
+        .transpose(1, 2)
+    )
+
+    k_nope, value_states = torch.split(
+        kv, [self.qk_nope_head_dim, self.q_head_dim], dim=-1
+    )
+    kv_seq_len = value_states.shape[-2]
+    if past_key_value is not None:
+        kv_seq_len += past_key_value.get_usable_length(kv_seq_len, self.layer_idx)
+
+    position_embeddings = kwargs.get("position_embeddings", None)
+    if position_embeddings is not None:
+        query_states = q
+        key_states = torch.cat(
+            [k_nope, k_pe.expand([-1, self.num_heads, -1, -1])],
+            dim=-1
+        )
+        import xe_addons
+        cos, sin = position_embeddings
+        xe_addons.rotary_two_with_cache_inplaced(query_states[:, :, :, self.qk_nope_head_dim:],
+                                                 key_states[:, :, :, self.qk_nope_head_dim:],
+                                                 cos, sin, True)
+    else:
+        q_nope, q_pe = torch.split(
+            q, [self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1
+        )
+        cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len)
+        q_pe, k_pe = apply_rotary_pos_emb(q_pe, k_pe, cos, sin, position_ids)
+
+        query_states = k_pe.new_empty(bsz, self.num_heads, q_len, self.q_head_dim)
+        query_states[:, :, :, : self.qk_nope_head_dim] = q_nope
+        query_states[:, :, :, self.qk_nope_head_dim:] = q_pe
+
+        key_states = k_pe.new_empty(bsz, self.num_heads, q_len, self.q_head_dim)
+        key_states[:, :, :, : self.qk_nope_head_dim] = k_nope
+        key_states[:, :, :, self.qk_nope_head_dim:] = k_pe
+
+    if past_key_value is not None:
+        key_states, value_states = past_key_value.update(key_states, value_states,
+                                                         self.layer_idx, None)
+
+    attn_weights = None
+    attn_output = scaled_dot_product_attention(
+        query_states, key_states, value_states,
+        attention_mask, q_len == kv_seq_len, self.softmax_scale
+    )
+    attn_output = attn_output[:, :, :, :self.v_head_dim]
+
+    attn_output = attn_output.transpose(1, 2).contiguous()
+
+    attn_output = attn_output.reshape(bsz, q_len, self.num_heads * self.v_head_dim)
+
+    attn_output = self.o_proj(attn_output)
+
+    if not output_attentions:
+        attn_weights = None
+
+    return attn_output, attn_weights, past_key_value
+
+
+def moe_infer_decode(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_weight: torch.Tensor):
+    idxs = topk_ids.flatten().tolist()
+    outputs = []
+    for i in idxs:
+        expert = self.experts[i]
+        expert_out = expert(x)
+        outputs.append(expert_out)
+    outs = torch.cat(outputs, dim=0)
+    reshaped_topk_weight = topk_weight.squeeze(0).unsqueeze(-1).to(outs.dtype)
+    final_out = (outs * reshaped_topk_weight).sum(dim=0, keepdim=True)
+    return final_out
+
+
+def deepseek_moe_forward(self, hidden_states: torch.Tensor):
+    identity = hidden_states
+    orig_shape = hidden_states.shape
+    topk_idx, topk_weight = self.gate(hidden_states)
+    hidden_states = hidden_states.view(-1, hidden_states.shape[-1])
+    flat_topk_idx = topk_idx.view(-1)
+    if not self.training:
+        # IPEX-LLM OPT start : add special moe_infer implementation for decoding
+        if topk_idx.size(0) == 1:
+            y = moe_infer_decode(self, hidden_states, topk_idx, topk_weight)
+        else:
+            y = self.moe_infer(hidden_states, topk_idx, topk_weight)
+        y = y.view(*orig_shape)
+        # IPEX-LLM OPT end
+    if self.config.n_shared_experts is not None:
+        y = y + self.shared_experts(identity)
+    return y
```
ipex_llm/transformers/models/minicpm3.py
CHANGED
```diff
@@ -1,3 +1,25 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Some parts of this file is adapted from
+# https://hf-mirror.com/openbmb/MiniCPM3-4B/blob/main/modeling_minicpm.py
+# which is licensed under Apache License 2.0:
+#
+# https://github.com/OpenBMB/MiniCPM/blob/main/LICENSE
+#
+
 import torch
 import warnings
 
@@ -122,9 +144,6 @@ def minicpm3_attention_forward(
 
     q = self.q_b_proj(self.q_a_layernorm(self.q_a_proj(hidden_states)))
     q = q.view(bsz, q_len, self.num_heads, self.q_head_dim).transpose(1, 2)
-    q_nope, q_pe = torch.split(
-        q, [self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1
-    )
 
     compressed_kv = self.kv_a_proj_with_mqa(hidden_states)
     compressed_kv, k_pe = torch.split(
@@ -169,6 +188,9 @@ def minicpm3_attention_forward(
         else:
             invalidInputError(f"unknown rope method: {self.rotary_emb.__class__.__name__}")
     else:
+        q_nope, q_pe = torch.split(
+            q, [self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1
+        )
         cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len)
         q_pe, k_pe = apply_rotary_pos_emb(q_pe, k_pe, cos, sin, position_ids)
 
```
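Net effect of the two code hunks above: the `q_nope`/`q_pe` split is deferred from the top of the attention forward into the unfused-rope fallback branch, the only place the two halves are needed as separate tensors (the fused XPU path rotates the rope slice of `q` in place instead). The split itself is an ordinary last-dimension `torch.split`; a quick illustration with made-up MiniCPM3-like sizes:

```python
import torch

bsz, num_heads, q_len = 1, 40, 16
qk_nope_head_dim, qk_rope_head_dim = 64, 32  # illustrative, not from the config
q = torch.randn(bsz, num_heads, q_len, qk_nope_head_dim + qk_rope_head_dim)

# Separate the rotary ("pe") slice from the non-rotary ("nope") slice.
q_nope, q_pe = torch.split(q, [qk_nope_head_dim, qk_rope_head_dim], dim=-1)
print(q_nope.shape, q_pe.shape)  # [1, 40, 16, 64] and [1, 40, 16, 32]
```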
ipex_llm/transformers/xgrammar.py
ADDED
```diff
@@ -0,0 +1,47 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+from transformers import PreTrainedTokenizer, LogitsProcessor
+from ipex_llm.utils.modules import insert_fake_module
+
+insert_fake_module("xgrammar.kernels.apply_token_bitmask_inplace_cuda")
+insert_fake_module("xgrammar.kernels.apply_token_bitmask_inplace_triton")
+insert_fake_module(
+    "xgrammar.kernels.apply_token_bitmask_inplace_cuda.apply_token_bitmask_inplace_cuda"
+)
+insert_fake_module(
+    "xgrammar.kernels.apply_token_bitmask_inplace_triton.apply_token_bitmask_inplace_triton"
+)
+
+import xgrammar as xgr
+
+
+def create_json_logits_processor(tokenizer: PreTrainedTokenizer, vocab_size: int, schema=None):
+    tokenizer_info = xgr.TokenizerInfo.from_huggingface(tokenizer, vocab_size=vocab_size)
+    grammar_compiler = xgr.GrammarCompiler(tokenizer_info)
+    if schema is None:
+        compiled_grammar = grammar_compiler.compile_builtin_json_grammar()
+    else:
+        compiled_grammar = grammar_compiler.compile_json_schema(schema)
+    processor = xgr.contrib.hf.LogitsProcessor(compiled_grammar)
+    return processor
+
+
+def reset_json_logits_processor(processor: LogitsProcessor) -> LogitsProcessor:
+    compiled_grammar = processor.compiled_grammar
+    new_processor = xgr.contrib.hf.LogitsProcessor(compiled_grammar)
+    return new_processor
```
{ipex_llm-2.2.0b20250223.dist-info → ipex_llm-2.2.0b20250225.dist-info}/METADATA
CHANGED
```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ipex-llm
-Version: 2.2.0b20250223
+Version: 2.2.0b20250225
 Summary: Large Language Model Develop Toolkit
 Home-page: https://github.com/intel-analytics/ipex-llm
 Author: BigDL Authors
@@ -27,7 +27,7 @@ Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'all'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'all'
 Provides-Extra: cpp
-Requires-Dist: bigdl-core-cpp ==2.6.0b20250223 ; extra == 'cpp'
+Requires-Dist: bigdl-core-cpp ==2.6.0b20250225 ; extra == 'cpp'
 Requires-Dist: setuptools ; extra == 'cpp'
 Requires-Dist: onednn-devel ==2025.0.1 ; (platform_system == "Windows") and extra == 'cpp'
 Requires-Dist: onednn ==2025.0.1 ; (platform_system == "Windows") and extra == 'cpp'
@@ -60,7 +60,7 @@ Requires-Dist: transformers ==4.40.0 ; extra == 'npu'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'npu'
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'npu'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'npu'
-Requires-Dist: bigdl-core-npu ==2.6.0b20250223 ; (platform_system == "Windows") and extra == 'npu'
+Requires-Dist: bigdl-core-npu ==2.6.0b20250225 ; (platform_system == "Windows") and extra == 'npu'
 Provides-Extra: serving
 Requires-Dist: py-cpuinfo ; extra == 'serving'
 Requires-Dist: fschat[model_worker,webui] ==0.2.36 ; extra == 'serving'
@@ -80,9 +80,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250223 ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250223 ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250223 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250225 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250225 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250225 ; extra == 'xpu'
 Provides-Extra: xpu-2-1
 Requires-Dist: py-cpuinfo ; extra == 'xpu-2-1'
 Requires-Dist: protobuf ; extra == 'xpu-2-1'
@@ -97,9 +97,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu-2-1'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250223 ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250223 ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250223 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250225 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250225 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250225 ; extra == 'xpu-2-1'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-1'
 Requires-Dist: dpcpp-cpp-rt ==2024.0.2 ; (platform_system == "Windows") and extra == 'xpu-2-1'
 Requires-Dist: mkl-dpcpp ==2024.0.0 ; (platform_system == "Windows") and extra == 'xpu-2-1'
@@ -117,7 +117,7 @@ Requires-Dist: setuptools ; extra == 'xpu-2-6'
 Requires-Dist: torch ==2.6.0+xpu ; extra == 'xpu-2-6'
 Requires-Dist: torchvision ==0.21.0+xpu ; extra == 'xpu-2-6'
 Requires-Dist: torchaudio ==2.6.0+xpu ; extra == 'xpu-2-6'
-Requires-Dist: bigdl-core-xe-all ==2.6.0b20250223 ; extra == 'xpu-2-6'
+Requires-Dist: bigdl-core-xe-all ==2.6.0b20250225 ; extra == 'xpu-2-6'
 Requires-Dist: onednn-devel ==2025.0.1 ; extra == 'xpu-2-6'
 Requires-Dist: onednn ==2025.0.1 ; extra == 'xpu-2-6'
 Requires-Dist: dpcpp-cpp-rt ==2025.0.2 ; extra == 'xpu-2-6'
@@ -133,9 +133,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arc'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arc'
 Requires-Dist: tabulate ; extra == 'xpu-arc'
 Requires-Dist: setuptools ; extra == 'xpu-arc'
-Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250223 ; extra == 'xpu-arc'
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250223 ; extra == 'xpu-arc'
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250223 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250225 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250225 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250225 ; extra == 'xpu-arc'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arc'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
@@ -156,9 +156,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arl'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arl'
 Requires-Dist: tabulate ; extra == 'xpu-arl'
 Requires-Dist: setuptools ; extra == 'xpu-arl'
-Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250223 ; extra == 'xpu-arl'
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250223 ; extra == 'xpu-arl'
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250223 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250225 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250225 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250225 ; extra == 'xpu-arl'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arl'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
@@ -179,9 +179,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-lnl'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-lnl'
 Requires-Dist: tabulate ; extra == 'xpu-lnl'
 Requires-Dist: setuptools ; extra == 'xpu-lnl'
-Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250223 ; extra == 'xpu-lnl'
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250223 ; extra == 'xpu-lnl'
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250223 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250225 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250225 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250225 ; extra == 'xpu-lnl'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-lnl'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
```
{ipex_llm-2.2.0b20250223.dist-info → ipex_llm-2.2.0b20250225.dist-info}/RECORD
CHANGED
```diff
@@ -94,7 +94,7 @@ ipex_llm/serving/fastchat/tgi_api_protocol.py,sha256=brT3k3-V0NJrU4fRqUwWjC0O3iO
 ipex_llm/serving/fastchat/tgi_api_server.py,sha256=agNTAEiZPSuj3dEdIdYKwkoY0cXOUDX06DiM9VP2knQ,24418
 ipex_llm/serving/fastchat/vllm_worker.py,sha256=ZLz2Q9GxJO6r_LOiP6epgCRjBGk-K4EB1SNEWSJp5DA,11091
 ipex_llm/transformers/__init__.py,sha256=BreA3EY6hkNq0rVixb_sUuTLzMrcWXTt3yRsshCPHHQ,1214
-ipex_llm/transformers/convert.py,sha256
+ipex_llm/transformers/convert.py,sha256=294hk2uMQPN0DrPIpqhfgFvR7klvlHKS53DUUhoWaeU,103273
 ipex_llm/transformers/convert_ipex.py,sha256=_nSnUTQy-yfkKaqGdqnBdWztZf3NGmnbZ0TKaDrF4X4,14617
 ipex_llm/transformers/embedding.py,sha256=bdgk59DvD4ZZyxRzewXOR7g56nThgO6uhIwk8QL7f-s,9299
 ipex_llm/transformers/kv.py,sha256=k4TU18LlA-Sbq9WNNQnfuzu3RSFBwFhmaV3BcGN5bAo,19191
@@ -113,6 +113,7 @@ ipex_llm/transformers/speculative.py,sha256=0XNLgc9dGswJHVPrXo4iM7pPxkWwfFfJMECc
 ipex_llm/transformers/streamer.py,sha256=RrVlLblzCOtABRUpaMXAyaMnCGgLUtAi_YesLumRbww,4842
 ipex_llm/transformers/training_patch.py,sha256=oxMkUtqyvqJiprw6dE3skkYfD1HOmUlH9N0hBkbn0G0,10799
 ipex_llm/transformers/utils.py,sha256=a-2wbflSd_yYnC5qcMoY5HLR1yT_QpxeX_WpGpaDLrA,17457
+ipex_llm/transformers/xgrammar.py,sha256=dd-e0DO0s-t-idngVzncnPAa_Gxb8YNoRJ3WROkwrs8,1840
 ipex_llm/transformers/xpu_customize_fwd.py,sha256=PUBYLnTbaBXUs3Dnte9Gqln2XFk8iA62SmloWjr7GJI,7668
 ipex_llm/transformers/xpu_ops.py,sha256=z95iTtcDQvNyJOvB4A6B_ECTYjHp4A7x-FsssoETOMs,4914
 ipex_llm/transformers/awq/__init__.py,sha256=Du5gu3-eeAkeDO_dEMBTzrDBA66DSN3uL3-rn8WGXQw,875
@@ -152,8 +153,9 @@ ipex_llm/transformers/models/chatglm.py,sha256=UHai1t2AUtGmF765_eHF8LUMVQzp_oCBx
 ipex_llm/transformers/models/chatglm2.py,sha256=KyAIX7zGVQDQuwwM3QMBNWZbTeMHEzKUIgAryT0voHc,14933
 ipex_llm/transformers/models/chatglm4.py,sha256=QvUehdaCePB3MNHyWg3dneDxmjtBdxYeKUyQUVcsgfM,16886
 ipex_llm/transformers/models/chatglm4v.py,sha256=L6y45M_wjS2_HqchmCUxRlQZUNuSNCGOiynAQrGh918,14124
-ipex_llm/transformers/models/common.py,sha256=
+ipex_llm/transformers/models/common.py,sha256=0OTRaXekOPApRdQ8UKl5Du8DOtKJ6awnQIStvYvFQOI,13018
 ipex_llm/transformers/models/decilm.py,sha256=P-PBuDPf07GvKggLwJx_wPwIn6esN3rX8ai2JxRuZmE,5246
+ipex_llm/transformers/models/deepseek.py,sha256=2w2bWbbuYi__fPs56vE9Wq5bdiZCF2NkYJNXf-b9LjQ,11130
 ipex_llm/transformers/models/deepseek_v3.py,sha256=CTgwIKQlUPlUCbOxc9Id5GapWkXOP6pMtkguYrWpCio,10003
 ipex_llm/transformers/models/gemma.py,sha256=_E3Yw8Y45xyNVeLqyVKcpr8kjuICtETeL82cJ-bWJuU,9424
 ipex_llm/transformers/models/gemma2.py,sha256=2WZuv-FLzJyTJFaYxOuzJt47QE64M0lHnzAiO5T6ozI,8049
@@ -166,7 +168,7 @@ ipex_llm/transformers/models/internvl.py,sha256=Vx0vENIEQLX2M6P398mw5TOhpks0U8xf
 ipex_llm/transformers/models/janus.py,sha256=0URo2NC8_2CGaOl3CiVB3IFTVsYyplMFgjBJdPDNBsY,1509
 ipex_llm/transformers/models/llama.py,sha256=rqrNjuZb_jeb9MKx0z-FSVoGx8YDBxQzPJ9ZUvYhgx0,9138
 ipex_llm/transformers/models/minicpm.py,sha256=eaPNVNrep0_xGoELhZd886ff0ceoKqB6cusdAhd52eE,10145
-ipex_llm/transformers/models/minicpm3.py,sha256=
+ipex_llm/transformers/models/minicpm3.py,sha256=37P_yMjw8RIzy27qL_E7kzbQRNW6f0xYQNK9xtoe5kI,10183
 ipex_llm/transformers/models/minicpmv.py,sha256=PP05b5iTnrMpiseCn8iJcxKJDnfq7WqXp9Mrch0kKZ0,9876
 ipex_llm/transformers/models/mistral.py,sha256=uVhkdXaq15v1P3QY0emVsA7SxUbAWChHEEXYN-drjpQ,7449
 ipex_llm/transformers/models/mllama.py,sha256=ZyRq9DTKsvk1AlRbr-z6ngjS3Sr_7YuGZ6-Yr1MBBAM,10937
@@ -260,11 +262,11 @@ ipex_llm/vllm/xpu/engine/__init__.py,sha256=pY_CpyuZd72fr6s32ejeKHKFW0K4vUU2rzZj
 ipex_llm/vllm/xpu/engine/engine.py,sha256=NvCMbp0X8NVrOqbwm4FTvXOptTRLzu9jQsy37ZHnTk8,9493
 ipex_llm/vllm/xpu/entrypoints/openai/api_server.py,sha256=D577nxWlyoWaHXNXIEvS3ViKSSWL3XZq8D8t6izD7x4,33250
 ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py,sha256=hB398yYtKauASRzevctScdbFIjiiSGMAe1bwEuIHrhY,10893
-ipex_llm-2.2.
-ipex_llm-2.2.
-ipex_llm-2.2.
-ipex_llm-2.2.
-ipex_llm-2.2.
-ipex_llm-2.2.
-ipex_llm-2.2.
-ipex_llm-2.2.
+ipex_llm-2.2.0b20250225.data/scripts/ipex-llm-init,sha256=fLQsT2dRL6H5bThb4GuIWotAuqoLsIxFwA-0c2qmaO8,6672
+ipex_llm-2.2.0b20250225.data/scripts/llm-chat,sha256=TdUnUmNapzuoe1c8IzrdVOQwWEg8IqsMSBRlOD3daZM,2249
+ipex_llm-2.2.0b20250225.data/scripts/llm-cli,sha256=RXGPlLElHxcKzoUxljEMBIAXbzCDysXL-Nxw-xF-7LU,2457
+ipex_llm-2.2.0b20250225.dist-info/METADATA,sha256=3_EbaWHFTUZ3JXASEqCh8-KfRdJ-s0TRsdOk6L2-Fyo,12369
+ipex_llm-2.2.0b20250225.dist-info/WHEEL,sha256=PPJcBMAZibF_2GFE9NmOJGqiaSMPiNFbJd6QaJjdA6Y,109
+ipex_llm-2.2.0b20250225.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
+ipex_llm-2.2.0b20250225.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
+ipex_llm-2.2.0b20250225.dist-info/RECORD,,
```