PyPI - ipex-llm - Versions diffs - 2.2.0b20250227__py3-none-manylinux2010_x86_64.whl → 2.2.0b20250301__py3-none-manylinux2010_x86_64.whl - Mend

ipex-llm 2.2.0b20250227__py3-none-manylinux2010_x86_64.whl → 2.2.0b20250301__py3-none-manylinux2010_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

ipex_llm/transformers/models/chatglm4v.py CHANGED Viewed

@@ -19,7 +19,7 @@
 import torch
 from typing import Optional, Tuple, Union
-from ipex_llm.transformers.models.common import merge_qkv_base
+from ipex_llm.transformers.models.common import merge_qkv_base, padding_qkv_hd
 from ipex_llm.transformers.models.common import scaled_dot_product_attention
 from ipex_llm.transformers.models.utils import update_past_key_value
 from ipex_llm.transformers.models.utils import use_quantize_kv_cache, use_sdp
@@ -265,26 +265,18 @@ def visual_attention_forward(self, x: "tensor(B, L, D)") -> "tensor(B, L, D)":
     q, k, v = qkv[0], qkv[1], qkv[2]
     bsz, q_len, kv_seq_len, head_dim = q.shape
-    if use_sdp(q_len, kv_seq_len, head_dim, q):
-        import xe_addons
-        out = xe_addons.sdp(q, k, v, None)
-    elif q.device.type == "cpu":
-        out = torch.nn.functional.scaled_dot_product_attention(q, k, v,
-                                                               attn_mask=None,
-                                                               dropout_p=0.,
-                                                               is_causal=False)
-    else:
-        attn_weights = torch.matmul(q / math.sqrt(head_dim),
-                                    k.transpose(2, 3)).to(v.dtype)
-        if kv_seq_len >= 2048 or bsz >= 64:
-            # for memory considerations, do not upcast attention to fp32
-            # for long sequences or large batches
-            attn_weights = torch.nn.functional.softmax(attn_weights, dim=-1)
-        else:
-            # upcast attention to fp32
-            attn_weights = torch.nn.functional.softmax(attn_weights, dim=-1,
-                                                       dtype=torch.float32).to(v.dtype)
-        out = torch.matmul(attn_weights, v)
+    q, k, v = padding_qkv_hd(
+        q, k, v,
+        head_dim, 128
+    )
+    attn_weights = None
+    attn_output = scaled_dot_product_attention(
+        q, k.contiguous(), v.contiguous(),
+        None, False, 1 / math.sqrt(head_dim)
+    )
+    out = attn_output[:, :, :, :head_dim]
     output = self.dense(out.transpose(1, 2).reshape(B, L, -1))
     output = self.output_dropout(output)
     return output

ipex_llm/transformers/models/deepseek.py CHANGED Viewed

@@ -271,6 +271,25 @@ def deepseek_attention_forward(
     return attn_output, attn_weights, past_key_value
+def fuse_gate_forward(self, x: torch.Tensor):
+    if x.device.type == "xpu" and x.dtype in [torch.float, torch.half]:
+        x = x.view(-1, x.size(-1))
+        logits = torch.nn.functional.linear(
+            x.type(torch.float32), self.weight.type(torch.float32), None
+        )
+        scores = logits.sigmoid()
+        import xe_addons
+        topk_idx, topk_weight = xe_addons.moe_group_topk(
+            scores, self.e_score_correction_bias,
+            self.n_group, 2, self.topk_group, self.top_k,
+            self.top_k > 1 and self.norm_topk_prob, 1e-20, self.routed_scaling_factor
+        )
+    else:
+        topk_idx, topk_weight = self(x)
+    return topk_idx, topk_weight.to(x.dtype)
 def moe_infer_decode(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_weight: torch.Tensor):
     if (
         x.device.type == "xpu"
@@ -301,7 +320,7 @@ def moe_infer_decode(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_weight:
             expert_out = expert(x)
             outputs.append(expert_out)
         outs = torch.cat(outputs, dim=0)
-        reshaped_topk_weight = topk_weight.squeeze(0).unsqueeze(-1).to(outs.dtype)
+        reshaped_topk_weight = topk_weight.squeeze(0).unsqueeze(-1)
         final_out = (outs * reshaped_topk_weight).sum(dim=0, keepdim=True)
     return final_out
@@ -309,11 +328,13 @@ def moe_infer_decode(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_weight:
 def deepseek_moe_forward(self, hidden_states: torch.Tensor):
     identity = hidden_states
     orig_shape = hidden_states.shape
-    topk_idx, topk_weight = self.gate(hidden_states)
+    # IPEX-LLM OPT start: fuse grouped topk in gate forward
+    topk_idx, topk_weight = fuse_gate_forward(self.gate, hidden_states)
+    # IPEX-LLM OPT end
     hidden_states = hidden_states.view(-1, hidden_states.shape[-1])
     flat_topk_idx = topk_idx.view(-1)
     if not self.training:
-        # IPEX-LLM OPT start : add special moe_infer implementation for decoding
+        # IPEX-LLM OPT start: add special moe_infer implementation for decoding
         if topk_idx.size(0) == 1 and self.ep_size == 1:
             y = moe_infer_decode(self, hidden_states, topk_idx, topk_weight)
         else:

{ipex_llm-2.2.0b20250227.dist-info → ipex_llm-2.2.0b20250301.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ipex-llm
-Version: 2.2.0b20250227
+Version: 2.2.0b20250301
 Summary: Large Language Model Develop Toolkit
 Home-page: https://github.com/intel-analytics/ipex-llm
 Author: BigDL Authors
@@ -27,7 +27,7 @@ Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'all'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'all'
 Provides-Extra: cpp
-Requires-Dist: bigdl-core-cpp ==2.6.0b20250227 ; extra == 'cpp'
+Requires-Dist: bigdl-core-cpp ==2.6.0b20250301 ; extra == 'cpp'
 Requires-Dist: setuptools ; extra == 'cpp'
 Requires-Dist: onednn-devel ==2025.0.1 ; (platform_system == "Windows") and extra == 'cpp'
 Requires-Dist: onednn ==2025.0.1 ; (platform_system == "Windows") and extra == 'cpp'
@@ -60,7 +60,7 @@ Requires-Dist: transformers ==4.40.0 ; extra == 'npu'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'npu'
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'npu'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'npu'
-Requires-Dist: bigdl-core-npu ==2.6.0b20250227 ; (platform_system == "Windows") and extra == 'npu'
+Requires-Dist: bigdl-core-npu ==2.6.0b20250301 ; (platform_system == "Windows") and extra == 'npu'
 Provides-Extra: serving
 Requires-Dist: py-cpuinfo ; extra == 'serving'
 Requires-Dist: fschat[model_worker,webui] ==0.2.36 ; extra == 'serving'
@@ -80,9 +80,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250227 ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250227 ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250227 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250301 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250301 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250301 ; extra == 'xpu'
 Provides-Extra: xpu-2-1
 Requires-Dist: py-cpuinfo ; extra == 'xpu-2-1'
 Requires-Dist: protobuf ; extra == 'xpu-2-1'
@@ -97,9 +97,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu-2-1'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250227 ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250227 ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250227 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250301 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250301 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250301 ; extra == 'xpu-2-1'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-1'
 Requires-Dist: dpcpp-cpp-rt ==2024.0.2 ; (platform_system == "Windows") and extra == 'xpu-2-1'
 Requires-Dist: mkl-dpcpp ==2024.0.0 ; (platform_system == "Windows") and extra == 'xpu-2-1'
@@ -117,7 +117,7 @@ Requires-Dist: setuptools ; extra == 'xpu-2-6'
 Requires-Dist: torch ==2.6.0+xpu ; extra == 'xpu-2-6'
 Requires-Dist: torchvision ==0.21.0+xpu ; extra == 'xpu-2-6'
 Requires-Dist: torchaudio ==2.6.0+xpu ; extra == 'xpu-2-6'
-Requires-Dist: bigdl-core-xe-all ==2.6.0b20250227 ; extra == 'xpu-2-6'
+Requires-Dist: bigdl-core-xe-all ==2.6.0b20250301 ; extra == 'xpu-2-6'
 Requires-Dist: onednn-devel ==2025.0.1 ; extra == 'xpu-2-6'
 Requires-Dist: onednn ==2025.0.1 ; extra == 'xpu-2-6'
 Requires-Dist: dpcpp-cpp-rt ==2025.0.2 ; extra == 'xpu-2-6'
@@ -133,9 +133,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arc'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arc'
 Requires-Dist: tabulate ; extra == 'xpu-arc'
 Requires-Dist: setuptools ; extra == 'xpu-arc'
-Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250227 ; extra == 'xpu-arc'
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250227 ; extra == 'xpu-arc'
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250227 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250301 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250301 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250301 ; extra == 'xpu-arc'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arc'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
@@ -156,9 +156,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arl'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arl'
 Requires-Dist: tabulate ; extra == 'xpu-arl'
 Requires-Dist: setuptools ; extra == 'xpu-arl'
-Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250227 ; extra == 'xpu-arl'
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250227 ; extra == 'xpu-arl'
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250227 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250301 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250301 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250301 ; extra == 'xpu-arl'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arl'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
@@ -179,9 +179,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-lnl'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-lnl'
 Requires-Dist: tabulate ; extra == 'xpu-lnl'
 Requires-Dist: setuptools ; extra == 'xpu-lnl'
-Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250227 ; extra == 'xpu-lnl'
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250227 ; extra == 'xpu-lnl'
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250227 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250301 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250301 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250301 ; extra == 'xpu-lnl'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-lnl'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'

{ipex_llm-2.2.0b20250227.dist-info → ipex_llm-2.2.0b20250301.dist-info}/RECORD RENAMED Viewed

@@ -152,10 +152,10 @@ ipex_llm/transformers/models/bloom.py,sha256=PxfzyYT-nFn3K5rZhTQjmcEjUUzAhUFzxIN
 ipex_llm/transformers/models/chatglm.py,sha256=UHai1t2AUtGmF765_eHF8LUMVQzp_oCBx8TJB21WrHk,12597
 ipex_llm/transformers/models/chatglm2.py,sha256=KyAIX7zGVQDQuwwM3QMBNWZbTeMHEzKUIgAryT0voHc,14933
 ipex_llm/transformers/models/chatglm4.py,sha256=QvUehdaCePB3MNHyWg3dneDxmjtBdxYeKUyQUVcsgfM,16886
-ipex_llm/transformers/models/chatglm4v.py,sha256=L6y45M_wjS2_HqchmCUxRlQZUNuSNCGOiynAQrGh918,14124
+ipex_llm/transformers/models/chatglm4v.py,sha256=Ba9Xtzwtzk_rzg5khGqDrlHfJsDwc5YcM5_yPoord7o,13324
 ipex_llm/transformers/models/common.py,sha256=0OTRaXekOPApRdQ8UKl5Du8DOtKJ6awnQIStvYvFQOI,13018
 ipex_llm/transformers/models/decilm.py,sha256=P-PBuDPf07GvKggLwJx_wPwIn6esN3rX8ai2JxRuZmE,5246
-ipex_llm/transformers/models/deepseek.py,sha256=bMUAbTf2GaSyWuTwQxh_6LJqx7RvlqOQpCYw4DsC6BQ,12310
+ipex_llm/transformers/models/deepseek.py,sha256=w6tGeyJ9joD7lQBiZ6A01Z00g8hAXC1N2yGtJh8kyuk,13096
 ipex_llm/transformers/models/deepseek_v3.py,sha256=CTgwIKQlUPlUCbOxc9Id5GapWkXOP6pMtkguYrWpCio,10003
 ipex_llm/transformers/models/gemma.py,sha256=_E3Yw8Y45xyNVeLqyVKcpr8kjuICtETeL82cJ-bWJuU,9424
 ipex_llm/transformers/models/gemma2.py,sha256=2WZuv-FLzJyTJFaYxOuzJt47QE64M0lHnzAiO5T6ozI,8049
@@ -262,11 +262,11 @@ ipex_llm/vllm/xpu/engine/__init__.py,sha256=pY_CpyuZd72fr6s32ejeKHKFW0K4vUU2rzZj
 ipex_llm/vllm/xpu/engine/engine.py,sha256=NvCMbp0X8NVrOqbwm4FTvXOptTRLzu9jQsy37ZHnTk8,9493
 ipex_llm/vllm/xpu/entrypoints/openai/api_server.py,sha256=D577nxWlyoWaHXNXIEvS3ViKSSWL3XZq8D8t6izD7x4,33250
 ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py,sha256=hB398yYtKauASRzevctScdbFIjiiSGMAe1bwEuIHrhY,10893
-ipex_llm-2.2.0b20250227.data/scripts/ipex-llm-init,sha256=fLQsT2dRL6H5bThb4GuIWotAuqoLsIxFwA-0c2qmaO8,6672
-ipex_llm-2.2.0b20250227.data/scripts/llm-chat,sha256=TdUnUmNapzuoe1c8IzrdVOQwWEg8IqsMSBRlOD3daZM,2249
-ipex_llm-2.2.0b20250227.data/scripts/llm-cli,sha256=RXGPlLElHxcKzoUxljEMBIAXbzCDysXL-Nxw-xF-7LU,2457
-ipex_llm-2.2.0b20250227.dist-info/METADATA,sha256=cyJrw4GeU1bOptch_YZF-a-a6336yn9oMn0DeyF5ktU,12369
-ipex_llm-2.2.0b20250227.dist-info/WHEEL,sha256=PPJcBMAZibF_2GFE9NmOJGqiaSMPiNFbJd6QaJjdA6Y,109
-ipex_llm-2.2.0b20250227.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
-ipex_llm-2.2.0b20250227.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
-ipex_llm-2.2.0b20250227.dist-info/RECORD,,
+ipex_llm-2.2.0b20250301.data/scripts/ipex-llm-init,sha256=fLQsT2dRL6H5bThb4GuIWotAuqoLsIxFwA-0c2qmaO8,6672
+ipex_llm-2.2.0b20250301.data/scripts/llm-chat,sha256=TdUnUmNapzuoe1c8IzrdVOQwWEg8IqsMSBRlOD3daZM,2249
+ipex_llm-2.2.0b20250301.data/scripts/llm-cli,sha256=RXGPlLElHxcKzoUxljEMBIAXbzCDysXL-Nxw-xF-7LU,2457
+ipex_llm-2.2.0b20250301.dist-info/METADATA,sha256=pumiPBr8CKo5gIgCZoloybk379pwwMCf5HqFcQ4Lwnc,12369
+ipex_llm-2.2.0b20250301.dist-info/WHEEL,sha256=PPJcBMAZibF_2GFE9NmOJGqiaSMPiNFbJd6QaJjdA6Y,109
+ipex_llm-2.2.0b20250301.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
+ipex_llm-2.2.0b20250301.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
+ipex_llm-2.2.0b20250301.dist-info/RECORD,,

{ipex_llm-2.2.0b20250227.data → ipex_llm-2.2.0b20250301.data}/scripts/ipex-llm-init RENAMED Viewed

File without changes

{ipex_llm-2.2.0b20250227.data → ipex_llm-2.2.0b20250301.data}/scripts/llm-chat RENAMED Viewed

File without changes

{ipex_llm-2.2.0b20250227.data → ipex_llm-2.2.0b20250301.data}/scripts/llm-cli RENAMED Viewed

File without changes

{ipex_llm-2.2.0b20250227.dist-info → ipex_llm-2.2.0b20250301.dist-info}/WHEEL RENAMED Viewed

File without changes

{ipex_llm-2.2.0b20250227.dist-info → ipex_llm-2.2.0b20250301.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{ipex_llm-2.2.0b20250227.dist-info → ipex_llm-2.2.0b20250301.dist-info}/top_level.txt RENAMED Viewed

File without changes