ipex-llm 2.2.0b20250227__py3-none-manylinux2010_x86_64.whl → 2.2.0b20250301__py3-none-manylinux2010_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,7 +19,7 @@
19
19
 
20
20
  import torch
21
21
  from typing import Optional, Tuple, Union
22
- from ipex_llm.transformers.models.common import merge_qkv_base
22
+ from ipex_llm.transformers.models.common import merge_qkv_base, padding_qkv_hd
23
23
  from ipex_llm.transformers.models.common import scaled_dot_product_attention
24
24
  from ipex_llm.transformers.models.utils import update_past_key_value
25
25
  from ipex_llm.transformers.models.utils import use_quantize_kv_cache, use_sdp
@@ -265,26 +265,18 @@ def visual_attention_forward(self, x: "tensor(B, L, D)") -> "tensor(B, L, D)":
265
265
  q, k, v = qkv[0], qkv[1], qkv[2]
266
266
 
267
267
  bsz, q_len, kv_seq_len, head_dim = q.shape
268
- if use_sdp(q_len, kv_seq_len, head_dim, q):
269
- import xe_addons
270
- out = xe_addons.sdp(q, k, v, None)
271
- elif q.device.type == "cpu":
272
- out = torch.nn.functional.scaled_dot_product_attention(q, k, v,
273
- attn_mask=None,
274
- dropout_p=0.,
275
- is_causal=False)
276
- else:
277
- attn_weights = torch.matmul(q / math.sqrt(head_dim),
278
- k.transpose(2, 3)).to(v.dtype)
279
- if kv_seq_len >= 2048 or bsz >= 64:
280
- # for memory considerations, do not upcast attention to fp32
281
- # for long sequences or large batches
282
- attn_weights = torch.nn.functional.softmax(attn_weights, dim=-1)
283
- else:
284
- # upcast attention to fp32
285
- attn_weights = torch.nn.functional.softmax(attn_weights, dim=-1,
286
- dtype=torch.float32).to(v.dtype)
287
- out = torch.matmul(attn_weights, v)
268
+ q, k, v = padding_qkv_hd(
269
+ q, k, v,
270
+ head_dim, 128
271
+ )
272
+
273
+ attn_weights = None
274
+ attn_output = scaled_dot_product_attention(
275
+ q, k.contiguous(), v.contiguous(),
276
+ None, False, 1 / math.sqrt(head_dim)
277
+ )
278
+
279
+ out = attn_output[:, :, :, :head_dim]
288
280
  output = self.dense(out.transpose(1, 2).reshape(B, L, -1))
289
281
  output = self.output_dropout(output)
290
282
  return output
@@ -271,6 +271,25 @@ def deepseek_attention_forward(
271
271
  return attn_output, attn_weights, past_key_value
272
272
 
273
273
 
274
+ def fuse_gate_forward(self, x: torch.Tensor):
275
+ if x.device.type == "xpu" and x.dtype in [torch.float, torch.half]:
276
+ x = x.view(-1, x.size(-1))
277
+ logits = torch.nn.functional.linear(
278
+ x.type(torch.float32), self.weight.type(torch.float32), None
279
+ )
280
+ scores = logits.sigmoid()
281
+
282
+ import xe_addons
283
+ topk_idx, topk_weight = xe_addons.moe_group_topk(
284
+ scores, self.e_score_correction_bias,
285
+ self.n_group, 2, self.topk_group, self.top_k,
286
+ self.top_k > 1 and self.norm_topk_prob, 1e-20, self.routed_scaling_factor
287
+ )
288
+ else:
289
+ topk_idx, topk_weight = self(x)
290
+ return topk_idx, topk_weight.to(x.dtype)
291
+
292
+
274
293
  def moe_infer_decode(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_weight: torch.Tensor):
275
294
  if (
276
295
  x.device.type == "xpu"
@@ -301,7 +320,7 @@ def moe_infer_decode(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_weight:
301
320
  expert_out = expert(x)
302
321
  outputs.append(expert_out)
303
322
  outs = torch.cat(outputs, dim=0)
304
- reshaped_topk_weight = topk_weight.squeeze(0).unsqueeze(-1).to(outs.dtype)
323
+ reshaped_topk_weight = topk_weight.squeeze(0).unsqueeze(-1)
305
324
  final_out = (outs * reshaped_topk_weight).sum(dim=0, keepdim=True)
306
325
  return final_out
307
326
 
@@ -309,11 +328,13 @@ def moe_infer_decode(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_weight:
309
328
  def deepseek_moe_forward(self, hidden_states: torch.Tensor):
310
329
  identity = hidden_states
311
330
  orig_shape = hidden_states.shape
312
- topk_idx, topk_weight = self.gate(hidden_states)
331
+ # IPEX-LLM OPT start: fuse grouped topk in gate forward
332
+ topk_idx, topk_weight = fuse_gate_forward(self.gate, hidden_states)
333
+ # IPEX-LLM OPT end
313
334
  hidden_states = hidden_states.view(-1, hidden_states.shape[-1])
314
335
  flat_topk_idx = topk_idx.view(-1)
315
336
  if not self.training:
316
- # IPEX-LLM OPT start : add special moe_infer implementation for decoding
337
+ # IPEX-LLM OPT start: add special moe_infer implementation for decoding
317
338
  if topk_idx.size(0) == 1 and self.ep_size == 1:
318
339
  y = moe_infer_decode(self, hidden_states, topk_idx, topk_weight)
319
340
  else:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ipex-llm
3
- Version: 2.2.0b20250227
3
+ Version: 2.2.0b20250301
4
4
  Summary: Large Language Model Develop Toolkit
5
5
  Home-page: https://github.com/intel-analytics/ipex-llm
6
6
  Author: BigDL Authors
@@ -27,7 +27,7 @@ Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine
27
27
  Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'all'
28
28
  Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'all'
29
29
  Provides-Extra: cpp
30
- Requires-Dist: bigdl-core-cpp ==2.6.0b20250227 ; extra == 'cpp'
30
+ Requires-Dist: bigdl-core-cpp ==2.6.0b20250301 ; extra == 'cpp'
31
31
  Requires-Dist: setuptools ; extra == 'cpp'
32
32
  Requires-Dist: onednn-devel ==2025.0.1 ; (platform_system == "Windows") and extra == 'cpp'
33
33
  Requires-Dist: onednn ==2025.0.1 ; (platform_system == "Windows") and extra == 'cpp'
@@ -60,7 +60,7 @@ Requires-Dist: transformers ==4.40.0 ; extra == 'npu'
60
60
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'npu'
61
61
  Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'npu'
62
62
  Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'npu'
63
- Requires-Dist: bigdl-core-npu ==2.6.0b20250227 ; (platform_system == "Windows") and extra == 'npu'
63
+ Requires-Dist: bigdl-core-npu ==2.6.0b20250301 ; (platform_system == "Windows") and extra == 'npu'
64
64
  Provides-Extra: serving
65
65
  Requires-Dist: py-cpuinfo ; extra == 'serving'
66
66
  Requires-Dist: fschat[model_worker,webui] ==0.2.36 ; extra == 'serving'
@@ -80,9 +80,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu'
80
80
  Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu'
81
81
  Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu'
82
82
  Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu'
83
- Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250227 ; extra == 'xpu'
84
- Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250227 ; extra == 'xpu'
85
- Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250227 ; extra == 'xpu'
83
+ Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250301 ; extra == 'xpu'
84
+ Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250301 ; extra == 'xpu'
85
+ Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250301 ; extra == 'xpu'
86
86
  Provides-Extra: xpu-2-1
87
87
  Requires-Dist: py-cpuinfo ; extra == 'xpu-2-1'
88
88
  Requires-Dist: protobuf ; extra == 'xpu-2-1'
@@ -97,9 +97,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu-2-1'
97
97
  Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu-2-1'
98
98
  Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu-2-1'
99
99
  Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu-2-1'
100
- Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250227 ; extra == 'xpu-2-1'
101
- Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250227 ; extra == 'xpu-2-1'
102
- Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250227 ; extra == 'xpu-2-1'
100
+ Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250301 ; extra == 'xpu-2-1'
101
+ Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250301 ; extra == 'xpu-2-1'
102
+ Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250301 ; extra == 'xpu-2-1'
103
103
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-1'
104
104
  Requires-Dist: dpcpp-cpp-rt ==2024.0.2 ; (platform_system == "Windows") and extra == 'xpu-2-1'
105
105
  Requires-Dist: mkl-dpcpp ==2024.0.0 ; (platform_system == "Windows") and extra == 'xpu-2-1'
@@ -117,7 +117,7 @@ Requires-Dist: setuptools ; extra == 'xpu-2-6'
117
117
  Requires-Dist: torch ==2.6.0+xpu ; extra == 'xpu-2-6'
118
118
  Requires-Dist: torchvision ==0.21.0+xpu ; extra == 'xpu-2-6'
119
119
  Requires-Dist: torchaudio ==2.6.0+xpu ; extra == 'xpu-2-6'
120
- Requires-Dist: bigdl-core-xe-all ==2.6.0b20250227 ; extra == 'xpu-2-6'
120
+ Requires-Dist: bigdl-core-xe-all ==2.6.0b20250301 ; extra == 'xpu-2-6'
121
121
  Requires-Dist: onednn-devel ==2025.0.1 ; extra == 'xpu-2-6'
122
122
  Requires-Dist: onednn ==2025.0.1 ; extra == 'xpu-2-6'
123
123
  Requires-Dist: dpcpp-cpp-rt ==2025.0.2 ; extra == 'xpu-2-6'
@@ -133,9 +133,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arc'
133
133
  Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arc'
134
134
  Requires-Dist: tabulate ; extra == 'xpu-arc'
135
135
  Requires-Dist: setuptools ; extra == 'xpu-arc'
136
- Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250227 ; extra == 'xpu-arc'
137
- Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250227 ; extra == 'xpu-arc'
138
- Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250227 ; extra == 'xpu-arc'
136
+ Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250301 ; extra == 'xpu-arc'
137
+ Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250301 ; extra == 'xpu-arc'
138
+ Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250301 ; extra == 'xpu-arc'
139
139
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arc'
140
140
  Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
141
141
  Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
@@ -156,9 +156,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arl'
156
156
  Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arl'
157
157
  Requires-Dist: tabulate ; extra == 'xpu-arl'
158
158
  Requires-Dist: setuptools ; extra == 'xpu-arl'
159
- Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250227 ; extra == 'xpu-arl'
160
- Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250227 ; extra == 'xpu-arl'
161
- Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250227 ; extra == 'xpu-arl'
159
+ Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250301 ; extra == 'xpu-arl'
160
+ Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250301 ; extra == 'xpu-arl'
161
+ Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250301 ; extra == 'xpu-arl'
162
162
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arl'
163
163
  Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
164
164
  Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
@@ -179,9 +179,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-lnl'
179
179
  Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-lnl'
180
180
  Requires-Dist: tabulate ; extra == 'xpu-lnl'
181
181
  Requires-Dist: setuptools ; extra == 'xpu-lnl'
182
- Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250227 ; extra == 'xpu-lnl'
183
- Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250227 ; extra == 'xpu-lnl'
184
- Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250227 ; extra == 'xpu-lnl'
182
+ Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250301 ; extra == 'xpu-lnl'
183
+ Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250301 ; extra == 'xpu-lnl'
184
+ Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250301 ; extra == 'xpu-lnl'
185
185
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-lnl'
186
186
  Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
187
187
  Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
@@ -152,10 +152,10 @@ ipex_llm/transformers/models/bloom.py,sha256=PxfzyYT-nFn3K5rZhTQjmcEjUUzAhUFzxIN
152
152
  ipex_llm/transformers/models/chatglm.py,sha256=UHai1t2AUtGmF765_eHF8LUMVQzp_oCBx8TJB21WrHk,12597
153
153
  ipex_llm/transformers/models/chatglm2.py,sha256=KyAIX7zGVQDQuwwM3QMBNWZbTeMHEzKUIgAryT0voHc,14933
154
154
  ipex_llm/transformers/models/chatglm4.py,sha256=QvUehdaCePB3MNHyWg3dneDxmjtBdxYeKUyQUVcsgfM,16886
155
- ipex_llm/transformers/models/chatglm4v.py,sha256=L6y45M_wjS2_HqchmCUxRlQZUNuSNCGOiynAQrGh918,14124
155
+ ipex_llm/transformers/models/chatglm4v.py,sha256=Ba9Xtzwtzk_rzg5khGqDrlHfJsDwc5YcM5_yPoord7o,13324
156
156
  ipex_llm/transformers/models/common.py,sha256=0OTRaXekOPApRdQ8UKl5Du8DOtKJ6awnQIStvYvFQOI,13018
157
157
  ipex_llm/transformers/models/decilm.py,sha256=P-PBuDPf07GvKggLwJx_wPwIn6esN3rX8ai2JxRuZmE,5246
158
- ipex_llm/transformers/models/deepseek.py,sha256=bMUAbTf2GaSyWuTwQxh_6LJqx7RvlqOQpCYw4DsC6BQ,12310
158
+ ipex_llm/transformers/models/deepseek.py,sha256=w6tGeyJ9joD7lQBiZ6A01Z00g8hAXC1N2yGtJh8kyuk,13096
159
159
  ipex_llm/transformers/models/deepseek_v3.py,sha256=CTgwIKQlUPlUCbOxc9Id5GapWkXOP6pMtkguYrWpCio,10003
160
160
  ipex_llm/transformers/models/gemma.py,sha256=_E3Yw8Y45xyNVeLqyVKcpr8kjuICtETeL82cJ-bWJuU,9424
161
161
  ipex_llm/transformers/models/gemma2.py,sha256=2WZuv-FLzJyTJFaYxOuzJt47QE64M0lHnzAiO5T6ozI,8049
@@ -262,11 +262,11 @@ ipex_llm/vllm/xpu/engine/__init__.py,sha256=pY_CpyuZd72fr6s32ejeKHKFW0K4vUU2rzZj
262
262
  ipex_llm/vllm/xpu/engine/engine.py,sha256=NvCMbp0X8NVrOqbwm4FTvXOptTRLzu9jQsy37ZHnTk8,9493
263
263
  ipex_llm/vllm/xpu/entrypoints/openai/api_server.py,sha256=D577nxWlyoWaHXNXIEvS3ViKSSWL3XZq8D8t6izD7x4,33250
264
264
  ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py,sha256=hB398yYtKauASRzevctScdbFIjiiSGMAe1bwEuIHrhY,10893
265
- ipex_llm-2.2.0b20250227.data/scripts/ipex-llm-init,sha256=fLQsT2dRL6H5bThb4GuIWotAuqoLsIxFwA-0c2qmaO8,6672
266
- ipex_llm-2.2.0b20250227.data/scripts/llm-chat,sha256=TdUnUmNapzuoe1c8IzrdVOQwWEg8IqsMSBRlOD3daZM,2249
267
- ipex_llm-2.2.0b20250227.data/scripts/llm-cli,sha256=RXGPlLElHxcKzoUxljEMBIAXbzCDysXL-Nxw-xF-7LU,2457
268
- ipex_llm-2.2.0b20250227.dist-info/METADATA,sha256=cyJrw4GeU1bOptch_YZF-a-a6336yn9oMn0DeyF5ktU,12369
269
- ipex_llm-2.2.0b20250227.dist-info/WHEEL,sha256=PPJcBMAZibF_2GFE9NmOJGqiaSMPiNFbJd6QaJjdA6Y,109
270
- ipex_llm-2.2.0b20250227.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
271
- ipex_llm-2.2.0b20250227.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
272
- ipex_llm-2.2.0b20250227.dist-info/RECORD,,
265
+ ipex_llm-2.2.0b20250301.data/scripts/ipex-llm-init,sha256=fLQsT2dRL6H5bThb4GuIWotAuqoLsIxFwA-0c2qmaO8,6672
266
+ ipex_llm-2.2.0b20250301.data/scripts/llm-chat,sha256=TdUnUmNapzuoe1c8IzrdVOQwWEg8IqsMSBRlOD3daZM,2249
267
+ ipex_llm-2.2.0b20250301.data/scripts/llm-cli,sha256=RXGPlLElHxcKzoUxljEMBIAXbzCDysXL-Nxw-xF-7LU,2457
268
+ ipex_llm-2.2.0b20250301.dist-info/METADATA,sha256=pumiPBr8CKo5gIgCZoloybk379pwwMCf5HqFcQ4Lwnc,12369
269
+ ipex_llm-2.2.0b20250301.dist-info/WHEEL,sha256=PPJcBMAZibF_2GFE9NmOJGqiaSMPiNFbJd6QaJjdA6Y,109
270
+ ipex_llm-2.2.0b20250301.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
271
+ ipex_llm-2.2.0b20250301.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
272
+ ipex_llm-2.2.0b20250301.dist-info/RECORD,,