ipex-llm 2.2.0b20250226__py3-none-manylinux2010_x86_64.whl → 2.2.0b20250227__py3-none-manylinux2010_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -272,15 +272,37 @@ def deepseek_attention_forward(
272
272
 
273
273
 
274
274
  def moe_infer_decode(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_weight: torch.Tensor):
275
- idxs = topk_ids.flatten().tolist()
276
- outputs = []
277
- for i in idxs:
278
- expert = self.experts[i]
279
- expert_out = expert(x)
280
- outputs.append(expert_out)
281
- outs = torch.cat(outputs, dim=0)
282
- reshaped_topk_weight = topk_weight.squeeze(0).unsqueeze(-1).to(outs.dtype)
283
- final_out = (outs * reshaped_topk_weight).sum(dim=0, keepdim=True)
275
+ if (
276
+ x.device.type == "xpu"
277
+ and x.dtype in [torch.float, torch.half]
278
+ and self.experts[0].down_proj.qtype == 2
279
+ ):
280
+ if getattr(self, "gates", None) is None:
281
+ gate_addrs = [expert.gate_proj.weight.data_ptr() for expert in self.experts]
282
+ up_addrs = [expert.up_proj.weight.data_ptr() for expert in self.experts]
283
+ down_addrs = [expert.down_proj.weight.data_ptr() for expert in self.experts]
284
+ gates = torch.tensor(gate_addrs, dtype=torch.uint64, device=x.device)
285
+ ups = torch.tensor(up_addrs, dtype=torch.uint64, device=x.device)
286
+ downs = torch.tensor(down_addrs, dtype=torch.uint64, device=x.device)
287
+ self.register_buffer("gates", gates, persistent=False)
288
+ self.register_buffer("ups", ups, persistent=False)
289
+ self.register_buffer("downs", downs, persistent=False)
290
+
291
+ import xe_linear
292
+ final_out = xe_linear.moe_forward_vec(
293
+ x, topk_ids, topk_weight, self.gates, self.ups, self.downs,
294
+ x.size(-1), self.experts[0].intermediate_size, 2
295
+ )
296
+ else:
297
+ idxs = topk_ids.flatten().tolist()
298
+ outputs = []
299
+ for i in idxs:
300
+ expert = self.experts[i]
301
+ expert_out = expert(x)
302
+ outputs.append(expert_out)
303
+ outs = torch.cat(outputs, dim=0)
304
+ reshaped_topk_weight = topk_weight.squeeze(0).unsqueeze(-1).to(outs.dtype)
305
+ final_out = (outs * reshaped_topk_weight).sum(dim=0, keepdim=True)
284
306
  return final_out
285
307
 
286
308
 
@@ -292,7 +314,7 @@ def deepseek_moe_forward(self, hidden_states: torch.Tensor):
292
314
  flat_topk_idx = topk_idx.view(-1)
293
315
  if not self.training:
294
316
  # IPEX-LLM OPT start : add special moe_infer implementation for decoding
295
- if topk_idx.size(0) == 1:
317
+ if topk_idx.size(0) == 1 and self.ep_size == 1:
296
318
  y = moe_infer_decode(self, hidden_states, topk_idx, topk_weight)
297
319
  else:
298
320
  y = self.moe_infer(hidden_states, topk_idx, topk_weight)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ipex-llm
3
- Version: 2.2.0b20250226
3
+ Version: 2.2.0b20250227
4
4
  Summary: Large Language Model Develop Toolkit
5
5
  Home-page: https://github.com/intel-analytics/ipex-llm
6
6
  Author: BigDL Authors
@@ -27,7 +27,7 @@ Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine
27
27
  Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'all'
28
28
  Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'all'
29
29
  Provides-Extra: cpp
30
- Requires-Dist: bigdl-core-cpp ==2.6.0b20250226 ; extra == 'cpp'
30
+ Requires-Dist: bigdl-core-cpp ==2.6.0b20250227 ; extra == 'cpp'
31
31
  Requires-Dist: setuptools ; extra == 'cpp'
32
32
  Requires-Dist: onednn-devel ==2025.0.1 ; (platform_system == "Windows") and extra == 'cpp'
33
33
  Requires-Dist: onednn ==2025.0.1 ; (platform_system == "Windows") and extra == 'cpp'
@@ -60,7 +60,7 @@ Requires-Dist: transformers ==4.40.0 ; extra == 'npu'
60
60
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'npu'
61
61
  Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'npu'
62
62
  Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'npu'
63
- Requires-Dist: bigdl-core-npu ==2.6.0b20250226 ; (platform_system == "Windows") and extra == 'npu'
63
+ Requires-Dist: bigdl-core-npu ==2.6.0b20250227 ; (platform_system == "Windows") and extra == 'npu'
64
64
  Provides-Extra: serving
65
65
  Requires-Dist: py-cpuinfo ; extra == 'serving'
66
66
  Requires-Dist: fschat[model_worker,webui] ==0.2.36 ; extra == 'serving'
@@ -80,9 +80,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu'
80
80
  Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu'
81
81
  Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu'
82
82
  Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu'
83
- Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250226 ; extra == 'xpu'
84
- Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250226 ; extra == 'xpu'
85
- Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250226 ; extra == 'xpu'
83
+ Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250227 ; extra == 'xpu'
84
+ Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250227 ; extra == 'xpu'
85
+ Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250227 ; extra == 'xpu'
86
86
  Provides-Extra: xpu-2-1
87
87
  Requires-Dist: py-cpuinfo ; extra == 'xpu-2-1'
88
88
  Requires-Dist: protobuf ; extra == 'xpu-2-1'
@@ -97,9 +97,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu-2-1'
97
97
  Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu-2-1'
98
98
  Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu-2-1'
99
99
  Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu-2-1'
100
- Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250226 ; extra == 'xpu-2-1'
101
- Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250226 ; extra == 'xpu-2-1'
102
- Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250226 ; extra == 'xpu-2-1'
100
+ Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250227 ; extra == 'xpu-2-1'
101
+ Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250227 ; extra == 'xpu-2-1'
102
+ Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250227 ; extra == 'xpu-2-1'
103
103
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-1'
104
104
  Requires-Dist: dpcpp-cpp-rt ==2024.0.2 ; (platform_system == "Windows") and extra == 'xpu-2-1'
105
105
  Requires-Dist: mkl-dpcpp ==2024.0.0 ; (platform_system == "Windows") and extra == 'xpu-2-1'
@@ -117,7 +117,7 @@ Requires-Dist: setuptools ; extra == 'xpu-2-6'
117
117
  Requires-Dist: torch ==2.6.0+xpu ; extra == 'xpu-2-6'
118
118
  Requires-Dist: torchvision ==0.21.0+xpu ; extra == 'xpu-2-6'
119
119
  Requires-Dist: torchaudio ==2.6.0+xpu ; extra == 'xpu-2-6'
120
- Requires-Dist: bigdl-core-xe-all ==2.6.0b20250226 ; extra == 'xpu-2-6'
120
+ Requires-Dist: bigdl-core-xe-all ==2.6.0b20250227 ; extra == 'xpu-2-6'
121
121
  Requires-Dist: onednn-devel ==2025.0.1 ; extra == 'xpu-2-6'
122
122
  Requires-Dist: onednn ==2025.0.1 ; extra == 'xpu-2-6'
123
123
  Requires-Dist: dpcpp-cpp-rt ==2025.0.2 ; extra == 'xpu-2-6'
@@ -133,9 +133,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arc'
133
133
  Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arc'
134
134
  Requires-Dist: tabulate ; extra == 'xpu-arc'
135
135
  Requires-Dist: setuptools ; extra == 'xpu-arc'
136
- Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250226 ; extra == 'xpu-arc'
137
- Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250226 ; extra == 'xpu-arc'
138
- Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250226 ; extra == 'xpu-arc'
136
+ Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250227 ; extra == 'xpu-arc'
137
+ Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250227 ; extra == 'xpu-arc'
138
+ Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250227 ; extra == 'xpu-arc'
139
139
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arc'
140
140
  Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
141
141
  Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
@@ -156,9 +156,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arl'
156
156
  Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arl'
157
157
  Requires-Dist: tabulate ; extra == 'xpu-arl'
158
158
  Requires-Dist: setuptools ; extra == 'xpu-arl'
159
- Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250226 ; extra == 'xpu-arl'
160
- Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250226 ; extra == 'xpu-arl'
161
- Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250226 ; extra == 'xpu-arl'
159
+ Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250227 ; extra == 'xpu-arl'
160
+ Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250227 ; extra == 'xpu-arl'
161
+ Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250227 ; extra == 'xpu-arl'
162
162
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arl'
163
163
  Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
164
164
  Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
@@ -179,9 +179,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-lnl'
179
179
  Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-lnl'
180
180
  Requires-Dist: tabulate ; extra == 'xpu-lnl'
181
181
  Requires-Dist: setuptools ; extra == 'xpu-lnl'
182
- Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250226 ; extra == 'xpu-lnl'
183
- Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250226 ; extra == 'xpu-lnl'
184
- Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250226 ; extra == 'xpu-lnl'
182
+ Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250227 ; extra == 'xpu-lnl'
183
+ Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250227 ; extra == 'xpu-lnl'
184
+ Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250227 ; extra == 'xpu-lnl'
185
185
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-lnl'
186
186
  Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
187
187
  Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
@@ -155,7 +155,7 @@ ipex_llm/transformers/models/chatglm4.py,sha256=QvUehdaCePB3MNHyWg3dneDxmjtBdxYe
155
155
  ipex_llm/transformers/models/chatglm4v.py,sha256=L6y45M_wjS2_HqchmCUxRlQZUNuSNCGOiynAQrGh918,14124
156
156
  ipex_llm/transformers/models/common.py,sha256=0OTRaXekOPApRdQ8UKl5Du8DOtKJ6awnQIStvYvFQOI,13018
157
157
  ipex_llm/transformers/models/decilm.py,sha256=P-PBuDPf07GvKggLwJx_wPwIn6esN3rX8ai2JxRuZmE,5246
158
- ipex_llm/transformers/models/deepseek.py,sha256=2w2bWbbuYi__fPs56vE9Wq5bdiZCF2NkYJNXf-b9LjQ,11130
158
+ ipex_llm/transformers/models/deepseek.py,sha256=bMUAbTf2GaSyWuTwQxh_6LJqx7RvlqOQpCYw4DsC6BQ,12310
159
159
  ipex_llm/transformers/models/deepseek_v3.py,sha256=CTgwIKQlUPlUCbOxc9Id5GapWkXOP6pMtkguYrWpCio,10003
160
160
  ipex_llm/transformers/models/gemma.py,sha256=_E3Yw8Y45xyNVeLqyVKcpr8kjuICtETeL82cJ-bWJuU,9424
161
161
  ipex_llm/transformers/models/gemma2.py,sha256=2WZuv-FLzJyTJFaYxOuzJt47QE64M0lHnzAiO5T6ozI,8049
@@ -262,11 +262,11 @@ ipex_llm/vllm/xpu/engine/__init__.py,sha256=pY_CpyuZd72fr6s32ejeKHKFW0K4vUU2rzZj
262
262
  ipex_llm/vllm/xpu/engine/engine.py,sha256=NvCMbp0X8NVrOqbwm4FTvXOptTRLzu9jQsy37ZHnTk8,9493
263
263
  ipex_llm/vllm/xpu/entrypoints/openai/api_server.py,sha256=D577nxWlyoWaHXNXIEvS3ViKSSWL3XZq8D8t6izD7x4,33250
264
264
  ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py,sha256=hB398yYtKauASRzevctScdbFIjiiSGMAe1bwEuIHrhY,10893
265
- ipex_llm-2.2.0b20250226.data/scripts/ipex-llm-init,sha256=fLQsT2dRL6H5bThb4GuIWotAuqoLsIxFwA-0c2qmaO8,6672
266
- ipex_llm-2.2.0b20250226.data/scripts/llm-chat,sha256=TdUnUmNapzuoe1c8IzrdVOQwWEg8IqsMSBRlOD3daZM,2249
267
- ipex_llm-2.2.0b20250226.data/scripts/llm-cli,sha256=RXGPlLElHxcKzoUxljEMBIAXbzCDysXL-Nxw-xF-7LU,2457
268
- ipex_llm-2.2.0b20250226.dist-info/METADATA,sha256=crm22LHoY7OU7EP-L1X4RZL53s2iKxlslqt9QyZcMus,12369
269
- ipex_llm-2.2.0b20250226.dist-info/WHEEL,sha256=PPJcBMAZibF_2GFE9NmOJGqiaSMPiNFbJd6QaJjdA6Y,109
270
- ipex_llm-2.2.0b20250226.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
271
- ipex_llm-2.2.0b20250226.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
272
- ipex_llm-2.2.0b20250226.dist-info/RECORD,,
265
+ ipex_llm-2.2.0b20250227.data/scripts/ipex-llm-init,sha256=fLQsT2dRL6H5bThb4GuIWotAuqoLsIxFwA-0c2qmaO8,6672
266
+ ipex_llm-2.2.0b20250227.data/scripts/llm-chat,sha256=TdUnUmNapzuoe1c8IzrdVOQwWEg8IqsMSBRlOD3daZM,2249
267
+ ipex_llm-2.2.0b20250227.data/scripts/llm-cli,sha256=RXGPlLElHxcKzoUxljEMBIAXbzCDysXL-Nxw-xF-7LU,2457
268
+ ipex_llm-2.2.0b20250227.dist-info/METADATA,sha256=cyJrw4GeU1bOptch_YZF-a-a6336yn9oMn0DeyF5ktU,12369
269
+ ipex_llm-2.2.0b20250227.dist-info/WHEEL,sha256=PPJcBMAZibF_2GFE9NmOJGqiaSMPiNFbJd6QaJjdA6Y,109
270
+ ipex_llm-2.2.0b20250227.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
271
+ ipex_llm-2.2.0b20250227.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
272
+ ipex_llm-2.2.0b20250227.dist-info/RECORD,,