ipex-llm 2.2.0b20250227__py3-none-manylinux2010_x86_64.whl → 2.2.0b20250301__py3-none-manylinux2010_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ipex_llm/transformers/models/chatglm4v.py +13 -21
- ipex_llm/transformers/models/deepseek.py +24 -3
- {ipex_llm-2.2.0b20250227.dist-info → ipex_llm-2.2.0b20250301.dist-info}/METADATA +19 -19
- {ipex_llm-2.2.0b20250227.dist-info → ipex_llm-2.2.0b20250301.dist-info}/RECORD +10 -10
- {ipex_llm-2.2.0b20250227.data → ipex_llm-2.2.0b20250301.data}/scripts/ipex-llm-init +0 -0
- {ipex_llm-2.2.0b20250227.data → ipex_llm-2.2.0b20250301.data}/scripts/llm-chat +0 -0
- {ipex_llm-2.2.0b20250227.data → ipex_llm-2.2.0b20250301.data}/scripts/llm-cli +0 -0
- {ipex_llm-2.2.0b20250227.dist-info → ipex_llm-2.2.0b20250301.dist-info}/WHEEL +0 -0
- {ipex_llm-2.2.0b20250227.dist-info → ipex_llm-2.2.0b20250301.dist-info}/entry_points.txt +0 -0
- {ipex_llm-2.2.0b20250227.dist-info → ipex_llm-2.2.0b20250301.dist-info}/top_level.txt +0 -0
@@ -19,7 +19,7 @@
|
|
19
19
|
|
20
20
|
import torch
|
21
21
|
from typing import Optional, Tuple, Union
|
22
|
-
from ipex_llm.transformers.models.common import merge_qkv_base
|
22
|
+
from ipex_llm.transformers.models.common import merge_qkv_base, padding_qkv_hd
|
23
23
|
from ipex_llm.transformers.models.common import scaled_dot_product_attention
|
24
24
|
from ipex_llm.transformers.models.utils import update_past_key_value
|
25
25
|
from ipex_llm.transformers.models.utils import use_quantize_kv_cache, use_sdp
|
@@ -265,26 +265,18 @@ def visual_attention_forward(self, x: "tensor(B, L, D)") -> "tensor(B, L, D)":
|
|
265
265
|
q, k, v = qkv[0], qkv[1], qkv[2]
|
266
266
|
|
267
267
|
bsz, q_len, kv_seq_len, head_dim = q.shape
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
# for memory considerations, do not upcast attention to fp32
|
281
|
-
# for long sequences or large batches
|
282
|
-
attn_weights = torch.nn.functional.softmax(attn_weights, dim=-1)
|
283
|
-
else:
|
284
|
-
# upcast attention to fp32
|
285
|
-
attn_weights = torch.nn.functional.softmax(attn_weights, dim=-1,
|
286
|
-
dtype=torch.float32).to(v.dtype)
|
287
|
-
out = torch.matmul(attn_weights, v)
|
268
|
+
q, k, v = padding_qkv_hd(
|
269
|
+
q, k, v,
|
270
|
+
head_dim, 128
|
271
|
+
)
|
272
|
+
|
273
|
+
attn_weights = None
|
274
|
+
attn_output = scaled_dot_product_attention(
|
275
|
+
q, k.contiguous(), v.contiguous(),
|
276
|
+
None, False, 1 / math.sqrt(head_dim)
|
277
|
+
)
|
278
|
+
|
279
|
+
out = attn_output[:, :, :, :head_dim]
|
288
280
|
output = self.dense(out.transpose(1, 2).reshape(B, L, -1))
|
289
281
|
output = self.output_dropout(output)
|
290
282
|
return output
|
@@ -271,6 +271,25 @@ def deepseek_attention_forward(
|
|
271
271
|
return attn_output, attn_weights, past_key_value
|
272
272
|
|
273
273
|
|
274
|
+
def fuse_gate_forward(self, x: torch.Tensor):
|
275
|
+
if x.device.type == "xpu" and x.dtype in [torch.float, torch.half]:
|
276
|
+
x = x.view(-1, x.size(-1))
|
277
|
+
logits = torch.nn.functional.linear(
|
278
|
+
x.type(torch.float32), self.weight.type(torch.float32), None
|
279
|
+
)
|
280
|
+
scores = logits.sigmoid()
|
281
|
+
|
282
|
+
import xe_addons
|
283
|
+
topk_idx, topk_weight = xe_addons.moe_group_topk(
|
284
|
+
scores, self.e_score_correction_bias,
|
285
|
+
self.n_group, 2, self.topk_group, self.top_k,
|
286
|
+
self.top_k > 1 and self.norm_topk_prob, 1e-20, self.routed_scaling_factor
|
287
|
+
)
|
288
|
+
else:
|
289
|
+
topk_idx, topk_weight = self(x)
|
290
|
+
return topk_idx, topk_weight.to(x.dtype)
|
291
|
+
|
292
|
+
|
274
293
|
def moe_infer_decode(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_weight: torch.Tensor):
|
275
294
|
if (
|
276
295
|
x.device.type == "xpu"
|
@@ -301,7 +320,7 @@ def moe_infer_decode(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_weight:
|
|
301
320
|
expert_out = expert(x)
|
302
321
|
outputs.append(expert_out)
|
303
322
|
outs = torch.cat(outputs, dim=0)
|
304
|
-
reshaped_topk_weight = topk_weight.squeeze(0).unsqueeze(-1)
|
323
|
+
reshaped_topk_weight = topk_weight.squeeze(0).unsqueeze(-1)
|
305
324
|
final_out = (outs * reshaped_topk_weight).sum(dim=0, keepdim=True)
|
306
325
|
return final_out
|
307
326
|
|
@@ -309,11 +328,13 @@ def moe_infer_decode(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_weight:
|
|
309
328
|
def deepseek_moe_forward(self, hidden_states: torch.Tensor):
|
310
329
|
identity = hidden_states
|
311
330
|
orig_shape = hidden_states.shape
|
312
|
-
|
331
|
+
# IPEX-LLM OPT start: fuse grouped topk in gate forward
|
332
|
+
topk_idx, topk_weight = fuse_gate_forward(self.gate, hidden_states)
|
333
|
+
# IPEX-LLM OPT end
|
313
334
|
hidden_states = hidden_states.view(-1, hidden_states.shape[-1])
|
314
335
|
flat_topk_idx = topk_idx.view(-1)
|
315
336
|
if not self.training:
|
316
|
-
# IPEX-LLM OPT start
|
337
|
+
# IPEX-LLM OPT start: add special moe_infer implementation for decoding
|
317
338
|
if topk_idx.size(0) == 1 and self.ep_size == 1:
|
318
339
|
y = moe_infer_decode(self, hidden_states, topk_idx, topk_weight)
|
319
340
|
else:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: ipex-llm
|
3
|
-
Version: 2.2.0b20250227
|
3
|
+
Version: 2.2.0b20250301
|
4
4
|
Summary: Large Language Model Develop Toolkit
|
5
5
|
Home-page: https://github.com/intel-analytics/ipex-llm
|
6
6
|
Author: BigDL Authors
|
@@ -27,7 +27,7 @@ Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine
|
|
27
27
|
Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'all'
|
28
28
|
Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'all'
|
29
29
|
Provides-Extra: cpp
|
30
|
-
Requires-Dist: bigdl-core-cpp ==2.6.0b20250227 ; extra == 'cpp'
|
30
|
+
Requires-Dist: bigdl-core-cpp ==2.6.0b20250301 ; extra == 'cpp'
|
31
31
|
Requires-Dist: setuptools ; extra == 'cpp'
|
32
32
|
Requires-Dist: onednn-devel ==2025.0.1 ; (platform_system == "Windows") and extra == 'cpp'
|
33
33
|
Requires-Dist: onednn ==2025.0.1 ; (platform_system == "Windows") and extra == 'cpp'
|
@@ -60,7 +60,7 @@ Requires-Dist: transformers ==4.40.0 ; extra == 'npu'
|
|
60
60
|
Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'npu'
|
61
61
|
Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'npu'
|
62
62
|
Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'npu'
|
63
|
-
Requires-Dist: bigdl-core-npu ==2.6.0b20250227 ; (platform_system == "Windows") and extra == 'npu'
|
63
|
+
Requires-Dist: bigdl-core-npu ==2.6.0b20250301 ; (platform_system == "Windows") and extra == 'npu'
|
64
64
|
Provides-Extra: serving
|
65
65
|
Requires-Dist: py-cpuinfo ; extra == 'serving'
|
66
66
|
Requires-Dist: fschat[model_worker,webui] ==0.2.36 ; extra == 'serving'
|
@@ -80,9 +80,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu'
|
|
80
80
|
Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu'
|
81
81
|
Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu'
|
82
82
|
Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu'
|
83
|
-
Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250227 ; extra == 'xpu'
|
84
|
-
Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250227 ; extra == 'xpu'
|
85
|
-
Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250227 ; extra == 'xpu'
|
83
|
+
Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250301 ; extra == 'xpu'
|
84
|
+
Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250301 ; extra == 'xpu'
|
85
|
+
Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250301 ; extra == 'xpu'
|
86
86
|
Provides-Extra: xpu-2-1
|
87
87
|
Requires-Dist: py-cpuinfo ; extra == 'xpu-2-1'
|
88
88
|
Requires-Dist: protobuf ; extra == 'xpu-2-1'
|
@@ -97,9 +97,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu-2-1'
|
|
97
97
|
Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu-2-1'
|
98
98
|
Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu-2-1'
|
99
99
|
Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu-2-1'
|
100
|
-
Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250227 ; extra == 'xpu-2-1'
|
101
|
-
Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250227 ; extra == 'xpu-2-1'
|
102
|
-
Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250227 ; extra == 'xpu-2-1'
|
100
|
+
Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250301 ; extra == 'xpu-2-1'
|
101
|
+
Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250301 ; extra == 'xpu-2-1'
|
102
|
+
Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250301 ; extra == 'xpu-2-1'
|
103
103
|
Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-1'
|
104
104
|
Requires-Dist: dpcpp-cpp-rt ==2024.0.2 ; (platform_system == "Windows") and extra == 'xpu-2-1'
|
105
105
|
Requires-Dist: mkl-dpcpp ==2024.0.0 ; (platform_system == "Windows") and extra == 'xpu-2-1'
|
@@ -117,7 +117,7 @@ Requires-Dist: setuptools ; extra == 'xpu-2-6'
|
|
117
117
|
Requires-Dist: torch ==2.6.0+xpu ; extra == 'xpu-2-6'
|
118
118
|
Requires-Dist: torchvision ==0.21.0+xpu ; extra == 'xpu-2-6'
|
119
119
|
Requires-Dist: torchaudio ==2.6.0+xpu ; extra == 'xpu-2-6'
|
120
|
-
Requires-Dist: bigdl-core-xe-all ==2.6.0b20250227 ; extra == 'xpu-2-6'
|
120
|
+
Requires-Dist: bigdl-core-xe-all ==2.6.0b20250301 ; extra == 'xpu-2-6'
|
121
121
|
Requires-Dist: onednn-devel ==2025.0.1 ; extra == 'xpu-2-6'
|
122
122
|
Requires-Dist: onednn ==2025.0.1 ; extra == 'xpu-2-6'
|
123
123
|
Requires-Dist: dpcpp-cpp-rt ==2025.0.2 ; extra == 'xpu-2-6'
|
@@ -133,9 +133,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arc'
|
|
133
133
|
Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arc'
|
134
134
|
Requires-Dist: tabulate ; extra == 'xpu-arc'
|
135
135
|
Requires-Dist: setuptools ; extra == 'xpu-arc'
|
136
|
-
Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250227 ; extra == 'xpu-arc'
|
137
|
-
Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250227 ; extra == 'xpu-arc'
|
138
|
-
Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250227 ; extra == 'xpu-arc'
|
136
|
+
Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250301 ; extra == 'xpu-arc'
|
137
|
+
Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250301 ; extra == 'xpu-arc'
|
138
|
+
Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250301 ; extra == 'xpu-arc'
|
139
139
|
Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arc'
|
140
140
|
Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
|
141
141
|
Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
|
@@ -156,9 +156,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arl'
|
|
156
156
|
Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arl'
|
157
157
|
Requires-Dist: tabulate ; extra == 'xpu-arl'
|
158
158
|
Requires-Dist: setuptools ; extra == 'xpu-arl'
|
159
|
-
Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250227 ; extra == 'xpu-arl'
|
160
|
-
Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250227 ; extra == 'xpu-arl'
|
161
|
-
Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250227 ; extra == 'xpu-arl'
|
159
|
+
Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250301 ; extra == 'xpu-arl'
|
160
|
+
Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250301 ; extra == 'xpu-arl'
|
161
|
+
Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250301 ; extra == 'xpu-arl'
|
162
162
|
Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arl'
|
163
163
|
Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
|
164
164
|
Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
|
@@ -179,9 +179,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-lnl'
|
|
179
179
|
Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-lnl'
|
180
180
|
Requires-Dist: tabulate ; extra == 'xpu-lnl'
|
181
181
|
Requires-Dist: setuptools ; extra == 'xpu-lnl'
|
182
|
-
Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250227 ; extra == 'xpu-lnl'
|
183
|
-
Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250227 ; extra == 'xpu-lnl'
|
184
|
-
Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250227 ; extra == 'xpu-lnl'
|
182
|
+
Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250301 ; extra == 'xpu-lnl'
|
183
|
+
Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250301 ; extra == 'xpu-lnl'
|
184
|
+
Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250301 ; extra == 'xpu-lnl'
|
185
185
|
Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-lnl'
|
186
186
|
Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
|
187
187
|
Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
|
@@ -152,10 +152,10 @@ ipex_llm/transformers/models/bloom.py,sha256=PxfzyYT-nFn3K5rZhTQjmcEjUUzAhUFzxIN
|
|
152
152
|
ipex_llm/transformers/models/chatglm.py,sha256=UHai1t2AUtGmF765_eHF8LUMVQzp_oCBx8TJB21WrHk,12597
|
153
153
|
ipex_llm/transformers/models/chatglm2.py,sha256=KyAIX7zGVQDQuwwM3QMBNWZbTeMHEzKUIgAryT0voHc,14933
|
154
154
|
ipex_llm/transformers/models/chatglm4.py,sha256=QvUehdaCePB3MNHyWg3dneDxmjtBdxYeKUyQUVcsgfM,16886
|
155
|
-
ipex_llm/transformers/models/chatglm4v.py,sha256=
|
155
|
+
ipex_llm/transformers/models/chatglm4v.py,sha256=Ba9Xtzwtzk_rzg5khGqDrlHfJsDwc5YcM5_yPoord7o,13324
|
156
156
|
ipex_llm/transformers/models/common.py,sha256=0OTRaXekOPApRdQ8UKl5Du8DOtKJ6awnQIStvYvFQOI,13018
|
157
157
|
ipex_llm/transformers/models/decilm.py,sha256=P-PBuDPf07GvKggLwJx_wPwIn6esN3rX8ai2JxRuZmE,5246
|
158
|
-
ipex_llm/transformers/models/deepseek.py,sha256=
|
158
|
+
ipex_llm/transformers/models/deepseek.py,sha256=w6tGeyJ9joD7lQBiZ6A01Z00g8hAXC1N2yGtJh8kyuk,13096
|
159
159
|
ipex_llm/transformers/models/deepseek_v3.py,sha256=CTgwIKQlUPlUCbOxc9Id5GapWkXOP6pMtkguYrWpCio,10003
|
160
160
|
ipex_llm/transformers/models/gemma.py,sha256=_E3Yw8Y45xyNVeLqyVKcpr8kjuICtETeL82cJ-bWJuU,9424
|
161
161
|
ipex_llm/transformers/models/gemma2.py,sha256=2WZuv-FLzJyTJFaYxOuzJt47QE64M0lHnzAiO5T6ozI,8049
|
@@ -262,11 +262,11 @@ ipex_llm/vllm/xpu/engine/__init__.py,sha256=pY_CpyuZd72fr6s32ejeKHKFW0K4vUU2rzZj
|
|
262
262
|
ipex_llm/vllm/xpu/engine/engine.py,sha256=NvCMbp0X8NVrOqbwm4FTvXOptTRLzu9jQsy37ZHnTk8,9493
|
263
263
|
ipex_llm/vllm/xpu/entrypoints/openai/api_server.py,sha256=D577nxWlyoWaHXNXIEvS3ViKSSWL3XZq8D8t6izD7x4,33250
|
264
264
|
ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py,sha256=hB398yYtKauASRzevctScdbFIjiiSGMAe1bwEuIHrhY,10893
|
265
|
-
ipex_llm-2.2.0b20250227.data/scripts/ipex-llm-init,sha256=fLQsT2dRL6H5bThb4GuIWotAuqoLsIxFwA-0c2qmaO8,6672
|
266
|
-
ipex_llm-2.2.0b20250227.data/scripts/llm-chat,sha256=TdUnUmNapzuoe1c8IzrdVOQwWEg8IqsMSBRlOD3daZM,2249
|
267
|
-
ipex_llm-2.2.0b20250227.data/scripts/llm-cli,sha256=RXGPlLElHxcKzoUxljEMBIAXbzCDysXL-Nxw-xF-7LU,2457
|
268
|
-
ipex_llm-2.2.
|
269
|
-
ipex_llm-2.2.0b20250227.dist-info/WHEEL,sha256=PPJcBMAZibF_2GFE9NmOJGqiaSMPiNFbJd6QaJjdA6Y,109
|
270
|
-
ipex_llm-2.2.0b20250227.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
|
271
|
-
ipex_llm-2.2.0b20250227.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
|
272
|
-
ipex_llm-2.2.0b20250227.dist-info/RECORD,,
|
265
|
+
ipex_llm-2.2.0b20250301.data/scripts/ipex-llm-init,sha256=fLQsT2dRL6H5bThb4GuIWotAuqoLsIxFwA-0c2qmaO8,6672
|
266
|
+
ipex_llm-2.2.0b20250301.data/scripts/llm-chat,sha256=TdUnUmNapzuoe1c8IzrdVOQwWEg8IqsMSBRlOD3daZM,2249
|
267
|
+
ipex_llm-2.2.0b20250301.data/scripts/llm-cli,sha256=RXGPlLElHxcKzoUxljEMBIAXbzCDysXL-Nxw-xF-7LU,2457
|
268
|
+
ipex_llm-2.2.0b20250301.dist-info/METADATA,sha256=pumiPBr8CKo5gIgCZoloybk379pwwMCf5HqFcQ4Lwnc,12369
|
269
|
+
ipex_llm-2.2.0b20250301.dist-info/WHEEL,sha256=PPJcBMAZibF_2GFE9NmOJGqiaSMPiNFbJd6QaJjdA6Y,109
|
270
|
+
ipex_llm-2.2.0b20250301.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
|
271
|
+
ipex_llm-2.2.0b20250301.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
|
272
|
+
ipex_llm-2.2.0b20250301.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|