ipex-llm 2.3.0b20250515__py3-none-manylinux2010_x86_64.whl → 2.3.0b20250518__py3-none-manylinux2010_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -373,3 +373,41 @@ def moe_softmax_topk(router_logits: torch.Tensor, top_k: int, norm_topk_prob: bo
373
373
  router_logits, top_k, norm_topk_prob
374
374
  )
375
375
  return selected_experts, routing_weights
376
+
377
+
378
+ # q,k,v_proj should be ipex-llm quantized linears
379
+ def merge_quantized_qkv(q_proj, k_proj, v_proj, module):
380
+ from ipex_llm.transformers.low_bit_linear import FP4Params
381
+ from ipex_llm.ggml.quantize import ggml_tensor_qtype
382
+ has_qtype = (hasattr(q_proj.weight, 'qtype')
383
+ and hasattr(k_proj.weight, 'qtype')
384
+ and hasattr(v_proj.weight, 'qtype'))
385
+ invalidInputError((has_qtype
386
+ and q_proj.weight.qtype == k_proj.weight.qtype
387
+ and q_proj.weight.qtype == v_proj.weight.qtype
388
+ and q_proj.weight.qtype in ggml_tensor_qtype.values()),
389
+ f"{q_proj.weight.qtype} is not supported, "
390
+ f"only {ggml_tensor_qtype.values()} are supported now.")
391
+ origin_device = q_proj.weight.device
392
+ q_proj.weight = q_proj.weight.to('cpu')
393
+ k_proj.weight = k_proj.weight.to('cpu')
394
+ v_proj.weight = v_proj.weight.to('cpu')
395
+ linears = [q_proj, k_proj, v_proj]
396
+ new_weight = torch.cat(list(linear.weight.data for linear in linears), dim=0)
397
+ if q_proj.has_bias:
398
+ new_bias = torch.cat(list(linear.bias.data for linear in linears), dim=0)
399
+ q_proj.bias = torch.nn.Parameter(new_bias, requires_grad=False)
400
+ new_out_features = sum(layer.out_features for layer in linears)
401
+ new_params_low_bit = FP4Params(data=new_weight.data,
402
+ requires_grad=False,
403
+ quantized=True,
404
+ _shape=[new_out_features, q_proj.in_features],
405
+ convert_shape_only=False,
406
+ qtype=q_proj.weight.qtype,
407
+ in_features=q_proj.in_features,
408
+ enable_scale_search=False)
409
+ q_proj.out_features = new_out_features
410
+ q_proj.weight = new_params_low_bit.to(origin_device)
411
+ del module.q_proj.weight
412
+ module.qkv_proj = module.q_proj
413
+ del module.k_proj, module.v_proj, module.q_proj
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ipex-llm
3
- Version: 2.3.0b20250515
3
+ Version: 2.3.0b20250518
4
4
  Summary: Large Language Model Develop Toolkit
5
5
  Home-page: https://github.com/intel-analytics/ipex-llm
6
6
  Author: BigDL Authors
@@ -27,7 +27,7 @@ Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine
27
27
  Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'all'
28
28
  Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'all'
29
29
  Provides-Extra: cpp
30
- Requires-Dist: bigdl-core-cpp ==2.7.0b20250515 ; extra == 'cpp'
30
+ Requires-Dist: bigdl-core-cpp ==2.7.0b20250518 ; extra == 'cpp'
31
31
  Requires-Dist: setuptools ; extra == 'cpp'
32
32
  Requires-Dist: onednn-devel ==2025.0.1 ; (platform_system == "Windows") and extra == 'cpp'
33
33
  Requires-Dist: onednn ==2025.0.1 ; (platform_system == "Windows") and extra == 'cpp'
@@ -60,7 +60,7 @@ Requires-Dist: transformers ==4.40.0 ; extra == 'npu'
60
60
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'npu'
61
61
  Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'npu'
62
62
  Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'npu'
63
- Requires-Dist: bigdl-core-npu ==2.7.0b20250515 ; (platform_system == "Windows") and extra == 'npu'
63
+ Requires-Dist: bigdl-core-npu ==2.7.0b20250518 ; (platform_system == "Windows") and extra == 'npu'
64
64
  Provides-Extra: serving
65
65
  Requires-Dist: py-cpuinfo ; extra == 'serving'
66
66
  Requires-Dist: fschat[model_worker,webui] ==0.2.36 ; extra == 'serving'
@@ -80,9 +80,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu'
80
80
  Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu'
81
81
  Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu'
82
82
  Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu'
83
- Requires-Dist: bigdl-core-xe-21 ==2.7.0b20250515 ; extra == 'xpu'
84
- Requires-Dist: bigdl-core-xe-batch-21 ==2.7.0b20250515 ; extra == 'xpu'
85
- Requires-Dist: bigdl-core-xe-addons-21 ==2.7.0b20250515 ; extra == 'xpu'
83
+ Requires-Dist: bigdl-core-xe-21 ==2.7.0b20250518 ; extra == 'xpu'
84
+ Requires-Dist: bigdl-core-xe-batch-21 ==2.7.0b20250518 ; extra == 'xpu'
85
+ Requires-Dist: bigdl-core-xe-addons-21 ==2.7.0b20250518 ; extra == 'xpu'
86
86
  Provides-Extra: xpu-2-1
87
87
  Requires-Dist: py-cpuinfo ; extra == 'xpu-2-1'
88
88
  Requires-Dist: protobuf ; extra == 'xpu-2-1'
@@ -97,9 +97,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu-2-1'
97
97
  Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu-2-1'
98
98
  Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu-2-1'
99
99
  Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu-2-1'
100
- Requires-Dist: bigdl-core-xe-21 ==2.7.0b20250515 ; extra == 'xpu-2-1'
101
- Requires-Dist: bigdl-core-xe-batch-21 ==2.7.0b20250515 ; extra == 'xpu-2-1'
102
- Requires-Dist: bigdl-core-xe-addons-21 ==2.7.0b20250515 ; extra == 'xpu-2-1'
100
+ Requires-Dist: bigdl-core-xe-21 ==2.7.0b20250518 ; extra == 'xpu-2-1'
101
+ Requires-Dist: bigdl-core-xe-batch-21 ==2.7.0b20250518 ; extra == 'xpu-2-1'
102
+ Requires-Dist: bigdl-core-xe-addons-21 ==2.7.0b20250518 ; extra == 'xpu-2-1'
103
103
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-1'
104
104
  Requires-Dist: dpcpp-cpp-rt ==2024.0.2 ; (platform_system == "Windows") and extra == 'xpu-2-1'
105
105
  Requires-Dist: mkl-dpcpp ==2024.0.0 ; (platform_system == "Windows") and extra == 'xpu-2-1'
@@ -117,7 +117,7 @@ Requires-Dist: setuptools ; extra == 'xpu-2-6'
117
117
  Requires-Dist: torch ==2.6.0+xpu ; extra == 'xpu-2-6'
118
118
  Requires-Dist: torchvision ==0.21.0+xpu ; extra == 'xpu-2-6'
119
119
  Requires-Dist: torchaudio ==2.6.0+xpu ; extra == 'xpu-2-6'
120
- Requires-Dist: bigdl-core-xe-all ==2.7.0b20250515 ; extra == 'xpu-2-6'
120
+ Requires-Dist: bigdl-core-xe-all ==2.7.0b20250518 ; extra == 'xpu-2-6'
121
121
  Requires-Dist: onednn-devel ==2025.0.1 ; extra == 'xpu-2-6'
122
122
  Requires-Dist: onednn ==2025.0.1 ; extra == 'xpu-2-6'
123
123
  Requires-Dist: dpcpp-cpp-rt ==2025.0.2 ; extra == 'xpu-2-6'
@@ -132,7 +132,7 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-2-6-arl'
132
132
  Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-2-6-arl'
133
133
  Requires-Dist: tabulate ; extra == 'xpu-2-6-arl'
134
134
  Requires-Dist: setuptools ; extra == 'xpu-2-6-arl'
135
- Requires-Dist: bigdl-core-xe-all ==2.7.0b20250515 ; extra == 'xpu-2-6-arl'
135
+ Requires-Dist: bigdl-core-xe-all ==2.7.0b20250518 ; extra == 'xpu-2-6-arl'
136
136
  Requires-Dist: onednn-devel ==2025.0.1 ; extra == 'xpu-2-6-arl'
137
137
  Requires-Dist: onednn ==2025.0.1 ; extra == 'xpu-2-6-arl'
138
138
  Requires-Dist: dpcpp-cpp-rt ==2025.0.2 ; extra == 'xpu-2-6-arl'
@@ -153,7 +153,7 @@ ipex_llm/transformers/models/chatglm.py,sha256=DQM63oPIVMMTBQN4O4hPF4WY1aSiTWq4B
153
153
  ipex_llm/transformers/models/chatglm2.py,sha256=KyAIX7zGVQDQuwwM3QMBNWZbTeMHEzKUIgAryT0voHc,14933
154
154
  ipex_llm/transformers/models/chatglm4.py,sha256=QvUehdaCePB3MNHyWg3dneDxmjtBdxYeKUyQUVcsgfM,16886
155
155
  ipex_llm/transformers/models/chatglm4v.py,sha256=Ba9Xtzwtzk_rzg5khGqDrlHfJsDwc5YcM5_yPoord7o,13324
156
- ipex_llm/transformers/models/common.py,sha256=TnSEPaXixHg2kjbvNnHtM4raN5ayMzwkI7nyW4J0Rwk,16043
156
+ ipex_llm/transformers/models/common.py,sha256=NjwDmhrTcLQ4ceP5VKndl8YJ0uFdkftVCi3pn3oTOic,18080
157
157
  ipex_llm/transformers/models/decilm.py,sha256=P-PBuDPf07GvKggLwJx_wPwIn6esN3rX8ai2JxRuZmE,5246
158
158
  ipex_llm/transformers/models/deepseek.py,sha256=BJocxhznzvM99IZeAWuhyHEBm6Z2-AwhLFYMdjMJuc4,13083
159
159
  ipex_llm/transformers/models/deepseek_v3.py,sha256=CTgwIKQlUPlUCbOxc9Id5GapWkXOP6pMtkguYrWpCio,10003
@@ -265,11 +265,11 @@ ipex_llm/vllm/xpu/engine/__init__.py,sha256=sOvwLx_Zj0jiRCGj9W3DgGTfcSU3hABYhgIQ
265
265
  ipex_llm/vllm/xpu/engine/engine.py,sha256=XAprw7VifjfnR915TZOaKcxe3QCFsVBgxzS8qOdn1yg,14462
266
266
  ipex_llm/vllm/xpu/entrypoints/openai/api_server.py,sha256=uWHyyHLw-B8wXBnQw9_MCG81tKK9Jb0dyq1xfYHgoNw,45905
267
267
  ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py,sha256=hB398yYtKauASRzevctScdbFIjiiSGMAe1bwEuIHrhY,10893
268
- ipex_llm-2.3.0b20250515.data/scripts/ipex-llm-init,sha256=fLQsT2dRL6H5bThb4GuIWotAuqoLsIxFwA-0c2qmaO8,6672
269
- ipex_llm-2.3.0b20250515.data/scripts/llm-chat,sha256=TdUnUmNapzuoe1c8IzrdVOQwWEg8IqsMSBRlOD3daZM,2249
270
- ipex_llm-2.3.0b20250515.data/scripts/llm-cli,sha256=RXGPlLElHxcKzoUxljEMBIAXbzCDysXL-Nxw-xF-7LU,2457
271
- ipex_llm-2.3.0b20250515.dist-info/METADATA,sha256=_lbNuaDcLJThqZdxaHamNmc-qDmC7S7F7E_MgDqH_lk,8865
272
- ipex_llm-2.3.0b20250515.dist-info/WHEEL,sha256=PPJcBMAZibF_2GFE9NmOJGqiaSMPiNFbJd6QaJjdA6Y,109
273
- ipex_llm-2.3.0b20250515.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
274
- ipex_llm-2.3.0b20250515.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
275
- ipex_llm-2.3.0b20250515.dist-info/RECORD,,
268
+ ipex_llm-2.3.0b20250518.data/scripts/ipex-llm-init,sha256=fLQsT2dRL6H5bThb4GuIWotAuqoLsIxFwA-0c2qmaO8,6672
269
+ ipex_llm-2.3.0b20250518.data/scripts/llm-chat,sha256=TdUnUmNapzuoe1c8IzrdVOQwWEg8IqsMSBRlOD3daZM,2249
270
+ ipex_llm-2.3.0b20250518.data/scripts/llm-cli,sha256=RXGPlLElHxcKzoUxljEMBIAXbzCDysXL-Nxw-xF-7LU,2457
271
+ ipex_llm-2.3.0b20250518.dist-info/METADATA,sha256=EyH2NoP3wcaSi92HT3Ss1zNnvVhsRwkHtPeFhAqMvD8,8865
272
+ ipex_llm-2.3.0b20250518.dist-info/WHEEL,sha256=PPJcBMAZibF_2GFE9NmOJGqiaSMPiNFbJd6QaJjdA6Y,109
273
+ ipex_llm-2.3.0b20250518.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
274
+ ipex_llm-2.3.0b20250518.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
275
+ ipex_llm-2.3.0b20250518.dist-info/RECORD,,