ipex-llm 2.3.0b20250506__py3-none-win_amd64.whl → 2.3.0b20250507__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. ipex_llm/libs/bloom-api.dll +0 -0
  2. ipex_llm/libs/bloom.dll +0 -0
  3. ipex_llm/libs/gptneox-api.dll +0 -0
  4. ipex_llm/libs/gptneox.dll +0 -0
  5. ipex_llm/libs/libbloom_avx.dll +0 -0
  6. ipex_llm/libs/libbloom_vnni.dll +0 -0
  7. ipex_llm/libs/libgptneox_avx.dll +0 -0
  8. ipex_llm/libs/libgptneox_vnni.dll +0 -0
  9. ipex_llm/libs/libllama_avx.dll +0 -0
  10. ipex_llm/libs/libllama_vnni.dll +0 -0
  11. ipex_llm/libs/libstarcoder_avx.dll +0 -0
  12. ipex_llm/libs/libstarcoder_vnni.dll +0 -0
  13. ipex_llm/libs/llama-api.dll +0 -0
  14. ipex_llm/libs/llama.dll +0 -0
  15. ipex_llm/libs/main-bloom.exe +0 -0
  16. ipex_llm/libs/main-gptneox.exe +0 -0
  17. ipex_llm/libs/main-llama.exe +0 -0
  18. ipex_llm/libs/main-starcoder.exe +0 -0
  19. ipex_llm/libs/pipeline.dll +0 -0
  20. ipex_llm/libs/quantize-bloom.exe +0 -0
  21. ipex_llm/libs/quantize-bloom_vnni.exe +0 -0
  22. ipex_llm/libs/quantize-gptneox.exe +0 -0
  23. ipex_llm/libs/quantize-gptneox_vnni.exe +0 -0
  24. ipex_llm/libs/quantize-llama.exe +0 -0
  25. ipex_llm/libs/quantize-llama_vnni.exe +0 -0
  26. ipex_llm/libs/quantize-starcoder.exe +0 -0
  27. ipex_llm/libs/quantize-starcoder_vnni.exe +0 -0
  28. ipex_llm/libs/starcoder-api.dll +0 -0
  29. ipex_llm/libs/starcoder.dll +0 -0
  30. ipex_llm/transformers/convert.py +28 -1
  31. ipex_llm/transformers/models/qwen3.py +115 -0
  32. ipex_llm/transformers/models/qwen3_moe.py +142 -0
  33. ipex_llm/vllm/xpu/model_convert.py +2 -1
  34. {ipex_llm-2.3.0b20250506.dist-info → ipex_llm-2.3.0b20250507.dist-info}/METADATA +11 -11
  35. {ipex_llm-2.3.0b20250506.dist-info → ipex_llm-2.3.0b20250507.dist-info}/RECORD +41 -39
  36. {ipex_llm-2.3.0b20250506.data → ipex_llm-2.3.0b20250507.data}/scripts/ipex-llm-init.bat +0 -0
  37. {ipex_llm-2.3.0b20250506.data → ipex_llm-2.3.0b20250507.data}/scripts/llm-chat.ps1 +0 -0
  38. {ipex_llm-2.3.0b20250506.data → ipex_llm-2.3.0b20250507.data}/scripts/llm-cli.ps1 +0 -0
  39. {ipex_llm-2.3.0b20250506.dist-info → ipex_llm-2.3.0b20250507.dist-info}/WHEEL +0 -0
  40. {ipex_llm-2.3.0b20250506.dist-info → ipex_llm-2.3.0b20250507.dist-info}/entry_points.txt +0 -0
  41. {ipex_llm-2.3.0b20250506.dist-info → ipex_llm-2.3.0b20250507.dist-info}/top_level.txt +0 -0
ipex_llm/libs/*.dll and *.exe CHANGED (binary files, items 1-29 in the list above; binary diffs are not shown)
ipex_llm/transformers/convert.py CHANGED
@@ -1078,6 +1078,12 @@ def _optimize_pre(model, qtype=None):
     elif model.config.model_type == "qwen2_5_omni":
         from ipex_llm.transformers.models.qwen2_5_omni import merge_qkv
         model.apply(merge_qkv)
+    elif model.config.model_type == "qwen3":
+        from ipex_llm.transformers.models.qwen3 import merge_qkv
+        model.apply(merge_qkv)
+    elif model.config.model_type == "qwen3_moe":
+        from ipex_llm.transformers.models.qwen3_moe import merge_qkv
+        model.apply(merge_qkv)
     return model
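Note that `model.apply(fn)` here is the standard `torch.nn.Module.apply`, which calls `fn` on every submodule recursively, so `merge_qkv` only has to type-check each module it receives. A minimal sketch of that traversal (the toy model and print function are illustrative, not part of the package):

```python
import torch

def describe(module: torch.nn.Module):
    # apply() visits children first, then the root module itself,
    # which is how merge_qkv gets handed every attention layer.
    print(type(module).__name__)

toy = torch.nn.Sequential(torch.nn.Linear(4, 4), torch.nn.ReLU())
toy.apply(describe)  # prints: Linear, ReLU, Sequential
```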
@@ -2106,7 +2112,28 @@ def _optimize_post(model):
         convert_forward(model.token2wav, module.DiTAttention, dit_attention_forward)
         dit_model = model.token2wav.code2wav_dit_model
         dit_model._create_block_diff = MethodType(_create_block_diff, dit_model)
-
+    elif model.config.model_type == "qwen3":
+        modeling_module_name = model.__class__.__module__
+        module = importlib.import_module(modeling_module_name)
+        from ipex_llm.transformers.models.common import rms_norm_forward
+        from ipex_llm.transformers.models.qwen3 import qwen3_model_forward
+        from ipex_llm.transformers.models.qwen3 import qwen3_attention_forward
+        from ipex_llm.transformers.models.common import mlp_silu_forward
+        convert_forward(model, module.Qwen3RMSNorm, rms_norm_forward)
+        convert_forward(model, module.Qwen3Model, qwen3_model_forward)
+        convert_forward(model, module.Qwen3Attention, qwen3_attention_forward)
+        convert_forward(model, module.Qwen3MLP, mlp_silu_forward)
+    elif model.config.model_type == "qwen3_moe":
+        modeling_module_name = model.__class__.__module__
+        module = importlib.import_module(modeling_module_name)
+        from ipex_llm.transformers.models.common import rms_norm_forward
+        from ipex_llm.transformers.models.qwen3_moe import qwen3_moe_model_forward
+        from ipex_llm.transformers.models.qwen3 import qwen3_attention_forward
+        from ipex_llm.transformers.models.qwen3_moe import qwen3_moe_moe_forward
+        convert_forward(model, module.Qwen3MoeRMSNorm, rms_norm_forward)
+        convert_forward(model, module.Qwen3MoeModel, qwen3_moe_model_forward)
+        convert_forward(model, module.Qwen3MoeAttention, qwen3_attention_forward)
+        convert_forward(model, module.Qwen3MoeSparseMoeBlock, qwen3_moe_moe_forward)
     return model
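These two hunks register ipex-llm replacements for Qwen3's RMSNorm, model, attention, and MLP/MoE forwards whenever a `qwen3` or `qwen3_moe` checkpoint goes through the optimization pipeline. A hedged usage sketch; the checkpoint id and keyword arguments are illustrative, not taken from this diff:

```python
from ipex_llm.transformers import AutoModelForCausalLM

# Loading through ipex-llm's transformers frontend runs _optimize_pre /
# _optimize_post, so the qwen3 branches above take effect automatically.
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen3-8B",        # illustrative qwen3 checkpoint id
    load_in_4bit=True,      # low-bit conversion plus model optimizations
    trust_remote_code=True,
)
model = model.to("xpu")     # the patched forwards include XPU fast paths
```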
ipex_llm/transformers/models/qwen3.py ADDED
@@ -0,0 +1,115 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import torch
+from typing import Optional, List, Tuple
+from transformers.processing_utils import Unpack
+from transformers.cache_utils import Cache
+from transformers.modeling_outputs import MoeModelOutputWithPast
+from transformers.modeling_flash_attention_utils import FlashAttentionKwargs
+
+from transformers.models.qwen3.modeling_qwen3 import apply_rotary_pos_emb
+from transformers.models.qwen3.modeling_qwen3 import Qwen3Model, Qwen3Attention
+
+from ipex_llm.transformers.kv import DynamicNormalCache
+from ipex_llm.transformers.models.common import merge_qkv_base
+from ipex_llm.transformers.models.common import scaled_dot_product_attention
+from ipex_llm.transformers.models.utils import make_cache_contiguous_inplaced
+
+
+def merge_qkv(module: torch.nn.Module):
+    merge_qkv_base(module, Qwen3Attention)
+
+
+def qwen3_model_forward(
+    self,
+    input_ids: Optional[torch.LongTensor] = None,
+    attention_mask: Optional[torch.Tensor] = None,
+    position_ids: Optional[torch.LongTensor] = None,
+    past_key_values: Optional[List[torch.FloatTensor]] = None,
+    inputs_embeds: Optional[torch.FloatTensor] = None,
+    use_cache: Optional[bool] = None,
+    output_attentions: Optional[bool] = None,
+    output_hidden_states: Optional[bool] = None,
+    output_router_logits: Optional[bool] = None,
+    cache_position: Optional[torch.LongTensor] = None,
+    **flash_attn_kwargs: Unpack[FlashAttentionKwargs],
+) -> MoeModelOutputWithPast:
+    device = input_ids.device if input_ids is not None else inputs_embeds.device
+    use_cache = use_cache if use_cache is not None else self.config.use_cache
+    use_cache = True if device.type == "xpu" else use_cache
+    if use_cache and not isinstance(past_key_values, DynamicNormalCache):
+        past_key_values = DynamicNormalCache.from_legacy_cache(past_key_values)
+
+    return Qwen3Model.forward(
+        self=self,
+        input_ids=input_ids,
+        attention_mask=attention_mask,
+        position_ids=position_ids,
+        past_key_values=past_key_values,
+        inputs_embeds=inputs_embeds,
+        use_cache=use_cache,
+        output_attentions=output_attentions,
+        output_hidden_states=output_hidden_states,
+        output_router_logits=output_router_logits,
+        cache_position=cache_position,
+        **flash_attn_kwargs,
+    )
+
+
+def qwen3_attention_forward(
+    self,
+    hidden_states: torch.Tensor,
+    position_embeddings: Tuple[torch.Tensor, torch.Tensor],
+    attention_mask: Optional[torch.Tensor],
+    past_key_value: Optional[Cache] = None,
+    cache_position: Optional[torch.LongTensor] = None,
+    **kwargs: Unpack[FlashAttentionKwargs],
+):
+    bsz, q_len, _ = hidden_states.size()
+    device = hidden_states.device
+
+    qkv = self.qkv_proj(hidden_states)
+    qkv = qkv.view(bsz, q_len, -1, self.head_dim)
+    qkv = qkv.transpose(1, 2)
+    query_states, key_states, value_states = qkv.split([self.config.num_attention_heads,
+                                                        self.config.num_key_value_heads,
+                                                        self.config.num_key_value_heads], dim=1)
+    query_states = self.q_norm(query_states)
+    key_states = self.k_norm(key_states)
+
+    cos, sin = position_embeddings
+    if device.type == "xpu":
+        import xe_addons
+        make_cache_contiguous_inplaced(cos, sin)
+        xe_addons.rotary_half_with_cache_inplaced(query_states, key_states, cos, sin)
+    else:
+        query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)
+
+    if past_key_value is not None:
+        cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position}
+        key_states, value_states = past_key_value.update(key_states, value_states,
+                                                         self.layer_idx, cache_kwargs)
+    attn_weights = None
+    attn_output = scaled_dot_product_attention(
+        query_states, key_states, value_states,
+        attention_mask, q_len == key_states.size(2), self.scaling
+    )
+    attn_output = attn_output.transpose(1, 2).contiguous()
+
+    attn_output = attn_output.reshape(bsz, q_len, -1)
+    attn_output = self.o_proj(attn_output)
+    return attn_output, attn_weights
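`merge_qkv_base` (imported from `models/common`) is what creates the fused `qkv_proj` that the attention forward above splits by head counts. Conceptually it concatenates the three projection weights so that one GEMM replaces three. A simplified sketch of the idea, not the actual `merge_qkv_base` implementation:

```python
import torch

def merge_qkv_sketch(attn: torch.nn.Module):
    # Concatenate q/k/v projection weights into a single Linear. The fused
    # output is later split back into [num_heads, num_kv_heads, num_kv_heads]
    # along the head dimension, as qwen3_attention_forward does above.
    q, k, v = attn.q_proj, attn.k_proj, attn.v_proj
    qkv = torch.nn.Linear(q.in_features,
                          q.out_features + k.out_features + v.out_features,
                          bias=q.bias is not None)
    qkv.weight.data = torch.cat([q.weight.data, k.weight.data, v.weight.data], dim=0)
    if q.bias is not None:
        qkv.bias.data = torch.cat([q.bias.data, k.bias.data, v.bias.data], dim=0)
    attn.qkv_proj = qkv
    del attn.q_proj, attn.k_proj, attn.v_proj
```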
ipex_llm/transformers/models/qwen3_moe.py ADDED
@@ -0,0 +1,142 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import torch
+from typing import Optional, List
+from transformers.processing_utils import Unpack
+from transformers.modeling_outputs import MoeModelOutputWithPast
+from transformers.modeling_flash_attention_utils import FlashAttentionKwargs
+
+from transformers.models.qwen3_moe.modeling_qwen3_moe import Qwen3MoeModel, Qwen3MoeAttention
+
+from ipex_llm.transformers.kv import DynamicNormalCache
+from ipex_llm.transformers.models.common import merge_qkv_base
+from ipex_llm.transformers.models.utils import use_fuse_moe
+
+
+def merge_qkv(module: torch.nn.Module):
+    merge_qkv_base(module, Qwen3MoeAttention)
+
+
+def qwen3_moe_model_forward(
+    self,
+    input_ids: Optional[torch.LongTensor] = None,
+    attention_mask: Optional[torch.Tensor] = None,
+    position_ids: Optional[torch.LongTensor] = None,
+    past_key_values: Optional[List[torch.FloatTensor]] = None,
+    inputs_embeds: Optional[torch.FloatTensor] = None,
+    use_cache: Optional[bool] = None,
+    output_attentions: Optional[bool] = None,
+    output_hidden_states: Optional[bool] = None,
+    output_router_logits: Optional[bool] = None,
+    cache_position: Optional[torch.LongTensor] = None,
+    **flash_attn_kwargs: Unpack[FlashAttentionKwargs],
+) -> MoeModelOutputWithPast:
+    device = input_ids.device if input_ids is not None else inputs_embeds.device
+    use_cache = use_cache if use_cache is not None else self.config.use_cache
+    use_cache = True if device.type == "xpu" else use_cache
+    if use_cache and not isinstance(past_key_values, DynamicNormalCache):
+        past_key_values = DynamicNormalCache.from_legacy_cache(past_key_values)
+
+    return Qwen3MoeModel.forward(
+        self=self,
+        input_ids=input_ids,
+        attention_mask=attention_mask,
+        position_ids=position_ids,
+        past_key_values=past_key_values,
+        inputs_embeds=inputs_embeds,
+        use_cache=use_cache,
+        output_attentions=output_attentions,
+        output_hidden_states=output_hidden_states,
+        output_router_logits=output_router_logits,
+        cache_position=cache_position,
+        **flash_attn_kwargs,
+    )
+
+
+def qwen3_moe_moe_forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
+    batch_size, sequence_length, hidden_dim = hidden_states.shape
+    hidden_states = hidden_states.view(-1, hidden_dim)
+    router_logits = self.gate(hidden_states)
+
+    if router_logits.device == "xpu":
+        import xe_addons
+        selected_experts, routing_weights = xe_addons.moe_softmax_topk(
+            router_logits, self.top_k, self.norm_topk_prob
+        )
+    else:
+        routing_weights = torch.nn.functional.softmax(router_logits, dim=1, dtype=torch.float)
+        routing_weights, selected_experts = torch.topk(routing_weights, self.top_k, dim=-1)
+        if self.norm_topk_prob:
+            routing_weights /= routing_weights.sum(dim=-1, keepdim=True)
+        routing_weights = routing_weights.to(hidden_states.dtype)
+
+    if selected_experts.size(0) == 1:
+        if use_fuse_moe(hidden_states, self.experts[0].down_proj.qtype):
+            if getattr(self, "gates", None) is None:
+                gate_addrs = [expert.gate_proj.weight.data_ptr() for expert in self.experts]
+                up_addrs = [expert.up_proj.weight.data_ptr() for expert in self.experts]
+                down_addrs = [expert.down_proj.weight.data_ptr() for expert in self.experts]
+                gates = torch.tensor(gate_addrs, dtype=torch.uint64, device=hidden_states.device)
+                ups = torch.tensor(up_addrs, dtype=torch.uint64, device=hidden_states.device)
+                downs = torch.tensor(down_addrs, dtype=torch.uint64, device=hidden_states.device)
+                self.register_buffer("gates", gates, persistent=False)
+                self.register_buffer("ups", ups, persistent=False)
+                self.register_buffer("downs", downs, persistent=False)
+
+            import xe_linear
+            final_hidden_states = xe_linear.moe_forward_vec(
+                hidden_states, selected_experts, routing_weights, self.gates, self.ups, self.downs,
+                hidden_states.size(-1), self.experts[0].intermediate_size,
+                self.experts[0].down_proj.qtype
+            )
+        else:
+            idxs = selected_experts.flatten().tolist()
+            outputs = []
+            for i in idxs:
+                expert = self.experts[i]
+                expert_out = expert(hidden_states)
+                outputs.append(expert_out)
+            outs = torch.cat(outputs, dim=0)
+            reshaped_topk_weight = routing_weights.squeeze(0).unsqueeze(-1)
+            final_hidden_states = (outs * reshaped_topk_weight).sum(dim=0, keepdim=True)
+    else:
+        final_hidden_states = torch.zeros(
+            (batch_size * sequence_length, hidden_dim),
+            dtype=hidden_states.dtype, device=hidden_states.device
+        )
+
+        # One hot encode the selected experts to create an expert mask
+        # this will be used to easily index which expert is going to be sollicitated
+        expert_mask = torch.nn.functional.one_hot(selected_experts,
+                                                  num_classes=self.num_experts).permute(2, 1, 0)
+
+        # Loop over all available experts in the model and perform the computation on each expert
+        for expert_idx in range(self.num_experts):
+            expert_layer = self.experts[expert_idx]
+            idx, top_x = torch.where(expert_mask[expert_idx])
+
+            # Index the correct hidden states and compute the expert hidden state for
+            # the current expert. We need to make sure to multiply the output hidden
+            # states by `routing_weights` on the corresponding tokens (top-1 and top-2)
+            current_state = hidden_states[None, top_x].reshape(-1, hidden_dim)
+            current_hidden_states = expert_layer(current_state) * routing_weights[top_x, idx, None]
+
+            # However `index_add_` only support torch tensors for indexing so we'll use
+            # the `top_x` tensor here.
+            final_hidden_states.index_add_(0, top_x, current_hidden_states.to(hidden_states.dtype))
+    final_hidden_states = final_hidden_states.reshape(batch_size, sequence_length, hidden_dim)
+    return final_hidden_states, router_logits
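The CPU fallback branch in `qwen3_moe_moe_forward` is the usual top-k routing recipe: softmax over all experts, keep the `top_k` largest weights per token, then renormalize when `norm_topk_prob` is set. A standalone illustration of just that math, with arbitrarily chosen shapes:

```python
import torch

torch.manual_seed(0)
num_tokens, num_experts, top_k = 4, 8, 2
router_logits = torch.randn(num_tokens, num_experts)

# Softmax over every expert, then keep only the top_k weights per token.
routing_weights = torch.nn.functional.softmax(router_logits, dim=1, dtype=torch.float)
routing_weights, selected_experts = torch.topk(routing_weights, top_k, dim=-1)

# With norm_topk_prob=True the kept weights are rescaled to sum to 1 per token.
routing_weights /= routing_weights.sum(dim=-1, keepdim=True)

print(selected_experts.shape)        # torch.Size([4, 2])
print(routing_weights.sum(dim=-1))   # tensor([1., 1., 1., 1.])
```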
ipex_llm/vllm/xpu/model_convert.py CHANGED
@@ -65,13 +65,14 @@ def _model_sample_convert():
 def _ipex_llm_convert(load_in_low_bit):
     # import pdb
     # pdb.set_trace()
-    from vllm.worker.xpu_model_runner import XPUModelRunner
+    from vllm.worker.xpu_model_runner import XPUModelRunner, XPUModelRunnerBase
     from ipex_llm.vllm.xpu.ipex_llm_wrapper import get_ipex_llm_wrapper
     from ipex_llm.vllm.xpu.ipex_llm_v1_wrapper import get_ipex_llm_v1_wrapper
     import vllm.executor.ray_utils as ray_utils_v0
     import vllm.v1.executor.ray_utils as ray_utils_v1
     from vllm.v1.worker.gpu_model_runner import GPUModelRunner
     setattr(XPUModelRunner, "load_model", get_load_function(load_in_low_bit))
+    setattr(XPUModelRunnerBase, "load_model", get_load_function(load_in_low_bit))
     setattr(GPUModelRunner, "load_model", get_load_function(load_in_low_bit))
     setattr(ray_utils_v0, "RayWorkerWrapper", get_ipex_llm_wrapper(load_in_low_bit))
     setattr(ray_utils_v1, "RayWorkerWrapper", get_ipex_llm_v1_wrapper(load_in_low_bit))
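The one-line addition extends ipex-llm's monkey-patch of vLLM model loading to `XPUModelRunnerBase`, so runners that inherit or call the base class's `load_model` also pick up the low-bit loader. A generic sketch of this `setattr` patching pattern; the class and function names below are illustrative, not vLLM's:

```python
# Generic sketch of the setattr monkey-patch used above (names illustrative).
class RunnerBase:
    def load_model(self):
        return "original loader"

def get_patched_load(load_in_low_bit: str):
    def load_model(self):
        # A real replacement would load the weights and convert them to the
        # requested low-bit format before returning.
        return f"patched loader ({load_in_low_bit})"
    return load_model

setattr(RunnerBase, "load_model", get_patched_load("sym_int4"))
print(RunnerBase().load_model())  # -> patched loader (sym_int4)
```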
{ipex_llm-2.3.0b20250506.dist-info → ipex_llm-2.3.0b20250507.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ipex-llm
-Version: 2.3.0b20250506
+Version: 2.3.0b20250507
 Summary: Large Language Model Develop Toolkit
 Home-page: https://github.com/intel-analytics/ipex-llm
 Author: BigDL Authors
@@ -27,7 +27,7 @@ Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'all'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'all'
 Provides-Extra: cpp
-Requires-Dist: bigdl-core-cpp ==2.7.0b20250506 ; extra == 'cpp'
+Requires-Dist: bigdl-core-cpp ==2.7.0b20250507 ; extra == 'cpp'
 Requires-Dist: setuptools ; extra == 'cpp'
 Requires-Dist: onednn-devel ==2025.0.1 ; (platform_system == "Windows") and extra == 'cpp'
 Requires-Dist: onednn ==2025.0.1 ; (platform_system == "Windows") and extra == 'cpp'
@@ -60,7 +60,7 @@ Requires-Dist: transformers ==4.40.0 ; extra == 'npu'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'npu'
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'npu'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'npu'
-Requires-Dist: bigdl-core-npu ==2.7.0b20250506 ; (platform_system == "Windows") and extra == 'npu'
+Requires-Dist: bigdl-core-npu ==2.7.0b20250507 ; (platform_system == "Windows") and extra == 'npu'
 Provides-Extra: serving
 Requires-Dist: py-cpuinfo ; extra == 'serving'
 Requires-Dist: fschat[model_worker,webui] ==0.2.36 ; extra == 'serving'
@@ -80,9 +80,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-21 ==2.7.0b20250506 ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-batch-21 ==2.7.0b20250506 ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-addons-21 ==2.7.0b20250506 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-21 ==2.7.0b20250507 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.7.0b20250507 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.7.0b20250507 ; extra == 'xpu'
 Provides-Extra: xpu-2-1
 Requires-Dist: py-cpuinfo ; extra == 'xpu-2-1'
 Requires-Dist: protobuf ; extra == 'xpu-2-1'
@@ -97,9 +97,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu-2-1'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-21 ==2.7.0b20250506 ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-batch-21 ==2.7.0b20250506 ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-addons-21 ==2.7.0b20250506 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-21 ==2.7.0b20250507 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.7.0b20250507 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.7.0b20250507 ; extra == 'xpu-2-1'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-1'
 Requires-Dist: dpcpp-cpp-rt ==2024.0.2 ; (platform_system == "Windows") and extra == 'xpu-2-1'
 Requires-Dist: mkl-dpcpp ==2024.0.0 ; (platform_system == "Windows") and extra == 'xpu-2-1'
@@ -117,7 +117,7 @@ Requires-Dist: setuptools ; extra == 'xpu-2-6'
 Requires-Dist: torch ==2.6.0+xpu ; extra == 'xpu-2-6'
 Requires-Dist: torchvision ==0.21.0+xpu ; extra == 'xpu-2-6'
 Requires-Dist: torchaudio ==2.6.0+xpu ; extra == 'xpu-2-6'
-Requires-Dist: bigdl-core-xe-all ==2.7.0b20250506 ; extra == 'xpu-2-6'
+Requires-Dist: bigdl-core-xe-all ==2.7.0b20250507 ; extra == 'xpu-2-6'
 Requires-Dist: onednn-devel ==2025.0.1 ; extra == 'xpu-2-6'
 Requires-Dist: onednn ==2025.0.1 ; extra == 'xpu-2-6'
 Requires-Dist: dpcpp-cpp-rt ==2025.0.2 ; extra == 'xpu-2-6'
@@ -132,7 +132,7 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-2-6-arl'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-2-6-arl'
 Requires-Dist: tabulate ; extra == 'xpu-2-6-arl'
 Requires-Dist: setuptools ; extra == 'xpu-2-6-arl'
-Requires-Dist: bigdl-core-xe-all ==2.7.0b20250506 ; extra == 'xpu-2-6-arl'
+Requires-Dist: bigdl-core-xe-all ==2.7.0b20250507 ; extra == 'xpu-2-6-arl'
 Requires-Dist: onednn-devel ==2025.0.1 ; extra == 'xpu-2-6-arl'
 Requires-Dist: onednn ==2025.0.1 ; extra == 'xpu-2-6-arl'
 Requires-Dist: dpcpp-cpp-rt ==2025.0.2 ; extra == 'xpu-2-6-arl'
{ipex_llm-2.3.0b20250506.dist-info → ipex_llm-2.3.0b20250507.dist-info}/RECORD CHANGED
@@ -41,35 +41,35 @@ ipex_llm/langchain/llms/transformerspipelinellm.py,sha256=vm522YPPwWxxAPVvQBtxRf
 ipex_llm/langchain/vllm/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
 ipex_llm/langchain/vllm/vllm.py,sha256=6dxc-ZISZQrJilEa_HA827l75Dv9rcHpY_G6FdJ8BVs,7793
 ipex_llm/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ipex_llm/libs/bloom-api.dll,sha256=22wrJqv9sWR9HnPLzio3B296jvY6NinrMPnt1c0_MFQ,36352
-ipex_llm/libs/bloom.dll,sha256=9en-Ji91waynOJHs8VXrPAMV2JrMawAeUrx4dTmiJP4,507904
-ipex_llm/libs/gptneox-api.dll,sha256=UKL0fcB3g3JI5njBv4j2UULRS_6jvfl6iSpp0NwtiHc,24576
-ipex_llm/libs/gptneox.dll,sha256=Wt945Qf_5GbY5jfpHg7mG9xn6LoDvoGyJjmx-oH6Yuw,568320
-ipex_llm/libs/libbloom_avx.dll,sha256=3X7c6OiRyh40OVt7x9CKthiNEyEje4cjE8Dc18JUHMo,536576
-ipex_llm/libs/libbloom_vnni.dll,sha256=bTkMhPRpruA2ub8-IOsyqq09UXTWKmrBdDlazlLvDNQ,508416
-ipex_llm/libs/libgptneox_avx.dll,sha256=gsX3W1UFTEZY4rk7Pb2HspiQ1TJKQKcIwIQY_ENDCaQ,596992
-ipex_llm/libs/libgptneox_vnni.dll,sha256=AgPQA4Dt5ZzLIqrRwIpMWh0LjaLYMGmNAbwJJSzGh8A,568832
-ipex_llm/libs/libllama_avx.dll,sha256=c5kbeImKfxR3j1OtVD5ojVDg9bmEHDHFtnJWr3_IApA,591360
-ipex_llm/libs/libllama_vnni.dll,sha256=hFszK131YSqH1JSYqc7V6cpPJR9xoXJK0LhO9fecXQ8,563200
-ipex_llm/libs/libstarcoder_avx.dll,sha256=eYYr_hJjrenb3Jua3-bxrWpXj1WUbryJpxCfdjFIw_I,627712
-ipex_llm/libs/libstarcoder_vnni.dll,sha256=47uszx8WV0cMlN9DbqlPJ2oiSepk1OUDtCNVGGqTh5I,599552
-ipex_llm/libs/llama-api.dll,sha256=mzm_HqP6hSe1dpFKWAN2dl7mFBHQd79NOkPunLxwMCc,25600
-ipex_llm/libs/llama.dll,sha256=69NmJ5PhYO3NM1ZEcWFzTL7dTHIDo-6grG2E84-YqAg,562688
-ipex_llm/libs/main-bloom.exe,sha256=_EpnRlG0qUFrdY8AbMZmKALfv3l3OA4tewupZpKrm0A,103424
-ipex_llm/libs/main-gptneox.exe,sha256=hJvEs3hA6NrIzqlWkFQvikjnNs6maHm1ThPhG1t3Kvw,98816
-ipex_llm/libs/main-llama.exe,sha256=YBt0PyyVpOAxQdZZ8bwnV-gpMoPpg_Lqmsxm9WuzdJ0,99840
-ipex_llm/libs/main-starcoder.exe,sha256=EGAWFWMsvAJBw43r0JwItz-okFge5e6F7yOpitDqnCY,157696
-ipex_llm/libs/pipeline.dll,sha256=kzD55sWU-71D0qcUL-YlOwFK_xAySW_pIQ3H6ev_L_4,73216
-ipex_llm/libs/quantize-bloom.exe,sha256=wCTjZtTUkjhXDMfk78ib5g9no76EL3GfpCmnuiD4lW4,126464
-ipex_llm/libs/quantize-bloom_vnni.exe,sha256=0oIOV9Y6C-FQH-0C0ZOk34ROX616bvxfZFz_vsL4G60,128000
-ipex_llm/libs/quantize-gptneox.exe,sha256=nJ0hBR-kdE9UWWV-iMN_gb13s2pqhixaro35m12OXd8,104448
-ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=ZVjbdMhQT9_5J9Q-dVDdUcFxmZIX-ZVnwzoKtvE85Hg,104960
-ipex_llm/libs/quantize-llama.exe,sha256=F75XdM4fjQ4ot5caKLmixnzTMPSBtBSrYhZ12DPblSU,110080
-ipex_llm/libs/quantize-llama_vnni.exe,sha256=_J3eM4LgcSG7ACb8DTSb5OPFvSv7wMKQ-Kdewxf070M,110592
-ipex_llm/libs/quantize-starcoder.exe,sha256=zvpmhrrbm2pXA5MrhyZj0buhvswwkS_Vuq32Sdm8w3s,127488
-ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=QoT-09g3aP44ln_ULGa-1sQJD3c2IsjA9SnrrdkHjt4,128512
-ipex_llm/libs/starcoder-api.dll,sha256=-h3oVRrPWo168PRcbOFa_n1pIVVjDXoNqP98dPatZKw,21504
-ipex_llm/libs/starcoder.dll,sha256=1igLCjVlgHCs22hdxokewN2affmiXbCC2OeVtIaI-DA,599040
+ipex_llm/libs/bloom-api.dll,sha256=Tt4tHy_XQ-MEiEw6Up7SpsWRlvqYtGwwpH26WdHArOc,36352
+ipex_llm/libs/bloom.dll,sha256=EA5yFTwHb_PGvMvz4Cyb1tnvmAxdzxC5-wuV6p0OMKI,507904
+ipex_llm/libs/gptneox-api.dll,sha256=LIaPSIwg7huhRjoUgyzE9F2O-DODUbywfz2UwLDNOeg,24576
+ipex_llm/libs/gptneox.dll,sha256=WatAJmN3tQVb0rWRptVvF-dtkAPa0E7ysgg2iz7PCN8,568320
+ipex_llm/libs/libbloom_avx.dll,sha256=6NBma3ZePdGuPD80NiLLbwGd2Ja_fHFyz5j7wbYfb9M,536576
+ipex_llm/libs/libbloom_vnni.dll,sha256=0friR2pBOZ7sLw51TabkjtpbUuIOrnNKn5wN6UdBiKo,508416
+ipex_llm/libs/libgptneox_avx.dll,sha256=KtoxICvRPw7QLaBsh2_TtEl3Uwo2Mhoblm2Zvu2Kzrc,596992
+ipex_llm/libs/libgptneox_vnni.dll,sha256=fFnNCW-WzG0Zu2m-nyEiQfefNvmhPM94Bx3AVp3nu8Q,568832
+ipex_llm/libs/libllama_avx.dll,sha256=BWXO_CZT6Qps9hZI9KzJJ5Z4gUl_NflK_TlmyoJe4-I,591360
+ipex_llm/libs/libllama_vnni.dll,sha256=EmfDxxZ5bqjWCsgzjQBj5pstoC8Ky_hXuepz8CWn6Rc,563200
+ipex_llm/libs/libstarcoder_avx.dll,sha256=-MkXNLpWj0HxIomfy4hrv2Q-Bts8beRG06giVAPOlXk,627712
+ipex_llm/libs/libstarcoder_vnni.dll,sha256=C2jUNQ-64b8Teprc6I564lvpkqdPvZADUEuO0ytOb8M,599552
+ipex_llm/libs/llama-api.dll,sha256=oZ2RtujrylsJQdVRuEcsuD2OlX9j8mdaP-1mDTF7vho,25600
+ipex_llm/libs/llama.dll,sha256=3drx1vyctJLIDjG12df8t4ejyo5BmMA9aPtisNn5otQ,562688
+ipex_llm/libs/main-bloom.exe,sha256=fnieKQvKt1ppyEWY9bitNt2iaDkW6CBKHqEPBYCH20I,103424
+ipex_llm/libs/main-gptneox.exe,sha256=xV7yvYUhvnLTdrYNOZqR-FO3dpK2sGknRffCTBnux-g,98816
+ipex_llm/libs/main-llama.exe,sha256=HbI0RIngIauJl_molsqiKarm2jnN9k-XPFTRy72I-rE,99840
+ipex_llm/libs/main-starcoder.exe,sha256=IbO6csHarglFS76k0F6zLqdqdVsqNzx5us_6wvsiNt8,157696
+ipex_llm/libs/pipeline.dll,sha256=bE_ZKwL0jhC5npmY3Rst4bX5SRavoefJSgmmCGmh3ac,73216
+ipex_llm/libs/quantize-bloom.exe,sha256=7_p6sqNt1DflgDNZM4kP4byMmGQ_EtPPrXaBogeKVto,126464
+ipex_llm/libs/quantize-bloom_vnni.exe,sha256=mJ9hYQDSBmmzu2z-iHFKjUA7Gor7PlEPagvjnzXva1Q,128000
+ipex_llm/libs/quantize-gptneox.exe,sha256=uw_e-hgup0bgB3boPo2SM7nvI27IeJDaJE2u1h5Kp0U,104448
+ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=vjJusWjA_7-jVXNz-dmyoh1m-4JjGn_bEkxUTi6YOMU,104960
+ipex_llm/libs/quantize-llama.exe,sha256=zq8cbPhW6raS0-m4S1v2BrHt4A3Z_lDfXLbtm0hy7pE,110080
+ipex_llm/libs/quantize-llama_vnni.exe,sha256=YYBlgR3sj7O6FExih8liLarKbSfdtiGaGUp3kLQOgg0,110592
+ipex_llm/libs/quantize-starcoder.exe,sha256=iWxQHYSXAxCKRUu7uRZKPoFO4AiINjEcziIOwVF57FY,127488
+ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=c22imSa2x6MNcEOTVuWLoXz1hM0C6NEh6ai7kTsowwY,128512
+ipex_llm/libs/starcoder-api.dll,sha256=EMtZBU0dAHFtgRepVnPLl-KU-6yp1PM1j62AyS7IKW8,21504
+ipex_llm/libs/starcoder.dll,sha256=KJxkaBk43ERKcIgTFHJ-vfd0GdWXea25zDAWSJIbTGo,599040
 ipex_llm/llamaindex/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
 ipex_llm/llamaindex/llms/__init__.py,sha256=KP1lEdGqDuxPoxL1ZSH25Pm2kKMPJBWUTLR0ckSLMIU,1139
 ipex_llm/llamaindex/llms/bigdlllm.py,sha256=FQBzq1KOjfc6uofTXAha3O7TqpJkNfOFepXQmOVlbnI,26314
@@ -87,7 +87,7 @@ ipex_llm/serving/fastchat/tgi_api_protocol.py,sha256=brT3k3-V0NJrU4fRqUwWjC0O3iO
 ipex_llm/serving/fastchat/tgi_api_server.py,sha256=agNTAEiZPSuj3dEdIdYKwkoY0cXOUDX06DiM9VP2knQ,24418
 ipex_llm/serving/fastchat/vllm_worker.py,sha256=ZLz2Q9GxJO6r_LOiP6epgCRjBGk-K4EB1SNEWSJp5DA,11091
 ipex_llm/transformers/__init__.py,sha256=BreA3EY6hkNq0rVixb_sUuTLzMrcWXTt3yRsshCPHHQ,1214
-ipex_llm/transformers/convert.py,sha256=fmunTuZCTgTnZZXfDaCNHlO4TMDBGUgqPRKuOLp7x9Y,107586
+ipex_llm/transformers/convert.py,sha256=mJWejcYwe2gCqPsLFchq_umhkwG_6tchikEHGWyQWy8,109431
 ipex_llm/transformers/convert_ipex.py,sha256=_nSnUTQy-yfkKaqGdqnBdWztZf3NGmnbZ0TKaDrF4X4,14617
 ipex_llm/transformers/embedding.py,sha256=bdgk59DvD4ZZyxRzewXOR7g56nThgO6uhIwk8QL7f-s,9299
 ipex_llm/transformers/kv.py,sha256=src_HcVDKFwQ1V8hdTrFQw5RIwUewM9VOR47GVTPJG4,21187
@@ -174,6 +174,8 @@ ipex_llm/transformers/models/qwen2.py,sha256=zK-FpUaxEhjD4gZa1ZvArodAilz29T_cpeA
 ipex_llm/transformers/models/qwen2_5_omni.py,sha256=gNnWvGiPntn6GlyIUtCVYCSIMPU6FG9TCS7fhWbwerY,18779
 ipex_llm/transformers/models/qwen2_moe.py,sha256=a0gYo-ngf8SxaEnBdZUJDnPS6Mkn_poDd8xqhx50icI,19516
 ipex_llm/transformers/models/qwen2_vl.py,sha256=G-9e2oN4f5p5IWQ-zsBZuONxTura3BjlgyT2meigbHQ,13579
+ipex_llm/transformers/models/qwen3.py,sha256=n7kbImp1i3-6Tki2y2NwMKZ6f6tItyLu_0USmCF7_qY,4756
+ipex_llm/transformers/models/qwen3_moe.py,sha256=4S0nxMRA3gneaAcXNbPowgZq70_rb3L9PObRQtUNsww,6812
 ipex_llm/transformers/models/qwen_vl.py,sha256=lwNwCJNsBvOu1TGNDW_E2IlCjI7XZwHY8qIcZpuNDFc,17187
 ipex_llm/transformers/models/rwkv4.py,sha256=H4KMtxN0JA2ZTXnonHpsUUJ5xULemo-D1Jzl0ri_UY8,6123
 ipex_llm/transformers/models/rwkv5.py,sha256=OkRNj1pCAZg1z2Fw-I0DEnxLEdZyPeRSQ6msrkxLOCs,10710
@@ -251,16 +253,16 @@ ipex_llm/vllm/cpu/entrypoints/openai/cli_args.py,sha256=hB398yYtKauASRzevctScdbF
 ipex_llm/vllm/xpu/__init__.py,sha256=zBSG6nzrVF5QnpR6_f7kPhBFeowTE9gaZ7D5m98E7_w,585
 ipex_llm/vllm/xpu/ipex_llm_v1_wrapper.py,sha256=pd939vFomKIg9Qn2NO4u0OF6hPgvQpqcfJSxqBzcqhA,825
 ipex_llm/vllm/xpu/ipex_llm_wrapper.py,sha256=_CbhvBuf_KPnmLfngYKtJl5gPAHVsG2mWth3wSeaH3M,892
-ipex_llm/vllm/xpu/model_convert.py,sha256=NnD7xj7O9ZHVUedCxqAH8GEZvagokZmBXrjYSLpigkY,9541
+ipex_llm/vllm/xpu/model_convert.py,sha256=vkzH9quwVcjoAviPWz2IdbPec9u2YTdG6KNUEt6j7dU,9643
 ipex_llm/vllm/xpu/engine/__init__.py,sha256=sOvwLx_Zj0jiRCGj9W3DgGTfcSU3hABYhgIQI7T6cxU,879
 ipex_llm/vllm/xpu/engine/engine.py,sha256=XAprw7VifjfnR915TZOaKcxe3QCFsVBgxzS8qOdn1yg,14462
 ipex_llm/vllm/xpu/entrypoints/openai/api_server.py,sha256=uWHyyHLw-B8wXBnQw9_MCG81tKK9Jb0dyq1xfYHgoNw,45905
 ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py,sha256=hB398yYtKauASRzevctScdbFIjiiSGMAe1bwEuIHrhY,10893
-ipex_llm-2.3.0b20250506.data/scripts/ipex-llm-init.bat,sha256=HPtCYuDYwEatq7dAwOvdfVcHYCpAVdbj75K1qh0vQek,2578
-ipex_llm-2.3.0b20250506.data/scripts/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
-ipex_llm-2.3.0b20250506.data/scripts/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
-ipex_llm-2.3.0b20250506.dist-info/METADATA,sha256=LtfQ-1Y1ougxU2AMzpnbh3XdCE0Gb1xgf7LIAGyB6iQ,8865
-ipex_llm-2.3.0b20250506.dist-info/WHEEL,sha256=6iYPr8vTHsyDK75jr9X0V3I9wPSVmtwr_8fdATBciGk,98
-ipex_llm-2.3.0b20250506.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
-ipex_llm-2.3.0b20250506.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
-ipex_llm-2.3.0b20250506.dist-info/RECORD,,
+ipex_llm-2.3.0b20250507.data/scripts/ipex-llm-init.bat,sha256=HPtCYuDYwEatq7dAwOvdfVcHYCpAVdbj75K1qh0vQek,2578
+ipex_llm-2.3.0b20250507.data/scripts/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
+ipex_llm-2.3.0b20250507.data/scripts/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
+ipex_llm-2.3.0b20250507.dist-info/METADATA,sha256=dOnD7OAqHtDZF8uGb-xz3Xhw_pNyTL3ntanZ-5kehDY,8865
+ipex_llm-2.3.0b20250507.dist-info/WHEEL,sha256=6iYPr8vTHsyDK75jr9X0V3I9wPSVmtwr_8fdATBciGk,98
+ipex_llm-2.3.0b20250507.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
+ipex_llm-2.3.0b20250507.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
+ipex_llm-2.3.0b20250507.dist-info/RECORD,,