ipex-llm 2.3.0b20250506__py3-none-win_amd64.whl → 2.3.0b20250509__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ipex_llm/libs/bloom-api.dll +0 -0
- ipex_llm/libs/bloom.dll +0 -0
- ipex_llm/libs/gptneox-api.dll +0 -0
- ipex_llm/libs/gptneox.dll +0 -0
- ipex_llm/libs/libbloom_avx.dll +0 -0
- ipex_llm/libs/libbloom_vnni.dll +0 -0
- ipex_llm/libs/libgptneox_avx.dll +0 -0
- ipex_llm/libs/libgptneox_vnni.dll +0 -0
- ipex_llm/libs/libllama_avx.dll +0 -0
- ipex_llm/libs/libllama_vnni.dll +0 -0
- ipex_llm/libs/libstarcoder_avx.dll +0 -0
- ipex_llm/libs/libstarcoder_vnni.dll +0 -0
- ipex_llm/libs/llama-api.dll +0 -0
- ipex_llm/libs/llama.dll +0 -0
- ipex_llm/libs/main-bloom.exe +0 -0
- ipex_llm/libs/main-gptneox.exe +0 -0
- ipex_llm/libs/main-llama.exe +0 -0
- ipex_llm/libs/main-starcoder.exe +0 -0
- ipex_llm/libs/pipeline.dll +0 -0
- ipex_llm/libs/quantize-bloom.exe +0 -0
- ipex_llm/libs/quantize-bloom_vnni.exe +0 -0
- ipex_llm/libs/quantize-gptneox.exe +0 -0
- ipex_llm/libs/quantize-gptneox_vnni.exe +0 -0
- ipex_llm/libs/quantize-llama.exe +0 -0
- ipex_llm/libs/quantize-llama_vnni.exe +0 -0
- ipex_llm/libs/quantize-starcoder.exe +0 -0
- ipex_llm/libs/quantize-starcoder_vnni.exe +0 -0
- ipex_llm/libs/starcoder-api.dll +0 -0
- ipex_llm/libs/starcoder.dll +0 -0
- ipex_llm/transformers/convert.py +28 -1
- ipex_llm/transformers/models/common.py +8 -0
- ipex_llm/transformers/models/llama.py +2 -3
- ipex_llm/transformers/models/qwen2_5_omni.py +2 -2
- ipex_llm/transformers/models/qwen3.py +114 -0
- ipex_llm/transformers/models/qwen3_moe.py +142 -0
- ipex_llm/vllm/xpu/model_convert.py +2 -1
- {ipex_llm-2.3.0b20250506.dist-info → ipex_llm-2.3.0b20250509.dist-info}/METADATA +11 -11
- {ipex_llm-2.3.0b20250506.dist-info → ipex_llm-2.3.0b20250509.dist-info}/RECORD +44 -42
- {ipex_llm-2.3.0b20250506.data → ipex_llm-2.3.0b20250509.data}/scripts/ipex-llm-init.bat +0 -0
- {ipex_llm-2.3.0b20250506.data → ipex_llm-2.3.0b20250509.data}/scripts/llm-chat.ps1 +0 -0
- {ipex_llm-2.3.0b20250506.data → ipex_llm-2.3.0b20250509.data}/scripts/llm-cli.ps1 +0 -0
- {ipex_llm-2.3.0b20250506.dist-info → ipex_llm-2.3.0b20250509.dist-info}/WHEEL +0 -0
- {ipex_llm-2.3.0b20250506.dist-info → ipex_llm-2.3.0b20250509.dist-info}/entry_points.txt +0 -0
- {ipex_llm-2.3.0b20250506.dist-info → ipex_llm-2.3.0b20250509.dist-info}/top_level.txt +0 -0
ipex_llm/libs/bloom-api.dll    CHANGED (Binary file)
ipex_llm/libs/bloom.dll    CHANGED (Binary file)
ipex_llm/libs/gptneox-api.dll    CHANGED (Binary file)
ipex_llm/libs/gptneox.dll    CHANGED (Binary file)
ipex_llm/libs/libbloom_avx.dll    CHANGED (Binary file)
ipex_llm/libs/libbloom_vnni.dll    CHANGED (Binary file)
ipex_llm/libs/libgptneox_avx.dll    CHANGED (Binary file)
ipex_llm/libs/libgptneox_vnni.dll    CHANGED (Binary file)
ipex_llm/libs/libllama_avx.dll    CHANGED (Binary file)
ipex_llm/libs/libllama_vnni.dll    CHANGED (Binary file)
ipex_llm/libs/libstarcoder_avx.dll    CHANGED (Binary file)
ipex_llm/libs/libstarcoder_vnni.dll    CHANGED (Binary file)
ipex_llm/libs/llama-api.dll    CHANGED (Binary file)
ipex_llm/libs/llama.dll    CHANGED (Binary file)
ipex_llm/libs/main-bloom.exe    CHANGED (Binary file)
ipex_llm/libs/main-gptneox.exe    CHANGED (Binary file)
ipex_llm/libs/main-llama.exe    CHANGED (Binary file)
ipex_llm/libs/main-starcoder.exe    CHANGED (Binary file)
ipex_llm/libs/pipeline.dll    CHANGED (Binary file)
ipex_llm/libs/quantize-bloom.exe    CHANGED (Binary file)
ipex_llm/libs/quantize-bloom_vnni.exe    CHANGED (Binary file)
ipex_llm/libs/quantize-gptneox.exe    CHANGED (Binary file)
ipex_llm/libs/quantize-gptneox_vnni.exe    CHANGED (Binary file)
ipex_llm/libs/quantize-llama.exe    CHANGED (Binary file)
ipex_llm/libs/quantize-llama_vnni.exe    CHANGED (Binary file)
ipex_llm/libs/quantize-starcoder.exe    CHANGED (Binary file)
ipex_llm/libs/quantize-starcoder_vnni.exe    CHANGED (Binary file)
ipex_llm/libs/starcoder-api.dll    CHANGED (Binary file)
ipex_llm/libs/starcoder.dll    CHANGED (Binary file)
ipex_llm/transformers/convert.py
CHANGED
@@ -1078,6 +1078,12 @@ def _optimize_pre(model, qtype=None):
     elif model.config.model_type == "qwen2_5_omni":
         from ipex_llm.transformers.models.qwen2_5_omni import merge_qkv
         model.apply(merge_qkv)
+    elif model.config.model_type == "qwen3":
+        from ipex_llm.transformers.models.qwen3 import merge_qkv
+        model.apply(merge_qkv)
+    elif model.config.model_type == "qwen3_moe":
+        from ipex_llm.transformers.models.qwen3_moe import merge_qkv
+        model.apply(merge_qkv)
     return model


@@ -2106,7 +2112,28 @@ def _optimize_post(model):
         convert_forward(model.token2wav, module.DiTAttention, dit_attention_forward)
         dit_model = model.token2wav.code2wav_dit_model
         dit_model._create_block_diff = MethodType(_create_block_diff, dit_model)
-
+    elif model.config.model_type == "qwen3":
+        modeling_module_name = model.__class__.__module__
+        module = importlib.import_module(modeling_module_name)
+        from ipex_llm.transformers.models.common import rms_norm_forward
+        from ipex_llm.transformers.models.qwen3 import qwen3_model_forward
+        from ipex_llm.transformers.models.qwen3 import qwen3_attention_forward
+        from ipex_llm.transformers.models.common import mlp_silu_forward
+        convert_forward(model, module.Qwen3RMSNorm, rms_norm_forward)
+        convert_forward(model, module.Qwen3Model, qwen3_model_forward)
+        convert_forward(model, module.Qwen3Attention, qwen3_attention_forward)
+        convert_forward(model, module.Qwen3MLP, mlp_silu_forward)
+    elif model.config.model_type == "qwen3_moe":
+        modeling_module_name = model.__class__.__module__
+        module = importlib.import_module(modeling_module_name)
+        from ipex_llm.transformers.models.common import rms_norm_forward
+        from ipex_llm.transformers.models.qwen3_moe import qwen3_moe_model_forward
+        from ipex_llm.transformers.models.qwen3 import qwen3_attention_forward
+        from ipex_llm.transformers.models.qwen3_moe import qwen3_moe_moe_forward
+        convert_forward(model, module.Qwen3MoeRMSNorm, rms_norm_forward)
+        convert_forward(model, module.Qwen3MoeModel, qwen3_moe_model_forward)
+        convert_forward(model, module.Qwen3MoeAttention, qwen3_attention_forward)
+        convert_forward(model, module.Qwen3MoeSparseMoeBlock, qwen3_moe_moe_forward)
     return model
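For context, a minimal usage sketch of how these new hooks are normally reached: loading a Qwen3 checkpoint through ipex-llm's Transformers-style API runs _optimize_pre()/_optimize_post() during low-bit conversion. The model id, dtype, and generation settings below are placeholders and not taken from this diff; an XPU build of ipex-llm and an Intel GPU environment are assumed.

# Hypothetical usage sketch (placeholder model id, assumes the xpu build of ipex-llm).
import torch
from ipex_llm.transformers import AutoModelForCausalLM
from transformers import AutoTokenizer

model_path = "Qwen/Qwen3-8B"  # placeholder checkpoint id
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    load_in_4bit=True,          # low-bit conversion; convert.py walks the module tree here
    trust_remote_code=True,
)
model = model.half().to("xpu")  # move to Intel GPU so the XPU forwards registered above are used

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
inputs = tokenizer("What is AI?", return_tensors="pt").to("xpu")
with torch.inference_mode():
    output = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))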
ipex_llm/transformers/models/common.py
CHANGED
@@ -357,3 +357,11 @@ def rotary_two_with_cache_inplaced(query_states: torch.Tensor, key_states: torch.Tensor,
     import xe_addons
     xe_addons.rotary_two_with_cache_inplaced(query_states, key_states,
                                              cos, sin, half_layout)
+
+
+def rotary_half_with_cache_inplaced(query_states: torch.Tensor, key_states: torch.Tensor,
+                                    cos: torch.Tensor, sin: torch.Tensor):
+    import xe_addons
+    from ipex_llm.transformers.models.utils import make_cache_contiguous_inplaced
+    make_cache_contiguous_inplaced(cos, sin)
+    xe_addons.rotary_half_with_cache_inplaced(query_states, key_states, cos, sin)
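The new helper wraps an in-place XPU kernel from xe_addons. For reference, the operation it applies corresponds to the standard "rotate-half" rotary position embedding used by Hugging Face Llama/Qwen models; a plain-PyTorch, non-fused equivalent (illustrative only, not the kernel itself) looks roughly like this:

# Illustrative, out-of-place reference of "rotate half" rotary position embedding.
import torch

def rotate_half(x: torch.Tensor) -> torch.Tensor:
    # split the head dim in two halves and swap them with a sign flip
    x1, x2 = x.chunk(2, dim=-1)
    return torch.cat((-x2, x1), dim=-1)

def apply_rotary_half(q: torch.Tensor, k: torch.Tensor,
                      cos: torch.Tensor, sin: torch.Tensor):
    # cos/sin: [batch, seq, head_dim]; unsqueeze to broadcast over the head axis
    cos = cos.unsqueeze(1)
    sin = sin.unsqueeze(1)
    q_embed = (q * cos) + (rotate_half(q) * sin)
    k_embed = (k * cos) + (rotate_half(k) * sin)
    return q_embed, k_embed

# toy shapes: [batch, heads, seq, head_dim]
q = torch.randn(1, 8, 4, 64)
k = torch.randn(1, 2, 4, 64)
cos = torch.randn(1, 4, 64)
sin = torch.randn(1, 4, 64)
q_rot, k_rot = apply_rotary_half(q, k, cos, sin)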
ipex_llm/transformers/models/llama.py
CHANGED
@@ -162,9 +162,8 @@ def llama_attention_forward(
                                                   query_states, key_states)
         else:
             # transformers >= 4.46
-
-
-            xe_addons.rotary_half_with_cache_inplaced(query_states, key_states, cos, sin)
+            from ipex_llm.transformers.models.common import rotary_half_with_cache_inplaced
+            rotary_half_with_cache_inplaced(query_states, key_states, cos, sin)
     else:
         if position_embeddings is None:
             if isinstance(getattr(self.rotary_emb, "cos_cached", None), torch.Tensor):
ipex_llm/transformers/models/qwen2_5_omni.py
CHANGED
@@ -62,8 +62,8 @@ def qwen2_5_omni_attention_forward(

     cos, sin = position_embeddings
     if query_states.device.type == "xpu":
-        import
-
+        from ipex_llm.transformers.models.common import rotary_half_with_cache_inplaced
+        rotary_half_with_cache_inplaced(query_states, key_states, cos, sin)
     else:
         query_states, key_states = apply_multimodal_rotary_pos_emb(
             query_states, key_states, cos, sin, self.rope_scaling["mrope_section"]
ipex_llm/transformers/models/qwen3.py
ADDED
@@ -0,0 +1,114 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import torch
+from typing import Optional, List, Tuple
+from transformers.processing_utils import Unpack
+from transformers.cache_utils import Cache
+from transformers.modeling_outputs import MoeModelOutputWithPast
+from transformers.modeling_flash_attention_utils import FlashAttentionKwargs
+
+from transformers.models.qwen3.modeling_qwen3 import apply_rotary_pos_emb
+from transformers.models.qwen3.modeling_qwen3 import Qwen3Model, Qwen3Attention
+
+from ipex_llm.transformers.kv import DynamicNormalCache
+from ipex_llm.transformers.models.common import merge_qkv_base
+from ipex_llm.transformers.models.common import scaled_dot_product_attention
+from ipex_llm.transformers.models.utils import make_cache_contiguous_inplaced
+
+
+def merge_qkv(module: torch.nn.Module):
+    merge_qkv_base(module, Qwen3Attention)
+
+
+def qwen3_model_forward(
+    self,
+    input_ids: Optional[torch.LongTensor] = None,
+    attention_mask: Optional[torch.Tensor] = None,
+    position_ids: Optional[torch.LongTensor] = None,
+    past_key_values: Optional[List[torch.FloatTensor]] = None,
+    inputs_embeds: Optional[torch.FloatTensor] = None,
+    use_cache: Optional[bool] = None,
+    output_attentions: Optional[bool] = None,
+    output_hidden_states: Optional[bool] = None,
+    output_router_logits: Optional[bool] = None,
+    cache_position: Optional[torch.LongTensor] = None,
+    **flash_attn_kwargs: Unpack[FlashAttentionKwargs],
+) -> MoeModelOutputWithPast:
+    device = input_ids.device if input_ids is not None else inputs_embeds.device
+    use_cache = use_cache if use_cache is not None else self.config.use_cache
+    use_cache = True if device.type == "xpu" else use_cache
+    if use_cache and not isinstance(past_key_values, DynamicNormalCache):
+        past_key_values = DynamicNormalCache.from_legacy_cache(past_key_values)
+
+    return Qwen3Model.forward(
+        self=self,
+        input_ids=input_ids,
+        attention_mask=attention_mask,
+        position_ids=position_ids,
+        past_key_values=past_key_values,
+        inputs_embeds=inputs_embeds,
+        use_cache=use_cache,
+        output_attentions=output_attentions,
+        output_hidden_states=output_hidden_states,
+        output_router_logits=output_router_logits,
+        cache_position=cache_position,
+        **flash_attn_kwargs,
+    )
+
+
+def qwen3_attention_forward(
+    self,
+    hidden_states: torch.Tensor,
+    position_embeddings: Tuple[torch.Tensor, torch.Tensor],
+    attention_mask: Optional[torch.Tensor],
+    past_key_value: Optional[Cache] = None,
+    cache_position: Optional[torch.LongTensor] = None,
+    **kwargs: Unpack[FlashAttentionKwargs],
+):
+    bsz, q_len, _ = hidden_states.size()
+    device = hidden_states.device
+
+    qkv = self.qkv_proj(hidden_states)
+    qkv = qkv.view(bsz, q_len, -1, self.head_dim)
+    qkv = qkv.transpose(1, 2)
+    query_states, key_states, value_states = qkv.split([self.config.num_attention_heads,
+                                                        self.config.num_key_value_heads,
+                                                        self.config.num_key_value_heads], dim=1)
+    query_states = self.q_norm(query_states)
+    key_states = self.k_norm(key_states)
+
+    cos, sin = position_embeddings
+    if device.type == "xpu":
+        from ipex_llm.transformers.models.common import rotary_half_with_cache_inplaced
+        rotary_half_with_cache_inplaced(query_states, key_states, cos, sin)
+    else:
+        query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)
+
+    if past_key_value is not None:
+        cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position}
+        key_states, value_states = past_key_value.update(key_states, value_states,
+                                                         self.layer_idx, cache_kwargs)
+    attn_weights = None
+    attn_output = scaled_dot_product_attention(
+        query_states, key_states, value_states,
+        attention_mask, q_len == key_states.size(2), self.scaling
+    )
+    attn_output = attn_output.transpose(1, 2).contiguous()
+
+    attn_output = attn_output.reshape(bsz, q_len, -1)
+    attn_output = self.o_proj(attn_output)
+    return attn_output, attn_weights
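merge_qkv here delegates to merge_qkv_base, and qwen3_attention_forward then assumes a single fused qkv_proj whose output is split per head group. A standalone toy sketch of that fusion idea is shown below; it only illustrates the weight-stacking concept and is not the actual merge_qkv_base implementation (all names and shapes are made up for the example).

# Toy illustration of fusing separate Q/K/V projections into one qkv_proj linear,
# the layout qwen3_attention_forward above expects. NOT merge_qkv_base itself.
import torch

head_dim, n_q_heads, n_kv_heads, hidden = 64, 8, 2, 512
q_proj = torch.nn.Linear(hidden, n_q_heads * head_dim, bias=False)
k_proj = torch.nn.Linear(hidden, n_kv_heads * head_dim, bias=False)
v_proj = torch.nn.Linear(hidden, n_kv_heads * head_dim, bias=False)

# one matmul instead of three: stack the weights along the output dimension
qkv_proj = torch.nn.Linear(hidden, (n_q_heads + 2 * n_kv_heads) * head_dim, bias=False)
with torch.no_grad():
    qkv_proj.weight.copy_(torch.cat([q_proj.weight, k_proj.weight, v_proj.weight], dim=0))

x = torch.randn(1, 4, hidden)                        # [batch, seq, hidden]
qkv = qkv_proj(x).view(1, 4, -1, head_dim).transpose(1, 2)
q, k, v = qkv.split([n_q_heads, n_kv_heads, n_kv_heads], dim=1)

# the fused projection reproduces the separate Q projection exactly
assert torch.allclose(q, q_proj(x).view(1, 4, n_q_heads, head_dim).transpose(1, 2), atol=1e-6)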
ipex_llm/transformers/models/qwen3_moe.py
ADDED
@@ -0,0 +1,142 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import torch
+from typing import Optional, List
+from transformers.processing_utils import Unpack
+from transformers.modeling_outputs import MoeModelOutputWithPast
+from transformers.modeling_flash_attention_utils import FlashAttentionKwargs
+
+from transformers.models.qwen3_moe.modeling_qwen3_moe import Qwen3MoeModel, Qwen3MoeAttention
+
+from ipex_llm.transformers.kv import DynamicNormalCache
+from ipex_llm.transformers.models.common import merge_qkv_base
+from ipex_llm.transformers.models.utils import use_fuse_moe
+
+
+def merge_qkv(module: torch.nn.Module):
+    merge_qkv_base(module, Qwen3MoeAttention)
+
+
+def qwen3_moe_model_forward(
+    self,
+    input_ids: Optional[torch.LongTensor] = None,
+    attention_mask: Optional[torch.Tensor] = None,
+    position_ids: Optional[torch.LongTensor] = None,
+    past_key_values: Optional[List[torch.FloatTensor]] = None,
+    inputs_embeds: Optional[torch.FloatTensor] = None,
+    use_cache: Optional[bool] = None,
+    output_attentions: Optional[bool] = None,
+    output_hidden_states: Optional[bool] = None,
+    output_router_logits: Optional[bool] = None,
+    cache_position: Optional[torch.LongTensor] = None,
+    **flash_attn_kwargs: Unpack[FlashAttentionKwargs],
+) -> MoeModelOutputWithPast:
+    device = input_ids.device if input_ids is not None else inputs_embeds.device
+    use_cache = use_cache if use_cache is not None else self.config.use_cache
+    use_cache = True if device.type == "xpu" else use_cache
+    if use_cache and not isinstance(past_key_values, DynamicNormalCache):
+        past_key_values = DynamicNormalCache.from_legacy_cache(past_key_values)
+
+    return Qwen3MoeModel.forward(
+        self=self,
+        input_ids=input_ids,
+        attention_mask=attention_mask,
+        position_ids=position_ids,
+        past_key_values=past_key_values,
+        inputs_embeds=inputs_embeds,
+        use_cache=use_cache,
+        output_attentions=output_attentions,
+        output_hidden_states=output_hidden_states,
+        output_router_logits=output_router_logits,
+        cache_position=cache_position,
+        **flash_attn_kwargs,
+    )
+
+
+def qwen3_moe_moe_forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
+    batch_size, sequence_length, hidden_dim = hidden_states.shape
+    hidden_states = hidden_states.view(-1, hidden_dim)
+    router_logits = self.gate(hidden_states)
+
+    if router_logits.device == "xpu":
+        import xe_addons
+        selected_experts, routing_weights = xe_addons.moe_softmax_topk(
+            router_logits, self.top_k, self.norm_topk_prob
+        )
+    else:
+        routing_weights = torch.nn.functional.softmax(router_logits, dim=1, dtype=torch.float)
+        routing_weights, selected_experts = torch.topk(routing_weights, self.top_k, dim=-1)
+        if self.norm_topk_prob:
+            routing_weights /= routing_weights.sum(dim=-1, keepdim=True)
+        routing_weights = routing_weights.to(hidden_states.dtype)
+
+    if selected_experts.size(0) == 1:
+        if use_fuse_moe(hidden_states, self.experts[0].down_proj.qtype):
+            if getattr(self, "gates", None) is None:
+                gate_addrs = [expert.gate_proj.weight.data_ptr() for expert in self.experts]
+                up_addrs = [expert.up_proj.weight.data_ptr() for expert in self.experts]
+                down_addrs = [expert.down_proj.weight.data_ptr() for expert in self.experts]
+                gates = torch.tensor(gate_addrs, dtype=torch.uint64, device=hidden_states.device)
+                ups = torch.tensor(up_addrs, dtype=torch.uint64, device=hidden_states.device)
+                downs = torch.tensor(down_addrs, dtype=torch.uint64, device=hidden_states.device)
+                self.register_buffer("gates", gates, persistent=False)
+                self.register_buffer("ups", ups, persistent=False)
+                self.register_buffer("downs", downs, persistent=False)
+
+            import xe_linear
+            final_hidden_states = xe_linear.moe_forward_vec(
+                hidden_states, selected_experts, routing_weights, self.gates, self.ups, self.downs,
+                hidden_states.size(-1), self.experts[0].intermediate_size,
+                self.experts[0].down_proj.qtype
+            )
+        else:
+            idxs = selected_experts.flatten().tolist()
+            outputs = []
+            for i in idxs:
+                expert = self.experts[i]
+                expert_out = expert(hidden_states)
+                outputs.append(expert_out)
+            outs = torch.cat(outputs, dim=0)
+            reshaped_topk_weight = routing_weights.squeeze(0).unsqueeze(-1)
+            final_hidden_states = (outs * reshaped_topk_weight).sum(dim=0, keepdim=True)
+    else:
+        final_hidden_states = torch.zeros(
+            (batch_size * sequence_length, hidden_dim),
+            dtype=hidden_states.dtype, device=hidden_states.device
+        )
+
+        # One hot encode the selected experts to create an expert mask
+        # this will be used to easily index which expert is going to be sollicitated
+        expert_mask = torch.nn.functional.one_hot(selected_experts,
+                                                  num_classes=self.num_experts).permute(2, 1, 0)
+
+        # Loop over all available experts in the model and perform the computation on each expert
+        for expert_idx in range(self.num_experts):
+            expert_layer = self.experts[expert_idx]
+            idx, top_x = torch.where(expert_mask[expert_idx])
+
+            # Index the correct hidden states and compute the expert hidden state for
+            # the current expert. We need to make sure to multiply the output hidden
+            # states by `routing_weights` on the corresponding tokens (top-1 and top-2)
+            current_state = hidden_states[None, top_x].reshape(-1, hidden_dim)
+            current_hidden_states = expert_layer(current_state) * routing_weights[top_x, idx, None]
+
+            # However `index_add_` only support torch tensors for indexing so we'll use
+            # the `top_x` tensor here.
+            final_hidden_states.index_add_(0, top_x, current_hidden_states.to(hidden_states.dtype))
+    final_hidden_states = final_hidden_states.reshape(batch_size, sequence_length, hidden_dim)
+    return final_hidden_states, router_logits
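For intuition, here is the softmax-plus-top-k routing math from the non-fused branch above, run on a tiny toy tensor (4 experts, top-2) with no ipex-llm kernels involved; the shapes and seed are arbitrary.

# Toy walk-through of the routing used in the non-XPU branch above: each token gets
# probabilities over experts, keeps the top_k best, and renormalizes those weights.
import torch

torch.manual_seed(0)
num_experts, top_k = 4, 2
router_logits = torch.randn(3, num_experts)          # 3 tokens, 4 experts

routing_weights = torch.nn.functional.softmax(router_logits, dim=1, dtype=torch.float)
routing_weights, selected_experts = torch.topk(routing_weights, top_k, dim=-1)
routing_weights /= routing_weights.sum(dim=-1, keepdim=True)   # the norm_topk_prob=True case

print(selected_experts)   # expert indices chosen per token, shape [3, 2]
print(routing_weights)    # per-token weights that now sum to 1 over the chosen experts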
ipex_llm/vllm/xpu/model_convert.py
CHANGED
@@ -65,13 +65,14 @@ def _model_sample_convert():
 def _ipex_llm_convert(load_in_low_bit):
     # import pdb
     # pdb.set_trace()
-    from vllm.worker.xpu_model_runner import XPUModelRunner
+    from vllm.worker.xpu_model_runner import XPUModelRunner, XPUModelRunnerBase
     from ipex_llm.vllm.xpu.ipex_llm_wrapper import get_ipex_llm_wrapper
     from ipex_llm.vllm.xpu.ipex_llm_v1_wrapper import get_ipex_llm_v1_wrapper
     import vllm.executor.ray_utils as ray_utils_v0
     import vllm.v1.executor.ray_utils as ray_utils_v1
     from vllm.v1.worker.gpu_model_runner import GPUModelRunner
     setattr(XPUModelRunner, "load_model", get_load_function(load_in_low_bit))
+    setattr(XPUModelRunnerBase, "load_model", get_load_function(load_in_low_bit))
     setattr(GPUModelRunner, "load_model", get_load_function(load_in_low_bit))
     setattr(ray_utils_v0, "RayWorkerWrapper", get_ipex_llm_wrapper(load_in_low_bit))
     setattr(ray_utils_v1, "RayWorkerWrapper", get_ipex_llm_v1_wrapper(load_in_low_bit))
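The extra setattr on XPUModelRunnerBase follows the same class-level monkey-patching pattern already used for XPUModelRunner: replacing load_model on the class swaps the implementation for every instance, and for any subclass that does not override it. A generic, self-contained illustration of that pattern (all class and function names below are invented for the sketch, unrelated to vLLM):

# Generic illustration of class-level monkey-patching, the technique used by
# _ipex_llm_convert() above. Runner/patched_load are made-up names for this sketch.
class RunnerBase:
    def load_model(self) -> str:
        return "original loader"

class Runner(RunnerBase):
    pass  # inherits load_model from the base class

def get_patched_load(low_bit: str):
    def patched_load(self) -> str:
        return f"low-bit loader ({low_bit})"
    return patched_load

# Patching the base class changes behavior for the subclass too,
# which is why covering both the base and the concrete runner is useful.
setattr(RunnerBase, "load_model", get_patched_load("sym_int4"))
print(Runner().load_model())   # -> "low-bit loader (sym_int4)"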
{ipex_llm-2.3.0b20250506.dist-info → ipex_llm-2.3.0b20250509.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ipex-llm
-Version: 2.3.0b20250506
+Version: 2.3.0b20250509
 Summary: Large Language Model Develop Toolkit
 Home-page: https://github.com/intel-analytics/ipex-llm
 Author: BigDL Authors

@@ -27,7 +27,7 @@ Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'all'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'all'
 Provides-Extra: cpp
-Requires-Dist: bigdl-core-cpp ==2.7. ; extra == 'cpp'
+Requires-Dist: bigdl-core-cpp ==2.7.0b20250509 ; extra == 'cpp'
 Requires-Dist: setuptools ; extra == 'cpp'
 Requires-Dist: onednn-devel ==2025.0.1 ; (platform_system == "Windows") and extra == 'cpp'
 Requires-Dist: onednn ==2025.0.1 ; (platform_system == "Windows") and extra == 'cpp'

@@ -60,7 +60,7 @@ Requires-Dist: transformers ==4.40.0 ; extra == 'npu'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'npu'
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'npu'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'npu'
-Requires-Dist: bigdl-core-npu ==2.7. ; (platform_system == "Windows") and extra == 'npu'
+Requires-Dist: bigdl-core-npu ==2.7.0b20250509 ; (platform_system == "Windows") and extra == 'npu'
 Provides-Extra: serving
 Requires-Dist: py-cpuinfo ; extra == 'serving'
 Requires-Dist: fschat[model_worker,webui] ==0.2.36 ; extra == 'serving'

@@ -80,9 +80,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-21 ==2.7. ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-batch-21 ==2.7. ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-addons-21 ==2.7. ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-21 ==2.7.0b20250509 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.7.0b20250509 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.7.0b20250509 ; extra == 'xpu'
 Provides-Extra: xpu-2-1
 Requires-Dist: py-cpuinfo ; extra == 'xpu-2-1'
 Requires-Dist: protobuf ; extra == 'xpu-2-1'

@@ -97,9 +97,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu-2-1'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-21 ==2.7. ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-batch-21 ==2.7. ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-addons-21 ==2.7. ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-21 ==2.7.0b20250509 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.7.0b20250509 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.7.0b20250509 ; extra == 'xpu-2-1'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-1'
 Requires-Dist: dpcpp-cpp-rt ==2024.0.2 ; (platform_system == "Windows") and extra == 'xpu-2-1'
 Requires-Dist: mkl-dpcpp ==2024.0.0 ; (platform_system == "Windows") and extra == 'xpu-2-1'

@@ -117,7 +117,7 @@ Requires-Dist: setuptools ; extra == 'xpu-2-6'
 Requires-Dist: torch ==2.6.0+xpu ; extra == 'xpu-2-6'
 Requires-Dist: torchvision ==0.21.0+xpu ; extra == 'xpu-2-6'
 Requires-Dist: torchaudio ==2.6.0+xpu ; extra == 'xpu-2-6'
-Requires-Dist: bigdl-core-xe-all ==2.7. ; extra == 'xpu-2-6'
+Requires-Dist: bigdl-core-xe-all ==2.7.0b20250509 ; extra == 'xpu-2-6'
 Requires-Dist: onednn-devel ==2025.0.1 ; extra == 'xpu-2-6'
 Requires-Dist: onednn ==2025.0.1 ; extra == 'xpu-2-6'
 Requires-Dist: dpcpp-cpp-rt ==2025.0.2 ; extra == 'xpu-2-6'

@@ -132,7 +132,7 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-2-6-arl'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-2-6-arl'
 Requires-Dist: tabulate ; extra == 'xpu-2-6-arl'
 Requires-Dist: setuptools ; extra == 'xpu-2-6-arl'
-Requires-Dist: bigdl-core-xe-all ==2.7. ; extra == 'xpu-2-6-arl'
+Requires-Dist: bigdl-core-xe-all ==2.7.0b20250509 ; extra == 'xpu-2-6-arl'
 Requires-Dist: onednn-devel ==2025.0.1 ; extra == 'xpu-2-6-arl'
 Requires-Dist: onednn ==2025.0.1 ; extra == 'xpu-2-6-arl'
 Requires-Dist: dpcpp-cpp-rt ==2025.0.2 ; extra == 'xpu-2-6-arl'
{ipex_llm-2.3.0b20250506.dist-info → ipex_llm-2.3.0b20250509.dist-info}/RECORD
CHANGED
@@ -41,35 +41,35 @@ ipex_llm/langchain/llms/transformerspipelinellm.py,sha256=vm522YPPwWxxAPVvQBtxRf
 ipex_llm/langchain/vllm/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
 ipex_llm/langchain/vllm/vllm.py,sha256=6dxc-ZISZQrJilEa_HA827l75Dv9rcHpY_G6FdJ8BVs,7793
 ipex_llm/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ipex_llm/libs/bloom-api.dll,sha256=
-ipex_llm/libs/bloom.dll,sha256=
-ipex_llm/libs/gptneox-api.dll,sha256=
-ipex_llm/libs/gptneox.dll,sha256=
-ipex_llm/libs/libbloom_avx.dll,sha256=
-ipex_llm/libs/libbloom_vnni.dll,sha256=
-ipex_llm/libs/libgptneox_avx.dll,sha256=
-ipex_llm/libs/libgptneox_vnni.dll,sha256=
-ipex_llm/libs/libllama_avx.dll,sha256=
-ipex_llm/libs/libllama_vnni.dll,sha256=
-ipex_llm/libs/libstarcoder_avx.dll,sha256=
-ipex_llm/libs/libstarcoder_vnni.dll,sha256=
-ipex_llm/libs/llama-api.dll,sha256=
-ipex_llm/libs/llama.dll,sha256=
-ipex_llm/libs/main-bloom.exe,sha256=
-ipex_llm/libs/main-gptneox.exe,sha256=
-ipex_llm/libs/main-llama.exe,sha256=
-ipex_llm/libs/main-starcoder.exe,sha256=
-ipex_llm/libs/pipeline.dll,sha256=
-ipex_llm/libs/quantize-bloom.exe,sha256=
-ipex_llm/libs/quantize-bloom_vnni.exe,sha256=
-ipex_llm/libs/quantize-gptneox.exe,sha256=
-ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=
-ipex_llm/libs/quantize-llama.exe,sha256=
-ipex_llm/libs/quantize-llama_vnni.exe,sha256=
-ipex_llm/libs/quantize-starcoder.exe,sha256=
-ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=
-ipex_llm/libs/starcoder-api.dll,sha256=
-ipex_llm/libs/starcoder.dll,sha256=
+ipex_llm/libs/bloom-api.dll,sha256=HBP-BbpyYlO31IgWLn5ZB9C2WkoWxm0WmYfYnDxJoQo,36352
+ipex_llm/libs/bloom.dll,sha256=_6SaObZPPHPOa2G6RWv0W3i4EM9hbWqJf486bKjIxho,507904
+ipex_llm/libs/gptneox-api.dll,sha256=sC_r9LIUOESDNjJ6bSsj4rGGWGCalzuuw5OErW7i4jM,24576
+ipex_llm/libs/gptneox.dll,sha256=ftd-p6ySrfVuxeu36Dutp5Uyqh9aB7WQZVh7JoTsV2k,568320
+ipex_llm/libs/libbloom_avx.dll,sha256=b7Skvd13qX9gqWFi8OqokYIpISmQp0R8yX7qCNuM-iY,536576
+ipex_llm/libs/libbloom_vnni.dll,sha256=tAjaQnWSok1SGNNWpd4enfiXylU_hOvD2nF7dg1Wpg8,508416
+ipex_llm/libs/libgptneox_avx.dll,sha256=7MJoXekhZUUsUvpEZE5KjqEHA9MbrxQ6GjoJtLuFsOE,596992
+ipex_llm/libs/libgptneox_vnni.dll,sha256=dGHlovMvxutjqOPO3End_C-yPV2eUzQGuXFAITDzX7s,568832
+ipex_llm/libs/libllama_avx.dll,sha256=R2atFxM77WRUj88q5lSUmzZFgjS47Ji4t74KbZUtJ4I,591360
+ipex_llm/libs/libllama_vnni.dll,sha256=S7UGEME_CP63vd6bANtBVpYesSBt8x3elU0F_KYLRYY,563200
+ipex_llm/libs/libstarcoder_avx.dll,sha256=qc-KjhGCG_ZnjnsI1U1etwT78N61WHlk1VvllXqlgh4,627712
+ipex_llm/libs/libstarcoder_vnni.dll,sha256=KuvvKh9fJft9CAfaAMCleCCiswEJgCPKCn07vpnxlZw,599552
+ipex_llm/libs/llama-api.dll,sha256=qoI-5RdriF-KJUwi8oPGxE7kY0fRI4PdXpKU4jQIjkw,25600
+ipex_llm/libs/llama.dll,sha256=TgPMXRStpN0GPKihhO6pXw3AO63I5mPl-6c3SrzZmmk,562688
+ipex_llm/libs/main-bloom.exe,sha256=dN_BFeid3ql6XwWFTc0xr84ZvoNA13H6sKNjBevJbnc,103424
+ipex_llm/libs/main-gptneox.exe,sha256=7LUbaYGj0Kh2DaoTYB58mqRc8mCdUZgHauHMwqxor2Q,98816
+ipex_llm/libs/main-llama.exe,sha256=hJ9ti6VC-3JmnK5ZtOGq65xx8cz4Tz7npo1fNt-YnpY,99840
+ipex_llm/libs/main-starcoder.exe,sha256=dCOM_sW-QsMgT0h9kt9pC0qahnPTYY5ZHSk4JpJrLeA,157696
+ipex_llm/libs/pipeline.dll,sha256=qYOqRYp9eHwi1NL3gjui1qufqTcBqPgPQ5RdE7jPOxE,73216
+ipex_llm/libs/quantize-bloom.exe,sha256=UpuSnaB_7gOJE_OHoWKlY1gS8nn3XGEKqKfm7fIJBj8,126464
+ipex_llm/libs/quantize-bloom_vnni.exe,sha256=yLkIzDPlrV7k4LZrrd4sONVs_V13-12012qS41YIMaU,128000
+ipex_llm/libs/quantize-gptneox.exe,sha256=swmEmXDzyXq1A0WGDO_j1j9JE7FsHiDGmqXJsTH0Yg8,104448
+ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=nA25jWQYatXaOiUQ6YdNbXfN8_zGkPT1yws3bLYQHIY,104960
+ipex_llm/libs/quantize-llama.exe,sha256=KljH89T3oxBJXdC_KE2ajEL5PACKqbduUuYSdHXbTdE,110080
+ipex_llm/libs/quantize-llama_vnni.exe,sha256=sIqQS6o6mIaI_6nRTWkvPxzTJgkYc6WF7T_aidzAFHg,110592
+ipex_llm/libs/quantize-starcoder.exe,sha256=iH2agDG0h11W8zn_2kEV4InzUiJ8TMVCXg9JFw60UIk,127488
+ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=Oj8Nc3iBYhJrCril6UNVQpiZX0htfZhjYHkI3ysDz3A,128512
+ipex_llm/libs/starcoder-api.dll,sha256=ZV9wMoWgswCrpBye8qCA8xy6Vw1rKxfg_0S_Ih7hThg,21504
+ipex_llm/libs/starcoder.dll,sha256=tHMgcTu0oDJnuKKFKGO_chQwxrcgMQewKPQJ_JMQvQ0,599040
 ipex_llm/llamaindex/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
 ipex_llm/llamaindex/llms/__init__.py,sha256=KP1lEdGqDuxPoxL1ZSH25Pm2kKMPJBWUTLR0ckSLMIU,1139
 ipex_llm/llamaindex/llms/bigdlllm.py,sha256=FQBzq1KOjfc6uofTXAha3O7TqpJkNfOFepXQmOVlbnI,26314

@@ -87,7 +87,7 @@ ipex_llm/serving/fastchat/tgi_api_protocol.py,sha256=brT3k3-V0NJrU4fRqUwWjC0O3iO
 ipex_llm/serving/fastchat/tgi_api_server.py,sha256=agNTAEiZPSuj3dEdIdYKwkoY0cXOUDX06DiM9VP2knQ,24418
 ipex_llm/serving/fastchat/vllm_worker.py,sha256=ZLz2Q9GxJO6r_LOiP6epgCRjBGk-K4EB1SNEWSJp5DA,11091
 ipex_llm/transformers/__init__.py,sha256=BreA3EY6hkNq0rVixb_sUuTLzMrcWXTt3yRsshCPHHQ,1214
-ipex_llm/transformers/convert.py,sha256=
+ipex_llm/transformers/convert.py,sha256=mJWejcYwe2gCqPsLFchq_umhkwG_6tchikEHGWyQWy8,109431
 ipex_llm/transformers/convert_ipex.py,sha256=_nSnUTQy-yfkKaqGdqnBdWztZf3NGmnbZ0TKaDrF4X4,14617
 ipex_llm/transformers/embedding.py,sha256=bdgk59DvD4ZZyxRzewXOR7g56nThgO6uhIwk8QL7f-s,9299
 ipex_llm/transformers/kv.py,sha256=src_HcVDKFwQ1V8hdTrFQw5RIwUewM9VOR47GVTPJG4,21187

@@ -146,7 +146,7 @@ ipex_llm/transformers/models/chatglm.py,sha256=DQM63oPIVMMTBQN4O4hPF4WY1aSiTWq4B
 ipex_llm/transformers/models/chatglm2.py,sha256=KyAIX7zGVQDQuwwM3QMBNWZbTeMHEzKUIgAryT0voHc,14933
 ipex_llm/transformers/models/chatglm4.py,sha256=QvUehdaCePB3MNHyWg3dneDxmjtBdxYeKUyQUVcsgfM,16886
 ipex_llm/transformers/models/chatglm4v.py,sha256=Ba9Xtzwtzk_rzg5khGqDrlHfJsDwc5YcM5_yPoord7o,13324
-ipex_llm/transformers/models/common.py,sha256=
+ipex_llm/transformers/models/common.py,sha256=wd4DwvTuB12m-e8Rbr-DWIO8nyioqGYTR3cGSSQXIEQ,15771
 ipex_llm/transformers/models/decilm.py,sha256=P-PBuDPf07GvKggLwJx_wPwIn6esN3rX8ai2JxRuZmE,5246
 ipex_llm/transformers/models/deepseek.py,sha256=BJocxhznzvM99IZeAWuhyHEBm6Z2-AwhLFYMdjMJuc4,13083
 ipex_llm/transformers/models/deepseek_v3.py,sha256=CTgwIKQlUPlUCbOxc9Id5GapWkXOP6pMtkguYrWpCio,10003

@@ -159,7 +159,7 @@ ipex_llm/transformers/models/gptneox.py,sha256=loRh1x_5S6BCeOr_s5xr-N_1SQHL3Y5Ii
 ipex_llm/transformers/models/internlm.py,sha256=JZFrI2HXsIAfM-6pA2RO0wcXopOliC1FggLMzNzaDZ4,17404
 ipex_llm/transformers/models/internvl.py,sha256=Vx0vENIEQLX2M6P398mw5TOhpks0U8xf8rtRQvy94go,8154
 ipex_llm/transformers/models/janus.py,sha256=0URo2NC8_2CGaOl3CiVB3IFTVsYyplMFgjBJdPDNBsY,1509
-ipex_llm/transformers/models/llama.py,sha256=
+ipex_llm/transformers/models/llama.py,sha256=lp5FEDd4SF753axuZSw8mdqZaZbgdeSX0_gcmCgy9To,9124
 ipex_llm/transformers/models/minicpm.py,sha256=eaPNVNrep0_xGoELhZd886ff0ceoKqB6cusdAhd52eE,10145
 ipex_llm/transformers/models/minicpm3.py,sha256=37P_yMjw8RIzy27qL_E7kzbQRNW6f0xYQNK9xtoe5kI,10183
 ipex_llm/transformers/models/minicpmv.py,sha256=PP05b5iTnrMpiseCn8iJcxKJDnfq7WqXp9Mrch0kKZ0,9876

@@ -171,9 +171,11 @@ ipex_llm/transformers/models/phi3.py,sha256=AaWB7TPQdrDYgpcVHglG0Q0480bxNOw1mFeP
 ipex_llm/transformers/models/phixtral.py,sha256=MDTMghcu7qAmZmRcUGqXXDXhSU3y_N59HRIXmlcjp5g,4890
 ipex_llm/transformers/models/qwen.py,sha256=A3WiVCzA7NLkcjp4zhFkZvKZzZWZlg0WFuVV_556TAI,19543
 ipex_llm/transformers/models/qwen2.py,sha256=zK-FpUaxEhjD4gZa1ZvArodAilz29T_cpeAqfCGosc0,14317
-ipex_llm/transformers/models/qwen2_5_omni.py,sha256=
+ipex_llm/transformers/models/qwen2_5_omni.py,sha256=Vb4OqXXa2hBN3HvIVbal1zeflJLtmo1KCA6wwQplLTk,18832
 ipex_llm/transformers/models/qwen2_moe.py,sha256=a0gYo-ngf8SxaEnBdZUJDnPS6Mkn_poDd8xqhx50icI,19516
 ipex_llm/transformers/models/qwen2_vl.py,sha256=G-9e2oN4f5p5IWQ-zsBZuONxTura3BjlgyT2meigbHQ,13579
+ipex_llm/transformers/models/qwen3.py,sha256=uMVtfezBOLotNDHoUyMKZhwAPTn9pkV4yDATf2hRThE,4760
+ipex_llm/transformers/models/qwen3_moe.py,sha256=4S0nxMRA3gneaAcXNbPowgZq70_rb3L9PObRQtUNsww,6812
 ipex_llm/transformers/models/qwen_vl.py,sha256=lwNwCJNsBvOu1TGNDW_E2IlCjI7XZwHY8qIcZpuNDFc,17187
 ipex_llm/transformers/models/rwkv4.py,sha256=H4KMtxN0JA2ZTXnonHpsUUJ5xULemo-D1Jzl0ri_UY8,6123
 ipex_llm/transformers/models/rwkv5.py,sha256=OkRNj1pCAZg1z2Fw-I0DEnxLEdZyPeRSQ6msrkxLOCs,10710

@@ -251,16 +253,16 @@ ipex_llm/vllm/cpu/entrypoints/openai/cli_args.py,sha256=hB398yYtKauASRzevctScdbF
 ipex_llm/vllm/xpu/__init__.py,sha256=zBSG6nzrVF5QnpR6_f7kPhBFeowTE9gaZ7D5m98E7_w,585
 ipex_llm/vllm/xpu/ipex_llm_v1_wrapper.py,sha256=pd939vFomKIg9Qn2NO4u0OF6hPgvQpqcfJSxqBzcqhA,825
 ipex_llm/vllm/xpu/ipex_llm_wrapper.py,sha256=_CbhvBuf_KPnmLfngYKtJl5gPAHVsG2mWth3wSeaH3M,892
-ipex_llm/vllm/xpu/model_convert.py,sha256=
+ipex_llm/vllm/xpu/model_convert.py,sha256=vkzH9quwVcjoAviPWz2IdbPec9u2YTdG6KNUEt6j7dU,9643
 ipex_llm/vllm/xpu/engine/__init__.py,sha256=sOvwLx_Zj0jiRCGj9W3DgGTfcSU3hABYhgIQI7T6cxU,879
 ipex_llm/vllm/xpu/engine/engine.py,sha256=XAprw7VifjfnR915TZOaKcxe3QCFsVBgxzS8qOdn1yg,14462
 ipex_llm/vllm/xpu/entrypoints/openai/api_server.py,sha256=uWHyyHLw-B8wXBnQw9_MCG81tKK9Jb0dyq1xfYHgoNw,45905
 ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py,sha256=hB398yYtKauASRzevctScdbFIjiiSGMAe1bwEuIHrhY,10893
-ipex_llm-2.3.0b20250506.data/scripts/ipex-llm-init.bat,sha256=
-ipex_llm-2.3.0b20250506.data/scripts/llm-chat.ps1,sha256=
-ipex_llm-2.3.0b20250506.data/scripts/llm-cli.ps1,sha256=
-ipex_llm-2.3.0b20250506.dist-info/METADATA,sha256=
-ipex_llm-2.3.0b20250506.dist-info/WHEEL,sha256=
-ipex_llm-2.3.0b20250506.dist-info/entry_points.txt,sha256=
-ipex_llm-2.3.0b20250506.dist-info/top_level.txt,sha256=
-ipex_llm-2.3.0b20250506.dist-info/RECORD,,
+ipex_llm-2.3.0b20250509.data/scripts/ipex-llm-init.bat,sha256=HPtCYuDYwEatq7dAwOvdfVcHYCpAVdbj75K1qh0vQek,2578
+ipex_llm-2.3.0b20250509.data/scripts/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
+ipex_llm-2.3.0b20250509.data/scripts/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
+ipex_llm-2.3.0b20250509.dist-info/METADATA,sha256=QK-O--aciL8G_UH1WFbDSkik8FgjLC7aIGyVMKt3bgI,8865
+ipex_llm-2.3.0b20250509.dist-info/WHEEL,sha256=6iYPr8vTHsyDK75jr9X0V3I9wPSVmtwr_8fdATBciGk,98
+ipex_llm-2.3.0b20250509.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
+ipex_llm-2.3.0b20250509.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
+ipex_llm-2.3.0b20250509.dist-info/RECORD,,
{ipex_llm-2.3.0b20250506.data → ipex_llm-2.3.0b20250509.data}/scripts/ipex-llm-init.bat
File without changes

{ipex_llm-2.3.0b20250506.data → ipex_llm-2.3.0b20250509.data}/scripts/llm-chat.ps1
File without changes

{ipex_llm-2.3.0b20250506.data → ipex_llm-2.3.0b20250509.data}/scripts/llm-cli.ps1
File without changes

{ipex_llm-2.3.0b20250506.dist-info → ipex_llm-2.3.0b20250509.dist-info}/WHEEL
File without changes

{ipex_llm-2.3.0b20250506.dist-info → ipex_llm-2.3.0b20250509.dist-info}/entry_points.txt
File without changes

{ipex_llm-2.3.0b20250506.dist-info → ipex_llm-2.3.0b20250509.dist-info}/top_level.txt
File without changes