ipex-llm 2.2.0b20250224__py3-none-manylinux2010_x86_64.whl → 2.2.0b20250225__py3-none-manylinux2010_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1070,7 +1070,9 @@ def _optimize_pre(model, qtype=None):
1070
1070
  model.apply(pre_register_inv_freq)
1071
1071
  elif model.config.model_type == "multi_modality":
1072
1072
  _optimize_pre(model.language_model)
1073
-
1073
+ elif model.config.model_type == "deepseek_v3" and model.config.hidden_size == 2048:
1074
+ from ipex_llm.transformers.models.deepseek import padding_mla_v_hd
1075
+ model.apply(padding_mla_v_hd)
1074
1076
  return model
1075
1077
 
1076
1078
 
@@ -2023,6 +2025,17 @@ def _optimize_post(model):
2023
2025
 
2024
2026
  # llm
2025
2027
  _optimize_post(model.language_model)
2028
+ elif model.config.model_type == "deepseek_v3" and model.config.hidden_size == 2048:
2029
+ modeling_module_name = model.__class__.__module__
2030
+ module = importlib.import_module(modeling_module_name)
2031
+ from ipex_llm.transformers.models.common import rms_norm_forward
2032
+ from ipex_llm.transformers.models.deepseek import deepseek_model_forward
2033
+ from ipex_llm.transformers.models.deepseek import deepseek_attention_forward
2034
+ from ipex_llm.transformers.models.deepseek import deepseek_moe_forward
2035
+ convert_forward(model, module.DeepseekV3RMSNorm, rms_norm_forward)
2036
+ convert_forward(model, module.DeepseekV3Model, deepseek_model_forward)
2037
+ convert_forward(model, module.DeepseekV3Attention, deepseek_attention_forward)
2038
+ convert_forward(model, module.DeepseekV3MoE, deepseek_moe_forward)
2026
2039
 
2027
2040
  return model
2028
2041
 
@@ -95,6 +95,33 @@ def padding_attention_hd_base(module: torch.nn.Module, attention_class,
95
95
  module.old_head_dim = old_head_dim
96
96
 
97
97
 
98
def padding_mla_v_hd_base(module: torch.nn.Module, attention_class):
    """Zero-pad the value part of ``module.kv_b_proj`` up to ``q_head_dim``.

    Intended to be passed to ``model.apply``: modules that do not match
    ``attention_class`` are left untouched.

    Args:
        module: candidate module. When it matches, it must expose
            ``q_head_dim``, ``v_head_dim``, ``num_heads``,
            ``qk_nope_head_dim``, ``kv_lora_rank`` and ``kv_b_proj``.
        attention_class: either a class name (compared against
            ``module.__class__.__name__``) or a class / tuple of classes
            (checked with ``isinstance``).

    Returns:
        None. On a match with ``v_head_dim < q_head_dim`` the module's
        ``kv_b_proj`` is replaced by a bias-free ``torch.nn.Linear`` whose
        per-head value rows are extended with zeros; otherwise nothing changes.
    """
    # A string selects by class name; anything else is handed to isinstance().
    if isinstance(attention_class, str):
        is_target = module.__class__.__name__ == attention_class
    else:
        is_target = isinstance(module, attention_class)
    if not is_target:
        return

    qk_dim = module.q_head_dim
    v_dim = module.v_head_dim
    if v_dim >= qk_dim:
        # Value heads are already at least as wide as query/key heads.
        return

    heads = module.num_heads
    nope_dim = module.qk_nope_head_dim
    rank = module.kv_lora_rank

    # View the projection weight per head so the key rows and the value rows
    # of every head can be separated.
    per_head = module.kv_b_proj.weight.data.view(heads, nope_dim + v_dim, rank)
    key_rows, value_rows = per_head.split([nope_dim, v_dim], dim=1)

    # Rows beyond the original v_head_dim stay zero.
    padded_value_rows = torch.zeros([heads, qk_dim, rank],
                                    dtype=value_rows.dtype, device=value_rows.device)
    padded_value_rows[:, :v_dim, :] = value_rows
    merged = torch.cat([key_rows, padded_value_rows], dim=1).view(-1, rank)

    # Build an empty (0 x 0) Linear and attach the prepared weight afterwards,
    # so the constructor does not allocate a full-size weight of its own.
    replacement = torch.nn.Linear(0, 0, bias=False,
                                  dtype=merged.dtype, device=merged.device)
    replacement.in_features = merged.size(1)
    replacement.out_features = merged.size(0)
    replacement.weight = torch.nn.Parameter(merged, False)

    module.kv_b_proj = replacement
123
+
124
+
98
125
  def padding_states_hd(states: torch.Tensor, old_head_dim: int, new_head_dim: int):
99
126
  bsz, num_heads, seq_len, head_dim = states.size()
100
127
  if head_dim == old_head_dim and old_head_dim < new_head_dim:
@@ -0,0 +1,303 @@
1
+ #
2
+ # Copyright 2016 The BigDL Authors.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+ # Some parts of this file are adapted from
17
+ # https://huggingface.co/deepseek-ai/DeepSeek-R1/blob/main/modeling_deepseek.py
18
+ # which is licensed under Apache License 2.0:
19
+ #
20
+ # https://github.com/OpenBMB/MiniCPM/blob/main/LICENSE
21
+ #
22
+
23
+ import torch
24
+ import warnings
25
+
26
+ from typing import Optional, Tuple, List, Union
27
+ from transformers.cache_utils import Cache
28
+ from transformers.modeling_outputs import BaseModelOutputWithPast
29
+ from transformers.modeling_attn_mask_utils import _prepare_4d_causal_attention_mask
30
+
31
+ from ipex_llm.utils.common.log4Error import invalidInputError
32
+ from ipex_llm.transformers.kv import DynamicNormalCache
33
+ from ipex_llm.transformers.models.common import padding_mla_v_hd_base
34
+ from ipex_llm.transformers.models.common import scaled_dot_product_attention
35
+ from ipex_llm.transformers.models.utils import rotate_half
36
+
37
+
38
def padding_mla_v_hd(module: torch.nn.Module):
    """Run ``padding_mla_v_hd_base`` for modules whose class is named ``DeepseekV3Attention``.

    Written as a single-argument callable so it can be handed to
    ``model.apply``; the match is by class name, and the helper leaves
    non-matching modules unchanged.
    """
    target_class_name = "DeepseekV3Attention"
    padding_mla_v_hd_base(module, target_class_name)
40
+
41
+
42
def deepseek_model_forward(
    self,
    input_ids: torch.LongTensor = None,
    attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.LongTensor] = None,
    past_key_values: Optional[List[torch.FloatTensor]] = None,
    inputs_embeds: Optional[torch.FloatTensor] = None,
    use_cache: Optional[bool] = None,
    output_attentions: Optional[bool] = None,
    output_hidden_states: Optional[bool] = None,
    return_dict: Optional[bool] = None,
) -> Union[Tuple, BaseModelOutputWithPast]:
    """Replacement ``forward`` for the DeepSeek decoder stack.

    Exactly one of ``input_ids`` / ``inputs_embeds`` must be given. Flags left
    as ``None`` fall back to the values in ``self.config``. On an ``xpu``
    device caching is always switched on and the rotary cos/sin tables are
    computed once here and handed to every layer as ``position_embeddings``.

    Returns:
        A ``BaseModelOutputWithPast`` when ``return_dict`` is truthy,
        otherwise a tuple of the non-``None`` entries among
        (hidden states, cache, all hidden states, all attentions).
    """
    # Fill unspecified flags from the model config.
    if output_attentions is None:
        output_attentions = self.config.output_attentions
    if output_hidden_states is None:
        output_hidden_states = self.config.output_hidden_states
    if use_cache is None:
        use_cache = self.config.use_cache
    if return_dict is None:
        return_dict = self.config.use_return_dict

    # retrieve input_ids and inputs_embeds
    invalidInputError((input_ids is None) ^ (inputs_embeds is None),
                      "You cannot specify both input_ids and inputs_embeds at the same time, "
                      "and must specify either one")

    if inputs_embeds is None:
        inputs_embeds = self.embed_tokens(input_ids)

    batch_size, seq_length = inputs_embeds.shape[:2]

    # IPEX-LLM OPT start: kv cache
    past_key_values_length = 0
    if inputs_embeds.device.type == "xpu":
        # Caching is forced on for XPU regardless of what the caller asked for.
        use_cache = True
    if use_cache:
        if not isinstance(past_key_values, DynamicNormalCache):
            past_key_values = DynamicNormalCache.from_legacy_cache(past_key_values)
        past_key_values_length = past_key_values.get_usable_length(seq_length)
    # IPEX-LLM OPT end: kv cache

    if position_ids is None:
        # Default positions continue right after the cached tokens.
        position_ids = torch.arange(
            past_key_values_length,
            seq_length + past_key_values_length,
            dtype=torch.long,
            device=inputs_embeds.device,
        ).unsqueeze(0)

    # IPEX-LLM OPT start: fuse rope
    position_embeddings = None
    if inputs_embeds.device.type == "xpu" and position_ids is not None:
        total_length = seq_length + past_key_values_length
        cos, sin = self.layers[0].self_attn.rotary_emb(inputs_embeds, total_length)
        # Only the first row of position_ids is used to pick the cos/sin entries.
        cos = cos[position_ids[0]].contiguous()
        sin = sin[position_ids[0]].contiguous()
        position_embeddings = (cos, sin)
    # IPEX-LLM OPT end: fuse rope

    # 4d mask is passed through the layers
    attention_mask = _prepare_4d_causal_attention_mask(
        attention_mask,
        (batch_size, seq_length),
        inputs_embeds,
        past_key_values_length,
    )

    # embed positions
    hidden_states = inputs_embeds

    # decoder layers
    all_hidden_states = () if output_hidden_states else None
    all_self_attns = () if output_attentions else None
    next_decoder_cache = None
    # The cache sits after the attention weights when those are returned.
    cache_slot = 2 if output_attentions else 1

    for decoder_layer in self.layers:
        if output_hidden_states:
            all_hidden_states += (hidden_states,)

        layer_outputs = decoder_layer(
            hidden_states,
            attention_mask=attention_mask,
            position_ids=position_ids,
            past_key_value=past_key_values,
            output_attentions=output_attentions,
            use_cache=use_cache,
            position_embeddings=position_embeddings,
        )
        hidden_states = layer_outputs[0]

        if use_cache:
            next_decoder_cache = layer_outputs[cache_slot]
        if output_attentions:
            all_self_attns += (layer_outputs[1],)

    hidden_states = self.norm(hidden_states)

    # add hidden states from the last decoder layer
    if output_hidden_states:
        all_hidden_states += (hidden_states,)

    next_cache = next_decoder_cache
    if return_dict:
        return BaseModelOutputWithPast(
            last_hidden_state=hidden_states,
            past_key_values=next_cache,
            hidden_states=all_hidden_states,
            attentions=all_self_attns,
        )
    candidates = (hidden_states, next_cache, all_hidden_states, all_self_attns)
    return tuple(item for item in candidates if item is not None)
165
+
166
+
167
def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1):
    """Apply rotary position embedding to ``q`` and ``k``.

    ``cos`` / ``sin`` are indexed with ``position_ids`` and get an extra axis
    at ``unsqueeze_dim`` so they broadcast against the 4-D ``q`` / ``k``.
    Before rotation, the last axis of ``q`` and ``k`` is regrouped from
    adjacent pairs into "all first members, then all second members".

    Returns:
        Tuple ``(q_embed, k_embed)``.
    """
    def _regroup_pairs(states):
        # (..., d) -> (..., d/2, 2) -> (..., 2, d/2) -> (..., d)
        b, h, s, d = states.shape
        return states.view(b, h, s, d // 2, 2).transpose(4, 3).reshape(b, h, s, d)

    cos = cos[position_ids].unsqueeze(unsqueeze_dim)
    sin = sin[position_ids].unsqueeze(unsqueeze_dim)

    q = _regroup_pairs(q)
    k = _regroup_pairs(k)

    q_embed = (q * cos) + (rotate_half(q) * sin)
    k_embed = (k * cos) + (rotate_half(k) * sin)
    return q_embed, k_embed
180
+
181
+
182
def deepseek_attention_forward(
    self,
    hidden_states: torch.Tensor,
    attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.LongTensor] = None,
    past_key_value: Optional[Cache] = None,
    output_attentions: bool = False,
    use_cache: bool = False,
    **kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
    """Replacement ``forward`` for the DeepSeek attention module.

    The output of ``kv_b_proj`` is read with a per-head value width of
    ``q_head_dim``, i.e. it relies on the projection having been widened
    beforehand; the attention output is cut back to ``v_head_dim`` before
    ``o_proj``.

    When ``position_embeddings`` (a ``(cos, sin)`` pair) is present in
    ``kwargs``, rotary embedding is applied in place through ``xe_addons``;
    otherwise it is computed with ``self.rotary_emb`` and
    ``apply_rotary_pos_emb``.

    Returns:
        ``(attn_output, None, past_key_value)``; attention weights are never
        produced, whatever ``output_attentions`` says.
    """
    if "padding_mask" in kwargs:
        warnings.warn(
            "Passing `padding_mask` is deprecated and will be removed in v4.37. "
            "Please make sure use `attention_mask` instead.`"
        )

    bsz, q_len, _ = hidden_states.size()

    # Query projection: direct, or low-rank (a -> layernorm -> b).
    if self.q_lora_rank is None:
        q = self.q_proj(hidden_states)
    else:
        q_latent = self.q_a_layernorm(self.q_a_proj(hidden_states))
        q = self.q_b_proj(q_latent)
    q = q.view(bsz, q_len, self.num_heads, self.q_head_dim).transpose(1, 2)

    # Joint projection yields the compressed kv latent plus the rope key part.
    kv_with_rope = self.kv_a_proj_with_mqa(hidden_states)
    compressed_kv, k_pe = torch.split(
        kv_with_rope, [self.kv_lora_rank, self.qk_rope_head_dim], dim=-1
    )
    k_pe = k_pe.view(bsz, q_len, 1, self.qk_rope_head_dim).transpose(1, 2)

    kv = self.kv_b_proj(self.kv_a_layernorm(compressed_kv))
    kv = kv.view(bsz, q_len, self.num_heads, self.qk_nope_head_dim + self.q_head_dim)
    kv = kv.transpose(1, 2)

    k_nope, value_states = torch.split(
        kv, [self.qk_nope_head_dim, self.q_head_dim], dim=-1
    )

    kv_seq_len = value_states.shape[-2]
    if past_key_value is not None:
        kv_seq_len += past_key_value.get_usable_length(kv_seq_len, self.layer_idx)

    position_embeddings = kwargs.get("position_embeddings", None)
    if position_embeddings is None:
        q_nope, q_pe = torch.split(
            q, [self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1
        )
        cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len)
        q_pe, k_pe = apply_rotary_pos_emb(q_pe, k_pe, cos, sin, position_ids)

        # Assemble [nope | rope] along the last axis for queries and keys.
        query_states = k_pe.new_empty(bsz, self.num_heads, q_len, self.q_head_dim)
        query_states[:, :, :, : self.qk_nope_head_dim] = q_nope
        query_states[:, :, :, self.qk_nope_head_dim:] = q_pe

        key_states = k_pe.new_empty(bsz, self.num_heads, q_len, self.q_head_dim)
        key_states[:, :, :, : self.qk_nope_head_dim] = k_nope
        key_states[:, :, :, self.qk_nope_head_dim:] = k_pe
    else:
        query_states = q
        # The single rope key head is expanded across all attention heads.
        key_states = torch.cat(
            [k_nope, k_pe.expand([-1, self.num_heads, -1, -1])],
            dim=-1
        )
        import xe_addons
        cos, sin = position_embeddings
        # Operates in place on the rope slices of the query and key tensors.
        xe_addons.rotary_two_with_cache_inplaced(query_states[:, :, :, self.qk_nope_head_dim:],
                                                 key_states[:, :, :, self.qk_nope_head_dim:],
                                                 cos, sin, True)

    if past_key_value is not None:
        key_states, value_states = past_key_value.update(key_states, value_states,
                                                         self.layer_idx, None)

    # Attention weights are not materialised by this implementation.
    attn_weights = None
    attn_output = scaled_dot_product_attention(
        query_states, key_states, value_states,
        attention_mask, q_len == kv_seq_len, self.softmax_scale
    )
    # Keep only the first v_head_dim channels of every head.
    attn_output = attn_output[:, :, :, :self.v_head_dim]
    attn_output = attn_output.transpose(1, 2).contiguous()
    attn_output = attn_output.reshape(bsz, q_len, self.num_heads * self.v_head_dim)
    attn_output = self.o_proj(attn_output)

    return attn_output, attn_weights, past_key_value
272
+
273
+
274
def moe_infer_decode(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_weight: torch.Tensor):
    """Run ``x`` through each selected expert and return their weighted sum.

    Args:
        self: object exposing an indexable ``experts`` collection of callables.
        x: input passed unchanged to every selected expert.
        topk_ids: selected expert indices; flattened and converted to a
            Python list.
        topk_weight: weights for the selected experts; its leading axis is
            squeezed away, so a single row of weights is assumed.

    Returns:
        Sum over the selected experts of ``expert(x) * weight``, keeping a
        leading axis of size 1.
    """
    selected = topk_ids.flatten().tolist()
    # One output per selected expert, stacked along dim 0.
    stacked = torch.cat([self.experts[expert_id](x) for expert_id in selected], dim=0)
    weights = topk_weight.squeeze(0).unsqueeze(-1).to(stacked.dtype)
    return (stacked * weights).sum(dim=0, keepdim=True)
285
+
286
+
287
def deepseek_moe_forward(self, hidden_states: torch.Tensor):
    """Replacement ``forward`` for the DeepSeek MoE block (inference only).

    Tokens are routed with ``self.gate``. When the gate returns a single row
    of expert indices, the experts are evaluated through ``moe_infer_decode``;
    otherwise the module's own ``moe_infer`` is used. If
    ``self.config.n_shared_experts`` is not ``None``, the output of
    ``self.shared_experts`` on the original input is added.

    Args:
        hidden_states: input activations; the last axis is the hidden size.

    Returns:
        Tensor with the same shape as ``hidden_states``.

    Training mode is not implemented. It is reported through
    ``invalidInputError`` up front instead of failing later on an unassigned
    local variable.
    """
    if self.training:
        invalidInputError(False,
                          "deepseek_moe_forward only supports inference, "
                          "please switch the model to eval mode first")

    identity = hidden_states
    orig_shape = hidden_states.shape
    topk_idx, topk_weight = self.gate(hidden_states)
    # Flatten every leading axis so each row is one token.
    hidden_states = hidden_states.view(-1, hidden_states.shape[-1])

    # IPEX-LLM OPT start : add special moe_infer implementation for decoding
    if topk_idx.size(0) == 1:
        y = moe_infer_decode(self, hidden_states, topk_idx, topk_weight)
    else:
        y = self.moe_infer(hidden_states, topk_idx, topk_weight)
    y = y.view(*orig_shape)
    # IPEX-LLM OPT end

    if self.config.n_shared_experts is not None:
        y = y + self.shared_experts(identity)
    return y
@@ -1,3 +1,25 @@
1
+ #
2
+ # Copyright 2016 The BigDL Authors.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+ # Some parts of this file are adapted from
17
+ # https://hf-mirror.com/openbmb/MiniCPM3-4B/blob/main/modeling_minicpm.py
18
+ # which is licensed under Apache License 2.0:
19
+ #
20
+ # https://github.com/OpenBMB/MiniCPM/blob/main/LICENSE
21
+ #
22
+
1
23
  import torch
2
24
  import warnings
3
25
 
@@ -122,9 +144,6 @@ def minicpm3_attention_forward(
122
144
 
123
145
  q = self.q_b_proj(self.q_a_layernorm(self.q_a_proj(hidden_states)))
124
146
  q = q.view(bsz, q_len, self.num_heads, self.q_head_dim).transpose(1, 2)
125
- q_nope, q_pe = torch.split(
126
- q, [self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1
127
- )
128
147
 
129
148
  compressed_kv = self.kv_a_proj_with_mqa(hidden_states)
130
149
  compressed_kv, k_pe = torch.split(
@@ -169,6 +188,9 @@ def minicpm3_attention_forward(
169
188
  else:
170
189
  invalidInputError(f"unknown rope method: {self.rotary_emb.__class__.__name__}")
171
190
  else:
191
+ q_nope, q_pe = torch.split(
192
+ q, [self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1
193
+ )
172
194
  cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len)
173
195
  q_pe, k_pe = apply_rotary_pos_emb(q_pe, k_pe, cos, sin, position_ids)
174
196
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ipex-llm
3
- Version: 2.2.0b20250224
3
+ Version: 2.2.0b20250225
4
4
  Summary: Large Language Model Develop Toolkit
5
5
  Home-page: https://github.com/intel-analytics/ipex-llm
6
6
  Author: BigDL Authors
@@ -27,7 +27,7 @@ Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine
27
27
  Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'all'
28
28
  Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'all'
29
29
  Provides-Extra: cpp
30
- Requires-Dist: bigdl-core-cpp ==2.6.0b20250224 ; extra == 'cpp'
30
+ Requires-Dist: bigdl-core-cpp ==2.6.0b20250225 ; extra == 'cpp'
31
31
  Requires-Dist: setuptools ; extra == 'cpp'
32
32
  Requires-Dist: onednn-devel ==2025.0.1 ; (platform_system == "Windows") and extra == 'cpp'
33
33
  Requires-Dist: onednn ==2025.0.1 ; (platform_system == "Windows") and extra == 'cpp'
@@ -60,7 +60,7 @@ Requires-Dist: transformers ==4.40.0 ; extra == 'npu'
60
60
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'npu'
61
61
  Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'npu'
62
62
  Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'npu'
63
- Requires-Dist: bigdl-core-npu ==2.6.0b20250224 ; (platform_system == "Windows") and extra == 'npu'
63
+ Requires-Dist: bigdl-core-npu ==2.6.0b20250225 ; (platform_system == "Windows") and extra == 'npu'
64
64
  Provides-Extra: serving
65
65
  Requires-Dist: py-cpuinfo ; extra == 'serving'
66
66
  Requires-Dist: fschat[model_worker,webui] ==0.2.36 ; extra == 'serving'
@@ -80,9 +80,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu'
80
80
  Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu'
81
81
  Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu'
82
82
  Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu'
83
- Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250224 ; extra == 'xpu'
84
- Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250224 ; extra == 'xpu'
85
- Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250224 ; extra == 'xpu'
83
+ Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250225 ; extra == 'xpu'
84
+ Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250225 ; extra == 'xpu'
85
+ Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250225 ; extra == 'xpu'
86
86
  Provides-Extra: xpu-2-1
87
87
  Requires-Dist: py-cpuinfo ; extra == 'xpu-2-1'
88
88
  Requires-Dist: protobuf ; extra == 'xpu-2-1'
@@ -97,9 +97,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu-2-1'
97
97
  Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu-2-1'
98
98
  Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu-2-1'
99
99
  Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu-2-1'
100
- Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250224 ; extra == 'xpu-2-1'
101
- Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250224 ; extra == 'xpu-2-1'
102
- Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250224 ; extra == 'xpu-2-1'
100
+ Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250225 ; extra == 'xpu-2-1'
101
+ Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250225 ; extra == 'xpu-2-1'
102
+ Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250225 ; extra == 'xpu-2-1'
103
103
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-1'
104
104
  Requires-Dist: dpcpp-cpp-rt ==2024.0.2 ; (platform_system == "Windows") and extra == 'xpu-2-1'
105
105
  Requires-Dist: mkl-dpcpp ==2024.0.0 ; (platform_system == "Windows") and extra == 'xpu-2-1'
@@ -117,7 +117,7 @@ Requires-Dist: setuptools ; extra == 'xpu-2-6'
117
117
  Requires-Dist: torch ==2.6.0+xpu ; extra == 'xpu-2-6'
118
118
  Requires-Dist: torchvision ==0.21.0+xpu ; extra == 'xpu-2-6'
119
119
  Requires-Dist: torchaudio ==2.6.0+xpu ; extra == 'xpu-2-6'
120
- Requires-Dist: bigdl-core-xe-all ==2.6.0b20250224 ; extra == 'xpu-2-6'
120
+ Requires-Dist: bigdl-core-xe-all ==2.6.0b20250225 ; extra == 'xpu-2-6'
121
121
  Requires-Dist: onednn-devel ==2025.0.1 ; extra == 'xpu-2-6'
122
122
  Requires-Dist: onednn ==2025.0.1 ; extra == 'xpu-2-6'
123
123
  Requires-Dist: dpcpp-cpp-rt ==2025.0.2 ; extra == 'xpu-2-6'
@@ -133,9 +133,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arc'
133
133
  Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arc'
134
134
  Requires-Dist: tabulate ; extra == 'xpu-arc'
135
135
  Requires-Dist: setuptools ; extra == 'xpu-arc'
136
- Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250224 ; extra == 'xpu-arc'
137
- Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250224 ; extra == 'xpu-arc'
138
- Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250224 ; extra == 'xpu-arc'
136
+ Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250225 ; extra == 'xpu-arc'
137
+ Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250225 ; extra == 'xpu-arc'
138
+ Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250225 ; extra == 'xpu-arc'
139
139
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arc'
140
140
  Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
141
141
  Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
@@ -156,9 +156,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arl'
156
156
  Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arl'
157
157
  Requires-Dist: tabulate ; extra == 'xpu-arl'
158
158
  Requires-Dist: setuptools ; extra == 'xpu-arl'
159
- Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250224 ; extra == 'xpu-arl'
160
- Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250224 ; extra == 'xpu-arl'
161
- Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250224 ; extra == 'xpu-arl'
159
+ Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250225 ; extra == 'xpu-arl'
160
+ Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250225 ; extra == 'xpu-arl'
161
+ Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250225 ; extra == 'xpu-arl'
162
162
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arl'
163
163
  Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
164
164
  Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
@@ -179,9 +179,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-lnl'
179
179
  Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-lnl'
180
180
  Requires-Dist: tabulate ; extra == 'xpu-lnl'
181
181
  Requires-Dist: setuptools ; extra == 'xpu-lnl'
182
- Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250224 ; extra == 'xpu-lnl'
183
- Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250224 ; extra == 'xpu-lnl'
184
- Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250224 ; extra == 'xpu-lnl'
182
+ Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250225 ; extra == 'xpu-lnl'
183
+ Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250225 ; extra == 'xpu-lnl'
184
+ Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250225 ; extra == 'xpu-lnl'
185
185
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-lnl'
186
186
  Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
187
187
  Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
@@ -94,7 +94,7 @@ ipex_llm/serving/fastchat/tgi_api_protocol.py,sha256=brT3k3-V0NJrU4fRqUwWjC0O3iO
94
94
  ipex_llm/serving/fastchat/tgi_api_server.py,sha256=agNTAEiZPSuj3dEdIdYKwkoY0cXOUDX06DiM9VP2knQ,24418
95
95
  ipex_llm/serving/fastchat/vllm_worker.py,sha256=ZLz2Q9GxJO6r_LOiP6epgCRjBGk-K4EB1SNEWSJp5DA,11091
96
96
  ipex_llm/transformers/__init__.py,sha256=BreA3EY6hkNq0rVixb_sUuTLzMrcWXTt3yRsshCPHHQ,1214
97
- ipex_llm/transformers/convert.py,sha256=--X5moNSvtV8Mtzg2fh9v1Ej1iyYyocQwo5pHlhkPqo,102230
97
+ ipex_llm/transformers/convert.py,sha256=294hk2uMQPN0DrPIpqhfgFvR7klvlHKS53DUUhoWaeU,103273
98
98
  ipex_llm/transformers/convert_ipex.py,sha256=_nSnUTQy-yfkKaqGdqnBdWztZf3NGmnbZ0TKaDrF4X4,14617
99
99
  ipex_llm/transformers/embedding.py,sha256=bdgk59DvD4ZZyxRzewXOR7g56nThgO6uhIwk8QL7f-s,9299
100
100
  ipex_llm/transformers/kv.py,sha256=k4TU18LlA-Sbq9WNNQnfuzu3RSFBwFhmaV3BcGN5bAo,19191
@@ -153,8 +153,9 @@ ipex_llm/transformers/models/chatglm.py,sha256=UHai1t2AUtGmF765_eHF8LUMVQzp_oCBx
153
153
  ipex_llm/transformers/models/chatglm2.py,sha256=KyAIX7zGVQDQuwwM3QMBNWZbTeMHEzKUIgAryT0voHc,14933
154
154
  ipex_llm/transformers/models/chatglm4.py,sha256=QvUehdaCePB3MNHyWg3dneDxmjtBdxYeKUyQUVcsgfM,16886
155
155
  ipex_llm/transformers/models/chatglm4v.py,sha256=L6y45M_wjS2_HqchmCUxRlQZUNuSNCGOiynAQrGh918,14124
156
- ipex_llm/transformers/models/common.py,sha256=VKouwfP3q7nQkYnDeDYfjLwKgr_Qk9uc3ZCISaxbfn4,11646
156
+ ipex_llm/transformers/models/common.py,sha256=0OTRaXekOPApRdQ8UKl5Du8DOtKJ6awnQIStvYvFQOI,13018
157
157
  ipex_llm/transformers/models/decilm.py,sha256=P-PBuDPf07GvKggLwJx_wPwIn6esN3rX8ai2JxRuZmE,5246
158
+ ipex_llm/transformers/models/deepseek.py,sha256=2w2bWbbuYi__fPs56vE9Wq5bdiZCF2NkYJNXf-b9LjQ,11130
158
159
  ipex_llm/transformers/models/deepseek_v3.py,sha256=CTgwIKQlUPlUCbOxc9Id5GapWkXOP6pMtkguYrWpCio,10003
159
160
  ipex_llm/transformers/models/gemma.py,sha256=_E3Yw8Y45xyNVeLqyVKcpr8kjuICtETeL82cJ-bWJuU,9424
160
161
  ipex_llm/transformers/models/gemma2.py,sha256=2WZuv-FLzJyTJFaYxOuzJt47QE64M0lHnzAiO5T6ozI,8049
@@ -167,7 +168,7 @@ ipex_llm/transformers/models/internvl.py,sha256=Vx0vENIEQLX2M6P398mw5TOhpks0U8xf
167
168
  ipex_llm/transformers/models/janus.py,sha256=0URo2NC8_2CGaOl3CiVB3IFTVsYyplMFgjBJdPDNBsY,1509
168
169
  ipex_llm/transformers/models/llama.py,sha256=rqrNjuZb_jeb9MKx0z-FSVoGx8YDBxQzPJ9ZUvYhgx0,9138
169
170
  ipex_llm/transformers/models/minicpm.py,sha256=eaPNVNrep0_xGoELhZd886ff0ceoKqB6cusdAhd52eE,10145
170
- ipex_llm/transformers/models/minicpm3.py,sha256=11cYl8KM2hoIJNMAOZMxiwCu6dMhup9ric_OEn8-VrQ,9363
171
+ ipex_llm/transformers/models/minicpm3.py,sha256=37P_yMjw8RIzy27qL_E7kzbQRNW6f0xYQNK9xtoe5kI,10183
171
172
  ipex_llm/transformers/models/minicpmv.py,sha256=PP05b5iTnrMpiseCn8iJcxKJDnfq7WqXp9Mrch0kKZ0,9876
172
173
  ipex_llm/transformers/models/mistral.py,sha256=uVhkdXaq15v1P3QY0emVsA7SxUbAWChHEEXYN-drjpQ,7449
173
174
  ipex_llm/transformers/models/mllama.py,sha256=ZyRq9DTKsvk1AlRbr-z6ngjS3Sr_7YuGZ6-Yr1MBBAM,10937
@@ -261,11 +262,11 @@ ipex_llm/vllm/xpu/engine/__init__.py,sha256=pY_CpyuZd72fr6s32ejeKHKFW0K4vUU2rzZj
261
262
  ipex_llm/vllm/xpu/engine/engine.py,sha256=NvCMbp0X8NVrOqbwm4FTvXOptTRLzu9jQsy37ZHnTk8,9493
262
263
  ipex_llm/vllm/xpu/entrypoints/openai/api_server.py,sha256=D577nxWlyoWaHXNXIEvS3ViKSSWL3XZq8D8t6izD7x4,33250
263
264
  ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py,sha256=hB398yYtKauASRzevctScdbFIjiiSGMAe1bwEuIHrhY,10893
264
- ipex_llm-2.2.0b20250224.data/scripts/ipex-llm-init,sha256=fLQsT2dRL6H5bThb4GuIWotAuqoLsIxFwA-0c2qmaO8,6672
265
- ipex_llm-2.2.0b20250224.data/scripts/llm-chat,sha256=TdUnUmNapzuoe1c8IzrdVOQwWEg8IqsMSBRlOD3daZM,2249
266
- ipex_llm-2.2.0b20250224.data/scripts/llm-cli,sha256=RXGPlLElHxcKzoUxljEMBIAXbzCDysXL-Nxw-xF-7LU,2457
267
- ipex_llm-2.2.0b20250224.dist-info/METADATA,sha256=6AFayutzAHKTJoabKLkD2avZ94bGTCqMR5FuM3lYgzc,12369
268
- ipex_llm-2.2.0b20250224.dist-info/WHEEL,sha256=PPJcBMAZibF_2GFE9NmOJGqiaSMPiNFbJd6QaJjdA6Y,109
269
- ipex_llm-2.2.0b20250224.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
270
- ipex_llm-2.2.0b20250224.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
271
- ipex_llm-2.2.0b20250224.dist-info/RECORD,,
265
+ ipex_llm-2.2.0b20250225.data/scripts/ipex-llm-init,sha256=fLQsT2dRL6H5bThb4GuIWotAuqoLsIxFwA-0c2qmaO8,6672
266
+ ipex_llm-2.2.0b20250225.data/scripts/llm-chat,sha256=TdUnUmNapzuoe1c8IzrdVOQwWEg8IqsMSBRlOD3daZM,2249
267
+ ipex_llm-2.2.0b20250225.data/scripts/llm-cli,sha256=RXGPlLElHxcKzoUxljEMBIAXbzCDysXL-Nxw-xF-7LU,2457
268
+ ipex_llm-2.2.0b20250225.dist-info/METADATA,sha256=3_EbaWHFTUZ3JXASEqCh8-KfRdJ-s0TRsdOk6L2-Fyo,12369
269
+ ipex_llm-2.2.0b20250225.dist-info/WHEEL,sha256=PPJcBMAZibF_2GFE9NmOJGqiaSMPiNFbJd6QaJjdA6Y,109
270
+ ipex_llm-2.2.0b20250225.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
271
+ ipex_llm-2.2.0b20250225.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
272
+ ipex_llm-2.2.0b20250225.dist-info/RECORD,,