ipex-llm 2.2.0b20250109__py3-none-manylinux2010_x86_64.whl → 2.2.0b20250111__py3-none-manylinux2010_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1590,6 +1590,9 @@ def _optimize_post(model):
  convert_forward(model,
  module.Qwen2ForCausalLM,
  qwen2_causal_lm_forward)
+ convert_forward(model,
+ module.Qwen2Model,
+ qwen2_model_forward)
  convert_forward(model,
  module.Qwen2RMSNorm,
  rms_norm_forward)
@@ -1602,12 +1605,6 @@ def _optimize_post(model):
  convert_forward(model,
  module.Qwen2SdpaAttention,
  qwen2_attention_forward)
- if version.parse(trans_version) >= version.parse("4.42"):
- from ipex_llm.transformers.models.qwen2 import qwen2_model_forward_4_42
- convert_forward(model, module.Qwen2Model, qwen2_model_forward_4_42)
- else:
- from ipex_llm.transformers.models.qwen2 import qwen2_model_forward
- convert_forward(model, module.Qwen2Model, qwen2_model_forward)
  elif model.config.model_type == "qwen2_moe":
  # for Qwen1.5-MOE-A2.7B
  modeling_module_name = model.__class__.__module__
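
The two hunks above drop the transformers-version branch and register a single qwen2_model_forward for Qwen2Model. The diff only shows call sites of convert_forward, not its body; as a minimal sketch under that assumption, such a forward-replacement helper could simply rebind the new function as a bound method on every matching submodule:

    # Illustrative sketch only: convert_forward's actual body is not part of this diff.
    import types
    import torch

    def convert_forward(model: torch.nn.Module, target_class: type, new_forward) -> None:
        # Rebind new_forward as the bound `forward` of every submodule whose
        # type matches target_class.
        for module in model.modules():
            if module.__class__ == target_class:
                module.forward = types.MethodType(new_forward, module)
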
@@ -1819,9 +1816,7 @@ def _optimize_post(model):
  from ipex_llm.transformers.models.phi3 import attention_forward
  convert_forward(model, module.Phi3Attention, attention_forward)
  convert_forward(model, module.Phi3SdpaAttention, attention_forward)
- from ipex_llm.transformers.models.phi3 import mlp_forward
- convert_forward(model, module.Phi3MLP, mlp_forward)
- from ipex_llm.transformers.models.common import rms_norm_forward
+ convert_forward(model, module.Phi3MLP, mlp_silu_forward)
  convert_forward(model, module.Phi3RMSNorm, rms_norm_forward)
  if model.config.model_type == "phi3":
  from ipex_llm.transformers.models.phi3 import phi3_model_forward_wrapper
@@ -52,7 +52,14 @@ import os


  def _ipex_optimize_rmsnorm(_model, supported_classes, is_tpp=False, is_woq=False):
- from intel_extension_for_pytorch.transformers.models.cpu.fusions.mha_fusion import _IPEXRMSNorm
+ try:
+ # old version use name `_IPEXRMSNorm`
+ from intel_extension_for_pytorch.transformers.models.cpu.fusions.mha_fusion \
+ import _IPEXRMSNorm
+ except ImportError:
+ # new version use name `_IPEXRMSNormCPU`
+ from intel_extension_for_pytorch.transformers.models.cpu.fusions.mha_fusion \
+ import _IPEXRMSNormCPU as _IPEXRMSNorm
  for supported_class in supported_classes:
  lowering_class_cpu(
  _model,
@@ -47,7 +47,7 @@ import os
  import torch
  import torch.distributed
  import torch.nn.functional as F
- from torch import Tensor, device, dtype, nn
+ from torch import Tensor, dtype, nn
  from operator import mul
  from functools import reduce
  from ipex_llm.transformers.xpu_customize_fwd import custom_fwd, custom_bwd
@@ -294,10 +294,10 @@ def use_batch_forward(x: torch.Tensor, qtype: int, output_len: int):
  if hard_condition:
  return (
  batch_size > 1
- or (device in ["arc"] and qtype in [SYM_INT8, FP4])
- or (device in ["arc", "mtl"] and qtype in [FP8E4])
- or (device in ["lnl"] and qtype in [SYM_INT4] and x.shape[1] % 512 == 0)
- or (device in ["bmg"] and qtype in [SYM_INT4, FP8E5])
+ or (device_name in ["arc"] and qtype in [SYM_INT8, FP4])
+ or (device_name in ["arc", "mtl"] and qtype in [FP8E4])
+ or (device_name in ["lnl"] and qtype in [SYM_INT4] and x.shape[1] % 512 == 0)
+ or (device_name in ["bmg"] and qtype in [SYM_INT4, FP8E5])
  )
  return False

@@ -30,8 +30,7 @@ from ipex_llm.transformers.models.utils import use_quantize_kv_cache, restore_fp
  from ipex_llm.transformers.models.utils import update_past_key_value
  from ipex_llm.transformers.models.utils import should_use_fuse_rope
  from ipex_llm.transformers.models.utils import use_sdp
- from ipex_llm.transformers.models.utils import apply_rotary_pos_emb, SILU
- from ipex_llm.transformers.models.utils import mlp_fusion_check
+ from ipex_llm.transformers.models.utils import apply_rotary_pos_emb
  from ipex_llm.transformers.models.utils import is_enough_kv_cache_room_4_36
  from ipex_llm.transformers.kv import DynamicCompressFp8Cache, DynamicCompressCache
  import warnings
@@ -113,21 +113,6 @@ def internlm_attention_forward(
  return attn_output, attn_weights, past_key_value


- def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
- """
- This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep).
- The hidden states go from (batch,
- num_key_value_heads, seqlen, head_dim) to
- (batch, num_attention_heads, seqlen, head_dim)
- """
- batch, num_key_value_heads, slen, head_dim = hidden_states.shape
- if n_rep == 1:
- return hidden_states
- hidden_states = hidden_states[:, :, None, :, :].expand(batch, num_key_value_heads,
- n_rep, slen, head_dim)
- return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim)
-
-
  def internlm2_attention_forward(
  self,
  hidden_states: torch.Tensor,
@@ -39,7 +39,6 @@ import warnings
  from ipex_llm.transformers.models.common import attention_softmax
  from ipex_llm.transformers.models.common import scaled_dot_product_attention
  from ipex_llm.transformers.models.utils import should_use_fuse_rope, rotate_half
- from ipex_llm.transformers.models.utils import mlp_fusion_check, SILU
  from ipex_llm.transformers.models.utils import use_sdp, use_sdp_causal
  from ipex_llm.transformers.models.utils import use_quantize_kv_cache, restore_fp8_kv_cache
  from ipex_llm.transformers.models.utils import should_use_compresskv, is_enough_kv_cache_room_4_36
@@ -213,24 +212,8 @@ def split_mlp(module: torch.nn.Module):

  del module.gate_up_proj

-
- def mlp_forward(
- self,
- hidden_states: torch.FloatTensor
- ) -> torch.FloatTensor:
- x_2d = hidden_states.view(-1, hidden_states.shape[-1])
- qtype = getattr(self.gate_proj, "qtype", None)
- if mlp_fusion_check(x_2d, qtype, self.training):
- x_2d = x_2d.contiguous()
- import xe_linear
- return self.down_proj(xe_linear.mlp_forward_xpu(
- x_2d, self.gate_proj.weight.data, self.up_proj.weight.data,
- x_2d.shape[0], x_2d.shape[1], self.gate_proj.out_features,
- SILU, qtype
- ))
- return self.down_proj(
- self.activation_fn(self.gate_proj(hidden_states)) * self.up_proj(hidden_states)
- )
+ # rename activation function
+ module.act_fn = module.activation_fn


  def phi3_model_forward_wrapper(origin_model_forward):
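
This hunk removes the Phi3-specific mlp_forward and instead exposes the activation as act_fn so the shared mlp_silu_forward (registered in the convert.py hunk above) can be used. mlp_silu_forward's body is not shown in this diff; assuming it mirrors the removed fallback path, a SiLU-gated MLP forward over gate_proj/up_proj/down_proj would look roughly like this sketch:

    # Illustrative sketch only, not the package's actual implementation.
    import torch
    import torch.nn.functional as F

    def mlp_silu_forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        # down_proj(silu(gate_proj(x)) * up_proj(x)), the usual gated-MLP pattern
        return self.down_proj(
            F.silu(self.gate_proj(hidden_states)) * self.up_proj(hidden_states)
        )
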
@@ -51,217 +51,14 @@ from ipex_llm.transformers.models.utils import use_quantize_kv_cache, \
  should_use_compresskv, is_enough_kv_cache_room_4_36
  from ipex_llm.transformers.kv import DynamicFp8Cache, DynamicNormalCache, \
  DynamicCompressCache, DynamicCompressFp8Cache
- from ipex_llm.utils.common import invalidInputError

- from transformers.models.qwen2.modeling_qwen2 import Qwen2Attention, Qwen2MLP
+ from transformers.models.qwen2.modeling_qwen2 import Qwen2Model, Qwen2Attention, Qwen2MLP
  from transformers.models.qwen2.modeling_qwen2 import apply_rotary_pos_emb
  from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast
  from transformers.cache_utils import Cache
- from transformers import logging
-
-
- logger = logging.get_logger(__name__)


  def qwen2_model_forward(
- self,
- input_ids: torch.LongTensor = None,
- attention_mask: Optional[torch.Tensor] = None,
- position_ids: Optional[torch.LongTensor] = None,
- past_key_values: Optional[List[torch.FloatTensor]] = None,
- inputs_embeds: Optional[torch.FloatTensor] = None,
- use_cache: Optional[bool] = None,
- output_attentions: Optional[bool] = None,
- output_hidden_states: Optional[bool] = None,
- return_dict: Optional[bool] = None,
- cache_position: Optional[torch.LongTensor] = None, # for transformers >= 4.42
- ) -> Union[Tuple, BaseModelOutputWithPast]:
- output_attentions = (
- output_attentions if output_attentions is not None
- else self.config.output_attentions
- )
- output_hidden_states = (
- output_hidden_states if output_hidden_states is not None
- else self.config.output_hidden_states
- )
- use_cache = use_cache if use_cache is not None else self.config.use_cache
-
- return_dict = return_dict if return_dict is not None else self.config.use_return_dict
-
- # retrieve input_ids and inputs_embeds
- if input_ids is not None and inputs_embeds is not None:
- invalidInputError(False,
- "You cannot specify both input_ids and inputs_embeds at the same time")
- elif input_ids is not None:
- batch_size, seq_length = input_ids.shape
- elif inputs_embeds is not None:
- batch_size, seq_length, _ = inputs_embeds.shape
- else:
- invalidInputError(False,
- "You have to specify either decoder_input_ids or decoder_inputs_embeds")
-
- if self.gradient_checkpointing and self.training:
- if use_cache:
- logger.warning_once(
- "`use_cache=True` is incompatible with gradient checkpointing. "
- "Setting `use_cache=False`..."
- )
- use_cache = False
-
- past_key_values_length = 0
-
- # ipex-llm changes start
- # IPEX-LLM OPT: kv cache and quantize kv cache
- inputs = input_ids if input_ids is not None else inputs_embeds
- num_heads, num_kv_heads = self.config.num_attention_heads, self.config.num_key_value_heads
- use_quantize_kv = (
- self.config.hidden_size != 3584 # disable quantize kv in specific model
- and use_quantize_kv_cache(self.layers[0].mlp.up_proj, inputs, num_heads, num_kv_heads)
- )
- use_compress_kv = should_use_compresskv(inputs, inputs.shape[1]) or \
- isinstance(past_key_values, DynamicCompressCache)
-
- if use_cache:
- if use_compress_kv and not isinstance(past_key_values, DynamicCompressCache):
- if use_quantize_kv:
- past_key_values = DynamicCompressFp8Cache.from_legacy_cache(past_key_values)
- else:
- past_key_values = DynamicCompressCache.from_legacy_cache(past_key_values)
- elif use_quantize_kv and not use_compress_kv and not isinstance(past_key_values,
- DynamicFp8Cache):
- past_key_values = DynamicFp8Cache.from_legacy_cache(past_key_values)
- if not use_quantize_kv and not use_compress_kv and not isinstance(past_key_values,
- DynamicNormalCache):
- past_key_values = DynamicNormalCache.from_legacy_cache(past_key_values)
- past_key_values_length = past_key_values.get_usable_length(seq_length)
- # ipex-llm changes end
-
- if position_ids is None:
- device = input_ids.device if input_ids is not None else inputs_embeds.device
- position_ids = torch.arange(
- past_key_values_length, seq_length + past_key_values_length,
- dtype=torch.long, device=device
- )
- position_ids = position_ids.unsqueeze(0).view(-1, seq_length)
- else:
- position_ids = position_ids.view(-1, seq_length).long()
-
- if inputs_embeds is None:
- inputs_embeds = self.embed_tokens(input_ids)
-
- flash_attn_2 = self._attn_implementation == "flash_attention_2"
- if attention_mask is not None and flash_attn_2 and use_cache:
-
- is_padding_right = attention_mask[:, -1].sum().item() != batch_size
- if is_padding_right:
- invalidInputError(
- False,
- "You are attempting to perform batched generation with padding_side='right'"
- " this may lead to unexpected behaviour for Flash Attention version of Qwen2."
- " Make sure to call `tokenizer.padding_side = 'left'` before tokenizing "
- "the input. "
- )
-
- from transformers.models.qwen2.modeling_qwen2 import _prepare_4d_causal_attention_mask_for_sdpa
- from transformers.models.qwen2.modeling_qwen2 import _prepare_4d_causal_attention_mask
-
- # ipex-llm changes start: don't generate `attention_mask` in decode phase
- if seq_length == 1:
- attention_mask = None
- # ipex-llm changes end
- elif self._attn_implementation == "flash_attention_2":
- # 2d mask is passed through the layers
- attention_mask = attention_mask if (attention_mask is not None and
- 0 in attention_mask) else None
- elif self._attn_implementation == "sdpa" and not output_attentions:
- # output_attentions=True can not be supported when using SDPA, and we fall back on
- # the manual implementation that requires a 4D causal mask in all cases.
- attention_mask = _prepare_4d_causal_attention_mask_for_sdpa(
- attention_mask,
- (batch_size, seq_length),
- inputs_embeds,
- past_key_values_length,
- )
- else:
- # 4d mask is passed through the layers
- attention_mask = _prepare_4d_causal_attention_mask(
- attention_mask,
- (batch_size, seq_length),
- inputs_embeds,
- past_key_values_length,
- sliding_window=self.config.sliding_window,
- )
-
- hidden_states = inputs_embeds
-
- # decoder layers
- all_hidden_states = () if output_hidden_states else None
- all_self_attns = () if output_attentions else None
- next_decoder_cache = None
-
- for decoder_layer in self.layers:
- if output_hidden_states:
- all_hidden_states += (hidden_states,)
-
- if self.gradient_checkpointing and self.training:
- layer_outputs = self._gradient_checkpointing_func(
- decoder_layer.__call__,
- hidden_states,
- attention_mask,
- position_ids,
- past_key_values,
- output_attentions,
- use_cache,
- )
- else:
- # ipex-llm changes
- curr_device = decoder_layer.input_layernorm.weight.device
- if attention_mask is not None:
- attention_mask = attention_mask.to(curr_device)
- if position_ids is not None:
- position_ids = position_ids.to(curr_device)
- # ipex-llm changes end
- layer_outputs = decoder_layer(
- hidden_states,
- attention_mask=attention_mask,
- position_ids=position_ids,
- past_key_value=past_key_values,
- output_attentions=output_attentions,
- use_cache=use_cache,
- )
-
- hidden_states = layer_outputs[0]
-
- if use_cache:
- next_decoder_cache = layer_outputs[2 if output_attentions else 1]
-
- if output_attentions:
- all_self_attns += (layer_outputs[1],)
-
- hidden_states = self.norm(hidden_states)
-
- # add hidden states from the last decoder layer
- if output_hidden_states:
- all_hidden_states += (hidden_states,)
-
- # ipex-llm changes start: remove `to_legacy_cache`
- next_cache = None
- if use_cache:
- next_cache = next_decoder_cache
- # ipex-llm changes end
-
- if not return_dict:
- return tuple(v for v in [hidden_states, next_cache,
- all_hidden_states, all_self_attns] if v is not None)
- return BaseModelOutputWithPast(
- last_hidden_state=hidden_states,
- past_key_values=next_cache,
- hidden_states=all_hidden_states,
- attentions=all_self_attns,
- )
-
-
- def qwen2_model_forward_4_42(
  self,
  input_ids: torch.LongTensor = None,
  attention_mask: Optional[torch.Tensor] = None,
@@ -274,44 +71,17 @@ def qwen2_model_forward_4_42(
  return_dict: Optional[bool] = None,
  cache_position: Optional[torch.LongTensor] = None,
  ) -> Union[Tuple, BaseModelOutputWithPast]:
- output_attentions = (
- output_attentions if output_attentions is not None
- else self.config.output_attentions
- )
- output_hidden_states = (
- output_hidden_states if output_hidden_states is not None
- else self.config.output_hidden_states
- )
+ # IPEX-LLM OPT start: kv cache and quantize kv cache
+ inputs = input_ids if input_ids is not None else inputs_embeds
  use_cache = use_cache if use_cache is not None else self.config.use_cache
+ use_cache = True if inputs.device.type == "xpu" else use_cache

- return_dict = return_dict if return_dict is not None else self.config.use_return_dict
-
- invalidInputError(
- (input_ids is None) ^ (inputs_embeds is None),
- "You cannot specify both input_ids and inputs_embeds at the same time, "
- "and must specify either one"
+ use_quantize_kv = self.config.hidden_size != 3584 and use_quantize_kv_cache(
+ self.layers[0].mlp.down_proj, inputs,
+ self.config.num_attention_heads, self.config.num_key_value_heads
  )

- if self.gradient_checkpointing and self.training:
- if use_cache:
- logger.warning_once(
- "`use_cache=True` is incompatible with gradient checkpointing. "
- "Setting `use_cache=False`..."
- )
- use_cache = False
-
- if inputs_embeds is None:
- inputs_embeds = self.embed_tokens(input_ids)
-
- # ipex-llm changes start
- # IPEX-LLM OPT: kv cache and quantize kv cache
- num_heads, num_kv_heads = self.config.num_attention_heads, self.config.num_key_value_heads
- use_quantize_kv = (
- self.config.hidden_size != 3584 # disable quantize kv in specific model
- and use_quantize_kv_cache(self.layers[0].mlp.up_proj, inputs_embeds,
- num_heads, num_kv_heads)
- )
- use_compress_kv = should_use_compresskv(inputs_embeds, inputs_embeds.shape[1]) or \
+ use_compress_kv = should_use_compresskv(inputs, inputs.shape[1]) or \
  isinstance(past_key_values, DynamicCompressCache)

  if use_cache:
@@ -328,79 +98,24 @@ def qwen2_model_forward_4_42(
  past_key_values = DynamicNormalCache.from_legacy_cache(past_key_values)
  # ipex-llm changes end

- if cache_position is None:
- past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
- cache_position = torch.arange(
- past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device
- )
- if position_ids is None:
- position_ids = cache_position.unsqueeze(0)
-
- causal_mask = self._update_causal_mask(
- attention_mask, inputs_embeds, cache_position, past_key_values, output_attentions
- )
-
- hidden_states = inputs_embeds
-
- # decoder layers
- all_hidden_states = () if output_hidden_states else None
- all_self_attns = () if output_attentions else None
- next_decoder_cache = None
-
- for decoder_layer in self.layers:
- if output_hidden_states:
- all_hidden_states += (hidden_states,)
-
- if self.gradient_checkpointing and self.training:
- layer_outputs = self._gradient_checkpointing_func(
- decoder_layer.__call__,
- hidden_states,
- causal_mask,
- position_ids,
- past_key_values,
- output_attentions,
- use_cache,
- cache_position,
- )
- else:
- layer_outputs = decoder_layer(
- hidden_states,
- attention_mask=causal_mask,
- position_ids=position_ids,
- past_key_value=past_key_values,
- output_attentions=output_attentions,
- use_cache=use_cache,
- cache_position=cache_position,
- )
-
- hidden_states = layer_outputs[0]
-
- if use_cache:
- next_decoder_cache = layer_outputs[2 if output_attentions else 1]
-
- if output_attentions:
- all_self_attns += (layer_outputs[1],)
-
- hidden_states = self.norm(hidden_states)
-
- # add hidden states from the last decoder layer
- if output_hidden_states:
- all_hidden_states += (hidden_states,)
-
- # ipex-llm changes start: remove `to_legacy_cache`
- next_cache = None
- if use_cache:
- next_cache = next_decoder_cache
- # ipex-llm changes end
+ # `cache_position` is required after transformers 4.42
+ if cache_position is not None:
+ kwargs = {"cache_position": cache_position}
+ else:
+ kwargs = {}

- if not return_dict:
- return tuple(v for v in [hidden_states, next_cache,
- all_hidden_states, all_self_attns] if v is not None)
- return BaseModelOutputWithPast(
- last_hidden_state=hidden_states,
- past_key_values=next_cache,
- hidden_states=all_hidden_states,
- attentions=all_self_attns,
+ return Qwen2Model.forward(
+ self=self,
+ input_ids=input_ids,
+ attention_mask=attention_mask,
+ position_ids=position_ids,
+ past_key_values=past_key_values,
+ inputs_embeds=inputs_embeds,
+ use_cache=use_cache,
+ output_attentions=output_attentions,
+ output_hidden_states=output_hidden_states,
+ return_dict=return_dict,
+ **kwargs
  )


@@ -272,26 +272,6 @@ def use_xmx(x: torch.Tensor, qtype: int):
  )


- def fp16_fusion_check(proj, x, training):
- # only use fp16 fusion on PVC inference
- if proj is None:
- return False
- if not hasattr(proj, "qtype"):
- return False
- if proj.qtype != ggml_tensor_qtype["fp16"]:
- return False
- if proj.weight_type != 2:
- return False
- if training:
- return False
- if x.requires_grad:
- return False
- device_type = get_xpu_device_name(x.device)
- if device_type != "pvc":
- return False
- return True
-
-
  def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
  batch, num_key_value_heads, slen, head_dim = hidden_states.shape
  if n_rep == 1:
@@ -182,13 +182,17 @@ class _BaseAutoModelClass:
  if hasattr(model, "config") and model.config.model_type == "glm":
  # convert to llama structure
  from .npu_models.glm_edge import convert_config, load_weights, convert_state_dict
- import json
  original_path = model.config._name_or_path
  del model

- with open(os.path.join(original_path, "config.json")) as f:
- original_config = json.load(f)
+ original_config, _ = PretrainedConfig.get_config_dict(original_path)
  config = convert_config(original_config)
+
+ if not os.path.isdir(original_path):
+ # all model files are already cached
+ from transformers.utils.hub import cached_file
+ resolved_file = cached_file(original_path, "config.json")
+ original_path = os.path.dirname(resolved_file)
  original_state_dict = load_weights(original_path)
  new_dict, _ = convert_state_dict(original_state_dict, config,
  original_config.get("partial_rotary_factor", 1.0),
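
This hunk replaces the direct json.load of a local config.json with PretrainedConfig.get_config_dict, and resolves the cached snapshot directory via cached_file when the path is a hub model id rather than a local directory. A rough usage sketch of that resolution logic in isolation (the model id below is a placeholder, not from the diff):

    # Illustrative sketch only, assuming the files are already in the local HF cache.
    import os
    from transformers import PretrainedConfig
    from transformers.utils.hub import cached_file

    name_or_path = "some-org/some-glm-edge-model"  # hypothetical id; a local dir also works

    # get_config_dict returns (config_dict, unused_kwargs) for a local dir or a hub id
    config_dict, _ = PretrainedConfig.get_config_dict(name_or_path)

    if not os.path.isdir(name_or_path):
        # resolve the cached snapshot directory that holds config.json and the weights
        snapshot_dir = os.path.dirname(cached_file(name_or_path, "config.json"))
    else:
        snapshot_dir = name_or_path
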
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: ipex-llm
- Version: 2.2.0b20250109
+ Version: 2.2.0b20250111
  Summary: Large Language Model Develop Toolkit
  Home-page: https://github.com/intel-analytics/ipex-llm
  Author: BigDL Authors
@@ -27,10 +27,10 @@ Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine
  Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'all'
  Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'all'
  Provides-Extra: cpp
- Requires-Dist: bigdl-core-cpp ==2.6.0b20250109 ; extra == 'cpp'
+ Requires-Dist: bigdl-core-cpp ==2.6.0b20250111 ; extra == 'cpp'
  Requires-Dist: setuptools ; extra == 'cpp'
  Provides-Extra: cpp-arl
- Requires-Dist: bigdl-core-cpp ==2.6.0b20250109 ; extra == 'cpp-arl'
+ Requires-Dist: bigdl-core-cpp ==2.6.0b20250111 ; extra == 'cpp-arl'
  Requires-Dist: setuptools ; extra == 'cpp-arl'
  Requires-Dist: onednn-devel ==2024.1.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
  Requires-Dist: onednn ==2024.1.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
@@ -67,7 +67,7 @@ Requires-Dist: transformers ==4.40.0 ; extra == 'npu'
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'npu'
  Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'npu'
  Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'npu'
- Requires-Dist: bigdl-core-npu ==2.6.0b20250109 ; (platform_system == "Windows") and extra == 'npu'
+ Requires-Dist: bigdl-core-npu ==2.6.0b20250111 ; (platform_system == "Windows") and extra == 'npu'
  Provides-Extra: serving
  Requires-Dist: py-cpuinfo ; extra == 'serving'
  Requires-Dist: fschat[model_worker,webui] ==0.2.36 ; extra == 'serving'
@@ -87,9 +87,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu'
  Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu'
  Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu'
  Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu'
- Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250109 ; extra == 'xpu'
- Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250109 ; extra == 'xpu'
- Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250109 ; extra == 'xpu'
+ Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250111 ; extra == 'xpu'
+ Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250111 ; extra == 'xpu'
+ Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250111 ; extra == 'xpu'
  Provides-Extra: xpu-2-1
  Requires-Dist: py-cpuinfo ; extra == 'xpu-2-1'
  Requires-Dist: protobuf ; extra == 'xpu-2-1'
@@ -104,9 +104,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu-2-1'
  Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu-2-1'
  Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu-2-1'
  Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu-2-1'
- Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250109 ; extra == 'xpu-2-1'
- Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250109 ; extra == 'xpu-2-1'
- Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250109 ; extra == 'xpu-2-1'
+ Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250111 ; extra == 'xpu-2-1'
+ Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250111 ; extra == 'xpu-2-1'
+ Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250111 ; extra == 'xpu-2-1'
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-1'
  Requires-Dist: dpcpp-cpp-rt ==2024.0.2 ; (platform_system == "Windows") and extra == 'xpu-2-1'
  Requires-Dist: mkl-dpcpp ==2024.0.0 ; (platform_system == "Windows") and extra == 'xpu-2-1'
@@ -124,7 +124,7 @@ Requires-Dist: setuptools ; extra == 'xpu-2-6'
  Requires-Dist: torch ==2.6.0+xpu ; extra == 'xpu-2-6'
  Requires-Dist: torchvision ==0.21.0+xpu ; extra == 'xpu-2-6'
  Requires-Dist: torchaudio ==2.6.0+xpu ; extra == 'xpu-2-6'
- Requires-Dist: bigdl-core-xe-all ==2.6.0b20250109 ; extra == 'xpu-2-6'
+ Requires-Dist: bigdl-core-xe-all ==2.6.0b20250111 ; extra == 'xpu-2-6'
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-6'
  Provides-Extra: xpu-arc
  Requires-Dist: py-cpuinfo ; extra == 'xpu-arc'
@@ -137,9 +137,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arc'
  Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arc'
  Requires-Dist: tabulate ; extra == 'xpu-arc'
  Requires-Dist: setuptools ; extra == 'xpu-arc'
- Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250109 ; extra == 'xpu-arc'
- Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250109 ; extra == 'xpu-arc'
- Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250109 ; extra == 'xpu-arc'
+ Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250111 ; extra == 'xpu-arc'
+ Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250111 ; extra == 'xpu-arc'
+ Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250111 ; extra == 'xpu-arc'
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arc'
  Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
  Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
@@ -160,9 +160,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arl'
  Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arl'
  Requires-Dist: tabulate ; extra == 'xpu-arl'
  Requires-Dist: setuptools ; extra == 'xpu-arl'
- Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250109 ; extra == 'xpu-arl'
- Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250109 ; extra == 'xpu-arl'
- Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250109 ; extra == 'xpu-arl'
+ Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250111 ; extra == 'xpu-arl'
+ Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250111 ; extra == 'xpu-arl'
+ Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250111 ; extra == 'xpu-arl'
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arl'
  Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
  Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
@@ -183,9 +183,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-lnl'
  Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-lnl'
  Requires-Dist: tabulate ; extra == 'xpu-lnl'
  Requires-Dist: setuptools ; extra == 'xpu-lnl'
- Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250109 ; extra == 'xpu-lnl'
- Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250109 ; extra == 'xpu-lnl'
- Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250109 ; extra == 'xpu-lnl'
+ Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250111 ; extra == 'xpu-lnl'
+ Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250111 ; extra == 'xpu-lnl'
+ Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250111 ; extra == 'xpu-lnl'
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-lnl'
  Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
  Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
@@ -94,17 +94,17 @@ ipex_llm/serving/fastchat/tgi_api_protocol.py,sha256=brT3k3-V0NJrU4fRqUwWjC0O3iO
  ipex_llm/serving/fastchat/tgi_api_server.py,sha256=agNTAEiZPSuj3dEdIdYKwkoY0cXOUDX06DiM9VP2knQ,24418
  ipex_llm/serving/fastchat/vllm_worker.py,sha256=ZLz2Q9GxJO6r_LOiP6epgCRjBGk-K4EB1SNEWSJp5DA,11091
  ipex_llm/transformers/__init__.py,sha256=l4KkMkLe-pRC7b_kj6LCfeifgE-Uo33_Av_FwN9HnFA,1074
- ipex_llm/transformers/convert.py,sha256=umI137wqV2d4itS0AJQoZcygeWBATpSJSDJ805cZ-SY,98499
- ipex_llm/transformers/convert_ipex.py,sha256=iKXo0n8fVFTOA2fNYYrByMFK0dovL-kLd2sVDk88AlQ,14334
+ ipex_llm/transformers/convert.py,sha256=i2IOmDnQBKNtcfUL95l8w4rNBMiU4SqX_9uz_LtPHMI,98086
+ ipex_llm/transformers/convert_ipex.py,sha256=_nSnUTQy-yfkKaqGdqnBdWztZf3NGmnbZ0TKaDrF4X4,14617
  ipex_llm/transformers/embedding.py,sha256=bdgk59DvD4ZZyxRzewXOR7g56nThgO6uhIwk8QL7f-s,9299
  ipex_llm/transformers/kv.py,sha256=k4TU18LlA-Sbq9WNNQnfuzu3RSFBwFhmaV3BcGN5bAo,19191
  ipex_llm/transformers/lisa.py,sha256=F5WxbtXQ7RdKulj83h_2DnEIgKiKGZf7zvOmg6QBl2s,3289
  ipex_llm/transformers/loader.py,sha256=AwjV5RpI2t2bedlv7ZhLm8cfd-QJZm5hny-XyjIvdnk,6876
  ipex_llm/transformers/lookup.py,sha256=b6OlZ9OV10R9qeWw8mVryVpDxszkjwLkldvi7GPMJY8,19614
- ipex_llm/transformers/low_bit_linear.py,sha256=Obdd08D9dvuroS_6XWo4DXO_DrNRsbAqjz-mQAHmfxY,40845
+ ipex_llm/transformers/low_bit_linear.py,sha256=QBHrAG7lgOgVO1LHPNlimn8Icm44kEpnWOLtVuHoHDA,40857
  ipex_llm/transformers/model.py,sha256=fj7LBjrWtWwDJJYXnWiXsLGS4ayqqHfnh0p51dSDssE,40908
  ipex_llm/transformers/modelling_bigdl.py,sha256=7JpNVMuyq_OmtNUaMFMXdxPWZp2q0QHC02QeA-VTPOw,6709
- ipex_llm/transformers/npu_model.py,sha256=YW02GeVz-9ZGqxAeSz0AOvciS-17bo9eK5ZOBrICwSQ,39508
+ ipex_llm/transformers/npu_model.py,sha256=X8ZtvZJpzz64XrSPhUYXXZmdJcbZ9X6G3Vlzw-zgN1Q,39749
  ipex_llm/transformers/patches.py,sha256=halPWm__ORh2fRFSIFPiCNg3LQBfrRkTPtmtRpBJCZQ,1286
  ipex_llm/transformers/pipeline_parallel.py,sha256=uNZpOXljNmdoEYnP8U-VFiN4dRZb2piQbIf2bG9LQnE,49051
  ipex_llm/transformers/qlora.py,sha256=jtPGsvWFjbTUGzDBCdfftnCis_0nJQNRpACSwXUbbGU,14943
@@ -144,7 +144,7 @@ ipex_llm/transformers/gguf/models/model_implement/yuan2/configuration_yuan.py,sh
  ipex_llm/transformers/gguf/models/model_implement/yuan2/yuan_hf_model.py,sha256=_AOGMV65XHxgTxIib7lgs49InopcecTzRwgtYR8NTUg,51084
  ipex_llm/transformers/models/__init__.py,sha256=tp2DcVkKg1-QvdYk7DY7rZvQWCDQ4ZjU8NAQ7Fclrpg,584
  ipex_llm/transformers/models/aquila.py,sha256=VZb5Drpo_fTxwcExZ397LygnsNPX2sVbie9_JeFudZI,5252
- ipex_llm/transformers/models/baichuan.py,sha256=cAQLmVG-3R8CSTGTcDy2JOOzVe-Ej8AXjIEIjvZBGlo,18376
+ ipex_llm/transformers/models/baichuan.py,sha256=8b43mBRZJEf_xLNoodhA4r9x1anqwC3Wt8awWel-aUo,18306
  ipex_llm/transformers/models/bert.py,sha256=0Mm9jkvkzBxtc_z_GE1TcZoPz-HOg2Z2973ZEWgSwJk,5601
  ipex_llm/transformers/models/bloom.py,sha256=PxfzyYT-nFn3K5rZhTQjmcEjUUzAhUFzxIN4kzRlCuc,8103
  ipex_llm/transformers/models/chatglm.py,sha256=UHai1t2AUtGmF765_eHF8LUMVQzp_oCBx8TJB21WrHk,12597
@@ -159,7 +159,7 @@ ipex_llm/transformers/models/glm.py,sha256=lmeEWd_W2O638VzVW4Gm6cJre5XZcg_QBmPs8
  ipex_llm/transformers/models/gpt2.py,sha256=YSaNgK1uLCFDuIFqnKO0Mi-AsOZsYav-7pNf_NpKGdM,3445
  ipex_llm/transformers/models/gptbigcode.py,sha256=cP1_qGWoa43R2WacAMblShjku4QupcCZiLaPPAoOUs4,9101
  ipex_llm/transformers/models/gptneox.py,sha256=loRh1x_5S6BCeOr_s5xr-N_1SQHL3Y5IiUBAEyoMUqQ,6172
- ipex_llm/transformers/models/internlm.py,sha256=OifyiobRligleyZLpLBSe44A6Sq0uMG-8-NOcRCcT4Q,18080
+ ipex_llm/transformers/models/internlm.py,sha256=JZFrI2HXsIAfM-6pA2RO0wcXopOliC1FggLMzNzaDZ4,17404
  ipex_llm/transformers/models/internvl.py,sha256=Vx0vENIEQLX2M6P398mw5TOhpks0U8xf8rtRQvy94go,8154
  ipex_llm/transformers/models/llama.py,sha256=NzpyQve_RC9ez1W-jWPLGZ80k_S1I5Rx5saAzCsDIoI,8558
  ipex_llm/transformers/models/minicpm.py,sha256=eaPNVNrep0_xGoELhZd886ff0ceoKqB6cusdAhd52eE,10145
@@ -169,10 +169,10 @@ ipex_llm/transformers/models/mistral.py,sha256=uVhkdXaq15v1P3QY0emVsA7SxUbAWChHE
  ipex_llm/transformers/models/mllama.py,sha256=ZyRq9DTKsvk1AlRbr-z6ngjS3Sr_7YuGZ6-Yr1MBBAM,10937
  ipex_llm/transformers/models/mpt.py,sha256=z02NwHogJZVh-Mk4sYoIzR90SFIKhoNN_-ifsD907TQ,9540
  ipex_llm/transformers/models/phi.py,sha256=E6qz4EEuHIVGvaPo-wtLC5lz3iyMqTbAE_cRlcjQRKI,6670
- ipex_llm/transformers/models/phi3.py,sha256=Fo6PlZ24Gdm7eeeZOTMm1Bfh3U6P4rvq7-_2FHvp0vE,15503
+ ipex_llm/transformers/models/phi3.py,sha256=AaWB7TPQdrDYgpcVHglG0Q0480bxNOw1mFePddlBEFk,14849
  ipex_llm/transformers/models/phixtral.py,sha256=MDTMghcu7qAmZmRcUGqXXDXhSU3y_N59HRIXmlcjp5g,4890
  ipex_llm/transformers/models/qwen.py,sha256=A3WiVCzA7NLkcjp4zhFkZvKZzZWZlg0WFuVV_556TAI,19543
- ipex_llm/transformers/models/qwen2.py,sha256=JLaY9ZT7A22oO0G8K-nvjvKQDaIrKA5o-jEHvk_y3eI,25604
+ ipex_llm/transformers/models/qwen2.py,sha256=zK-FpUaxEhjD4gZa1ZvArodAilz29T_cpeAqfCGosc0,14317
  ipex_llm/transformers/models/qwen2_moe.py,sha256=a0gYo-ngf8SxaEnBdZUJDnPS6Mkn_poDd8xqhx50icI,19516
  ipex_llm/transformers/models/qwen2_vl.py,sha256=NrhxlaPj7W-HUBmKc3CSTwZy1lkoZ9qDaxM4GvE0kHs,13583
  ipex_llm/transformers/models/qwen_vl.py,sha256=j7Nzzz2Qvynu9yrCXmoEfERjw43hXof5TbXIs7Ms-oY,17105
@@ -181,7 +181,7 @@ ipex_llm/transformers/models/rwkv5.py,sha256=OkRNj1pCAZg1z2Fw-I0DEnxLEdZyPeRSQ6m
  ipex_llm/transformers/models/sd.py,sha256=VvHV5u-0k2MgHu3NL9113hPj7DgfxqctuKzEEeNfRDU,5981
  ipex_llm/transformers/models/stablelm.py,sha256=fj-XtOnR6kggnFUQTMPCOOzolkPztN06WAv8QW-XRnI,7054
  ipex_llm/transformers/models/starcoder2.py,sha256=ONKvD7JCkRM0DI-R56x28QFBJ7CjD5hOZBQ_3WfOcNk,6626
- ipex_llm/transformers/models/utils.py,sha256=ihbWS5kQK2KHDVPkMhgjik3nM8B2fWf-E-z4BWNUstk,15568
+ ipex_llm/transformers/models/utils.py,sha256=WYBc26vSiy_CzV07z-eT5ts90Kko2yUmS3DDZtfGcRk,15065
  ipex_llm/transformers/models/yuan.py,sha256=JYAn_ZaSGK0NBJLEIxCACfAq084a66GFJkdd5NbpmMA,7732
  ipex_llm/transformers/npu_models/__init__.py,sha256=ulEUGLjaP48LCrVeury3UxLjXxKzRi0UpSG4bYu-7f8,585
  ipex_llm/transformers/npu_models/baichuan.py,sha256=fJtd7fBrttySghRUgfZTAdxLjsSNC-XL08HISsXigLE,4685
@@ -250,11 +250,11 @@ ipex_llm/vllm/xpu/engine/__init__.py,sha256=pY_CpyuZd72fr6s32ejeKHKFW0K4vUU2rzZj
  ipex_llm/vllm/xpu/engine/engine.py,sha256=k4-D27WS_Gk3mA--w3HWAjPjb4Aiu043MVPi0ZoAUBc,5984
  ipex_llm/vllm/xpu/entrypoints/openai/api_server.py,sha256=GshTZFB8e4PWvqckfbmTOU6b0oLkNn7A-vzLuG9--j8,21544
  ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py,sha256=2rENA2ucynMaIjiZBEh2ez1o5vR32GaP514t39CD7KM,8676
- ipex_llm-2.2.0b20250109.data/scripts/ipex-llm-init,sha256=fLQsT2dRL6H5bThb4GuIWotAuqoLsIxFwA-0c2qmaO8,6672
- ipex_llm-2.2.0b20250109.data/scripts/llm-chat,sha256=TdUnUmNapzuoe1c8IzrdVOQwWEg8IqsMSBRlOD3daZM,2249
- ipex_llm-2.2.0b20250109.data/scripts/llm-cli,sha256=RXGPlLElHxcKzoUxljEMBIAXbzCDysXL-Nxw-xF-7LU,2457
- ipex_llm-2.2.0b20250109.dist-info/METADATA,sha256=gPslIWSw_X5E5ULhQa8rOHeRo_UeBDXCAyPjBSPB-nU,12705
- ipex_llm-2.2.0b20250109.dist-info/WHEEL,sha256=PPJcBMAZibF_2GFE9NmOJGqiaSMPiNFbJd6QaJjdA6Y,109
- ipex_llm-2.2.0b20250109.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
- ipex_llm-2.2.0b20250109.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
- ipex_llm-2.2.0b20250109.dist-info/RECORD,,
+ ipex_llm-2.2.0b20250111.data/scripts/ipex-llm-init,sha256=fLQsT2dRL6H5bThb4GuIWotAuqoLsIxFwA-0c2qmaO8,6672
+ ipex_llm-2.2.0b20250111.data/scripts/llm-chat,sha256=TdUnUmNapzuoe1c8IzrdVOQwWEg8IqsMSBRlOD3daZM,2249
+ ipex_llm-2.2.0b20250111.data/scripts/llm-cli,sha256=RXGPlLElHxcKzoUxljEMBIAXbzCDysXL-Nxw-xF-7LU,2457
+ ipex_llm-2.2.0b20250111.dist-info/METADATA,sha256=8HtPWBsOYbGoboTzVcl5ygPez_bfjvEzqxWanbYhx_o,12705
+ ipex_llm-2.2.0b20250111.dist-info/WHEEL,sha256=PPJcBMAZibF_2GFE9NmOJGqiaSMPiNFbJd6QaJjdA6Y,109
+ ipex_llm-2.2.0b20250111.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
+ ipex_llm-2.2.0b20250111.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
+ ipex_llm-2.2.0b20250111.dist-info/RECORD,,