ipex-llm 2.2.0b20250108__py3-none-win_amd64.whl → 2.2.0b20250109__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. ipex_llm/libs/bloom-api.dll +0 -0
  2. ipex_llm/libs/bloom.dll +0 -0
  3. ipex_llm/libs/gptneox-api.dll +0 -0
  4. ipex_llm/libs/gptneox.dll +0 -0
  5. ipex_llm/libs/libbloom_avx.dll +0 -0
  6. ipex_llm/libs/libbloom_vnni.dll +0 -0
  7. ipex_llm/libs/libgptneox_avx.dll +0 -0
  8. ipex_llm/libs/libgptneox_vnni.dll +0 -0
  9. ipex_llm/libs/libllama_avx.dll +0 -0
  10. ipex_llm/libs/libllama_vnni.dll +0 -0
  11. ipex_llm/libs/libstarcoder_avx.dll +0 -0
  12. ipex_llm/libs/libstarcoder_vnni.dll +0 -0
  13. ipex_llm/libs/llama-api.dll +0 -0
  14. ipex_llm/libs/llama.dll +0 -0
  15. ipex_llm/libs/main-bloom.exe +0 -0
  16. ipex_llm/libs/main-gptneox.exe +0 -0
  17. ipex_llm/libs/main-llama.exe +0 -0
  18. ipex_llm/libs/main-starcoder.exe +0 -0
  19. ipex_llm/libs/pipeline.dll +0 -0
  20. ipex_llm/libs/quantize-bloom.exe +0 -0
  21. ipex_llm/libs/quantize-bloom_vnni.exe +0 -0
  22. ipex_llm/libs/quantize-gptneox.exe +0 -0
  23. ipex_llm/libs/quantize-gptneox_vnni.exe +0 -0
  24. ipex_llm/libs/quantize-llama.exe +0 -0
  25. ipex_llm/libs/quantize-llama_vnni.exe +0 -0
  26. ipex_llm/libs/quantize-starcoder.exe +0 -0
  27. ipex_llm/libs/quantize-starcoder_vnni.exe +0 -0
  28. ipex_llm/libs/starcoder-api.dll +0 -0
  29. ipex_llm/libs/starcoder.dll +0 -0
  30. ipex_llm/transformers/convert.py +5 -13
  31. ipex_llm/transformers/models/baichuan.py +7 -36
  32. ipex_llm/transformers/models/bert.py +2 -13
  33. ipex_llm/transformers/models/chatglm2.py +8 -31
  34. ipex_llm/transformers/models/chatglm4.py +9 -4
  35. ipex_llm/transformers/models/chatglm4v.py +1 -1
  36. ipex_llm/transformers/models/common.py +3 -1
  37. ipex_llm/transformers/models/glm.py +1 -1
  38. ipex_llm/transformers/models/internlm.py +6 -3
  39. ipex_llm/transformers/models/llama.py +1 -1
  40. ipex_llm/transformers/models/minicpm.py +1 -1
  41. ipex_llm/transformers/models/minicpm3.py +3 -1
  42. ipex_llm/transformers/models/mistral.py +1 -1
  43. ipex_llm/transformers/models/mllama.py +1 -1
  44. ipex_llm/transformers/models/phi3.py +6 -2
  45. ipex_llm/transformers/models/qwen.py +4 -2
  46. ipex_llm/transformers/models/qwen2.py +4 -3
  47. ipex_llm/transformers/models/qwen2_moe.py +4 -2
  48. ipex_llm/transformers/models/qwen2_vl.py +3 -1
  49. ipex_llm/transformers/models/stablelm.py +3 -1
  50. ipex_llm/transformers/models/starcoder2.py +3 -1
  51. ipex_llm/transformers/models/utils.py +7 -3
  52. ipex_llm/transformers/models/yuan.py +2 -1
  53. {ipex_llm-2.2.0b20250108.dist-info → ipex_llm-2.2.0b20250109.dist-info}/METADATA +20 -20
  54. {ipex_llm-2.2.0b20250108.dist-info → ipex_llm-2.2.0b20250109.dist-info}/RECORD +60 -60
  55. {ipex_llm-2.2.0b20250108.data → ipex_llm-2.2.0b20250109.data}/scripts/ipex-llm-init.bat +0 -0
  56. {ipex_llm-2.2.0b20250108.data → ipex_llm-2.2.0b20250109.data}/scripts/llm-chat.ps1 +0 -0
  57. {ipex_llm-2.2.0b20250108.data → ipex_llm-2.2.0b20250109.data}/scripts/llm-cli.ps1 +0 -0
  58. {ipex_llm-2.2.0b20250108.dist-info → ipex_llm-2.2.0b20250109.dist-info}/WHEEL +0 -0
  59. {ipex_llm-2.2.0b20250108.dist-info → ipex_llm-2.2.0b20250109.dist-info}/entry_points.txt +0 -0
  60. {ipex_llm-2.2.0b20250108.dist-info → ipex_llm-2.2.0b20250109.dist-info}/top_level.txt +0 -0
ipex_llm/libs/*.dll, *.exe CHANGED
Binary files: all 29 libraries and executables listed above (bloom.dll, gptneox.dll, llama.dll, etc.) were rebuilt; their contents differ between 2.2.0b20250108 and 2.2.0b20250109 while file sizes are unchanged. The new sha256 digests appear in the RECORD diff at the end.
ipex_llm/transformers/convert.py CHANGED
@@ -1325,7 +1325,6 @@ def _optimize_post(model):
         modeling_module_name = model.__class__.__module__
         module = importlib.import_module(modeling_module_name)
         from ipex_llm.transformers.models.chatglm2 import chatglm2_attention_forward
-        from ipex_llm.transformers.models.chatglm2 import chatglm_rms_norm_forward
         from ipex_llm.transformers.models.chatglm2 import chatglm2_encoder_forward
         from ipex_llm.transformers.models.chatglm2 import chatglm2_model_forward
         from ipex_llm.transformers.models.chatglm2 import mlp_forward
@@ -1338,9 +1337,7 @@ def _optimize_post(model):
         convert_forward(model,
                         module.ChatGLMModel,
                         chatglm2_model_forward)
-        convert_forward(model,
-                        module.RMSNorm,
-                        chatglm_rms_norm_forward)
+        convert_forward(model, module.RMSNorm, rms_norm_forward)
         convert_forward(model, module.MLP, mlp_forward)
         # for codegeex-nano
         if hasattr(model.config, "rope_ratio"):
@@ -1358,8 +1355,7 @@ def _optimize_post(model):
         # glm4 family
         modeling_module_name = model.__class__.__module__
         module = importlib.import_module(modeling_module_name)
-        from ipex_llm.transformers.models.chatglm2 import chatglm_rms_norm_forward
-        convert_forward(model, module.RMSNorm, chatglm_rms_norm_forward)
+        convert_forward(model, module.RMSNorm, rms_norm_forward)
 
         if hasattr(model.transformer, "vision"):
             # glm4 vision family
@@ -1448,8 +1444,8 @@ def _optimize_post(model):
     elif model.config.model_type == "baichuan":
         modeling_module_name = model.__class__.__module__
         module = importlib.import_module(modeling_module_name)
-        from ipex_llm.transformers.models.baichuan import baichuan_mlp_forward
-        convert_forward(model, module.MLP, baichuan_mlp_forward)
+        convert_forward(model, module.RMSNorm, rms_norm_forward)
+        convert_forward(model, module.MLP, mlp_silu_forward)
 
         if model.config.hidden_size in [4096, 2048]:
             # baichuan-7B and baichuan2-7B
@@ -1458,7 +1454,6 @@ def _optimize_post(model):
             for i in range(len(model.model.layers)):
                 setattr(model.model.layers[i].self_attn, "layer_idx", i)
             convert_forward(model, module.Attention, baichuan_attention_forward_7b)
-            convert_forward(model, module.RMSNorm, rms_norm_forward)
             if model.config.vocab_size == 125696:
                 # baichuan2-7B
                 convert_forward(model, module.BaichuanModel, baichuan_model_7b_forward)
@@ -1468,9 +1463,7 @@ def _optimize_post(model):
         elif model.config.hidden_size == 5120:
             # baichuan-13B and baichuan2-13B
             from ipex_llm.transformers.models.baichuan import baichuan_attention_forward_13b
-            from ipex_llm.transformers.models.baichuan import baichuan_13b_rms_norm_forward
             convert_forward(model, module.BaichuanAttention, baichuan_attention_forward_13b)
-            convert_forward(model, module.RMSNorm, baichuan_13b_rms_norm_forward)
 
             if model.config.vocab_size == 125696:
                 # baichaun2-13B
@@ -1565,7 +1558,6 @@ def _optimize_post(model):
         from ipex_llm.transformers.models.qwen import qwen_attention_forward
         from ipex_llm.transformers.models.qwen import qwen_attention_forward_registered
         from ipex_llm.transformers.models.qwen import qwen_mlp_forward
-        from ipex_llm.transformers.models.chatglm2 import chatglm_rms_norm_forward
         from ipex_llm.transformers.models.qwen import qwen_model_forward
         if model.config.max_position_embeddings == 8192 \
                 and model.config.hidden_size == 4096:
@@ -1580,7 +1572,7 @@ def _optimize_post(model):
             )
             convert_forward(model,
                             module.RMSNorm,
-                            chatglm_rms_norm_forward)
+                            rms_norm_forward)
             convert_forward(model,
                             module.QWenMLP,
                             qwen_mlp_forward)
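All of the convert.py hunks above funnel per-model RMSNorm patches into the shared rms_norm_forward from ipex_llm.transformers.models.common, registered via convert_forward. A minimal sketch of what convert_forward-style patching amounts to; this is our simplification, not the library's exact code:

```python
import torch

def convert_forward(model: torch.nn.Module, target_cls: type, new_forward) -> None:
    # Walk the module tree and rebind `new_forward` as a bound method on every
    # instance of `target_cls`, replacing the stock forward in place.
    for module in model.modules():
        if isinstance(module, target_cls):
            module.forward = new_forward.__get__(module, target_cls)
```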
ipex_llm/transformers/models/baichuan.py CHANGED
@@ -47,38 +47,6 @@ def pre_compute_inv_freq(module: torch.nn.Module):
     module.register_buffer("inv_freq", inv_freq, persistent=False)
 
 
-def baichuan_13b_rms_norm_forward(self, hidden_states):
-    if hidden_states.device.type == "xpu" and not (self.training or hidden_states.requires_grad):
-        import xe_addons
-        x_2d = hidden_states.reshape(-1, hidden_states.size(-1)).contiguous()
-        output = xe_addons.rms_norm(self.weight, x_2d, self.epsilon)
-        return output.reshape(hidden_states.shape)
-
-    input_dtype = hidden_states.dtype
-    hidden_states = hidden_states.to(torch.float32)
-    variance = hidden_states.pow(2).mean(-1, keepdim=True)
-    hidden_states = hidden_states * torch.rsqrt(variance + self.epsilon)
-    return self.weight * hidden_states.to(input_dtype)
-
-
-def baichuan_mlp_forward(
-    self,
-    x: torch.Tensor,
-) -> torch.Tensor:
-    x_2d = x.view(-1, x.shape[-1])
-    qtype = getattr(self.gate_proj, "qtype", None)
-    if mlp_fusion_check(x_2d, qtype, self.training):
-        import xe_linear
-        if not x_2d.is_contiguous():
-            x_2d = x_2d.contiguous()
-        return self.down_proj(xe_linear.mlp_forward_xpu(
-            x_2d, self.gate_proj.weight.data, self.up_proj.weight.data,
-            x_2d.shape[0], x_2d.shape[1], self.gate_proj.out_len,
-            SILU, qtype
-        ))
-    return self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x))
-
-
 def baichuan_model_7b_forward(
     self,
     input_ids: torch.LongTensor = None,
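The deleted baichuan_mlp_forward was a model-specific copy of a SiLU-gated MLP with an optional fused XPU path; convert.py now registers the shared mlp_silu_forward instead (see the baichuan hunk in convert.py above). The unfused math both versions fall back to is one line; a sketch, with self assumed to carry gate_proj/up_proj/down_proj/act_fn as in the deleted code:

```python
import torch

def silu_mlp_fallback(self, x: torch.Tensor) -> torch.Tensor:
    # Identical to the last line of the deleted baichuan_mlp_forward; the
    # shared mlp_silu_forward presumably layers the fused xe_linear XPU path
    # on top of this same computation.
    return self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x))
```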
@@ -105,7 +73,9 @@ def baichuan_model_7b_forward(
     if use_cache:
         inputs = input_ids if input_ids is not None else inputs_embeds
         use_compress_kv = should_use_compresskv(inputs, inputs.shape[1])
-        use_quantize_kv = use_quantize_kv_cache(self.layers[0].mlp.up_proj, inputs)
+        use_quantize_kv = use_quantize_kv_cache(self.layers[0].mlp.up_proj, inputs,
+                                                self.config.num_attention_heads,
+                                                self.config.num_attention_heads)
        if use_compress_kv and not isinstance(past_key_values,
                                              DynamicCompressCache):
            if use_quantize_kv:
@@ -278,8 +248,6 @@ def baichuan_attention_forward_7b(
     key_states = key_states.to(hidden_states.dtype)
 
     # IPEX-LLM OPT: kv cache and quantize kv
-    use_quantize_kv = use_quantize_kv_cache(self.W_pack, hidden_states)
-
     # [CompressKV]
     if use_compresskv:
         enough_kv_room = is_enough_kv_cache_room_4_36(past_key_value,
@@ -290,6 +258,8 @@ def baichuan_attention_forward_7b(
             query_states, attention_mask, 1,
             self.config, enough_kv_room, KV_CACHE_ALLOC_BLOCK_LENGTH)
     else:
+        use_quantize_kv = use_quantize_kv_cache(self.W_pack, hidden_states,
+                                                self.num_heads, self.num_heads)
         key_states, value_states = update_past_key_value(
             past_key_value, key_states, value_states,
             kv_seq_len, use_quantize_kv, device
@@ -340,7 +310,8 @@ def baichuan_attention_forward_13b(
         kv_seq_len += past_key_value[0].shape[2]
 
     # IPEX-LLM OPT: kv cache and quantize kv
-    use_quantize_kv = use_quantize_kv_cache(self.W_pack, hidden_states)
+    use_quantize_kv = use_quantize_kv_cache(self.W_pack, hidden_states,
+                                            self.num_heads, self.num_heads)
     key_states, value_states = update_past_key_value(
         past_key_value, key_states, value_states,
         kv_seq_len, use_quantize_kv, device
ipex_llm/transformers/models/bert.py CHANGED
@@ -36,24 +36,13 @@ import math
 import torch
 from typing import Optional, Tuple
 from transformers.models.bert.modeling_bert import BertSelfAttention, BertEncoder
+from ipex_llm.transformers.models.common import merge_linear
 from ipex_llm.utils.common import invalidInputError
 
 
 def merge_qkv(module: torch.nn.Module):
     if isinstance(module, BertSelfAttention):
-        q_w = module.query.weight.data
-        k_w = module.key.weight.data
-        v_w = module.value.weight.data
-        q_b = module.query.bias.data
-        k_b = module.key.bias.data
-        v_b = module.value.bias.data
-        new_w = torch.cat([q_w, k_w, v_w], dim=0)
-        new_b = torch.cat([q_b, k_b, v_b], dim=-1)
-        qkv = torch.nn.Linear(0, 0, bias=True)
-        qkv.weight = torch.nn.Parameter(new_w, requires_grad=False)
-        qkv.bias = torch.nn.Parameter(new_b, requires_grad=False)
-        qkv.in_features = module.query.in_features
-        qkv.out_features = module.query.out_features * 3
+        qkv = merge_linear([module.query, module.key, module.value])
         module.qkv = qkv
         del module.query
         del module.key
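merge_qkv now delegates to the shared merge_linear helper from models/common.py. That helper is not shown in this diff; generalizing the inline q/k/v code it replaces, it plausibly looks like this sketch:

```python
import torch

def merge_linear(linears):
    # Fuse several Linear layers that consume the same input into one Linear
    # whose output is their concatenation, as the deleted inline code did.
    weight = torch.cat([lin.weight.data for lin in linears], dim=0)
    has_bias = linears[0].bias is not None
    merged = torch.nn.Linear(0, 0, bias=has_bias)
    merged.weight = torch.nn.Parameter(weight, requires_grad=False)
    if has_bias:
        bias = torch.cat([lin.bias.data for lin in linears], dim=-1)
        merged.bias = torch.nn.Parameter(bias, requires_grad=False)
    merged.in_features = linears[0].in_features
    merged.out_features = weight.size(0)
    return merged
```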
ipex_llm/transformers/models/chatglm2.py CHANGED
@@ -33,34 +33,6 @@ from ipex_llm.transformers.kv import DynamicCompressCache, DynamicCompressFp8Cac
 KV_CACHE_ALLOC_BLOCK_LENGTH = int(os.environ.get("KV_CACHE_ALLOC_BLOCK_LENGTH", 256))
 
 
-def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
-    """
-    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states
-    go from (batch, num_key_value_heads, seqlen, head_dim) to
-    (batch, num_attention_heads, seqlen, head_dim)
-    """
-    batch, num_key_value_heads, slen, head_dim = hidden_states.shape
-    if n_rep == 1:
-        return hidden_states
-    hidden_states = hidden_states[:, :, None, :, :].expand(batch, num_key_value_heads,
-                                                           n_rep, slen, head_dim)
-    return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim)
-
-
-def chatglm_rms_norm_forward(self, hidden_states):
-    if hidden_states.device.type == "xpu" and not (self.training and hidden_states.requires_grad):
-        import xe_addons
-        x_2d = hidden_states.reshape(-1, hidden_states.size(-1)).contiguous()
-        output = xe_addons.rms_norm(self.weight, x_2d, self.eps)
-        return output.reshape(hidden_states.shape)
-
-    input_dtype = hidden_states.dtype
-    hidden_states = hidden_states.to(torch.float32)
-    variance = hidden_states.pow(2).mean(-1, keepdim=True)
-    hidden_states = hidden_states * torch.rsqrt(variance + self.eps)
-    return self.weight * hidden_states.to(input_dtype)
-
-
 def chatglm2_model_forward(
     self,
     input_ids,
@@ -91,8 +63,13 @@ def chatglm2_model_forward(
 
     if use_cache:
         use_compress_kv = should_use_compresskv(input_ids, input_ids.shape[1])
+        n_heads = self.config.num_attention_heads
+        if self.config.multi_query_attention:
+            n_kv_heads = self.config.multi_query_group_num
+        else:
+            n_kv_heads = n_heads
         use_quantize_kv = use_quantize_kv_cache(self.encoder.layers[0].mlp.gate_proj,
-                                                input_ids)
+                                                input_ids, n_heads, n_kv_heads)
         if use_compress_kv and not isinstance(past_key_values,
                                               DynamicCompressCache):
             if use_quantize_kv:
@@ -285,8 +262,6 @@ def chatglm2_attention_forward(
     key_states[..., :rot_dim] = k_rot[...]
 
     # IPEX-LLM OPT: kv cache and quantize kv
-    use_quantize_kv = use_quantize_kv_cache(self.query_key_value, query_states)
-
     # [CompressKV]
     if use_compresskv:
         from transformers.configuration_utils import PretrainedConfig
@@ -300,6 +275,8 @@ def chatglm2_attention_forward(
             self.config, enough_kv_room, KV_CACHE_ALLOC_BLOCK_LENGTH
         )
     else:
+        use_quantize_kv = use_quantize_kv_cache(self.query_key_value, query_states,
+                                                n_head, n_kv_head)
         key_states, value_states = update_past_key_value(
             past_key_value, key_states, value_states,
             kv_seq_len, use_quantize_kv, hidden_states.device
ipex_llm/transformers/models/chatglm4.py CHANGED
@@ -55,8 +55,13 @@ def chatglm4_model_forward(
     if use_cache:
         inputs = input_ids if input_ids is not None else inputs_embeds
         use_compress_kv = should_use_compresskv(inputs, inputs.shape[1])
-        use_quantize_kv = use_quantize_kv_cache(self.encoder.layers[0].mlp.gate_proj,
-                                                inputs)
+        n_heads = self.config.num_attention_heads
+        if self.config.multi_query_attention:
+            n_kv_heads = self.config.multi_query_group_num
+        else:
+            n_kv_heads = n_heads
+        use_quantize_kv = use_quantize_kv_cache(self.encoder.layers[0].mlp.gate_proj, inputs,
+                                                n_heads, n_kv_heads)
         if use_compress_kv and not isinstance(past_key_values,
                                               DynamicCompressCache):
             if use_quantize_kv:
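chatglm2_model_forward and chatglm4_model_forward derive the KV-head count the same way, because ChatGLM configs signal multi-query attention through multi_query_attention and multi_query_group_num rather than the num_key_value_heads field other models use. A condensed restatement of the added logic (the helper name is ours, not from the source):

```python
def chatglm_head_counts(config):
    # (num_heads, num_kv_heads) as derived in the hunks above: multi-query
    # models keep their KV-head count in multi_query_group_num.
    n_heads = config.num_attention_heads
    if config.multi_query_attention:
        return n_heads, config.multi_query_group_num
    return n_heads, n_heads
```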
@@ -211,8 +216,6 @@ def chatglm4_attention_forward(
     key_states[..., :rot_dim] = k_rot[...]
 
     # IPEX-LLM OPT: kv cache and quantize kv
-    use_quantize_kv = use_quantize_kv_cache(self.query_key_value, query_states)
-
     # [CompressKV]
     if use_compresskv:
         from transformers.configuration_utils import PretrainedConfig
@@ -226,6 +229,8 @@ def chatglm4_attention_forward(
             self.config, enough_kv_room, KV_CACHE_ALLOC_BLOCK_LENGTH
         )
     else:
+        use_quantize_kv = use_quantize_kv_cache(self.query_key_value, query_states,
+                                                n_head, n_kv_head)
         key_states, value_states = update_past_key_value(
             past_key_value, key_states, value_states,
             kv_seq_len, use_quantize_kv, hidden_states.device
ipex_llm/transformers/models/chatglm4v.py CHANGED
@@ -230,7 +230,7 @@ def chatglm4v_attention_forward(
     key_states[..., :rot_dim] = k_rot[...]
 
     # IPEX-LLM OPT: kv cache and quantize kv
-    use_quantize_kv = use_quantize_kv_cache(self.query_key_value, query_states)
+    use_quantize_kv = use_quantize_kv_cache(self.query_key_value, query_states, n_head, n_kv_head)
     key_states, value_states = update_past_key_value(
         past_key_value, key_states, value_states,
         kv_seq_len, use_quantize_kv, hidden_states.device
ipex_llm/transformers/models/common.py CHANGED
@@ -157,8 +157,10 @@ def rms_norm_forward(self, hidden_states: torch.Tensor):
     weight = self.weight
     if hasattr(self, "variance_epsilon"):
         eps = self.variance_epsilon
-    else:
+    elif hasattr(self, "epsilon"):
         eps = self.epsilon
+    else:
+        eps = self.eps
 
     if hidden_states.device.type == 'xpu' and hidden_states.dtype in [torch.float, torch.half]:
         import xe_addons
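This is the change that lets a single rms_norm_forward replace chatglm_rms_norm_forward (eps) and baichuan_13b_rms_norm_forward (epsilon), both deleted above: the epsilon attribute is resolved by name, trying the HF-style variance_epsilon first, then epsilon, then eps. The CPU fallback math, identical across all of the deleted variants, is:

```python
import torch

def rms_norm_reference(weight: torch.Tensor, hidden_states: torch.Tensor,
                       eps: float) -> torch.Tensor:
    # Plain-PyTorch RMSNorm as in the deleted per-model forwards; on XPU the
    # patched forward instead calls the fused xe_addons.rms_norm kernel.
    input_dtype = hidden_states.dtype
    hidden_states = hidden_states.to(torch.float32)
    variance = hidden_states.pow(2).mean(-1, keepdim=True)
    hidden_states = hidden_states * torch.rsqrt(variance + eps)
    return weight * hidden_states.to(input_dtype)
```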
ipex_llm/transformers/models/glm.py CHANGED
@@ -147,7 +147,7 @@ def glm_model_forward_wrapper(origin_forward):
        use_cache = use_cache if use_cache is not None else self.config.use_cache
        use_cache = use_cache or inputs.device.type == 'xpu'
        use_quantize_kv = use_quantize_kv_cache(self.layers[0].mlp.down_proj, inputs,
-                                               self.config.num_attention_heads //
+                                               self.config.num_attention_heads,
                                                self.config.num_key_value_heads)
 
        if use_cache:
ipex_llm/transformers/models/internlm.py CHANGED
@@ -87,7 +87,8 @@ def internlm_attention_forward(
     )
 
     # IPEX-LLM OPT: kv cache and quantzie kv cache
-    use_quantize_kv = use_quantize_kv_cache(self.qkv_proj, hidden_states)
+    use_quantize_kv = use_quantize_kv_cache(self.qkv_proj, hidden_states,
+                                            self.num_heads, self.num_heads)
     key_states, value_states = update_past_key_value(
         past_key_value, key_states, value_states,
         kv_seq_len, use_quantize_kv, hidden_states.device
@@ -171,7 +172,8 @@ def internlm2_attention_forward(
     )
 
     # IPEX-LLM OPT: kv cache and quantzie kv cache
-    use_quantize_kv = use_quantize_kv_cache(self.wqkv, hidden_states)
+    use_quantize_kv = use_quantize_kv_cache(self.wqkv, hidden_states,
+                                            self.num_heads, self.num_key_value_heads)
     key_states, value_states = update_past_key_value(
         past_key_value, key_states, value_states,
         kv_seq_len, use_quantize_kv, hidden_states.device
@@ -346,7 +348,8 @@ def internlm_xcomposser2_attention_forward(
         query_states, key_states, cos, sin, position_ids, "internlm")
 
     # IPEX-LLM OPT: kv cache and quantzie kv cache
-    use_quantize_kv = use_quantize_kv_cache(self.wqkv, hidden_states)
+    use_quantize_kv = use_quantize_kv_cache(self.wqkv, hidden_states,
+                                            self.num_heads, self.num_key_value_heads)
     key_states, value_states = update_past_key_value(
         past_key_value, key_states, value_states,
         kv_seq_len, use_quantize_kv, device
ipex_llm/transformers/models/llama.py CHANGED
@@ -72,7 +72,7 @@ def llama_model_forward(
     use_cache = True if inputs.device.type == "xpu" else use_cache
     use_quantize_kv = use_quantize_kv_cache(
         self.layers[0].mlp.down_proj, inputs,
-        self.config.num_attention_heads // self.config.num_key_value_heads
+        self.config.num_attention_heads, self.config.num_key_value_heads
     )
     use_compresskv = should_use_compresskv(inputs, inputs.shape[1]) or \
         isinstance(past_key_values, DynamicCompressCache)
ipex_llm/transformers/models/minicpm.py CHANGED
@@ -159,7 +159,7 @@ def minicpm_model_forward_wrapper(origin_forward):
        # IPEX-LLM OPT: kv cache and quantize kv cache
        inputs = input_ids if input_ids is not None else inputs_embeds
        use_quantize_kv = use_quantize_kv_cache(self.layers[0].mlp.up_proj, inputs,
-                                               self.config.num_attention_heads //
+                                               self.config.num_attention_heads,
                                                self.config.num_key_value_heads)
        use_compress_kv = should_use_compresskv(inputs, inputs.shape[1]) or \
            isinstance(past_key_values, DynamicCompressCache)
ipex_llm/transformers/models/minicpm3.py CHANGED
@@ -66,7 +66,9 @@ def minicpm3_model_forward_wrapper(origin_forward):
        inputs = input_ids if input_ids is not None else inputs_embeds
        use_cache = use_cache if use_cache is not None else self.config.use_cache
        use_cache = True if inputs.device.type == "xpu" else use_cache
-        use_quantize_kv = use_quantize_kv_cache(self.layers[0].mlp.down_proj, inputs)
+        num_heads, num_kv_heads = self.config.num_attention_heads, self.config.num_key_value_heads
+        use_quantize_kv = use_quantize_kv_cache(self.layers[0].mlp.down_proj, inputs,
+                                                num_heads, num_kv_heads)
        if use_cache:
            if use_quantize_kv and not isinstance(past_key_values, DynamicFp8Cache):
                past_key_values = DynamicFp8Cache.from_legacy_cache(past_key_values)
ipex_llm/transformers/models/mistral.py CHANGED
@@ -71,7 +71,7 @@ def mistral_model_forward(
     use_cache = use_cache if use_cache is not None else self.config.use_cache
     use_cache = use_cache or inputs.device.type == 'xpu'
     use_quantize_kv = use_quantize_kv_cache(self.layers[0].mlp.down_proj, inputs,
-                                            self.config.num_attention_heads //
+                                            self.config.num_attention_heads,
                                             self.config.num_key_value_heads)
     use_compress_kv = should_use_compresskv(inputs, inputs.size(1)) or \
         isinstance(past_key_values, DynamicCompressCache)
ipex_llm/transformers/models/mllama.py CHANGED
@@ -113,7 +113,7 @@ def mllama_text_model_forward(
     use_cache = True if inputs.device.type == "xpu" else use_cache
     use_quantize_kv = use_quantize_kv_cache(
         self.layers[0].mlp.down_proj, inputs,
-        self.config.num_attention_heads // self.config.num_key_value_heads
+        self.config.num_attention_heads, self.config.num_key_value_heads
     )
     if use_cache:
         if use_quantize_kv and not isinstance(past_key_values, DynamicFp8Cache):
ipex_llm/transformers/models/phi3.py CHANGED
@@ -249,7 +249,9 @@ def phi3_model_forward_wrapper(origin_model_forward):
        # IPEX-LLM OPT: kv cache and quantize kv cache and sdp
        use_cache = use_cache if use_cache is not None else self.config.use_cache
        inputs = input_ids if input_ids is not None else inputs_embeds
-        use_quantize_kv = use_quantize_kv_cache(self.layers[0].mlp.down_proj, inputs)
+        num_heads, num_kv_heads = self.config.num_attention_heads, self.config.num_key_value_heads
+        use_quantize_kv = use_quantize_kv_cache(self.layers[0].mlp.down_proj, inputs,
+                                                num_heads, num_kv_heads)
        use_compress_kv = should_use_compresskv(inputs, inputs.shape[1]) or \
            isinstance(past_key_values, DynamicCompressCache)
        if use_cache:
@@ -305,7 +307,9 @@ def phi3v_model_forward_wrapper(origin_model_forward):
    ):
        # IPEX-LLM OPT: kv cache and quantize kv cache and sdp
        use_cache = use_cache if use_cache is not None else self.config.use_cache
-        use_quantize_kv = use_quantize_kv_cache(self.layers[0].mlp.down_proj, input_ids)
+        num_heads, num_kv_heads = self.config.num_attention_heads, self.config.num_key_value_heads
+        use_quantize_kv = use_quantize_kv_cache(self.layers[0].mlp.down_proj, input_ids,
+                                                num_heads, num_kv_heads)
        if use_cache:
            if use_quantize_kv and not isinstance(past_key_values, DynamicFp8Cache):
                past_key_values = DynamicFp8Cache.from_legacy_cache(past_key_values)
ipex_llm/transformers/models/qwen.py CHANGED
@@ -107,7 +107,8 @@ def qwen_attention_forward(
     query_states = query_states * logn_tensor.type_as(query_states).expand_as(query_states)
 
     # IPEX-LLM OPT: kv cache and quantzie kv cache
-    use_quantize_kv = use_quantize_kv_cache(self.c_attn, hidden_states)
+    use_quantize_kv = use_quantize_kv_cache(self.c_attn, hidden_states,
+                                            self.num_heads, self.num_heads)
     key_states, value_states = update_past_key_value(
         past_key_value, key_states, value_states,
         kv_seq_len, use_quantize_kv, device
@@ -205,7 +206,8 @@ def qwen_attention_forward_registered(
     query_states = query_states * logn_tensor.type_as(query_states).expand_as(query_states)
 
     # IPEX-LLM OPT: kv cache and quantzie kv cache
-    use_quantize_kv = use_quantize_kv_cache(self.c_attn, hidden_states)
+    use_quantize_kv = use_quantize_kv_cache(self.c_attn, hidden_states,
+                                            self.num_heads, self.num_heads)
     key_states, value_states = update_past_key_value(
         past_key_value, key_states, value_states,
         kv_seq_len, use_quantize_kv, device
ipex_llm/transformers/models/qwen2.py CHANGED
@@ -113,10 +113,10 @@ def qwen2_model_forward(
     # ipex-llm changes start
     # IPEX-LLM OPT: kv cache and quantize kv cache
     inputs = input_ids if input_ids is not None else inputs_embeds
+    num_heads, num_kv_heads = self.config.num_attention_heads, self.config.num_key_value_heads
     use_quantize_kv = (
         self.config.hidden_size != 3584  # disable quantize kv in specific model
-        and use_quantize_kv_cache(self.layers[0].mlp.up_proj, inputs,
-                                  self.config.num_attention_heads//self.config.num_key_value_heads)
+        and use_quantize_kv_cache(self.layers[0].mlp.up_proj, inputs, num_heads, num_kv_heads)
     )
     use_compress_kv = should_use_compresskv(inputs, inputs.shape[1]) or \
         isinstance(past_key_values, DynamicCompressCache)
@@ -305,10 +305,11 @@ def qwen2_model_forward_4_42(
 
     # ipex-llm changes start
     # IPEX-LLM OPT: kv cache and quantize kv cache
+    num_heads, num_kv_heads = self.config.num_attention_heads, self.config.num_key_value_heads
     use_quantize_kv = (
         self.config.hidden_size != 3584  # disable quantize kv in specific model
         and use_quantize_kv_cache(self.layers[0].mlp.up_proj, inputs_embeds,
-                                  self.config.num_attention_heads//self.config.num_key_value_heads)
+                                  num_heads, num_kv_heads)
     )
     use_compress_kv = should_use_compresskv(inputs_embeds, inputs_embeds.shape[1]) or \
         isinstance(past_key_values, DynamicCompressCache)
ipex_llm/transformers/models/qwen2_moe.py CHANGED
@@ -73,8 +73,10 @@ def qwen2moe_model_forward(
     return_dict: Optional[bool] = None,
 ):
     use_cache = use_cache if use_cache is not None else self.config.use_cache
-    input = input_ids if input_ids is not None else inputs_embeds
-    use_quantize_kv = use_quantize_kv_cache(self.layers[0].mlp.shared_expert.up_proj, input)
+    inputs = input_ids if input_ids is not None else inputs_embeds
+    num_heads, num_kv_heads = self.config.num_attention_heads, self.config.num_key_value_heads
+    use_quantize_kv = use_quantize_kv_cache(self.layers[0].mlp.shared_expert.up_proj, inputs,
+                                            num_heads, num_kv_heads)
     if use_cache:
         if use_quantize_kv and not isinstance(past_key_values, DynamicFp8Cache):
             past_key_values = DynamicFp8Cache.from_legacy_cache(past_key_values)
ipex_llm/transformers/models/qwen2_vl.py CHANGED
@@ -88,7 +88,9 @@ def qwen2_vl_model_forward(
     # IPEX-LLM OPT start: kv cache and quantize kv cache
     inputs = input_ids if input_ids is not None else inputs_embeds
     use_cache = True if inputs.device.type == "xpu" else use_cache
-    use_quantize_kv = use_quantize_kv_cache(self.layers[0].mlp.down_proj, inputs)
+    num_heads, num_kv_heads = self.config.num_attention_heads, self.config.num_key_value_heads
+    use_quantize_kv = use_quantize_kv_cache(self.layers[0].mlp.down_proj, inputs,
+                                            num_heads, num_kv_heads)
     if use_cache:
         if use_quantize_kv and not isinstance(past_key_values, DynamicFp8Cache):
             past_key_values = DynamicFp8Cache.from_legacy_cache(past_key_values)
ipex_llm/transformers/models/stablelm.py CHANGED
@@ -69,8 +69,10 @@ def stablelm_model_forward(
 ):
     # IPEX-LLM OPT: kv cache and quantize kv cache
     use_cache = use_cache if use_cache is not None else self.config.use_cache
+    num_heads, num_kv_heads = self.config.num_attention_heads, self.config.num_key_value_heads
     use_quantize_kv = (self.layers[0].self_attn.head_dim in [64, 80, 96, 128]
-                       and use_quantize_kv_cache(self.layers[0].mlp.up_proj, input_ids))
+                       and use_quantize_kv_cache(self.layers[0].mlp.up_proj, input_ids,
+                                                 num_heads, num_kv_heads))
     if use_cache:
         if use_quantize_kv and not isinstance(past_key_values, DynamicFp8Cache):
             past_key_values = DynamicFp8Cache.from_legacy_cache(past_key_values)
ipex_llm/transformers/models/starcoder2.py CHANGED
@@ -132,7 +132,9 @@ def model_forward(
     return_dict: Optional[bool] = None,
 ):
     use_cache = use_cache if use_cache is not None else self.config.use_cache
-    use_quantize_kv = use_quantize_kv_cache(self.layers[0].mlp.c_fc, input_ids)
+    num_heads, num_kv_heads = self.config.num_attention_heads, self.config.num_key_value_heads
+    use_quantize_kv = use_quantize_kv_cache(self.layers[0].mlp.c_fc, input_ids,
+                                            num_heads, num_kv_heads)
     if use_cache:
         if use_quantize_kv and not isinstance(past_key_values, DynamicFp8Cache):
             past_key_values = DynamicFp8Cache.from_legacy_cache(past_key_values)
ipex_llm/transformers/models/utils.py CHANGED
@@ -74,7 +74,8 @@ def append_kv_cache(cache_k, cache_v, key_states, value_states):
     return new_cache_k, new_cache_v
 
 
-def use_quantize_kv_cache(linear: torch.nn.Module, x: torch.Tensor, kv_group: int = 1) -> bool:
+def use_quantize_kv_cache(linear: torch.nn.Module, x: torch.Tensor,
+                          num_heads: int, num_kv_heads: int) -> bool:
     if os.environ.get("BIGDL_QUANTIZE_KV_CACHE", None) is not None:
         warnings.warn(
             "`BIGDL_QUANTIZE_KV_CACHE` is deprecated and will be removed in future releases. "
@@ -90,8 +91,11 @@ def use_quantize_kv_cache(linear: torch.nn.Module, x: torch.Tensor, kv_group: in
     else:
         device_name = get_xpu_device_name(x.device)
         return (
-            device_name in ["mtl", "lnl", "arl"] and kv_group == 1
-            or device_name in ["arc", "bmg"] and x.size(0) > 1
+            num_kv_heads >= 4
+            and (
+                device_name in ["mtl", "lnl", "arl"] and num_heads // num_kv_heads <= 4
+                or device_name in ["arc", "bmg"] and x.size(0) > 1
+            )
         )
 
 
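This is the behavioral core of the release: use_quantize_kv_cache now receives both head counts instead of a precomputed kv_group ratio, and a model only gets the quantized (FP8) KV cache if it has at least 4 KV heads, with the old per-device conditions re-expressed in terms of the GQA ratio. Restated standalone (env-var overrides from the surrounding function omitted):

```python
def should_quantize_kv(device_name: str, num_heads: int,
                       num_kv_heads: int, batch_size: int) -> bool:
    # Mirrors the new return expression above: models with fewer than 4 KV
    # heads never quantize; on mtl/lnl/arl the GQA ratio must be at most 4,
    # while on arc/bmg quantization additionally requires batch size > 1.
    return num_kv_heads >= 4 and (
        device_name in ["mtl", "lnl", "arl"] and num_heads // num_kv_heads <= 4
        or device_name in ["arc", "bmg"] and batch_size > 1
    )
```

This also explains the mechanical caller updates throughout the diff: MHA models (baichuan, qwen, internlm, yuan) pass num_heads twice, GQA models pass (num_attention_heads, num_key_value_heads), and ChatGLM derives its counts from multi_query_group_num.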
ipex_llm/transformers/models/yuan.py CHANGED
@@ -158,7 +158,8 @@ def yuan_attention_forward(
                                                 "yuan")
 
     # IPEX-LLM OPT: kv cache and quantzie kv cache
-    use_quantize_kv = use_quantize_kv_cache(self.qk_proj, hidden_states)
+    use_quantize_kv = use_quantize_kv_cache(self.qk_proj, hidden_states,
+                                            self.num_heads, self.num_heads)
     key_states, value_states = update_past_key_value(
         None if past_key_value is None else (past_key_value[0], past_key_value[1]),
         key_states, value_states,
{ipex_llm-2.2.0b20250108.dist-info → ipex_llm-2.2.0b20250109.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ipex-llm
-Version: 2.2.0b20250108
+Version: 2.2.0b20250109
 Summary: Large Language Model Develop Toolkit
 Home-page: https://github.com/intel-analytics/ipex-llm
 Author: BigDL Authors
@@ -27,10 +27,10 @@ Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'all'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'all'
 Provides-Extra: cpp
-Requires-Dist: bigdl-core-cpp ==2.6.0b20250108 ; extra == 'cpp'
+Requires-Dist: bigdl-core-cpp ==2.6.0b20250109 ; extra == 'cpp'
 Requires-Dist: setuptools ; extra == 'cpp'
 Provides-Extra: cpp-arl
-Requires-Dist: bigdl-core-cpp ==2.6.0b20250108 ; extra == 'cpp-arl'
+Requires-Dist: bigdl-core-cpp ==2.6.0b20250109 ; extra == 'cpp-arl'
 Requires-Dist: setuptools ; extra == 'cpp-arl'
 Requires-Dist: onednn-devel ==2024.1.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
 Requires-Dist: onednn ==2024.1.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
@@ -67,7 +67,7 @@ Requires-Dist: transformers ==4.40.0 ; extra == 'npu'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'npu'
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'npu'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'npu'
-Requires-Dist: bigdl-core-npu ==2.6.0b20250108 ; (platform_system == "Windows") and extra == 'npu'
+Requires-Dist: bigdl-core-npu ==2.6.0b20250109 ; (platform_system == "Windows") and extra == 'npu'
 Provides-Extra: serving
 Requires-Dist: py-cpuinfo ; extra == 'serving'
 Requires-Dist: fschat[model_worker,webui] ==0.2.36 ; extra == 'serving'
@@ -87,9 +87,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250108 ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250108 ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250108 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250109 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250109 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250109 ; extra == 'xpu'
 Provides-Extra: xpu-2-1
 Requires-Dist: py-cpuinfo ; extra == 'xpu-2-1'
 Requires-Dist: protobuf ; extra == 'xpu-2-1'
@@ -104,9 +104,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu-2-1'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250108 ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250108 ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250108 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250109 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250109 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250109 ; extra == 'xpu-2-1'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-1'
 Requires-Dist: dpcpp-cpp-rt ==2024.0.2 ; (platform_system == "Windows") and extra == 'xpu-2-1'
 Requires-Dist: mkl-dpcpp ==2024.0.0 ; (platform_system == "Windows") and extra == 'xpu-2-1'
@@ -124,7 +124,7 @@ Requires-Dist: setuptools ; extra == 'xpu-2-6'
 Requires-Dist: torch ==2.6.0+xpu ; extra == 'xpu-2-6'
 Requires-Dist: torchvision ==0.21.0+xpu ; extra == 'xpu-2-6'
 Requires-Dist: torchaudio ==2.6.0+xpu ; extra == 'xpu-2-6'
-Requires-Dist: bigdl-core-xe-all ==2.6.0b20250108 ; extra == 'xpu-2-6'
+Requires-Dist: bigdl-core-xe-all ==2.6.0b20250109 ; extra == 'xpu-2-6'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-6'
 Provides-Extra: xpu-arc
 Requires-Dist: py-cpuinfo ; extra == 'xpu-arc'
@@ -137,9 +137,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arc'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arc'
 Requires-Dist: tabulate ; extra == 'xpu-arc'
 Requires-Dist: setuptools ; extra == 'xpu-arc'
-Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250108 ; extra == 'xpu-arc'
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250108 ; extra == 'xpu-arc'
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250108 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250109 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250109 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250109 ; extra == 'xpu-arc'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arc'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
@@ -160,9 +160,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arl'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arl'
 Requires-Dist: tabulate ; extra == 'xpu-arl'
 Requires-Dist: setuptools ; extra == 'xpu-arl'
-Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250108 ; extra == 'xpu-arl'
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250108 ; extra == 'xpu-arl'
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250108 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250109 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250109 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250109 ; extra == 'xpu-arl'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arl'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
@@ -183,9 +183,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-lnl'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-lnl'
 Requires-Dist: tabulate ; extra == 'xpu-lnl'
 Requires-Dist: setuptools ; extra == 'xpu-lnl'
-Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250108 ; extra == 'xpu-lnl'
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250108 ; extra == 'xpu-lnl'
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250108 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250109 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250109 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250109 ; extra == 'xpu-lnl'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-lnl'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
{ipex_llm-2.2.0b20250108.dist-info → ipex_llm-2.2.0b20250109.dist-info}/RECORD CHANGED
@@ -41,35 +41,35 @@ ipex_llm/langchain/llms/transformerspipelinellm.py,sha256=vm522YPPwWxxAPVvQBtxRf
 ipex_llm/langchain/vllm/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
 ipex_llm/langchain/vllm/vllm.py,sha256=6dxc-ZISZQrJilEa_HA827l75Dv9rcHpY_G6FdJ8BVs,7793
 ipex_llm/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ipex_llm/libs/bloom-api.dll,sha256=t7Nyzwaw_n5nvLlhgJiwSqNg1c7YBfenefXc183BRyk,36352
-ipex_llm/libs/bloom.dll,sha256=ovvFDBSr66rJ8qI0k3q80OhM_L_RdhkdFIyMdhuNN60,506880
-ipex_llm/libs/gptneox-api.dll,sha256=G6BNZuxVROD4tACkc6wKvB8E0Nxxih5EMOtN8DQ2sdc,24576
-ipex_llm/libs/gptneox.dll,sha256=0jy7OHIcGS0R73wJQWN9B4gPyzep3hFRA9In9GNDUJE,567296
-ipex_llm/libs/libbloom_avx.dll,sha256=DlJmmwkqg8pfKwml32BcLF6XpXScL-YN92DOVFV_miI,535040
-ipex_llm/libs/libbloom_vnni.dll,sha256=rdLpFY-J6HLoed0qePhUTX-XJ0YVNX_R4sBZP95MOWI,506880
-ipex_llm/libs/libgptneox_avx.dll,sha256=-Jl7ni0hPESfPU9lA8ce_NkGkwpm0bSm7XFJNwfLMRc,595456
-ipex_llm/libs/libgptneox_vnni.dll,sha256=1Z7j6wAopq1o8uuTK7M6MWiHAHV_hWk-t--ZY-pWRIQ,567808
-ipex_llm/libs/libllama_avx.dll,sha256=mri2vXRQsWDaRXxVF3A6p6xZhv8MIpL13bmYcc210yk,589824
-ipex_llm/libs/libllama_vnni.dll,sha256=XBYn0-ITZ5HsbzS4bRjzepYGAm5-beVdO1euHjSCpjA,561664
-ipex_llm/libs/libstarcoder_avx.dll,sha256=-BCtiz3zd3b6MKD-CwHtzFsPt18aixsl92kHFIqxips,626688
-ipex_llm/libs/libstarcoder_vnni.dll,sha256=tXI6hrDAt5a3mrdtFrhdwl0HSc7pknd_LwpsIYzk4gY,598528
-ipex_llm/libs/llama-api.dll,sha256=_9ho1AFFsg44Zw79bQ7bzo55tgG35wLHUr_EPEBIgwA,25600
-ipex_llm/libs/llama.dll,sha256=53Ox4JEoucwN7GZwfp5DcQEQ_sGFFG5Ltf8yf7HRmjo,561152
-ipex_llm/libs/main-bloom.exe,sha256=KQWdkGczuRmIzaXw_706jkh2Y_SdVxEBpQyzeaBG_uA,103424
-ipex_llm/libs/main-gptneox.exe,sha256=oLK8czzwTAtSI6oC6fS5CndD6tMHsMZzJe1p-zOxN0Y,98816
-ipex_llm/libs/main-llama.exe,sha256=4keLFggPPNguBG1sbTwLmM_DExpiIwlKdXdF3IcQYKI,99840
-ipex_llm/libs/main-starcoder.exe,sha256=Re5Tv8QjdmWSEzQb5m72DJnRAW6sLKxDBFLkvcU33Eg,157696
-ipex_llm/libs/pipeline.dll,sha256=8MYtxJgWAt_meXvNz6W14uu-_w7o-DDBTkRajcToS9A,72704
-ipex_llm/libs/quantize-bloom.exe,sha256=vW97INEmsF-Krnqu7yVYMcNGrBcyQrmHSe9tAqQGGVg,126464
-ipex_llm/libs/quantize-bloom_vnni.exe,sha256=jyBpPHZa5qL757oigXZiGDTFpOtAYm8c8DS-DNXvlew,127488
-ipex_llm/libs/quantize-gptneox.exe,sha256=WR-2syRXF0yTgVmGkzwfXBWj7F9eSKVRLqaF05cGJco,104448
-ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=TwxQOINAsmkLbjhRpGxr_vyRFDtW_qXkWmfXQX75O9o,104960
-ipex_llm/libs/quantize-llama.exe,sha256=__XzuraAfAofURLra-jjo4XEgGFmfqZFEJoiwfNaQEo,109568
-ipex_llm/libs/quantize-llama_vnni.exe,sha256=MeWkIB0C5nJIM3czBltbFjhKeP3-THdR2Yezze6q1R4,110592
-ipex_llm/libs/quantize-starcoder.exe,sha256=hnVDigU8QyKDplAaZwe1BAraTHkBw-seGLs8qjHHRDc,127488
-ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=bcYLxXFJ_CHjXbbzCm5jpPQO13IDZePKoZf9vD6CUCc,128512
-ipex_llm/libs/starcoder-api.dll,sha256=fec8YDBzBoRQrnNbNsZZ7TljcBAUyk7fx19hpSiaxfY,21504
-ipex_llm/libs/starcoder.dll,sha256=zlVWa_5DzHEfCMGCIIOfy6POODnAcKFv1jp9BlcZgw4,598016
+ipex_llm/libs/bloom-api.dll,sha256=vLC0Hy_vsNY1cTKoFMJ7g9iPzKV5X_YL8U9N3k8FHFk,36352
+ipex_llm/libs/bloom.dll,sha256=hVQn4GiRqyD8P-_XbuQD_inkB74jxR8MutecrzrIQWQ,506880
+ipex_llm/libs/gptneox-api.dll,sha256=R7YnJz6n4TCdPq5NYWuSaanj-hGrYnzZfJ9P4z033Ws,24576
+ipex_llm/libs/gptneox.dll,sha256=Iyi18Z9buCMvglEGOPHYlpL4kSa4CADaXUMXJF4Bhqs,567296
+ipex_llm/libs/libbloom_avx.dll,sha256=8NZmJGuvsGISZDOO1wp6jtfh6xWGmLfFhS9RfGl8WEk,535040
+ipex_llm/libs/libbloom_vnni.dll,sha256=mTGMJzQiM7Mj0Qdrz7Xs7YWWypXijWum_LmSBR5Q2uE,506880
+ipex_llm/libs/libgptneox_avx.dll,sha256=617wPDx7BAimc3oOSF2X0jh9h6j4mdBFRBt-1Qkmz1A,595456
+ipex_llm/libs/libgptneox_vnni.dll,sha256=DmCL8HfUA8DbNQiZHDB77fpadSqnHSaxHpWRCw_FTDA,567808
+ipex_llm/libs/libllama_avx.dll,sha256=JHRfoFLp_B-3J5B1oa2FnKIIjKK6ZJaJrTfJXLgrGIA,589824
+ipex_llm/libs/libllama_vnni.dll,sha256=WxH8QfmfpuZ9rZJP2lcoXX5Zp9lonun-hhw26O40w78,561664
+ipex_llm/libs/libstarcoder_avx.dll,sha256=9Dqf2uWGA8XlGAjOqgMQiC-iNJYBhOhNiFk0GZTfwiQ,626688
+ipex_llm/libs/libstarcoder_vnni.dll,sha256=ob3olLuRXCGvpNuBYImrt92aejvhDsZpi-m_r75lIUY,598528
+ipex_llm/libs/llama-api.dll,sha256=1OMZVvZun0kQBd1EEQZUvSRJXHfaJgo9WYka4tAamGA,25600
+ipex_llm/libs/llama.dll,sha256=ZLuhZQAjPc76oJ-PO_yUlZi24qlzIBs8OXnR9-2LTGU,561152
+ipex_llm/libs/main-bloom.exe,sha256=T17qQytgapUQAmXSkH38RO21Ynb9NrOSiD3erxr4kJI,103424
+ipex_llm/libs/main-gptneox.exe,sha256=qskuY8ijrPRcBzTtGb8gB1PAoSvE4wkJe2GgiAAWS-4,98816
+ipex_llm/libs/main-llama.exe,sha256=t-mQ7luxLky6boib8wSRecL81DAdKF3HsLZ-oN4Agm4,99840
+ipex_llm/libs/main-starcoder.exe,sha256=WQ2L7L2XdmHxzt6L3uaHuWwTJVL9xg683y4_7avVVgk,157696
+ipex_llm/libs/pipeline.dll,sha256=_Kx68vo1DLJc57qF8aGEhdiCFbcAsm8xYQWeJHogVuM,72704
+ipex_llm/libs/quantize-bloom.exe,sha256=CTJhn-jYm4V99Fcv2R2mHkcYMfzQOU8IiqMKfqRK59A,126464
+ipex_llm/libs/quantize-bloom_vnni.exe,sha256=aauMlSe4e05rKB61u4PQdckBNWQzrISe5_HXkRCTCfI,127488
+ipex_llm/libs/quantize-gptneox.exe,sha256=QP-fDnaF6FElKal_kRh_N5nB-WUQKah8vG33lJQ3g9c,104448
+ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=PUgZZTq1a36tbtsgu-jnK0MHlsFQ-zPtAsAQ7PLczJU,104960
+ipex_llm/libs/quantize-llama.exe,sha256=uagQGbAgP8ripvyS0CREOQH0JhwHRi5dWeo_Q3criNc,109568
+ipex_llm/libs/quantize-llama_vnni.exe,sha256=fB7-E0PGSliaZDfu9bCl1hdcoD7xIMoRHiE6fFTJ75g,110592
+ipex_llm/libs/quantize-starcoder.exe,sha256=6OMrGkHL87jw18EVzlEj9NJpjTl7b3_7YOF_YRC24sI,127488
+ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=i6SaJyFmS_S2jjl9Cpa_5S-RyHo1_ABD0gx2maxTdfc,128512
+ipex_llm/libs/starcoder-api.dll,sha256=CgnbCueyY_M3QN-lskMQBPPVrfuKgy2Qt0aVV6wXPgQ,21504
+ipex_llm/libs/starcoder.dll,sha256=bc_dnjXNNVaXQlVdGp-a6IMZO3J2Ux9hwEQH7xoEaek,598016
 ipex_llm/llamaindex/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
 ipex_llm/llamaindex/llms/__init__.py,sha256=KP1lEdGqDuxPoxL1ZSH25Pm2kKMPJBWUTLR0ckSLMIU,1139
 ipex_llm/llamaindex/llms/bigdlllm.py,sha256=FQBzq1KOjfc6uofTXAha3O7TqpJkNfOFepXQmOVlbnI,26314
@@ -87,7 +87,7 @@ ipex_llm/serving/fastchat/tgi_api_protocol.py,sha256=brT3k3-V0NJrU4fRqUwWjC0O3iO
 ipex_llm/serving/fastchat/tgi_api_server.py,sha256=agNTAEiZPSuj3dEdIdYKwkoY0cXOUDX06DiM9VP2knQ,24418
 ipex_llm/serving/fastchat/vllm_worker.py,sha256=ZLz2Q9GxJO6r_LOiP6epgCRjBGk-K4EB1SNEWSJp5DA,11091
 ipex_llm/transformers/__init__.py,sha256=l4KkMkLe-pRC7b_kj6LCfeifgE-Uo33_Av_FwN9HnFA,1074
-ipex_llm/transformers/convert.py,sha256=APf2uHMgEeiAhsKm9dPgPWlyO0ADq2yHtZgovv9oczU,99101
+ipex_llm/transformers/convert.py,sha256=umI137wqV2d4itS0AJQoZcygeWBATpSJSDJ805cZ-SY,98499
 ipex_llm/transformers/convert_ipex.py,sha256=iKXo0n8fVFTOA2fNYYrByMFK0dovL-kLd2sVDk88AlQ,14334
 ipex_llm/transformers/embedding.py,sha256=bdgk59DvD4ZZyxRzewXOR7g56nThgO6uhIwk8QL7f-s,9299
 ipex_llm/transformers/kv.py,sha256=k4TU18LlA-Sbq9WNNQnfuzu3RSFBwFhmaV3BcGN5bAo,19191
@@ -137,45 +137,45 @@ ipex_llm/transformers/gguf/models/model_implement/yuan2/configuration_yuan.py,sh
 ipex_llm/transformers/gguf/models/model_implement/yuan2/yuan_hf_model.py,sha256=_AOGMV65XHxgTxIib7lgs49InopcecTzRwgtYR8NTUg,51084
 ipex_llm/transformers/models/__init__.py,sha256=tp2DcVkKg1-QvdYk7DY7rZvQWCDQ4ZjU8NAQ7Fclrpg,584
 ipex_llm/transformers/models/aquila.py,sha256=VZb5Drpo_fTxwcExZ397LygnsNPX2sVbie9_JeFudZI,5252
-ipex_llm/transformers/models/baichuan.py,sha256=oJCAEENSG8oQhJ-QPN2SiapARjAGdOM6nEbyCcYOMCo,19334
-ipex_llm/transformers/models/bert.py,sha256=bJNic2pt1kph0kBwdK5MRGyWupFfx2Ts0V3D1L-5kWo,6085
+ipex_llm/transformers/models/baichuan.py,sha256=cAQLmVG-3R8CSTGTcDy2JOOzVe-Ej8AXjIEIjvZBGlo,18376
+ipex_llm/transformers/models/bert.py,sha256=0Mm9jkvkzBxtc_z_GE1TcZoPz-HOg2Z2973ZEWgSwJk,5601
 ipex_llm/transformers/models/bloom.py,sha256=PxfzyYT-nFn3K5rZhTQjmcEjUUzAhUFzxIN4kzRlCuc,8103
 ipex_llm/transformers/models/chatglm.py,sha256=UHai1t2AUtGmF765_eHF8LUMVQzp_oCBx8TJB21WrHk,12597
-ipex_llm/transformers/models/chatglm2.py,sha256=SGCABJdYQLW0zDarEoWrEQLuWlbq9iQhYU8ZeR1-ptQ,15957
-ipex_llm/transformers/models/chatglm4.py,sha256=AAhAFFDDas5DBQPfh2Mwl7a2v7taKf6xphoeeNNFaBI,16593
-ipex_llm/transformers/models/chatglm4v.py,sha256=tyjDDyF6FEgLAT24EG3i4-auxZvkwmeLIy0Hds4K5Yo,14105
-ipex_llm/transformers/models/common.py,sha256=4obQMGF02FCiXrHnFle9Fsx7C33b1FDt37qJJ4YgxRc,11578
+ipex_llm/transformers/models/chatglm2.py,sha256=KyAIX7zGVQDQuwwM3QMBNWZbTeMHEzKUIgAryT0voHc,14933
+ipex_llm/transformers/models/chatglm4.py,sha256=QvUehdaCePB3MNHyWg3dneDxmjtBdxYeKUyQUVcsgfM,16886
+ipex_llm/transformers/models/chatglm4v.py,sha256=L6y45M_wjS2_HqchmCUxRlQZUNuSNCGOiynAQrGh918,14124
+ipex_llm/transformers/models/common.py,sha256=Q3IEfGqvxoHyfIIF5s8qHmOJBBP3b2jyVAXk8C3b1Pg,11636
 ipex_llm/transformers/models/decilm.py,sha256=P-PBuDPf07GvKggLwJx_wPwIn6esN3rX8ai2JxRuZmE,5246
 ipex_llm/transformers/models/gemma.py,sha256=_E3Yw8Y45xyNVeLqyVKcpr8kjuICtETeL82cJ-bWJuU,9424
 ipex_llm/transformers/models/gemma2.py,sha256=2WZuv-FLzJyTJFaYxOuzJt47QE64M0lHnzAiO5T6ozI,8049
-ipex_llm/transformers/models/glm.py,sha256=PE43uks9lojndBBHFVXK1VWisHhbY-kuCmhq0CwmD4s,7204
+ipex_llm/transformers/models/glm.py,sha256=lmeEWd_W2O638VzVW4Gm6cJre5XZcg_QBmPs8NWqXsM,7202
 ipex_llm/transformers/models/gpt2.py,sha256=YSaNgK1uLCFDuIFqnKO0Mi-AsOZsYav-7pNf_NpKGdM,3445
 ipex_llm/transformers/models/gptbigcode.py,sha256=cP1_qGWoa43R2WacAMblShjku4QupcCZiLaPPAoOUs4,9101
 ipex_llm/transformers/models/gptneox.py,sha256=loRh1x_5S6BCeOr_s5xr-N_1SQHL3Y5IiUBAEyoMUqQ,6172
-ipex_llm/transformers/models/internlm.py,sha256=ZbIUMDwNRcrCeduXfbA_uq1AUEWawEt6CJRvQl3LkAg,17832
+ipex_llm/transformers/models/internlm.py,sha256=OifyiobRligleyZLpLBSe44A6Sq0uMG-8-NOcRCcT4Q,18080
 ipex_llm/transformers/models/internvl.py,sha256=Vx0vENIEQLX2M6P398mw5TOhpks0U8xf8rtRQvy94go,8154
-ipex_llm/transformers/models/llama.py,sha256=n1JG1uElMB8t3Hpae94S6YTO_5q2N5BUAhb7mncvA6E,8560
-ipex_llm/transformers/models/minicpm.py,sha256=_eYBYafQxnuqKo9ENNkua73KU5goU2z-dkaLlF5uHnA,10147
-ipex_llm/transformers/models/minicpm3.py,sha256=FhNS6mi2rg7dSdF_QQGrao3g9EC6XLn1MTKd-kd0wF0,9191
+ipex_llm/transformers/models/llama.py,sha256=NzpyQve_RC9ez1W-jWPLGZ80k_S1I5Rx5saAzCsDIoI,8558
+ipex_llm/transformers/models/minicpm.py,sha256=eaPNVNrep0_xGoELhZd886ff0ceoKqB6cusdAhd52eE,10145
+ipex_llm/transformers/models/minicpm3.py,sha256=11cYl8KM2hoIJNMAOZMxiwCu6dMhup9ric_OEn8-VrQ,9363
 ipex_llm/transformers/models/minicpmv.py,sha256=PP05b5iTnrMpiseCn8iJcxKJDnfq7WqXp9Mrch0kKZ0,9876
-ipex_llm/transformers/models/mistral.py,sha256=rE1GWQxXvF6aG-buPHDR13zeynDZEDIubPF4PiVhZbM,7451
-ipex_llm/transformers/models/mllama.py,sha256=ogpLmmN_OwcFUyjYB-oDC-l3uw8urFvUEc5edkjWHAk,10939
+ipex_llm/transformers/models/mistral.py,sha256=uVhkdXaq15v1P3QY0emVsA7SxUbAWChHEEXYN-drjpQ,7449
+ipex_llm/transformers/models/mllama.py,sha256=ZyRq9DTKsvk1AlRbr-z6ngjS3Sr_7YuGZ6-Yr1MBBAM,10937
 ipex_llm/transformers/models/mpt.py,sha256=z02NwHogJZVh-Mk4sYoIzR90SFIKhoNN_-ifsD907TQ,9540
 ipex_llm/transformers/models/phi.py,sha256=E6qz4EEuHIVGvaPo-wtLC5lz3iyMqTbAE_cRlcjQRKI,6670
-ipex_llm/transformers/models/phi3.py,sha256=jkiadJ85ToHpymY5GOM6orWlnx6LKN8_-v1MUcfGWPg,15159
+ipex_llm/transformers/models/phi3.py,sha256=Fo6PlZ24Gdm7eeeZOTMm1Bfh3U6P4rvq7-_2FHvp0vE,15503
 ipex_llm/transformers/models/phixtral.py,sha256=MDTMghcu7qAmZmRcUGqXXDXhSU3y_N59HRIXmlcjp5g,4890
-ipex_llm/transformers/models/qwen.py,sha256=XIJ_bLzediBURWU-OOS3H6WBIGXQue6jDdUHJsAabwY,19391
-ipex_llm/transformers/models/qwen2.py,sha256=b49HO4GSudwGJ3n6uHVno1oo3DgRt3jOjtQnLOB3cdY,25530
-ipex_llm/transformers/models/qwen2_moe.py,sha256=EA_OYxYAEgrvi7VpDW192AJXG9Fwe2aBtOAZPkOAJk4,19350
-ipex_llm/transformers/models/qwen2_vl.py,sha256=jIm4yZSd751BkRqgj3wR1QBkDIh-TMCLAMM8SZ8n6Qo,13419
+ipex_llm/transformers/models/qwen.py,sha256=A3WiVCzA7NLkcjp4zhFkZvKZzZWZlg0WFuVV_556TAI,19543
+ipex_llm/transformers/models/qwen2.py,sha256=JLaY9ZT7A22oO0G8K-nvjvKQDaIrKA5o-jEHvk_y3eI,25604
+ipex_llm/transformers/models/qwen2_moe.py,sha256=a0gYo-ngf8SxaEnBdZUJDnPS6Mkn_poDd8xqhx50icI,19516
+ipex_llm/transformers/models/qwen2_vl.py,sha256=NrhxlaPj7W-HUBmKc3CSTwZy1lkoZ9qDaxM4GvE0kHs,13583
 ipex_llm/transformers/models/qwen_vl.py,sha256=j7Nzzz2Qvynu9yrCXmoEfERjw43hXof5TbXIs7Ms-oY,17105
 ipex_llm/transformers/models/rwkv4.py,sha256=H4KMtxN0JA2ZTXnonHpsUUJ5xULemo-D1Jzl0ri_UY8,6123
 ipex_llm/transformers/models/rwkv5.py,sha256=OkRNj1pCAZg1z2Fw-I0DEnxLEdZyPeRSQ6msrkxLOCs,10710
 ipex_llm/transformers/models/sd.py,sha256=VvHV5u-0k2MgHu3NL9113hPj7DgfxqctuKzEEeNfRDU,5981
-ipex_llm/transformers/models/stablelm.py,sha256=RGQCYuQhYqtZ1j3RZkYi0_QvCRnUgUIPYxfBcLnElzg,6885
-ipex_llm/transformers/models/starcoder2.py,sha256=4P3mhRYf2Kreb1ESjrQGfy1puLMmZXgV35zf-Tksvao,6462
-ipex_llm/transformers/models/utils.py,sha256=isBCMMQP3j_opmda9XzD_dPk1ejvEXTztggbu1yIMSc,15439
-ipex_llm/transformers/models/yuan.py,sha256=1jRPebwAK2ENbyYokOmb4LSVo-szucWiygz9zTv-scs,7656
+ipex_llm/transformers/models/stablelm.py,sha256=fj-XtOnR6kggnFUQTMPCOOzolkPztN06WAv8QW-XRnI,7054
+ipex_llm/transformers/models/starcoder2.py,sha256=ONKvD7JCkRM0DI-R56x28QFBJ7CjD5hOZBQ_3WfOcNk,6626
+ipex_llm/transformers/models/utils.py,sha256=ihbWS5kQK2KHDVPkMhgjik3nM8B2fWf-E-z4BWNUstk,15568
+ipex_llm/transformers/models/yuan.py,sha256=JYAn_ZaSGK0NBJLEIxCACfAq084a66GFJkdd5NbpmMA,7732
 ipex_llm/transformers/npu_models/__init__.py,sha256=ulEUGLjaP48LCrVeury3UxLjXxKzRi0UpSG4bYu-7f8,585
 ipex_llm/transformers/npu_models/baichuan.py,sha256=fJtd7fBrttySghRUgfZTAdxLjsSNC-XL08HISsXigLE,4685
 ipex_llm/transformers/npu_models/baichuan_mp.py,sha256=tHhO-0v5z6IhxsfzAPYWXVbLrV_4z89DIb4JjE3207M,45026
@@ -243,11 +243,11 @@ ipex_llm/vllm/xpu/engine/__init__.py,sha256=pY_CpyuZd72fr6s32ejeKHKFW0K4vUU2rzZj
 ipex_llm/vllm/xpu/engine/engine.py,sha256=k4-D27WS_Gk3mA--w3HWAjPjb4Aiu043MVPi0ZoAUBc,5984
 ipex_llm/vllm/xpu/entrypoints/openai/api_server.py,sha256=GshTZFB8e4PWvqckfbmTOU6b0oLkNn7A-vzLuG9--j8,21544
 ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py,sha256=2rENA2ucynMaIjiZBEh2ez1o5vR32GaP514t39CD7KM,8676
-ipex_llm-2.2.0b20250108.data/scripts/ipex-llm-init.bat,sha256=HPtCYuDYwEatq7dAwOvdfVcHYCpAVdbj75K1qh0vQek,2578
-ipex_llm-2.2.0b20250108.data/scripts/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
-ipex_llm-2.2.0b20250108.data/scripts/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
-ipex_llm-2.2.0b20250108.dist-info/METADATA,sha256=NJp_uuPOJe8b5UQ8ASJbfzen2BGoc2DEM1ZInzr0X9E,12705
-ipex_llm-2.2.0b20250108.dist-info/WHEEL,sha256=6iYPr8vTHsyDK75jr9X0V3I9wPSVmtwr_8fdATBciGk,98
-ipex_llm-2.2.0b20250108.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
-ipex_llm-2.2.0b20250108.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
-ipex_llm-2.2.0b20250108.dist-info/RECORD,,
+ipex_llm-2.2.0b20250109.data/scripts/ipex-llm-init.bat,sha256=HPtCYuDYwEatq7dAwOvdfVcHYCpAVdbj75K1qh0vQek,2578
+ipex_llm-2.2.0b20250109.data/scripts/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
+ipex_llm-2.2.0b20250109.data/scripts/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
+ipex_llm-2.2.0b20250109.dist-info/METADATA,sha256=gPslIWSw_X5E5ULhQa8rOHeRo_UeBDXCAyPjBSPB-nU,12705
+ipex_llm-2.2.0b20250109.dist-info/WHEEL,sha256=6iYPr8vTHsyDK75jr9X0V3I9wPSVmtwr_8fdATBciGk,98
+ipex_llm-2.2.0b20250109.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
+ipex_llm-2.2.0b20250109.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
+ipex_llm-2.2.0b20250109.dist-info/RECORD,,