ipex-llm 2.2.0b20250101__py3-none-win_amd64.whl → 2.2.0b20250102__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. ipex_llm/libs/bloom-api.dll +0 -0
  2. ipex_llm/libs/bloom.dll +0 -0
  3. ipex_llm/libs/gptneox-api.dll +0 -0
  4. ipex_llm/libs/gptneox.dll +0 -0
  5. ipex_llm/libs/libbloom_avx.dll +0 -0
  6. ipex_llm/libs/libbloom_vnni.dll +0 -0
  7. ipex_llm/libs/libgptneox_avx.dll +0 -0
  8. ipex_llm/libs/libgptneox_vnni.dll +0 -0
  9. ipex_llm/libs/libllama_avx.dll +0 -0
  10. ipex_llm/libs/libllama_vnni.dll +0 -0
  11. ipex_llm/libs/libstarcoder_avx.dll +0 -0
  12. ipex_llm/libs/libstarcoder_vnni.dll +0 -0
  13. ipex_llm/libs/llama-api.dll +0 -0
  14. ipex_llm/libs/llama.dll +0 -0
  15. ipex_llm/libs/main-bloom.exe +0 -0
  16. ipex_llm/libs/main-gptneox.exe +0 -0
  17. ipex_llm/libs/main-llama.exe +0 -0
  18. ipex_llm/libs/main-starcoder.exe +0 -0
  19. ipex_llm/libs/pipeline.dll +0 -0
  20. ipex_llm/libs/quantize-bloom.exe +0 -0
  21. ipex_llm/libs/quantize-bloom_vnni.exe +0 -0
  22. ipex_llm/libs/quantize-gptneox.exe +0 -0
  23. ipex_llm/libs/quantize-gptneox_vnni.exe +0 -0
  24. ipex_llm/libs/quantize-llama.exe +0 -0
  25. ipex_llm/libs/quantize-llama_vnni.exe +0 -0
  26. ipex_llm/libs/quantize-starcoder.exe +0 -0
  27. ipex_llm/libs/quantize-starcoder_vnni.exe +0 -0
  28. ipex_llm/libs/starcoder-api.dll +0 -0
  29. ipex_llm/libs/starcoder.dll +0 -0
  30. ipex_llm/optimize.py +3 -1
  31. ipex_llm/transformers/convert.py +3 -2
  32. ipex_llm/transformers/low_bit_linear.py +8 -2
  33. ipex_llm/transformers/model.py +3 -1
  34. ipex_llm/transformers/models/baichuan.py +5 -11
  35. ipex_llm/transformers/models/chatglm.py +2 -2
  36. ipex_llm/transformers/models/qwen.py +34 -46
  37. ipex_llm/transformers/models/qwen2.py +5 -19
  38. ipex_llm/transformers/npu_model.py +3 -3
  39. ipex_llm/transformers/npu_models/convert.py +40 -18
  40. ipex_llm/transformers/npu_models/npu_llm_cpp.py +18 -9
  41. ipex_llm/transformers/npu_pipeline_model/qwen.py +4 -0
  42. {ipex_llm-2.2.0b20250101.dist-info → ipex_llm-2.2.0b20250102.dist-info}/METADATA +19 -19
  43. {ipex_llm-2.2.0b20250101.dist-info → ipex_llm-2.2.0b20250102.dist-info}/RECORD +49 -49
  44. {ipex_llm-2.2.0b20250101.data → ipex_llm-2.2.0b20250102.data}/scripts/ipex-llm-init.bat +0 -0
  45. {ipex_llm-2.2.0b20250101.data → ipex_llm-2.2.0b20250102.data}/scripts/llm-chat.ps1 +0 -0
  46. {ipex_llm-2.2.0b20250101.data → ipex_llm-2.2.0b20250102.data}/scripts/llm-cli.ps1 +0 -0
  47. {ipex_llm-2.2.0b20250101.dist-info → ipex_llm-2.2.0b20250102.dist-info}/WHEEL +0 -0
  48. {ipex_llm-2.2.0b20250101.dist-info → ipex_llm-2.2.0b20250102.dist-info}/entry_points.txt +0 -0
  49. {ipex_llm-2.2.0b20250101.dist-info → ipex_llm-2.2.0b20250102.dist-info}/top_level.txt +0 -0
Binary file
ipex_llm/libs/bloom.dll CHANGED
Binary file
Binary file
ipex_llm/libs/gptneox.dll CHANGED
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
ipex_llm/libs/llama.dll CHANGED
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
ipex_llm/optimize.py CHANGED
@@ -254,7 +254,9 @@ def optimize_model(model, low_bit='sym_int4', optimize_llm=True, modules_to_not_
254
254
  torch_dtype=torch_dtype,
255
255
  optimize_model=optimize_llm,
256
256
  modules_to_not_convert=modules_to_not_convert,
257
- cpu_embedding=cpu_embedding)
257
+ cpu_embedding=cpu_embedding,
258
+ disable_optimize_pre=kwargs.pop("disable_optimize_pre",
259
+ False))
258
260
  # add save_low_bit to pretrained model dynamically
259
261
  import types
260
262
  model._bigdl_config = dict()
@@ -1081,7 +1081,8 @@ def ggml_convert_low_bit(model, qtype, optimize_model=True,
1081
1081
  torch_dtype="auto",
1082
1082
  imatrix_data=None,
1083
1083
  embedding_qtype=None,
1084
- mixed_precision=False):
1084
+ mixed_precision=False,
1085
+ disable_optimize_pre=False):
1085
1086
  if qtype in ggml_tensor_qtype.values():
1086
1087
  index = list(ggml_tensor_qtype.values()).index(qtype)
1087
1088
  logger.info(f"Converting the current model to "
@@ -1104,7 +1105,7 @@ def ggml_convert_low_bit(model, qtype, optimize_model=True,
1104
1105
  model = _optimize_ipex(model, qtype)
1105
1106
  return model
1106
1107
 
1107
- if optimize_model:
1108
+ if optimize_model and not disable_optimize_pre:
1108
1109
  model = _optimize_pre(model, qtype)
1109
1110
 
1110
1111
  act_order = False
@@ -764,6 +764,7 @@ class FP16Linear(nn.Linear):
764
764
  # weigh_type = 3 means weight has been transposed by esimd method
765
765
  self.weight_type = 1
766
766
  self.optimize_lm_head = optimize_lm_head
767
+ self.disable_fp16_opt = False
767
768
 
768
769
  def forward(self, x: torch.Tensor):
769
770
  # only work for GPU
@@ -779,8 +780,11 @@ class FP16Linear(nn.Linear):
779
780
  self.weight.data = self.weight.data.to(x.dtype)
780
781
 
781
782
  if not self.use_esimd_kernel(x):
782
- if get_ipex_version() < "2.1.10+xpu" \
783
- or get_xpu_device_type(x) not in ["arc", "flex", "pvc"]:
783
+ if (
784
+ get_ipex_version() < "2.1.10+xpu"
785
+ or get_xpu_device_type(x) not in ["arc", "flex", "pvc"]
786
+ or self.disable_fp16_opt
787
+ ):
784
788
  if self.weight_type == 2:
785
789
  self.weight = torch.nn.Parameter(self.weight.transpose(0, 1).contiguous(),
786
790
  requires_grad=False)
@@ -845,6 +849,8 @@ class FP16Linear(nn.Linear):
845
849
 
846
850
  def use_esimd_kernel(self, x):
847
851
  gpu_type = get_xpu_device_type(x)
852
+ if self.disable_fp16_opt:
853
+ return False
848
854
  # esimd kernel can only be used for Arc and Flex
849
855
  if gpu_type not in ["arc", "flex"]:
850
856
  return False
@@ -445,6 +445,7 @@ class _BaseAutoModelClass:
445
445
  mixed_precision = kwargs.pop("mixed_precision", False)
446
446
  if embedding_qtype is not None:
447
447
  embedding_qtype = ggml_tensor_qtype[embedding_qtype]
448
+ disable_optimize_pre = kwargs.pop("disable_optimize_pre", False)
448
449
  _args = copy.deepcopy(args)
449
450
  _kwargs = copy.deepcopy(kwargs)
450
451
  awq_config = None
@@ -513,7 +514,8 @@ class _BaseAutoModelClass:
513
514
  torch_dtype=kwargs.get("torch_dtype", 'auto'),
514
515
  imatrix_data=imatrix_data,
515
516
  embedding_qtype=embedding_qtype,
516
- mixed_precision=mixed_precision)
517
+ mixed_precision=mixed_precision,
518
+ disable_optimize_pre=disable_optimize_pre)
517
519
 
518
520
  if disk_embedding:
519
521
  from ipex_llm.transformers.embedding import DiskEmbedding
@@ -29,7 +29,7 @@ from ipex_llm.transformers.models.utils import use_quantize_kv_cache, restore_fp
29
29
  should_use_compresskv
30
30
  from ipex_llm.transformers.models.utils import update_past_key_value
31
31
  from ipex_llm.transformers.models.utils import should_use_fuse_rope
32
- from ipex_llm.transformers.models.utils import use_flash_attention, use_sdp
32
+ from ipex_llm.transformers.models.utils import use_sdp
33
33
  from ipex_llm.transformers.models.utils import apply_rotary_pos_emb, SILU
34
34
  from ipex_llm.transformers.models.utils import mlp_fusion_check
35
35
  from ipex_llm.transformers.models.utils import is_enough_kv_cache_room_4_36
@@ -301,16 +301,10 @@ def baichuan_attention_forward_7b(
301
301
 
302
302
  # IPEX-LLM OPT: sdp
303
303
  attn_weights = None
304
- if use_flash_attention(query_states, key_states, attention_mask):
305
- attn_output = F.scaled_dot_product_attention(query_states.to(dtype=torch.float16),
306
- key_states.to(dtype=torch.float16),
307
- value_states.to(dtype=torch.float16),
308
- is_causal=True).to(hidden_states.dtype)
309
- else:
310
- attn_output = scaled_dot_product_attention(
311
- query_states, key_states, value_states,
312
- attention_mask, q_len == kv_seq_len
313
- )
304
+ attn_output = scaled_dot_product_attention(
305
+ query_states, key_states, value_states,
306
+ attention_mask, q_len == kv_seq_len
307
+ )
314
308
 
315
309
  attn_output = attn_output.transpose(1, 2).contiguous()
316
310
  attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
@@ -23,7 +23,7 @@ import torch.utils.checkpoint
23
23
  import torch.nn.functional as F
24
24
  from typing import Optional, Tuple
25
25
  from ipex_llm.transformers.models.utils import init_kv_cache, extend_kv_cache, append_kv_cache
26
- from ipex_llm.transformers.models.utils import use_flash_attention, use_sdp
26
+ from ipex_llm.transformers.models.utils import use_sdp
27
27
 
28
28
 
29
29
  def rotate_half(x):
@@ -41,7 +41,7 @@ def apply_rotary_pos_emb_index(q, k, cos, sin, position_id):
41
41
 
42
42
 
43
43
  def glm_sdpa(query, key, value, attention_mask=None, is_causal=False):
44
- if use_flash_attention(query, key, attention_mask) or query.device.type == 'cpu':
44
+ if query.device.type == 'cpu':
45
45
  context_layer = F.scaled_dot_product_attention(query.to(key.dtype),
46
46
  key,
47
47
  value,
@@ -33,7 +33,6 @@ from ipex_llm.transformers.models.utils import update_past_key_value, should_use
33
33
  from ipex_llm.transformers.models.utils import use_quantize_kv_cache
34
34
  from ipex_llm.transformers.models.utils import rotate_half, SILU
35
35
  from ipex_llm.transformers.models.utils import mlp_fusion_check
36
- from ipex_llm.transformers.models.utils import use_flash_attention
37
36
  from ipex_llm.utils.common import invalidInputError
38
37
  from transformers.modeling_outputs import BaseModelOutputWithPast
39
38
 
@@ -116,33 +115,28 @@ def qwen_attention_forward(
116
115
  past_key_value = (key_states.transpose(1, 2),
117
116
  value_states.transpose(1, 2)) if use_cache else None
118
117
 
119
- # IPEX-LLM OPT: sdp
118
+ # IPEX-LLM OPT: sdpa
120
119
  attn_weights = None
121
- if use_flash_attention(query_states, key_states, attention_mask):
122
- attn_output = F.scaled_dot_product_attention(query_states.to(dtype=torch.float16),
123
- key_states.to(dtype=torch.float16),
124
- value_states.to(dtype=torch.float16),
125
- is_causal=True).to(hidden_states.dtype)
120
+
121
+ if q_len > 1 and q_len != kv_seq_len:
122
+ causal_mask = torch.tril(
123
+ torch.ones((kv_seq_len, kv_seq_len), dtype=torch.bool, device=query_states.device)
124
+ ).view(1, 1, kv_seq_len, kv_seq_len)
125
+ causal_mask = causal_mask[
126
+ :, :, kv_seq_len - q_len:kv_seq_len, :kv_seq_len
127
+ ]
128
+ attention_mask = torch.zeros(causal_mask.shape, dtype=query_states.dtype,
129
+ device=query_states.device)
130
+ attention_mask.masked_fill_(causal_mask.logical_not(),
131
+ torch.finfo(attention_mask.dtype).min)
132
+ attention_mask = attention_mask.expand([bsz, -1, -1, -1])
126
133
  else:
127
- if q_len > 1 and q_len != kv_seq_len:
128
- causal_mask = torch.tril(
129
- torch.ones((kv_seq_len, kv_seq_len), dtype=torch.bool, device=query_states.device)
130
- ).view(1, 1, kv_seq_len, kv_seq_len)
131
- causal_mask = causal_mask[
132
- :, :, kv_seq_len - q_len:kv_seq_len, :kv_seq_len
133
- ]
134
- attention_mask = torch.zeros(causal_mask.shape, dtype=query_states.dtype,
135
- device=query_states.device)
136
- attention_mask.masked_fill_(causal_mask.logical_not(),
137
- torch.finfo(attention_mask.dtype).min)
138
- attention_mask = attention_mask.expand([bsz, -1, -1, -1])
139
- else:
140
- attention_mask = None
134
+ attention_mask = None
141
135
 
142
- attn_output = scaled_dot_product_attention(
143
- query_states, key_states, value_states,
144
- attention_mask, q_len == kv_seq_len
145
- )
136
+ attn_output = scaled_dot_product_attention(
137
+ query_states, key_states, value_states,
138
+ attention_mask, q_len == kv_seq_len
139
+ )
146
140
 
147
141
  attn_output = attn_output.transpose(1, 2).contiguous()
148
142
  attn_output = attn_output.view(bsz, q_len, self.hidden_size)
@@ -219,31 +213,25 @@ def qwen_attention_forward_registered(
219
213
  past_key_value = (key_states.transpose(1, 2),
220
214
  value_states.transpose(1, 2)) if use_cache else None
221
215
 
222
- # IPEX-LLM OPT: sdp
216
+ # IPEX-LLM OPT: sdpa
223
217
  attn_weights = None
224
218
 
225
- if use_flash_attention(query_states, key_states, attention_mask):
226
- attn_output = F.scaled_dot_product_attention(query_states.to(dtype=torch.float16),
227
- key_states.to(dtype=torch.float16),
228
- value_states.to(dtype=torch.float16),
229
- is_causal=True).to(hidden_states.dtype)
219
+ if q_len > 1 and q_len != kv_seq_len:
220
+ causal_mask = registered_causal_mask[
221
+ :, :, kv_seq_len - q_len:kv_seq_len, :kv_seq_len
222
+ ]
223
+ attention_mask = torch.zeros(causal_mask.shape, dtype=query_states.dtype,
224
+ device=query_states.device)
225
+ attention_mask.masked_fill_(causal_mask.logical_not(),
226
+ torch.finfo(attention_mask.dtype).min)
227
+ attention_mask = attention_mask.expand([bsz, -1, -1, -1])
230
228
  else:
231
- if q_len > 1 and q_len != kv_seq_len:
232
- causal_mask = registered_causal_mask[
233
- :, :, kv_seq_len - q_len:kv_seq_len, :kv_seq_len
234
- ]
235
- attention_mask = torch.zeros(causal_mask.shape, dtype=query_states.dtype,
236
- device=query_states.device)
237
- attention_mask.masked_fill_(causal_mask.logical_not(),
238
- torch.finfo(attention_mask.dtype).min)
239
- attention_mask = attention_mask.expand([bsz, -1, -1, -1])
240
- else:
241
- attention_mask = None
229
+ attention_mask = None
242
230
 
243
- attn_output = scaled_dot_product_attention(
244
- query_states, key_states, value_states,
245
- attention_mask, q_len == kv_seq_len
246
- )
231
+ attn_output = scaled_dot_product_attention(
232
+ query_states, key_states, value_states,
233
+ attention_mask, q_len == kv_seq_len
234
+ )
247
235
 
248
236
  attn_output = attn_output.transpose(1, 2).contiguous()
249
237
  attn_output = attn_output.view(bsz, q_len, self.hidden_size)
@@ -38,12 +38,10 @@
38
38
  #
39
39
 
40
40
  import os
41
- import math
42
41
  from typing import Optional, Tuple, Union, List
43
42
 
44
43
  import torch
45
44
  from torch.nn import CrossEntropyLoss
46
- from torch.nn.functional import scaled_dot_product_attention as sdpa
47
45
 
48
46
  from ipex_llm.transformers.models.common import merge_qkv_base
49
47
  from ipex_llm.transformers.models.common import scaled_dot_product_attention
@@ -51,13 +49,12 @@ from ipex_llm.transformers.models.utils import SILU, mlp_fusion_check
51
49
  from ipex_llm.transformers.models.utils import should_use_fuse_rope
52
50
  from ipex_llm.transformers.models.utils import use_quantize_kv_cache, \
53
51
  should_use_compresskv, is_enough_kv_cache_room_4_36
54
- from ipex_llm.transformers.models.utils import use_flash_attention
55
52
  from ipex_llm.transformers.kv import DynamicFp8Cache, DynamicNormalCache, \
56
53
  DynamicCompressCache, DynamicCompressFp8Cache
57
54
  from ipex_llm.utils.common import invalidInputError
58
55
 
59
56
  from transformers.models.qwen2.modeling_qwen2 import Qwen2Attention, Qwen2MLP
60
- from transformers.models.qwen2.modeling_qwen2 import apply_rotary_pos_emb, repeat_kv
57
+ from transformers.models.qwen2.modeling_qwen2 import apply_rotary_pos_emb
61
58
  from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast
62
59
  from transformers.cache_utils import Cache
63
60
  from transformers import logging
@@ -580,21 +577,10 @@ def qwen2_attention_forward(
580
577
  self.layer_idx, None)
581
578
 
582
579
  attn_weights = None
583
- if use_flash_attention(query_states, key_states, attention_mask):
584
- if attention_mask is not None:
585
- attention_mask = attention_mask[:, :, :, :kv_seq_len]
586
- # repeat k/v heads if n_kv_heads < n_heads
587
- key_states = repeat_kv(key_states, self.num_key_value_groups)
588
- value_states = repeat_kv(value_states, self.num_key_value_groups)
589
- attn_output = sdpa(query_states.to(device, dtype=torch.float16),
590
- key_states.to(device, dtype=torch.float16),
591
- value_states.to(device, dtype=torch.float16),
592
- is_causal=True).to(hidden_states.dtype)
593
- else:
594
- attn_output = scaled_dot_product_attention(
595
- query_states, key_states, value_states,
596
- attention_mask, q_len == kv_seq_len
597
- )
580
+ attn_output = scaled_dot_product_attention(
581
+ query_states, key_states, value_states,
582
+ attention_mask, q_len == kv_seq_len
583
+ )
598
584
 
599
585
  attn_output = attn_output.transpose(1, 2).contiguous()
600
586
  attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
@@ -301,8 +301,7 @@ class _BaseAutoModelClass:
301
301
  model.share_memory()
302
302
 
303
303
  if not pipeline:
304
- if (not hasattr(model, 'llm') and
305
- model.config.model_type in ["qwen2", "llama", "minicpm"]):
304
+ if model.config.model_type in ["qwen2", "llama", "minicpm"]:
306
305
  from ipex_llm.transformers.npu_models.convert import optimize_llm_single_process
307
306
  optimize_llm_single_process(
308
307
  llm,
@@ -312,7 +311,8 @@ class _BaseAutoModelClass:
312
311
  group_size=quantization_group_size,
313
312
  qtype=qtype,
314
313
  save_directory=save_directory,
315
- fuse_layers=fuse_layers
314
+ fuse_layers=fuse_layers,
315
+ has_llm=hasattr(model, "llm")
316
316
  )
317
317
  else:
318
318
  optimize_llm(
@@ -449,7 +449,8 @@ def optimize_llm_single_process(
449
449
  group_size: int,
450
450
  qtype: str,
451
451
  save_directory: str,
452
- fuse_layers: int=None
452
+ fuse_layers: int=None,
453
+ has_llm: bool=False
453
454
  ):
454
455
  from ipex_llm.transformers.npu_pipeline_model.convert_pipeline import convert_llm
455
456
  from .npu_llm_cpp import load_model_from_file
@@ -468,8 +469,13 @@ def optimize_llm_single_process(
468
469
  model.kv_len = kv_len
469
470
  model.model_ptr = model_ptr
470
471
  model.save_directory = save_directory
471
- model.vocab_size = model.config.vocab_size
472
+ if model.config.vocab_size == 151666:
473
+ # for MiniCPM-V 2.6, 152064 is vocab_size of Qwen2-7B
474
+ model.vocab_size = 152064
475
+ else:
476
+ model.vocab_size = model.config.vocab_size
472
477
  model.logits_buffer = torch.empty(1, 1, model.vocab_size, dtype=torch.float32)
478
+ model.max_prompt_len = max_prompt_len
473
479
  except:
474
480
  invalidInputError(False,
475
481
  "False to InitLLMPipeline.")
@@ -478,9 +484,10 @@ def optimize_llm_single_process(
478
484
  general_convert(model, PreTrainedModel, prepare_input_ids, "prepare_inputs_for_generation")
479
485
  general_convert(model, PreTrainedModel, causal_lm_forward)
480
486
  # patch generate function
481
- import types
482
- model.original_generate = model.generate
483
- model.generate = types.MethodType(generate, model)
487
+ if not has_llm:
488
+ import types
489
+ model.original_generate = model.generate
490
+ model.generate = types.MethodType(generate, model)
484
491
  return model
485
492
 
486
493
 
@@ -491,9 +498,10 @@ def prepare_input_ids(
491
498
  else: # prefill, reset the model here
492
499
  from .npu_llm_cpp import reset
493
500
  reset(self.model_ptr)
494
- model_inputs = {
495
- "input_ids": input_ids
496
- }
501
+ if inputs_embeds is not None and past_key_values is None:
502
+ model_inputs = {"inputs_embeds": inputs_embeds}
503
+ else:
504
+ model_inputs = {"input_ids": input_ids}
497
505
  return model_inputs
498
506
 
499
507
 
@@ -511,17 +519,31 @@ def causal_lm_forward(
511
519
  return_dict: Optional[bool] = None,
512
520
  ) -> Union[Tuple, CausalLMOutputWithPast]:
513
521
  from .npu_llm_cpp import run_prefill_with_logits, run_decode_with_logits
514
- if isinstance(input_ids[0], torch.Tensor):
515
- input_list = input_ids[0].flatten().tolist()
516
- else:
517
- input_list = input_ids[0]
518
- input_length = len(input_list)
519
- if input_length > 1:
520
- logits = run_prefill_with_logits(self.model_ptr, input_list,
521
- self.logits_buffer, self.vocab_size)
522
+ if input_ids is not None:
523
+ if isinstance(input_ids[0], torch.Tensor):
524
+ input_list = input_ids[0].flatten().tolist()
525
+ else:
526
+ input_list = input_ids[0]
527
+ input_length = len(input_list)
528
+ if input_length > 1:
529
+ logits = run_prefill_with_logits(self.model_ptr, input_list,
530
+ self.logits_buffer, self.vocab_size)
531
+ else:
532
+ logits = run_decode_with_logits(self.model_ptr, input_list[0],
533
+ self.logits_buffer, self.vocab_size)
534
+ elif inputs_embeds is not None:
535
+ seq_len = inputs_embeds.shape[1]
536
+ pad_len = self.max_prompt_len - seq_len
537
+ inputs_embeds = torch.nn.functional.pad(inputs_embeds.to(torch.float16),
538
+ (0, 0, 0, pad_len), value=0.0)
539
+ logits = run_prefill_with_logits(self.model_ptr, None, self.logits_buffer,
540
+ self.vocab_size, inputs_embeds, seq_len)
522
541
  else:
523
- logits = run_decode_with_logits(self.model_ptr, input_list[0],
524
- self.logits_buffer, self.vocab_size)
542
+ invalidInputError(False, "Please specify either input_ids or inputs_embeds.")
543
+
544
+ if self.config.vocab_size == 151666:
545
+ # for MiniCPM-V 2.6
546
+ logits = logits[:, :, :151666]
525
547
 
526
548
  return CausalLMOutputWithPast(
527
549
  loss=None,
@@ -48,8 +48,8 @@ _lib = ctypes.cdll.LoadLibrary(_lib_path)
48
48
  _lib.load_model_from_file.argtypes = [ctypes.c_char_p]
49
49
  _lib.load_model_from_file.restype = ctypes.c_void_p
50
50
 
51
- _lib.run_prefill.argtypes = [ctypes.c_void_p, ctypes.POINTER(ctypes.c_int), ctypes.c_int,
52
- ctypes.c_float]
51
+ _lib.run_prefill.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int,
52
+ ctypes.c_float, ctypes.c_bool]
53
53
  _lib.run_prefill.restype = ctypes.POINTER(ctypes.c_float)
54
54
 
55
55
  _lib.run_decode.argtypes = [ctypes.c_void_p, ctypes.c_int, ctypes.c_float]
@@ -61,8 +61,10 @@ _lib.llm_sample_token.restype = ctypes.c_int
61
61
  _lib.reset.argtypes = [ctypes.c_void_p]
62
62
  _lib.reset.restype = None
63
63
 
64
- _lib.run_prefill_with_logits.argtypes = [ctypes.c_void_p, ctypes.POINTER(ctypes.c_int),
65
- ctypes.c_int, ctypes.POINTER(ctypes.c_float), ctypes.c_int]
64
+ _lib.run_prefill_with_logits.argtypes = [ctypes.c_void_p, ctypes.c_void_p,
65
+ ctypes.c_int, ctypes.POINTER(ctypes.c_float),
66
+ ctypes.c_int, ctypes.c_bool]
67
+
66
68
  _lib.run_prefill_with_logits.restype = None
67
69
 
68
70
  _lib.run_decode_with_logits.argtypes = [ctypes.c_void_p, ctypes.c_int,
@@ -77,7 +79,7 @@ def load_model_from_file(model_dir: str):
77
79
  def run_prefill(model_ptr, input_ids, vocab_size, repetition_penalty=1.0):
78
80
  input_ptr = (ctypes.c_int32 * len(input_ids))(*input_ids)
79
81
  input_len = len(input_ids)
80
- plogits = _lib.run_prefill(model_ptr, input_ptr, input_len, repetition_penalty)
82
+ plogits = _lib.run_prefill(model_ptr, input_ptr, input_len, repetition_penalty, False)
81
83
  new_token = _lib.llm_sample_token(plogits, True, vocab_size)
82
84
  return new_token
83
85
 
@@ -88,12 +90,19 @@ def run_decode(model_ptr, input_id, vocab_size, repetition_penalty=1.0):
88
90
  return new_token
89
91
 
90
92
 
91
- def run_prefill_with_logits(model_ptr, input_ids, logits, vocab_size):
92
- input_ptr = (ctypes.c_int32 * len(input_ids))(*input_ids)
93
- input_len = len(input_ids)
93
+ def run_prefill_with_logits(model_ptr, input_ids, logits, vocab_size,
94
+ inputs_embeds=None, seq_len=None):
95
+ if input_ids is not None:
96
+ input_ptr = (ctypes.c_int32 * len(input_ids))(*input_ids)
97
+ input_len = len(input_ids)
98
+ else:
99
+ input_ptr = inputs_embeds.contiguous().data.data_ptr()
100
+ input_ptr = ctypes.cast(input_ptr, ctypes.c_void_p)
101
+ input_len = seq_len
94
102
  logits_ptr = logits.data.data_ptr()
95
103
  logits_ptr = ctypes.cast(logits_ptr, ctypes.POINTER(ctypes.c_float))
96
- _lib.run_prefill_with_logits(model_ptr, input_ptr, input_len, logits_ptr, vocab_size)
104
+ _lib.run_prefill_with_logits(model_ptr, input_ptr, input_len, logits_ptr,
105
+ vocab_size, (input_ids is None))
97
106
  return logits
98
107
 
99
108
 
@@ -34,6 +34,10 @@ def convert_lm_head_and_embedding(model, temp_dir, weight_dir,
34
34
  lm_head_n_splits = 1
35
35
  asym = getattr(model.config, "asym", False)
36
36
 
37
+ if vocab_size == 151666:
38
+ # for MiniCPM-V 2.6 lm_head on NPU
39
+ vocab_size = 152064
40
+
37
41
  if not isinstance(lm_head, SlicedLMHead):
38
42
  asym = lm_head.qtype == "asym_int4_rtn"
39
43
  if asym:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ipex-llm
3
- Version: 2.2.0b20250101
3
+ Version: 2.2.0b20250102
4
4
  Summary: Large Language Model Develop Toolkit
5
5
  Home-page: https://github.com/intel-analytics/ipex-llm
6
6
  Author: BigDL Authors
@@ -27,10 +27,10 @@ Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine
27
27
  Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'all'
28
28
  Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'all'
29
29
  Provides-Extra: cpp
30
- Requires-Dist: bigdl-core-cpp ==2.6.0b20250101 ; extra == 'cpp'
30
+ Requires-Dist: bigdl-core-cpp ==2.6.0b20250102 ; extra == 'cpp'
31
31
  Requires-Dist: setuptools ; extra == 'cpp'
32
32
  Provides-Extra: cpp-arl
33
- Requires-Dist: bigdl-core-cpp ==2.6.0b20250101 ; extra == 'cpp-arl'
33
+ Requires-Dist: bigdl-core-cpp ==2.6.0b20250102 ; extra == 'cpp-arl'
34
34
  Requires-Dist: setuptools ; extra == 'cpp-arl'
35
35
  Requires-Dist: onednn-devel ==2024.1.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
36
36
  Requires-Dist: dpcpp-cpp-rt ==2024.2.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
@@ -65,7 +65,7 @@ Requires-Dist: transformers ==4.40.0 ; extra == 'npu'
65
65
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'npu'
66
66
  Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'npu'
67
67
  Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'npu'
68
- Requires-Dist: bigdl-core-npu ==2.6.0b20250101 ; (platform_system == "Windows") and extra == 'npu'
68
+ Requires-Dist: bigdl-core-npu ==2.6.0b20250102 ; (platform_system == "Windows") and extra == 'npu'
69
69
  Provides-Extra: serving
70
70
  Requires-Dist: py-cpuinfo ; extra == 'serving'
71
71
  Requires-Dist: fschat[model_worker,webui] ==0.2.36 ; extra == 'serving'
@@ -85,9 +85,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu'
85
85
  Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu'
86
86
  Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu'
87
87
  Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu'
88
- Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250101 ; extra == 'xpu'
89
- Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250101 ; extra == 'xpu'
90
- Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250101 ; extra == 'xpu'
88
+ Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250102 ; extra == 'xpu'
89
+ Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250102 ; extra == 'xpu'
90
+ Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250102 ; extra == 'xpu'
91
91
  Provides-Extra: xpu-2-1
92
92
  Requires-Dist: py-cpuinfo ; extra == 'xpu-2-1'
93
93
  Requires-Dist: protobuf ; extra == 'xpu-2-1'
@@ -102,9 +102,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu-2-1'
102
102
  Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu-2-1'
103
103
  Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu-2-1'
104
104
  Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu-2-1'
105
- Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250101 ; extra == 'xpu-2-1'
106
- Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250101 ; extra == 'xpu-2-1'
107
- Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250101 ; extra == 'xpu-2-1'
105
+ Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250102 ; extra == 'xpu-2-1'
106
+ Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250102 ; extra == 'xpu-2-1'
107
+ Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250102 ; extra == 'xpu-2-1'
108
108
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-1'
109
109
  Requires-Dist: dpcpp-cpp-rt ==2024.0.2 ; (platform_system == "Windows") and extra == 'xpu-2-1'
110
110
  Requires-Dist: mkl-dpcpp ==2024.0.0 ; (platform_system == "Windows") and extra == 'xpu-2-1'
@@ -119,9 +119,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arc'
119
119
  Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arc'
120
120
  Requires-Dist: tabulate ; extra == 'xpu-arc'
121
121
  Requires-Dist: setuptools ; extra == 'xpu-arc'
122
- Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250101 ; extra == 'xpu-arc'
123
- Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250101 ; extra == 'xpu-arc'
124
- Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250101 ; extra == 'xpu-arc'
122
+ Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250102 ; extra == 'xpu-arc'
123
+ Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250102 ; extra == 'xpu-arc'
124
+ Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250102 ; extra == 'xpu-arc'
125
125
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arc'
126
126
  Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
127
127
  Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
@@ -141,9 +141,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arl'
141
141
  Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arl'
142
142
  Requires-Dist: tabulate ; extra == 'xpu-arl'
143
143
  Requires-Dist: setuptools ; extra == 'xpu-arl'
144
- Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250101 ; extra == 'xpu-arl'
145
- Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250101 ; extra == 'xpu-arl'
146
- Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250101 ; extra == 'xpu-arl'
144
+ Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250102 ; extra == 'xpu-arl'
145
+ Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250102 ; extra == 'xpu-arl'
146
+ Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250102 ; extra == 'xpu-arl'
147
147
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arl'
148
148
  Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
149
149
  Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
@@ -163,9 +163,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-lnl'
163
163
  Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-lnl'
164
164
  Requires-Dist: tabulate ; extra == 'xpu-lnl'
165
165
  Requires-Dist: setuptools ; extra == 'xpu-lnl'
166
- Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250101 ; extra == 'xpu-lnl'
167
- Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250101 ; extra == 'xpu-lnl'
168
- Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250101 ; extra == 'xpu-lnl'
166
+ Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250102 ; extra == 'xpu-lnl'
167
+ Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250102 ; extra == 'xpu-lnl'
168
+ Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250102 ; extra == 'xpu-lnl'
169
169
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-lnl'
170
170
  Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
171
171
  Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
@@ -2,7 +2,7 @@ ipex_llm/__init__.py,sha256=kSA9JjVLPlpN4YchWtfOybRh4XiP6d_VTYvzbAouPSU,2118
2
2
  ipex_llm/convert_model.py,sha256=jopEe6wu88ZPZfNFhgnQUu7807iciiWW_EMyTsVni5A,6816
3
3
  ipex_llm/llm_patching.py,sha256=becMYcawtR8lgl2yeRQhvvX6CLaq09WZGm9dDmLJWL0,3232
4
4
  ipex_llm/models.py,sha256=XROP6GLLrGQDlogGXpXZENbV143YNi6j0VPJeOdQ3Cg,1063
5
- ipex_llm/optimize.py,sha256=4VYz8vgxSnrqBJhz__eB7hCJSwrkNx_t_wvTLxPlPyI,12253
5
+ ipex_llm/optimize.py,sha256=ml-qEpzsrWGcd-Wia6IxPBC1PhqT5pi_lp3VTOH_ns0,12415
6
6
  ipex_llm/cli/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
7
7
  ipex_llm/cli/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
8
8
  ipex_llm/cli/prompts/chat-with-llm.txt,sha256=PpSyd4FQQd-T7ptfXL9jZp7dgstevu1fsxWFa0IQ5Oc,216
@@ -41,35 +41,35 @@ ipex_llm/langchain/llms/transformerspipelinellm.py,sha256=vm522YPPwWxxAPVvQBtxRf
41
41
  ipex_llm/langchain/vllm/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
42
42
  ipex_llm/langchain/vllm/vllm.py,sha256=6dxc-ZISZQrJilEa_HA827l75Dv9rcHpY_G6FdJ8BVs,7793
43
43
  ipex_llm/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
- ipex_llm/libs/bloom-api.dll,sha256=2IyCxPjsahzooryTFYEq-7R6nkrliaZm8h3-Y6n35C4,36352
45
- ipex_llm/libs/bloom.dll,sha256=E2AQed3DUyh7u36R8_vi0Om92aDzkO-MP_aK4vehb58,506880
46
- ipex_llm/libs/gptneox-api.dll,sha256=dCc4_7Xl-Isu_xlGe9iZQfbSabN2V8WOByWyDIZLp_4,24576
47
- ipex_llm/libs/gptneox.dll,sha256=q3M6r7NEOL_CRooJqq3AZ2icota9FwlWsr4nab6izd0,567296
48
- ipex_llm/libs/libbloom_avx.dll,sha256=3KbBuBjRFd1k9fSaXzkIbXNB4zPuyQVEZt-9bx_NJOc,535040
49
- ipex_llm/libs/libbloom_vnni.dll,sha256=x_rnRwlhmUs0d7E_mkn77VBFd-_MtiwfoWCb6TOyKCc,506880
50
- ipex_llm/libs/libgptneox_avx.dll,sha256=VG1twX6up-CqWVgH_bhYXXAg6MhL8oP8CZL6oiaM8pg,595456
51
- ipex_llm/libs/libgptneox_vnni.dll,sha256=r6Pdq6XjvouI0NispOQco-CM4xSewQQThOBRmijgwyQ,567808
52
- ipex_llm/libs/libllama_avx.dll,sha256=mUHLFVirXIR2viOeKkugcuHEBJpSLmsc3d6V9Y0zDz4,589824
53
- ipex_llm/libs/libllama_vnni.dll,sha256=FXWgzdFnPw4o_UAAktlEZNDev8CT7SCdwzs3zV8mlMY,561664
54
- ipex_llm/libs/libstarcoder_avx.dll,sha256=ErlXweTeHBexsQQLwoaiPspJjRYu7R22GTYLMidXQCw,626688
55
- ipex_llm/libs/libstarcoder_vnni.dll,sha256=l54B9SqZDjmLRQlbsZqXauYfGo58xEiOctAaidVqnJw,598528
56
- ipex_llm/libs/llama-api.dll,sha256=cw4PwQE9d5eYEYi-7vY0aG83a28pFPfzlprW_YXzBg8,25600
57
- ipex_llm/libs/llama.dll,sha256=AiSnrnpJRvZrB3HIMTlecXpCblR0o7_fnHMdKclvCsg,561152
58
- ipex_llm/libs/main-bloom.exe,sha256=HfhuIi1jJa1gdkROK3yt97k8Q5noiYdfwXXKeI1d4XE,103424
59
- ipex_llm/libs/main-gptneox.exe,sha256=RX9FcsWS2oB9EKKT3DXZwkPWwSP9TMjysVxpk4e_FLQ,98816
60
- ipex_llm/libs/main-llama.exe,sha256=QQRN1avtgAnmrNLfrmGWL5BtOk9Id90nL8Y04OJzmMo,99840
61
- ipex_llm/libs/main-starcoder.exe,sha256=xVhlJ1m-H5W4c9YrdveFXumyedg23m4g0xZYqhaEbn4,157696
62
- ipex_llm/libs/pipeline.dll,sha256=XO75bZ1_OgLYxdPVeW3BNcd5d26YcO6sxVq0HHCueVU,72704
63
- ipex_llm/libs/quantize-bloom.exe,sha256=lU6EmSLCnlYi8vRHsXbq8sEGR8q6dPpyKS4cBQXNLyA,126464
64
- ipex_llm/libs/quantize-bloom_vnni.exe,sha256=rK2NcJzLcqd_I8_D_oX-VHsw8VCcLNYrF937D2LLaDU,127488
65
- ipex_llm/libs/quantize-gptneox.exe,sha256=UMzUlM4x5WT-J58IMhwTy2Hu0ZsNzC5UjjwtVapHQAQ,104448
66
- ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=WIAcb-27zWDc9jgaODw5mHa4YK7MU5QgGPVbdYcftPA,104960
67
- ipex_llm/libs/quantize-llama.exe,sha256=H_M_xrapNgrXuWfvSvvO5jPlGG45hn9nuceFj-hnhwA,109568
68
- ipex_llm/libs/quantize-llama_vnni.exe,sha256=8x-57l7IUfre4gHT-RM63-tsRayGCFHSIpAfYapP27E,110592
69
- ipex_llm/libs/quantize-starcoder.exe,sha256=U0-IhAZl2jRHLuf90PUSrw4d21CTNnWdoFtoqfMNdq4,127488
70
- ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=zCC8EJs9KUmglJklqndbPNrGfo9IKzB5Pwqo7aBZSBU,128512
71
- ipex_llm/libs/starcoder-api.dll,sha256=aX-nqvXEzk6GxiDB4nC0Qy1KBftizAhV_h1u6YhVzrw,21504
72
- ipex_llm/libs/starcoder.dll,sha256=fSQg0niB930j4GxEpEAFFMPhPlxZ_DnbLUy4OrM08Q4,598016
44
+ ipex_llm/libs/bloom-api.dll,sha256=D4QwdMzMWEpvWCt6Qhf3TNufORG3i1vGsvrxKX0UeG0,36352
45
+ ipex_llm/libs/bloom.dll,sha256=HiUWN81LFRa4ylyTNLlGbIiufHTlHr60HSpuvlaNcoM,506880
46
+ ipex_llm/libs/gptneox-api.dll,sha256=U9_GilTEOKSvrLXbDd3-iAUjjJUtP0Ud-KOsi-51Xjo,24576
47
+ ipex_llm/libs/gptneox.dll,sha256=RL0yZh9g1sj98wA8Ekx7xr2S_PSad5Ll-uhdF05ev64,567296
48
+ ipex_llm/libs/libbloom_avx.dll,sha256=2ouFAhOxkBYHMerLJHkefV1XbgRHDFqN-KBneiH9g-I,535040
49
+ ipex_llm/libs/libbloom_vnni.dll,sha256=Ab9mZrSYvKE82yJ70VQDpl8qI_0GaIOOidEWLsJxbAo,506880
50
+ ipex_llm/libs/libgptneox_avx.dll,sha256=3dV2YKFjy_u5tYZayi2URq7hpmDIqGafalP7WQEK-Tw,595456
51
+ ipex_llm/libs/libgptneox_vnni.dll,sha256=-xfG7NBOcISipqhp8qOhtxwpCKdHtWxQai3FMoLQLhQ,567808
52
+ ipex_llm/libs/libllama_avx.dll,sha256=gKtT8c3qklcvigz1POAbRtBwAdvsPFyXS7yZVipOTBM,589824
53
+ ipex_llm/libs/libllama_vnni.dll,sha256=i9wyOe9xqShLO_DlYbo7cn1b47uMJ4pwPwVx88EyGzY,561664
54
+ ipex_llm/libs/libstarcoder_avx.dll,sha256=RQNEbNTNCSv3qgDcKJCx87GB3eBMysL-UEPQdYj3WTE,626688
55
+ ipex_llm/libs/libstarcoder_vnni.dll,sha256=7luUJRSUDj9AyEEemYTxJzs4bcx8MqM0sLukiZfnVxc,598528
56
+ ipex_llm/libs/llama-api.dll,sha256=CwGyWmsT8XHnRiuZ4OzBIIlHwRlkiu_IaovirJqwbRY,25600
57
+ ipex_llm/libs/llama.dll,sha256=LAyZioZRUs_CCv4vo5cqzUWpz1MQMc_c4n0pLaygFvA,561152
58
+ ipex_llm/libs/main-bloom.exe,sha256=qMBUiZQZcr2RLc0U2cnnGUig9S3F70bqHRjwUsKV_Ns,103424
59
+ ipex_llm/libs/main-gptneox.exe,sha256=wsi7_iK9sTA4PNBW5_y_Jpa1gqcE1eK97DFv_lB8RgY,98816
60
+ ipex_llm/libs/main-llama.exe,sha256=BSBjl3RX7QzMq7cy5BVrXEzGcXiauSsKZ9VL_y8w-qI,99840
61
+ ipex_llm/libs/main-starcoder.exe,sha256=qzk78keT_JNdVi7cdf-ycpX2QD6N0ZkO0Z9GDUqZgqc,157696
62
+ ipex_llm/libs/pipeline.dll,sha256=C5E1tWcpdFJZVVjr7CfbcJDULeQom9hIjX6Zgb_rbso,72704
63
+ ipex_llm/libs/quantize-bloom.exe,sha256=35E6hdsDi_njtgEcAaSE9WwelkHd6yT9IV8fWROx-ts,126464
64
+ ipex_llm/libs/quantize-bloom_vnni.exe,sha256=_ECFuoYcxXPDob77sTS6vZvjufPg2su3BhcUZtP__AU,127488
65
+ ipex_llm/libs/quantize-gptneox.exe,sha256=dJsoz68WhbSOF0I5LD3D5AIypsAPJC-aJAqIG-XHjrA,104448
66
+ ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=4rXbo9XEh8Ka_bYyXCgIW68qLSHGkYwkrwFhiNkLeXM,104960
67
+ ipex_llm/libs/quantize-llama.exe,sha256=s1jEeBhAy4DBAUPQ7DpZuqavX5Thy1yff-U9ENcYZPA,109568
68
+ ipex_llm/libs/quantize-llama_vnni.exe,sha256=a4C5W-pxItLAaiVYGrHXkHsEH_nB0sVmnd5fyQv19ec,110592
69
+ ipex_llm/libs/quantize-starcoder.exe,sha256=Uhz8MG9ElW__KmkjR89QgHhqJvESste0iISLw0pvgXw,127488
70
+ ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=lv3V22NpQs_H_6HgORxigYv47OzEECECrz4IzYmxcjs,128512
71
+ ipex_llm/libs/starcoder-api.dll,sha256=-6pbKFCVrDXJ5N-KPAIN4tvmRlphbKxUOoYMR_1iFYc,21504
72
+ ipex_llm/libs/starcoder.dll,sha256=e40-bxKji-RYl12eW2VwJB-m70A6bJQyR75TsBZGmT8,598016
73
73
  ipex_llm/llamaindex/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
74
74
  ipex_llm/llamaindex/llms/__init__.py,sha256=KP1lEdGqDuxPoxL1ZSH25Pm2kKMPJBWUTLR0ckSLMIU,1139
75
75
  ipex_llm/llamaindex/llms/bigdlllm.py,sha256=FQBzq1KOjfc6uofTXAha3O7TqpJkNfOFepXQmOVlbnI,26314
@@ -87,17 +87,17 @@ ipex_llm/serving/fastchat/tgi_api_protocol.py,sha256=brT3k3-V0NJrU4fRqUwWjC0O3iO
87
87
  ipex_llm/serving/fastchat/tgi_api_server.py,sha256=agNTAEiZPSuj3dEdIdYKwkoY0cXOUDX06DiM9VP2knQ,24418
88
88
  ipex_llm/serving/fastchat/vllm_worker.py,sha256=ZLz2Q9GxJO6r_LOiP6epgCRjBGk-K4EB1SNEWSJp5DA,11091
89
89
  ipex_llm/transformers/__init__.py,sha256=l4KkMkLe-pRC7b_kj6LCfeifgE-Uo33_Av_FwN9HnFA,1074
90
- ipex_llm/transformers/convert.py,sha256=B4oI836JHEqg_qT3dcl2RaJdQs7rOyigMwj-racxhkc,106379
90
+ ipex_llm/transformers/convert.py,sha256=V4KDyi-2FVWSYZAxe4PlAxGGZbauSbOCuqq56ME9yyQ,106461
91
91
  ipex_llm/transformers/convert_ipex.py,sha256=iKXo0n8fVFTOA2fNYYrByMFK0dovL-kLd2sVDk88AlQ,14334
92
92
  ipex_llm/transformers/embedding.py,sha256=bdgk59DvD4ZZyxRzewXOR7g56nThgO6uhIwk8QL7f-s,9299
93
93
  ipex_llm/transformers/kv.py,sha256=k4TU18LlA-Sbq9WNNQnfuzu3RSFBwFhmaV3BcGN5bAo,19191
94
94
  ipex_llm/transformers/lisa.py,sha256=F5WxbtXQ7RdKulj83h_2DnEIgKiKGZf7zvOmg6QBl2s,3289
95
95
  ipex_llm/transformers/loader.py,sha256=cOgX93xOC-4dt01GTJ5wyd7PjZ8S43r4mctkR2YxVuw,6893
96
96
  ipex_llm/transformers/lookup.py,sha256=c4ETIha6ZLbWvhcclSKRDdi5Ipuet4mfUnOkBa0E8kk,19607
97
- ipex_llm/transformers/low_bit_linear.py,sha256=TJfEqNp6zB6YnNEUASga302WQXzNdrmU_miGCM0u-F8,41504
98
- ipex_llm/transformers/model.py,sha256=N-g9IQVvBiBhbL5Fo5DTWbmHPZY52sjfFuq0B8Qu6h4,40952
97
+ ipex_llm/transformers/low_bit_linear.py,sha256=dyyYyCqw0GK8hzaUGanrg-uIhU1HTLEEbvbxXMlm-80,41668
98
+ ipex_llm/transformers/model.py,sha256=KcRjkauGg48BYrUBoUZaVMpg7Piuz5JrfIpVZd3EIjs,41105
99
99
  ipex_llm/transformers/modelling_bigdl.py,sha256=7JpNVMuyq_OmtNUaMFMXdxPWZp2q0QHC02QeA-VTPOw,6709
100
- ipex_llm/transformers/npu_model.py,sha256=wPFEB4W1rYbpO_XqepREMef69dzo-zkFoqFRb_mqneA,37862
100
+ ipex_llm/transformers/npu_model.py,sha256=a1mkyc6EqD7AJhqbYzokGhFubNpt5trIMuZT_dQKlTk,37861
101
101
  ipex_llm/transformers/patches.py,sha256=halPWm__ORh2fRFSIFPiCNg3LQBfrRkTPtmtRpBJCZQ,1286
102
102
  ipex_llm/transformers/pipeline_parallel.py,sha256=uNZpOXljNmdoEYnP8U-VFiN4dRZb2piQbIf2bG9LQnE,49051
103
103
  ipex_llm/transformers/qlora.py,sha256=jtPGsvWFjbTUGzDBCdfftnCis_0nJQNRpACSwXUbbGU,14943
@@ -136,10 +136,10 @@ ipex_llm/transformers/gguf/models/model_implement/yuan2/configuration_yuan.py,sh
136
136
  ipex_llm/transformers/gguf/models/model_implement/yuan2/yuan_hf_model.py,sha256=_AOGMV65XHxgTxIib7lgs49InopcecTzRwgtYR8NTUg,51084
137
137
  ipex_llm/transformers/models/__init__.py,sha256=tp2DcVkKg1-QvdYk7DY7rZvQWCDQ4ZjU8NAQ7Fclrpg,584
138
138
  ipex_llm/transformers/models/aquila.py,sha256=VZb5Drpo_fTxwcExZ397LygnsNPX2sVbie9_JeFudZI,5252
139
- ipex_llm/transformers/models/baichuan.py,sha256=0dkTSPqGPgSnwa8zSNKroam0pvSyQLSRpbb43-OgIlc,19815
139
+ ipex_llm/transformers/models/baichuan.py,sha256=oJCAEENSG8oQhJ-QPN2SiapARjAGdOM6nEbyCcYOMCo,19334
140
140
  ipex_llm/transformers/models/bert.py,sha256=bJNic2pt1kph0kBwdK5MRGyWupFfx2Ts0V3D1L-5kWo,6085
141
141
  ipex_llm/transformers/models/bloom.py,sha256=PxfzyYT-nFn3K5rZhTQjmcEjUUzAhUFzxIN4kzRlCuc,8103
142
- ipex_llm/transformers/models/chatglm.py,sha256=xCEhYzaXyTDBXqz111Uw4IW5x4TLbtBbYfmBT623gRI,12669
142
+ ipex_llm/transformers/models/chatglm.py,sha256=UHai1t2AUtGmF765_eHF8LUMVQzp_oCBx8TJB21WrHk,12597
143
143
  ipex_llm/transformers/models/chatglm2.py,sha256=kfJThuKYb3unAB1XCzfop1iDW1gOkyFOjSr-lEjUdS0,24781
144
144
  ipex_llm/transformers/models/chatglm4.py,sha256=AAhAFFDDas5DBQPfh2Mwl7a2v7taKf6xphoeeNNFaBI,16593
145
145
  ipex_llm/transformers/models/chatglm4v.py,sha256=YRfuf9g1E0MQ_7wbHAOMvadFnO-j3LqI_k1SaRkDs0M,14055
@@ -167,8 +167,8 @@ ipex_llm/transformers/models/mpt.py,sha256=z02NwHogJZVh-Mk4sYoIzR90SFIKhoNN_-ifs
167
167
  ipex_llm/transformers/models/phi.py,sha256=E6qz4EEuHIVGvaPo-wtLC5lz3iyMqTbAE_cRlcjQRKI,6670
168
168
  ipex_llm/transformers/models/phi3.py,sha256=jkiadJ85ToHpymY5GOM6orWlnx6LKN8_-v1MUcfGWPg,15159
169
169
  ipex_llm/transformers/models/phixtral.py,sha256=MDTMghcu7qAmZmRcUGqXXDXhSU3y_N59HRIXmlcjp5g,4890
170
- ipex_llm/transformers/models/qwen.py,sha256=iP4wcjdIZ0CvqbM8muM96y-rghpEnZSR3TgjiOBIq5k,20475
171
- ipex_llm/transformers/models/qwen2.py,sha256=k_FcPfPAXIotRE03ULFBYA5hWE0M5CfktAhCWbEy8Yw,26369
170
+ ipex_llm/transformers/models/qwen.py,sha256=XIJ_bLzediBURWU-OOS3H6WBIGXQue6jDdUHJsAabwY,19391
171
+ ipex_llm/transformers/models/qwen2.py,sha256=b49HO4GSudwGJ3n6uHVno1oo3DgRt3jOjtQnLOB3cdY,25530
172
172
  ipex_llm/transformers/models/qwen2_moe.py,sha256=EA_OYxYAEgrvi7VpDW192AJXG9Fwe2aBtOAZPkOAJk4,19350
173
173
  ipex_llm/transformers/models/qwen2_vl.py,sha256=jIm4yZSd751BkRqgj3wR1QBkDIh-TMCLAMM8SZ8n6Qo,13419
174
174
  ipex_llm/transformers/models/qwen_vl.py,sha256=j7Nzzz2Qvynu9yrCXmoEfERjw43hXof5TbXIs7Ms-oY,17105
@@ -185,7 +185,7 @@ ipex_llm/transformers/npu_models/baichuan_mp.py,sha256=tHhO-0v5z6IhxsfzAPYWXVbLr
185
185
  ipex_llm/transformers/npu_models/chatglm.py,sha256=YzpGLZ7ORt6qkwW9mCwZ_xhOAI8uHSDHJrmqWgNM234,10511
186
186
  ipex_llm/transformers/npu_models/chatglm4.py,sha256=J4523DzhIzZxIvlf1V9qU4auzEGKvC80YqyxuCJygjw,9795
187
187
  ipex_llm/transformers/npu_models/common.py,sha256=tTUJL7IxVrJSnXle6nla35wTUrBf2sOEt7Ya1qyMezY,4853
188
- ipex_llm/transformers/npu_models/convert.py,sha256=cX10r7Q0a2qFcEIhRcvmw1eSmUtmeOcoNn8kVqv3Su8,24224
188
+ ipex_llm/transformers/npu_models/convert.py,sha256=FILSGnoltcR9FMrCkw0eOKh6p3sbBI5i0Ms8AsJc04E,25342
189
189
  ipex_llm/transformers/npu_models/convert_mp.py,sha256=t7160V4MmYpnex2NfuLTcqoc1meGEXdYi4AAPotfbzk,24518
190
190
  ipex_llm/transformers/npu_models/glm_edge.py,sha256=VsJex-6530h4ZQk35TxRe1MnttAHT41omE8LV47LgBE,6723
191
191
  ipex_llm/transformers/npu_models/kv.py,sha256=2OSFO9Z6e4nGdVxXEM-Bq2qa_npYYbGmQt3lcCZxTlU,9201
@@ -198,7 +198,7 @@ ipex_llm/transformers/npu_models/minicpm_mp.py,sha256=0iCRWN9UIUQp5tSKyu-orpGCOx
198
198
  ipex_llm/transformers/npu_models/minicpmv_mp.py,sha256=m11WT6s_H5wkFtlz7aHMOL9b_CoL_G5MhoL5te4la_Q,20147
199
199
  ipex_llm/transformers/npu_models/mistral.py,sha256=iRdmIQI_bbbZxRCYRvnV4rWjX2t-6vkHNl1ICAsLoy4,10759
200
200
  ipex_llm/transformers/npu_models/mp_models_base.py,sha256=rY-5tq8DfxRsiaIITl0PQOTiPLJnUm_5L-oWzbK12N8,28429
201
- ipex_llm/transformers/npu_models/npu_llm_cpp.py,sha256=SL1p5UBjheufhum-GktNQQ4iXjRlZ9Hgatzb3NFr6Bs,3900
201
+ ipex_llm/transformers/npu_models/npu_llm_cpp.py,sha256=B40sBujvy31ETFBgcYAf4CN23UuTCBEJVaxjIMaoEHk,4268
202
202
  ipex_llm/transformers/npu_models/paraformer_mp.py,sha256=lGEjmKHW_Pk3BE3nqa1ZVgJ3P5p4lNp7p6wMV7KrtCU,37871
203
203
  ipex_llm/transformers/npu_models/phi3.py,sha256=R-EuqHsTrPTX33HtCGAMFlRdXB_j5mH_7FDnj62JtNM,6555
204
204
  ipex_llm/transformers/npu_models/phi3_v.py,sha256=EMZuTPkGfuDVp9c5BU1HyzXHWKswHRQ8bvQjzocIyHA,7737
@@ -213,7 +213,7 @@ ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py,sha256=953Gua2tFKLI
213
213
  ipex_llm/transformers/npu_pipeline_model/llama.py,sha256=MnvHRytLt3oy5jIPUBe8AeEJ6PtPWLbhQ5a9WqjZ1TQ,19905
214
214
  ipex_llm/transformers/npu_pipeline_model/minicpm.py,sha256=MDMesYlVbECKdK0xxkt1LwHgpkJOO7ZwBExYAwMGQa0,20637
215
215
  ipex_llm/transformers/npu_pipeline_model/pipeline_cpp.py,sha256=JNmodAMg_NQvDILug3E_fGXEh6cd3wsj4bvAzcd-vaU,2749
216
- ipex_llm/transformers/npu_pipeline_model/qwen.py,sha256=iVUNrcRLXE5eQGZIetgkLTINo8RW15RHM7SUetvJNRQ,14748
216
+ ipex_llm/transformers/npu_pipeline_model/qwen.py,sha256=3paMXr1viuztybhmVLqQ9XvM3EZbxncDuNSNwLF8OI0,14849
217
217
  ipex_llm/utils/__init__.py,sha256=NdB_InYE65dNgW4ruEPUOlgKEO2ELcsJoqkP7O5kpog,1391
218
218
  ipex_llm/utils/benchmark_util_4_29.py,sha256=OU1W1quiaiJGsg1pd3HM9O6PmVSaPA0HHE7R8hNTfmQ,258653
219
219
  ipex_llm/utils/benchmark_util_4_42.py,sha256=HEiClCgKDp_T64HH8ulSTly8dvt6UwPDYZfrPVYvXcc,225383
@@ -244,11 +244,11 @@ ipex_llm/vllm/xpu/engine/__init__.py,sha256=pY_CpyuZd72fr6s32ejeKHKFW0K4vUU2rzZj
244
244
  ipex_llm/vllm/xpu/engine/engine.py,sha256=k4-D27WS_Gk3mA--w3HWAjPjb4Aiu043MVPi0ZoAUBc,5984
245
245
  ipex_llm/vllm/xpu/entrypoints/openai/api_server.py,sha256=GshTZFB8e4PWvqckfbmTOU6b0oLkNn7A-vzLuG9--j8,21544
246
246
  ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py,sha256=2rENA2ucynMaIjiZBEh2ez1o5vR32GaP514t39CD7KM,8676
247
- ipex_llm-2.2.0b20250101.data/scripts/ipex-llm-init.bat,sha256=HPtCYuDYwEatq7dAwOvdfVcHYCpAVdbj75K1qh0vQek,2578
248
- ipex_llm-2.2.0b20250101.data/scripts/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
249
- ipex_llm-2.2.0b20250101.data/scripts/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
250
- ipex_llm-2.2.0b20250101.dist-info/METADATA,sha256=J_SK__bQX0TB_vOUTfpk9DV4DhTzVc-LzVzN_LmERV8,11374
251
- ipex_llm-2.2.0b20250101.dist-info/WHEEL,sha256=6iYPr8vTHsyDK75jr9X0V3I9wPSVmtwr_8fdATBciGk,98
252
- ipex_llm-2.2.0b20250101.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
253
- ipex_llm-2.2.0b20250101.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
254
- ipex_llm-2.2.0b20250101.dist-info/RECORD,,
247
+ ipex_llm-2.2.0b20250102.data/scripts/ipex-llm-init.bat,sha256=HPtCYuDYwEatq7dAwOvdfVcHYCpAVdbj75K1qh0vQek,2578
248
+ ipex_llm-2.2.0b20250102.data/scripts/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
249
+ ipex_llm-2.2.0b20250102.data/scripts/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
250
+ ipex_llm-2.2.0b20250102.dist-info/METADATA,sha256=fF_EkmZQW5wODRZlaJEQgQnS6Xieiem4h1vZcvsRxRE,11374
251
+ ipex_llm-2.2.0b20250102.dist-info/WHEEL,sha256=6iYPr8vTHsyDK75jr9X0V3I9wPSVmtwr_8fdATBciGk,98
252
+ ipex_llm-2.2.0b20250102.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
253
+ ipex_llm-2.2.0b20250102.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
254
+ ipex_llm-2.2.0b20250102.dist-info/RECORD,,