ipex-llm 2.2.0b20250105.post0__py3-none-win_amd64.whl → 2.2.0b20250106.post1__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. ipex_llm/libs/bloom-api.dll +0 -0
  2. ipex_llm/libs/bloom.dll +0 -0
  3. ipex_llm/libs/gptneox-api.dll +0 -0
  4. ipex_llm/libs/gptneox.dll +0 -0
  5. ipex_llm/libs/libbloom_avx.dll +0 -0
  6. ipex_llm/libs/libbloom_vnni.dll +0 -0
  7. ipex_llm/libs/libgptneox_avx.dll +0 -0
  8. ipex_llm/libs/libgptneox_vnni.dll +0 -0
  9. ipex_llm/libs/libllama_avx.dll +0 -0
  10. ipex_llm/libs/libllama_vnni.dll +0 -0
  11. ipex_llm/libs/libstarcoder_avx.dll +0 -0
  12. ipex_llm/libs/libstarcoder_vnni.dll +0 -0
  13. ipex_llm/libs/llama-api.dll +0 -0
  14. ipex_llm/libs/llama.dll +0 -0
  15. ipex_llm/libs/main-bloom.exe +0 -0
  16. ipex_llm/libs/main-gptneox.exe +0 -0
  17. ipex_llm/libs/main-llama.exe +0 -0
  18. ipex_llm/libs/main-starcoder.exe +0 -0
  19. ipex_llm/libs/pipeline.dll +0 -0
  20. ipex_llm/libs/quantize-bloom.exe +0 -0
  21. ipex_llm/libs/quantize-bloom_vnni.exe +0 -0
  22. ipex_llm/libs/quantize-gptneox.exe +0 -0
  23. ipex_llm/libs/quantize-gptneox_vnni.exe +0 -0
  24. ipex_llm/libs/quantize-llama.exe +0 -0
  25. ipex_llm/libs/quantize-llama_vnni.exe +0 -0
  26. ipex_llm/libs/quantize-starcoder.exe +0 -0
  27. ipex_llm/libs/quantize-starcoder_vnni.exe +0 -0
  28. ipex_llm/libs/starcoder-api.dll +0 -0
  29. ipex_llm/libs/starcoder.dll +0 -0
  30. ipex_llm/transformers/convert.py +2 -26
  31. ipex_llm/transformers/loader.py +1 -1
  32. ipex_llm/transformers/low_bit_linear.py +9 -23
  33. ipex_llm/transformers/model.py +0 -7
  34. ipex_llm/transformers/models/utils.py +2 -15
  35. ipex_llm/transformers/speculative.py +2 -14
  36. ipex_llm/transformers/utils.py +2 -0
  37. {ipex_llm-2.2.0b20250105.post0.dist-info → ipex_llm-2.2.0b20250106.post1.dist-info}/METADATA +20 -20
  38. {ipex_llm-2.2.0b20250105.post0.dist-info → ipex_llm-2.2.0b20250106.post1.dist-info}/RECORD +44 -45
  39. ipex_llm/transformers/models/gptj.py +0 -441
  40. {ipex_llm-2.2.0b20250105.post0.data → ipex_llm-2.2.0b20250106.post1.data}/scripts/ipex-llm-init.bat +0 -0
  41. {ipex_llm-2.2.0b20250105.post0.data → ipex_llm-2.2.0b20250106.post1.data}/scripts/llm-chat.ps1 +0 -0
  42. {ipex_llm-2.2.0b20250105.post0.data → ipex_llm-2.2.0b20250106.post1.data}/scripts/llm-cli.ps1 +0 -0
  43. {ipex_llm-2.2.0b20250105.post0.dist-info → ipex_llm-2.2.0b20250106.post1.dist-info}/WHEEL +0 -0
  44. {ipex_llm-2.2.0b20250105.post0.dist-info → ipex_llm-2.2.0b20250106.post1.dist-info}/entry_points.txt +0 -0
  45. {ipex_llm-2.2.0b20250105.post0.dist-info → ipex_llm-2.2.0b20250106.post1.dist-info}/top_level.txt +0 -0
ipex_llm/libs/bloom-api.dll CHANGED
Binary file
ipex_llm/libs/bloom.dll CHANGED
Binary file
ipex_llm/libs/gptneox-api.dll CHANGED
Binary file
ipex_llm/libs/gptneox.dll CHANGED
Binary file
ipex_llm/libs/libbloom_avx.dll CHANGED
Binary file
ipex_llm/libs/libbloom_vnni.dll CHANGED
Binary file
ipex_llm/libs/libgptneox_avx.dll CHANGED
Binary file
ipex_llm/libs/libgptneox_vnni.dll CHANGED
Binary file
ipex_llm/libs/libllama_avx.dll CHANGED
Binary file
ipex_llm/libs/libllama_vnni.dll CHANGED
Binary file
ipex_llm/libs/libstarcoder_avx.dll CHANGED
Binary file
ipex_llm/libs/libstarcoder_vnni.dll CHANGED
Binary file
ipex_llm/libs/llama-api.dll CHANGED
Binary file
ipex_llm/libs/llama.dll CHANGED
Binary file
ipex_llm/libs/main-bloom.exe CHANGED
Binary file
ipex_llm/libs/main-gptneox.exe CHANGED
Binary file
ipex_llm/libs/main-llama.exe CHANGED
Binary file
ipex_llm/libs/main-starcoder.exe CHANGED
Binary file
ipex_llm/libs/pipeline.dll CHANGED
Binary file
ipex_llm/libs/quantize-bloom.exe CHANGED
Binary file
ipex_llm/libs/quantize-bloom_vnni.exe CHANGED
Binary file
ipex_llm/libs/quantize-gptneox.exe CHANGED
Binary file
ipex_llm/libs/quantize-gptneox_vnni.exe CHANGED
Binary file
ipex_llm/libs/quantize-llama.exe CHANGED
Binary file
ipex_llm/libs/quantize-llama_vnni.exe CHANGED
Binary file
ipex_llm/libs/quantize-starcoder.exe CHANGED
Binary file
ipex_llm/libs/quantize-starcoder_vnni.exe CHANGED
Binary file
ipex_llm/libs/starcoder-api.dll CHANGED
Binary file
ipex_llm/libs/starcoder.dll CHANGED
Binary file
ipex_llm/transformers/convert.py CHANGED
@@ -680,18 +680,9 @@ def _replace_with_low_bit_linear(model, qtype, modules_to_not_convert=None,
                 optimize_lm_head=optimize_lm_head
             )
             device = module.weight.data.device
-            from ipex_llm.transformers.utils import get_ipex_version
-            if get_ipex_version() < "2.1.10+xpu":
-                new_linear._parameters['weight'] = nn.Parameter(module.weight)
-            else:
-                # only from 2.1, ipex provides matmul_bias_out
-                # so we need to transpose weight
-                new_weight = module.weight.transpose(0, 1).contiguous()
-                new_linear._parameters['weight'] = nn.Parameter(new_weight)
-                new_linear.weight_type = 2
+            new_linear._parameters['weight'] = nn.Parameter(module.weight)
             if module.bias is not None:
-                new_linear._parameters['bias'] = nn.Parameter(module.bias.data)\
-                    .to(device)
+                new_linear._parameters['bias'] = nn.Parameter(module.bias.data).to(device)
         elif qtype == ggml_tensor_qtype["bf16"]:
             module.to(torch.bfloat16)
             if _USE_VLLM:
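A minimal sketch of the effect of this hunk (illustrative only: nn.Linear stands in for FP16Linear, and the names mirror the diff rather than the full source). The fp16 path now always wraps the weight in its original [out_features, in_features] layout, with no IPEX-version branch and no transpose, so weight_type stays 1:

import torch.nn as nn

module = nn.Linear(4096, 4096)      # stand-in for the layer being replaced
new_linear = nn.Linear(4096, 4096)  # stand-in for the new FP16Linear
device = module.weight.data.device
# Weight is wrapped as-is; no transpose for matmul_bias_out anymore.
new_linear._parameters['weight'] = nn.Parameter(module.weight)
if module.bias is not None:
    new_linear._parameters['bias'] = nn.Parameter(module.bias.data).to(device)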
@@ -1452,21 +1443,6 @@ def _optimize_post(model):
                         module.MultiheadAttention,
                         mpt_multihead_attention_forward
                         )
-    elif "gptj" in model.config.model_type:
-        # dolly-v1-6b
-        modeling_module_name = model.__class__.__module__
-        module = importlib.import_module(modeling_module_name)
-        from ipex_llm.transformers.models.gptj import gptj_attention_forward, gptj_model_forward,\
-            gptj_block_forward
-        convert_forward(model,
-                        module.GPTJAttention,
-                        gptj_attention_forward)
-        convert_forward(model,
-                        module.GPTJModel,
-                        gptj_model_forward)
-        convert_forward(model,
-                        module.GPTJBlock,
-                        gptj_block_forward)
    elif "bloom" in model.config.model_type:
        modeling_module_name = model.__class__.__module__
        module = importlib.import_module(modeling_module_name)
ipex_llm/transformers/loader.py CHANGED
@@ -22,7 +22,7 @@ import time
from datetime import date
import argparse
from ipex_llm.utils.common import invalidInputError
- from transformers import AutoTokenizer, GPTJForCausalLM, LlamaTokenizer
+ from transformers import AutoTokenizer, LlamaTokenizer

LLAMA_IDS = ['llama', 'vicuna', 'merged-baize']

ipex_llm/transformers/low_bit_linear.py CHANGED
@@ -286,7 +286,7 @@ def use_batch_forward(x: torch.Tensor, qtype: int, output_len: int):
        or (
            qtype in [SYM_INT8, FP4, FP6, Q4_K, Q6_K]
            and batch_size <= 48
-           and device_name in ["arc", "pvc", "mtl", "lnl", "arl"]
+           and device_name in ["arc", "pvc", "mtl", "arl"]
            and x.shape[1] % 256 == 0
            and output_len % 32 == 0
        )
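Read as a standalone predicate, the updated gate looks roughly like the sketch below (an assumption-laden paraphrase: the qtype constants are replaced by stand-in strings, and device_name is whatever get_xpu_device_name returns; note that "lnl" no longer qualifies for this branch):

def batch_forward_eligible(qtype: str, batch_size: int, device_name: str,
                           in_features: int, output_len: int) -> bool:
    # Mirrors the condition in the hunk above; the qtype names are illustrative.
    return (qtype in ("sym_int8", "fp4", "fp6", "q4_k", "q6_k")
            and batch_size <= 48
            and device_name in ("arc", "pvc", "mtl", "arl")
            and in_features % 256 == 0
            and output_len % 32 == 0)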
@@ -759,9 +759,9 @@ class FP16Linear(nn.Linear):
        self.weight_length = self.out_len * self.in_len
        self.qtype = ggml_tensor_qtype["fp16"]
        self.mp_group = mp_group
-       # weigh_type = 1 means original weight
-       # weigh_type = 2 means weight has been transposed
-       # weigh_type = 3 means weight has been transposed by esimd method
+       # weight_type = 1 means original weight
+       # weight_type = 2 means weight has been transposed
+       # weight_type = 3 means weight has been transposed by esimd method
        self.weight_type = 1
        self.optimize_lm_head = optimize_lm_head
        self.disable_fp16_opt = False
@@ -775,28 +775,14 @@ class FP16Linear(nn.Linear):

        x = x.to(torch.float16)
        if self.bias is not None and self.bias.dtype != x.dtype:
-               self.bias.data = self.bias.data.to(x.dtype)
+           self.bias.data = self.bias.data.to(x.dtype)
        if self.weight is not None and self.weight.dtype != x.dtype:
            self.weight.data = self.weight.data.to(x.dtype)

        if not self.use_esimd_kernel(x):
-           if (
-               get_ipex_version() < "2.1.10+xpu"
-               or get_xpu_device_name(x.device) not in ["arc", "pvc"]
-               or self.disable_fp16_opt
-           ):
-               if self.weight_type == 2:
-                   self.weight = torch.nn.Parameter(self.weight.transpose(0, 1).contiguous(),
-                                                    requires_grad=False)
-                   self.weight_type = 1
-               result = F.linear(x, self.weight, self.bias)
-           else:
-               if self.weight_type == 1:
-                   self.weight = torch.nn.Parameter(self.weight.transpose(0, 1).contiguous(),
-                                                    requires_grad=False)
-                   self.weight_type = 2
-               result = torch.ops.torch_ipex.matmul_bias_out(x.contiguous(),
-                                                             self.weight, self.bias)
+           invalidInputError(self.weight_type == 1, "weight_type should be 1")
+           result = F.linear(x, self.weight, self.bias)
+
        if self.mp_group is not None:
            if get_use_vllm():
                result = self.mp_group.all_reduce(result)
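After this simplification the non-esimd fp16 path is essentially a plain F.linear. A sketch of the resulting method body (assumptions: invalidInputError raises when its condition is false, as elsewhere in ipex-llm; _esimd_path is a hypothetical placeholder for the esimd branch, and mp_group reduction is omitted):

import torch
import torch.nn.functional as F

def forward_sketch(self, x: torch.Tensor) -> torch.Tensor:
    x = x.to(torch.float16)
    if self.bias is not None and self.bias.dtype != x.dtype:
        self.bias.data = self.bias.data.to(x.dtype)
    if self.weight is not None and self.weight.dtype != x.dtype:
        self.weight.data = self.weight.data.to(x.dtype)
    if not self.use_esimd_kernel(x):
        # Weights are always kept untransposed now, so weight_type must be 1.
        invalidInputError(self.weight_type == 1, "weight_type should be 1")
        return F.linear(x, self.weight, self.bias)
    return self._esimd_path(x)  # hypothetical helper, not in the diff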
@@ -852,7 +838,7 @@ class FP16Linear(nn.Linear):
        if self.disable_fp16_opt:
            return False
        # esimd kernel can only be used for Arc and Flex
-       if gpu_type not in ["arc", "flex"]:
+       if gpu_type not in ["arc"]:
            return False
        # now esimd kernel can only be used for specific cases (llama2-7b shape)
        if self.in_len == 11008 and self.out_features == 4096:
ipex_llm/transformers/model.py CHANGED
@@ -103,12 +103,6 @@ def save_low_bit(self, *args, **kwargs):
    self.to(origin_device)


- def _load_pre():
-     from transformers import GPTJModel
-     from ipex_llm.transformers.models.gptj import gptj_model_new_init
-     GPTJModel.__init__ = gptj_model_new_init
-
-
class _BaseAutoModelClass:
    HF_MODEL = None

@@ -495,7 +489,6 @@ class _BaseAutoModelClass:
            else:
                if quant_config is not None:
                    kwargs["quantization_config"] = quant_config
-               _load_pre()
                try:
                    # To handle the input CUDA setting (such as 'device_map={"":0}'), ignore it
                    kwargs.pop('device_map', None)
ipex_llm/transformers/models/utils.py CHANGED
@@ -168,7 +168,7 @@ def should_use_fuse_rope(hidden_states, position_ids, training):

def apply_rotary_pos_emb(q, k, cos, sin, position_ids, model_family):
    if model_family in ["llama", "baichuan", "internlm", "aquila", "gpt_neox", "mistral",
-                       "mixtral", "qwen2", "yuan", "stablelm", "qwen2_moe"]:
+                       "qwen2", "yuan", "stablelm", "qwen2_moe"]:
        # The first two dimensions of cos and sin are always 1, so we can `squeeze` them.
        cos = cos.squeeze(1).squeeze(0)  # [seq_len, dim]
        sin = sin.squeeze(1).squeeze(0)  # [seq_len, dim]
@@ -183,7 +183,7 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids, model_family):
        q_embed = (q * cos) + (rotate_half(q) * sin)
        k_embed = (k * cos) + (rotate_half(k) * sin)
        return q_embed, k_embed
-   elif model_family in ["gptj", "chatglm"]:
+   elif model_family in ["chatglm"]:
        q_embed = (q * cos) + (rotate_every_two(q) * sin)
        k_embed = (k * cos) + (rotate_every_two(k) * sin)
        return q_embed, k_embed
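For reference, the two rotation styles dispatched above are the standard RoPE variants; minimal PyTorch sketches follow (my definitions for illustration, not necessarily the package's exact code):

import torch

def rotate_half(x):
    # llama-style RoPE: rotate the two halves of the last dimension
    x1, x2 = x.chunk(2, dim=-1)
    return torch.cat((-x2, x1), dim=-1)

def rotate_every_two(x):
    # gptj/chatglm-style RoPE: rotate adjacent pairs (x0, x1), (x2, x3), ...
    x1 = x[..., ::2]
    x2 = x[..., 1::2]
    return torch.stack((-x2, x1), dim=-1).flatten(-2)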
@@ -192,19 +192,6 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids, model_family):
                          f"{model_family} is not supported.")


- def apply_ipex_rotate_every_two(q, k, cos, sin):
-     # ipex's apply_rotary_embedding_two_qk can change the origin storage,
-     # so q/k will get the result directly.
-     from ipex_llm.transformers.utils import get_ipex_version
-     if get_ipex_version() >= "2.1.10+xpu":
-         torch.ops.torch_ipex.apply_rotary_embedding_two_qk(
-             q, k, sin, cos, q, k
-         )
-     else:
-         torch.ops.torch_ipex.apply_rotary_embedding(q, sin, cos, q)
-         torch.ops.torch_ipex.apply_rotary_embedding(k, sin, cos, k)
-
-
def is_enough_kv_cache_room_4_36(past_key_value, idx, seq_len=1):
    # to determinate if is enough kv cache room in transformers==4.36
    # seq_len for current seq len
ipex_llm/transformers/speculative.py CHANGED
@@ -432,8 +432,7 @@ def _check_and_extend_kv_cache(past_key_values, max_step_draft, kv_alloc_block_l
    from ipex_llm.transformers.models.utils import is_enough_kv_cache_room_4_31, \
        extend_kv_cache
    enough_kv_room = True
-   if model_type not in ["chatglm", "qwen", "baichuan", "llama", "mistral",
-                         "gptj", "opt"]:
+   if model_type not in ["chatglm", "qwen", "baichuan", "llama", "mistral", "opt"]:
        return past_key_values, False
    cache_k = past_key_values[0][0]
    if model_type == "chatglm":
@@ -527,7 +526,7 @@ def _crop_past_key_values(self, past_key_values, new_cache_size, _enable_ipex=Fa
                           v[:-(new_cache_size), :, :, :])
                          for k, v in past_key_values
                          ]
-   elif self.config.model_type in ["baichuan", "gptj"]:
+   elif self.config.model_type in ["baichuan"]:
        past_key_values = [
            (k[:, :, :-(new_cache_size), :],
             v[:, :, :-(new_cache_size), :])
@@ -796,13 +795,6 @@ def _non_cpu_ipex_verify(self, verify_input_ids, past_key_values, cur_attention_
                                    device=verify_input_ids.device)
        position_ids = position_ids.unsqueeze(0).repeat(1, 1) + past_key_value_len
        forward_args["position_ids"] = position_ids
-   elif self.config.model_type == "gptj":
-       past_length = past_key_values[0][0].size(2)
-       input_len = verify_input_ids.shape[1]
-       position_ids = torch.arange(past_length, input_len + past_length,
-                                   dtype=torch.long, device=verify_input_ids.device)
-       position_ids = position_ids.unsqueeze(0).view(-1, input_len)
-       forward_args["position_ids"] = position_ids

    return self(**forward_args)

@@ -971,10 +963,6 @@ def speculative_generate(self,
                past_key_value_len = past_key_values[0][0].shape[0]
                position_ids = torch.Tensor([[past_key_value_len + step_draft]]).long()
                forward_args["position_ids"] = position_ids
-           elif self.config.model_type == "gptj":
-               past_length = draft_past_key_values[0][0].size(2)
-               position_ids = torch.Tensor([[past_length]]).long().to(self.device)
-               forward_args["position_ids"] = position_ids

            if _enable_ipex:
                if any(keyword in self.config.model_type
ipex_llm/transformers/utils.py CHANGED
@@ -172,6 +172,8 @@ def get_xpu_device_name(device: torch.device):
    if device.type != "xpu":
        return device.type
    else:
+       # possiable device name:
+       # ["arc", "pvc", "mtl", "lnl", "bmg", "arl", "legacy", "unknown"]
        import xe_linear
        return xe_linear.get_xpu_device_name(device)
 
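A hedged usage sketch of this helper (xe_linear is Intel's binary extension and only resolves in an ipex-llm XPU environment; the dispatch below is illustrative rather than the package's actual code, and simply echoes the use_esimd_kernel change above, where only "arc" keeps the esimd path):

import torch

def pick_fp16_kernel(device: torch.device) -> str:
    name = get_xpu_device_name(device)  # e.g. "arc", "pvc", "mtl", "lnl", "bmg", ...
    return "esimd" if name == "arc" else "generic"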
{ipex_llm-2.2.0b20250105.post0.dist-info → ipex_llm-2.2.0b20250106.post1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: ipex-llm
- Version: 2.2.0b20250105.post0
+ Version: 2.2.0b20250106.post1
Summary: Large Language Model Develop Toolkit
Home-page: https://github.com/intel-analytics/ipex-llm
Author: BigDL Authors
@@ -27,10 +27,10 @@ Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine
Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'all'
Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'all'
Provides-Extra: cpp
- Requires-Dist: bigdl-core-cpp ==2.6.0b20250106.post0 ; extra == 'cpp'
+ Requires-Dist: bigdl-core-cpp ==2.6.0b20250106.post1 ; extra == 'cpp'
Requires-Dist: setuptools ; extra == 'cpp'
Provides-Extra: cpp-arl
- Requires-Dist: bigdl-core-cpp ==2.6.0b20250106.post0 ; extra == 'cpp-arl'
+ Requires-Dist: bigdl-core-cpp ==2.6.0b20250106.post1 ; extra == 'cpp-arl'
Requires-Dist: setuptools ; extra == 'cpp-arl'
Requires-Dist: onednn-devel ==2024.1.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
Requires-Dist: onednn ==2024.1.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
@@ -67,7 +67,7 @@ Requires-Dist: transformers ==4.40.0 ; extra == 'npu'
Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'npu'
Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'npu'
Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'npu'
- Requires-Dist: bigdl-core-npu ==2.6.0b20250106.post0 ; (platform_system == "Windows") and extra == 'npu'
+ Requires-Dist: bigdl-core-npu ==2.6.0b20250106.post1 ; (platform_system == "Windows") and extra == 'npu'
Provides-Extra: serving
Requires-Dist: py-cpuinfo ; extra == 'serving'
Requires-Dist: fschat[model_worker,webui] ==0.2.36 ; extra == 'serving'
@@ -87,9 +87,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu'
Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu'
Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu'
Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu'
- Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250106.post0 ; extra == 'xpu'
- Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250106.post0 ; extra == 'xpu'
- Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250106.post0 ; extra == 'xpu'
+ Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250106.post1 ; extra == 'xpu'
+ Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250106.post1 ; extra == 'xpu'
+ Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250106.post1 ; extra == 'xpu'
Provides-Extra: xpu-2-1
Requires-Dist: py-cpuinfo ; extra == 'xpu-2-1'
Requires-Dist: protobuf ; extra == 'xpu-2-1'
@@ -104,9 +104,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu-2-1'
Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu-2-1'
Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu-2-1'
Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu-2-1'
- Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250106.post0 ; extra == 'xpu-2-1'
- Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250106.post0 ; extra == 'xpu-2-1'
- Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250106.post0 ; extra == 'xpu-2-1'
+ Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250106.post1 ; extra == 'xpu-2-1'
+ Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250106.post1 ; extra == 'xpu-2-1'
+ Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250106.post1 ; extra == 'xpu-2-1'
Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-1'
Requires-Dist: dpcpp-cpp-rt ==2024.0.2 ; (platform_system == "Windows") and extra == 'xpu-2-1'
Requires-Dist: mkl-dpcpp ==2024.0.0 ; (platform_system == "Windows") and extra == 'xpu-2-1'
@@ -124,7 +124,7 @@ Requires-Dist: setuptools ; extra == 'xpu-2-6'
Requires-Dist: torch ==2.6.0+xpu ; extra == 'xpu-2-6'
Requires-Dist: torchvision ==0.21.0+xpu ; extra == 'xpu-2-6'
Requires-Dist: torchaudio ==2.6.0+xpu ; extra == 'xpu-2-6'
- Requires-Dist: bigdl-core-xe-all ==2.6.0b20250106.post0 ; extra == 'xpu-2-6'
+ Requires-Dist: bigdl-core-xe-all ==2.6.0b20250106.post1 ; extra == 'xpu-2-6'
Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-6'
Provides-Extra: xpu-arc
Requires-Dist: py-cpuinfo ; extra == 'xpu-arc'
@@ -137,9 +137,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arc'
Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arc'
Requires-Dist: tabulate ; extra == 'xpu-arc'
Requires-Dist: setuptools ; extra == 'xpu-arc'
- Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106.post0 ; extra == 'xpu-arc'
- Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106.post0 ; extra == 'xpu-arc'
- Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106.post0 ; extra == 'xpu-arc'
+ Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106.post1 ; extra == 'xpu-arc'
+ Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106.post1 ; extra == 'xpu-arc'
+ Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106.post1 ; extra == 'xpu-arc'
Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arc'
Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
@@ -160,9 +160,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arl'
Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arl'
Requires-Dist: tabulate ; extra == 'xpu-arl'
Requires-Dist: setuptools ; extra == 'xpu-arl'
- Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106.post0 ; extra == 'xpu-arl'
- Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106.post0 ; extra == 'xpu-arl'
- Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106.post0 ; extra == 'xpu-arl'
+ Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106.post1 ; extra == 'xpu-arl'
+ Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106.post1 ; extra == 'xpu-arl'
+ Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106.post1 ; extra == 'xpu-arl'
Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arl'
Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
@@ -183,9 +183,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-lnl'
Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-lnl'
Requires-Dist: tabulate ; extra == 'xpu-lnl'
Requires-Dist: setuptools ; extra == 'xpu-lnl'
- Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106.post0 ; extra == 'xpu-lnl'
- Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106.post0 ; extra == 'xpu-lnl'
- Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106.post0 ; extra == 'xpu-lnl'
+ Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106.post1 ; extra == 'xpu-lnl'
+ Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106.post1 ; extra == 'xpu-lnl'
+ Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106.post1 ; extra == 'xpu-lnl'
Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-lnl'
Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
{ipex_llm-2.2.0b20250105.post0.dist-info → ipex_llm-2.2.0b20250106.post1.dist-info}/RECORD CHANGED
@@ -41,35 +41,35 @@ ipex_llm/langchain/llms/transformerspipelinellm.py,sha256=vm522YPPwWxxAPVvQBtxRf
ipex_llm/langchain/vllm/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
ipex_llm/langchain/vllm/vllm.py,sha256=6dxc-ZISZQrJilEa_HA827l75Dv9rcHpY_G6FdJ8BVs,7793
ipex_llm/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- ipex_llm/libs/bloom-api.dll,sha256=91zD_S-epu_Gp_dAblCkxDwZFjSBhhfho6to8r3K3Ac,36352
- ipex_llm/libs/bloom.dll,sha256=wxdywP2aMiNAKEw3uU31aAe9RR2r1JjhIaqPuslYBZU,506880
- ipex_llm/libs/gptneox-api.dll,sha256=MNnJv8cjFUg_siCIlKPSCg8_kTVHrTFTvDPKu8SAHZw,24576
- ipex_llm/libs/gptneox.dll,sha256=d-3KIFy39GehluUVnpipGeN1qTqVkKWrz5dNT6j9SPk,567296
- ipex_llm/libs/libbloom_avx.dll,sha256=iwht0gpe7yTRMF3F5ZQ8DRs26vQi7i2D8Mafg_Yyt5Q,535040
- ipex_llm/libs/libbloom_vnni.dll,sha256=VJxjLCbNP0wZBW6Rw0owR_rc9z3PKPONHLHOIZYiZiM,506880
- ipex_llm/libs/libgptneox_avx.dll,sha256=3Ja3SkOp6wCtA1NB6GRBQLseB91bzgbhKOWRKh0o01A,595456
- ipex_llm/libs/libgptneox_vnni.dll,sha256=8hinv9vxdzjsW7PJhukAxKeLAtF2f2PQOmzwnNEzDD8,567808
- ipex_llm/libs/libllama_avx.dll,sha256=R7yRH3t4q0CXy0Sv91m8j2z0wnG6Qa_-JxEp6kTcgt0,589824
- ipex_llm/libs/libllama_vnni.dll,sha256=Cg9vrS5J4Uev0iW7p5346-flWKLjNJqyItv89g3qwRs,561664
- ipex_llm/libs/libstarcoder_avx.dll,sha256=dCgKfpmh57NN4sjs6j7bQFtO-URXcTmPS6oktgLHwsU,626688
- ipex_llm/libs/libstarcoder_vnni.dll,sha256=KZufV9B-riiEIitRMxv9nmGWxXM6Qbrv4E8ybustdQA,598528
- ipex_llm/libs/llama-api.dll,sha256=lS0rqYlsIUJMOBi1rkLqurGnCsCaDCAe4QZCHZGGeGI,25600
- ipex_llm/libs/llama.dll,sha256=2exLzMA9NFPkMldvfZeT-Ua6moJjcUDsTwv-hyLEjFg,561152
- ipex_llm/libs/main-bloom.exe,sha256=Rjc3aUPGdbJ1nkRNWZ9j637pA4nZ6fC4EBXJE3vhKkM,103424
- ipex_llm/libs/main-gptneox.exe,sha256=eIq223joAOSPSi0nKxNI0_z9CjC_9f0XGx5ZEFdMX8w,98816
- ipex_llm/libs/main-llama.exe,sha256=-jaXiwAERqW8QYjOB1VYs2-hU-HSLYoheA9R7hQnqPg,99840
- ipex_llm/libs/main-starcoder.exe,sha256=-2M-nKhMW8AMhUvwno-iN-x32G_Csb6TCK4b45-ywQE,157696
- ipex_llm/libs/pipeline.dll,sha256=J_rocSlOut7S0JZQnZlUD-IQ92D5P4pAvzbrPZFG7us,72704
- ipex_llm/libs/quantize-bloom.exe,sha256=XYSsf4HvaYKc0J8022uz6nz6AifT-8FhB7dqJRuEGoE,126464
- ipex_llm/libs/quantize-bloom_vnni.exe,sha256=4mpem2yXPOPKoSsezR32Ckng-_mVE37yHWxY4PG48mc,127488
- ipex_llm/libs/quantize-gptneox.exe,sha256=ZqX02ScTq8X90MRYbJVo5NFdI8E0MzwCpS2UGGK6mO8,104448
- ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=rr-sll3eMtxB0Rg-tXQsFUhcrHkkLnAu8HZWzeqVPJ4,104960
- ipex_llm/libs/quantize-llama.exe,sha256=XrWIjKL-5XaqThU-fNnfVS4pTc1Yp_pGlhwALHU4c-k,109568
- ipex_llm/libs/quantize-llama_vnni.exe,sha256=AuQqKGxs2U3S2Zyk-GMDFFpaTKeQpGR3ydW5gZ8FbWA,110592
- ipex_llm/libs/quantize-starcoder.exe,sha256=X3TroCzyM_i8El6D0-V2mufnRgZc5kBsW1qNoJKIdcE,127488
- ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=K1NgSQOr5-xEEOHqD25qxJe5Z6esvUivIJEkBYto6KM,128512
- ipex_llm/libs/starcoder-api.dll,sha256=YTFtKQ0jSo2cgwO7HkIfjPvD5jANUaDWSa9ykCLfU5s,21504
- ipex_llm/libs/starcoder.dll,sha256=KWX7QLhGFBGUBea-Od0udwKxnJnblsi4y8TSJl9p67g,598016
+ ipex_llm/libs/bloom-api.dll,sha256=MSVCD9yi2nlgksGF-5u7tKE660pzfRthAuTWK_7ozbE,36352
+ ipex_llm/libs/bloom.dll,sha256=DMJiNnnBbuZArLpPrSIeVgBQtgHRKK0gfqF3e_4N4Ak,506880
+ ipex_llm/libs/gptneox-api.dll,sha256=B9yLQPHTYv0o88cZ3pYCfY5ksnetr0xcWC4wKYXhsKs,24576
+ ipex_llm/libs/gptneox.dll,sha256=B8H5jlqDLVMSi4wqdPZ0ozI1smbuY7jRN-9YQXSoDQI,567296
+ ipex_llm/libs/libbloom_avx.dll,sha256=syY7Lc2kwoM7D57WPaj2XIMJCXyY1IfXClgkY8Lrs1w,535040
+ ipex_llm/libs/libbloom_vnni.dll,sha256=RtkwaaPVlbWv6OtxClKILy5CYlL98olnJYdFbe95tEM,506880
+ ipex_llm/libs/libgptneox_avx.dll,sha256=1gmdd3DdFrMJBI4pXyI0_703ndz2kopDkK8vz7uPyKI,595456
+ ipex_llm/libs/libgptneox_vnni.dll,sha256=bUKisE3_Cg4rZ2AtjGhVoXtKqNVAo4MZI7N7Q4Q_cf0,567808
+ ipex_llm/libs/libllama_avx.dll,sha256=lfcj2lw9lIgmDifxIzNnzcz6fHuYUIVYvdvwoM8ijOE,589824
+ ipex_llm/libs/libllama_vnni.dll,sha256=5xY7lEJ-sW6owTNcBO8SWRCoweo7k8N4FS4v9-lQcdc,561664
+ ipex_llm/libs/libstarcoder_avx.dll,sha256=z8P_H55-1VQFB9KcBxnBrSlkLlsLpxPghN9VfKpQulA,626688
+ ipex_llm/libs/libstarcoder_vnni.dll,sha256=rPEGDTZlUTl0hUP40uRe8idjkVhNkyYk818RYmONL1Q,598528
+ ipex_llm/libs/llama-api.dll,sha256=XJP636817YbUBOneSVGLqr9-qefV4DV6vR_8P7e8Q7k,25600
+ ipex_llm/libs/llama.dll,sha256=YF2m8rwTy7hRbCo31FG-fo1q6DStBAxenNgkz4_gkr0,561152
+ ipex_llm/libs/main-bloom.exe,sha256=lWEzFmr2zWPLnDUy3l-FBv5k-xLn7BXm1A3RSkZx890,103424
+ ipex_llm/libs/main-gptneox.exe,sha256=ex0FAncs6bLngz7-jmtCxuDNEWlZCXThBMCdveDJnEs,98816
+ ipex_llm/libs/main-llama.exe,sha256=ooEHT6YMGZTCKo3h-m3E7f8vkc6aMJPZiRH9dU_4Wu0,99840
+ ipex_llm/libs/main-starcoder.exe,sha256=r9rZZhk0V4_6L7EH7evcBdmRyWSqiZ_oNon3mjSCXEY,157696
+ ipex_llm/libs/pipeline.dll,sha256=5rhnCddgAUozj2ZQox5ykjsWivaNsK-jVcvArO9Hfy8,72704
+ ipex_llm/libs/quantize-bloom.exe,sha256=V-ht-dVMJ9dq4O9vokGlBoOyTCCIgZoZMC-Cn8UdPL4,126464
+ ipex_llm/libs/quantize-bloom_vnni.exe,sha256=q14nAfmyRJW57xLG10wKCODG-cSOZ6KP7VCIVPAqslM,127488
+ ipex_llm/libs/quantize-gptneox.exe,sha256=Mjhxhcb8FXwrGI0dMRrHXzOOWkLZwtYQkpj4i2gSTYk,104448
+ ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=_9D8mqr8zCI7sAzRZ_KlCayqDq0ctQWcwAzI4NWdH7U,104960
+ ipex_llm/libs/quantize-llama.exe,sha256=TsPfUBMnQNcKA44_QG9wrmRSEcs2bAmiSOD-ll57g6w,109568
+ ipex_llm/libs/quantize-llama_vnni.exe,sha256=1fUjt6woxEAzYQ1b0Ok3JRo1YcFcTiogB-l0RdOs_iY,110592
+ ipex_llm/libs/quantize-starcoder.exe,sha256=-wvnnZhGw_eKPnbhZiiaepceFbd_zMUY7qQwrKg4miw,127488
+ ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=iPbhV8PTL7J3JXJnJkRw1tsvaHA0kGxgwpttSzVkUHQ,128512
+ ipex_llm/libs/starcoder-api.dll,sha256=bpzxqpZZE9Q7Yghy-33_Hkvo-OLvKntfqH-DJLWfXjQ,21504
+ ipex_llm/libs/starcoder.dll,sha256=QXucnOagZLcxQF8PG_nvQ9kW4XvexngcqrmJE5N5o9s,598016
ipex_llm/llamaindex/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
ipex_llm/llamaindex/llms/__init__.py,sha256=KP1lEdGqDuxPoxL1ZSH25Pm2kKMPJBWUTLR0ckSLMIU,1139
ipex_llm/llamaindex/llms/bigdlllm.py,sha256=FQBzq1KOjfc6uofTXAha3O7TqpJkNfOFepXQmOVlbnI,26314
@@ -87,25 +87,25 @@ ipex_llm/serving/fastchat/tgi_api_protocol.py,sha256=brT3k3-V0NJrU4fRqUwWjC0O3iO
ipex_llm/serving/fastchat/tgi_api_server.py,sha256=agNTAEiZPSuj3dEdIdYKwkoY0cXOUDX06DiM9VP2knQ,24418
ipex_llm/serving/fastchat/vllm_worker.py,sha256=ZLz2Q9GxJO6r_LOiP6epgCRjBGk-K4EB1SNEWSJp5DA,11091
ipex_llm/transformers/__init__.py,sha256=l4KkMkLe-pRC7b_kj6LCfeifgE-Uo33_Av_FwN9HnFA,1074
- ipex_llm/transformers/convert.py,sha256=pFm6VlU84u_Llr2sp6-gRrEYDeNgIk2QPukolq4IE1s,99947
+ ipex_llm/transformers/convert.py,sha256=TxWdTTOSvh-j5jqokQJVWykta4U4LHupE1QJ-9udzwc,98733
ipex_llm/transformers/convert_ipex.py,sha256=iKXo0n8fVFTOA2fNYYrByMFK0dovL-kLd2sVDk88AlQ,14334
ipex_llm/transformers/embedding.py,sha256=bdgk59DvD4ZZyxRzewXOR7g56nThgO6uhIwk8QL7f-s,9299
ipex_llm/transformers/kv.py,sha256=k4TU18LlA-Sbq9WNNQnfuzu3RSFBwFhmaV3BcGN5bAo,19191
ipex_llm/transformers/lisa.py,sha256=F5WxbtXQ7RdKulj83h_2DnEIgKiKGZf7zvOmg6QBl2s,3289
- ipex_llm/transformers/loader.py,sha256=cOgX93xOC-4dt01GTJ5wyd7PjZ8S43r4mctkR2YxVuw,6893
+ ipex_llm/transformers/loader.py,sha256=AwjV5RpI2t2bedlv7ZhLm8cfd-QJZm5hny-XyjIvdnk,6876
ipex_llm/transformers/lookup.py,sha256=b6OlZ9OV10R9qeWw8mVryVpDxszkjwLkldvi7GPMJY8,19614
- ipex_llm/transformers/low_bit_linear.py,sha256=nKraUvZJ7UdXP29HSE4CJPIVxmN-TvG8dpT4gpleuyQ,41688
- ipex_llm/transformers/model.py,sha256=KcRjkauGg48BYrUBoUZaVMpg7Piuz5JrfIpVZd3EIjs,41105
+ ipex_llm/transformers/low_bit_linear.py,sha256=lPIvDuRoS0zusiJ6vw_fOTJgK5ylh4CuD3U-qs8ih4Y,40869
+ ipex_llm/transformers/model.py,sha256=fj7LBjrWtWwDJJYXnWiXsLGS4ayqqHfnh0p51dSDssE,40908
ipex_llm/transformers/modelling_bigdl.py,sha256=7JpNVMuyq_OmtNUaMFMXdxPWZp2q0QHC02QeA-VTPOw,6709
ipex_llm/transformers/npu_model.py,sha256=YW02GeVz-9ZGqxAeSz0AOvciS-17bo9eK5ZOBrICwSQ,39508
ipex_llm/transformers/patches.py,sha256=halPWm__ORh2fRFSIFPiCNg3LQBfrRkTPtmtRpBJCZQ,1286
ipex_llm/transformers/pipeline_parallel.py,sha256=uNZpOXljNmdoEYnP8U-VFiN4dRZb2piQbIf2bG9LQnE,49051
ipex_llm/transformers/qlora.py,sha256=jtPGsvWFjbTUGzDBCdfftnCis_0nJQNRpACSwXUbbGU,14943
ipex_llm/transformers/relora.py,sha256=-dYzUV0P-IhO2jFdnzN9-v_sFzJpRj3ZwN9eCJzOoCw,16567
- ipex_llm/transformers/speculative.py,sha256=Zf1nQb5GXpJQrUHBTL-H4RUBfdv3lGhfehzudHimhYk,64109
+ ipex_llm/transformers/speculative.py,sha256=0XNLgc9dGswJHVPrXo4iM7pPxkWwfFfJMECcivJSnIc,63368
ipex_llm/transformers/streamer.py,sha256=RrVlLblzCOtABRUpaMXAyaMnCGgLUtAi_YesLumRbww,4842
ipex_llm/transformers/training_patch.py,sha256=oxMkUtqyvqJiprw6dE3skkYfD1HOmUlH9N0hBkbn0G0,10799
- ipex_llm/transformers/utils.py,sha256=fXLIlr9hoBr27p3w3xzczZGPk2cCTIRbUKBkiVCGYbc,16889
+ ipex_llm/transformers/utils.py,sha256=7syzq4jnEo-mWmS9E2VZ2GcFrjojWI8E7Hcx0tloifg,16996
ipex_llm/transformers/xpu_customize_fwd.py,sha256=wFpIhs5F6tkNs8gBOrLxWdhLzO3EDHovVkERPIAoAvg,7611
ipex_llm/transformers/xpu_ops.py,sha256=H46-69pMRQhekbAEoDfNacCInLWycMHDqrgMGLvFYfI,4362
ipex_llm/transformers/awq/__init__.py,sha256=Du5gu3-eeAkeDO_dEMBTzrDBA66DSN3uL3-rn8WGXQw,875
@@ -151,7 +151,6 @@ ipex_llm/transformers/models/gemma2.py,sha256=2WZuv-FLzJyTJFaYxOuzJt47QE64M0lHnz
ipex_llm/transformers/models/glm.py,sha256=gHYgfn20jPRL-ElXy-rUqMh6_LQcc5x7DEXSZuRA4E0,7094
ipex_llm/transformers/models/gpt2.py,sha256=YSaNgK1uLCFDuIFqnKO0Mi-AsOZsYav-7pNf_NpKGdM,3445
ipex_llm/transformers/models/gptbigcode.py,sha256=cP1_qGWoa43R2WacAMblShjku4QupcCZiLaPPAoOUs4,9101
- ipex_llm/transformers/models/gptj.py,sha256=TTIx461X2nOcIkrAcZhEf7d7mjJ3yvEC9KLVc1-hrpc,17973
ipex_llm/transformers/models/gptneox.py,sha256=loRh1x_5S6BCeOr_s5xr-N_1SQHL3Y5IiUBAEyoMUqQ,6172
ipex_llm/transformers/models/internlm.py,sha256=ZbIUMDwNRcrCeduXfbA_uq1AUEWawEt6CJRvQl3LkAg,17832
ipex_llm/transformers/models/internvl.py,sha256=Vx0vENIEQLX2M6P398mw5TOhpks0U8xf8rtRQvy94go,8154
@@ -175,7 +174,7 @@ ipex_llm/transformers/models/rwkv5.py,sha256=OkRNj1pCAZg1z2Fw-I0DEnxLEdZyPeRSQ6m
ipex_llm/transformers/models/sd.py,sha256=VvHV5u-0k2MgHu3NL9113hPj7DgfxqctuKzEEeNfRDU,5981
ipex_llm/transformers/models/stablelm.py,sha256=RGQCYuQhYqtZ1j3RZkYi0_QvCRnUgUIPYxfBcLnElzg,6885
ipex_llm/transformers/models/starcoder2.py,sha256=4P3mhRYf2Kreb1ESjrQGfy1puLMmZXgV35zf-Tksvao,6462
- ipex_llm/transformers/models/utils.py,sha256=Qbz7UkYSbsM5bodH2445O0-JF50Mu3UEwW0j2ZNxHSU,15997
+ ipex_llm/transformers/models/utils.py,sha256=85rGIzGZvWe3JjYpWcuc1nfzI_tn_zFcdZpWivxJkl0,15457
ipex_llm/transformers/models/yuan.py,sha256=1jRPebwAK2ENbyYokOmb4LSVo-szucWiygz9zTv-scs,7656
ipex_llm/transformers/npu_models/__init__.py,sha256=ulEUGLjaP48LCrVeury3UxLjXxKzRi0UpSG4bYu-7f8,585
ipex_llm/transformers/npu_models/baichuan.py,sha256=fJtd7fBrttySghRUgfZTAdxLjsSNC-XL08HISsXigLE,4685
@@ -244,11 +243,11 @@ ipex_llm/vllm/xpu/engine/__init__.py,sha256=pY_CpyuZd72fr6s32ejeKHKFW0K4vUU2rzZj
ipex_llm/vllm/xpu/engine/engine.py,sha256=k4-D27WS_Gk3mA--w3HWAjPjb4Aiu043MVPi0ZoAUBc,5984
ipex_llm/vllm/xpu/entrypoints/openai/api_server.py,sha256=GshTZFB8e4PWvqckfbmTOU6b0oLkNn7A-vzLuG9--j8,21544
ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py,sha256=2rENA2ucynMaIjiZBEh2ez1o5vR32GaP514t39CD7KM,8676
- ipex_llm-2.2.0b20250105.post0.data/scripts/ipex-llm-init.bat,sha256=HPtCYuDYwEatq7dAwOvdfVcHYCpAVdbj75K1qh0vQek,2578
- ipex_llm-2.2.0b20250105.post0.data/scripts/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
- ipex_llm-2.2.0b20250105.post0.data/scripts/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
- ipex_llm-2.2.0b20250105.post0.dist-info/METADATA,sha256=-eNpo4zm9w1DQqVCTFi228urj8ylbTuXml4uNwlEP3E,12825
- ipex_llm-2.2.0b20250105.post0.dist-info/WHEEL,sha256=6iYPr8vTHsyDK75jr9X0V3I9wPSVmtwr_8fdATBciGk,98
- ipex_llm-2.2.0b20250105.post0.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
- ipex_llm-2.2.0b20250105.post0.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
- ipex_llm-2.2.0b20250105.post0.dist-info/RECORD,,
+ ipex_llm-2.2.0b20250106.post1.data/scripts/ipex-llm-init.bat,sha256=HPtCYuDYwEatq7dAwOvdfVcHYCpAVdbj75K1qh0vQek,2578
+ ipex_llm-2.2.0b20250106.post1.data/scripts/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
+ ipex_llm-2.2.0b20250106.post1.data/scripts/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
+ ipex_llm-2.2.0b20250106.post1.dist-info/METADATA,sha256=I0vPU5mDtPZR3wpY87fYHdn6r14U0T50NWGP7EsF5s8,12825
+ ipex_llm-2.2.0b20250106.post1.dist-info/WHEEL,sha256=6iYPr8vTHsyDK75jr9X0V3I9wPSVmtwr_8fdATBciGk,98
+ ipex_llm-2.2.0b20250106.post1.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
+ ipex_llm-2.2.0b20250106.post1.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
+ ipex_llm-2.2.0b20250106.post1.dist-info/RECORD,,
ipex_llm/transformers/models/gptj.py DELETED
@@ -1,441 +0,0 @@
- #
- # Copyright 2016 The BigDL Authors.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- #     http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- #
- # This file is adapted from
- # https://github.com/huggingface/transformers/blob/main/src/transformers/models/gptj/modeling_gptj.py
- #
-
- import torch
- from typing import Optional, Tuple, Union
- from ipex_llm.transformers.models.utils import init_kv_cache, extend_kv_cache, \
-     apply_rotary_pos_emb, append_kv_cache, apply_ipex_rotate_every_two
- from transformers.utils.import_utils import is_torch_fx_proxy
- from transformers.modeling_outputs import BaseModelOutputWithPast
- from transformers.models.gptj.modeling_gptj import GPTJModel
- from ipex_llm.utils.common import invalidInputError
-
- import os
-
- KV_CACHE_ALLOC_BLOCK_LENGTH = int(os.environ.get("KV_CACHE_ALLOC_BLOCK_LENGTH", 256))
-
-
- def _get_embed_positions(self, position_ids):
-     embed_positions = self.embed_positions
-     if embed_positions.device != position_ids.device:
-         embed_positions = embed_positions.to(position_ids.device)
-         self.embed_positions = embed_positions
-     return embed_positions.repeat(position_ids.shape[0], 1, 1)
-
-
- def _attn(
-     self,
-     query,
-     key,
-     value,
-     attention_mask=None,
-     head_mask=None,
- ):
-     # compute causal mask from causal mask buffer
-     query_length, key_length = query.size(-2), key.size(-2)
-     causal_mask = self.bias[:, :, key_length - query_length: key_length, :key_length]
-
-     # Keep the attention weights computation in fp32 to avoid overflow issues
-     query = query.to(torch.float32)
-     key = key.to(torch.float32)
-
-     attn_weights = torch.matmul(query, key.transpose(-1, -2))
-
-     mask_value = torch.finfo(attn_weights.dtype).min
-     # Need to be a tensor, otherwise we get error:
-     # `RuntimeError: expected scalar type float but found double`.
-     # Need to be on the same device, otherwise `RuntimeError: ..., x and y to be on the same device`
-     mask_value = torch.tensor(mask_value, dtype=attn_weights.dtype).to(attn_weights.device)
-     attn_weights = torch.where(causal_mask, attn_weights, mask_value)
-
-     attn_weights = attn_weights / self.scale_attn
-
-     if attention_mask is not None:
-         # Apply the attention mask
-         attn_weights = attn_weights + attention_mask
-
-     attn_weights = nn.functional.softmax(attn_weights, dim=-1)
-     attn_weights = attn_weights.to(value.dtype)
-     attn_weights = self.attn_dropout(attn_weights)
-
-     # Mask heads if we want to
-     if head_mask is not None:
-         attn_weights = attn_weights * head_mask
-
-     attn_output = torch.matmul(attn_weights, value)
-
-     return attn_output, attn_weights
-
-
- def gptj_attention_forward(
-     self,
-     hidden_states: torch.FloatTensor,
-     layer_past: Optional[Tuple[torch.Tensor]] = None,
-     attention_mask: Optional[torch.FloatTensor] = None,
-     position_ids: Optional[torch.LongTensor] = None,
-     head_mask: Optional[torch.FloatTensor] = None,
-     use_cache: Optional[bool] = False,
-     rotary_emb: Optional[Tuple]=None,
-     output_attentions: Optional[bool] = False,
- ) -> Union[
-     Tuple[torch.Tensor, Tuple[torch.Tensor]],
-     Optional[Tuple[torch.Tensor, Tuple[torch.Tensor], Tuple[torch.Tensor, ...]]],
- ]:
-     query = self.q_proj(hidden_states)
-     key = self.k_proj(hidden_states)
-     value = self.v_proj(hidden_states)
-
-     query = self._split_heads(query, self.num_attention_heads, self.head_dim, True)
-     key = self._split_heads(key, self.num_attention_heads, self.head_dim, True)
-     value = self._split_heads(value, self.num_attention_heads, self.head_dim, False)
-
-     sin, cos = rotary_emb
-     use_fuse_rope = hidden_states.device.type == "xpu" and not self.training
-
-     if self.rotary_dim is not None:
-         k_rot = key[:, :, :, : self.rotary_dim]
-         q_rot = query[:, :, :, : self.rotary_dim]
-
-         if use_fuse_rope:
-             apply_ipex_rotate_every_two(q_rot, k_rot, cos, sin)
-         else:
-             k_pass = key[:, :, :, self.rotary_dim:]
-             q_pass = query[:, :, :, self.rotary_dim:]
-             q_rot, k_rot = apply_rotary_pos_emb(q_rot, k_rot, cos, sin, position_ids, "gptj")
-             key = torch.cat([k_rot, k_pass], dim=-1)
-             query = torch.cat([q_rot, q_pass], dim=-1)
-     else:
-         if use_fuse_rope:
-             apply_ipex_rotate_every_two(query, key, cos, sin)
-         else:
-             query, key = apply_rotary_pos_emb(query, key, cos, sin, position_ids, "gptj")
-
-     batch_size, q_len, _ = hidden_states.size()
-
-     key = key.permute(0, 2, 1, 3).contiguous()
-     query = query.permute(0, 2, 1, 3).contiguous()
-
-     kv_seq_len = key.size(-2)
-     device = hidden_states.device
-
-     if layer_past is not None:
-         kv_seq_len += layer_past[0].size(2)
-
-     if layer_past is not None:
-         cache_k = layer_past[0]
-         cache_v = layer_past[1]
-         past_length = cache_k.size(2)
-         if cache_k.stride()[1] < kv_seq_len * cache_k.size(3):
-             new_cache_k, new_cache_v = extend_kv_cache(batch_size,
-                                                        self.num_attention_heads,
-                                                        self.head_dim,
-                                                        past_length,
-                                                        kv_seq_len + KV_CACHE_ALLOC_BLOCK_LENGTH,
-                                                        dtype=cache_v.dtype,
-                                                        device=device)
-             new_cache_k[:] = cache_k
-             new_cache_v[:] = cache_v
-             cache_k = new_cache_k
-             cache_v = new_cache_v
-         key, value = append_kv_cache(cache_k, cache_v, key, value)
-
-     elif use_cache:
-         key_cache, value_cache = init_kv_cache(batch_size,
-                                                self.num_attention_heads,
-                                                self.head_dim,
-                                                kv_seq_len,
-                                                kv_seq_len + KV_CACHE_ALLOC_BLOCK_LENGTH,
-                                                dtype=value.dtype,
-                                                device=device)
-         key_cache[:] = key
-         value_cache[:] = value
-         key = key_cache
-         value = value_cache
-
-     if use_cache is True:
-         present = (key, value)
-     else:
-         present = None
-
-     # compute self-attention: V x Softmax(QK^T)
-     attn_output, attn_weights = self._attn(query, key, value, attention_mask, head_mask)
-
-     attn_output = self._merge_heads(attn_output, self.num_attention_heads, self.head_dim)
-     attn_output = self.out_proj(attn_output)
-     attn_output = self.resid_dropout(attn_output)
-
-     outputs = (attn_output, present)
-     if output_attentions:
-         outputs += (attn_weights,)
-
-     return outputs  # a, present, (attentions)
-
-
- def gptj_block_forward(
-     self,
-     hidden_states: Optional[torch.FloatTensor],
-     layer_past: Optional[Tuple[torch.Tensor]] = None,
-     attention_mask: Optional[torch.FloatTensor] = None,
-     position_ids: Optional[torch.LongTensor] = None,
-     head_mask: Optional[torch.FloatTensor] = None,
-     use_cache: Optional[bool] = False,
-     rotary_emb: Optional[Tuple]=None,
-     output_attentions: Optional[bool] = False,
- ) -> Union[Tuple[torch.Tensor], Optional[Tuple[torch.Tensor, Tuple[torch.FloatTensor, ...]]]]:
-     residual = hidden_states
-     hidden_states = self.ln_1(hidden_states)
-     attn_outputs = self.attn(
-         hidden_states=hidden_states,
-         layer_past=layer_past,
-         attention_mask=attention_mask,
-         position_ids=position_ids,
-         head_mask=head_mask,
-         use_cache=use_cache,
-         rotary_emb=rotary_emb,
-         output_attentions=output_attentions,
-     )
-     attn_output = attn_outputs[0]  # output_attn: a, present, (attentions)
-     outputs = attn_outputs[1:]
-
-     feed_forward_hidden_states = self.mlp(hidden_states)
-     hidden_states = attn_output + feed_forward_hidden_states + residual
-
-     if use_cache:
-         outputs = (hidden_states,) + outputs
-     else:
-         outputs = (hidden_states,) + outputs[1:]
-
-     return outputs  # hidden_states, present, (attentions)
-
-
- def create_sinusoidal_positions(num_pos: int, dim: int) -> torch.Tensor:
-     inv_freq = 1.0 / (10000 ** (torch.arange(0, dim, 2) / dim))
-     sinusoid_inp = torch.einsum("i , j -> i j",
-                                 torch.arange(num_pos, dtype=torch.float), inv_freq).float()
-     return torch.cat((torch.sin(sinusoid_inp), torch.cos(sinusoid_inp)), dim=1)
-
-
- old_init = GPTJModel.__init__
-
-
- def gptj_model_new_init(self, config):
-     old_init(self, config)
-     embed_dim = config.hidden_size
-     rotary_dim = config.rotary_dim
-     pos_embd_dim = rotary_dim or embed_dim
-     max_positions = config.max_position_embeddings
-     self.embed_positions = create_sinusoidal_positions(max_positions, pos_embd_dim)
-
-
- def get_new_embed_positions(position_ids, prev_embed_positions):
-     embed_positions = prev_embed_positions
-     if embed_positions.device != position_ids.device:
-         embed_positions = embed_positions.to(position_ids.device)
-         prev_embed_positions = embed_positions
-     return embed_positions.repeat(position_ids.shape[0], 1, 1), prev_embed_positions
-
-
- def gptj_model_forward(
-     self,
-     input_ids: Optional[torch.LongTensor] = None,
-     past_key_values: Optional[Tuple[Tuple[torch.Tensor]]] = None,
-     attention_mask: Optional[torch.FloatTensor] = None,
-     token_type_ids: Optional[torch.LongTensor] = None,
-     position_ids: Optional[torch.LongTensor] = None,
-     head_mask: Optional[torch.FloatTensor] = None,
-     inputs_embeds: Optional[torch.FloatTensor] = None,
-     use_cache: Optional[bool] = None,
-     output_attentions: Optional[bool] = None,
-     output_hidden_states: Optional[bool] = None,
-     return_dict: Optional[bool] = None,
- ) -> Union[Tuple, BaseModelOutputWithPast]:
-     output_attentions = output_attentions if output_attentions is not None \
-         else self.config.output_attentions
-     output_hidden_states = (
-         output_hidden_states if output_hidden_states is not None
-         else self.config.output_hidden_states
-     )
-     use_cache = use_cache if use_cache is not None else self.config.use_cache
-     return_dict = return_dict if return_dict is not None else self.config.use_return_dict
-
-     if input_ids is not None and inputs_embeds is not None:
-         invalidInputError(False,
-                           "You cannot specify both input_ids and inputs_embeds at the same time")
-     elif input_ids is not None:
-         self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
-         input_shape = input_ids.size()
-         input_ids = input_ids.view(-1, input_shape[-1])
-         batch_size = input_ids.shape[0]
-     elif inputs_embeds is not None:
-         input_shape = inputs_embeds.size()[:-1]
-         batch_size = inputs_embeds.shape[0]
-     else:
-         invalidInputError(False, "You have to specify either input_ids or inputs_embeds")
-
-     device = input_ids.device if input_ids is not None else inputs_embeds.device
-
-     if token_type_ids is not None:
-         token_type_ids = token_type_ids.view(-1, input_shape[-1])
-
-     if past_key_values is None:
-         past_length = 0
-         past_key_values = tuple([None] * len(self.h))
-     else:
-         past_length = past_key_values[0][0].size(-2)
-
-     if position_ids is None:
-         position_ids = torch.arange(past_length, input_shape[-1] + past_length,
-                                     dtype=torch.long, device=device)
-         position_ids = position_ids.unsqueeze(0)
-
-     # Attention mask.
-     if attention_mask is not None:
-         if batch_size <= 0:
-             invalidInputError(False, "batch_size has to be defined and > 0")
-         attention_mask = attention_mask.view(batch_size, -1)
-         # We create a 3D attention mask from a 2D tensor mask.
-         # Sizes are [batch_size, 1, 1, to_seq_length]
-         # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length]
-         # this attention mask is more simple than the triangular masking of causal attention
-         # used in OpenAI GPT, we just need to prepare the broadcast dimension here.
-         attention_mask = attention_mask[:, None, None, :]
-
-         # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
-         # masked positions, this operation will create a tensor which is 0.0 for
-         # positions we want to attend and the dtype's smallest value for masked positions.
-         # Since we are adding it to the raw scores before the softmax, this is
-         # effectively the same as removing these entirely.
-         attention_mask = attention_mask.to(dtype=self.dtype)  # fp16 compatibility
-         attention_mask = (1.0 - attention_mask) * torch.finfo(self.dtype).min
-
-     # Prepare head mask if needed
-     # 1.0 in head_mask indicate we keep the head
-     # attention_probs has shape bsz x num_attention_heads x N x N
-     # head_mask has shape n_layer x batch x num_attention_heads x N x N
-     head_mask = self.get_head_mask(head_mask, self.config.n_layer)
-
-     if inputs_embeds is None:
-         inputs_embeds = self.wte(input_ids)
-
-     hidden_states = inputs_embeds
-
-     if token_type_ids is not None:
-         token_type_embeds = self.wte(token_type_ids)
-         hidden_states = hidden_states + token_type_embeds
-
-     hidden_states = self.drop(hidden_states)
-
-     output_shape = (-1,) + input_shape[1:] + (hidden_states.size(-1),)
-
-     if self.gradient_checkpointing and self.training:
-         if use_cache:
-             logger.warning_once(
-                 "`use_cache=True` is incompatible with gradient checkpointing."
-                 "Setting `use_cache=False`..."
-             )
-             use_cache = False
-
-     presents = () if use_cache else None
-     all_self_attentions = () if output_attentions else None
-     all_hidden_states = () if output_hidden_states else None
-
-     # Repeat cos sin here, call only once for each token.
-     # If put this to attension forward, it will generate too many times.
-     if is_torch_fx_proxy(position_ids) or torch.jit.is_tracing():
-         # The logic to conditionally copy to GPU could not be traced, so we do this
-         # every time in the torch.fx case
-         embed_positions = get_embed_positions(self.embed_positions, position_ids)
-     else:
-         embed_positions, self.embed_positions = get_new_embed_positions(position_ids,
-                                                                         self.embed_positions)
-
-     repeated_position_ids = position_ids.unsqueeze(-1).repeat(1, 1, embed_positions.shape[-1])
-     sincos = torch.gather(embed_positions, 1, repeated_position_ids)
-     sin, cos = torch.split(sincos, sincos.shape[-1] // 2, dim=-1)
-     sin = torch.repeat_interleave(sin[:, :, None, :], 2, 3)
-     cos = torch.repeat_interleave(cos[:, :, None, :], 2, 3)
-
-     for i, (block, layer_past) in enumerate(zip(self.h, past_key_values)):
-         # Model parallel
-         if self.model_parallel:
-             torch.cuda.set_device(hidden_states.device)
-             # Ensure layer_past is on same device as hidden_states (might not be correct)
-             if layer_past is not None:
-                 layer_past = tuple(past_state.to(hidden_states.device) for past_state in layer_past)
-             # Ensure that attention_mask is always on the same device as hidden_states
-             if attention_mask is not None:
-                 attention_mask = attention_mask.to(hidden_states.device)
-             if isinstance(head_mask, torch.Tensor):
-                 head_mask = head_mask.to(hidden_states.device)
-         if output_hidden_states:
-             all_hidden_states = all_hidden_states + (hidden_states,)
-
-         if self.gradient_checkpointing and self.training:
-             outputs = self._gradient_checkpointing_func(
-                 block.__call__,
-                 hidden_states,
-                 None,
-                 attention_mask,
-                 position_ids,
-                 head_mask[i],
-                 use_cache,
-                 output_attentions,
-             )
-         else:
-             outputs = block(
-                 hidden_states=hidden_states,
-                 layer_past=layer_past,
-                 attention_mask=attention_mask,
-                 position_ids=position_ids,
-                 head_mask=head_mask[i],
-                 use_cache=use_cache,
-                 rotary_emb=(sin, cos),
-                 output_attentions=output_attentions,
-             )
-
-         hidden_states = outputs[0]
-         if use_cache is True:
-             presents = presents + (outputs[1],)
-
-         if output_attentions:
-             all_self_attentions = all_self_attentions + (outputs[2 if use_cache else 1],)
-
-         # Model Parallel: If it's the last layer for that device, put things on the next device
-         if self.model_parallel:
-             for k, v in self.device_map.items():
-                 if i == v[-1] and "cuda:" + str(k) != self.last_device:
-                     hidden_states = hidden_states.to("cuda:" + str(k + 1))
-
-     hidden_states = self.ln_f(hidden_states)
-
-     hidden_states = hidden_states.view(output_shape)
-     # Add last hidden state
-     if output_hidden_states:
-         all_hidden_states = all_hidden_states + (hidden_states,)
-
-     if not return_dict:
-         return tuple(v for v in [hidden_states, presents, all_hidden_states, all_self_attentions]
-                      if v is not None)
-
-     return BaseModelOutputWithPast(
-         last_hidden_state=hidden_states,
-         past_key_values=presents,
-         hidden_states=all_hidden_states,
-         attentions=all_self_attentions,
-     )