ipex-llm 2.2.0b20250105.post0__py3-none-win_amd64.whl → 2.2.0b20250106.post1__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ipex_llm/libs/bloom-api.dll +0 -0
- ipex_llm/libs/bloom.dll +0 -0
- ipex_llm/libs/gptneox-api.dll +0 -0
- ipex_llm/libs/gptneox.dll +0 -0
- ipex_llm/libs/libbloom_avx.dll +0 -0
- ipex_llm/libs/libbloom_vnni.dll +0 -0
- ipex_llm/libs/libgptneox_avx.dll +0 -0
- ipex_llm/libs/libgptneox_vnni.dll +0 -0
- ipex_llm/libs/libllama_avx.dll +0 -0
- ipex_llm/libs/libllama_vnni.dll +0 -0
- ipex_llm/libs/libstarcoder_avx.dll +0 -0
- ipex_llm/libs/libstarcoder_vnni.dll +0 -0
- ipex_llm/libs/llama-api.dll +0 -0
- ipex_llm/libs/llama.dll +0 -0
- ipex_llm/libs/main-bloom.exe +0 -0
- ipex_llm/libs/main-gptneox.exe +0 -0
- ipex_llm/libs/main-llama.exe +0 -0
- ipex_llm/libs/main-starcoder.exe +0 -0
- ipex_llm/libs/pipeline.dll +0 -0
- ipex_llm/libs/quantize-bloom.exe +0 -0
- ipex_llm/libs/quantize-bloom_vnni.exe +0 -0
- ipex_llm/libs/quantize-gptneox.exe +0 -0
- ipex_llm/libs/quantize-gptneox_vnni.exe +0 -0
- ipex_llm/libs/quantize-llama.exe +0 -0
- ipex_llm/libs/quantize-llama_vnni.exe +0 -0
- ipex_llm/libs/quantize-starcoder.exe +0 -0
- ipex_llm/libs/quantize-starcoder_vnni.exe +0 -0
- ipex_llm/libs/starcoder-api.dll +0 -0
- ipex_llm/libs/starcoder.dll +0 -0
- ipex_llm/transformers/convert.py +2 -26
- ipex_llm/transformers/loader.py +1 -1
- ipex_llm/transformers/low_bit_linear.py +9 -23
- ipex_llm/transformers/model.py +0 -7
- ipex_llm/transformers/models/utils.py +2 -15
- ipex_llm/transformers/speculative.py +2 -14
- ipex_llm/transformers/utils.py +2 -0
- {ipex_llm-2.2.0b20250105.post0.dist-info → ipex_llm-2.2.0b20250106.post1.dist-info}/METADATA +20 -20
- {ipex_llm-2.2.0b20250105.post0.dist-info → ipex_llm-2.2.0b20250106.post1.dist-info}/RECORD +44 -45
- ipex_llm/transformers/models/gptj.py +0 -441
- {ipex_llm-2.2.0b20250105.post0.data → ipex_llm-2.2.0b20250106.post1.data}/scripts/ipex-llm-init.bat +0 -0
- {ipex_llm-2.2.0b20250105.post0.data → ipex_llm-2.2.0b20250106.post1.data}/scripts/llm-chat.ps1 +0 -0
- {ipex_llm-2.2.0b20250105.post0.data → ipex_llm-2.2.0b20250106.post1.data}/scripts/llm-cli.ps1 +0 -0
- {ipex_llm-2.2.0b20250105.post0.dist-info → ipex_llm-2.2.0b20250106.post1.dist-info}/WHEEL +0 -0
- {ipex_llm-2.2.0b20250105.post0.dist-info → ipex_llm-2.2.0b20250106.post1.dist-info}/entry_points.txt +0 -0
- {ipex_llm-2.2.0b20250105.post0.dist-info → ipex_llm-2.2.0b20250106.post1.dist-info}/top_level.txt +0 -0
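A wheel is an ordinary zip archive, so a change summary like the one above can be reproduced once both files are on disk. The sketch below is a minimal, hypothetical helper (the local wheel paths are assumptions) that hashes every archive member and reports additions, removals, and content changes:

    # compare_wheels.py - minimal sketch: list added/removed/changed files between two wheels
    import hashlib
    import zipfile

    OLD = "ipex_llm-2.2.0b20250105.post0-py3-none-win_amd64.whl"   # assumed local path
    NEW = "ipex_llm-2.2.0b20250106.post1-py3-none-win_amd64.whl"   # assumed local path

    def digests(path):
        # A wheel is a plain zip archive; hash every member to detect content changes.
        with zipfile.ZipFile(path) as zf:
            return {name: hashlib.sha256(zf.read(name)).hexdigest()
                    for name in zf.namelist() if not name.endswith("/")}

    old, new = digests(OLD), digests(NEW)
    for name in sorted(old.keys() | new.keys()):
        if name not in new:
            print("removed:", name)
        elif name not in old:
            print("added:  ", name)
        elif old[name] != new[name]:
            print("changed:", name)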
ipex_llm/libs/bloom-api.dll
CHANGED
Binary file
ipex_llm/libs/bloom.dll
CHANGED
Binary file
ipex_llm/libs/gptneox-api.dll
CHANGED
Binary file
ipex_llm/libs/gptneox.dll
CHANGED
Binary file
ipex_llm/libs/libbloom_avx.dll
CHANGED
Binary file
ipex_llm/libs/libbloom_vnni.dll
CHANGED
Binary file
ipex_llm/libs/libgptneox_avx.dll
CHANGED
Binary file
ipex_llm/libs/libgptneox_vnni.dll
CHANGED
Binary file
ipex_llm/libs/libllama_avx.dll
CHANGED
Binary file
ipex_llm/libs/libllama_vnni.dll
CHANGED
Binary file
ipex_llm/libs/libstarcoder_avx.dll
CHANGED
Binary file
ipex_llm/libs/libstarcoder_vnni.dll
CHANGED
Binary file
ipex_llm/libs/llama-api.dll
CHANGED
Binary file
ipex_llm/libs/llama.dll
CHANGED
Binary file
ipex_llm/libs/main-bloom.exe
CHANGED
Binary file
ipex_llm/libs/main-gptneox.exe
CHANGED
Binary file
ipex_llm/libs/main-llama.exe
CHANGED
Binary file
ipex_llm/libs/main-starcoder.exe
CHANGED
Binary file
ipex_llm/libs/pipeline.dll
CHANGED
Binary file
ipex_llm/libs/quantize-bloom.exe
CHANGED
Binary file
ipex_llm/libs/quantize-bloom_vnni.exe
CHANGED
Binary file
ipex_llm/libs/quantize-gptneox.exe
CHANGED
Binary file
ipex_llm/libs/quantize-gptneox_vnni.exe
CHANGED
Binary file
ipex_llm/libs/quantize-llama.exe
CHANGED
Binary file
ipex_llm/libs/quantize-llama_vnni.exe
CHANGED
Binary file
ipex_llm/libs/quantize-starcoder.exe
CHANGED
Binary file
ipex_llm/libs/quantize-starcoder_vnni.exe
CHANGED
Binary file
ipex_llm/libs/starcoder-api.dll
CHANGED
Binary file
ipex_llm/libs/starcoder.dll
CHANGED
Binary file
ipex_llm/transformers/convert.py
CHANGED
@@ -680,18 +680,9 @@ def _replace_with_low_bit_linear(model, qtype, modules_to_not_convert=None,
                         optimize_lm_head=optimize_lm_head
                     )
                     device = module.weight.data.device
-
-                    if get_ipex_version() < "2.1.10+xpu":
-                        new_linear._parameters['weight'] = nn.Parameter(module.weight)
-                    else:
-                        # only from 2.1, ipex provides matmul_bias_out
-                        # so we need to transpose weight
-                        new_weight = module.weight.transpose(0, 1).contiguous()
-                        new_linear._parameters['weight'] = nn.Parameter(new_weight)
-                        new_linear.weight_type = 2
+                    new_linear._parameters['weight'] = nn.Parameter(module.weight)
                     if module.bias is not None:
-                        new_linear._parameters['bias'] = nn.Parameter(module.bias.data)
-                            .to(device)
+                        new_linear._parameters['bias'] = nn.Parameter(module.bias.data).to(device)
                 elif qtype == ggml_tensor_qtype["bf16"]:
                     module.to(torch.bfloat16)
                     if _USE_VLLM:
@@ -1452,21 +1443,6 @@ def _optimize_post(model):
                         module.MultiheadAttention,
                         mpt_multihead_attention_forward
                         )
-    elif "gptj" in model.config.model_type:
-        # dolly-v1-6b
-        modeling_module_name = model.__class__.__module__
-        module = importlib.import_module(modeling_module_name)
-        from ipex_llm.transformers.models.gptj import gptj_attention_forward, gptj_model_forward,\
-            gptj_block_forward
-        convert_forward(model,
-                        module.GPTJAttention,
-                        gptj_attention_forward)
-        convert_forward(model,
-                        module.GPTJModel,
-                        gptj_model_forward)
-        convert_forward(model,
-                        module.GPTJBlock,
-                        gptj_block_forward)
    elif "bloom" in model.config.model_type:
        modeling_module_name = model.__class__.__module__
        module = importlib.import_module(modeling_module_name)
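The removed branch relied on the same convert_forward pattern that the surviving branches still use: walk the loaded model and rebind the forward method of every sub-module of a given class to an optimized implementation, so with GPT-J support dropped the chain now falls straight through to the bloom branch. A minimal sketch of that patching idea (illustrative only, not the package's exact helper):

    # Sketch of the forward-patching pattern used by _optimize_post (illustrative only).
    import types

    def convert_forward_sketch(model, target_cls, new_forward):
        for module in model.modules():
            if isinstance(module, target_cls):
                # Bind the replacement function as this module's forward method.
                module.forward = types.MethodType(new_forward, module)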
ipex_llm/transformers/loader.py
CHANGED
@@ -22,7 +22,7 @@ import time
 from datetime import date
 import argparse
 from ipex_llm.utils.common import invalidInputError
-from transformers import AutoTokenizer,
+from transformers import AutoTokenizer, LlamaTokenizer
 
 LLAMA_IDS = ['llama', 'vicuna', 'merged-baize']
 
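The restored import pairs with the LLAMA_IDS constant visible in the context lines: model ids matching that list are loaded with LlamaTokenizer, everything else goes through AutoTokenizer. A hedged sketch of that selection logic (illustrative, not the exact body of loader.py):

    # Illustrative tokenizer selection, assuming a local model_path and LLAMA_IDS as in loader.py.
    from transformers import AutoTokenizer, LlamaTokenizer

    LLAMA_IDS = ['llama', 'vicuna', 'merged-baize']

    def load_tokenizer(model_path: str):
        if any(llama_id in model_path.lower() for llama_id in LLAMA_IDS):
            return LlamaTokenizer.from_pretrained(model_path, trust_remote_code=True)
        return AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)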
ipex_llm/transformers/low_bit_linear.py
CHANGED
@@ -286,7 +286,7 @@ def use_batch_forward(x: torch.Tensor, qtype: int, output_len: int):
         or (
             qtype in [SYM_INT8, FP4, FP6, Q4_K, Q6_K]
             and batch_size <= 48
-            and device_name in ["arc", "pvc", "mtl", "
+            and device_name in ["arc", "pvc", "mtl", "arl"]
             and x.shape[1] % 256 == 0
             and output_len % 32 == 0
         )
@@ -759,9 +759,9 @@ class FP16Linear(nn.Linear):
         self.weight_length = self.out_len * self.in_len
         self.qtype = ggml_tensor_qtype["fp16"]
         self.mp_group = mp_group
-        #
-        #
-        #
+        # weight_type = 1 means original weight
+        # weight_type = 2 means weight has been transposed
+        # weight_type = 3 means weight has been transposed by esimd method
         self.weight_type = 1
         self.optimize_lm_head = optimize_lm_head
         self.disable_fp16_opt = False
@@ -775,28 +775,14 @@ class FP16Linear(nn.Linear):
 
         x = x.to(torch.float16)
         if self.bias is not None and self.bias.dtype != x.dtype:
-
+            self.bias.data = self.bias.data.to(x.dtype)
         if self.weight is not None and self.weight.dtype != x.dtype:
             self.weight.data = self.weight.data.to(x.dtype)
 
         if not self.use_esimd_kernel(x):
-
-
-
-                    or self.disable_fp16_opt
-            ):
-                if self.weight_type == 2:
-                    self.weight = torch.nn.Parameter(self.weight.transpose(0, 1).contiguous(),
-                                                     requires_grad=False)
-                    self.weight_type = 1
-                result = F.linear(x, self.weight, self.bias)
-            else:
-                if self.weight_type == 1:
-                    self.weight = torch.nn.Parameter(self.weight.transpose(0, 1).contiguous(),
-                                                     requires_grad=False)
-                    self.weight_type = 2
-                result = torch.ops.torch_ipex.matmul_bias_out(x.contiguous(),
-                                                              self.weight, self.bias)
+            invalidInputError(self.weight_type == 1, "weight_type should be 1")
+            result = F.linear(x, self.weight, self.bias)
+
         if self.mp_group is not None:
             if get_use_vllm():
                 result = self.mp_group.all_reduce(result)
@@ -852,7 +838,7 @@ class FP16Linear(nn.Linear):
         if self.disable_fp16_opt:
             return False
         # esimd kernel can only be used for Arc and Flex
-        if gpu_type not in ["arc"
+        if gpu_type not in ["arc"]:
             return False
         # now esimd kernel can only be used for specific cases (llama2-7b shape)
         if self.in_len == 11008 and self.out_features == 4096:
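The net effect of the FP16Linear hunks is that the non-ESIMD path no longer transposes weights or calls torch.ops.torch_ipex.matmul_bias_out; it checks that weight_type is still 1 and falls back to a plain F.linear. A condensed sketch of the resulting control flow (an assumed simplification; esimd_forward below is a hypothetical placeholder for the unchanged ESIMD branch):

    # Condensed sketch of FP16Linear.forward after this patch (not the full method).
    import torch
    import torch.nn.functional as F

    def esimd_forward(self, x):
        raise NotImplementedError  # hypothetical placeholder for the unchanged ESIMD branch

    def fp16_linear_forward(self, x: torch.Tensor) -> torch.Tensor:
        x = x.to(torch.float16)
        if self.bias is not None and self.bias.dtype != x.dtype:
            self.bias.data = self.bias.data.to(x.dtype)
        if self.weight is not None and self.weight.dtype != x.dtype:
            self.weight.data = self.weight.data.to(x.dtype)
        if not self.use_esimd_kernel(x):
            # The transposed layouts (weight_type 2/3) are no longer produced on this path,
            # so the plain PyTorch route is a single linear call on the original layout.
            assert self.weight_type == 1, "weight_type should be 1"
            result = F.linear(x, self.weight, self.bias)
        else:
            result = esimd_forward(self, x)
        return result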
ipex_llm/transformers/model.py
CHANGED
@@ -103,12 +103,6 @@ def save_low_bit(self, *args, **kwargs):
     self.to(origin_device)
 
 
-def _load_pre():
-    from transformers import GPTJModel
-    from ipex_llm.transformers.models.gptj import gptj_model_new_init
-    GPTJModel.__init__ = gptj_model_new_init
-
-
 class _BaseAutoModelClass:
     HF_MODEL = None
 
@@ -495,7 +489,6 @@ class _BaseAutoModelClass:
             else:
                 if quant_config is not None:
                     kwargs["quantization_config"] = quant_config
-            _load_pre()
             try:
                 # To handle the input CUDA setting (such as 'device_map={"":0}'), ignore it
                 kwargs.pop('device_map', None)
ipex_llm/transformers/models/utils.py
CHANGED
@@ -168,7 +168,7 @@ def should_use_fuse_rope(hidden_states, position_ids, training):
 
 def apply_rotary_pos_emb(q, k, cos, sin, position_ids, model_family):
     if model_family in ["llama", "baichuan", "internlm", "aquila", "gpt_neox", "mistral",
-                        "
+                        "qwen2", "yuan", "stablelm", "qwen2_moe"]:
         # The first two dimensions of cos and sin are always 1, so we can `squeeze` them.
         cos = cos.squeeze(1).squeeze(0)  # [seq_len, dim]
         sin = sin.squeeze(1).squeeze(0)  # [seq_len, dim]
@@ -183,7 +183,7 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids, model_family):
         q_embed = (q * cos) + (rotate_half(q) * sin)
         k_embed = (k * cos) + (rotate_half(k) * sin)
         return q_embed, k_embed
-    elif model_family in ["
+    elif model_family in ["chatglm"]:
         q_embed = (q * cos) + (rotate_every_two(q) * sin)
         k_embed = (k * cos) + (rotate_every_two(k) * sin)
         return q_embed, k_embed
@@ -192,19 +192,6 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids, model_family):
                           f"{model_family} is not supported.")
 
 
-def apply_ipex_rotate_every_two(q, k, cos, sin):
-    # ipex's apply_rotary_embedding_two_qk can change the origin storage,
-    # so q/k will get the result directly.
-    from ipex_llm.transformers.utils import get_ipex_version
-    if get_ipex_version() >= "2.1.10+xpu":
-        torch.ops.torch_ipex.apply_rotary_embedding_two_qk(
-            q, k, sin, cos, q, k
-        )
-    else:
-        torch.ops.torch_ipex.apply_rotary_embedding(q, sin, cos, q)
-        torch.ops.torch_ipex.apply_rotary_embedding(k, sin, cos, k)
-
-
 def is_enough_kv_cache_room_4_36(past_key_value, idx, seq_len=1):
     # to determinate if is enough kv cache room in transformers==4.36
     # seq_len for current seq len
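For the model families kept in the first branch, the rotation is the standard rotate-half formulation that appears verbatim in the context lines above. A self-contained sketch of that math, assuming q and k are shaped [batch, heads, seq, head_dim] and cos/sin are already gathered per position:

    import torch

    def rotate_half(x: torch.Tensor) -> torch.Tensor:
        # Split the head dimension in two and swap the halves with a sign flip.
        x1, x2 = x[..., : x.shape[-1] // 2], x[..., x.shape[-1] // 2:]
        return torch.cat((-x2, x1), dim=-1)

    def apply_rope(q, k, cos, sin):
        # Same formula as the "llama"-style branch shown in the diff.
        q_embed = (q * cos) + (rotate_half(q) * sin)
        k_embed = (k * cos) + (rotate_half(k) * sin)
        return q_embed, k_embed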
ipex_llm/transformers/speculative.py
CHANGED
@@ -432,8 +432,7 @@ def _check_and_extend_kv_cache(past_key_values, max_step_draft, kv_alloc_block_l
     from ipex_llm.transformers.models.utils import is_enough_kv_cache_room_4_31, \
         extend_kv_cache
     enough_kv_room = True
-    if model_type not in ["chatglm", "qwen", "baichuan", "llama", "mistral",
-                          "gptj", "opt"]:
+    if model_type not in ["chatglm", "qwen", "baichuan", "llama", "mistral", "opt"]:
         return past_key_values, False
     cache_k = past_key_values[0][0]
     if model_type == "chatglm":
@@ -527,7 +526,7 @@ def _crop_past_key_values(self, past_key_values, new_cache_size, _enable_ipex=Fa
              v[:-(new_cache_size), :, :, :])
             for k, v in past_key_values
         ]
-    elif self.config.model_type in ["baichuan"
+    elif self.config.model_type in ["baichuan"]:
         past_key_values = [
             (k[:, :, :-(new_cache_size), :],
              v[:, :, :-(new_cache_size), :])
@@ -796,13 +795,6 @@ def _non_cpu_ipex_verify(self, verify_input_ids, past_key_values, cur_attention_
                                    device=verify_input_ids.device)
         position_ids = position_ids.unsqueeze(0).repeat(1, 1) + past_key_value_len
         forward_args["position_ids"] = position_ids
-    elif self.config.model_type == "gptj":
-        past_length = past_key_values[0][0].size(2)
-        input_len = verify_input_ids.shape[1]
-        position_ids = torch.arange(past_length, input_len + past_length,
-                                    dtype=torch.long, device=verify_input_ids.device)
-        position_ids = position_ids.unsqueeze(0).view(-1, input_len)
-        forward_args["position_ids"] = position_ids
 
     return self(**forward_args)
 
@@ -971,10 +963,6 @@ def speculative_generate(self,
                     past_key_value_len = past_key_values[0][0].shape[0]
                     position_ids = torch.Tensor([[past_key_value_len + step_draft]]).long()
                     forward_args["position_ids"] = position_ids
-                elif self.config.model_type == "gptj":
-                    past_length = draft_past_key_values[0][0].size(2)
-                    position_ids = torch.Tensor([[past_length]]).long().to(self.device)
-                    forward_args["position_ids"] = position_ids
 
                 if _enable_ipex:
                     if any(keyword in self.config.model_type
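The _crop_past_key_values branches all roll back the same way for different cache layouts: drop the last new_cache_size positions along the sequence axis when drafted tokens are rejected. A minimal sketch for the [batch, heads, seq, head_dim] layout used by the baichuan-style branch (illustrative):

    # Sketch: roll back the last `n_reject` drafted positions from a KV cache.
    def crop_kv(past_key_values, n_reject: int):
        return [
            (k[:, :, :-n_reject, :], v[:, :, :-n_reject, :])
            for k, v in past_key_values
        ]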
ipex_llm/transformers/utils.py
CHANGED
@@ -172,6 +172,8 @@ def get_xpu_device_name(device: torch.device):
     if device.type != "xpu":
         return device.type
     else:
+        # possiable device name:
+        # ["arc", "pvc", "mtl", "lnl", "bmg", "arl", "legacy", "unknown"]
         import xe_linear
         return xe_linear.get_xpu_device_name(device)
 
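The added comment documents the names that xe_linear.get_xpu_device_name can return; callers such as use_batch_forward in the low_bit_linear.py hunk above gate kernel choices on them. A hedged usage sketch, assuming an XPU build of IPEX with the bundled xe_linear extension is present:

    import torch
    from ipex_llm.transformers.utils import get_xpu_device_name

    x = torch.empty(1, device="xpu")                 # requires an available XPU device
    name = get_xpu_device_name(x.device)             # e.g. "arc", "pvc", "mtl", "lnl", "bmg", "arl"
    use_batch_kernel = name in ["arc", "pvc", "mtl", "arl"]   # mirrors the updated allow-list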
{ipex_llm-2.2.0b20250105.post0.dist-info → ipex_llm-2.2.0b20250106.post1.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ipex-llm
-Version: 2.2.
+Version: 2.2.0b20250106.post1
 Summary: Large Language Model Develop Toolkit
 Home-page: https://github.com/intel-analytics/ipex-llm
 Author: BigDL Authors
@@ -27,10 +27,10 @@ Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'all'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'all'
 Provides-Extra: cpp
-Requires-Dist: bigdl-core-cpp ==2.6.0b20250106.
+Requires-Dist: bigdl-core-cpp ==2.6.0b20250106.post1 ; extra == 'cpp'
 Requires-Dist: setuptools ; extra == 'cpp'
 Provides-Extra: cpp-arl
-Requires-Dist: bigdl-core-cpp ==2.6.0b20250106.
+Requires-Dist: bigdl-core-cpp ==2.6.0b20250106.post1 ; extra == 'cpp-arl'
 Requires-Dist: setuptools ; extra == 'cpp-arl'
 Requires-Dist: onednn-devel ==2024.1.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
 Requires-Dist: onednn ==2024.1.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
@@ -67,7 +67,7 @@ Requires-Dist: transformers ==4.40.0 ; extra == 'npu'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'npu'
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'npu'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'npu'
-Requires-Dist: bigdl-core-npu ==2.6.0b20250106.
+Requires-Dist: bigdl-core-npu ==2.6.0b20250106.post1 ; (platform_system == "Windows") and extra == 'npu'
 Provides-Extra: serving
 Requires-Dist: py-cpuinfo ; extra == 'serving'
 Requires-Dist: fschat[model_worker,webui] ==0.2.36 ; extra == 'serving'
@@ -87,9 +87,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250106.
-Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250106.
-Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250106.
+Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250106.post1 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250106.post1 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250106.post1 ; extra == 'xpu'
 Provides-Extra: xpu-2-1
 Requires-Dist: py-cpuinfo ; extra == 'xpu-2-1'
 Requires-Dist: protobuf ; extra == 'xpu-2-1'
@@ -104,9 +104,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu-2-1'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250106.
-Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250106.
-Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250106.
+Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250106.post1 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250106.post1 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250106.post1 ; extra == 'xpu-2-1'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-1'
 Requires-Dist: dpcpp-cpp-rt ==2024.0.2 ; (platform_system == "Windows") and extra == 'xpu-2-1'
 Requires-Dist: mkl-dpcpp ==2024.0.0 ; (platform_system == "Windows") and extra == 'xpu-2-1'
@@ -124,7 +124,7 @@ Requires-Dist: setuptools ; extra == 'xpu-2-6'
 Requires-Dist: torch ==2.6.0+xpu ; extra == 'xpu-2-6'
 Requires-Dist: torchvision ==0.21.0+xpu ; extra == 'xpu-2-6'
 Requires-Dist: torchaudio ==2.6.0+xpu ; extra == 'xpu-2-6'
-Requires-Dist: bigdl-core-xe-all ==2.6.0b20250106.
+Requires-Dist: bigdl-core-xe-all ==2.6.0b20250106.post1 ; extra == 'xpu-2-6'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-6'
 Provides-Extra: xpu-arc
 Requires-Dist: py-cpuinfo ; extra == 'xpu-arc'
@@ -137,9 +137,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arc'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arc'
 Requires-Dist: tabulate ; extra == 'xpu-arc'
 Requires-Dist: setuptools ; extra == 'xpu-arc'
-Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106.
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106.
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106.
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106.post1 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106.post1 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106.post1 ; extra == 'xpu-arc'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arc'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
@@ -160,9 +160,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arl'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arl'
 Requires-Dist: tabulate ; extra == 'xpu-arl'
 Requires-Dist: setuptools ; extra == 'xpu-arl'
-Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106.
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106.
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106.
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106.post1 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106.post1 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106.post1 ; extra == 'xpu-arl'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arl'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
@@ -183,9 +183,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-lnl'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-lnl'
 Requires-Dist: tabulate ; extra == 'xpu-lnl'
 Requires-Dist: setuptools ; extra == 'xpu-lnl'
-Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106.
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106.
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106.
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106.post1 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106.post1 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106.post1 ; extra == 'xpu-lnl'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-lnl'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
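Every bigdl-core-* pin moves to 2.6.0b20250106.post1 in lockstep with the wheel, so installing any extra of this exact release (for example pip install "ipex-llm[xpu-arc]==2.2.0b20250106.post1") resolves a matching core build. A small sketch that checks the pins on an installed copy:

    # Inspect the pinned bigdl-core requirements of an installed ipex-llm (sketch).
    from importlib.metadata import requires

    for req in requires("ipex-llm") or []:
        if "bigdl-core" in req:
            print(req)   # each core pin should read ==2.6.0b20250106.post1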
{ipex_llm-2.2.0b20250105.post0.dist-info → ipex_llm-2.2.0b20250106.post1.dist-info}/RECORD
RENAMED
@@ -41,35 +41,35 @@ ipex_llm/langchain/llms/transformerspipelinellm.py,sha256=vm522YPPwWxxAPVvQBtxRf
 ipex_llm/langchain/vllm/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
 ipex_llm/langchain/vllm/vllm.py,sha256=6dxc-ZISZQrJilEa_HA827l75Dv9rcHpY_G6FdJ8BVs,7793
 ipex_llm/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ipex_llm/libs/bloom-api.dll,sha256=
-ipex_llm/libs/bloom.dll,sha256=
-ipex_llm/libs/gptneox-api.dll,sha256=
-ipex_llm/libs/gptneox.dll,sha256=
-ipex_llm/libs/libbloom_avx.dll,sha256=
-ipex_llm/libs/libbloom_vnni.dll,sha256=
-ipex_llm/libs/libgptneox_avx.dll,sha256=
-ipex_llm/libs/libgptneox_vnni.dll,sha256=
-ipex_llm/libs/libllama_avx.dll,sha256=
-ipex_llm/libs/libllama_vnni.dll,sha256=
-ipex_llm/libs/libstarcoder_avx.dll,sha256=
-ipex_llm/libs/libstarcoder_vnni.dll,sha256=
-ipex_llm/libs/llama-api.dll,sha256=
-ipex_llm/libs/llama.dll,sha256=
-ipex_llm/libs/main-bloom.exe,sha256=
-ipex_llm/libs/main-gptneox.exe,sha256=
-ipex_llm/libs/main-llama.exe,sha256
-ipex_llm/libs/main-starcoder.exe,sha256
-ipex_llm/libs/pipeline.dll,sha256=
-ipex_llm/libs/quantize-bloom.exe,sha256=
-ipex_llm/libs/quantize-bloom_vnni.exe,sha256=
-ipex_llm/libs/quantize-gptneox.exe,sha256=
-ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=
-ipex_llm/libs/quantize-llama.exe,sha256=
-ipex_llm/libs/quantize-llama_vnni.exe,sha256=
-ipex_llm/libs/quantize-starcoder.exe,sha256
-ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=
-ipex_llm/libs/starcoder-api.dll,sha256=
-ipex_llm/libs/starcoder.dll,sha256=
+ipex_llm/libs/bloom-api.dll,sha256=MSVCD9yi2nlgksGF-5u7tKE660pzfRthAuTWK_7ozbE,36352
+ipex_llm/libs/bloom.dll,sha256=DMJiNnnBbuZArLpPrSIeVgBQtgHRKK0gfqF3e_4N4Ak,506880
+ipex_llm/libs/gptneox-api.dll,sha256=B9yLQPHTYv0o88cZ3pYCfY5ksnetr0xcWC4wKYXhsKs,24576
+ipex_llm/libs/gptneox.dll,sha256=B8H5jlqDLVMSi4wqdPZ0ozI1smbuY7jRN-9YQXSoDQI,567296
+ipex_llm/libs/libbloom_avx.dll,sha256=syY7Lc2kwoM7D57WPaj2XIMJCXyY1IfXClgkY8Lrs1w,535040
+ipex_llm/libs/libbloom_vnni.dll,sha256=RtkwaaPVlbWv6OtxClKILy5CYlL98olnJYdFbe95tEM,506880
+ipex_llm/libs/libgptneox_avx.dll,sha256=1gmdd3DdFrMJBI4pXyI0_703ndz2kopDkK8vz7uPyKI,595456
+ipex_llm/libs/libgptneox_vnni.dll,sha256=bUKisE3_Cg4rZ2AtjGhVoXtKqNVAo4MZI7N7Q4Q_cf0,567808
+ipex_llm/libs/libllama_avx.dll,sha256=lfcj2lw9lIgmDifxIzNnzcz6fHuYUIVYvdvwoM8ijOE,589824
+ipex_llm/libs/libllama_vnni.dll,sha256=5xY7lEJ-sW6owTNcBO8SWRCoweo7k8N4FS4v9-lQcdc,561664
+ipex_llm/libs/libstarcoder_avx.dll,sha256=z8P_H55-1VQFB9KcBxnBrSlkLlsLpxPghN9VfKpQulA,626688
+ipex_llm/libs/libstarcoder_vnni.dll,sha256=rPEGDTZlUTl0hUP40uRe8idjkVhNkyYk818RYmONL1Q,598528
+ipex_llm/libs/llama-api.dll,sha256=XJP636817YbUBOneSVGLqr9-qefV4DV6vR_8P7e8Q7k,25600
+ipex_llm/libs/llama.dll,sha256=YF2m8rwTy7hRbCo31FG-fo1q6DStBAxenNgkz4_gkr0,561152
+ipex_llm/libs/main-bloom.exe,sha256=lWEzFmr2zWPLnDUy3l-FBv5k-xLn7BXm1A3RSkZx890,103424
+ipex_llm/libs/main-gptneox.exe,sha256=ex0FAncs6bLngz7-jmtCxuDNEWlZCXThBMCdveDJnEs,98816
+ipex_llm/libs/main-llama.exe,sha256=ooEHT6YMGZTCKo3h-m3E7f8vkc6aMJPZiRH9dU_4Wu0,99840
+ipex_llm/libs/main-starcoder.exe,sha256=r9rZZhk0V4_6L7EH7evcBdmRyWSqiZ_oNon3mjSCXEY,157696
+ipex_llm/libs/pipeline.dll,sha256=5rhnCddgAUozj2ZQox5ykjsWivaNsK-jVcvArO9Hfy8,72704
+ipex_llm/libs/quantize-bloom.exe,sha256=V-ht-dVMJ9dq4O9vokGlBoOyTCCIgZoZMC-Cn8UdPL4,126464
+ipex_llm/libs/quantize-bloom_vnni.exe,sha256=q14nAfmyRJW57xLG10wKCODG-cSOZ6KP7VCIVPAqslM,127488
+ipex_llm/libs/quantize-gptneox.exe,sha256=Mjhxhcb8FXwrGI0dMRrHXzOOWkLZwtYQkpj4i2gSTYk,104448
+ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=_9D8mqr8zCI7sAzRZ_KlCayqDq0ctQWcwAzI4NWdH7U,104960
+ipex_llm/libs/quantize-llama.exe,sha256=TsPfUBMnQNcKA44_QG9wrmRSEcs2bAmiSOD-ll57g6w,109568
+ipex_llm/libs/quantize-llama_vnni.exe,sha256=1fUjt6woxEAzYQ1b0Ok3JRo1YcFcTiogB-l0RdOs_iY,110592
+ipex_llm/libs/quantize-starcoder.exe,sha256=-wvnnZhGw_eKPnbhZiiaepceFbd_zMUY7qQwrKg4miw,127488
+ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=iPbhV8PTL7J3JXJnJkRw1tsvaHA0kGxgwpttSzVkUHQ,128512
+ipex_llm/libs/starcoder-api.dll,sha256=bpzxqpZZE9Q7Yghy-33_Hkvo-OLvKntfqH-DJLWfXjQ,21504
+ipex_llm/libs/starcoder.dll,sha256=QXucnOagZLcxQF8PG_nvQ9kW4XvexngcqrmJE5N5o9s,598016
 ipex_llm/llamaindex/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
 ipex_llm/llamaindex/llms/__init__.py,sha256=KP1lEdGqDuxPoxL1ZSH25Pm2kKMPJBWUTLR0ckSLMIU,1139
 ipex_llm/llamaindex/llms/bigdlllm.py,sha256=FQBzq1KOjfc6uofTXAha3O7TqpJkNfOFepXQmOVlbnI,26314
@@ -87,25 +87,25 @@ ipex_llm/serving/fastchat/tgi_api_protocol.py,sha256=brT3k3-V0NJrU4fRqUwWjC0O3iO
 ipex_llm/serving/fastchat/tgi_api_server.py,sha256=agNTAEiZPSuj3dEdIdYKwkoY0cXOUDX06DiM9VP2knQ,24418
 ipex_llm/serving/fastchat/vllm_worker.py,sha256=ZLz2Q9GxJO6r_LOiP6epgCRjBGk-K4EB1SNEWSJp5DA,11091
 ipex_llm/transformers/__init__.py,sha256=l4KkMkLe-pRC7b_kj6LCfeifgE-Uo33_Av_FwN9HnFA,1074
-ipex_llm/transformers/convert.py,sha256=
+ipex_llm/transformers/convert.py,sha256=TxWdTTOSvh-j5jqokQJVWykta4U4LHupE1QJ-9udzwc,98733
 ipex_llm/transformers/convert_ipex.py,sha256=iKXo0n8fVFTOA2fNYYrByMFK0dovL-kLd2sVDk88AlQ,14334
 ipex_llm/transformers/embedding.py,sha256=bdgk59DvD4ZZyxRzewXOR7g56nThgO6uhIwk8QL7f-s,9299
 ipex_llm/transformers/kv.py,sha256=k4TU18LlA-Sbq9WNNQnfuzu3RSFBwFhmaV3BcGN5bAo,19191
 ipex_llm/transformers/lisa.py,sha256=F5WxbtXQ7RdKulj83h_2DnEIgKiKGZf7zvOmg6QBl2s,3289
-ipex_llm/transformers/loader.py,sha256=
+ipex_llm/transformers/loader.py,sha256=AwjV5RpI2t2bedlv7ZhLm8cfd-QJZm5hny-XyjIvdnk,6876
 ipex_llm/transformers/lookup.py,sha256=b6OlZ9OV10R9qeWw8mVryVpDxszkjwLkldvi7GPMJY8,19614
-ipex_llm/transformers/low_bit_linear.py,sha256=
-ipex_llm/transformers/model.py,sha256=
+ipex_llm/transformers/low_bit_linear.py,sha256=lPIvDuRoS0zusiJ6vw_fOTJgK5ylh4CuD3U-qs8ih4Y,40869
+ipex_llm/transformers/model.py,sha256=fj7LBjrWtWwDJJYXnWiXsLGS4ayqqHfnh0p51dSDssE,40908
 ipex_llm/transformers/modelling_bigdl.py,sha256=7JpNVMuyq_OmtNUaMFMXdxPWZp2q0QHC02QeA-VTPOw,6709
 ipex_llm/transformers/npu_model.py,sha256=YW02GeVz-9ZGqxAeSz0AOvciS-17bo9eK5ZOBrICwSQ,39508
 ipex_llm/transformers/patches.py,sha256=halPWm__ORh2fRFSIFPiCNg3LQBfrRkTPtmtRpBJCZQ,1286
 ipex_llm/transformers/pipeline_parallel.py,sha256=uNZpOXljNmdoEYnP8U-VFiN4dRZb2piQbIf2bG9LQnE,49051
 ipex_llm/transformers/qlora.py,sha256=jtPGsvWFjbTUGzDBCdfftnCis_0nJQNRpACSwXUbbGU,14943
 ipex_llm/transformers/relora.py,sha256=-dYzUV0P-IhO2jFdnzN9-v_sFzJpRj3ZwN9eCJzOoCw,16567
-ipex_llm/transformers/speculative.py,sha256=
+ipex_llm/transformers/speculative.py,sha256=0XNLgc9dGswJHVPrXo4iM7pPxkWwfFfJMECcivJSnIc,63368
 ipex_llm/transformers/streamer.py,sha256=RrVlLblzCOtABRUpaMXAyaMnCGgLUtAi_YesLumRbww,4842
 ipex_llm/transformers/training_patch.py,sha256=oxMkUtqyvqJiprw6dE3skkYfD1HOmUlH9N0hBkbn0G0,10799
-ipex_llm/transformers/utils.py,sha256=
+ipex_llm/transformers/utils.py,sha256=7syzq4jnEo-mWmS9E2VZ2GcFrjojWI8E7Hcx0tloifg,16996
 ipex_llm/transformers/xpu_customize_fwd.py,sha256=wFpIhs5F6tkNs8gBOrLxWdhLzO3EDHovVkERPIAoAvg,7611
 ipex_llm/transformers/xpu_ops.py,sha256=H46-69pMRQhekbAEoDfNacCInLWycMHDqrgMGLvFYfI,4362
 ipex_llm/transformers/awq/__init__.py,sha256=Du5gu3-eeAkeDO_dEMBTzrDBA66DSN3uL3-rn8WGXQw,875
@@ -151,7 +151,6 @@ ipex_llm/transformers/models/gemma2.py,sha256=2WZuv-FLzJyTJFaYxOuzJt47QE64M0lHnz
 ipex_llm/transformers/models/glm.py,sha256=gHYgfn20jPRL-ElXy-rUqMh6_LQcc5x7DEXSZuRA4E0,7094
 ipex_llm/transformers/models/gpt2.py,sha256=YSaNgK1uLCFDuIFqnKO0Mi-AsOZsYav-7pNf_NpKGdM,3445
 ipex_llm/transformers/models/gptbigcode.py,sha256=cP1_qGWoa43R2WacAMblShjku4QupcCZiLaPPAoOUs4,9101
-ipex_llm/transformers/models/gptj.py,sha256=TTIx461X2nOcIkrAcZhEf7d7mjJ3yvEC9KLVc1-hrpc,17973
 ipex_llm/transformers/models/gptneox.py,sha256=loRh1x_5S6BCeOr_s5xr-N_1SQHL3Y5IiUBAEyoMUqQ,6172
 ipex_llm/transformers/models/internlm.py,sha256=ZbIUMDwNRcrCeduXfbA_uq1AUEWawEt6CJRvQl3LkAg,17832
 ipex_llm/transformers/models/internvl.py,sha256=Vx0vENIEQLX2M6P398mw5TOhpks0U8xf8rtRQvy94go,8154
@@ -175,7 +174,7 @@ ipex_llm/transformers/models/rwkv5.py,sha256=OkRNj1pCAZg1z2Fw-I0DEnxLEdZyPeRSQ6m
 ipex_llm/transformers/models/sd.py,sha256=VvHV5u-0k2MgHu3NL9113hPj7DgfxqctuKzEEeNfRDU,5981
 ipex_llm/transformers/models/stablelm.py,sha256=RGQCYuQhYqtZ1j3RZkYi0_QvCRnUgUIPYxfBcLnElzg,6885
 ipex_llm/transformers/models/starcoder2.py,sha256=4P3mhRYf2Kreb1ESjrQGfy1puLMmZXgV35zf-Tksvao,6462
-ipex_llm/transformers/models/utils.py,sha256=
+ipex_llm/transformers/models/utils.py,sha256=85rGIzGZvWe3JjYpWcuc1nfzI_tn_zFcdZpWivxJkl0,15457
 ipex_llm/transformers/models/yuan.py,sha256=1jRPebwAK2ENbyYokOmb4LSVo-szucWiygz9zTv-scs,7656
 ipex_llm/transformers/npu_models/__init__.py,sha256=ulEUGLjaP48LCrVeury3UxLjXxKzRi0UpSG4bYu-7f8,585
 ipex_llm/transformers/npu_models/baichuan.py,sha256=fJtd7fBrttySghRUgfZTAdxLjsSNC-XL08HISsXigLE,4685
@@ -244,11 +243,11 @@ ipex_llm/vllm/xpu/engine/__init__.py,sha256=pY_CpyuZd72fr6s32ejeKHKFW0K4vUU2rzZj
 ipex_llm/vllm/xpu/engine/engine.py,sha256=k4-D27WS_Gk3mA--w3HWAjPjb4Aiu043MVPi0ZoAUBc,5984
 ipex_llm/vllm/xpu/entrypoints/openai/api_server.py,sha256=GshTZFB8e4PWvqckfbmTOU6b0oLkNn7A-vzLuG9--j8,21544
 ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py,sha256=2rENA2ucynMaIjiZBEh2ez1o5vR32GaP514t39CD7KM,8676
-ipex_llm-2.2.
-ipex_llm-2.2.
-ipex_llm-2.2.
-ipex_llm-2.2.
-ipex_llm-2.2.
-ipex_llm-2.2.
-ipex_llm-2.2.
-ipex_llm-2.2.
+ipex_llm-2.2.0b20250106.post1.data/scripts/ipex-llm-init.bat,sha256=HPtCYuDYwEatq7dAwOvdfVcHYCpAVdbj75K1qh0vQek,2578
+ipex_llm-2.2.0b20250106.post1.data/scripts/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
+ipex_llm-2.2.0b20250106.post1.data/scripts/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
+ipex_llm-2.2.0b20250106.post1.dist-info/METADATA,sha256=I0vPU5mDtPZR3wpY87fYHdn6r14U0T50NWGP7EsF5s8,12825
+ipex_llm-2.2.0b20250106.post1.dist-info/WHEEL,sha256=6iYPr8vTHsyDK75jr9X0V3I9wPSVmtwr_8fdATBciGk,98
+ipex_llm-2.2.0b20250106.post1.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
+ipex_llm-2.2.0b20250106.post1.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
+ipex_llm-2.2.0b20250106.post1.dist-info/RECORD,,
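RECORD rows have the form path,sha256=<urlsafe-base64 digest without padding>,size, which is why every rebuilt DLL and EXE reappears here with a new digest. A short sketch that recomputes one entry from inside the new wheel (the local wheel path is an assumption):

    # Verify a single RECORD entry of the new wheel (sketch).
    import base64
    import hashlib
    import zipfile

    WHEEL = "ipex_llm-2.2.0b20250106.post1-py3-none-win_amd64.whl"   # assumed local path
    MEMBER = "ipex_llm/libs/bloom-api.dll"

    with zipfile.ZipFile(WHEEL) as zf:
        data = zf.read(MEMBER)
        record = zf.read("ipex_llm-2.2.0b20250106.post1.dist-info/RECORD").decode()

    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=").decode()
    expected = [row for row in record.splitlines() if row.startswith(MEMBER + ",")][0]
    print(expected)                      # recorded row, e.g. ...,sha256=MSVCD9yi...,36352
    print("computed sha256=" + digest)   # should match the digest in the recorded row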
ipex_llm/transformers/models/gptj.py
DELETED
@@ -1,441 +0,0 @@
-#
-# Copyright 2016 The BigDL Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# This file is adapted from
-# https://github.com/huggingface/transformers/blob/main/src/transformers/models/gptj/modeling_gptj.py
-#
-
-import torch
-from typing import Optional, Tuple, Union
-from ipex_llm.transformers.models.utils import init_kv_cache, extend_kv_cache, \
-    apply_rotary_pos_emb, append_kv_cache, apply_ipex_rotate_every_two
-from transformers.utils.import_utils import is_torch_fx_proxy
-from transformers.modeling_outputs import BaseModelOutputWithPast
-from transformers.models.gptj.modeling_gptj import GPTJModel
-from ipex_llm.utils.common import invalidInputError
-
-import os
-
-KV_CACHE_ALLOC_BLOCK_LENGTH = int(os.environ.get("KV_CACHE_ALLOC_BLOCK_LENGTH", 256))
-
-
-def _get_embed_positions(self, position_ids):
-    embed_positions = self.embed_positions
-    if embed_positions.device != position_ids.device:
-        embed_positions = embed_positions.to(position_ids.device)
-        self.embed_positions = embed_positions
-    return embed_positions.repeat(position_ids.shape[0], 1, 1)
-
-
-def _attn(
-    self,
-    query,
-    key,
-    value,
-    attention_mask=None,
-    head_mask=None,
-):
-    # compute causal mask from causal mask buffer
-    query_length, key_length = query.size(-2), key.size(-2)
-    causal_mask = self.bias[:, :, key_length - query_length: key_length, :key_length]
-
-    # Keep the attention weights computation in fp32 to avoid overflow issues
-    query = query.to(torch.float32)
-    key = key.to(torch.float32)
-
-    attn_weights = torch.matmul(query, key.transpose(-1, -2))
-
-    mask_value = torch.finfo(attn_weights.dtype).min
-    # Need to be a tensor, otherwise we get error:
-    # `RuntimeError: expected scalar type float but found double`.
-    # Need to be on the same device, otherwise `RuntimeError: ..., x and y to be on the same device`
-    mask_value = torch.tensor(mask_value, dtype=attn_weights.dtype).to(attn_weights.device)
-    attn_weights = torch.where(causal_mask, attn_weights, mask_value)
-
-    attn_weights = attn_weights / self.scale_attn
-
-    if attention_mask is not None:
-        # Apply the attention mask
-        attn_weights = attn_weights + attention_mask
-
-    attn_weights = nn.functional.softmax(attn_weights, dim=-1)
-    attn_weights = attn_weights.to(value.dtype)
-    attn_weights = self.attn_dropout(attn_weights)
-
-    # Mask heads if we want to
-    if head_mask is not None:
-        attn_weights = attn_weights * head_mask
-
-    attn_output = torch.matmul(attn_weights, value)
-
-    return attn_output, attn_weights
-
-
-def gptj_attention_forward(
-    self,
-    hidden_states: torch.FloatTensor,
-    layer_past: Optional[Tuple[torch.Tensor]] = None,
-    attention_mask: Optional[torch.FloatTensor] = None,
-    position_ids: Optional[torch.LongTensor] = None,
-    head_mask: Optional[torch.FloatTensor] = None,
-    use_cache: Optional[bool] = False,
-    rotary_emb: Optional[Tuple]=None,
-    output_attentions: Optional[bool] = False,
-) -> Union[
-    Tuple[torch.Tensor, Tuple[torch.Tensor]],
-    Optional[Tuple[torch.Tensor, Tuple[torch.Tensor], Tuple[torch.Tensor, ...]]],
-]:
-    query = self.q_proj(hidden_states)
-    key = self.k_proj(hidden_states)
-    value = self.v_proj(hidden_states)
-
-    query = self._split_heads(query, self.num_attention_heads, self.head_dim, True)
-    key = self._split_heads(key, self.num_attention_heads, self.head_dim, True)
-    value = self._split_heads(value, self.num_attention_heads, self.head_dim, False)
-
-    sin, cos = rotary_emb
-    use_fuse_rope = hidden_states.device.type == "xpu" and not self.training
-
-    if self.rotary_dim is not None:
-        k_rot = key[:, :, :, : self.rotary_dim]
-        q_rot = query[:, :, :, : self.rotary_dim]
-
-        if use_fuse_rope:
-            apply_ipex_rotate_every_two(q_rot, k_rot, cos, sin)
-        else:
-            k_pass = key[:, :, :, self.rotary_dim:]
-            q_pass = query[:, :, :, self.rotary_dim:]
-            q_rot, k_rot = apply_rotary_pos_emb(q_rot, k_rot, cos, sin, position_ids, "gptj")
-            key = torch.cat([k_rot, k_pass], dim=-1)
-            query = torch.cat([q_rot, q_pass], dim=-1)
-    else:
-        if use_fuse_rope:
-            apply_ipex_rotate_every_two(query, key, cos, sin)
-        else:
-            query, key = apply_rotary_pos_emb(query, key, cos, sin, position_ids, "gptj")
-
-    batch_size, q_len, _ = hidden_states.size()
-
-    key = key.permute(0, 2, 1, 3).contiguous()
-    query = query.permute(0, 2, 1, 3).contiguous()
-
-    kv_seq_len = key.size(-2)
-    device = hidden_states.device
-
-    if layer_past is not None:
-        kv_seq_len += layer_past[0].size(2)
-
-    if layer_past is not None:
-        cache_k = layer_past[0]
-        cache_v = layer_past[1]
-        past_length = cache_k.size(2)
-        if cache_k.stride()[1] < kv_seq_len * cache_k.size(3):
-            new_cache_k, new_cache_v = extend_kv_cache(batch_size,
-                                                       self.num_attention_heads,
-                                                       self.head_dim,
-                                                       past_length,
-                                                       kv_seq_len + KV_CACHE_ALLOC_BLOCK_LENGTH,
-                                                       dtype=cache_v.dtype,
-                                                       device=device)
-            new_cache_k[:] = cache_k
-            new_cache_v[:] = cache_v
-            cache_k = new_cache_k
-            cache_v = new_cache_v
-        key, value = append_kv_cache(cache_k, cache_v, key, value)
-
-    elif use_cache:
-        key_cache, value_cache = init_kv_cache(batch_size,
-                                               self.num_attention_heads,
-                                               self.head_dim,
-                                               kv_seq_len,
-                                               kv_seq_len + KV_CACHE_ALLOC_BLOCK_LENGTH,
-                                               dtype=value.dtype,
-                                               device=device)
-        key_cache[:] = key
-        value_cache[:] = value
-        key = key_cache
-        value = value_cache
-
-    if use_cache is True:
-        present = (key, value)
-    else:
-        present = None
-
-    # compute self-attention: V x Softmax(QK^T)
-    attn_output, attn_weights = self._attn(query, key, value, attention_mask, head_mask)
-
-    attn_output = self._merge_heads(attn_output, self.num_attention_heads, self.head_dim)
-    attn_output = self.out_proj(attn_output)
-    attn_output = self.resid_dropout(attn_output)
-
-    outputs = (attn_output, present)
-    if output_attentions:
-        outputs += (attn_weights,)
-
-    return outputs  # a, present, (attentions)
-
-
-def gptj_block_forward(
-    self,
-    hidden_states: Optional[torch.FloatTensor],
-    layer_past: Optional[Tuple[torch.Tensor]] = None,
-    attention_mask: Optional[torch.FloatTensor] = None,
-    position_ids: Optional[torch.LongTensor] = None,
-    head_mask: Optional[torch.FloatTensor] = None,
-    use_cache: Optional[bool] = False,
-    rotary_emb: Optional[Tuple]=None,
-    output_attentions: Optional[bool] = False,
-) -> Union[Tuple[torch.Tensor], Optional[Tuple[torch.Tensor, Tuple[torch.FloatTensor, ...]]]]:
-    residual = hidden_states
-    hidden_states = self.ln_1(hidden_states)
-    attn_outputs = self.attn(
-        hidden_states=hidden_states,
-        layer_past=layer_past,
-        attention_mask=attention_mask,
-        position_ids=position_ids,
-        head_mask=head_mask,
-        use_cache=use_cache,
-        rotary_emb=rotary_emb,
-        output_attentions=output_attentions,
-    )
-    attn_output = attn_outputs[0]  # output_attn: a, present, (attentions)
-    outputs = attn_outputs[1:]
-
-    feed_forward_hidden_states = self.mlp(hidden_states)
-    hidden_states = attn_output + feed_forward_hidden_states + residual
-
-    if use_cache:
-        outputs = (hidden_states,) + outputs
-    else:
-        outputs = (hidden_states,) + outputs[1:]
-
-    return outputs  # hidden_states, present, (attentions)
-
-
-def create_sinusoidal_positions(num_pos: int, dim: int) -> torch.Tensor:
-    inv_freq = 1.0 / (10000 ** (torch.arange(0, dim, 2) / dim))
-    sinusoid_inp = torch.einsum("i , j -> i j",
-                                torch.arange(num_pos, dtype=torch.float), inv_freq).float()
-    return torch.cat((torch.sin(sinusoid_inp), torch.cos(sinusoid_inp)), dim=1)
-
-
-old_init = GPTJModel.__init__
-
-
-def gptj_model_new_init(self, config):
-    old_init(self, config)
-    embed_dim = config.hidden_size
-    rotary_dim = config.rotary_dim
-    pos_embd_dim = rotary_dim or embed_dim
-    max_positions = config.max_position_embeddings
-    self.embed_positions = create_sinusoidal_positions(max_positions, pos_embd_dim)
-
-
-def get_new_embed_positions(position_ids, prev_embed_positions):
-    embed_positions = prev_embed_positions
-    if embed_positions.device != position_ids.device:
-        embed_positions = embed_positions.to(position_ids.device)
-        prev_embed_positions = embed_positions
-    return embed_positions.repeat(position_ids.shape[0], 1, 1), prev_embed_positions
-
-
-def gptj_model_forward(
-    self,
-    input_ids: Optional[torch.LongTensor] = None,
-    past_key_values: Optional[Tuple[Tuple[torch.Tensor]]] = None,
-    attention_mask: Optional[torch.FloatTensor] = None,
-    token_type_ids: Optional[torch.LongTensor] = None,
-    position_ids: Optional[torch.LongTensor] = None,
-    head_mask: Optional[torch.FloatTensor] = None,
-    inputs_embeds: Optional[torch.FloatTensor] = None,
-    use_cache: Optional[bool] = None,
-    output_attentions: Optional[bool] = None,
-    output_hidden_states: Optional[bool] = None,
-    return_dict: Optional[bool] = None,
-) -> Union[Tuple, BaseModelOutputWithPast]:
-    output_attentions = output_attentions if output_attentions is not None \
-        else self.config.output_attentions
-    output_hidden_states = (
-        output_hidden_states if output_hidden_states is not None
-        else self.config.output_hidden_states
-    )
-    use_cache = use_cache if use_cache is not None else self.config.use_cache
-    return_dict = return_dict if return_dict is not None else self.config.use_return_dict
-
-    if input_ids is not None and inputs_embeds is not None:
-        invalidInputError(False,
-                          "You cannot specify both input_ids and inputs_embeds at the same time")
-    elif input_ids is not None:
-        self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
-        input_shape = input_ids.size()
-        input_ids = input_ids.view(-1, input_shape[-1])
-        batch_size = input_ids.shape[0]
-    elif inputs_embeds is not None:
-        input_shape = inputs_embeds.size()[:-1]
-        batch_size = inputs_embeds.shape[0]
-    else:
-        invalidInputError(False, "You have to specify either input_ids or inputs_embeds")
-
-    device = input_ids.device if input_ids is not None else inputs_embeds.device
-
-    if token_type_ids is not None:
-        token_type_ids = token_type_ids.view(-1, input_shape[-1])
-
-    if past_key_values is None:
-        past_length = 0
-        past_key_values = tuple([None] * len(self.h))
-    else:
-        past_length = past_key_values[0][0].size(-2)
-
-    if position_ids is None:
-        position_ids = torch.arange(past_length, input_shape[-1] + past_length,
-                                    dtype=torch.long, device=device)
-        position_ids = position_ids.unsqueeze(0)
-
-    # Attention mask.
-    if attention_mask is not None:
-        if batch_size <= 0:
-            invalidInputError(False, "batch_size has to be defined and > 0")
-        attention_mask = attention_mask.view(batch_size, -1)
-        # We create a 3D attention mask from a 2D tensor mask.
-        # Sizes are [batch_size, 1, 1, to_seq_length]
-        # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length]
-        # this attention mask is more simple than the triangular masking of causal attention
-        # used in OpenAI GPT, we just need to prepare the broadcast dimension here.
-        attention_mask = attention_mask[:, None, None, :]
-
-        # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
-        # masked positions, this operation will create a tensor which is 0.0 for
-        # positions we want to attend and the dtype's smallest value for masked positions.
-        # Since we are adding it to the raw scores before the softmax, this is
-        # effectively the same as removing these entirely.
-        attention_mask = attention_mask.to(dtype=self.dtype)  # fp16 compatibility
-        attention_mask = (1.0 - attention_mask) * torch.finfo(self.dtype).min
-
-    # Prepare head mask if needed
-    # 1.0 in head_mask indicate we keep the head
-    # attention_probs has shape bsz x num_attention_heads x N x N
-    # head_mask has shape n_layer x batch x num_attention_heads x N x N
-    head_mask = self.get_head_mask(head_mask, self.config.n_layer)
-
-    if inputs_embeds is None:
-        inputs_embeds = self.wte(input_ids)
-
-    hidden_states = inputs_embeds
-
-    if token_type_ids is not None:
-        token_type_embeds = self.wte(token_type_ids)
-        hidden_states = hidden_states + token_type_embeds
-
-    hidden_states = self.drop(hidden_states)
-
-    output_shape = (-1,) + input_shape[1:] + (hidden_states.size(-1),)
-
-    if self.gradient_checkpointing and self.training:
-        if use_cache:
-            logger.warning_once(
-                "`use_cache=True` is incompatible with gradient checkpointing."
-                "Setting `use_cache=False`..."
-            )
-            use_cache = False
-
-    presents = () if use_cache else None
-    all_self_attentions = () if output_attentions else None
-    all_hidden_states = () if output_hidden_states else None
-
-    # Repeat cos sin here, call only once for each token.
-    # If put this to attension forward, it will generate too many times.
-    if is_torch_fx_proxy(position_ids) or torch.jit.is_tracing():
-        # The logic to conditionally copy to GPU could not be traced, so we do this
-        # every time in the torch.fx case
-        embed_positions = get_embed_positions(self.embed_positions, position_ids)
-    else:
-        embed_positions, self.embed_positions = get_new_embed_positions(position_ids,
-                                                                        self.embed_positions)
-
-    repeated_position_ids = position_ids.unsqueeze(-1).repeat(1, 1, embed_positions.shape[-1])
-    sincos = torch.gather(embed_positions, 1, repeated_position_ids)
-    sin, cos = torch.split(sincos, sincos.shape[-1] // 2, dim=-1)
-    sin = torch.repeat_interleave(sin[:, :, None, :], 2, 3)
-    cos = torch.repeat_interleave(cos[:, :, None, :], 2, 3)
-
-    for i, (block, layer_past) in enumerate(zip(self.h, past_key_values)):
-        # Model parallel
-        if self.model_parallel:
-            torch.cuda.set_device(hidden_states.device)
-            # Ensure layer_past is on same device as hidden_states (might not be correct)
-            if layer_past is not None:
-                layer_past = tuple(past_state.to(hidden_states.device) for past_state in layer_past)
-            # Ensure that attention_mask is always on the same device as hidden_states
-            if attention_mask is not None:
-                attention_mask = attention_mask.to(hidden_states.device)
-            if isinstance(head_mask, torch.Tensor):
-                head_mask = head_mask.to(hidden_states.device)
-        if output_hidden_states:
-            all_hidden_states = all_hidden_states + (hidden_states,)
-
-        if self.gradient_checkpointing and self.training:
-            outputs = self._gradient_checkpointing_func(
-                block.__call__,
-                hidden_states,
-                None,
-                attention_mask,
-                position_ids,
-                head_mask[i],
-                use_cache,
-                output_attentions,
-            )
-        else:
-            outputs = block(
-                hidden_states=hidden_states,
-                layer_past=layer_past,
-                attention_mask=attention_mask,
-                position_ids=position_ids,
-                head_mask=head_mask[i],
-                use_cache=use_cache,
-                rotary_emb=(sin, cos),
-                output_attentions=output_attentions,
-            )
-
-        hidden_states = outputs[0]
-        if use_cache is True:
-            presents = presents + (outputs[1],)
-
-        if output_attentions:
-            all_self_attentions = all_self_attentions + (outputs[2 if use_cache else 1],)
-
-        # Model Parallel: If it's the last layer for that device, put things on the next device
-        if self.model_parallel:
-            for k, v in self.device_map.items():
-                if i == v[-1] and "cuda:" + str(k) != self.last_device:
-                    hidden_states = hidden_states.to("cuda:" + str(k + 1))
-
-    hidden_states = self.ln_f(hidden_states)
-
-    hidden_states = hidden_states.view(output_shape)
-    # Add last hidden state
-    if output_hidden_states:
-        all_hidden_states = all_hidden_states + (hidden_states,)
-
-    if not return_dict:
-        return tuple(v for v in [hidden_states, presents, all_hidden_states, all_self_attentions]
-                     if v is not None)
-
-    return BaseModelOutputWithPast(
-        last_hidden_state=hidden_states,
-        past_key_values=presents,
-        hidden_states=all_hidden_states,
-        attentions=all_self_attentions,
-    )
{ipex_llm-2.2.0b20250105.post0.data → ipex_llm-2.2.0b20250106.post1.data}/scripts/ipex-llm-init.bat
RENAMED
File without changes
{ipex_llm-2.2.0b20250105.post0.data → ipex_llm-2.2.0b20250106.post1.data}/scripts/llm-chat.ps1
RENAMED
File without changes
{ipex_llm-2.2.0b20250105.post0.data → ipex_llm-2.2.0b20250106.post1.data}/scripts/llm-cli.ps1
RENAMED
File without changes
{ipex_llm-2.2.0b20250105.post0.dist-info → ipex_llm-2.2.0b20250106.post1.dist-info}/WHEEL
RENAMED
File without changes
{ipex_llm-2.2.0b20250105.post0.dist-info → ipex_llm-2.2.0b20250106.post1.dist-info}/entry_points.txt
RENAMED
File without changes
{ipex_llm-2.2.0b20250105.post0.dist-info → ipex_llm-2.2.0b20250106.post1.dist-info}/top_level.txt
RENAMED
File without changes