ipex-llm 2.2.0b20250204__py3-none-win_amd64.whl → 2.2.0b20250204.post0__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ipex_llm/libs/bloom-api.dll +0 -0
- ipex_llm/libs/bloom.dll +0 -0
- ipex_llm/libs/gptneox-api.dll +0 -0
- ipex_llm/libs/gptneox.dll +0 -0
- ipex_llm/libs/libbloom_avx.dll +0 -0
- ipex_llm/libs/libbloom_vnni.dll +0 -0
- ipex_llm/libs/libgptneox_avx.dll +0 -0
- ipex_llm/libs/libgptneox_vnni.dll +0 -0
- ipex_llm/libs/libllama_avx.dll +0 -0
- ipex_llm/libs/libllama_vnni.dll +0 -0
- ipex_llm/libs/libstarcoder_avx.dll +0 -0
- ipex_llm/libs/libstarcoder_vnni.dll +0 -0
- ipex_llm/libs/llama-api.dll +0 -0
- ipex_llm/libs/llama.dll +0 -0
- ipex_llm/libs/main-bloom.exe +0 -0
- ipex_llm/libs/main-gptneox.exe +0 -0
- ipex_llm/libs/main-llama.exe +0 -0
- ipex_llm/libs/main-starcoder.exe +0 -0
- ipex_llm/libs/pipeline.dll +0 -0
- ipex_llm/libs/quantize-bloom.exe +0 -0
- ipex_llm/libs/quantize-bloom_vnni.exe +0 -0
- ipex_llm/libs/quantize-gptneox.exe +0 -0
- ipex_llm/libs/quantize-gptneox_vnni.exe +0 -0
- ipex_llm/libs/quantize-llama.exe +0 -0
- ipex_llm/libs/quantize-llama_vnni.exe +0 -0
- ipex_llm/libs/quantize-starcoder.exe +0 -0
- ipex_llm/libs/quantize-starcoder_vnni.exe +0 -0
- ipex_llm/libs/starcoder-api.dll +0 -0
- ipex_llm/libs/starcoder.dll +0 -0
- ipex_llm/transformers/low_bit_linear.py +33 -70
- ipex_llm/transformers/models/utils.py +0 -13
- ipex_llm/transformers/npu_model.py +17 -4
- ipex_llm/transformers/npu_models/convert.py +6 -2
- ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py +27 -12
- ipex_llm/transformers/npu_pipeline_model/llama.py +24 -11
- ipex_llm/transformers/npu_pipeline_model/minicpm.py +19 -10
- ipex_llm/transformers/npu_pipeline_model/qwen.py +17 -8
- ipex_llm/transformers/qlora.py +2 -2
- ipex_llm/transformers/utils.py +19 -6
- ipex_llm/transformers/xpu_customize_fwd.py +6 -4
- ipex_llm/transformers/xpu_ops.py +4 -3
- {ipex_llm-2.2.0b20250204.dist-info → ipex_llm-2.2.0b20250204.post0.dist-info}/METADATA +23 -30
- {ipex_llm-2.2.0b20250204.dist-info → ipex_llm-2.2.0b20250204.post0.dist-info}/RECORD +49 -49
- {ipex_llm-2.2.0b20250204.data → ipex_llm-2.2.0b20250204.post0.data}/scripts/ipex-llm-init.bat +0 -0
- {ipex_llm-2.2.0b20250204.data → ipex_llm-2.2.0b20250204.post0.data}/scripts/llm-chat.ps1 +0 -0
- {ipex_llm-2.2.0b20250204.data → ipex_llm-2.2.0b20250204.post0.data}/scripts/llm-cli.ps1 +0 -0
- {ipex_llm-2.2.0b20250204.dist-info → ipex_llm-2.2.0b20250204.post0.dist-info}/WHEEL +0 -0
- {ipex_llm-2.2.0b20250204.dist-info → ipex_llm-2.2.0b20250204.post0.dist-info}/entry_points.txt +0 -0
- {ipex_llm-2.2.0b20250204.dist-info → ipex_llm-2.2.0b20250204.post0.dist-info}/top_level.txt +0 -0
ipex_llm/libs/bloom-api.dll
CHANGED
Binary file

ipex_llm/libs/bloom.dll
CHANGED
Binary file

ipex_llm/libs/gptneox-api.dll
CHANGED
Binary file

ipex_llm/libs/gptneox.dll
CHANGED
Binary file

ipex_llm/libs/libbloom_avx.dll
CHANGED
Binary file

ipex_llm/libs/libbloom_vnni.dll
CHANGED
Binary file

ipex_llm/libs/libgptneox_avx.dll
CHANGED
Binary file

ipex_llm/libs/libgptneox_vnni.dll
CHANGED
Binary file

ipex_llm/libs/libllama_avx.dll
CHANGED
Binary file

ipex_llm/libs/libllama_vnni.dll
CHANGED
Binary file

ipex_llm/libs/libstarcoder_avx.dll
CHANGED
Binary file

ipex_llm/libs/libstarcoder_vnni.dll
CHANGED
Binary file

ipex_llm/libs/llama-api.dll
CHANGED
Binary file

ipex_llm/libs/llama.dll
CHANGED
Binary file

ipex_llm/libs/main-bloom.exe
CHANGED
Binary file

ipex_llm/libs/main-gptneox.exe
CHANGED
Binary file

ipex_llm/libs/main-llama.exe
CHANGED
Binary file

ipex_llm/libs/main-starcoder.exe
CHANGED
Binary file

ipex_llm/libs/pipeline.dll
CHANGED
Binary file

ipex_llm/libs/quantize-bloom.exe
CHANGED
Binary file

ipex_llm/libs/quantize-bloom_vnni.exe
CHANGED
Binary file

ipex_llm/libs/quantize-gptneox.exe
CHANGED
Binary file

ipex_llm/libs/quantize-gptneox_vnni.exe
CHANGED
Binary file

ipex_llm/libs/quantize-llama.exe
CHANGED
Binary file

ipex_llm/libs/quantize-llama_vnni.exe
CHANGED
Binary file

ipex_llm/libs/quantize-starcoder.exe
CHANGED
Binary file

ipex_llm/libs/quantize-starcoder_vnni.exe
CHANGED
Binary file

ipex_llm/libs/starcoder-api.dll
CHANGED
Binary file

ipex_llm/libs/starcoder.dll
CHANGED
Binary file
ipex_llm/transformers/low_bit_linear.py
CHANGED
@@ -51,7 +51,8 @@ from torch import Tensor, dtype, nn
 from operator import mul
 from functools import reduce
 from ipex_llm.transformers.xpu_customize_fwd import custom_fwd, custom_bwd
-from ipex_llm.transformers.utils import
+from ipex_llm.transformers.utils import is_autocast_enabled, get_autocast_dtype
+from ipex_llm.transformers.utils import get_xpu_device_name
 from ipex_llm.transformers.convert import is_deepspeed_available, get_use_vllm
 
 T = TypeVar("T", bound="torch.nn.Module")
@@ -500,16 +501,16 @@ class MatMulLowBit(torch.autograd.Function):
 
     @staticmethod
     @custom_fwd
-    def forward(ctx, A, weight,
+    def forward(ctx, A, weight, output_size):
         ctx.is_empty = False
         import xe_linear
         if weight.qtype == NF4:
             result = xe_linear.forward_new(A,
                                            weight.data.view(torch.uint8),
                                            weight.qtype,
-
+                                           output_size)
         else:
-            result = xe_linear.forward_new(A, weight.data, weight.qtype,
+            result = xe_linear.forward_new(A, weight.data, weight.qtype, output_size)
         if any(ctx.needs_input_grad[:2]):
             ctx.tensors = (A, weight)
         else:
@@ -527,8 +528,8 @@ class MatMulLowBit(torch.autograd.Function):
         A, weight = ctx.tensors
         grad_A, grad_weight = None, None
         if req_gradA:
-            if
-                grad_output = grad_output.to(
+            if is_autocast_enabled("xpu"):
+                grad_output = grad_output.to(get_autocast_dtype("xpu"))
             if weight.qtype == NF4:
                 dequant_weight = xe_linear.dequant(A,
                                                    weight.data.view(torch.uint8),
@@ -615,7 +616,7 @@ class LowBitLinear(nn.Linear):
         is_training = self.training and not torch.is_inference_mode_enabled()
         if is_training:
             # below logic is only for training
-            autocast_dtype = get_autocast_dtype(x)
+            autocast_dtype = get_autocast_dtype(x.device.type)
             if self.compute_dtype is not None and x.device.type == "xpu":
                 x = x.to(self.compute_dtype)  # solve GC issue for unlora module
             elif autocast_dtype is not None:
@@ -627,89 +628,50 @@ class LowBitLinear(nn.Linear):
         if self.optimize_lm_head:
             x = reshape_lm_head_input(x)
 
-        # [batch,
-
-
-        # Output shape, e.g., [batch, input_num, out_len]
-        new_shape = x_shape[:-1] + (self.out_len,)
+        # [batch, seq_len, in_len] -> [batch, seq_len, out_len]
+        new_shape = x.shape[:-1] + (self.out_len,)
+
         # Activation is empty tensor, e.g., [1, 0, 4096]
-        if 0 in
+        if 0 in x.shape:
             # return empty tensor with output shape, x.dtype and x.device
             return torch.empty(new_shape, dtype=x.dtype, device=x.device)
 
-        x_2d = x.contiguous().view(-1, x_shape[-1])
-
         if self.act_order:
-
-            # x0 for weight
-            x0 = self.weight.data
-
-            if x0.device.type == "xpu":
-                # GPU logic
-                try:
-                    import xe_linear
-                    from ipex_llm.transformers.models.utils import use_xmx
-                except ModuleNotFoundError:
-                    invalidInputError(False,
-                                      "Please `pip install bigdl_core_xe` first.")
+            x = x[..., self.g_idx_map]
 
-
-                x_2d = x_2d.contiguous()
+        x_2d = x.contiguous().view(-1, x.shape[-1])
 
-
-
-
-                input_seq_size = 1
-
-                if is_training:
-                    # training path
-                    if x_2d.requires_grad:
-                        result = MatMulLowBit.apply(x_2d, self.weight, input_seq_size)
-                    else:
-                        if self.weight.qtype == NF4:
-                            result = xe_linear.forward_new(x_2d,
-                                                           self.weight.data.view(torch.uint8),
-                                                           self.weight.qtype,
-                                                           input_seq_size)
-                        else:
-                            result = xe_linear.forward_new(x_2d,
-                                                           self.weight.data,
-                                                           self.weight.qtype,
-                                                           input_seq_size)
+        if self.weight.device.type == "xpu":
+            if is_training and x_2d.requires_grad:
+                result = MatMulLowBit.apply(x_2d, self.weight, self.out_len)
             else:
-                # inference path
-                # current workaround to reduce first token latency of fp32 input
-                # sometimes fp16 cause nan and training instability
-                # disable the conversion when training
-                # TODO: may modify the input length condition for empty cache.
                 do_empty_cache = self.low_memory_mode and x_2d.shape[0] >= 1024
                 if do_empty_cache:
                     torch.xpu.empty_cache()
 
+                if self.qtype == NF4:
+                    w = self.weight.data.view(torch.uint8)
+                else:
+                    w = self.weight.data
+
                 if use_batch_forward(x_2d, self.weight.qtype, self.out_len):
                     import xe_batch
-                    result = xe_batch.batch_forward(x_2d,
-                elif
-
-
-                    and x_2d.dtype == torch.float32
-                    and not use_xmx(x_2d, self.weight.qtype)
-                ):
+                    result = xe_batch.batch_forward(x_2d, w, self.qtype)
+                elif not is_training and self.conver_to_half \
+                        and x_2d.shape[0] > 1 and x_2d.dtype == torch.float:
+                    import xe_linear
                     x_2d = x_2d.half()
-                    result = xe_linear.forward_new(x_2d, self.
-                                                   self.weight.qtype, input_seq_size)
+                    result = xe_linear.forward_new(x_2d, w, self.qtype, self.out_len)
                     result = result.to(x.dtype)
                 else:
-
-
-                                                   self.weight.qtype, input_seq_size)
-                else:
-                    result = xe_linear.forward_new(x_2d, self.weight.data,
-                                                   self.weight.qtype, input_seq_size)
+                    import xe_linear
+                    result = xe_linear.forward_new(x_2d, w, self.qtype, self.out_len)
 
                 if do_empty_cache:
                     torch.xpu.empty_cache()
+
             result = result.view(new_shape)
+
             if self.mp_group is not None:
                 if get_use_vllm():
                     result = self.mp_group.all_reduce(result)
@@ -718,6 +680,7 @@ class LowBitLinear(nn.Linear):
                 dist.inference_all_reduce(result, group=self.mp_group)
             else:
                 invalidInputError(False, "mp_group is not None, but no supported backend found")
+
             if self.bias is not None:
                 result += self.bias
         else:
@@ -731,7 +694,7 @@ class LowBitLinear(nn.Linear):
             result = MatMulLowBitCPU.apply(x, self.weight)
         else:
             from ipex_llm.utils.isa_checker import is_server, is_spr
-
+            x0 = self.weight.data
             # convert if necessary, and compute a linear result
             if is_server() and (not is_spr()) and \
                     self.qtype == SYM_INT4 and x_2d.shape[0] >= TORCH_LINEAR_THRESHOLD:
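Note: the refactor above replaces the old per-call input_seq_size argument with the layer's out_len and shares the 2D flatten/view-back bookkeeping across all XPU branches. A minimal sketch of that bookkeeping, with torch.nn.functional.linear standing in for the binary xe_linear kernel (the stand-in is an assumption for illustration only):

import torch
import torch.nn.functional as F

def low_bit_style_forward(x: torch.Tensor, weight: torch.Tensor) -> torch.Tensor:
    # weight is [out_len, in_len]; F.linear stands in for xe_linear.forward_new
    out_len = weight.shape[0]
    # [batch, seq_len, in_len] -> [batch, seq_len, out_len]
    new_shape = x.shape[:-1] + (out_len,)
    # Activation is an empty tensor, e.g. [1, 0, 4096]: return an empty result
    if 0 in x.shape:
        return torch.empty(new_shape, dtype=x.dtype, device=x.device)
    # Flatten to 2D for the kernel, then view the result back to the input shape
    x_2d = x.contiguous().view(-1, x.shape[-1])
    result = F.linear(x_2d, weight)
    return result.view(new_shape)

x = torch.randn(1, 5, 4096)
w = torch.randn(11008, 4096)
assert low_bit_style_forward(x, w).shape == (1, 5, 11008)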
ipex_llm/transformers/models/utils.py
CHANGED
@@ -259,19 +259,6 @@ def mlp_fusion_check(x, qtype, training):
     return True
 
 
-def use_xmx(x: torch.Tensor, qtype: int):
-    device = get_xpu_device_name(x.device)
-    return (
-        device in ["arc", "pvc"]
-        and qtype in [SYM_INT4, SYM_INT8, FP8E4, FP8E5, WOQ_INT4]
-        and (
-            (device == "pvc" and 1 < x.size(0) <= 16)
-            or
-            (device != "pvc" and 1 < x.size(0) <= 64)
-        )
-    )
-
-
 def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
     batch, num_key_value_heads, slen, head_dim = hidden_states.shape
     if n_rep == 1:
ipex_llm/transformers/npu_model.py
CHANGED
@@ -139,8 +139,10 @@ class _BaseAutoModelClass:
         mock_device = kwargs.pop('device', None)  # For mock on CPU
         convert_model = kwargs.pop('convert_model', False)
         save_directory = kwargs.pop('save_directory', None)
-        fuse_layers = kwargs.pop(
-        imatrix_file = kwargs.pop(
+        fuse_layers = kwargs.pop("fuse_layers", None)
+        imatrix_file = kwargs.pop("imatrix_file", None)
+        keep_ir = kwargs.pop("keep_ir", False)
+        compile_blob = kwargs.pop("compile_blob", True)
 
         if imatrix_file is not None:
             imatrix_data = load_imatrix_data(imatrix_file)
@@ -236,6 +238,8 @@ class _BaseAutoModelClass:
                 "fuse_layers": fuse_layers,
                 "imatrix_data": imatrix_data,
                 "skip_npu_logic": mock_device == "dummy",
+                "keep_ir": keep_ir,
+                "compile_blob": compile_blob,
             }
             # Dummy will skip npu related logic and save the quantized model
             if mock_device == "dummy":
@@ -280,9 +284,14 @@ class _BaseAutoModelClass:
         fuse_layers = kwargs.pop('fuse_layers', None)
         imatrix_data = kwargs.pop('imatrix_data', None)
         skip_npu_logic = kwargs.pop("skip_npu_logic", False)
+        keep_ir = kwargs.pop("keep_ir", False)
+        compile_blob = kwargs.pop("compile_blob", True)
+
         invalidInputError(save_directory is not None,
                           "Please provide the path to save converted model "
                           "through `save_directory`.")
+        invalidInputError(keep_ir or compile_blob,
+                          "Please save blob or save IR either.")
 
         if hasattr(model, "llm"):
             llm = model.llm
@@ -323,7 +332,9 @@ class _BaseAutoModelClass:
                     qtype=qtype,
                     save_directory=save_directory,
                     fuse_layers=fuse_layers,
-                    has_llm=hasattr(model, "llm")
+                    has_llm=hasattr(model, "llm"),
+                    keep_ir=keep_ir,
+                    compile_blob=compile_blob
                 )
             else:
                 optimize_llm(
@@ -346,7 +357,9 @@ class _BaseAutoModelClass:
                 qtype=qtype,
                 convert_model=convert_model,
                 save_directory=save_directory,
-                fuse_layers=fuse_layers
+                fuse_layers=fuse_layers,
+                keep_ir=keep_ir,
+                compile_blob=compile_blob)
             model.save_low_bit = types.MethodType(save_low_bit, model)
             model.save_low_bit(save_directory)
             logger.info(f"Converted model has already saved to {save_directory}.")
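The two kwargs added here (keep_ir, compile_blob) are popped in from_pretrained and threaded down to the NPU pipeline converter. A hedged usage sketch; the model id, save path, and surrounding arguments are illustrative assumptions, not values taken from this diff:

from ipex_llm.transformers.npu_model import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-1B-Instruct",      # placeholder model id
    load_in_low_bit="sym_int4",
    convert_model=True,
    save_directory="./npu-converted-model",  # placeholder path
    keep_ir=False,      # new flag: whether to keep the intermediate IR
    compile_blob=True,  # new flag: whether to compile and save the NPU blob
)
# At least one of keep_ir / compile_blob must be True, per the new
# invalidInputError guard ("Please save blob or save IR either.").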
ipex_llm/transformers/npu_models/convert.py
CHANGED
@@ -450,7 +450,9 @@ def optimize_llm_single_process(
         qtype: str,
         save_directory: str,
         fuse_layers: int=None,
-        has_llm: bool=False
+        has_llm: bool=False,
+        keep_ir: bool=False,
+        compile_blob: bool=True
 ):
     from ipex_llm.transformers.npu_pipeline_model.convert_pipeline import convert_llm
     from .npu_llm_cpp import load_model_from_file
@@ -463,7 +465,9 @@ def optimize_llm_single_process(
                 qtype=qtype,
                 convert_model=True,
                 save_directory=save_directory,
-                fuse_layers=fuse_layers
+                fuse_layers=fuse_layers,
+                keep_ir=keep_ir,
+                compile_blob=compile_blob)
     try:
         model_ptr = load_model_from_file(save_directory)
         model.kv_len = kv_len
ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py
CHANGED
@@ -196,7 +196,9 @@ def convert_llm(model: torch.nn.Module,
                 qtype: str,
                 convert_model: bool=False,
                 save_directory: str=None,
-                fuse_layers: int=None
+                fuse_layers: int=None,
+                keep_ir: bool=False,
+                compile_blob: bool=True):
     # whether to set layernorm weight as const
     layernorm_const = os.environ.get("IPEX_LLM_NPU_LAYERNORM_CONST", "1") == "1"
     if group_size == 0:
@@ -220,7 +222,9 @@ def convert_llm(model: torch.nn.Module,
                               n_splits_down_proj,
                               group_size,
                               save_directory,
-                              fuse_layers=fuse_layers
+                              fuse_layers=fuse_layers,
+                              keep_ir=keep_ir,
+                              compile_blob=compile_blob)
         return 0
     if model.config.model_type == "llama":
         with tempfile.TemporaryDirectory() as temp_dir:
@@ -428,7 +432,9 @@ def convert_llm_for_deploy(model: torch.nn.Module,
                            n_splits_down_proj: int,
                            group_size: int,
                            save_directory: str=None,
-                           fuse_layers: int=None
+                           fuse_layers: int=None,
+                           keep_ir: bool=False,
+                           compile_blob: bool=True):
     if not os.path.exists(save_directory):
         os.mkdir(save_directory)
     weight_dir = os.path.join(save_directory, "model_weights")
@@ -479,14 +485,17 @@ def convert_llm_for_deploy(model: torch.nn.Module,
         # save fused_layers blobs of fused decoder layers
         convert_fused_qwen_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
                                  save_directory, weight_dir, transpose_value_cache, kv_len,
-                                 group_size, layernorm_const, "decode"
+                                 group_size, layernorm_const, "decode",
+                                 keep_ir=keep_ir, compile_blob=compile_blob)
         # save blob of single prefill layer
         convert_qwen_layer(model, 0, n_splits_linear, n_splits_down_proj,
                            save_directory, weight_dir, transpose_value_cache, max_prompt_len,
-                           group_size, layernorm_const, "prefill"
+                           group_size, layernorm_const, "prefill",
+                           keep_ir=keep_ir, compile_blob=compile_blob)
         # save blob of lmhead and bin of embedding
         convert_lm_head_and_embedding(model, save_directory, weight_dir,
-                                      convert_model=True, group_size=group_size
+                                      convert_model=True, group_size=group_size,
+                                      keep_ir=keep_ir, compile_blob=compile_blob)
     elif model.config.model_type == "llama":
         embedding_post = False
         cos_sin_input = False
@@ -540,15 +549,18 @@ def convert_llm_for_deploy(model: torch.nn.Module,
         convert_lm_head_and_embedding(model, n_splits_linear,
                                       save_directory, weight_dir,
                                       convert_model=True,
-                                      max_prompt_len=max_prompt_len
+                                      max_prompt_len=max_prompt_len,
+                                      keep_ir=keep_ir, compile_blob=compile_blob)
         # save fused_layers blobs of fused decoder layers
         convert_fused_llama_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
                                   save_directory, weight_dir, transpose_value_cache, kv_len,
-                                  group_size, layernorm_const, "decode"
+                                  group_size, layernorm_const, "decode",
+                                  keep_ir=keep_ir, compile_blob=compile_blob)
         # save blob of single prefill layer
         convert_llama_layer(model, 0, n_splits_linear, n_splits_down_proj,
                             save_directory, weight_dir, transpose_value_cache, max_prompt_len,
-                            group_size, layernorm_const, "prefill"
+                            group_size, layernorm_const, "prefill",
+                            keep_ir=keep_ir, compile_blob=compile_blob)
     elif model.config.model_type == "minicpm":
         if group_size == 0:
             fused_layers = 4 if fuse_layers is None else fuse_layers
@@ -577,16 +589,19 @@ def convert_llm_for_deploy(model: torch.nn.Module,
         # save fused_layers blobs of fused decoder layers
         convert_fused_minicpm_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
                                     save_directory, weight_dir, transpose_value_cache, kv_len,
-                                    group_size, layernorm_const, "decode"
+                                    group_size, layernorm_const, "decode",
+                                    keep_ir=keep_ir, compile_blob=compile_blob)
         # save blob of single prefill layer
         convert_minicpm_layer(model, 0, n_splits_linear, n_splits_down_proj,
                               save_directory, weight_dir, transpose_value_cache, max_prompt_len,
-                              group_size, layernorm_const, "prefill"
+                              group_size, layernorm_const, "prefill",
+                              keep_ir=keep_ir, compile_blob=compile_blob)
         # save blob of lmhead and bin of embedding and embedding_post
         convert_lm_head_and_embedding(model, n_splits_linear,
                                       save_directory, weight_dir,
                                       convert_model=True,
-                                      max_prompt_len=max_prompt_len
+                                      max_prompt_len=max_prompt_len,
+                                      keep_ir=keep_ir, compile_blob=compile_blob)
 
     model.config.update(update_dict)
     model.config.save_pretrained(save_directory)
ipex_llm/transformers/npu_pipeline_model/llama.py
CHANGED
@@ -123,7 +123,8 @@ class Llama32PostEmbedding(NNFactory):
 
 
 def convert_lm_head_and_embedding(model, n_splits_linear, temp_dir, weight_dir,
-                                  convert_model=False, max_prompt_len=1
+                                  convert_model=False, max_prompt_len=1,
+                                  keep_ir=False, compile_blob=True):
     num_heads = model.model.layers[0].self_attn.num_heads
     num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads
     head_dim = model.model.layers[0].self_attn.head_dim
@@ -175,7 +176,8 @@ def convert_lm_head_and_embedding(model, n_splits_linear, temp_dir, weight_dir,
         asym=asym
     )
     last_blob_path = update_names_of_IR_and_export_blob(new_lm_head, "lm_head", temp_dir,
-
+                                                        keep_ir=keep_ir, compile_blob=compile_blob)
+    os.remove(os.path.join(temp_dir, "lm_head.bin"))
 
     # save weights bins files
     if n_splits_linear == 1:
@@ -211,7 +213,9 @@ def convert_lm_head_and_embedding(model, n_splits_linear, temp_dir, weight_dir,
             first_blob_path = None
         else:
             first_blob_path = update_names_of_IR_and_export_blob(new_embedding, "embedding",
-                                                                 temp_dir,
+                                                                 temp_dir, keep_ir=keep_ir,
+                                                                 compile_blob=compile_blob)
+            os.remove(os.path.join(temp_dir, "embedding.bin"))
     else:
         # llama-3.2-3B & llama-3.2-1B
         embedding_layer = model.model.embed_tokens
@@ -235,22 +239,28 @@ def convert_lm_head_and_embedding(model, n_splits_linear, temp_dir, weight_dir,
                                                attention_scaling=attention_scaling,
                                                input_len=1)
             update_names_of_IR_and_export_blob(embedding_post, "embedding_post",
-                                               temp_dir,
+                                               temp_dir, keep_ir=keep_ir, compile_blob=compile_blob)
             embedding_post_prefill = Llama32PostEmbedding(inv_freq=inv_freq,
                                                           attention_scaling=attention_scaling,
                                                           input_len=max_prompt_len)
             update_names_of_IR_and_export_blob(embedding_post_prefill,
                                                "embedding_post_prefill",
-                                               temp_dir,
+                                               temp_dir, keep_ir=keep_ir, compile_blob=compile_blob)
+            os.remove(os.path.join(temp_dir, "embedding_post.bin"))
+            os.remove(os.path.join(temp_dir, "embedding_post_prefill.bin"))
         else:
             first_blob_path = update_names_of_IR_and_export_blob(new_embedding, "embedding",
-                                                                 temp_dir
+                                                                 temp_dir, keep_ir=keep_ir,
+                                                                 compile_blob=compile_blob)
+            os.remove(os.path.join(temp_dir, "embedding.bin"))
+
     return first_blob_path, last_blob_path
 
 
 def convert_llama_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
                         temp_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-                        layernorm_const, mode="decode"
+                        layernorm_const, mode="decode",
+                        keep_ir=False, compile_blob=True):
     num_heads = model.model.layers[0].self_attn.num_heads
     num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads
     head_dim = model.model.layers[0].self_attn.head_dim
@@ -317,8 +327,9 @@ def convert_llama_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
     rest_blob_path = update_names_of_IR_and_export_blob(single_decoder,
                                                         decoder_name,
                                                         temp_dir,
-
+                                                        keep_ir=keep_ir, compile_blob=compile_blob,
                                                         npu_dpu_groups=npu_dpu_groups)
+    os.remove(os.path.join(temp_dir, decoder_name + ".bin"))
 
     if mode == "decode":
         if hasattr(curr_layer.self_attn.rotary_emb, "cos_cached"):
@@ -364,7 +375,8 @@ def convert_llama_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
 
 def convert_fused_llama_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
                               save_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-                              layernorm_const, mode="decode"
+                              layernorm_const, mode="decode",
+                              keep_ir=False, compile_blob=True):
     num_heads = model.model.layers[0].self_attn.num_heads
     num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads
     head_dim = model.model.layers[0].self_attn.head_dim
@@ -457,6 +469,7 @@ def convert_fused_llama_layer(model, fused_layers, n_splits_linear, n_splits_down
         update_names_of_IR_and_export_blob(fused_decoder,
                                            f"decoder_layer_{i}",
                                            save_dir,
-
-
+                                           keep_ir=keep_ir,
+                                           compile_blob=compile_blob)
+        os.remove(os.path.join(save_dir, f"decoder_layer_{i}" + ".bin"))
     return 0
ipex_llm/transformers/npu_pipeline_model/minicpm.py
CHANGED
@@ -162,7 +162,8 @@ class MiniCPMLMHead(LLMBaseNNFactory):
 
 
 def convert_lm_head_and_embedding(model, n_splits_linear, temp_dir, weight_dir,
-                                  convert_model=False, max_prompt_len=1
+                                  convert_model=False, max_prompt_len=1,
+                                  keep_ir=False, compile_blob=True):
     num_heads = model.model.layers[0].self_attn.num_heads
     num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads
     head_dim = model.model.layers[0].self_attn.head_dim
@@ -230,7 +231,8 @@ def convert_lm_head_and_embedding(model, n_splits_linear, temp_dir, weight_dir,
         asym=asym
     )
     last_blob_path = update_names_of_IR_and_export_blob(new_lm_head, "lm_head", temp_dir,
-
+                                                        keep_ir=keep_ir, compile_blob=compile_blob)
+    os.remove(os.path.join(temp_dir, "lm_head.bin"))
 
     # save weights bins files
     if n_splits_linear == 1:
@@ -280,22 +282,27 @@ def convert_lm_head_and_embedding(model, n_splits_linear, temp_dir, weight_dir,
                                               dtype=np.float16,
                                               scale_emb=model.config.scale_emb)
         update_names_of_IR_and_export_blob(embedding_post, "embedding_post",
-                                           temp_dir,
+                                           temp_dir, keep_ir=keep_ir, compile_blob=compile_blob)
         embedding_post_prefill = MiniCPMPostEmbedding(max_prompt_len, model.config.hidden_size,
                                                       dtype=np.float16,
                                                       scale_emb=model.config.scale_emb)
         update_names_of_IR_and_export_blob(embedding_post_prefill,
                                            "embedding_post_prefill",
-                                           temp_dir,
+                                           temp_dir, keep_ir=keep_ir, compile_blob=compile_blob)
+        os.remove(os.path.join(temp_dir, "embedding_post.bin"))
+        os.remove(os.path.join(temp_dir, "embedding_post_prefill.bin"))
     else:
         first_blob_path = update_names_of_IR_and_export_blob(new_embedding, "embedding",
-                                                             temp_dir,
+                                                             temp_dir, keep_ir=keep_ir,
+                                                             compile_blob=compile_blob)
+        os.remove(os.path.join(temp_dir, "embedding.bin"))
     return first_blob_path, last_blob_path
 
 
 def convert_minicpm_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
                           temp_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-                          layernorm_const, mode="decode"
+                          layernorm_const, mode="decode",
+                          keep_ir=False, compile_blob=True):
     num_heads = model.model.layers[0].self_attn.num_heads
     num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads
     head_dim = model.model.layers[0].self_attn.head_dim
@@ -353,7 +360,8 @@ def convert_minicpm_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
     rest_blob_path = update_names_of_IR_and_export_blob(single_decoder,
                                                         decoder_name,
                                                         temp_dir,
-
+                                                        keep_ir=keep_ir, compile_blob=compile_blob)
+    os.remove(os.path.join(temp_dir, decoder_name + ".bin"))
 
     if mode == "decode":
         if layernorm_const:
@@ -386,7 +394,8 @@ def convert_minicpm_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
 
 def convert_fused_minicpm_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
                                 save_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-                                layernorm_const, mode="decode"
+                                layernorm_const, mode="decode",
+                                keep_ir=False, compile_blob=True):
     num_heads = model.model.layers[0].self_attn.num_heads
     num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads
     head_dim = model.model.layers[0].self_attn.head_dim
@@ -477,6 +486,6 @@ def convert_fused_minicpm_layer(model, fused_layers, n_splits_linear, n_splits_d
         update_names_of_IR_and_export_blob(fused_decoder,
                                            f"decoder_layer_{i}",
                                            save_dir,
-                                           compile_blob=
-
+                                           keep_ir=keep_ir, compile_blob=compile_blob)
+        os.remove(os.path.join(save_dir, f"decoder_layer_{i}" + ".bin"))
     return 0
ipex_llm/transformers/npu_pipeline_model/qwen.py
CHANGED
@@ -24,7 +24,8 @@ from ipex_llm.transformers.npu_models.lm_head import SlicedLMHead
 
 
 def convert_lm_head_and_embedding(model, temp_dir, weight_dir,
-                                  convert_model=False, group_size=0
+                                  convert_model=False, group_size=0,
+                                  keep_ir=False, compile_blob=True):
     num_heads = model.model.layers[0].self_attn.num_heads
     head_dim = model.model.layers[0].self_attn.head_dim
     rms_norm_eps = model.config.rms_norm_eps
@@ -84,7 +85,9 @@ def convert_lm_head_and_embedding(model, temp_dir, weight_dir,
     )
 
     last_blob_path = update_names_of_IR_and_export_blob(new_lm_head, f"lm_head",
-                                                        temp_dir,
+                                                        temp_dir, keep_ir=keep_ir,
+                                                        compile_blob=compile_blob)
+    os.remove(os.path.join(temp_dir, "lm_head.bin"))
 
     # save weights bins files
     if not isinstance(lm_head, SlicedLMHead):
@@ -119,13 +122,16 @@ def convert_lm_head_and_embedding(model, temp_dir, weight_dir,
         first_blob_path = True
     else:
         first_blob_path = update_names_of_IR_and_export_blob(new_embedding, f"embedding",
-                                                             temp_dir,
+                                                             temp_dir, keep_ir=keep_ir,
+                                                             compile_blob=compile_blob)
+        os.remove(os.path.join(temp_dir, "embedding.bin"))
     return first_blob_path, last_blob_path
 
 
 def convert_qwen_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
                        temp_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-                       layernorm_const, mode="decode"
+                       layernorm_const, mode="decode",
+                       keep_ir=False, compile_blob=True):
     num_heads = model.model.layers[0].self_attn.num_heads
     num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads
     head_dim = model.model.layers[0].self_attn.head_dim
@@ -183,8 +189,10 @@ def convert_qwen_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
     )
     rest_blob_path = update_names_of_IR_and_export_blob(single_decoder,
                                                         decoder_name,
-                                                        temp_dir,
+                                                        temp_dir, keep_ir=keep_ir,
+                                                        compile_blob=compile_blob,
                                                         npu_dpu_groups=npu_dpu_groups)
+    os.remove(os.path.join(temp_dir, decoder_name + ".bin"))
 
     # 0, 1, 2 are input_embed/attention_mask/position_id
     if mode == "decode":
@@ -226,7 +234,8 @@ def convert_qwen_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
 
 def convert_fused_qwen_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
                              save_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-                             layernorm_const, mode="decode"
+                             layernorm_const, mode="decode",
+                             keep_ir=False, compile_blob=True):
     num_heads = model.model.layers[0].self_attn.num_heads
     num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads
     head_dim = model.model.layers[0].self_attn.head_dim
@@ -330,6 +339,6 @@ def convert_fused_qwen_layer(model, fused_layers, n_splits_linear, n_splits_down
         update_names_of_IR_and_export_blob(fused_decoder,
                                            f"decoder_layer_{i}",
                                            save_dir,
-                                           compile_blob=
-
+                                           keep_ir=keep_ir, compile_blob=compile_blob)
+        os.remove(os.path.join(save_dir, f"decoder_layer_{i}" + ".bin"))
     return 0
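The same pattern repeats across the qwen, llama, and minicpm converters: every update_names_of_IR_and_export_blob call gains keep_ir/compile_blob, and the intermediate <name>.bin weights file is removed after export. A hypothetical sketch of the caller-side pattern these hunks introduce; export_fn stands in for update_names_of_IR_and_export_blob, whose implementation is not shown in this diff:

import os

def export_and_tidy(export_fn, name: str, out_dir: str,
                    keep_ir: bool = False, compile_blob: bool = True):
    # Export the graph; per the new npu_model guard, at least one of
    # keep_ir / compile_blob must be True so something is actually saved.
    blob_path = export_fn(name, out_dir, keep_ir=keep_ir, compile_blob=compile_blob)
    # Mirror of the call sites above: drop the intermediate <name>.bin
    # weights file the exporter leaves behind.
    bin_path = os.path.join(out_dir, name + ".bin")
    if os.path.exists(bin_path):
        os.remove(bin_path)
    return blob_path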
ipex_llm/transformers/qlora.py
CHANGED
@@ -109,7 +109,7 @@ class LoraLowBitLinear(Module, LoraLayer):
         self.qa_pool = torch.nn.Identity()
 
     def forward(self, x: torch.Tensor):
-        autocast_dtype = get_autocast_dtype(x)
+        autocast_dtype = get_autocast_dtype(x.device.type)
         if x.device.type == "xpu":
             # force to use bf16 on gpu
             x = x.to(torch.bfloat16)
@@ -177,7 +177,7 @@ class LoraBF16Linear(Module, LoraLayer):
         self.is_target_conv_1d_layer = is_target_conv_1d_layer
 
     def forward(self, x: torch.Tensor):
-        autocast_dtype = get_autocast_dtype(x)
+        autocast_dtype = get_autocast_dtype(x.device.type)
         if x.device.type == "xpu":
             # force to use bf16 on gpu
             x = x.to(torch.bfloat16)
ipex_llm/transformers/utils.py
CHANGED
@@ -138,26 +138,39 @@ def fix_key(key):
     return key
 
 
-def
+def is_autocast_enabled(device_type: str):
     if torch.__version__ >= '2.3':
-
-
+        return torch.is_autocast_enabled(device_type)
+    else:
+        if device_type == "xpu":
+            return torch.xpu.is_autocast_xpu_enabled()
+        elif device_type == "cpu":
+            return torch.is_autocast_cpu_enabled()
+        else:
+            invalidInputError(False,
+                              f"Device type {device_type} is not supported.")
+
+
+def get_autocast_dtype(device_type: str):
+    if torch.__version__ >= '2.3':
+        if torch.is_autocast_enabled(device_type):
+            return torch.get_autocast_dtype(device_type)
         else:
             return None
     else:
-        if
+        if device_type == "xpu":
             if torch.xpu.is_autocast_xpu_enabled():
                 return torch.xpu.get_autocast_xpu_dtype()
             else:
                 return None
-        elif
+        elif device_type == "cpu":
             if torch.is_autocast_cpu_enabled():
                 return torch.get_autocast_cpu_dtype()
            else:
                 return None
         else:
             invalidInputError(False,
-                              f"Device {
+                              f"Device type {device_type} is not supported.")
 
 
 def get_xpu_device_name(device: torch.device):
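The rewritten helpers take a device-type string ("xpu" or "cpu") instead of a tensor, and are version-gated: on torch >= 2.3 they use the unified torch.is_autocast_enabled(device_type) / torch.get_autocast_dtype(device_type) APIs, otherwise they fall back to the per-device functions. A short CPU demonstration of that contract; get_autocast_dtype here mirrors the helper in the diff rather than importing it, and assumes a torch build where torch.is_autocast_enabled accepts a device_type, as the new code requires:

import torch

def get_autocast_dtype(device_type: str):
    # Mirrors the torch >= 2.3 branch of the helper above
    if torch.is_autocast_enabled(device_type):
        return torch.get_autocast_dtype(device_type)
    return None

print(get_autocast_dtype("cpu"))  # None outside an autocast region
with torch.autocast("cpu", dtype=torch.bfloat16):
    print(get_autocast_dtype("cpu"))  # torch.bfloat16 inside the region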
ipex_llm/transformers/xpu_customize_fwd.py
CHANGED
@@ -107,6 +107,8 @@ except ModuleNotFoundError:
     np = None  # type: ignore[assignment]
 from typing import Any
 
+from ipex_llm.transformers.utils import is_autocast_enabled, get_autocast_dtype
+
 
 def _cast(value, dtype):
     if isinstance(value, torch.Tensor):
@@ -155,12 +157,12 @@ def custom_fwd(fwd=None, *, cast_inputs=None):
 
     @functools.wraps(fwd)
     def decorate_fwd(*args, **kwargs):
-        args[0]._dtype =
+        args[0]._dtype = get_autocast_dtype("xpu")
         if cast_inputs is None:
-            args[0]._fwd_used_autocast =
+            args[0]._fwd_used_autocast = is_autocast_enabled("xpu")
             return fwd(*args, **kwargs)
         else:
-            autocast_context =
+            autocast_context = is_autocast_enabled("xpu")
             args[0]._fwd_used_autocast = False
             if autocast_context:
                 with torch.xpu.autocast(enabled=False):
@@ -184,7 +186,7 @@ def custom_bwd(bwd):
 
     @functools.wraps(bwd)
     def decorate_bwd(*args, **kwargs):
-        with torch.
+        with torch.autocast("xpu", enabled=args[0]._fwd_used_autocast, dtype=args[0]._dtype):
             return bwd(*args, **kwargs)
 
     return decorate_bwd
ipex_llm/transformers/xpu_ops.py
CHANGED
@@ -20,9 +20,10 @@ import xe_batch
 import xe_addons
 
 
-
-
-
+@torch.library.register_fake("ipex_llm::forward_new")
+def _(x, weight, qtype, output_size):
+    return torch.empty([x.size(0), output_size],
+                       dtype=x.dtype, device=x.device)
 
 
 # @torch.library.register_fake("ipex_llm::dequant")
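A fake (meta) implementation registered with torch.library.register_fake tells torch.compile and FakeTensor tracing what shape, dtype, and device a custom op produces without running the real kernel, which is what the forward_new registration above does for the xe_linear binary op. A self-contained illustration (torch >= 2.4); the mylib::scale_rows op and its eager implementation are made up for this example and are not part of ipex-llm:

import torch

@torch.library.custom_op("mylib::scale_rows", mutates_args=())
def scale_rows(x: torch.Tensor, factor: float) -> torch.Tensor:
    return x * factor  # eager implementation

@torch.library.register_fake("mylib::scale_rows")
def _(x, factor):
    # Only shape/dtype/device matter under fake tensors: no real compute runs.
    return torch.empty_like(x)

@torch.compile(fullgraph=True)
def f(x):
    return scale_rows(x, 2.0)

print(f(torch.ones(2, 3)))  # compiles because the op has a fake kernel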
{ipex_llm-2.2.0b20250204.dist-info → ipex_llm-2.2.0b20250204.post0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ipex-llm
-Version: 2.2.0b20250204
+Version: 2.2.0b20250204.post0
 Summary: Large Language Model Develop Toolkit
 Home-page: https://github.com/intel-analytics/ipex-llm
 Author: BigDL Authors
@@ -27,19 +27,12 @@ Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'all'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'all'
 Provides-Extra: cpp
-Requires-Dist: bigdl-core-cpp ==2.6.0b20250204 ; extra == 'cpp'
+Requires-Dist: bigdl-core-cpp ==2.6.0b20250204.post0 ; extra == 'cpp'
+Requires-Dist: onednn-devel ==2025.0.1 ; extra == 'cpp'
+Requires-Dist: onednn ==2025.0.1 ; extra == 'cpp'
+Requires-Dist: dpcpp-cpp-rt ==2025.0.2 ; extra == 'cpp'
+Requires-Dist: mkl-dpcpp ==2025.0.1 ; extra == 'cpp'
 Requires-Dist: setuptools ; extra == 'cpp'
-Provides-Extra: cpp-arl
-Requires-Dist: bigdl-core-cpp ==2.6.0b20250204 ; extra == 'cpp-arl'
-Requires-Dist: setuptools ; extra == 'cpp-arl'
-Requires-Dist: onednn-devel ==2024.1.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
-Requires-Dist: onednn ==2024.1.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
-Requires-Dist: dpcpp-cpp-rt ==2024.2.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
-Requires-Dist: mkl-dpcpp ==2024.2.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
-Requires-Dist: onednn-devel ==2024.2.1 ; (platform_system == "Windows") and extra == 'cpp'
-Requires-Dist: onednn ==2024.2.1 ; (platform_system == "Windows") and extra == 'cpp'
-Requires-Dist: dpcpp-cpp-rt ==2024.2.1 ; (platform_system == "Windows") and extra == 'cpp'
-Requires-Dist: mkl-dpcpp ==2024.2.1 ; (platform_system == "Windows") and extra == 'cpp'
 Provides-Extra: llama-index
 Requires-Dist: py-cpuinfo ; extra == 'llama-index'
 Requires-Dist: protobuf ; extra == 'llama-index'
@@ -67,7 +60,7 @@ Requires-Dist: transformers ==4.40.0 ; extra == 'npu'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'npu'
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'npu'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'npu'
-Requires-Dist: bigdl-core-npu ==2.6.0b20250204 ; (platform_system == "Windows") and extra == 'npu'
+Requires-Dist: bigdl-core-npu ==2.6.0b20250204.post0 ; (platform_system == "Windows") and extra == 'npu'
 Provides-Extra: serving
 Requires-Dist: py-cpuinfo ; extra == 'serving'
 Requires-Dist: fschat[model_worker,webui] ==0.2.36 ; extra == 'serving'
@@ -87,9 +80,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250204 ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250204 ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250204 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250204.post0 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250204.post0 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250204.post0 ; extra == 'xpu'
 Provides-Extra: xpu-2-1
 Requires-Dist: py-cpuinfo ; extra == 'xpu-2-1'
 Requires-Dist: protobuf ; extra == 'xpu-2-1'
@@ -104,9 +97,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu-2-1'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250204 ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250204 ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250204 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250204.post0 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250204.post0 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250204.post0 ; extra == 'xpu-2-1'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-1'
 Requires-Dist: dpcpp-cpp-rt ==2024.0.2 ; (platform_system == "Windows") and extra == 'xpu-2-1'
 Requires-Dist: mkl-dpcpp ==2024.0.0 ; (platform_system == "Windows") and extra == 'xpu-2-1'
@@ -124,7 +117,7 @@ Requires-Dist: setuptools ; extra == 'xpu-2-6'
 Requires-Dist: torch ==2.6.0+xpu ; extra == 'xpu-2-6'
 Requires-Dist: torchvision ==0.21.0+xpu ; extra == 'xpu-2-6'
 Requires-Dist: torchaudio ==2.6.0+xpu ; extra == 'xpu-2-6'
-Requires-Dist: bigdl-core-xe-all ==2.6.0b20250204 ; extra == 'xpu-2-6'
+Requires-Dist: bigdl-core-xe-all ==2.6.0b20250204.post0 ; extra == 'xpu-2-6'
 Requires-Dist: onednn-devel ==2025.0.1 ; extra == 'xpu-2-6'
 Requires-Dist: onednn ==2025.0.1 ; extra == 'xpu-2-6'
 Requires-Dist: dpcpp-cpp-rt ==2025.0.2 ; extra == 'xpu-2-6'
@@ -140,9 +133,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arc'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arc'
 Requires-Dist: tabulate ; extra == 'xpu-arc'
 Requires-Dist: setuptools ; extra == 'xpu-arc'
-Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250204 ; extra == 'xpu-arc'
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250204 ; extra == 'xpu-arc'
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250204 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250204.post0 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250204.post0 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250204.post0 ; extra == 'xpu-arc'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arc'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
@@ -163,9 +156,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arl'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arl'
 Requires-Dist: tabulate ; extra == 'xpu-arl'
 Requires-Dist: setuptools ; extra == 'xpu-arl'
-Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250204 ; extra == 'xpu-arl'
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250204 ; extra == 'xpu-arl'
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250204 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250204.post0 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250204.post0 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250204.post0 ; extra == 'xpu-arl'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arl'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
@@ -186,9 +179,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-lnl'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-lnl'
 Requires-Dist: tabulate ; extra == 'xpu-lnl'
 Requires-Dist: setuptools ; extra == 'xpu-lnl'
-Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250204 ; extra == 'xpu-lnl'
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250204 ; extra == 'xpu-lnl'
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250204 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250204.post0 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250204.post0 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250204.post0 ; extra == 'xpu-lnl'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-lnl'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
{ipex_llm-2.2.0b20250204.dist-info → ipex_llm-2.2.0b20250204.post0.dist-info}/RECORD
CHANGED
@@ -41,35 +41,35 @@ ipex_llm/langchain/llms/transformerspipelinellm.py,sha256=vm522YPPwWxxAPVvQBtxRf
 ipex_llm/langchain/vllm/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
 ipex_llm/langchain/vllm/vllm.py,sha256=6dxc-ZISZQrJilEa_HA827l75Dv9rcHpY_G6FdJ8BVs,7793
 ipex_llm/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ipex_llm/libs/bloom-api.dll,sha256=
-ipex_llm/libs/bloom.dll,sha256=
-ipex_llm/libs/gptneox-api.dll,sha256=
-ipex_llm/libs/gptneox.dll,sha256=
-ipex_llm/libs/libbloom_avx.dll,sha256=
-ipex_llm/libs/libbloom_vnni.dll,sha256=
-ipex_llm/libs/libgptneox_avx.dll,sha256=
-ipex_llm/libs/libgptneox_vnni.dll,sha256=
-ipex_llm/libs/libllama_avx.dll,sha256=
-ipex_llm/libs/libllama_vnni.dll,sha256=
-ipex_llm/libs/libstarcoder_avx.dll,sha256=
-ipex_llm/libs/libstarcoder_vnni.dll,sha256=
-ipex_llm/libs/llama-api.dll,sha256=
-ipex_llm/libs/llama.dll,sha256=
-ipex_llm/libs/main-bloom.exe,sha256=
-ipex_llm/libs/main-gptneox.exe,sha256=
-ipex_llm/libs/main-llama.exe,sha256=
-ipex_llm/libs/main-starcoder.exe,sha256=
-ipex_llm/libs/pipeline.dll,sha256=
-ipex_llm/libs/quantize-bloom.exe,sha256=
-ipex_llm/libs/quantize-bloom_vnni.exe,sha256=
-ipex_llm/libs/quantize-gptneox.exe,sha256=
-ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=
-ipex_llm/libs/quantize-llama.exe,sha256=
-ipex_llm/libs/quantize-llama_vnni.exe,sha256=
-ipex_llm/libs/quantize-starcoder.exe,sha256=
-ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=
-ipex_llm/libs/starcoder-api.dll,sha256=
-ipex_llm/libs/starcoder.dll,sha256=
+ipex_llm/libs/bloom-api.dll,sha256=UxAv-GadkIVCnv5S66r7IFt4szGuIM1tGokBmxemeC4,36352
+ipex_llm/libs/bloom.dll,sha256=RSJ05o8iUkQJq4bjd3lNcHmfOAmJlSGAd4Pomq9PwAM,507904
+ipex_llm/libs/gptneox-api.dll,sha256=C31cT9xWi2OERSExQz9QU5R_WdMRDPgprukPnLKH71Q,24576
+ipex_llm/libs/gptneox.dll,sha256=8SrVALu3AG19z1VQqv2LEBsSwy-LqTGqlt-8eQQX26Y,568320
+ipex_llm/libs/libbloom_avx.dll,sha256=nWPWw-uyycgK8yFPRokE1wAt4NOWasKhUh87ghOAp2o,536576
+ipex_llm/libs/libbloom_vnni.dll,sha256=ArrDTSK8uFo5siZz9WPnNul0xjs6pfAZ7z7mjqb9ywI,508416
+ipex_llm/libs/libgptneox_avx.dll,sha256=Sm_4JKYmFxuWRw1_ATSdeGuTYveIPhNnAKw-YkJKoNU,596992
+ipex_llm/libs/libgptneox_vnni.dll,sha256=IL8Q-KsatQl3mBejqGSvOQc2lZtA2dsPYaXYw5Cdjv4,568832
+ipex_llm/libs/libllama_avx.dll,sha256=KaYditEetrkzBQIDArK9owquKe1FlC6n5u-9LYfgSbA,591360
+ipex_llm/libs/libllama_vnni.dll,sha256=b8aWcXg4lZUMinb9m0cgQMHt8k-OnKddnhqAp9BGhYg,563200
+ipex_llm/libs/libstarcoder_avx.dll,sha256=CPdy-ZQ8wMTsB0vrhiy1oBXjBqghprQEgEL9qnh8Q5o,627712
+ipex_llm/libs/libstarcoder_vnni.dll,sha256=jNnXhk4tVpHMW2VUGrc-jKrnwFJpnSIArBuWlOVi2CY,599552
+ipex_llm/libs/llama-api.dll,sha256=F1EyeCDV62JqNFiTtSglmEkMHvQ27Tz1kA4ZS19dfRE,25600
+ipex_llm/libs/llama.dll,sha256=WO3V-mjWiKcZQA_lLIslINOXi3lgx8_R100rsWU0sq0,562688
+ipex_llm/libs/main-bloom.exe,sha256=GBNcb3iEFTxWoadPalCuyOFR4AAAoD8lpKDVJjZDCpk,103424
+ipex_llm/libs/main-gptneox.exe,sha256=mNL1bSCjnRtpXy2I4XVullKq9msTt77grfN7IWFfNPg,98816
+ipex_llm/libs/main-llama.exe,sha256=O9b660YcerS0RLPly2qwDzUDJB91vBctXblmZipzOFU,99840
+ipex_llm/libs/main-starcoder.exe,sha256=fcSGXJAETsXNfxR2-0HsxrB2BxHaDniIj1Z8uOyrESw,157696
+ipex_llm/libs/pipeline.dll,sha256=Js9cA-pFx0vHdZmsK9TEIa5RLaymnY9OeNV3i8DZh_4,72704
+ipex_llm/libs/quantize-bloom.exe,sha256=3cdZBeG9x_oyFcyAvoAFojo9Kb0TJC-ZAJzXdPG9e24,126464
+ipex_llm/libs/quantize-bloom_vnni.exe,sha256=ETIqFEor0troIoRLJRYjJ_pDpaFByhk79UNfEMGcw-g,128000
+ipex_llm/libs/quantize-gptneox.exe,sha256=PupJio2YrOQCq3-45BuxkfBG2IksXgPSks2P_riU7C8,104448
+ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=9_Cv-2mhRE5A_1_aJKdckkV_mEnvc8mcGe8KUeWZdHA,104960
+ipex_llm/libs/quantize-llama.exe,sha256=hfG3BiXvXMtNHr68ZBID4MR7PGgaJwmN749Ry79826s,110080
+ipex_llm/libs/quantize-llama_vnni.exe,sha256=mrK5Q6weyYpg3aJl46TvaRX7Iho5ANCLOMgI06mO--s,110592
+ipex_llm/libs/quantize-starcoder.exe,sha256=9X3c4e2SvGFb6_eIwhm2pal-P630M1zr4sjZH0LKaS8,127488
+ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=Fgk7DiCoI5a1fj7s0LHavmQOYgUNxHS8oCUJMRdcjoI,128512
+ipex_llm/libs/starcoder-api.dll,sha256=3JcNxRgk3otEDsqUT9kTe6JZ_CTeCK4Vwt2U7eaWcu0,21504
+ipex_llm/libs/starcoder.dll,sha256=4vV5R9PYmzHjoREsw7V4h9wl0oGv0qKRz3XwhQLLqlQ,599040
 ipex_llm/llamaindex/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
 ipex_llm/llamaindex/llms/__init__.py,sha256=KP1lEdGqDuxPoxL1ZSH25Pm2kKMPJBWUTLR0ckSLMIU,1139
 ipex_llm/llamaindex/llms/bigdlllm.py,sha256=FQBzq1KOjfc6uofTXAha3O7TqpJkNfOFepXQmOVlbnI,26314
@@ -94,20 +94,20 @@ ipex_llm/transformers/kv.py,sha256=k4TU18LlA-Sbq9WNNQnfuzu3RSFBwFhmaV3BcGN5bAo,1
 ipex_llm/transformers/lisa.py,sha256=F5WxbtXQ7RdKulj83h_2DnEIgKiKGZf7zvOmg6QBl2s,3289
 ipex_llm/transformers/loader.py,sha256=AwjV5RpI2t2bedlv7ZhLm8cfd-QJZm5hny-XyjIvdnk,6876
 ipex_llm/transformers/lookup.py,sha256=b6OlZ9OV10R9qeWw8mVryVpDxszkjwLkldvi7GPMJY8,19614
-ipex_llm/transformers/low_bit_linear.py,sha256=
+ipex_llm/transformers/low_bit_linear.py,sha256=3EtbiCAq5HU_r2pGJ9beSDK4NPTN8Jj-aHMqm1jqX18,39177
 ipex_llm/transformers/model.py,sha256=cQJNlAkdfoWmVbWd-TS2hf-Do41mMO9orPvG3FO4Nns,40855
 ipex_llm/transformers/modelling_bigdl.py,sha256=7JpNVMuyq_OmtNUaMFMXdxPWZp2q0QHC02QeA-VTPOw,6709
-ipex_llm/transformers/npu_model.py,sha256=
+ipex_llm/transformers/npu_model.py,sha256=LMmRmhq8IAN9FrXLUeUK2B8XS2OJ5GVWmG0cEdeK-ro,40354
 ipex_llm/transformers/patches.py,sha256=G9KcXxo42H1HJEDaroq4JbBN5P0P0lty7U7kk7-g4tw,991
 ipex_llm/transformers/pipeline_parallel.py,sha256=uNZpOXljNmdoEYnP8U-VFiN4dRZb2piQbIf2bG9LQnE,49051
-ipex_llm/transformers/qlora.py,sha256=
+ipex_llm/transformers/qlora.py,sha256=qV9Y6G5kAaet77LLA3oXn3qQY4ayyAPZ7NAjOlHCS7g,14967
 ipex_llm/transformers/relora.py,sha256=-dYzUV0P-IhO2jFdnzN9-v_sFzJpRj3ZwN9eCJzOoCw,16567
 ipex_llm/transformers/speculative.py,sha256=0XNLgc9dGswJHVPrXo4iM7pPxkWwfFfJMECcivJSnIc,63368
 ipex_llm/transformers/streamer.py,sha256=RrVlLblzCOtABRUpaMXAyaMnCGgLUtAi_YesLumRbww,4842
 ipex_llm/transformers/training_patch.py,sha256=oxMkUtqyvqJiprw6dE3skkYfD1HOmUlH9N0hBkbn0G0,10799
-ipex_llm/transformers/utils.py,sha256=
-ipex_llm/transformers/xpu_customize_fwd.py,sha256=
-ipex_llm/transformers/xpu_ops.py,sha256=
+ipex_llm/transformers/utils.py,sha256=a-2wbflSd_yYnC5qcMoY5HLR1yT_QpxeX_WpGpaDLrA,17457
+ipex_llm/transformers/xpu_customize_fwd.py,sha256=PUBYLnTbaBXUs3Dnte9Gqln2XFk8iA62SmloWjr7GJI,7668
+ipex_llm/transformers/xpu_ops.py,sha256=z95iTtcDQvNyJOvB4A6B_ECTYjHp4A7x-FsssoETOMs,4914
 ipex_llm/transformers/awq/__init__.py,sha256=Du5gu3-eeAkeDO_dEMBTzrDBA66DSN3uL3-rn8WGXQw,875
 ipex_llm/transformers/awq/act.py,sha256=YwomJzOOKwkKtzGrm4L4kwBstBLO1Z8SK4CKi8PSYVQ,2172
 ipex_llm/transformers/awq/awq.py,sha256=cGyRQJWwAEJtOtdSbsBoQ33KX_Ie0pv5OJHC0ACEELE,8861
@@ -174,7 +174,7 @@ ipex_llm/transformers/models/rwkv5.py,sha256=OkRNj1pCAZg1z2Fw-I0DEnxLEdZyPeRSQ6m
 ipex_llm/transformers/models/sd.py,sha256=VvHV5u-0k2MgHu3NL9113hPj7DgfxqctuKzEEeNfRDU,5981
 ipex_llm/transformers/models/stablelm.py,sha256=fj-XtOnR6kggnFUQTMPCOOzolkPztN06WAv8QW-XRnI,7054
 ipex_llm/transformers/models/starcoder2.py,sha256=ONKvD7JCkRM0DI-R56x28QFBJ7CjD5hOZBQ_3WfOcNk,6626
-ipex_llm/transformers/models/utils.py,sha256=
+ipex_llm/transformers/models/utils.py,sha256=qI5ln8SQGTvR_IyxFkoZhefgOErnXUnJrifIyhiqT9c,14753
 ipex_llm/transformers/models/whisper.py,sha256=ju3WP8Eq-KvD7kb3Qy51r4FOfSX3NBxfp5RBcq__gzc,4241
 ipex_llm/transformers/models/yuan.py,sha256=JYAn_ZaSGK0NBJLEIxCACfAq084a66GFJkdd5NbpmMA,7732
 ipex_llm/transformers/npu_models/__init__.py,sha256=ulEUGLjaP48LCrVeury3UxLjXxKzRi0UpSG4bYu-7f8,585
@@ -183,7 +183,7 @@ ipex_llm/transformers/npu_models/baichuan_mp.py,sha256=tHhO-0v5z6IhxsfzAPYWXVbLr
 ipex_llm/transformers/npu_models/chatglm.py,sha256=YzpGLZ7ORt6qkwW9mCwZ_xhOAI8uHSDHJrmqWgNM234,10511
 ipex_llm/transformers/npu_models/chatglm4.py,sha256=J4523DzhIzZxIvlf1V9qU4auzEGKvC80YqyxuCJygjw,9795
 ipex_llm/transformers/npu_models/common.py,sha256=tTUJL7IxVrJSnXle6nla35wTUrBf2sOEt7Ya1qyMezY,4853
-ipex_llm/transformers/npu_models/convert.py,sha256=
+ipex_llm/transformers/npu_models/convert.py,sha256=2YAi8rvEYu_tvzpczKsJBsKjAns5FAPz1MntJTxIQC0,25472
 ipex_llm/transformers/npu_models/convert_mp.py,sha256=Y6Fcde7bXHkZ0wvm8PymxJqvncbDj3ZjMez3SY9qi5U,24452
 ipex_llm/transformers/npu_models/glm_edge.py,sha256=VsJex-6530h4ZQk35TxRe1MnttAHT41omE8LV47LgBE,6723
 ipex_llm/transformers/npu_models/kv.py,sha256=2OSFO9Z6e4nGdVxXEM-Bq2qa_npYYbGmQt3lcCZxTlU,9201
@@ -208,11 +208,11 @@ ipex_llm/transformers/npu_models/xlm_mp.py,sha256=sj8OVun8xJprM7ZJp0XzWa55rqlSIz
 ipex_llm/transformers/npu_pipeline_model/__init__.py,sha256=b2IXvVqQ5cItki021h8s3ymW12RPu8QNPprq4Mn3bDM,586
 ipex_llm/transformers/npu_pipeline_model/baichuan.py,sha256=ICxRzFQ4OIANDkkVi2_4xOeQXmfFXYMx3H52KuE1xR4,6208
 ipex_llm/transformers/npu_pipeline_model/common.py,sha256=QxJoJESpv0BpwO_FBeAT2wKA56wNFfen8iI37PrMKuA,7838
-ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py,sha256
-ipex_llm/transformers/npu_pipeline_model/llama.py,sha256=
-ipex_llm/transformers/npu_pipeline_model/minicpm.py,sha256=
+ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py,sha256=-eHNbRuX2QhYd0-jCyo2pZpHTZTZ108bhObYx8a3CJs,29494
+ipex_llm/transformers/npu_pipeline_model/llama.py,sha256=pmAnawfAn0W8XSr8kGWfxR1HylCLa-Y6mKpFeX-m8UY,20892
+ipex_llm/transformers/npu_pipeline_model/minicpm.py,sha256=H7j_UaHj-IwEBriQ-bunle0-8s2NmvqnL9eYuixnmFc,21398
 ipex_llm/transformers/npu_pipeline_model/pipeline_cpp.py,sha256=JNmodAMg_NQvDILug3E_fGXEh6cd3wsj4bvAzcd-vaU,2749
-ipex_llm/transformers/npu_pipeline_model/qwen.py,sha256=
+ipex_llm/transformers/npu_pipeline_model/qwen.py,sha256=FAfoPlKEAxeU6-J8ltpSev5ithm9AC-urtreu6NGpME,15509
 ipex_llm/utils/__init__.py,sha256=LlUgrD03rfw4iY8zWPtHH6p65Gw76waVOLHaqagETw0,1425
 ipex_llm/utils/benchmark_util_4_29.py,sha256=OU1W1quiaiJGsg1pd3HM9O6PmVSaPA0HHE7R8hNTfmQ,258653
 ipex_llm/utils/benchmark_util_4_42.py,sha256=HEiClCgKDp_T64HH8ulSTly8dvt6UwPDYZfrPVYvXcc,225383
@@ -248,11 +248,11 @@ ipex_llm/vllm/xpu/engine/__init__.py,sha256=pY_CpyuZd72fr6s32ejeKHKFW0K4vUU2rzZj
 ipex_llm/vllm/xpu/engine/engine.py,sha256=k4-D27WS_Gk3mA--w3HWAjPjb4Aiu043MVPi0ZoAUBc,5984
 ipex_llm/vllm/xpu/entrypoints/openai/api_server.py,sha256=GshTZFB8e4PWvqckfbmTOU6b0oLkNn7A-vzLuG9--j8,21544
 ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py,sha256=2rENA2ucynMaIjiZBEh2ez1o5vR32GaP514t39CD7KM,8676
-ipex_llm-2.2.0b20250204.data/scripts/ipex-llm-init.bat,sha256=HPtCYuDYwEatq7dAwOvdfVcHYCpAVdbj75K1qh0vQek,2578
-ipex_llm-2.2.0b20250204.data/scripts/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
-ipex_llm-2.2.0b20250204.data/scripts/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
-ipex_llm-2.2.0b20250204.dist-info/METADATA,sha256=
-ipex_llm-2.2.0b20250204.dist-info/WHEEL,sha256=6iYPr8vTHsyDK75jr9X0V3I9wPSVmtwr_8fdATBciGk,98
-ipex_llm-2.2.0b20250204.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
-ipex_llm-2.2.0b20250204.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
-ipex_llm-2.2.0b20250204.dist-info/RECORD,,
+ipex_llm-2.2.0b20250204.post0.data/scripts/ipex-llm-init.bat,sha256=HPtCYuDYwEatq7dAwOvdfVcHYCpAVdbj75K1qh0vQek,2578
+ipex_llm-2.2.0b20250204.post0.data/scripts/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
+ipex_llm-2.2.0b20250204.post0.data/scripts/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
+ipex_llm-2.2.0b20250204.post0.dist-info/METADATA,sha256=v7-tush1os4a_HVuvzFajDBsfW_xq4VW8GhU--mIj8U,12343
+ipex_llm-2.2.0b20250204.post0.dist-info/WHEEL,sha256=6iYPr8vTHsyDK75jr9X0V3I9wPSVmtwr_8fdATBciGk,98
+ipex_llm-2.2.0b20250204.post0.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
+ipex_llm-2.2.0b20250204.post0.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
+ipex_llm-2.2.0b20250204.post0.dist-info/RECORD,,
{ipex_llm-2.2.0b20250204.data → ipex_llm-2.2.0b20250204.post0.data}/scripts/ipex-llm-init.bat
RENAMED
File without changes

{ipex_llm-2.2.0b20250204.data → ipex_llm-2.2.0b20250204.post0.data}/scripts/llm-chat.ps1
RENAMED
File without changes

{ipex_llm-2.2.0b20250204.data → ipex_llm-2.2.0b20250204.post0.data}/scripts/llm-cli.ps1
RENAMED
File without changes

{ipex_llm-2.2.0b20250204.dist-info → ipex_llm-2.2.0b20250204.post0.dist-info}/WHEEL
RENAMED
File without changes

{ipex_llm-2.2.0b20250204.dist-info → ipex_llm-2.2.0b20250204.post0.dist-info}/entry_points.txt
RENAMED
File without changes

{ipex_llm-2.2.0b20250204.dist-info → ipex_llm-2.2.0b20250204.post0.dist-info}/top_level.txt
RENAMED
File without changes