ipex-llm 2.2.0b20250205__py3-none-win_amd64.whl → 2.2.0b20250206__py3-none-win_amd64.whl
This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only and reflects the changes between those package versions.
- ipex_llm/libs/bloom-api.dll +0 -0
- ipex_llm/libs/bloom.dll +0 -0
- ipex_llm/libs/gptneox-api.dll +0 -0
- ipex_llm/libs/gptneox.dll +0 -0
- ipex_llm/libs/libbloom_avx.dll +0 -0
- ipex_llm/libs/libbloom_vnni.dll +0 -0
- ipex_llm/libs/libgptneox_avx.dll +0 -0
- ipex_llm/libs/libgptneox_vnni.dll +0 -0
- ipex_llm/libs/libllama_avx.dll +0 -0
- ipex_llm/libs/libllama_vnni.dll +0 -0
- ipex_llm/libs/libstarcoder_avx.dll +0 -0
- ipex_llm/libs/libstarcoder_vnni.dll +0 -0
- ipex_llm/libs/llama-api.dll +0 -0
- ipex_llm/libs/llama.dll +0 -0
- ipex_llm/libs/main-bloom.exe +0 -0
- ipex_llm/libs/main-gptneox.exe +0 -0
- ipex_llm/libs/main-llama.exe +0 -0
- ipex_llm/libs/main-starcoder.exe +0 -0
- ipex_llm/libs/pipeline.dll +0 -0
- ipex_llm/libs/quantize-bloom.exe +0 -0
- ipex_llm/libs/quantize-bloom_vnni.exe +0 -0
- ipex_llm/libs/quantize-gptneox.exe +0 -0
- ipex_llm/libs/quantize-gptneox_vnni.exe +0 -0
- ipex_llm/libs/quantize-llama.exe +0 -0
- ipex_llm/libs/quantize-llama_vnni.exe +0 -0
- ipex_llm/libs/quantize-starcoder.exe +0 -0
- ipex_llm/libs/quantize-starcoder_vnni.exe +0 -0
- ipex_llm/libs/starcoder-api.dll +0 -0
- ipex_llm/libs/starcoder.dll +0 -0
- ipex_llm/transformers/low_bit_linear.py +5 -4
- ipex_llm/transformers/npu_model.py +17 -4
- ipex_llm/transformers/npu_models/convert.py +6 -2
- ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py +27 -12
- ipex_llm/transformers/npu_pipeline_model/llama.py +24 -11
- ipex_llm/transformers/npu_pipeline_model/minicpm.py +19 -10
- ipex_llm/transformers/npu_pipeline_model/qwen.py +17 -8
- ipex_llm/transformers/qlora.py +2 -2
- ipex_llm/transformers/utils.py +19 -6
- ipex_llm/transformers/xpu_customize_fwd.py +6 -4
- {ipex_llm-2.2.0b20250205.dist-info → ipex_llm-2.2.0b20250206.dist-info}/METADATA +20 -20
- {ipex_llm-2.2.0b20250205.dist-info → ipex_llm-2.2.0b20250206.dist-info}/RECORD +47 -47
- {ipex_llm-2.2.0b20250205.data → ipex_llm-2.2.0b20250206.data}/scripts/ipex-llm-init.bat +0 -0
- {ipex_llm-2.2.0b20250205.data → ipex_llm-2.2.0b20250206.data}/scripts/llm-chat.ps1 +0 -0
- {ipex_llm-2.2.0b20250205.data → ipex_llm-2.2.0b20250206.data}/scripts/llm-cli.ps1 +0 -0
- {ipex_llm-2.2.0b20250205.dist-info → ipex_llm-2.2.0b20250206.dist-info}/WHEEL +0 -0
- {ipex_llm-2.2.0b20250205.dist-info → ipex_llm-2.2.0b20250206.dist-info}/entry_points.txt +0 -0
- {ipex_llm-2.2.0b20250205.dist-info → ipex_llm-2.2.0b20250206.dist-info}/top_level.txt +0 -0
ipex_llm/libs/bloom-api.dll
CHANGED
Binary file
ipex_llm/libs/bloom.dll
CHANGED
Binary file
ipex_llm/libs/gptneox-api.dll
CHANGED
Binary file
ipex_llm/libs/gptneox.dll
CHANGED
Binary file
ipex_llm/libs/libbloom_avx.dll
CHANGED
Binary file
ipex_llm/libs/libbloom_vnni.dll
CHANGED
Binary file
ipex_llm/libs/libgptneox_avx.dll
CHANGED
Binary file
ipex_llm/libs/libgptneox_vnni.dll
CHANGED
Binary file
ipex_llm/libs/libllama_avx.dll
CHANGED
Binary file
ipex_llm/libs/libllama_vnni.dll
CHANGED
Binary file
ipex_llm/libs/libstarcoder_avx.dll
CHANGED
Binary file
ipex_llm/libs/libstarcoder_vnni.dll
CHANGED
Binary file
ipex_llm/libs/llama-api.dll
CHANGED
Binary file
ipex_llm/libs/llama.dll
CHANGED
Binary file
ipex_llm/libs/main-bloom.exe
CHANGED
Binary file
ipex_llm/libs/main-gptneox.exe
CHANGED
Binary file
ipex_llm/libs/main-llama.exe
CHANGED
Binary file
ipex_llm/libs/main-starcoder.exe
CHANGED
Binary file
ipex_llm/libs/pipeline.dll
CHANGED
Binary file
ipex_llm/libs/quantize-bloom.exe
CHANGED
Binary file
ipex_llm/libs/quantize-bloom_vnni.exe
CHANGED
Binary file
ipex_llm/libs/quantize-gptneox.exe
CHANGED
Binary file
ipex_llm/libs/quantize-gptneox_vnni.exe
CHANGED
Binary file
ipex_llm/libs/quantize-llama.exe
CHANGED
Binary file
ipex_llm/libs/quantize-llama_vnni.exe
CHANGED
Binary file
ipex_llm/libs/quantize-starcoder.exe
CHANGED
Binary file
ipex_llm/libs/quantize-starcoder_vnni.exe
CHANGED
Binary file
ipex_llm/libs/starcoder-api.dll
CHANGED
Binary file
ipex_llm/libs/starcoder.dll
CHANGED
Binary file
ipex_llm/transformers/low_bit_linear.py
CHANGED
@@ -51,7 +51,8 @@ from torch import Tensor, dtype, nn
 from operator import mul
 from functools import reduce
 from ipex_llm.transformers.xpu_customize_fwd import custom_fwd, custom_bwd
-from ipex_llm.transformers.utils import
+from ipex_llm.transformers.utils import is_autocast_enabled, get_autocast_dtype
+from ipex_llm.transformers.utils import get_xpu_device_name
 from ipex_llm.transformers.convert import is_deepspeed_available, get_use_vllm

 T = TypeVar("T", bound="torch.nn.Module")
@@ -527,8 +528,8 @@ class MatMulLowBit(torch.autograd.Function):
         A, weight = ctx.tensors
         grad_A, grad_weight = None, None
         if req_gradA:
-            if
-                grad_output = grad_output.to(
+            if is_autocast_enabled("xpu"):
+                grad_output = grad_output.to(get_autocast_dtype("xpu"))
             if weight.qtype == NF4:
                 dequant_weight = xe_linear.dequant(A,
                                                    weight.data.view(torch.uint8),
@@ -615,7 +616,7 @@ class LowBitLinear(nn.Linear):
         is_training = self.training and not torch.is_inference_mode_enabled()
         if is_training:
             # below logic is only for training
-            autocast_dtype = get_autocast_dtype(x)
+            autocast_dtype = get_autocast_dtype(x.device.type)
             if self.compute_dtype is not None and x.device.type == "xpu":
                 x = x.to(self.compute_dtype)  # solve GC issue for unlora module
             elif autocast_dtype is not None:
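The backward hunk above is the core behavioral change in this file: instead of the removed tensor-based call, the code now asks the new device-generic helpers whether XPU autocast is active and, if so, casts the incoming gradient to the active autocast dtype. A minimal standalone sketch of the same pattern; the Function class and the plain matmul are illustrative, not code from the package:

import torch
from ipex_llm.transformers.utils import is_autocast_enabled, get_autocast_dtype

class AutocastAwareMatMul(torch.autograd.Function):
    @staticmethod
    def forward(ctx, A, weight):
        ctx.save_for_backward(A, weight)
        return A @ weight.t()

    @staticmethod
    def backward(ctx, grad_output):
        A, weight = ctx.saved_tensors
        if is_autocast_enabled("xpu"):
            # match the dtype autocast would produce for downstream ops
            grad_output = grad_output.to(get_autocast_dtype("xpu"))
        # d(A @ W^T)/dA = grad @ W ; d(A @ W^T)/dW = grad^T @ A
        return grad_output @ weight, grad_output.t() @ A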
ipex_llm/transformers/npu_model.py
CHANGED
@@ -139,8 +139,10 @@ class _BaseAutoModelClass:
         mock_device = kwargs.pop('device', None)  # For mock on CPU
         convert_model = kwargs.pop('convert_model', False)
         save_directory = kwargs.pop('save_directory', None)
-        fuse_layers = kwargs.pop(
-        imatrix_file = kwargs.pop(
+        fuse_layers = kwargs.pop("fuse_layers", None)
+        imatrix_file = kwargs.pop("imatrix_file", None)
+        keep_ir = kwargs.pop("keep_ir", False)
+        compile_blob = kwargs.pop("compile_blob", True)

         if imatrix_file is not None:
             imatrix_data = load_imatrix_data(imatrix_file)
@@ -236,6 +238,8 @@ class _BaseAutoModelClass:
             "fuse_layers": fuse_layers,
             "imatrix_data": imatrix_data,
             "skip_npu_logic": mock_device == "dummy",
+            "keep_ir": keep_ir,
+            "compile_blob": compile_blob,
         }
         # Dummy will skip npu related logic and save the quantized model
         if mock_device == "dummy":
@@ -280,9 +284,14 @@ class _BaseAutoModelClass:
         fuse_layers = kwargs.pop('fuse_layers', None)
         imatrix_data = kwargs.pop('imatrix_data', None)
         skip_npu_logic = kwargs.pop("skip_npu_logic", False)
+        keep_ir = kwargs.pop("keep_ir", False)
+        compile_blob = kwargs.pop("compile_blob", True)
+
         invalidInputError(save_directory is not None,
                           "Please provide the path to save converted model "
                           "through `save_directory`.")
+        invalidInputError(keep_ir or compile_blob,
+                          "Please save blob or save IR either.")

         if hasattr(model, "llm"):
             llm = model.llm
@@ -323,7 +332,9 @@ class _BaseAutoModelClass:
                 qtype=qtype,
                 save_directory=save_directory,
                 fuse_layers=fuse_layers,
-                has_llm=hasattr(model, "llm")
+                has_llm=hasattr(model, "llm"),
+                keep_ir=keep_ir,
+                compile_blob=compile_blob
             )
         else:
             optimize_llm(
@@ -346,7 +357,9 @@ class _BaseAutoModelClass:
                 qtype=qtype,
                 convert_model=convert_model,
                 save_directory=save_directory,
-                fuse_layers=fuse_layers
+                fuse_layers=fuse_layers,
+                keep_ir=keep_ir,
+                compile_blob=compile_blob)
         model.save_low_bit = types.MethodType(save_low_bit, model)
         model.save_low_bit(save_directory)
         logger.info(f"Converted model has already saved to {save_directory}.")
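Taken together, these hunks thread two new keyword arguments through the NPU conversion path: keep_ir (retain the intermediate IR files) and compile_blob (compile and save the NPU blob, default True), with a guard rejecting the combination where both are False. A hedged sketch of how a caller might pass them; the model id and quantization settings below are illustrative, not prescriptive:

from ipex_llm.transformers.npu_model import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-1B-Instruct",  # illustrative model id
    load_in_low_bit="sym_int4",
    convert_model=True,
    save_directory="./npu-converted-model",
    keep_ir=True,       # new in this build: retain intermediate IR files
    compile_blob=True,  # new in this build: also compile the NPU blob (default)
)

At least one of the two flags must stay True: a run that neither keeps the IR nor compiles a blob would produce nothing to save, which is exactly what the new invalidInputError check enforces.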
ipex_llm/transformers/npu_models/convert.py
CHANGED
@@ -450,7 +450,9 @@ def optimize_llm_single_process(
     qtype: str,
     save_directory: str,
     fuse_layers: int=None,
-    has_llm: bool=False
+    has_llm: bool=False,
+    keep_ir: bool=False,
+    compile_blob: bool=True
 ):
     from ipex_llm.transformers.npu_pipeline_model.convert_pipeline import convert_llm
     from .npu_llm_cpp import load_model_from_file
@@ -463,7 +465,9 @@ def optimize_llm_single_process(
         qtype=qtype,
         convert_model=True,
         save_directory=save_directory,
-        fuse_layers=fuse_layers
+        fuse_layers=fuse_layers,
+        keep_ir=keep_ir,
+        compile_blob=compile_blob)
     try:
         model_ptr = load_model_from_file(save_directory)
         model.kv_len = kv_len
ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py
CHANGED
@@ -196,7 +196,9 @@ def convert_llm(model: torch.nn.Module,
                 qtype: str,
                 convert_model: bool=False,
                 save_directory: str=None,
-                fuse_layers: int=None
+                fuse_layers: int=None,
+                keep_ir: bool=False,
+                compile_blob: bool=True):
     # whether to set layernorm weight as const
     layernorm_const = os.environ.get("IPEX_LLM_NPU_LAYERNORM_CONST", "1") == "1"
     if group_size == 0:
@@ -220,7 +222,9 @@ def convert_llm(model: torch.nn.Module,
                               n_splits_down_proj,
                               group_size,
                               save_directory,
-                              fuse_layers=fuse_layers
+                              fuse_layers=fuse_layers,
+                              keep_ir=keep_ir,
+                              compile_blob=compile_blob)
         return 0
     if model.config.model_type == "llama":
         with tempfile.TemporaryDirectory() as temp_dir:
@@ -428,7 +432,9 @@ def convert_llm_for_deploy(model: torch.nn.Module,
                            n_splits_down_proj: int,
                            group_size: int,
                            save_directory: str=None,
-                           fuse_layers: int=None
+                           fuse_layers: int=None,
+                           keep_ir: bool=False,
+                           compile_blob: bool=True):
     if not os.path.exists(save_directory):
         os.mkdir(save_directory)
     weight_dir = os.path.join(save_directory, "model_weights")
@@ -479,14 +485,17 @@ def convert_llm_for_deploy(model: torch.nn.Module,
         # save fused_layers blobs of fused decoder layers
         convert_fused_qwen_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
                                  save_directory, weight_dir, transpose_value_cache, kv_len,
-                                 group_size, layernorm_const, "decode"
+                                 group_size, layernorm_const, "decode",
+                                 keep_ir=keep_ir, compile_blob=compile_blob)
         # save blob of single prefill layer
         convert_qwen_layer(model, 0, n_splits_linear, n_splits_down_proj,
                            save_directory, weight_dir, transpose_value_cache, max_prompt_len,
-                           group_size, layernorm_const, "prefill"
+                           group_size, layernorm_const, "prefill",
+                           keep_ir=keep_ir, compile_blob=compile_blob)
         # save blob of lmhead and bin of embedding
         convert_lm_head_and_embedding(model, save_directory, weight_dir,
-                                      convert_model=True, group_size=group_size
+                                      convert_model=True, group_size=group_size,
+                                      keep_ir=keep_ir, compile_blob=compile_blob)
     elif model.config.model_type == "llama":
         embedding_post = False
         cos_sin_input = False
@@ -540,15 +549,18 @@ def convert_llm_for_deploy(model: torch.nn.Module,
             convert_lm_head_and_embedding(model, n_splits_linear,
                                           save_directory, weight_dir,
                                           convert_model=True,
-                                          max_prompt_len=max_prompt_len
+                                          max_prompt_len=max_prompt_len,
+                                          keep_ir=keep_ir, compile_blob=compile_blob)
         # save fused_layers blobs of fused decoder layers
         convert_fused_llama_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
                                   save_directory, weight_dir, transpose_value_cache, kv_len,
-                                  group_size, layernorm_const, "decode"
+                                  group_size, layernorm_const, "decode",
+                                  keep_ir=keep_ir, compile_blob=compile_blob)
         # save blob of single prefill layer
         convert_llama_layer(model, 0, n_splits_linear, n_splits_down_proj,
                             save_directory, weight_dir, transpose_value_cache, max_prompt_len,
-                            group_size, layernorm_const, "prefill"
+                            group_size, layernorm_const, "prefill",
+                            keep_ir=keep_ir, compile_blob=compile_blob)
     elif model.config.model_type == "minicpm":
         if group_size == 0:
             fused_layers = 4 if fuse_layers is None else fuse_layers
@@ -577,16 +589,19 @@ def convert_llm_for_deploy(model: torch.nn.Module,
         # save fused_layers blobs of fused decoder layers
         convert_fused_minicpm_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
                                     save_directory, weight_dir, transpose_value_cache, kv_len,
-                                    group_size, layernorm_const, "decode"
+                                    group_size, layernorm_const, "decode",
+                                    keep_ir=keep_ir, compile_blob=compile_blob)
         # save blob of single prefill layer
         convert_minicpm_layer(model, 0, n_splits_linear, n_splits_down_proj,
                               save_directory, weight_dir, transpose_value_cache, max_prompt_len,
-                              group_size, layernorm_const, "prefill"
+                              group_size, layernorm_const, "prefill",
+                              keep_ir=keep_ir, compile_blob=compile_blob)
         # save blob of lmhead and bin of embedding and embedding_post
         convert_lm_head_and_embedding(model, n_splits_linear,
                                       save_directory, weight_dir,
                                       convert_model=True,
-                                      max_prompt_len=max_prompt_len
+                                      max_prompt_len=max_prompt_len,
+                                      keep_ir=keep_ir, compile_blob=compile_blob)

     model.config.update(update_dict)
     model.config.save_pretrained(save_directory)
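Every per-model exporter called above now receives keep_ir and compile_blob and, as the llama/minicpm/qwen hunks below show, deletes the intermediate .bin weight file once the blob has been exported. The recurring shape of that change, as a sketch; export_blob is passed in as a stand-in for update_names_of_IR_and_export_blob, whose full signature lives outside this diff:

import os

def export_and_clean(export_blob, factory, name, out_dir,
                     keep_ir=False, compile_blob=True):
    # forward both flags to the blob exporter ...
    blob_path = export_blob(factory, name, out_dir,
                            keep_ir=keep_ir, compile_blob=compile_blob)
    # ... then drop the intermediate weight file to reclaim disk space
    os.remove(os.path.join(out_dir, name + ".bin"))
    return blob_path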
ipex_llm/transformers/npu_pipeline_model/llama.py
CHANGED
@@ -123,7 +123,8 @@ class Llama32PostEmbedding(NNFactory):


 def convert_lm_head_and_embedding(model, n_splits_linear, temp_dir, weight_dir,
-                                  convert_model=False, max_prompt_len=1
+                                  convert_model=False, max_prompt_len=1,
+                                  keep_ir=False, compile_blob=True):
     num_heads = model.model.layers[0].self_attn.num_heads
     num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads
     head_dim = model.model.layers[0].self_attn.head_dim
@@ -175,7 +176,8 @@ def convert_lm_head_and_embedding(model, n_splits_linear, temp_dir, weight_dir,
         asym=asym
     )
     last_blob_path = update_names_of_IR_and_export_blob(new_lm_head, "lm_head", temp_dir,
-
+                                                        keep_ir=keep_ir, compile_blob=compile_blob)
+    os.remove(os.path.join(temp_dir, "lm_head.bin"))

     # save weights bins files
     if n_splits_linear == 1:
@@ -211,7 +213,9 @@ def convert_lm_head_and_embedding(model, n_splits_linear, temp_dir, weight_dir,
             first_blob_path = None
         else:
             first_blob_path = update_names_of_IR_and_export_blob(new_embedding, "embedding",
-                                                                 temp_dir,
+                                                                 temp_dir, keep_ir=keep_ir,
+                                                                 compile_blob=compile_blob)
+            os.remove(os.path.join(temp_dir, "embedding.bin"))
     else:
         # llama-3.2-3B & llama-3.2-1B
         embedding_layer = model.model.embed_tokens
@@ -235,22 +239,28 @@ def convert_lm_head_and_embedding(model, n_splits_linear, temp_dir, weight_dir,
                                              attention_scaling=attention_scaling,
                                              input_len=1)
             update_names_of_IR_and_export_blob(embedding_post, "embedding_post",
-                                               temp_dir,
+                                               temp_dir, keep_ir=keep_ir, compile_blob=compile_blob)
             embedding_post_prefill = Llama32PostEmbedding(inv_freq=inv_freq,
                                                           attention_scaling=attention_scaling,
                                                           input_len=max_prompt_len)
             update_names_of_IR_and_export_blob(embedding_post_prefill,
                                                "embedding_post_prefill",
-                                               temp_dir,
+                                               temp_dir, keep_ir=keep_ir, compile_blob=compile_blob)
+            os.remove(os.path.join(temp_dir, "embedding_post.bin"))
+            os.remove(os.path.join(temp_dir, "embedding_post_prefill.bin"))
         else:
             first_blob_path = update_names_of_IR_and_export_blob(new_embedding, "embedding",
-                                                                 temp_dir
+                                                                 temp_dir, keep_ir=keep_ir,
+                                                                 compile_blob=compile_blob)
+            os.remove(os.path.join(temp_dir, "embedding.bin"))
+
     return first_blob_path, last_blob_path


 def convert_llama_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
                         temp_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-                        layernorm_const, mode="decode"
+                        layernorm_const, mode="decode",
+                        keep_ir=False, compile_blob=True):
     num_heads = model.model.layers[0].self_attn.num_heads
     num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads
     head_dim = model.model.layers[0].self_attn.head_dim
@@ -317,8 +327,9 @@ def convert_llama_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
     rest_blob_path = update_names_of_IR_and_export_blob(single_decoder,
                                                         decoder_name,
                                                         temp_dir,
-
+                                                        keep_ir=keep_ir, compile_blob=compile_blob,
                                                         npu_dpu_groups=npu_dpu_groups)
+    os.remove(os.path.join(temp_dir, decoder_name + ".bin"))

     if mode == "decode":
         if hasattr(curr_layer.self_attn.rotary_emb, "cos_cached"):
@@ -364,7 +375,8 @@ def convert_llama_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,

 def convert_fused_llama_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
                               save_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-                              layernorm_const, mode="decode"
+                              layernorm_const, mode="decode",
+                              keep_ir=False, compile_blob=True):
     num_heads = model.model.layers[0].self_attn.num_heads
     num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads
     head_dim = model.model.layers[0].self_attn.head_dim
@@ -457,6 +469,7 @@ def convert_fused_llama_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
         update_names_of_IR_and_export_blob(fused_decoder,
                                            f"decoder_layer_{i}",
                                            save_dir,
-
-
+                                           keep_ir=keep_ir,
+                                           compile_blob=compile_blob)
+        os.remove(os.path.join(save_dir, f"decoder_layer_{i}" + ".bin"))
     return 0
ipex_llm/transformers/npu_pipeline_model/minicpm.py
CHANGED
@@ -162,7 +162,8 @@ class MiniCPMLMHead(LLMBaseNNFactory):


 def convert_lm_head_and_embedding(model, n_splits_linear, temp_dir, weight_dir,
-                                  convert_model=False, max_prompt_len=1
+                                  convert_model=False, max_prompt_len=1,
+                                  keep_ir=False, compile_blob=True):
     num_heads = model.model.layers[0].self_attn.num_heads
     num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads
     head_dim = model.model.layers[0].self_attn.head_dim
@@ -230,7 +231,8 @@ def convert_lm_head_and_embedding(model, n_splits_linear, temp_dir, weight_dir,
         asym=asym
     )
     last_blob_path = update_names_of_IR_and_export_blob(new_lm_head, "lm_head", temp_dir,
-
+                                                        keep_ir=keep_ir, compile_blob=compile_blob)
+    os.remove(os.path.join(temp_dir, "lm_head.bin"))

     # save weights bins files
     if n_splits_linear == 1:
@@ -280,22 +282,27 @@ def convert_lm_head_and_embedding(model, n_splits_linear, temp_dir, weight_dir,
                                              dtype=np.float16,
                                              scale_emb=model.config.scale_emb)
         update_names_of_IR_and_export_blob(embedding_post, "embedding_post",
-                                           temp_dir,
+                                           temp_dir, keep_ir=keep_ir, compile_blob=compile_blob)
         embedding_post_prefill = MiniCPMPostEmbedding(max_prompt_len, model.config.hidden_size,
                                                       dtype=np.float16,
                                                       scale_emb=model.config.scale_emb)
         update_names_of_IR_and_export_blob(embedding_post_prefill,
                                            "embedding_post_prefill",
-                                           temp_dir,
+                                           temp_dir, keep_ir=keep_ir, compile_blob=compile_blob)
+        os.remove(os.path.join(temp_dir, "embedding_post.bin"))
+        os.remove(os.path.join(temp_dir, "embedding_post_prefill.bin"))
     else:
         first_blob_path = update_names_of_IR_and_export_blob(new_embedding, "embedding",
-                                                             temp_dir,
+                                                             temp_dir, keep_ir=keep_ir,
+                                                             compile_blob=compile_blob)
+        os.remove(os.path.join(temp_dir, "embedding.bin"))
     return first_blob_path, last_blob_path


 def convert_minicpm_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
                           temp_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-                          layernorm_const, mode="decode"
+                          layernorm_const, mode="decode",
+                          keep_ir=False, compile_blob=True):
     num_heads = model.model.layers[0].self_attn.num_heads
     num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads
     head_dim = model.model.layers[0].self_attn.head_dim
@@ -353,7 +360,8 @@ def convert_minicpm_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
     rest_blob_path = update_names_of_IR_and_export_blob(single_decoder,
                                                         decoder_name,
                                                         temp_dir,
-
+                                                        keep_ir=keep_ir, compile_blob=compile_blob)
+    os.remove(os.path.join(temp_dir, decoder_name + ".bin"))

     if mode == "decode":
         if layernorm_const:
@@ -386,7 +394,8 @@ def convert_minicpm_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,

 def convert_fused_minicpm_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
                                 save_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-                                layernorm_const, mode="decode"
+                                layernorm_const, mode="decode",
+                                keep_ir=False, compile_blob=True):
     num_heads = model.model.layers[0].self_attn.num_heads
     num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads
     head_dim = model.model.layers[0].self_attn.head_dim
@@ -477,6 +486,6 @@ def convert_fused_minicpm_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
         update_names_of_IR_and_export_blob(fused_decoder,
                                            f"decoder_layer_{i}",
                                            save_dir,
-                                           compile_blob=
-
+                                           keep_ir=keep_ir, compile_blob=compile_blob)
+        os.remove(os.path.join(save_dir, f"decoder_layer_{i}" + ".bin"))
     return 0
ipex_llm/transformers/npu_pipeline_model/qwen.py
CHANGED
@@ -24,7 +24,8 @@ from ipex_llm.transformers.npu_models.lm_head import SlicedLMHead


 def convert_lm_head_and_embedding(model, temp_dir, weight_dir,
-                                  convert_model=False, group_size=0
+                                  convert_model=False, group_size=0,
+                                  keep_ir=False, compile_blob=True):
     num_heads = model.model.layers[0].self_attn.num_heads
     head_dim = model.model.layers[0].self_attn.head_dim
     rms_norm_eps = model.config.rms_norm_eps
@@ -84,7 +85,9 @@ def convert_lm_head_and_embedding(model, temp_dir, weight_dir,
     )

     last_blob_path = update_names_of_IR_and_export_blob(new_lm_head, f"lm_head",
-                                                        temp_dir,
+                                                        temp_dir, keep_ir=keep_ir,
+                                                        compile_blob=compile_blob)
+    os.remove(os.path.join(temp_dir, "lm_head.bin"))

     # save weights bins files
     if not isinstance(lm_head, SlicedLMHead):
@@ -119,13 +122,16 @@ def convert_lm_head_and_embedding(model, temp_dir, weight_dir,
         first_blob_path = True
     else:
         first_blob_path = update_names_of_IR_and_export_blob(new_embedding, f"embedding",
-                                                             temp_dir,
+                                                             temp_dir, keep_ir=keep_ir,
+                                                             compile_blob=compile_blob)
+        os.remove(os.path.join(temp_dir, "embedding.bin"))
     return first_blob_path, last_blob_path


 def convert_qwen_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
                        temp_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-                       layernorm_const, mode="decode"
+                       layernorm_const, mode="decode",
+                       keep_ir=False, compile_blob=True):
     num_heads = model.model.layers[0].self_attn.num_heads
     num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads
     head_dim = model.model.layers[0].self_attn.head_dim
@@ -183,8 +189,10 @@ def convert_qwen_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
     )
     rest_blob_path = update_names_of_IR_and_export_blob(single_decoder,
                                                         decoder_name,
-                                                        temp_dir,
+                                                        temp_dir, keep_ir=keep_ir,
+                                                        compile_blob=compile_blob,
                                                         npu_dpu_groups=npu_dpu_groups)
+    os.remove(os.path.join(temp_dir, decoder_name + ".bin"))

     # 0, 1, 2 are input_embed/attention_mask/position_id
     if mode == "decode":
@@ -226,7 +234,8 @@ def convert_qwen_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,

 def convert_fused_qwen_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
                              save_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-                             layernorm_const, mode="decode"
+                             layernorm_const, mode="decode",
+                             keep_ir=False, compile_blob=True):
     num_heads = model.model.layers[0].self_attn.num_heads
     num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads
     head_dim = model.model.layers[0].self_attn.head_dim
@@ -330,6 +339,6 @@ def convert_fused_qwen_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
         update_names_of_IR_and_export_blob(fused_decoder,
                                            f"decoder_layer_{i}",
                                            save_dir,
-                                           compile_blob=
-
+                                           keep_ir=keep_ir, compile_blob=compile_blob)
+        os.remove(os.path.join(save_dir, f"decoder_layer_{i}" + ".bin"))
     return 0
ipex_llm/transformers/qlora.py
CHANGED
@@ -109,7 +109,7 @@ class LoraLowBitLinear(Module, LoraLayer):
         self.qa_pool = torch.nn.Identity()

     def forward(self, x: torch.Tensor):
-        autocast_dtype = get_autocast_dtype(x)
+        autocast_dtype = get_autocast_dtype(x.device.type)
         if x.device.type == "xpu":
            # force to use bf16 on gpu
            x = x.to(torch.bfloat16)
@@ -177,7 +177,7 @@ class LoraBF16Linear(Module, LoraLayer):
         self.is_target_conv_1d_layer = is_target_conv_1d_layer

     def forward(self, x: torch.Tensor):
-        autocast_dtype = get_autocast_dtype(x)
+        autocast_dtype = get_autocast_dtype(x.device.type)
         if x.device.type == "xpu":
            # force to use bf16 on gpu
            x = x.to(torch.bfloat16)
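The only change here is the call-site migration forced by the reworked helper: get_autocast_dtype now takes a device-type string rather than a tensor, so both LoRA forward paths pass x.device.type. In sketch form:

autocast_dtype = get_autocast_dtype(x)              # old: the helper inspected the tensor
autocast_dtype = get_autocast_dtype(x.device.type)  # new: pass "xpu" or "cpu" explicitly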
ipex_llm/transformers/utils.py
CHANGED
@@ -138,26 +138,39 @@ def fix_key(key):
     return key


-def
+def is_autocast_enabled(device_type: str):
     if torch.__version__ >= '2.3':
-
-
+        return torch.is_autocast_enabled(device_type)
+    else:
+        if device_type == "xpu":
+            return torch.xpu.is_autocast_xpu_enabled()
+        elif device_type == "cpu":
+            return torch.is_autocast_cpu_enabled()
+        else:
+            invalidInputError(False,
+                              f"Device type {device_type} is not supported.")
+
+
+def get_autocast_dtype(device_type: str):
+    if torch.__version__ >= '2.3':
+        if torch.is_autocast_enabled(device_type):
+            return torch.get_autocast_dtype(device_type)
         else:
             return None
     else:
-        if
+        if device_type == "xpu":
             if torch.xpu.is_autocast_xpu_enabled():
                 return torch.xpu.get_autocast_xpu_dtype()
             else:
                 return None
-        elif
+        elif device_type == "cpu":
             if torch.is_autocast_cpu_enabled():
                 return torch.get_autocast_cpu_dtype()
             else:
                 return None
         else:
             invalidInputError(False,
-                              f"Device {
+                              f"Device type {device_type} is not supported.")


 def get_xpu_device_name(device: torch.device):
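The rewritten helpers give one entry point for "is autocast on, and at what dtype" that works across PyTorch versions: on torch >= 2.3 they delegate to the device-generic torch.is_autocast_enabled(device_type) / torch.get_autocast_dtype(device_type), and on older builds they fall back to the per-device torch.xpu.is_autocast_xpu_enabled() / torch.is_autocast_cpu_enabled() variants. A quick CPU-side check of the intended semantics, assuming a torch build with CPU autocast available:

import torch
from ipex_llm.transformers.utils import is_autocast_enabled, get_autocast_dtype

with torch.autocast(device_type="cpu", dtype=torch.bfloat16):
    assert is_autocast_enabled("cpu")
    assert get_autocast_dtype("cpu") == torch.bfloat16

assert get_autocast_dtype("cpu") is None  # returns None once autocast is inactive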
ipex_llm/transformers/xpu_customize_fwd.py
CHANGED
@@ -107,6 +107,8 @@ except ModuleNotFoundError:
     np = None  # type: ignore[assignment]
 from typing import Any

+from ipex_llm.transformers.utils import is_autocast_enabled, get_autocast_dtype
+

 def _cast(value, dtype):
     if isinstance(value, torch.Tensor):
@@ -155,12 +157,12 @@ def custom_fwd(fwd=None, *, cast_inputs=None):

     @functools.wraps(fwd)
     def decorate_fwd(*args, **kwargs):
-        args[0]._dtype =
+        args[0]._dtype = get_autocast_dtype("xpu")
         if cast_inputs is None:
-            args[0]._fwd_used_autocast =
+            args[0]._fwd_used_autocast = is_autocast_enabled("xpu")
             return fwd(*args, **kwargs)
         else:
-            autocast_context =
+            autocast_context = is_autocast_enabled("xpu")
             args[0]._fwd_used_autocast = False
             if autocast_context:
                 with torch.xpu.autocast(enabled=False):
@@ -184,7 +186,7 @@ def custom_bwd(bwd):

     @functools.wraps(bwd)
     def decorate_bwd(*args, **kwargs):
-        with torch.
+        with torch.autocast("xpu", enabled=args[0]._fwd_used_autocast, dtype=args[0]._dtype):
             return bwd(*args, **kwargs)

     return decorate_bwd
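With this change, custom_fwd and custom_bwd record XPU autocast state through the shared utils helpers, and custom_bwd re-enters autocast via the device-generic torch.autocast("xpu", ...) with the dtype captured at forward time. These decorators mirror the torch.cuda.amp custom_fwd/custom_bwd API; a sketch of decorating an autograd Function with them (the Function itself is illustrative, not from the package):

import torch
from ipex_llm.transformers.xpu_customize_fwd import custom_fwd, custom_bwd

class FP32MatMul(torch.autograd.Function):
    @staticmethod
    @custom_fwd(cast_inputs=torch.float32)  # run forward in fp32 even under autocast
    def forward(ctx, a, b):
        ctx.save_for_backward(a, b)
        return a @ b

    @staticmethod
    @custom_bwd  # backward re-enters XPU autocast with the recorded dtype
    def backward(ctx, grad_output):
        a, b = ctx.saved_tensors
        # d(a @ b)/da = grad @ b^T ; d(a @ b)/db = a^T @ grad
        return grad_output @ b.t(), a.t() @ grad_output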
{ipex_llm-2.2.0b20250205.dist-info → ipex_llm-2.2.0b20250206.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ipex-llm
-Version: 2.2.0b20250205
+Version: 2.2.0b20250206
 Summary: Large Language Model Develop Toolkit
 Home-page: https://github.com/intel-analytics/ipex-llm
 Author: BigDL Authors
@@ -27,10 +27,10 @@ Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'all'
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'all'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'all'
 Provides-Extra: cpp
-Requires-Dist: bigdl-core-cpp ==2.6.
+Requires-Dist: bigdl-core-cpp ==2.6.0b20250206 ; extra == 'cpp'
 Requires-Dist: setuptools ; extra == 'cpp'
 Provides-Extra: cpp-arl
-Requires-Dist: bigdl-core-cpp ==2.6.
+Requires-Dist: bigdl-core-cpp ==2.6.0b20250206 ; extra == 'cpp-arl'
 Requires-Dist: setuptools ; extra == 'cpp-arl'
 Requires-Dist: onednn-devel ==2024.1.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
 Requires-Dist: onednn ==2024.1.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
@@ -67,7 +67,7 @@ Requires-Dist: transformers ==4.40.0 ; extra == 'npu'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'npu'
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'npu'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'npu'
-Requires-Dist: bigdl-core-npu ==2.6.
+Requires-Dist: bigdl-core-npu ==2.6.0b20250206 ; (platform_system == "Windows") and extra == 'npu'
 Provides-Extra: serving
 Requires-Dist: py-cpuinfo ; extra == 'serving'
 Requires-Dist: fschat[model_worker,webui] ==0.2.36 ; extra == 'serving'
@@ -87,9 +87,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-21 ==2.6.
-Requires-Dist: bigdl-core-xe-batch-21 ==2.6.
-Requires-Dist: bigdl-core-xe-addons-21 ==2.6.
+Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250206 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250206 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250206 ; extra == 'xpu'
 Provides-Extra: xpu-2-1
 Requires-Dist: py-cpuinfo ; extra == 'xpu-2-1'
 Requires-Dist: protobuf ; extra == 'xpu-2-1'
@@ -104,9 +104,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu-2-1'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-21 ==2.6.
-Requires-Dist: bigdl-core-xe-batch-21 ==2.6.
-Requires-Dist: bigdl-core-xe-addons-21 ==2.6.
+Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250206 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250206 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250206 ; extra == 'xpu-2-1'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-1'
 Requires-Dist: dpcpp-cpp-rt ==2024.0.2 ; (platform_system == "Windows") and extra == 'xpu-2-1'
 Requires-Dist: mkl-dpcpp ==2024.0.0 ; (platform_system == "Windows") and extra == 'xpu-2-1'
@@ -124,7 +124,7 @@ Requires-Dist: setuptools ; extra == 'xpu-2-6'
 Requires-Dist: torch ==2.6.0+xpu ; extra == 'xpu-2-6'
 Requires-Dist: torchvision ==0.21.0+xpu ; extra == 'xpu-2-6'
 Requires-Dist: torchaudio ==2.6.0+xpu ; extra == 'xpu-2-6'
-Requires-Dist: bigdl-core-xe-all ==2.6.
+Requires-Dist: bigdl-core-xe-all ==2.6.0b20250206 ; extra == 'xpu-2-6'
 Requires-Dist: onednn-devel ==2025.0.1 ; extra == 'xpu-2-6'
 Requires-Dist: onednn ==2025.0.1 ; extra == 'xpu-2-6'
 Requires-Dist: dpcpp-cpp-rt ==2025.0.2 ; extra == 'xpu-2-6'
@@ -140,9 +140,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arc'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arc'
 Requires-Dist: tabulate ; extra == 'xpu-arc'
 Requires-Dist: setuptools ; extra == 'xpu-arc'
-Requires-Dist: bigdl-core-xe-23 ==2.6.
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250206 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250206 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250206 ; extra == 'xpu-arc'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arc'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
@@ -163,9 +163,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arl'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arl'
 Requires-Dist: tabulate ; extra == 'xpu-arl'
 Requires-Dist: setuptools ; extra == 'xpu-arl'
-Requires-Dist: bigdl-core-xe-23 ==2.6.
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250206 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250206 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250206 ; extra == 'xpu-arl'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arl'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
@@ -186,9 +186,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-lnl'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-lnl'
 Requires-Dist: tabulate ; extra == 'xpu-lnl'
 Requires-Dist: setuptools ; extra == 'xpu-lnl'
-Requires-Dist: bigdl-core-xe-23 ==2.6.
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250206 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250206 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250206 ; extra == 'xpu-lnl'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-lnl'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
{ipex_llm-2.2.0b20250205.dist-info → ipex_llm-2.2.0b20250206.dist-info}/RECORD
CHANGED
@@ -41,35 +41,35 @@ ipex_llm/langchain/llms/transformerspipelinellm.py,sha256=vm522YPPwWxxAPVvQBtxRf
 ipex_llm/langchain/vllm/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
 ipex_llm/langchain/vllm/vllm.py,sha256=6dxc-ZISZQrJilEa_HA827l75Dv9rcHpY_G6FdJ8BVs,7793
 ipex_llm/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ipex_llm/libs/bloom-api.dll,sha256=
-ipex_llm/libs/bloom.dll,sha256=
-ipex_llm/libs/gptneox-api.dll,sha256=
-ipex_llm/libs/gptneox.dll,sha256=
-ipex_llm/libs/libbloom_avx.dll,sha256=
-ipex_llm/libs/libbloom_vnni.dll,sha256=
-ipex_llm/libs/libgptneox_avx.dll,sha256=
-ipex_llm/libs/libgptneox_vnni.dll,sha256=
-ipex_llm/libs/libllama_avx.dll,sha256=
-ipex_llm/libs/libllama_vnni.dll,sha256=
-ipex_llm/libs/libstarcoder_avx.dll,sha256=
-ipex_llm/libs/libstarcoder_vnni.dll,sha256=
-ipex_llm/libs/llama-api.dll,sha256=
-ipex_llm/libs/llama.dll,sha256=
-ipex_llm/libs/main-bloom.exe,sha256=
-ipex_llm/libs/main-gptneox.exe,sha256=
-ipex_llm/libs/main-llama.exe,sha256=
-ipex_llm/libs/main-starcoder.exe,sha256=
-ipex_llm/libs/pipeline.dll,sha256=
-ipex_llm/libs/quantize-bloom.exe,sha256=
-ipex_llm/libs/quantize-bloom_vnni.exe,sha256=
-ipex_llm/libs/quantize-gptneox.exe,sha256=
-ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=
-ipex_llm/libs/quantize-llama.exe,sha256=
-ipex_llm/libs/quantize-llama_vnni.exe,sha256=
-ipex_llm/libs/quantize-starcoder.exe,sha256=
-ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=
-ipex_llm/libs/starcoder-api.dll,sha256=
-ipex_llm/libs/starcoder.dll,sha256=
+ipex_llm/libs/bloom-api.dll,sha256=H0S3QMH9mK_VlsEGqqM7vGKNiuvD1j3_cNOloDEqojg,36352
+ipex_llm/libs/bloom.dll,sha256=GtBKdhbPz4gZDtzdcrjiIa0IoZighOQmWoaScXlCzGA,507904
+ipex_llm/libs/gptneox-api.dll,sha256=_8nji5kq5Z524SGHaElsEFZCkCZJRyjLAbK7dF5EAkE,24576
+ipex_llm/libs/gptneox.dll,sha256=su29UwirxxACBTb9rKx4ln5sKsmmG82J7wbIFv9GOQs,568320
+ipex_llm/libs/libbloom_avx.dll,sha256=v1lgo7B-JJMWEwgs5hDwkm9XSd0nmO1r1X8JoYaJLIs,536576
+ipex_llm/libs/libbloom_vnni.dll,sha256=dBbTV7wWKZKPMw4oZL-H2_ooLdBhwziXLI97xLKvC3w,508416
+ipex_llm/libs/libgptneox_avx.dll,sha256=F_JBremk85c6zqKo0_rse9YXo9v_T52jFwy8Nnwt9yg,596992
+ipex_llm/libs/libgptneox_vnni.dll,sha256=8A6hc2w5Xqq2MoY_t-El6upUqFuI5Cu-ITiiDv9Nfvg,568832
+ipex_llm/libs/libllama_avx.dll,sha256=fADeqa8IK5akM04Cjyd1IRY3Exk8tAuIdNzKBew2zJg,591360
+ipex_llm/libs/libllama_vnni.dll,sha256=SbwkJLCQqtIW9zz_QKzAYb5kqfyUSs8-gddMikbB57s,563200
+ipex_llm/libs/libstarcoder_avx.dll,sha256=vgvvBkIZ18ofJ9rE69gkNn9SpY025RyI7x2VM0APDWA,627712
+ipex_llm/libs/libstarcoder_vnni.dll,sha256=L0cdtY2qHvKpJhFEPl_UkaCVhUw4tcknoIuWbyxQ-ck,599552
+ipex_llm/libs/llama-api.dll,sha256=7yQHdnnFcNiHESH3nrGLyEWscKV9FTPWmDqk-Gf9bA8,25600
+ipex_llm/libs/llama.dll,sha256=pOUGsXP8_NP1byv7z_Q-JU2flWnTjYlCL6lbU-RvORw,562688
+ipex_llm/libs/main-bloom.exe,sha256=bK5DfBLbt4jHwdPl0hw1zaBGQHFWC9MFjiDRqCXFgFA,103424
+ipex_llm/libs/main-gptneox.exe,sha256=3OfGBYDzOpYeB6GxToauh8af4M8i6l4Z6ffYQPdKyIw,98816
+ipex_llm/libs/main-llama.exe,sha256=wZGa8lG3bfaEQi8-DvRC4D3sjMKXms1pwT9OXVME4_Y,99840
+ipex_llm/libs/main-starcoder.exe,sha256=3yZrYUpJ1FYOWCh6PNmWagQ5e6BmimlL25B6AiPmQys,157696
+ipex_llm/libs/pipeline.dll,sha256=uDPNVk7J_dvOX_NTAJs6AEtm5pAnwYLuczHYuTV6Pso,72704
+ipex_llm/libs/quantize-bloom.exe,sha256=6Rl2TEE9-FN0jHrcAYsZjfp0kAxzMoHKuvM31d8pzPs,126464
+ipex_llm/libs/quantize-bloom_vnni.exe,sha256=7Q20DE84l-CDxcVgUxzWspAh0faioQw2iJqdtk9JME4,128000
+ipex_llm/libs/quantize-gptneox.exe,sha256=QRxEqJYH3ShD6KLhW3guxM_SxPusFADvv8j5euhp53Q,104448
+ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=fj2E8ChFakQQzHHpYb_UxNy-9yQ9ZbChhr5PYUCdWkw,104960
+ipex_llm/libs/quantize-llama.exe,sha256=v2Rq663-92bMV3ze9l2-ocxvSjTeqlJegfY5XLf4MRQ,110080
+ipex_llm/libs/quantize-llama_vnni.exe,sha256=ywCgkuUA_jBImNslFpLFdcUvGv2pcbRvRZyZBhJ6-4c,110592
+ipex_llm/libs/quantize-starcoder.exe,sha256=zbiQpagpWGSYqgyHEmHgglDen3nDUS1LyhUXJbt65wE,127488
+ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=xtynT6qbnZ1nBRxsIQbi2JGSKOlvdSCCozDQJiDSwCg,128512
+ipex_llm/libs/starcoder-api.dll,sha256=7c7MWBv57ZhfiynSrYJIKhnE6HMXUTSYYTUGwD7BX9I,21504
+ipex_llm/libs/starcoder.dll,sha256=AzASEAh2HCDC9XIQ0JfUiUDqF-3p4KR3rF71MKQDA4k,599040
 ipex_llm/llamaindex/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
 ipex_llm/llamaindex/llms/__init__.py,sha256=KP1lEdGqDuxPoxL1ZSH25Pm2kKMPJBWUTLR0ckSLMIU,1139
 ipex_llm/llamaindex/llms/bigdlllm.py,sha256=FQBzq1KOjfc6uofTXAha3O7TqpJkNfOFepXQmOVlbnI,26314
@@ -94,19 +94,19 @@ ipex_llm/transformers/kv.py,sha256=k4TU18LlA-Sbq9WNNQnfuzu3RSFBwFhmaV3BcGN5bAo,1
 ipex_llm/transformers/lisa.py,sha256=F5WxbtXQ7RdKulj83h_2DnEIgKiKGZf7zvOmg6QBl2s,3289
 ipex_llm/transformers/loader.py,sha256=AwjV5RpI2t2bedlv7ZhLm8cfd-QJZm5hny-XyjIvdnk,6876
 ipex_llm/transformers/lookup.py,sha256=b6OlZ9OV10R9qeWw8mVryVpDxszkjwLkldvi7GPMJY8,19614
-ipex_llm/transformers/low_bit_linear.py,sha256=
+ipex_llm/transformers/low_bit_linear.py,sha256=3EtbiCAq5HU_r2pGJ9beSDK4NPTN8Jj-aHMqm1jqX18,39177
 ipex_llm/transformers/model.py,sha256=cQJNlAkdfoWmVbWd-TS2hf-Do41mMO9orPvG3FO4Nns,40855
 ipex_llm/transformers/modelling_bigdl.py,sha256=7JpNVMuyq_OmtNUaMFMXdxPWZp2q0QHC02QeA-VTPOw,6709
-ipex_llm/transformers/npu_model.py,sha256=
+ipex_llm/transformers/npu_model.py,sha256=LMmRmhq8IAN9FrXLUeUK2B8XS2OJ5GVWmG0cEdeK-ro,40354
 ipex_llm/transformers/patches.py,sha256=G9KcXxo42H1HJEDaroq4JbBN5P0P0lty7U7kk7-g4tw,991
 ipex_llm/transformers/pipeline_parallel.py,sha256=uNZpOXljNmdoEYnP8U-VFiN4dRZb2piQbIf2bG9LQnE,49051
-ipex_llm/transformers/qlora.py,sha256=
+ipex_llm/transformers/qlora.py,sha256=qV9Y6G5kAaet77LLA3oXn3qQY4ayyAPZ7NAjOlHCS7g,14967
 ipex_llm/transformers/relora.py,sha256=-dYzUV0P-IhO2jFdnzN9-v_sFzJpRj3ZwN9eCJzOoCw,16567
 ipex_llm/transformers/speculative.py,sha256=0XNLgc9dGswJHVPrXo4iM7pPxkWwfFfJMECcivJSnIc,63368
 ipex_llm/transformers/streamer.py,sha256=RrVlLblzCOtABRUpaMXAyaMnCGgLUtAi_YesLumRbww,4842
 ipex_llm/transformers/training_patch.py,sha256=oxMkUtqyvqJiprw6dE3skkYfD1HOmUlH9N0hBkbn0G0,10799
-ipex_llm/transformers/utils.py,sha256=
-ipex_llm/transformers/xpu_customize_fwd.py,sha256=
+ipex_llm/transformers/utils.py,sha256=a-2wbflSd_yYnC5qcMoY5HLR1yT_QpxeX_WpGpaDLrA,17457
+ipex_llm/transformers/xpu_customize_fwd.py,sha256=PUBYLnTbaBXUs3Dnte9Gqln2XFk8iA62SmloWjr7GJI,7668
 ipex_llm/transformers/xpu_ops.py,sha256=z95iTtcDQvNyJOvB4A6B_ECTYjHp4A7x-FsssoETOMs,4914
 ipex_llm/transformers/awq/__init__.py,sha256=Du5gu3-eeAkeDO_dEMBTzrDBA66DSN3uL3-rn8WGXQw,875
 ipex_llm/transformers/awq/act.py,sha256=YwomJzOOKwkKtzGrm4L4kwBstBLO1Z8SK4CKi8PSYVQ,2172
@@ -183,7 +183,7 @@ ipex_llm/transformers/npu_models/baichuan_mp.py,sha256=tHhO-0v5z6IhxsfzAPYWXVbLr
 ipex_llm/transformers/npu_models/chatglm.py,sha256=YzpGLZ7ORt6qkwW9mCwZ_xhOAI8uHSDHJrmqWgNM234,10511
 ipex_llm/transformers/npu_models/chatglm4.py,sha256=J4523DzhIzZxIvlf1V9qU4auzEGKvC80YqyxuCJygjw,9795
 ipex_llm/transformers/npu_models/common.py,sha256=tTUJL7IxVrJSnXle6nla35wTUrBf2sOEt7Ya1qyMezY,4853
-ipex_llm/transformers/npu_models/convert.py,sha256=
+ipex_llm/transformers/npu_models/convert.py,sha256=2YAi8rvEYu_tvzpczKsJBsKjAns5FAPz1MntJTxIQC0,25472
 ipex_llm/transformers/npu_models/convert_mp.py,sha256=Y6Fcde7bXHkZ0wvm8PymxJqvncbDj3ZjMez3SY9qi5U,24452
 ipex_llm/transformers/npu_models/glm_edge.py,sha256=VsJex-6530h4ZQk35TxRe1MnttAHT41omE8LV47LgBE,6723
 ipex_llm/transformers/npu_models/kv.py,sha256=2OSFO9Z6e4nGdVxXEM-Bq2qa_npYYbGmQt3lcCZxTlU,9201
@@ -208,11 +208,11 @@ ipex_llm/transformers/npu_models/xlm_mp.py,sha256=sj8OVun8xJprM7ZJp0XzWa55rqlSIz
 ipex_llm/transformers/npu_pipeline_model/__init__.py,sha256=b2IXvVqQ5cItki021h8s3ymW12RPu8QNPprq4Mn3bDM,586
 ipex_llm/transformers/npu_pipeline_model/baichuan.py,sha256=ICxRzFQ4OIANDkkVi2_4xOeQXmfFXYMx3H52KuE1xR4,6208
 ipex_llm/transformers/npu_pipeline_model/common.py,sha256=QxJoJESpv0BpwO_FBeAT2wKA56wNFfen8iI37PrMKuA,7838
-ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py,sha256
-ipex_llm/transformers/npu_pipeline_model/llama.py,sha256=
-ipex_llm/transformers/npu_pipeline_model/minicpm.py,sha256=
+ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py,sha256=-eHNbRuX2QhYd0-jCyo2pZpHTZTZ108bhObYx8a3CJs,29494
+ipex_llm/transformers/npu_pipeline_model/llama.py,sha256=pmAnawfAn0W8XSr8kGWfxR1HylCLa-Y6mKpFeX-m8UY,20892
+ipex_llm/transformers/npu_pipeline_model/minicpm.py,sha256=H7j_UaHj-IwEBriQ-bunle0-8s2NmvqnL9eYuixnmFc,21398
 ipex_llm/transformers/npu_pipeline_model/pipeline_cpp.py,sha256=JNmodAMg_NQvDILug3E_fGXEh6cd3wsj4bvAzcd-vaU,2749
-ipex_llm/transformers/npu_pipeline_model/qwen.py,sha256=
+ipex_llm/transformers/npu_pipeline_model/qwen.py,sha256=FAfoPlKEAxeU6-J8ltpSev5ithm9AC-urtreu6NGpME,15509
 ipex_llm/utils/__init__.py,sha256=LlUgrD03rfw4iY8zWPtHH6p65Gw76waVOLHaqagETw0,1425
 ipex_llm/utils/benchmark_util_4_29.py,sha256=OU1W1quiaiJGsg1pd3HM9O6PmVSaPA0HHE7R8hNTfmQ,258653
 ipex_llm/utils/benchmark_util_4_42.py,sha256=HEiClCgKDp_T64HH8ulSTly8dvt6UwPDYZfrPVYvXcc,225383
@@ -248,11 +248,11 @@ ipex_llm/vllm/xpu/engine/__init__.py,sha256=pY_CpyuZd72fr6s32ejeKHKFW0K4vUU2rzZj
 ipex_llm/vllm/xpu/engine/engine.py,sha256=k4-D27WS_Gk3mA--w3HWAjPjb4Aiu043MVPi0ZoAUBc,5984
 ipex_llm/vllm/xpu/entrypoints/openai/api_server.py,sha256=GshTZFB8e4PWvqckfbmTOU6b0oLkNn7A-vzLuG9--j8,21544
 ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py,sha256=2rENA2ucynMaIjiZBEh2ez1o5vR32GaP514t39CD7KM,8676
-ipex_llm-2.2.
-ipex_llm-2.2.
-ipex_llm-2.2.
-ipex_llm-2.2.
-ipex_llm-2.2.
-ipex_llm-2.2.
-ipex_llm-2.2.
-ipex_llm-2.2.
+ipex_llm-2.2.0b20250206.data/scripts/ipex-llm-init.bat,sha256=HPtCYuDYwEatq7dAwOvdfVcHYCpAVdbj75K1qh0vQek,2578
+ipex_llm-2.2.0b20250206.data/scripts/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
+ipex_llm-2.2.0b20250206.data/scripts/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
+ipex_llm-2.2.0b20250206.dist-info/METADATA,sha256=pAr_-dBEJB_J2lV8oNgJkJ5bGTObiseNHISkXAGkY9I,12879
+ipex_llm-2.2.0b20250206.dist-info/WHEEL,sha256=6iYPr8vTHsyDK75jr9X0V3I9wPSVmtwr_8fdATBciGk,98
+ipex_llm-2.2.0b20250206.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
+ipex_llm-2.2.0b20250206.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
+ipex_llm-2.2.0b20250206.dist-info/RECORD,,
{ipex_llm-2.2.0b20250205.data → ipex_llm-2.2.0b20250206.data}/scripts/ipex-llm-init.bat
File without changes
{ipex_llm-2.2.0b20250205.data → ipex_llm-2.2.0b20250206.data}/scripts/llm-chat.ps1
File without changes
{ipex_llm-2.2.0b20250205.data → ipex_llm-2.2.0b20250206.data}/scripts/llm-cli.ps1
File without changes
{ipex_llm-2.2.0b20250205.dist-info → ipex_llm-2.2.0b20250206.dist-info}/WHEEL
File without changes
{ipex_llm-2.2.0b20250205.dist-info → ipex_llm-2.2.0b20250206.dist-info}/entry_points.txt
File without changes
{ipex_llm-2.2.0b20250205.dist-info → ipex_llm-2.2.0b20250206.dist-info}/top_level.txt
File without changes