ipex-llm 2.2.0b20250204__py3-none-win_amd64.whl → 2.2.0b20250204.post0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. ipex_llm/libs/bloom-api.dll +0 -0
  2. ipex_llm/libs/bloom.dll +0 -0
  3. ipex_llm/libs/gptneox-api.dll +0 -0
  4. ipex_llm/libs/gptneox.dll +0 -0
  5. ipex_llm/libs/libbloom_avx.dll +0 -0
  6. ipex_llm/libs/libbloom_vnni.dll +0 -0
  7. ipex_llm/libs/libgptneox_avx.dll +0 -0
  8. ipex_llm/libs/libgptneox_vnni.dll +0 -0
  9. ipex_llm/libs/libllama_avx.dll +0 -0
  10. ipex_llm/libs/libllama_vnni.dll +0 -0
  11. ipex_llm/libs/libstarcoder_avx.dll +0 -0
  12. ipex_llm/libs/libstarcoder_vnni.dll +0 -0
  13. ipex_llm/libs/llama-api.dll +0 -0
  14. ipex_llm/libs/llama.dll +0 -0
  15. ipex_llm/libs/main-bloom.exe +0 -0
  16. ipex_llm/libs/main-gptneox.exe +0 -0
  17. ipex_llm/libs/main-llama.exe +0 -0
  18. ipex_llm/libs/main-starcoder.exe +0 -0
  19. ipex_llm/libs/pipeline.dll +0 -0
  20. ipex_llm/libs/quantize-bloom.exe +0 -0
  21. ipex_llm/libs/quantize-bloom_vnni.exe +0 -0
  22. ipex_llm/libs/quantize-gptneox.exe +0 -0
  23. ipex_llm/libs/quantize-gptneox_vnni.exe +0 -0
  24. ipex_llm/libs/quantize-llama.exe +0 -0
  25. ipex_llm/libs/quantize-llama_vnni.exe +0 -0
  26. ipex_llm/libs/quantize-starcoder.exe +0 -0
  27. ipex_llm/libs/quantize-starcoder_vnni.exe +0 -0
  28. ipex_llm/libs/starcoder-api.dll +0 -0
  29. ipex_llm/libs/starcoder.dll +0 -0
  30. ipex_llm/transformers/low_bit_linear.py +33 -70
  31. ipex_llm/transformers/models/utils.py +0 -13
  32. ipex_llm/transformers/npu_model.py +17 -4
  33. ipex_llm/transformers/npu_models/convert.py +6 -2
  34. ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py +27 -12
  35. ipex_llm/transformers/npu_pipeline_model/llama.py +24 -11
  36. ipex_llm/transformers/npu_pipeline_model/minicpm.py +19 -10
  37. ipex_llm/transformers/npu_pipeline_model/qwen.py +17 -8
  38. ipex_llm/transformers/qlora.py +2 -2
  39. ipex_llm/transformers/utils.py +19 -6
  40. ipex_llm/transformers/xpu_customize_fwd.py +6 -4
  41. ipex_llm/transformers/xpu_ops.py +4 -3
  42. {ipex_llm-2.2.0b20250204.dist-info → ipex_llm-2.2.0b20250204.post0.dist-info}/METADATA +23 -30
  43. {ipex_llm-2.2.0b20250204.dist-info → ipex_llm-2.2.0b20250204.post0.dist-info}/RECORD +49 -49
  44. {ipex_llm-2.2.0b20250204.data → ipex_llm-2.2.0b20250204.post0.data}/scripts/ipex-llm-init.bat +0 -0
  45. {ipex_llm-2.2.0b20250204.data → ipex_llm-2.2.0b20250204.post0.data}/scripts/llm-chat.ps1 +0 -0
  46. {ipex_llm-2.2.0b20250204.data → ipex_llm-2.2.0b20250204.post0.data}/scripts/llm-cli.ps1 +0 -0
  47. {ipex_llm-2.2.0b20250204.dist-info → ipex_llm-2.2.0b20250204.post0.dist-info}/WHEEL +0 -0
  48. {ipex_llm-2.2.0b20250204.dist-info → ipex_llm-2.2.0b20250204.post0.dist-info}/entry_points.txt +0 -0
  49. {ipex_llm-2.2.0b20250204.dist-info → ipex_llm-2.2.0b20250204.post0.dist-info}/top_level.txt +0 -0
ipex_llm/libs (29 binary files) CHANGED
All 29 DLLs and EXEs listed above under ipex_llm/libs/ were rebuilt in this release; their binary contents differ, so no textual diff is shown.
ipex_llm/transformers/low_bit_linear.py CHANGED
@@ -51,7 +51,8 @@ from torch import Tensor, dtype, nn
  from operator import mul
  from functools import reduce
  from ipex_llm.transformers.xpu_customize_fwd import custom_fwd, custom_bwd
- from ipex_llm.transformers.utils import get_autocast_dtype, get_xpu_device_name
+ from ipex_llm.transformers.utils import is_autocast_enabled, get_autocast_dtype
+ from ipex_llm.transformers.utils import get_xpu_device_name
  from ipex_llm.transformers.convert import is_deepspeed_available, get_use_vllm

  T = TypeVar("T", bound="torch.nn.Module")
@@ -500,16 +501,16 @@ class MatMulLowBit(torch.autograd.Function):

  @staticmethod
  @custom_fwd
- def forward(ctx, A, weight, input_seq_size):
+ def forward(ctx, A, weight, output_size):
  ctx.is_empty = False
  import xe_linear
  if weight.qtype == NF4:
  result = xe_linear.forward_new(A,
  weight.data.view(torch.uint8),
  weight.qtype,
- input_seq_size)
+ output_size)
  else:
- result = xe_linear.forward_new(A, weight.data, weight.qtype, input_seq_size)
+ result = xe_linear.forward_new(A, weight.data, weight.qtype, output_size)
  if any(ctx.needs_input_grad[:2]):
  ctx.tensors = (A, weight)
  else:
@@ -527,8 +528,8 @@ class MatMulLowBit(torch.autograd.Function):
  A, weight = ctx.tensors
  grad_A, grad_weight = None, None
  if req_gradA:
- if torch.xpu.is_autocast_xpu_enabled():
- grad_output = grad_output.to(torch.xpu.get_autocast_xpu_dtype())
+ if is_autocast_enabled("xpu"):
+ grad_output = grad_output.to(get_autocast_dtype("xpu"))
  if weight.qtype == NF4:
  dequant_weight = xe_linear.dequant(A,
  weight.data.view(torch.uint8),
@@ -615,7 +616,7 @@ class LowBitLinear(nn.Linear):
  is_training = self.training and not torch.is_inference_mode_enabled()
  if is_training:
  # below logic is only for training
- autocast_dtype = get_autocast_dtype(x)
+ autocast_dtype = get_autocast_dtype(x.device.type)
  if self.compute_dtype is not None and x.device.type == "xpu":
  x = x.to(self.compute_dtype) # solve GC issue for unlora module
  elif autocast_dtype is not None:
@@ -627,89 +628,50 @@ class LowBitLinear(nn.Linear):
  if self.optimize_lm_head:
  x = reshape_lm_head_input(x)

- # [batch, input_num, in_len]
- # input_num == token num for Transformer
- x_shape = x.shape
- # Output shape, e.g., [batch, input_num, out_len]
- new_shape = x_shape[:-1] + (self.out_len,)
+ # [batch, seq_len, in_len] -> [batch, seq_len, out_len]
+ new_shape = x.shape[:-1] + (self.out_len,)
+
  # Activation is empty tensor, e.g., [1, 0, 4096]
- if 0 in x_shape:
+ if 0 in x.shape:
  # return empty tensor with output shape, x.dtype and x.device
  return torch.empty(new_shape, dtype=x.dtype, device=x.device)

- x_2d = x.contiguous().view(-1, x_shape[-1])
-
  if self.act_order:
- x_2d = x_2d[:, self.g_idx_map]
- # x0 for weight
- x0 = self.weight.data
-
- if x0.device.type == "xpu":
- # GPU logic
- try:
- import xe_linear
- from ipex_llm.transformers.models.utils import use_xmx
- except ModuleNotFoundError:
- invalidInputError(False,
- "Please `pip install bigdl_core_xe` first.")
+ x = x[..., self.g_idx_map]

- if x_2d.is_contiguous() is False:
- x_2d = x_2d.contiguous()
+ x_2d = x.contiguous().view(-1, x.shape[-1])

- if len(x_shape) == 3:
- input_seq_size = x_shape[1]
- elif len(x_shape) < 3:
- input_seq_size = 1
-
- if is_training:
- # training path
- if x_2d.requires_grad:
- result = MatMulLowBit.apply(x_2d, self.weight, input_seq_size)
- else:
- if self.weight.qtype == NF4:
- result = xe_linear.forward_new(x_2d,
- self.weight.data.view(torch.uint8),
- self.weight.qtype,
- input_seq_size)
- else:
- result = xe_linear.forward_new(x_2d,
- self.weight.data,
- self.weight.qtype,
- input_seq_size)
+ if self.weight.device.type == "xpu":
+ if is_training and x_2d.requires_grad:
+ result = MatMulLowBit.apply(x_2d, self.weight, self.out_len)
  else:
- # inference path
- # current workaround to reduce first token latency of fp32 input
- # sometimes fp16 cause nan and training instability
- # disable the conversion when training
- # TODO: may modify the input length condition for empty cache.
  do_empty_cache = self.low_memory_mode and x_2d.shape[0] >= 1024
  if do_empty_cache:
  torch.xpu.empty_cache()

+ if self.qtype == NF4:
+ w = self.weight.data.view(torch.uint8)
+ else:
+ w = self.weight.data
+
  if use_batch_forward(x_2d, self.weight.qtype, self.out_len):
  import xe_batch
- result = xe_batch.batch_forward(x_2d, self.weight.data, self.weight.qtype)
- elif (
- self.conver_to_half
- and x_2d.shape[0] > 1
- and x_2d.dtype == torch.float32
- and not use_xmx(x_2d, self.weight.qtype)
- ):
+ result = xe_batch.batch_forward(x_2d, w, self.qtype)
+ elif not is_training and self.conver_to_half \
+ and x_2d.shape[0] > 1 and x_2d.dtype == torch.float:
+ import xe_linear
  x_2d = x_2d.half()
- result = xe_linear.forward_new(x_2d, self.weight.data,
- self.weight.qtype, input_seq_size)
+ result = xe_linear.forward_new(x_2d, w, self.qtype, self.out_len)
  result = result.to(x.dtype)
  else:
- if self.weight.qtype == NF4:
- result = xe_linear.forward_new(x_2d, self.weight.data.view(torch.uint8),
- self.weight.qtype, input_seq_size)
- else:
- result = xe_linear.forward_new(x_2d, self.weight.data,
- self.weight.qtype, input_seq_size)
+ import xe_linear
+ result = xe_linear.forward_new(x_2d, w, self.qtype, self.out_len)

  if do_empty_cache:
  torch.xpu.empty_cache()
+
  result = result.view(new_shape)
+
  if self.mp_group is not None:
  if get_use_vllm():
  result = self.mp_group.all_reduce(result)
@@ -718,6 +680,7 @@ class LowBitLinear(nn.Linear):
  dist.inference_all_reduce(result, group=self.mp_group)
  else:
  invalidInputError(False, "mp_group is not None, but no supported backend found")
+
  if self.bias is not None:
  result += self.bias
  else:
@@ -731,7 +694,7 @@ class LowBitLinear(nn.Linear):
  result = MatMulLowBitCPU.apply(x, self.weight)
  else:
  from ipex_llm.utils.isa_checker import is_server, is_spr
-
+ x0 = self.weight.data
  # convert if necessary, and compute a linear result
  if is_server() and (not is_spr()) and \
  self.qtype == SYM_INT4 and x_2d.shape[0] >= TORCH_LINEAR_THRESHOLD:
ipex_llm/transformers/models/utils.py CHANGED
@@ -259,19 +259,6 @@ def mlp_fusion_check(x, qtype, training):
  return True


- def use_xmx(x: torch.Tensor, qtype: int):
- device = get_xpu_device_name(x.device)
- return (
- device in ["arc", "pvc"]
- and qtype in [SYM_INT4, SYM_INT8, FP8E4, FP8E5, WOQ_INT4]
- and (
- (device == "pvc" and 1 < x.size(0) <= 16)
- or
- (device != "pvc" and 1 < x.size(0) <= 64)
- )
- )
-
-
  def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
  batch, num_key_value_heads, slen, head_dim = hidden_states.shape
  if n_rep == 1:
ipex_llm/transformers/npu_model.py CHANGED
@@ -139,8 +139,10 @@ class _BaseAutoModelClass:
  mock_device = kwargs.pop('device', None) # For mock on CPU
  convert_model = kwargs.pop('convert_model', False)
  save_directory = kwargs.pop('save_directory', None)
- fuse_layers = kwargs.pop('fuse_layers', None)
- imatrix_file = kwargs.pop('imatrix_file', None)
+ fuse_layers = kwargs.pop("fuse_layers", None)
+ imatrix_file = kwargs.pop("imatrix_file", None)
+ keep_ir = kwargs.pop("keep_ir", False)
+ compile_blob = kwargs.pop("compile_blob", True)

  if imatrix_file is not None:
  imatrix_data = load_imatrix_data(imatrix_file)
@@ -236,6 +238,8 @@ class _BaseAutoModelClass:
  "fuse_layers": fuse_layers,
  "imatrix_data": imatrix_data,
  "skip_npu_logic": mock_device == "dummy",
+ "keep_ir": keep_ir,
+ "compile_blob": compile_blob,
  }
  # Dummy will skip npu related logic and save the quantized model
  if mock_device == "dummy":
@@ -280,9 +284,14 @@ class _BaseAutoModelClass:
  fuse_layers = kwargs.pop('fuse_layers', None)
  imatrix_data = kwargs.pop('imatrix_data', None)
  skip_npu_logic = kwargs.pop("skip_npu_logic", False)
+ keep_ir = kwargs.pop("keep_ir", False)
+ compile_blob = kwargs.pop("compile_blob", True)
+
  invalidInputError(save_directory is not None,
  "Please provide the path to save converted model "
  "through `save_directory`.")
+ invalidInputError(keep_ir or compile_blob,
+ "Please save blob or save IR either.")

  if hasattr(model, "llm"):
  llm = model.llm
@@ -323,7 +332,9 @@ class _BaseAutoModelClass:
  qtype=qtype,
  save_directory=save_directory,
  fuse_layers=fuse_layers,
- has_llm=hasattr(model, "llm")
+ has_llm=hasattr(model, "llm"),
+ keep_ir=keep_ir,
+ compile_blob=compile_blob
  )
  else:
  optimize_llm(
@@ -346,7 +357,9 @@ class _BaseAutoModelClass:
  qtype=qtype,
  convert_model=convert_model,
  save_directory=save_directory,
- fuse_layers=fuse_layers)
+ fuse_layers=fuse_layers,
+ keep_ir=keep_ir,
+ compile_blob=compile_blob)
  model.save_low_bit = types.MethodType(save_low_bit, model)
  model.save_low_bit(save_directory)
  logger.info(f"Converted model has already saved to {save_directory}.")
ipex_llm/transformers/npu_models/convert.py CHANGED
@@ -450,7 +450,9 @@ def optimize_llm_single_process(
  qtype: str,
  save_directory: str,
  fuse_layers: int=None,
- has_llm: bool=False
+ has_llm: bool=False,
+ keep_ir: bool=False,
+ compile_blob: bool=True
  ):
  from ipex_llm.transformers.npu_pipeline_model.convert_pipeline import convert_llm
  from .npu_llm_cpp import load_model_from_file
@@ -463,7 +465,9 @@ def optimize_llm_single_process(
  qtype=qtype,
  convert_model=True,
  save_directory=save_directory,
- fuse_layers=fuse_layers)
+ fuse_layers=fuse_layers,
+ keep_ir=keep_ir,
+ compile_blob=compile_blob)
  try:
  model_ptr = load_model_from_file(save_directory)
  model.kv_len = kv_len
ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py CHANGED
@@ -196,7 +196,9 @@ def convert_llm(model: torch.nn.Module,
  qtype: str,
  convert_model: bool=False,
  save_directory: str=None,
- fuse_layers: int=None):
+ fuse_layers: int=None,
+ keep_ir: bool=False,
+ compile_blob: bool=True):
  # whether to set layernorm weight as const
  layernorm_const = os.environ.get("IPEX_LLM_NPU_LAYERNORM_CONST", "1") == "1"
  if group_size == 0:
@@ -220,7 +222,9 @@ def convert_llm(model: torch.nn.Module,
  n_splits_down_proj,
  group_size,
  save_directory,
- fuse_layers=fuse_layers)
+ fuse_layers=fuse_layers,
+ keep_ir=keep_ir,
+ compile_blob=compile_blob)
  return 0
  if model.config.model_type == "llama":
  with tempfile.TemporaryDirectory() as temp_dir:
@@ -428,7 +432,9 @@ def convert_llm_for_deploy(model: torch.nn.Module,
  n_splits_down_proj: int,
  group_size: int,
  save_directory: str=None,
- fuse_layers: int=None):
+ fuse_layers: int=None,
+ keep_ir: bool=False,
+ compile_blob: bool=True):
  if not os.path.exists(save_directory):
  os.mkdir(save_directory)
  weight_dir = os.path.join(save_directory, "model_weights")
@@ -479,14 +485,17 @@ def convert_llm_for_deploy(model: torch.nn.Module,
  # save fused_layers blobs of fused decoder layers
  convert_fused_qwen_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
  save_directory, weight_dir, transpose_value_cache, kv_len,
- group_size, layernorm_const, "decode")
+ group_size, layernorm_const, "decode",
+ keep_ir=keep_ir, compile_blob=compile_blob)
  # save blob of single prefill layer
  convert_qwen_layer(model, 0, n_splits_linear, n_splits_down_proj,
  save_directory, weight_dir, transpose_value_cache, max_prompt_len,
- group_size, layernorm_const, "prefill")
+ group_size, layernorm_const, "prefill",
+ keep_ir=keep_ir, compile_blob=compile_blob)
  # save blob of lmhead and bin of embedding
  convert_lm_head_and_embedding(model, save_directory, weight_dir,
- convert_model=True, group_size=group_size)
+ convert_model=True, group_size=group_size,
+ keep_ir=keep_ir, compile_blob=compile_blob)
  elif model.config.model_type == "llama":
  embedding_post = False
  cos_sin_input = False
@@ -540,15 +549,18 @@ def convert_llm_for_deploy(model: torch.nn.Module,
  convert_lm_head_and_embedding(model, n_splits_linear,
  save_directory, weight_dir,
  convert_model=True,
- max_prompt_len=max_prompt_len)
+ max_prompt_len=max_prompt_len,
+ keep_ir=keep_ir, compile_blob=compile_blob)
  # save fused_layers blobs of fused decoder layers
  convert_fused_llama_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
  save_directory, weight_dir, transpose_value_cache, kv_len,
- group_size, layernorm_const, "decode")
+ group_size, layernorm_const, "decode",
+ keep_ir=keep_ir, compile_blob=compile_blob)
  # save blob of single prefill layer
  convert_llama_layer(model, 0, n_splits_linear, n_splits_down_proj,
  save_directory, weight_dir, transpose_value_cache, max_prompt_len,
- group_size, layernorm_const, "prefill")
+ group_size, layernorm_const, "prefill",
+ keep_ir=keep_ir, compile_blob=compile_blob)
  elif model.config.model_type == "minicpm":
  if group_size == 0:
  fused_layers = 4 if fuse_layers is None else fuse_layers
@@ -577,16 +589,19 @@ def convert_llm_for_deploy(model: torch.nn.Module,
  # save fused_layers blobs of fused decoder layers
  convert_fused_minicpm_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
  save_directory, weight_dir, transpose_value_cache, kv_len,
- group_size, layernorm_const, "decode")
+ group_size, layernorm_const, "decode",
+ keep_ir=keep_ir, compile_blob=compile_blob)
  # save blob of single prefill layer
  convert_minicpm_layer(model, 0, n_splits_linear, n_splits_down_proj,
  save_directory, weight_dir, transpose_value_cache, max_prompt_len,
- group_size, layernorm_const, "prefill")
+ group_size, layernorm_const, "prefill",
+ keep_ir=keep_ir, compile_blob=compile_blob)
  # save blob of lmhead and bin of embedding and embedding_post
  convert_lm_head_and_embedding(model, n_splits_linear,
  save_directory, weight_dir,
  convert_model=True,
- max_prompt_len=max_prompt_len)
+ max_prompt_len=max_prompt_len,
+ keep_ir=keep_ir, compile_blob=compile_blob)

  model.config.update(update_dict)
  model.config.save_pretrained(save_directory)
ipex_llm/transformers/npu_pipeline_model/llama.py CHANGED
@@ -123,7 +123,8 @@ class Llama32PostEmbedding(NNFactory):


  def convert_lm_head_and_embedding(model, n_splits_linear, temp_dir, weight_dir,
- convert_model=False, max_prompt_len=1):
+ convert_model=False, max_prompt_len=1,
+ keep_ir=False, compile_blob=True):
  num_heads = model.model.layers[0].self_attn.num_heads
  num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads
  head_dim = model.model.layers[0].self_attn.head_dim
@@ -175,7 +176,8 @@ def convert_lm_head_and_embedding(model, n_splits_linear, temp_dir, weight_dir,
  asym=asym
  )
  last_blob_path = update_names_of_IR_and_export_blob(new_lm_head, "lm_head", temp_dir,
- True, False)
+ keep_ir=keep_ir, compile_blob=compile_blob)
+ os.remove(os.path.join(temp_dir, "lm_head.bin"))

  # save weights bins files
  if n_splits_linear == 1:
@@ -211,7 +213,9 @@ def convert_lm_head_and_embedding(model, n_splits_linear, temp_dir, weight_dir,
  first_blob_path = None
  else:
  first_blob_path = update_names_of_IR_and_export_blob(new_embedding, "embedding",
- temp_dir, True, False)
+ temp_dir, keep_ir=keep_ir,
+ compile_blob=compile_blob)
+ os.remove(os.path.join(temp_dir, "embedding.bin"))
  else:
  # llama-3.2-3B & llama-3.2-1B
  embedding_layer = model.model.embed_tokens
@@ -235,22 +239,28 @@ def convert_lm_head_and_embedding(model, n_splits_linear, temp_dir, weight_dir,
  attention_scaling=attention_scaling,
  input_len=1)
  update_names_of_IR_and_export_blob(embedding_post, "embedding_post",
- temp_dir, True, False)
+ temp_dir, keep_ir=keep_ir, compile_blob=compile_blob)
  embedding_post_prefill = Llama32PostEmbedding(inv_freq=inv_freq,
  attention_scaling=attention_scaling,
  input_len=max_prompt_len)
  update_names_of_IR_and_export_blob(embedding_post_prefill,
  "embedding_post_prefill",
- temp_dir, True, False)
+ temp_dir, keep_ir=keep_ir, compile_blob=compile_blob)
+ os.remove(os.path.join(temp_dir, "embedding_post.bin"))
+ os.remove(os.path.join(temp_dir, "embedding_post_prefill.bin"))
  else:
  first_blob_path = update_names_of_IR_and_export_blob(new_embedding, "embedding",
- temp_dir)
+ temp_dir, keep_ir=keep_ir,
+ compile_blob=compile_blob)
+ os.remove(os.path.join(temp_dir, "embedding.bin"))
+
  return first_blob_path, last_blob_path


  def convert_llama_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
  temp_dir, weight_dir, transpose_value_cache, kv_len, group_size,
- layernorm_const, mode="decode"):
+ layernorm_const, mode="decode",
+ keep_ir=False, compile_blob=True):
  num_heads = model.model.layers[0].self_attn.num_heads
  num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads
  head_dim = model.model.layers[0].self_attn.head_dim
@@ -317,8 +327,9 @@ def convert_llama_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
  rest_blob_path = update_names_of_IR_and_export_blob(single_decoder,
  decoder_name,
  temp_dir,
- True, False,
+ keep_ir=keep_ir, compile_blob=compile_blob,
  npu_dpu_groups=npu_dpu_groups)
+ os.remove(os.path.join(temp_dir, decoder_name + ".bin"))

  if mode == "decode":
  if hasattr(curr_layer.self_attn.rotary_emb, "cos_cached"):
@@ -364,7 +375,8 @@ def convert_llama_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,

  def convert_fused_llama_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
  save_dir, weight_dir, transpose_value_cache, kv_len, group_size,
- layernorm_const, mode="decode"):
+ layernorm_const, mode="decode",
+ keep_ir=False, compile_blob=True):
  num_heads = model.model.layers[0].self_attn.num_heads
  num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads
  head_dim = model.model.layers[0].self_attn.head_dim
@@ -457,6 +469,7 @@ def convert_fused_llama_layer(model, fused_layers, n_splits_linear, n_splits_down
  update_names_of_IR_and_export_blob(fused_decoder,
  f"decoder_layer_{i}",
  save_dir,
- compile_blob=True,
- keep_ir=False)
+ keep_ir=keep_ir,
+ compile_blob=compile_blob)
+ os.remove(os.path.join(save_dir, f"decoder_layer_{i}" + ".bin"))
  return 0
ipex_llm/transformers/npu_pipeline_model/minicpm.py CHANGED
@@ -162,7 +162,8 @@ class MiniCPMLMHead(LLMBaseNNFactory):


  def convert_lm_head_and_embedding(model, n_splits_linear, temp_dir, weight_dir,
- convert_model=False, max_prompt_len=1):
+ convert_model=False, max_prompt_len=1,
+ keep_ir=False, compile_blob=True):
  num_heads = model.model.layers[0].self_attn.num_heads
  num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads
  head_dim = model.model.layers[0].self_attn.head_dim
@@ -230,7 +231,8 @@ def convert_lm_head_and_embedding(model, n_splits_linear, temp_dir, weight_dir,
  asym=asym
  )
  last_blob_path = update_names_of_IR_and_export_blob(new_lm_head, "lm_head", temp_dir,
- True, True)
+ keep_ir=keep_ir, compile_blob=compile_blob)
+ os.remove(os.path.join(temp_dir, "lm_head.bin"))

  # save weights bins files
  if n_splits_linear == 1:
@@ -280,22 +282,27 @@ def convert_lm_head_and_embedding(model, n_splits_linear, temp_dir, weight_dir,
  dtype=np.float16,
  scale_emb=model.config.scale_emb)
  update_names_of_IR_and_export_blob(embedding_post, "embedding_post",
- temp_dir, True, False)
+ temp_dir, keep_ir=keep_ir, compile_blob=compile_blob)
  embedding_post_prefill = MiniCPMPostEmbedding(max_prompt_len, model.config.hidden_size,
  dtype=np.float16,
  scale_emb=model.config.scale_emb)
  update_names_of_IR_and_export_blob(embedding_post_prefill,
  "embedding_post_prefill",
- temp_dir, True, False)
+ temp_dir, keep_ir=keep_ir, compile_blob=compile_blob)
+ os.remove(os.path.join(temp_dir, "embedding_post.bin"))
+ os.remove(os.path.join(temp_dir, "embedding_post_prefill.bin"))
  else:
  first_blob_path = update_names_of_IR_and_export_blob(new_embedding, "embedding",
- temp_dir, True, False)
+ temp_dir, keep_ir=keep_ir,
+ compile_blob=compile_blob)
+ os.remove(os.path.join(temp_dir, "embedding.bin"))
  return first_blob_path, last_blob_path


  def convert_minicpm_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
  temp_dir, weight_dir, transpose_value_cache, kv_len, group_size,
- layernorm_const, mode="decode"):
+ layernorm_const, mode="decode",
+ keep_ir=False, compile_blob=True):
  num_heads = model.model.layers[0].self_attn.num_heads
  num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads
  head_dim = model.model.layers[0].self_attn.head_dim
@@ -353,7 +360,8 @@ def convert_minicpm_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
  rest_blob_path = update_names_of_IR_and_export_blob(single_decoder,
  decoder_name,
  temp_dir,
- True, True)
+ keep_ir=keep_ir, compile_blob=compile_blob)
+ os.remove(os.path.join(temp_dir, decoder_name + ".bin"))

  if mode == "decode":
  if layernorm_const:
@@ -386,7 +394,8 @@ def convert_minicpm_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,

  def convert_fused_minicpm_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
  save_dir, weight_dir, transpose_value_cache, kv_len, group_size,
- layernorm_const, mode="decode"):
+ layernorm_const, mode="decode",
+ keep_ir=False, compile_blob=True):
  num_heads = model.model.layers[0].self_attn.num_heads
  num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads
  head_dim = model.model.layers[0].self_attn.head_dim
@@ -477,6 +486,6 @@ def convert_fused_minicpm_layer(model, fused_layers, n_splits_linear, n_splits_d
  update_names_of_IR_and_export_blob(fused_decoder,
  f"decoder_layer_{i}",
  save_dir,
- compile_blob=True,
- keep_ir=False)
+ keep_ir=keep_ir, compile_blob=compile_blob)
+ os.remove(os.path.join(save_dir, f"decoder_layer_{i}" + ".bin"))
  return 0
ipex_llm/transformers/npu_pipeline_model/qwen.py CHANGED
@@ -24,7 +24,8 @@ from ipex_llm.transformers.npu_models.lm_head import SlicedLMHead


  def convert_lm_head_and_embedding(model, temp_dir, weight_dir,
- convert_model=False, group_size=0):
+ convert_model=False, group_size=0,
+ keep_ir=False, compile_blob=True):
  num_heads = model.model.layers[0].self_attn.num_heads
  head_dim = model.model.layers[0].self_attn.head_dim
  rms_norm_eps = model.config.rms_norm_eps
@@ -84,7 +85,9 @@ def convert_lm_head_and_embedding(model, temp_dir, weight_dir,
  )

  last_blob_path = update_names_of_IR_and_export_blob(new_lm_head, f"lm_head",
- temp_dir, True, False)
+ temp_dir, keep_ir=keep_ir,
+ compile_blob=compile_blob)
+ os.remove(os.path.join(temp_dir, "lm_head.bin"))

  # save weights bins files
  if not isinstance(lm_head, SlicedLMHead):
@@ -119,13 +122,16 @@ def convert_lm_head_and_embedding(model, temp_dir, weight_dir,
  first_blob_path = True
  else:
  first_blob_path = update_names_of_IR_and_export_blob(new_embedding, f"embedding",
- temp_dir, True, keep_ir=True)
+ temp_dir, keep_ir=keep_ir,
+ compile_blob=compile_blob)
+ os.remove(os.path.join(temp_dir, "embedding.bin"))
  return first_blob_path, last_blob_path


  def convert_qwen_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
  temp_dir, weight_dir, transpose_value_cache, kv_len, group_size,
- layernorm_const, mode="decode"):
+ layernorm_const, mode="decode",
+ keep_ir=False, compile_blob=True):
  num_heads = model.model.layers[0].self_attn.num_heads
  num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads
  head_dim = model.model.layers[0].self_attn.head_dim
@@ -183,8 +189,10 @@ def convert_qwen_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
  )
  rest_blob_path = update_names_of_IR_and_export_blob(single_decoder,
  decoder_name,
- temp_dir, True, False,
+ temp_dir, keep_ir=keep_ir,
+ compile_blob=compile_blob,
  npu_dpu_groups=npu_dpu_groups)
+ os.remove(os.path.join(temp_dir, decoder_name + ".bin"))

  # 0, 1, 2 are input_embed/attention_mask/position_id
  if mode == "decode":
@@ -226,7 +234,8 @@ def convert_qwen_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,

  def convert_fused_qwen_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
  save_dir, weight_dir, transpose_value_cache, kv_len, group_size,
- layernorm_const, mode="decode"):
+ layernorm_const, mode="decode",
+ keep_ir=False, compile_blob=True):
  num_heads = model.model.layers[0].self_attn.num_heads
  num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads
  head_dim = model.model.layers[0].self_attn.head_dim
@@ -330,6 +339,6 @@ def convert_fused_qwen_layer(model, fused_layers, n_splits_linear, n_splits_down
  update_names_of_IR_and_export_blob(fused_decoder,
  f"decoder_layer_{i}",
  save_dir,
- compile_blob=True,
- keep_ir=False)
+ keep_ir=keep_ir, compile_blob=compile_blob)
+ os.remove(os.path.join(save_dir, f"decoder_layer_{i}" + ".bin"))
  return 0
ipex_llm/transformers/qlora.py CHANGED
@@ -109,7 +109,7 @@ class LoraLowBitLinear(Module, LoraLayer):
  self.qa_pool = torch.nn.Identity()

  def forward(self, x: torch.Tensor):
- autocast_dtype = get_autocast_dtype(x)
+ autocast_dtype = get_autocast_dtype(x.device.type)
  if x.device.type == "xpu":
  # force to use bf16 on gpu
  x = x.to(torch.bfloat16)
@@ -177,7 +177,7 @@ class LoraBF16Linear(Module, LoraLayer):
  self.is_target_conv_1d_layer = is_target_conv_1d_layer

  def forward(self, x: torch.Tensor):
- autocast_dtype = get_autocast_dtype(x)
+ autocast_dtype = get_autocast_dtype(x.device.type)
  if x.device.type == "xpu":
  # force to use bf16 on gpu
  x = x.to(torch.bfloat16)
ipex_llm/transformers/utils.py CHANGED
@@ -138,26 +138,39 @@ def fix_key(key):
  return key


- def get_autocast_dtype(x):
+ def is_autocast_enabled(device_type: str):
  if torch.__version__ >= '2.3':
- if torch.is_autocast_enabled(x.device.type):
- return torch.get_autocast_dtype(x.device.type)
+ return torch.is_autocast_enabled(device_type)
+ else:
+ if device_type == "xpu":
+ return torch.xpu.is_autocast_xpu_enabled()
+ elif device_type == "cpu":
+ return torch.is_autocast_cpu_enabled()
+ else:
+ invalidInputError(False,
+ f"Device type {device_type} is not supported.")
+
+
+ def get_autocast_dtype(device_type: str):
+ if torch.__version__ >= '2.3':
+ if torch.is_autocast_enabled(device_type):
+ return torch.get_autocast_dtype(device_type)
  else:
  return None
  else:
- if x.device.type == "xpu":
+ if device_type == "xpu":
  if torch.xpu.is_autocast_xpu_enabled():
  return torch.xpu.get_autocast_xpu_dtype()
  else:
  return None
- elif x.device.type == "cpu":
+ elif device_type == "cpu":
  if torch.is_autocast_cpu_enabled():
  return torch.get_autocast_cpu_dtype()
  else:
  return None
  else:
  invalidInputError(False,
- f"Device {x.device} is not supported.")
+ f"Device type {device_type} is not supported.")


  def get_xpu_device_name(device: torch.device):
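The utils.py hunk above splits the old tensor-based helper into is_autocast_enabled and get_autocast_dtype, both of which now take a device-type string ("cpu" or "xpu") and hide the difference between the torch>=2.3 autocast API and the older XPU/CPU-specific calls. The updated call sites in low_bit_linear.py and qlora.py follow the pattern sketched below (the tensor here is only an example):

# Sketch of the new call convention used by the updated call sites in this diff.
import torch
from ipex_llm.transformers.utils import is_autocast_enabled, get_autocast_dtype

x = torch.randn(2, 8)            # any tensor; only its device type matters
device_type = x.device.type      # "cpu" or "xpu"
if is_autocast_enabled(device_type):
    x = x.to(get_autocast_dtype(device_type))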
ipex_llm/transformers/xpu_customize_fwd.py CHANGED
@@ -107,6 +107,8 @@ except ModuleNotFoundError:
  np = None # type: ignore[assignment]
  from typing import Any

+ from ipex_llm.transformers.utils import is_autocast_enabled, get_autocast_dtype
+

  def _cast(value, dtype):
  if isinstance(value, torch.Tensor):
@@ -155,12 +157,12 @@ def custom_fwd(fwd=None, *, cast_inputs=None):

  @functools.wraps(fwd)
  def decorate_fwd(*args, **kwargs):
- args[0]._dtype = torch.xpu.get_autocast_xpu_dtype()
+ args[0]._dtype = get_autocast_dtype("xpu")
  if cast_inputs is None:
- args[0]._fwd_used_autocast = torch.xpu.is_autocast_xpu_enabled()
+ args[0]._fwd_used_autocast = is_autocast_enabled("xpu")
  return fwd(*args, **kwargs)
  else:
- autocast_context = torch.xpu.is_autocast_xpu_enabled()
+ autocast_context = is_autocast_enabled("xpu")
  args[0]._fwd_used_autocast = False
  if autocast_context:
  with torch.xpu.autocast(enabled=False):
@@ -184,7 +186,7 @@ def custom_bwd(bwd):

  @functools.wraps(bwd)
  def decorate_bwd(*args, **kwargs):
- with torch.xpu.autocast(enabled=args[0]._fwd_used_autocast, dtype=args[0]._dtype):
+ with torch.autocast("xpu", enabled=args[0]._fwd_used_autocast, dtype=args[0]._dtype):
  return bwd(*args, **kwargs)

  return decorate_bwd
ipex_llm/transformers/xpu_ops.py CHANGED
@@ -20,9 +20,10 @@ import xe_batch
  import xe_addons


- # @torch.library.register_fake("ipex_llm::forward_new")
- # def _(x, weight, qtype, input_size):
- #     return ???
+ @torch.library.register_fake("ipex_llm::forward_new")
+ def _(x, weight, qtype, output_size):
+ return torch.empty([x.size(0), output_size],
+ dtype=x.dtype, device=x.device)


  # @torch.library.register_fake("ipex_llm::dequant")
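The xpu_ops.py hunk enables the previously commented-out fake (meta) kernel for the ipex_llm::forward_new custom op, so shape inference (for example under FakeTensor tracing or torch.compile) can tell that the op returns a [num_rows, output_size] tensor without running the XPU kernel. A generic sketch of the same registration pattern follows; the "mylib::matmul_lowbit" op name and its schema are hypothetical and only illustrate the mechanism on recent PyTorch versions:

# Hypothetical op, used only to illustrate torch.library fake-kernel registration.
import torch

torch.library.define(
    "mylib::matmul_lowbit",
    "(Tensor x, Tensor weight, int qtype, int output_size) -> Tensor")

@torch.library.register_fake("mylib::matmul_lowbit")
def _(x, weight, qtype, output_size):
    # Same output contract as the registration in this diff: one row per input row.
    return torch.empty([x.size(0), output_size], dtype=x.dtype, device=x.device)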
{ipex_llm-2.2.0b20250204.dist-info → ipex_llm-2.2.0b20250204.post0.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: ipex-llm
- Version: 2.2.0b20250204
+ Version: 2.2.0b20250204.post0
  Summary: Large Language Model Develop Toolkit
  Home-page: https://github.com/intel-analytics/ipex-llm
  Author: BigDL Authors
@@ -27,19 +27,12 @@ Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine
  Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'all'
  Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'all'
  Provides-Extra: cpp
- Requires-Dist: bigdl-core-cpp ==2.6.0b20250204 ; extra == 'cpp'
+ Requires-Dist: bigdl-core-cpp ==2.6.0b20250204.post0 ; extra == 'cpp'
+ Requires-Dist: onednn-devel ==2025.0.1 ; extra == 'cpp'
+ Requires-Dist: onednn ==2025.0.1 ; extra == 'cpp'
+ Requires-Dist: dpcpp-cpp-rt ==2025.0.2 ; extra == 'cpp'
+ Requires-Dist: mkl-dpcpp ==2025.0.1 ; extra == 'cpp'
  Requires-Dist: setuptools ; extra == 'cpp'
- Provides-Extra: cpp-arl
- Requires-Dist: bigdl-core-cpp ==2.6.0b20250204 ; extra == 'cpp-arl'
- Requires-Dist: setuptools ; extra == 'cpp-arl'
- Requires-Dist: onednn-devel ==2024.1.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
- Requires-Dist: onednn ==2024.1.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
- Requires-Dist: dpcpp-cpp-rt ==2024.2.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
- Requires-Dist: mkl-dpcpp ==2024.2.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
- Requires-Dist: onednn-devel ==2024.2.1 ; (platform_system == "Windows") and extra == 'cpp'
- Requires-Dist: onednn ==2024.2.1 ; (platform_system == "Windows") and extra == 'cpp'
- Requires-Dist: dpcpp-cpp-rt ==2024.2.1 ; (platform_system == "Windows") and extra == 'cpp'
- Requires-Dist: mkl-dpcpp ==2024.2.1 ; (platform_system == "Windows") and extra == 'cpp'
  Provides-Extra: llama-index
  Requires-Dist: py-cpuinfo ; extra == 'llama-index'
  Requires-Dist: protobuf ; extra == 'llama-index'
@@ -67,7 +60,7 @@ Requires-Dist: transformers ==4.40.0 ; extra == 'npu'
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'npu'
  Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'npu'
  Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'npu'
- Requires-Dist: bigdl-core-npu ==2.6.0b20250204 ; (platform_system == "Windows") and extra == 'npu'
+ Requires-Dist: bigdl-core-npu ==2.6.0b20250204.post0 ; (platform_system == "Windows") and extra == 'npu'
  Provides-Extra: serving
  Requires-Dist: py-cpuinfo ; extra == 'serving'
  Requires-Dist: fschat[model_worker,webui] ==0.2.36 ; extra == 'serving'
@@ -87,9 +80,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu'
  Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu'
  Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu'
  Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu'
- Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250204 ; extra == 'xpu'
- Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250204 ; extra == 'xpu'
- Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250204 ; extra == 'xpu'
+ Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250204.post0 ; extra == 'xpu'
+ Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250204.post0 ; extra == 'xpu'
+ Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250204.post0 ; extra == 'xpu'
  Provides-Extra: xpu-2-1
  Requires-Dist: py-cpuinfo ; extra == 'xpu-2-1'
  Requires-Dist: protobuf ; extra == 'xpu-2-1'
@@ -104,9 +97,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu-2-1'
  Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu-2-1'
  Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu-2-1'
  Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu-2-1'
- Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250204 ; extra == 'xpu-2-1'
- Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250204 ; extra == 'xpu-2-1'
- Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250204 ; extra == 'xpu-2-1'
+ Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250204.post0 ; extra == 'xpu-2-1'
+ Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250204.post0 ; extra == 'xpu-2-1'
+ Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250204.post0 ; extra == 'xpu-2-1'
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-1'
  Requires-Dist: dpcpp-cpp-rt ==2024.0.2 ; (platform_system == "Windows") and extra == 'xpu-2-1'
  Requires-Dist: mkl-dpcpp ==2024.0.0 ; (platform_system == "Windows") and extra == 'xpu-2-1'
@@ -124,7 +117,7 @@ Requires-Dist: setuptools ; extra == 'xpu-2-6'
  Requires-Dist: torch ==2.6.0+xpu ; extra == 'xpu-2-6'
  Requires-Dist: torchvision ==0.21.0+xpu ; extra == 'xpu-2-6'
  Requires-Dist: torchaudio ==2.6.0+xpu ; extra == 'xpu-2-6'
- Requires-Dist: bigdl-core-xe-all ==2.6.0b20250204 ; extra == 'xpu-2-6'
+ Requires-Dist: bigdl-core-xe-all ==2.6.0b20250204.post0 ; extra == 'xpu-2-6'
  Requires-Dist: onednn-devel ==2025.0.1 ; extra == 'xpu-2-6'
  Requires-Dist: onednn ==2025.0.1 ; extra == 'xpu-2-6'
  Requires-Dist: dpcpp-cpp-rt ==2025.0.2 ; extra == 'xpu-2-6'
@@ -140,9 +133,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arc'
  Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arc'
  Requires-Dist: tabulate ; extra == 'xpu-arc'
  Requires-Dist: setuptools ; extra == 'xpu-arc'
- Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250204 ; extra == 'xpu-arc'
- Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250204 ; extra == 'xpu-arc'
- Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250204 ; extra == 'xpu-arc'
+ Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250204.post0 ; extra == 'xpu-arc'
+ Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250204.post0 ; extra == 'xpu-arc'
+ Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250204.post0 ; extra == 'xpu-arc'
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arc'
  Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
  Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
@@ -163,9 +156,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arl'
  Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arl'
  Requires-Dist: tabulate ; extra == 'xpu-arl'
  Requires-Dist: setuptools ; extra == 'xpu-arl'
- Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250204 ; extra == 'xpu-arl'
- Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250204 ; extra == 'xpu-arl'
- Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250204 ; extra == 'xpu-arl'
+ Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250204.post0 ; extra == 'xpu-arl'
+ Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250204.post0 ; extra == 'xpu-arl'
+ Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250204.post0 ; extra == 'xpu-arl'
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arl'
  Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
  Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
@@ -186,9 +179,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-lnl'
  Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-lnl'
  Requires-Dist: tabulate ; extra == 'xpu-lnl'
  Requires-Dist: setuptools ; extra == 'xpu-lnl'
- Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250204 ; extra == 'xpu-lnl'
- Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250204 ; extra == 'xpu-lnl'
- Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250204 ; extra == 'xpu-lnl'
+ Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250204.post0 ; extra == 'xpu-lnl'
+ Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250204.post0 ; extra == 'xpu-lnl'
+ Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250204.post0 ; extra == 'xpu-lnl'
  Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-lnl'
  Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
  Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
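In short, the METADATA changes pin every bigdl-core-* companion package to 2.6.0b20250204.post0, drop the cpp-arl extra, and move the cpp extra to the 2025.0.x oneDNN/DPC++/MKL runtimes on all platforms instead of Windows only. An illustrative install command for the updated cpp extra (standard pip syntax, not taken from the package docs):

pip install "ipex-llm[cpp]==2.2.0b20250204.post0"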
{ipex_llm-2.2.0b20250204.dist-info → ipex_llm-2.2.0b20250204.post0.dist-info}/RECORD CHANGED
@@ -41,35 +41,35 @@ ipex_llm/langchain/llms/transformerspipelinellm.py,sha256=vm522YPPwWxxAPVvQBtxRf
  ipex_llm/langchain/vllm/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
  ipex_llm/langchain/vllm/vllm.py,sha256=6dxc-ZISZQrJilEa_HA827l75Dv9rcHpY_G6FdJ8BVs,7793
  ipex_llm/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- ipex_llm/libs/bloom-api.dll,sha256=Liqh-LQ7dD6SJTbEYBvA359vCwptXW1ZkIDRBqV6HrQ,36352
- ipex_llm/libs/bloom.dll,sha256=z8mZQPHnJaTprtO4Li54v4AUUocHrhswbvABd_L6J1Y,507904
- ipex_llm/libs/gptneox-api.dll,sha256=hiet2igvNHeTG3cxpTx3YanuiBPbKQqu6XuahO8raBU,24576
- ipex_llm/libs/gptneox.dll,sha256=BMUCjypqgFHj2NYv4WtTSN5QmLLfwCVwdQjUEBbmtL0,568320
- ipex_llm/libs/libbloom_avx.dll,sha256=20YTK8QrzFsE_WxxDcBAkEV4GFJVFjqYaDXZJ2l7PYo,536576
- ipex_llm/libs/libbloom_vnni.dll,sha256=5kidOCZeFIelGHXIrbGg3UKmczMaB8kJHwv3k6ueJeI,508416
- ipex_llm/libs/libgptneox_avx.dll,sha256=XP0Z5yTD8GQVfoJxvuTRFOBMSWIdXTcEJ_Y-hASE3mQ,596992
- ipex_llm/libs/libgptneox_vnni.dll,sha256=jHxu_arkPjbXF32sMkfM56UGDFqzMAUoSBAj6zZrOrg,568832
- ipex_llm/libs/libllama_avx.dll,sha256=0nC8LsOxILe0iuPNVdJM9nahlerbWyETjD9OAh5A9qo,591360
- ipex_llm/libs/libllama_vnni.dll,sha256=AJd_Yrt-wvJltg2-NfUE0Sfyyhbz38OBvhJXVa31Wac,563200
- ipex_llm/libs/libstarcoder_avx.dll,sha256=2G9rZRohjoXR80p9aXhYd64tSVTWir3C-ZoUpknhAj4,627712
- ipex_llm/libs/libstarcoder_vnni.dll,sha256=lRO9clZiAJNXHLAOEPOsV-29qHyitp9TPI4g8Kolx_8,599552
- ipex_llm/libs/llama-api.dll,sha256=JFXvlgLKkieXTBFqxVT3A39DnRN1jWxopfbpNxztaJA,25600
- ipex_llm/libs/llama.dll,sha256=Tl5YtNoy6vP0pjEdOqzWyFj9n3uUCXn49ywtoZsq8QU,562688
- ipex_llm/libs/main-bloom.exe,sha256=MTmlkYHLdFmKvhZcJUyFre-J9dRJui2oqLMfoby3j2Y,103424
- ipex_llm/libs/main-gptneox.exe,sha256=tv02QCHWEJKwJx9M2H7pmfw2HDA_c1GK_QuSVHrFIzs,98816
- ipex_llm/libs/main-llama.exe,sha256=3SqMTCxXX4rFnq-hlMe587YjHQERtp82QEFnXILMo8A,99840
- ipex_llm/libs/main-starcoder.exe,sha256=aQqfyZ7SW2u6WZ4vCSb-KWVzGEljZNHtdXiL-ErMBW8,157696
- ipex_llm/libs/pipeline.dll,sha256=nRhyK2ZwGB8pDKijNZMCUxSMlD1mR9jVTFKxmxAR2v4,72704
- ipex_llm/libs/quantize-bloom.exe,sha256=vfmlpkCSjhiAJgTyRYLpKvEJySJg58h5SzXCilajiug,126464
- ipex_llm/libs/quantize-bloom_vnni.exe,sha256=9tBmQ8vNQQ_0Ux5YIIqwveeNqSlOjvqmMk12bz0R3gU,128000
- ipex_llm/libs/quantize-gptneox.exe,sha256=9bqrQpmqS6i4wcrPdDPrk0-EeuJsBOLpJm2WsCoDISA,104448
- ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=G-jfbjDWGREcfVpRR90JWkAuwfxVmY3Yji3B-OvlbbU,104960
- ipex_llm/libs/quantize-llama.exe,sha256=JydqEBFh_RkOU_nwHo4vqIL1t37Cy4FvuuivT9s2W4k,110080
- ipex_llm/libs/quantize-llama_vnni.exe,sha256=R00IZwHx1Q5pSnouN6HjhmyRWtkFdqtEb3cA2Qzi03I,110592
- ipex_llm/libs/quantize-starcoder.exe,sha256=ocAyh6uYU9AtjQZADxukNS2Z1PMl8hgim7A1ZUsd1XU,127488
- ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=q2zQhWiaBQPPtu3ceqRcOEGuGdYKL7EsuGtVgSBLt70,128512
- ipex_llm/libs/starcoder-api.dll,sha256=dbdN2Tu242FGudTJJm0eliVL-a5hxOAUtG0eQw9ITRY,21504
- ipex_llm/libs/starcoder.dll,sha256=R1aQDRSWcmp7P8iqJihXs1uXgjpHnQdHqPfQGXoOkdY,599040
+ ipex_llm/libs/bloom-api.dll,sha256=UxAv-GadkIVCnv5S66r7IFt4szGuIM1tGokBmxemeC4,36352
+ ipex_llm/libs/bloom.dll,sha256=RSJ05o8iUkQJq4bjd3lNcHmfOAmJlSGAd4Pomq9PwAM,507904
+ ipex_llm/libs/gptneox-api.dll,sha256=C31cT9xWi2OERSExQz9QU5R_WdMRDPgprukPnLKH71Q,24576
+ ipex_llm/libs/gptneox.dll,sha256=8SrVALu3AG19z1VQqv2LEBsSwy-LqTGqlt-8eQQX26Y,568320
+ ipex_llm/libs/libbloom_avx.dll,sha256=nWPWw-uyycgK8yFPRokE1wAt4NOWasKhUh87ghOAp2o,536576
+ ipex_llm/libs/libbloom_vnni.dll,sha256=ArrDTSK8uFo5siZz9WPnNul0xjs6pfAZ7z7mjqb9ywI,508416
+ ipex_llm/libs/libgptneox_avx.dll,sha256=Sm_4JKYmFxuWRw1_ATSdeGuTYveIPhNnAKw-YkJKoNU,596992
+ ipex_llm/libs/libgptneox_vnni.dll,sha256=IL8Q-KsatQl3mBejqGSvOQc2lZtA2dsPYaXYw5Cdjv4,568832
+ ipex_llm/libs/libllama_avx.dll,sha256=KaYditEetrkzBQIDArK9owquKe1FlC6n5u-9LYfgSbA,591360
+ ipex_llm/libs/libllama_vnni.dll,sha256=b8aWcXg4lZUMinb9m0cgQMHt8k-OnKddnhqAp9BGhYg,563200
+ ipex_llm/libs/libstarcoder_avx.dll,sha256=CPdy-ZQ8wMTsB0vrhiy1oBXjBqghprQEgEL9qnh8Q5o,627712
+ ipex_llm/libs/libstarcoder_vnni.dll,sha256=jNnXhk4tVpHMW2VUGrc-jKrnwFJpnSIArBuWlOVi2CY,599552
+ ipex_llm/libs/llama-api.dll,sha256=F1EyeCDV62JqNFiTtSglmEkMHvQ27Tz1kA4ZS19dfRE,25600
+ ipex_llm/libs/llama.dll,sha256=WO3V-mjWiKcZQA_lLIslINOXi3lgx8_R100rsWU0sq0,562688
+ ipex_llm/libs/main-bloom.exe,sha256=GBNcb3iEFTxWoadPalCuyOFR4AAAoD8lpKDVJjZDCpk,103424
+ ipex_llm/libs/main-gptneox.exe,sha256=mNL1bSCjnRtpXy2I4XVullKq9msTt77grfN7IWFfNPg,98816
+ ipex_llm/libs/main-llama.exe,sha256=O9b660YcerS0RLPly2qwDzUDJB91vBctXblmZipzOFU,99840
+ ipex_llm/libs/main-starcoder.exe,sha256=fcSGXJAETsXNfxR2-0HsxrB2BxHaDniIj1Z8uOyrESw,157696
+ ipex_llm/libs/pipeline.dll,sha256=Js9cA-pFx0vHdZmsK9TEIa5RLaymnY9OeNV3i8DZh_4,72704
+ ipex_llm/libs/quantize-bloom.exe,sha256=3cdZBeG9x_oyFcyAvoAFojo9Kb0TJC-ZAJzXdPG9e24,126464
+ ipex_llm/libs/quantize-bloom_vnni.exe,sha256=ETIqFEor0troIoRLJRYjJ_pDpaFByhk79UNfEMGcw-g,128000
+ ipex_llm/libs/quantize-gptneox.exe,sha256=PupJio2YrOQCq3-45BuxkfBG2IksXgPSks2P_riU7C8,104448
+ ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=9_Cv-2mhRE5A_1_aJKdckkV_mEnvc8mcGe8KUeWZdHA,104960
+ ipex_llm/libs/quantize-llama.exe,sha256=hfG3BiXvXMtNHr68ZBID4MR7PGgaJwmN749Ry79826s,110080
+ ipex_llm/libs/quantize-llama_vnni.exe,sha256=mrK5Q6weyYpg3aJl46TvaRX7Iho5ANCLOMgI06mO--s,110592
+ ipex_llm/libs/quantize-starcoder.exe,sha256=9X3c4e2SvGFb6_eIwhm2pal-P630M1zr4sjZH0LKaS8,127488
+ ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=Fgk7DiCoI5a1fj7s0LHavmQOYgUNxHS8oCUJMRdcjoI,128512
+ ipex_llm/libs/starcoder-api.dll,sha256=3JcNxRgk3otEDsqUT9kTe6JZ_CTeCK4Vwt2U7eaWcu0,21504
+ ipex_llm/libs/starcoder.dll,sha256=4vV5R9PYmzHjoREsw7V4h9wl0oGv0qKRz3XwhQLLqlQ,599040
  ipex_llm/llamaindex/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
  ipex_llm/llamaindex/llms/__init__.py,sha256=KP1lEdGqDuxPoxL1ZSH25Pm2kKMPJBWUTLR0ckSLMIU,1139
  ipex_llm/llamaindex/llms/bigdlllm.py,sha256=FQBzq1KOjfc6uofTXAha3O7TqpJkNfOFepXQmOVlbnI,26314
@@ -94,20 +94,20 @@ ipex_llm/transformers/kv.py,sha256=k4TU18LlA-Sbq9WNNQnfuzu3RSFBwFhmaV3BcGN5bAo,1
  ipex_llm/transformers/lisa.py,sha256=F5WxbtXQ7RdKulj83h_2DnEIgKiKGZf7zvOmg6QBl2s,3289
  ipex_llm/transformers/loader.py,sha256=AwjV5RpI2t2bedlv7ZhLm8cfd-QJZm5hny-XyjIvdnk,6876
  ipex_llm/transformers/lookup.py,sha256=b6OlZ9OV10R9qeWw8mVryVpDxszkjwLkldvi7GPMJY8,19614
- ipex_llm/transformers/low_bit_linear.py,sha256=mFJRKU60ZVHm-V7gDsJYIz-ryntZi15XhS0eqSUPag4,41136
+ ipex_llm/transformers/low_bit_linear.py,sha256=3EtbiCAq5HU_r2pGJ9beSDK4NPTN8Jj-aHMqm1jqX18,39177
  ipex_llm/transformers/model.py,sha256=cQJNlAkdfoWmVbWd-TS2hf-Do41mMO9orPvG3FO4Nns,40855
  ipex_llm/transformers/modelling_bigdl.py,sha256=7JpNVMuyq_OmtNUaMFMXdxPWZp2q0QHC02QeA-VTPOw,6709
- ipex_llm/transformers/npu_model.py,sha256=X8ZtvZJpzz64XrSPhUYXXZmdJcbZ9X6G3Vlzw-zgN1Q,39749
+ ipex_llm/transformers/npu_model.py,sha256=LMmRmhq8IAN9FrXLUeUK2B8XS2OJ5GVWmG0cEdeK-ro,40354
  ipex_llm/transformers/patches.py,sha256=G9KcXxo42H1HJEDaroq4JbBN5P0P0lty7U7kk7-g4tw,991
  ipex_llm/transformers/pipeline_parallel.py,sha256=uNZpOXljNmdoEYnP8U-VFiN4dRZb2piQbIf2bG9LQnE,49051
- ipex_llm/transformers/qlora.py,sha256=jtPGsvWFjbTUGzDBCdfftnCis_0nJQNRpACSwXUbbGU,14943
+ ipex_llm/transformers/qlora.py,sha256=qV9Y6G5kAaet77LLA3oXn3qQY4ayyAPZ7NAjOlHCS7g,14967
  ipex_llm/transformers/relora.py,sha256=-dYzUV0P-IhO2jFdnzN9-v_sFzJpRj3ZwN9eCJzOoCw,16567
  ipex_llm/transformers/speculative.py,sha256=0XNLgc9dGswJHVPrXo4iM7pPxkWwfFfJMECcivJSnIc,63368
  ipex_llm/transformers/streamer.py,sha256=RrVlLblzCOtABRUpaMXAyaMnCGgLUtAi_YesLumRbww,4842
  ipex_llm/transformers/training_patch.py,sha256=oxMkUtqyvqJiprw6dE3skkYfD1HOmUlH9N0hBkbn0G0,10799
- ipex_llm/transformers/utils.py,sha256=JBekwpPD-CyMxt1OzvVsp7tu26pSA4v2mjuaUbqrAgI,16995
- ipex_llm/transformers/xpu_customize_fwd.py,sha256=wFpIhs5F6tkNs8gBOrLxWdhLzO3EDHovVkERPIAoAvg,7611
- ipex_llm/transformers/xpu_ops.py,sha256=vw4cUwvqUqDr45d-WMIkCpM2oiHfjN-VjF0bjMSF4kY,4830
+ ipex_llm/transformers/utils.py,sha256=a-2wbflSd_yYnC5qcMoY5HLR1yT_QpxeX_WpGpaDLrA,17457
+ ipex_llm/transformers/xpu_customize_fwd.py,sha256=PUBYLnTbaBXUs3Dnte9Gqln2XFk8iA62SmloWjr7GJI,7668
+ ipex_llm/transformers/xpu_ops.py,sha256=z95iTtcDQvNyJOvB4A6B_ECTYjHp4A7x-FsssoETOMs,4914
  ipex_llm/transformers/awq/__init__.py,sha256=Du5gu3-eeAkeDO_dEMBTzrDBA66DSN3uL3-rn8WGXQw,875
  ipex_llm/transformers/awq/act.py,sha256=YwomJzOOKwkKtzGrm4L4kwBstBLO1Z8SK4CKi8PSYVQ,2172
  ipex_llm/transformers/awq/awq.py,sha256=cGyRQJWwAEJtOtdSbsBoQ33KX_Ie0pv5OJHC0ACEELE,8861
@@ -174,7 +174,7 @@ ipex_llm/transformers/models/rwkv5.py,sha256=OkRNj1pCAZg1z2Fw-I0DEnxLEdZyPeRSQ6m
  ipex_llm/transformers/models/sd.py,sha256=VvHV5u-0k2MgHu3NL9113hPj7DgfxqctuKzEEeNfRDU,5981
  ipex_llm/transformers/models/stablelm.py,sha256=fj-XtOnR6kggnFUQTMPCOOzolkPztN06WAv8QW-XRnI,7054
  ipex_llm/transformers/models/starcoder2.py,sha256=ONKvD7JCkRM0DI-R56x28QFBJ7CjD5hOZBQ_3WfOcNk,6626
- ipex_llm/transformers/models/utils.py,sha256=Rdn9T4zk6Hz8ybJp6kvlyfPwgHwdxEZ8R4zGtMeozWg,15105
+ ipex_llm/transformers/models/utils.py,sha256=qI5ln8SQGTvR_IyxFkoZhefgOErnXUnJrifIyhiqT9c,14753
  ipex_llm/transformers/models/whisper.py,sha256=ju3WP8Eq-KvD7kb3Qy51r4FOfSX3NBxfp5RBcq__gzc,4241
  ipex_llm/transformers/models/yuan.py,sha256=JYAn_ZaSGK0NBJLEIxCACfAq084a66GFJkdd5NbpmMA,7732
  ipex_llm/transformers/npu_models/__init__.py,sha256=ulEUGLjaP48LCrVeury3UxLjXxKzRi0UpSG4bYu-7f8,585
@@ -183,7 +183,7 @@ ipex_llm/transformers/npu_models/baichuan_mp.py,sha256=tHhO-0v5z6IhxsfzAPYWXVbLr
  ipex_llm/transformers/npu_models/chatglm.py,sha256=YzpGLZ7ORt6qkwW9mCwZ_xhOAI8uHSDHJrmqWgNM234,10511
  ipex_llm/transformers/npu_models/chatglm4.py,sha256=J4523DzhIzZxIvlf1V9qU4auzEGKvC80YqyxuCJygjw,9795
  ipex_llm/transformers/npu_models/common.py,sha256=tTUJL7IxVrJSnXle6nla35wTUrBf2sOEt7Ya1qyMezY,4853
- ipex_llm/transformers/npu_models/convert.py,sha256=FILSGnoltcR9FMrCkw0eOKh6p3sbBI5i0Ms8AsJc04E,25342
+ ipex_llm/transformers/npu_models/convert.py,sha256=2YAi8rvEYu_tvzpczKsJBsKjAns5FAPz1MntJTxIQC0,25472
  ipex_llm/transformers/npu_models/convert_mp.py,sha256=Y6Fcde7bXHkZ0wvm8PymxJqvncbDj3ZjMez3SY9qi5U,24452
  ipex_llm/transformers/npu_models/glm_edge.py,sha256=VsJex-6530h4ZQk35TxRe1MnttAHT41omE8LV47LgBE,6723
  ipex_llm/transformers/npu_models/kv.py,sha256=2OSFO9Z6e4nGdVxXEM-Bq2qa_npYYbGmQt3lcCZxTlU,9201
@@ -208,11 +208,11 @@ ipex_llm/transformers/npu_models/xlm_mp.py,sha256=sj8OVun8xJprM7ZJp0XzWa55rqlSIz
  ipex_llm/transformers/npu_pipeline_model/__init__.py,sha256=b2IXvVqQ5cItki021h8s3ymW12RPu8QNPprq4Mn3bDM,586
  ipex_llm/transformers/npu_pipeline_model/baichuan.py,sha256=ICxRzFQ4OIANDkkVi2_4xOeQXmfFXYMx3H52KuE1xR4,6208
  ipex_llm/transformers/npu_pipeline_model/common.py,sha256=QxJoJESpv0BpwO_FBeAT2wKA56wNFfen8iI37PrMKuA,7838
- ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py,sha256=wrRgmNT13RVtQRp5gFRBxNEPJHxFMLeGqb8a58YodPQ,28512
- ipex_llm/transformers/npu_pipeline_model/llama.py,sha256=MnvHRytLt3oy5jIPUBe8AeEJ6PtPWLbhQ5a9WqjZ1TQ,19905
- ipex_llm/transformers/npu_pipeline_model/minicpm.py,sha256=MDMesYlVbECKdK0xxkt1LwHgpkJOO7ZwBExYAwMGQa0,20637
+ ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py,sha256=-eHNbRuX2QhYd0-jCyo2pZpHTZTZ108bhObYx8a3CJs,29494
+ ipex_llm/transformers/npu_pipeline_model/llama.py,sha256=pmAnawfAn0W8XSr8kGWfxR1HylCLa-Y6mKpFeX-m8UY,20892
+ ipex_llm/transformers/npu_pipeline_model/minicpm.py,sha256=H7j_UaHj-IwEBriQ-bunle0-8s2NmvqnL9eYuixnmFc,21398
  ipex_llm/transformers/npu_pipeline_model/pipeline_cpp.py,sha256=JNmodAMg_NQvDILug3E_fGXEh6cd3wsj4bvAzcd-vaU,2749
- ipex_llm/transformers/npu_pipeline_model/qwen.py,sha256=3paMXr1viuztybhmVLqQ9XvM3EZbxncDuNSNwLF8OI0,14849
+ ipex_llm/transformers/npu_pipeline_model/qwen.py,sha256=FAfoPlKEAxeU6-J8ltpSev5ithm9AC-urtreu6NGpME,15509
  ipex_llm/utils/__init__.py,sha256=LlUgrD03rfw4iY8zWPtHH6p65Gw76waVOLHaqagETw0,1425
  ipex_llm/utils/benchmark_util_4_29.py,sha256=OU1W1quiaiJGsg1pd3HM9O6PmVSaPA0HHE7R8hNTfmQ,258653
  ipex_llm/utils/benchmark_util_4_42.py,sha256=HEiClCgKDp_T64HH8ulSTly8dvt6UwPDYZfrPVYvXcc,225383
@@ -248,11 +248,11 @@ ipex_llm/vllm/xpu/engine/__init__.py,sha256=pY_CpyuZd72fr6s32ejeKHKFW0K4vUU2rzZj
  ipex_llm/vllm/xpu/engine/engine.py,sha256=k4-D27WS_Gk3mA--w3HWAjPjb4Aiu043MVPi0ZoAUBc,5984
  ipex_llm/vllm/xpu/entrypoints/openai/api_server.py,sha256=GshTZFB8e4PWvqckfbmTOU6b0oLkNn7A-vzLuG9--j8,21544
  ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py,sha256=2rENA2ucynMaIjiZBEh2ez1o5vR32GaP514t39CD7KM,8676
- ipex_llm-2.2.0b20250204.data/scripts/ipex-llm-init.bat,sha256=HPtCYuDYwEatq7dAwOvdfVcHYCpAVdbj75K1qh0vQek,2578
- ipex_llm-2.2.0b20250204.data/scripts/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
- ipex_llm-2.2.0b20250204.data/scripts/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
- ipex_llm-2.2.0b20250204.dist-info/METADATA,sha256=axGzhPAqCjlvETBNA5ONJm7wqp1r6Q-Ac202bfz7lSs,12879
- ipex_llm-2.2.0b20250204.dist-info/WHEEL,sha256=6iYPr8vTHsyDK75jr9X0V3I9wPSVmtwr_8fdATBciGk,98
- ipex_llm-2.2.0b20250204.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
- ipex_llm-2.2.0b20250204.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
- ipex_llm-2.2.0b20250204.dist-info/RECORD,,
+ ipex_llm-2.2.0b20250204.post0.data/scripts/ipex-llm-init.bat,sha256=HPtCYuDYwEatq7dAwOvdfVcHYCpAVdbj75K1qh0vQek,2578
+ ipex_llm-2.2.0b20250204.post0.data/scripts/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
+ ipex_llm-2.2.0b20250204.post0.data/scripts/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
+ ipex_llm-2.2.0b20250204.post0.dist-info/METADATA,sha256=v7-tush1os4a_HVuvzFajDBsfW_xq4VW8GhU--mIj8U,12343
+ ipex_llm-2.2.0b20250204.post0.dist-info/WHEEL,sha256=6iYPr8vTHsyDK75jr9X0V3I9wPSVmtwr_8fdATBciGk,98
+ ipex_llm-2.2.0b20250204.post0.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
+ ipex_llm-2.2.0b20250204.post0.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
+ ipex_llm-2.2.0b20250204.post0.dist-info/RECORD,,