ipex-llm 2.3.0b20250603__py3-none-win_amd64.whl → 2.3.0b20250605__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. ipex_llm/ggml/quantize.py +3 -0
  2. ipex_llm/libs/bloom-api.dll +0 -0
  3. ipex_llm/libs/bloom.dll +0 -0
  4. ipex_llm/libs/gptneox-api.dll +0 -0
  5. ipex_llm/libs/gptneox.dll +0 -0
  6. ipex_llm/libs/libbloom_avx.dll +0 -0
  7. ipex_llm/libs/libbloom_vnni.dll +0 -0
  8. ipex_llm/libs/libgptneox_avx.dll +0 -0
  9. ipex_llm/libs/libgptneox_vnni.dll +0 -0
  10. ipex_llm/libs/libllama_avx.dll +0 -0
  11. ipex_llm/libs/libllama_vnni.dll +0 -0
  12. ipex_llm/libs/libstarcoder_avx.dll +0 -0
  13. ipex_llm/libs/libstarcoder_vnni.dll +0 -0
  14. ipex_llm/libs/llama-api.dll +0 -0
  15. ipex_llm/libs/llama.dll +0 -0
  16. ipex_llm/libs/main-bloom.exe +0 -0
  17. ipex_llm/libs/main-gptneox.exe +0 -0
  18. ipex_llm/libs/main-llama.exe +0 -0
  19. ipex_llm/libs/main-starcoder.exe +0 -0
  20. ipex_llm/libs/pipeline.dll +0 -0
  21. ipex_llm/libs/quantize-bloom.exe +0 -0
  22. ipex_llm/libs/quantize-bloom_vnni.exe +0 -0
  23. ipex_llm/libs/quantize-gptneox.exe +0 -0
  24. ipex_llm/libs/quantize-gptneox_vnni.exe +0 -0
  25. ipex_llm/libs/quantize-llama.exe +0 -0
  26. ipex_llm/libs/quantize-llama_vnni.exe +0 -0
  27. ipex_llm/libs/quantize-starcoder.exe +0 -0
  28. ipex_llm/libs/quantize-starcoder_vnni.exe +0 -0
  29. ipex_llm/libs/starcoder-api.dll +0 -0
  30. ipex_llm/libs/starcoder.dll +0 -0
  31. ipex_llm/transformers/low_bit_linear.py +81 -42
  32. ipex_llm/vllm/xpu/model_convert.py +7 -1
  33. {ipex_llm-2.3.0b20250603.dist-info → ipex_llm-2.3.0b20250605.dist-info}/METADATA +11 -11
  34. {ipex_llm-2.3.0b20250603.dist-info → ipex_llm-2.3.0b20250605.dist-info}/RECORD +40 -40
  35. {ipex_llm-2.3.0b20250603.data → ipex_llm-2.3.0b20250605.data}/scripts/ipex-llm-init.bat +0 -0
  36. {ipex_llm-2.3.0b20250603.data → ipex_llm-2.3.0b20250605.data}/scripts/llm-chat.ps1 +0 -0
  37. {ipex_llm-2.3.0b20250603.data → ipex_llm-2.3.0b20250605.data}/scripts/llm-cli.ps1 +0 -0
  38. {ipex_llm-2.3.0b20250603.dist-info → ipex_llm-2.3.0b20250605.dist-info}/WHEEL +0 -0
  39. {ipex_llm-2.3.0b20250603.dist-info → ipex_llm-2.3.0b20250605.dist-info}/entry_points.txt +0 -0
  40. {ipex_llm-2.3.0b20250603.dist-info → ipex_llm-2.3.0b20250605.dist-info}/top_level.txt +0 -0
ipex_llm/ggml/quantize.py CHANGED
@@ -54,6 +54,9 @@ ggml_tensor_qtype = {"sym_int4": 2, # q4_0 in ggml
                      "sym_int8_rtn": 32,
                      "asym_int4_rtn": 33,
                      "woq_int4": 34,
+                     "torch_fp8_e5m2": 35,
+                     "torch_fp8": 35,
+                     "torch_fp8_e4m3": 36
                      }
 
 # mixed precison from llama.cpp
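
Note on the new entries: "torch_fp8" is an alias that maps to the same id (35) as "torch_fp8_e5m2", so both strings select the E5M2 format, while "torch_fp8_e4m3" (36) selects E4M3. As a sketch of how these names would be used, assuming `load_in_low_bit` accepts the new strings the same way it accepts existing qtype names such as "sym_int4" (not verified against this build; the model id is illustrative):

    # Hypothetical usage sketch; the load_in_low_bit value is assumed accepted.
    from ipex_llm.transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained(
        "meta-llama/Llama-2-7b-hf",        # illustrative model id
        load_in_low_bit="torch_fp8_e5m2",  # "torch_fp8" selects the same qtype (35)
        trust_remote_code=True,
    ).to("xpu")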
ipex_llm/libs/*.dll and *.exe CHANGED
Binary files differ. All 29 prebuilt binaries listed above (bloom-api.dll through starcoder.dll) were rebuilt; their new sha256 digests appear in the RECORD diff below.
ipex_llm/transformers/low_bit_linear.py CHANGED
@@ -86,6 +86,8 @@ SYM_INT4_RTN = ggml_tensor_qtype["sym_int4_rtn"]
 SYM_INT8_RTN = ggml_tensor_qtype["sym_int8_rtn"]
 ASYM_INT4_RTN = ggml_tensor_qtype["asym_int4_rtn"]
 WOQ_INT4 = ggml_tensor_qtype["woq_int4"]
+TORCH_FP8E5 = ggml_tensor_qtype["torch_fp8_e5m2"]
+TORCH_FP8E4 = ggml_tensor_qtype["torch_fp8_e4m3"]
 RTN_DTYPE = {
     SYM_INT4_RTN: torch.uint8,
     ASYM_INT4_RTN: torch.uint8,
@@ -106,39 +108,44 @@ def ggml_convert_qtype(tensor: torch.Tensor, qtype: int,
                        imatrix: torch.Tensor=None,
                        in_features: int=None,
                        enable_scale_search: bool=False):
-    QK = ggml.ggml_qk_size(qtype)
-    block_size_in_bytes = ggml.ggml_type_size(qtype)
-
-    invalidInputError(tensor.dtype == torch.float,
-                      "Input tensor must be float32")
-    src = tensor.data.data_ptr()
-    src = ctypes.cast(src, ctypes.POINTER(ctypes.c_float))
-    n = tensor.numel()  # all elements
-    k = tensor.shape[-1]
-    invalidInputError(k % QK == 0,
-                      f"Last dim of input tensor must be multiple of {QK}")
-
-    dst_size = (n // QK) * block_size_in_bytes
-    if qtype in [SYM_INT8_RTN, SYM_INT4_RTN, ASYM_INT4_RTN]:
-        dst_tensor = torch.empty(dst_size, dtype=RTN_DTYPE[qtype],
-                                 device=device)
-        dst_tensor = dst_tensor.reshape(tensor.shape[0], tensor.shape[-1] // QK)
-        if qtype == ASYM_INT4_RTN:
-            scale = torch.empty((n // k) * 2, dtype=torch.float32,
-                                device=device)
-        else:
-            scale = torch.empty(n // k, dtype=torch.float32,
-                                device=device)
-    elif qtype == NF4:
-        # Deepspeed zero3 requires unified dtype,
-        # thus here uses bfloat16 consistent to other layers
-        # dst_size above is computed based on uint8, and for bfloat16,
-        # buffer size should be half
-        dst_tensor = torch.empty(dst_size // 2, dtype=torch.bfloat16,
-                                 device=device)
+    if qtype in [TORCH_FP8E5, TORCH_FP8E4]:
+        fp8_dtype = torch.float8_e5m2 if qtype == TORCH_FP8E5 else torch.float8_e4m3fn
+        dst_tensor = torch.empty(tensor.shape, device=device, dtype=fp8_dtype)
+        scale = torch.zeros(1, device=device, dtype=torch.float32)
     else:
-        dst_tensor = torch.empty(dst_size, dtype=torch.uint8,
-                                 device=device)
+        QK = ggml.ggml_qk_size(qtype)
+        block_size_in_bytes = ggml.ggml_type_size(qtype)
+
+        invalidInputError(tensor.dtype == torch.float,
+                          "Input tensor must be float32")
+        src = tensor.data.data_ptr()
+        src = ctypes.cast(src, ctypes.POINTER(ctypes.c_float))
+        n = tensor.numel()  # all elements
+        k = tensor.shape[-1]
+        invalidInputError(k % QK == 0,
+                          f"Last dim of input tensor must be multiple of {QK}")
+
+        dst_size = (n // QK) * block_size_in_bytes
+        if qtype in [SYM_INT8_RTN, SYM_INT4_RTN, ASYM_INT4_RTN]:
+            dst_tensor = torch.empty(dst_size, dtype=RTN_DTYPE[qtype],
+                                     device=device)
+            dst_tensor = dst_tensor.reshape(tensor.shape[0], tensor.shape[-1] // QK)
+            if qtype == ASYM_INT4_RTN:
+                scale = torch.empty((n // k) * 2, dtype=torch.float32,
+                                    device=device)
+            else:
+                scale = torch.empty(n // k, dtype=torch.float32,
+                                    device=device)
+        elif qtype == NF4:
+            # Deepspeed zero3 requires unified dtype,
+            # thus here uses bfloat16 consistent to other layers
+            # dst_size above is computed based on uint8, and for bfloat16,
+            # buffer size should be half
+            dst_tensor = torch.empty(dst_size // 2, dtype=torch.bfloat16,
+                                     device=device)
+        else:
+            dst_tensor = torch.empty(dst_size, dtype=torch.uint8,
+                                     device=device)
 
     if not convert_shape_only and device != 'meta':
         dst = ctypes.c_void_p(dst_tensor.data.data_ptr())
@@ -158,6 +165,17 @@ def ggml_convert_qtype(tensor: torch.Tensor, qtype: int,
                                               enable_scale_search,
                                               imatrix)
             return dst_tensor, scale.type(torch.float16)
+        elif qtype in [TORCH_FP8E5, TORCH_FP8E4]:
+            import xe_linear
+            tensor_device = tensor.device
+            tensor_xpu = tensor.to("xpu")
+            dst_tensor = dst_tensor.to("xpu")
+            scale = scale.to("xpu")
+
+            xe_linear.dynamic_scaled_fp8_quant(dst_tensor, tensor_xpu, scale)
+
+            # scale = scale.to(tensor_device)
+            dst_tensor = dst_tensor.to(tensor_device)
         else:
             ggml.ggml_quantize_tensor(src, dst, qtype, n, k, hist, enable_scale_search)
     else:
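
The hunk above routes FP8 quantization through `xe_linear.dynamic_scaled_fp8_quant`, an XPU kernel whose arithmetic is not shown in this diff. As a reference for reading the code, a minimal pure-PyTorch sketch of what per-tensor dynamic scaled FP8 quantization conventionally computes (an assumption about the kernel's semantics, not its actual implementation; requires torch >= 2.1 for float8 dtypes):

    import torch

    def dynamic_scaled_fp8_quant_ref(t, fp8_dtype=torch.float8_e5m2):
        # Per-tensor dynamic scale: map the tensor's absolute max onto the
        # largest representable FP8 value, then round into FP8.
        fp8_max = torch.finfo(fp8_dtype).max
        scale = t.abs().max().float() / fp8_max  # (a real kernel would guard scale == 0)
        q = (t / scale).clamp(-fp8_max, fp8_max).to(fp8_dtype)
        return q, scale

    w = torch.randn(4096, 4096)
    q, scale = dynamic_scaled_fp8_quant_ref(w)
    w_approx = q.to(torch.float32) * scale       # dequantized approximation of w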
@@ -171,6 +189,8 @@ def ggml_convert_qtype(tensor: torch.Tensor, qtype: int,
                                      hist, imatrix)
         if qtype in [SYM_INT8_RTN, SYM_INT4_RTN, ASYM_INT4_RTN]:
             return dst_tensor, scale.type(torch.float16)
+        elif qtype in [TORCH_FP8E5, TORCH_FP8E4]:
+            return dst_tensor, scale
         else:
             return dst_tensor
 
@@ -179,7 +199,7 @@ def ggml_q_format_convet_cpu2xpu(tensor: torch.Tensor, num_elem: int, qtype: int
     if qtype == NF4:
         invalidInputError(tensor.dtype == torch.bfloat16,
                           "NF4 Input tensor must be bfloat16")
-    else:
+    elif qtype not in [TORCH_FP8E5, TORCH_FP8E4]:
         invalidInputError(tensor.dtype == torch.uint8,
                           "Input tensor except NF4 must be uint8")
 
@@ -208,7 +228,7 @@ def ggml_q_format_convet_xpu2cpu(tensor: torch.Tensor, num_elem: int, qtype: int
     if qtype == NF4:
         invalidInputError(tensor.dtype == torch.bfloat16,
                           "NF4 Input tensor must be bfloat16")
-    else:
+    elif qtype not in [TORCH_FP8E5, TORCH_FP8E4]:
         invalidInputError(tensor.dtype == torch.uint8,
                           "Input tensor must be uint8")
 
@@ -319,7 +339,8 @@ class FP4Params(torch.nn.Parameter):
                 qtype=None,
                 imatrix=None,
                 in_features=None,
-                enable_scale_search=False):
+                enable_scale_search=False,
+                torch_fp8_scale=None):
         if data is None:
             data = torch.empty(0)
 
@@ -332,6 +353,7 @@ class FP4Params(torch.nn.Parameter):
         self.imatrix = imatrix
         self.in_features = in_features
         self.enable_scale_search = enable_scale_search
+        self.torch_fp8_scale = torch_fp8_scale
         return self
 
     def ggml_mse(self, w, ggml_qtype, device):
@@ -391,7 +413,11 @@ class FP4Params(torch.nn.Parameter):
                                              imatrix=self.imatrix,
                                              in_features=self.in_features,
                                              enable_scale_search=self.enable_scale_search)
-            self.data = w_quantized
+            if self.qtype in [TORCH_FP8E5, TORCH_FP8E4]:
+                self.data = w_quantized[0]
+                self.torch_fp8_scale = w_quantized[1]
+            else:
+                self.data = w_quantized
             self.quantized = True
             self._shape = w.shape
         return self
@@ -414,6 +440,8 @@ class FP4Params(torch.nn.Parameter):
 
     def to(self, *args, **kwargs):
         device, dtype, non_blocking, convert_to_format = torch._C._nn._parse_to(*args, **kwargs)
+        if self.qtype in [TORCH_FP8E5, TORCH_FP8E4]:
+            dtype = None
         if (device is not None and device.type == "cpu" and self.data.device.type == "cpu"):
             return self.quantize(device.type)
         elif device is not None and device.type == "meta" and self.data.device.type == "meta":
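
A note on the forced `dtype = None` above: `Parameter.to(dtype=...)` would otherwise cast the float8 payload (for example to float16) and silently discard the quantized representation, whereas a pure device move keeps it intact. A standalone illustration of the distinction in plain torch, not using FP4Params:

    import torch

    q = torch.randn(8).to(torch.float8_e5m2)  # quantized payload
    moved = q.to("cpu")                       # device move: dtype stays float8_e5m2
    cast = q.to(torch.float16)                # dtype cast: the payload is dequantized,
                                              # which FP4Params.to() now prevents
    print(moved.dtype, cast.dtype)            # torch.float8_e5m2 torch.float16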
@@ -424,6 +452,7 @@ class FP4Params(torch.nn.Parameter):
             self.data = ggml_q_format_convet_cpu2xpu(self.data,
                                                      reduce(mul, self._shape, 1),
                                                      self.qtype)
+            fp8_scale = None if self.torch_fp8_scale is None else self.torch_fp8_scale.to(device)
             new_param = FP4Params(super().to(device=device,
                                              dtype=dtype,
                                              non_blocking=non_blocking),
@@ -431,9 +460,11 @@ class FP4Params(torch.nn.Parameter):
                                   quantized=self.quantized,
                                   _shape=self._shape,
                                   qtype=self.qtype,
-                                  enable_scale_search=self.enable_scale_search)
+                                  enable_scale_search=self.enable_scale_search,
+                                  torch_fp8_scale=fp8_scale)
             return new_param
         elif (device is not None and device.type == "cpu" and self.data.device.type == "xpu"):
+            fp8_scale = None if self.torch_fp8_scale is None else self.torch_fp8_scale.to(device)
             new_param = FP4Params(super().to(device=device,
                                              dtype=dtype,
                                              non_blocking=non_blocking),
@@ -441,7 +472,8 @@ class FP4Params(torch.nn.Parameter):
                                   quantized=self.quantized,
                                   _shape=self._shape,
                                   qtype=self.qtype,
-                                  enable_scale_search=self.enable_scale_search)
+                                  enable_scale_search=self.enable_scale_search,
+                                  torch_fp8_scale=fp8_scale)
             ggml_xpu = new_param.data
             new_param.data = ggml_q_format_convet_xpu2cpu(ggml_xpu,
                                                           reduce(mul, new_param._shape, 1),
@@ -614,6 +646,7 @@ class LowBitLinear(nn.Linear):
         # Due to inconsistent training status in some models like Baichuan-7b-Chat,
         # we should check both self.training and torch.is_inference_mode_enabled().
         is_training = self.training and not torch.is_inference_mode_enabled()
+
         if is_training:
             # below logic is only for training
             autocast_dtype = get_autocast_dtype(x.device.type)
@@ -643,6 +676,8 @@ class LowBitLinear(nn.Linear):
 
         if self.weight.device.type == "xpu":
             if is_training and x_2d.requires_grad:
+                invalidInputError(self.weight.qtype not in [TORCH_FP8E5, TORCH_FP8E4],
+                                  "TORCH_FP8 training is not supported.")
                 result = MatMulLowBit.apply(x_2d, self.weight, self.out_len)
             else:
                 do_empty_cache = self.low_memory_mode and x_2d.shape[0] >= 1024
@@ -654,7 +689,11 @@ class LowBitLinear(nn.Linear):
                 else:
                     w = self.weight.data
 
-                if use_batch_forward(x_2d, self.weight.qtype, self.out_len) and \
+                if self.weight.qtype in [TORCH_FP8E5, TORCH_FP8E4]:
+                    import xe_linear
+                    result = xe_linear.run_linear_fp8(x_2d, w, self.bias,
+                                                      self.weight.torch_fp8_scale)
+                elif use_batch_forward(x_2d, self.weight.qtype, self.out_len) and \
                         (x_2d.dtype == torch.half or self.conver_to_half):
                     import xe_batch
                     result = xe_batch.batch_forward(x_2d, w, self.qtype)
@@ -682,13 +721,13 @@ class LowBitLinear(nn.Linear):
                 else:
                     invalidInputError(False, "mp_group is not None, but no supported backend found")
 
-            if self.bias is not None:
+            if self.bias is not None and self.weight.qtype not in [TORCH_FP8E5, TORCH_FP8E4]:
                 result += self.bias
         else:
             # CPU logic
             # todo may need to set a different number on different platforms
-            invalidInputError(self.qtype != NF3 and self.qtype != NF4 and self.qtype != FP8E4
-                              and self.qtype != FP4 and self.qtype != FP8E5,
+            invalidInputError(self.qtype not in [NF3, NF4, FP8E4, FP4, FP8E5,
+                                                 TORCH_FP8E5, TORCH_FP8E4],
                               "NF3, NF4, FP4 and FP8 quantization are currently not"
                               " supported on CPU")
             if self.training and x.requires_grad:
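
`run_linear_fp8` is another xe_linear XPU kernel not defined in this diff. Given the per-tensor scale stored at quantization time, a plausible reference semantics in plain PyTorch is dequantize-then-linear with the bias fused in, which would also explain why the generic `result += self.bias` step is skipped for these qtypes above (an assumption about the kernel, not its actual code):

    import torch
    import torch.nn.functional as F

    def run_linear_fp8_ref(x_2d, w_fp8, bias, scale):
        # Assumed semantics: dequantize the FP8 weight with its per-tensor
        # scale, then apply an ordinary linear layer with fused bias.
        w = w_fp8.to(x_2d.dtype) * scale.to(x_2d.dtype)
        return F.linear(x_2d, w, bias)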
ipex_llm/vllm/xpu/model_convert.py CHANGED
@@ -129,9 +129,15 @@ def get_load_function(low_bit):
         if "glm-4v" in self.vllm_config.model_config.model.lower() and \
                 low_bit in ("sym_int4", "woq_int4"):
             modules = ["dense_4h_to_h"]
+        if "phi4mm" in self.vllm_config.model_config.hf_config.model_type:
+            modules = ["vision_encoder", "embed_tokens_extend"]
         if low_bit == "fp16":
             # to fix qwen2.5-vl and glm-4v
-            modules = ["vision", "visual"]
+            if modules is None:
+                modules = ["vision", "visual"]
+            else:
+                modules.append("vision")
+                modules.append("visual")
         optimize_model(self.model,
                        low_bit=low_bit,
                        torch_dtype=self.vllm_config.model_config.dtype,
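
Restated outside the surrounding function for readability, the exclusion-list selection above behaves like this (a standalone sketch; `model` and `model_type` stand in for the vllm_config fields read in the diff):

    from typing import List, Optional

    def select_excluded_modules(model: str, model_type: str,
                                low_bit: str) -> Optional[List[str]]:
        # Modules named here are kept out of low-bit conversion.
        modules = None
        if "glm-4v" in model.lower() and low_bit in ("sym_int4", "woq_int4"):
            modules = ["dense_4h_to_h"]
        if "phi4mm" in model_type:
            modules = ["vision_encoder", "embed_tokens_extend"]
        if low_bit == "fp16":
            # keep vision towers in fp16 (fixes qwen2.5-vl and glm-4v)
            if modules is None:
                modules = ["vision", "visual"]
            else:
                modules.extend(["vision", "visual"])
        return modules

    # select_excluded_modules("some/phi4mm-model", "phi4mm", "fp16")
    # -> ["vision_encoder", "embed_tokens_extend", "vision", "visual"]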
{ipex_llm-2.3.0b20250603.dist-info → ipex_llm-2.3.0b20250605.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ipex-llm
-Version: 2.3.0b20250603
+Version: 2.3.0b20250605
 Summary: Large Language Model Develop Toolkit
 Home-page: https://github.com/intel-analytics/ipex-llm
 Author: BigDL Authors
@@ -27,7 +27,7 @@ Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'all'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'all'
 Provides-Extra: cpp
-Requires-Dist: bigdl-core-cpp ==2.7.0b20250603 ; extra == 'cpp'
+Requires-Dist: bigdl-core-cpp ==2.7.0b20250605 ; extra == 'cpp'
 Requires-Dist: setuptools ; extra == 'cpp'
 Requires-Dist: onednn-devel ==2025.0.1 ; (platform_system == "Windows") and extra == 'cpp'
 Requires-Dist: onednn ==2025.0.1 ; (platform_system == "Windows") and extra == 'cpp'
@@ -60,7 +60,7 @@ Requires-Dist: transformers ==4.40.0 ; extra == 'npu'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'npu'
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'npu'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'npu'
-Requires-Dist: bigdl-core-npu ==2.7.0b20250603 ; (platform_system == "Windows") and extra == 'npu'
+Requires-Dist: bigdl-core-npu ==2.7.0b20250605 ; (platform_system == "Windows") and extra == 'npu'
 Provides-Extra: serving
 Requires-Dist: py-cpuinfo ; extra == 'serving'
 Requires-Dist: fschat[model_worker,webui] ==0.2.36 ; extra == 'serving'
@@ -80,9 +80,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-21 ==2.7.0b20250603 ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-batch-21 ==2.7.0b20250603 ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-addons-21 ==2.7.0b20250603 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-21 ==2.7.0b20250605 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.7.0b20250605 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.7.0b20250605 ; extra == 'xpu'
 Provides-Extra: xpu-2-1
 Requires-Dist: py-cpuinfo ; extra == 'xpu-2-1'
 Requires-Dist: protobuf ; extra == 'xpu-2-1'
@@ -97,9 +97,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu-2-1'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-21 ==2.7.0b20250603 ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-batch-21 ==2.7.0b20250603 ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-addons-21 ==2.7.0b20250603 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-21 ==2.7.0b20250605 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.7.0b20250605 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.7.0b20250605 ; extra == 'xpu-2-1'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-1'
 Requires-Dist: dpcpp-cpp-rt ==2024.0.2 ; (platform_system == "Windows") and extra == 'xpu-2-1'
 Requires-Dist: mkl-dpcpp ==2024.0.0 ; (platform_system == "Windows") and extra == 'xpu-2-1'
@@ -117,7 +117,7 @@ Requires-Dist: setuptools ; extra == 'xpu-2-6'
 Requires-Dist: torch ==2.6.0+xpu ; extra == 'xpu-2-6'
 Requires-Dist: torchvision ==0.21.0+xpu ; extra == 'xpu-2-6'
 Requires-Dist: torchaudio ==2.6.0+xpu ; extra == 'xpu-2-6'
-Requires-Dist: bigdl-core-xe-all ==2.7.0b20250603 ; extra == 'xpu-2-6'
+Requires-Dist: bigdl-core-xe-all ==2.7.0b20250605 ; extra == 'xpu-2-6'
 Requires-Dist: onednn-devel ==2025.0.1 ; extra == 'xpu-2-6'
 Requires-Dist: onednn ==2025.0.1 ; extra == 'xpu-2-6'
 Requires-Dist: dpcpp-cpp-rt ==2025.0.2 ; extra == 'xpu-2-6'
@@ -132,7 +132,7 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-2-6-arl'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-2-6-arl'
 Requires-Dist: tabulate ; extra == 'xpu-2-6-arl'
 Requires-Dist: setuptools ; extra == 'xpu-2-6-arl'
-Requires-Dist: bigdl-core-xe-all ==2.7.0b20250603 ; extra == 'xpu-2-6-arl'
+Requires-Dist: bigdl-core-xe-all ==2.7.0b20250605 ; extra == 'xpu-2-6-arl'
 Requires-Dist: onednn-devel ==2025.0.1 ; extra == 'xpu-2-6-arl'
 Requires-Dist: onednn ==2025.0.1 ; extra == 'xpu-2-6-arl'
 Requires-Dist: dpcpp-cpp-rt ==2025.0.2 ; extra == 'xpu-2-6-arl'
{ipex_llm-2.3.0b20250603.dist-info → ipex_llm-2.3.0b20250605.dist-info}/RECORD CHANGED
@@ -9,7 +9,7 @@ ipex_llm/cli/prompts/chat-with-llm.txt,sha256=PpSyd4FQQd-T7ptfXL9jZp7dgstevu1fsx
 ipex_llm/ggml/__init__.py,sha256=FzapYBUiTdZf0LzlN9hfJI-HE1OTi_2dzaYELJ9Mw8s,1272
 ipex_llm/ggml/convert.py,sha256=xfWH1E_hivbsxVo8h00STjH1Rlu9-dZQkCLLeIs1TWA,5286
 ipex_llm/ggml/convert_model.py,sha256=t-tGK9w8ZRi9dlDLTutput3ZBKj3ji94WUJi2KG8hkA,5955
-ipex_llm/ggml/quantize.py,sha256=Cvk1R771rRDhSW7BRWcmb4ImY6TWDl_u9Vkdh7rYSuM,6367
+ipex_llm/ggml/quantize.py,sha256=3RQvkCvYz6rTRXAaXMK854JA6g1d8uq5JXZ7OZcj1eg,6490
 ipex_llm/ggml/model/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
 ipex_llm/ggml/model/bloom/__init__.py,sha256=291QHI19FMw7Z1oaKBAf2YJ0M51iYqWC4IT1ejI-OGg,900
 ipex_llm/ggml/model/bloom/bloom.py,sha256=fUxgZd_Uc4RXaMC_naYdjekwNprM1TpURmQ8VbocShc,17975
@@ -41,35 +41,35 @@ ipex_llm/langchain/llms/transformerspipelinellm.py,sha256=vm522YPPwWxxAPVvQBtxRf
 ipex_llm/langchain/vllm/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
 ipex_llm/langchain/vllm/vllm.py,sha256=6dxc-ZISZQrJilEa_HA827l75Dv9rcHpY_G6FdJ8BVs,7793
 ipex_llm/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ipex_llm/libs/bloom-api.dll,sha256=2iiONXdYbNVFXpjWnSg6ALdpUC_vXYDpDZ3kypbZkMI,36352
-ipex_llm/libs/bloom.dll,sha256=ef7cAOoa7FMrArAntBQcZ-cXdQgwkPk1Ewb8tEjNMAU,507904
-ipex_llm/libs/gptneox-api.dll,sha256=xV_cwbAdOJgbaXBeKHYPFKST91PvIubVFKPIju3dLqM,24576
-ipex_llm/libs/gptneox.dll,sha256=byWmqMhwZkeFHbSuq6uAtT3pNw3CFtJ9-EZki5qxmok,568320
-ipex_llm/libs/libbloom_avx.dll,sha256=RwPN6SlxgYSfV_IoBR0057GEAqn9qFah391CFf9_6Oc,536576
-ipex_llm/libs/libbloom_vnni.dll,sha256=CSlK6fIHt_LWUNXSDcmr07Oc3N5-I_X83v9WRXQdTMY,508416
-ipex_llm/libs/libgptneox_avx.dll,sha256=ewwZC9V6SVtqWURxBi6Ci-oMYaaC5493RNBsY2WLp8w,596992
-ipex_llm/libs/libgptneox_vnni.dll,sha256=q9isGSvnkBc0WlRj2VvseTGTSwPXZ7lKnyivodAe4DA,568832
-ipex_llm/libs/libllama_avx.dll,sha256=rA-alWTcLOP7-fHg3Ei6UxvFi4vZ2jSKTilexk0-lRU,591360
-ipex_llm/libs/libllama_vnni.dll,sha256=5TVLhryFFizH1Q-1Lt8fmwF8U2cWPHLTer1SAm-8w_k,563200
-ipex_llm/libs/libstarcoder_avx.dll,sha256=CD3s7jzP25G-pOSqZs1nE8t4ONHzHQxQflqqeKtmGBU,627712
-ipex_llm/libs/libstarcoder_vnni.dll,sha256=elHNYTRjU4Q8-IiGUKZu2AjaXpoFnCRRttSDu3fDQeE,599552
-ipex_llm/libs/llama-api.dll,sha256=pn160dmbwa7bUgA1A1rSjCNLY80GVsQlkRlPEiDe-IM,25600
-ipex_llm/libs/llama.dll,sha256=CQccqnFLy28phB3JLUwUF40FNJTC2eFaSFaam0V76bA,562688
-ipex_llm/libs/main-bloom.exe,sha256=5kZPqipliB4_a_stE1yEKD-xpMLgi0qqS-odEnR1arU,103424
-ipex_llm/libs/main-gptneox.exe,sha256=RtSnKro6TYvCxT4ibRVVqTicIQ8l1uEvdh5qLPwKrAg,98816
-ipex_llm/libs/main-llama.exe,sha256=WCAD0IsQqpB7kE4MZ-Rve0v044M1uUPEvaNwiVL8Ww8,99840
-ipex_llm/libs/main-starcoder.exe,sha256=klpzDUV0n7AKrVdMg3wvkOb5BNvZBb7ubU0FBLioYzA,157696
-ipex_llm/libs/pipeline.dll,sha256=H04OpIP5tsRZX_pxzlTN9fOhQeCfRy3VYiorrqu9-fA,73216
-ipex_llm/libs/quantize-bloom.exe,sha256=BTCZTMYlQ1h8_r2qwGa3haYmZ43KiVp0CLjAlzi4a6E,126464
-ipex_llm/libs/quantize-bloom_vnni.exe,sha256=eWeX0oiA7AeEh0uPEye-dHxutkp5bJvY78Kcc9DC0Mg,128000
-ipex_llm/libs/quantize-gptneox.exe,sha256=lZaClPPwAzgZ2CG1BO3nJQgLAfiNFVGEiHAVfLTgkIs,104448
-ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=ZV2tctKKCtlX-SEfLzpVIKJPz-3xXRPX2PEpR8Xab08,104960
-ipex_llm/libs/quantize-llama.exe,sha256=pYHtPC90SMF6667uN4291-5PMCXI5fHqhaG6AAYaw8g,110080
-ipex_llm/libs/quantize-llama_vnni.exe,sha256=pMlorWCDsD-rzeewNAH-P7NifwW7Fcu-grPJwvqw1Yo,110592
-ipex_llm/libs/quantize-starcoder.exe,sha256=1PZhBJcP3fzeGeZ9NXV53UkpWO287HgE5zaNK4zfuEM,127488
-ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=VX7MpJ--Pn-8BbFLv-C773eNXtzYQsXlg5KZVZxcsaE,128512
-ipex_llm/libs/starcoder-api.dll,sha256=0vCKcVFlGiBtDsubvgOkWHYNS57oMIj-3Zp5_ZThsvo,21504
-ipex_llm/libs/starcoder.dll,sha256=CT2jIqGrkRHQwo7WNBme0TNTBjvpJl518jlKVe68wpg,599040
+ipex_llm/libs/bloom-api.dll,sha256=C2Gy5HeAYJzx9slLgEXvTEtYSbnF0NecHsyKCKR8to4,36352
+ipex_llm/libs/bloom.dll,sha256=YYDzX4Z5hnTQyZjFXdzZoFIkplvpxwT56PrXiQnNZcE,507904
+ipex_llm/libs/gptneox-api.dll,sha256=AeWoztSA6tms5sqE4daiExLgBwzC9pKDW36U8Wy38a4,24576
+ipex_llm/libs/gptneox.dll,sha256=VihlfvAOtf32NqVl59LsIioACyt0q5YNXQMsGfez92o,568320
+ipex_llm/libs/libbloom_avx.dll,sha256=oYoPfOZ8qtCcelb9IUPEdK958_EjLqULMn0UvH9p0UM,536576
+ipex_llm/libs/libbloom_vnni.dll,sha256=RSGJVyxMpNWML4evaCVoPQFijungzIAj67FPPaJKYKQ,508416
+ipex_llm/libs/libgptneox_avx.dll,sha256=ZLH-nd2MEokE52jTNjNIGzQ6mSVlp4dRnkyXTNT8VPk,596992
+ipex_llm/libs/libgptneox_vnni.dll,sha256=j4YHCGvcyR78WZ70F9EsR9QWhK5ztOI6IgaPlo1GXI8,568832
+ipex_llm/libs/libllama_avx.dll,sha256=2jGixTIolwhmc_7H1Apm2Hwoq_yyHE4emejSFVoUq78,591360
+ipex_llm/libs/libllama_vnni.dll,sha256=8LAx0h40W4F4caV6ADFgQRc4tVfnLLPC6owjg6lZLlY,563200
+ipex_llm/libs/libstarcoder_avx.dll,sha256=bOyDjdRFnpPFmoL7bEaqE1nE_m3J_HtUJkAO5vZPM_I,627712
+ipex_llm/libs/libstarcoder_vnni.dll,sha256=CHWEQ-ApRnHc1LovlXwjO63UzfhRKCnTrUOhHxmY65o,599552
+ipex_llm/libs/llama-api.dll,sha256=Hd_JfzQjtNfd0fnnNFLO-UHo-exlFxMoxW2NgV7y1co,25600
+ipex_llm/libs/llama.dll,sha256=mn2QSGZ6IY6Z5G_m-qNO_wrwOy3eQJ1AlQ4roNpu3PE,562688
+ipex_llm/libs/main-bloom.exe,sha256=cHNHHlKQEzrHgm0QpYqjwhuBFZ6huMjbdRurTJtRtB8,103424
+ipex_llm/libs/main-gptneox.exe,sha256=MpiUlvrfmLtWbcBfEmfisU8T5FbxXD0Uk2acx7yACig,98816
+ipex_llm/libs/main-llama.exe,sha256=nqBLLBVjNnZCh72KL3XmmS4tTKiKuiTFB-vxPSFURpE,99840
+ipex_llm/libs/main-starcoder.exe,sha256=aDOKY_AhaqAhAgMJXmbAF1aIv6Pow6-mnW65R5ojwcc,157696
+ipex_llm/libs/pipeline.dll,sha256=y5x6scPUEi-8pV2SqCo6_x87WGxV_jxwRRMvLU4XoLU,73216
+ipex_llm/libs/quantize-bloom.exe,sha256=7dghzsKCkrchRMhme5PabOkuBvkOIAV191T_6Eo3FVA,126464
+ipex_llm/libs/quantize-bloom_vnni.exe,sha256=ioOn79_l1u7m1QUpvb0DWhQIVdoBdXMxAsA9mDKn_W8,128000
+ipex_llm/libs/quantize-gptneox.exe,sha256=6_wZBbgeaYie8T7LL37nh9mOCOyLc4sDi8T0oFZgFG0,104448
+ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=ZJXaAOygZRYS8efiPFc2iJ42BWKeJAQ21Gyb_qYCPAM,104960
+ipex_llm/libs/quantize-llama.exe,sha256=VRiSH4HzV8laqb2EIi6DJ07XMVy4J-N8Mx2gf8wk1Ww,110080
+ipex_llm/libs/quantize-llama_vnni.exe,sha256=kW2sdD6eRXS6IkYXQkZb0fdCHrJs_voUVlqy3pz1xMs,110592
+ipex_llm/libs/quantize-starcoder.exe,sha256=HXD4f_muhQnlqzbFpeHNbvPPf0qXk3D69bPhFps6t7I,127488
+ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=oCK-30YAEKF0DkwR1to8F97LzvWuPNeiEKqtfJ0yKCg,128512
+ipex_llm/libs/starcoder-api.dll,sha256=b8v58Pgz-gxyqSDo670JIZs7EadA2MymaARbmSFfYlk,21504
+ipex_llm/libs/starcoder.dll,sha256=IFRyTp4l9XfRgaB5yVik1EM2qHgpTEg3GQVNSciScXI,599040
 ipex_llm/llamaindex/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
 ipex_llm/llamaindex/llms/__init__.py,sha256=KP1lEdGqDuxPoxL1ZSH25Pm2kKMPJBWUTLR0ckSLMIU,1139
 ipex_llm/llamaindex/llms/bigdlllm.py,sha256=FQBzq1KOjfc6uofTXAha3O7TqpJkNfOFepXQmOVlbnI,26314
@@ -94,7 +94,7 @@ ipex_llm/transformers/kv.py,sha256=src_HcVDKFwQ1V8hdTrFQw5RIwUewM9VOR47GVTPJG4,2
 ipex_llm/transformers/lisa.py,sha256=F5WxbtXQ7RdKulj83h_2DnEIgKiKGZf7zvOmg6QBl2s,3289
 ipex_llm/transformers/loader.py,sha256=c9qfJSC6-in-mkd-iKb1igk3nHWUYS3QtyH2cOazmKc,6825
 ipex_llm/transformers/lookup.py,sha256=b6OlZ9OV10R9qeWw8mVryVpDxszkjwLkldvi7GPMJY8,19614
-ipex_llm/transformers/low_bit_linear.py,sha256=f47v3w3DUG0G65RawgiL5y9N8l_GRRz6uaCSTMga2zM,39281
+ipex_llm/transformers/low_bit_linear.py,sha256=03TMG4GZsgRPvchQC2h7eMU9IQ9XCyVcdh3Pvi7_Rew,41550
 ipex_llm/transformers/model.py,sha256=tWTzKsCz8A1P5gYEeG9KZgpxQgbP9hQ-TWAdkebA6Jg,40886
 ipex_llm/transformers/modelling_bigdl.py,sha256=7JpNVMuyq_OmtNUaMFMXdxPWZp2q0QHC02QeA-VTPOw,6709
 ipex_llm/transformers/npu_model.py,sha256=X8mdY6N9TYlxG41wmFloX44ZUjyitFzdKbhzO7TToFY,40309
@@ -253,16 +253,16 @@ ipex_llm/vllm/cpu/entrypoints/openai/cli_args.py,sha256=hB398yYtKauASRzevctScdbF
 ipex_llm/vllm/xpu/__init__.py,sha256=zBSG6nzrVF5QnpR6_f7kPhBFeowTE9gaZ7D5m98E7_w,585
 ipex_llm/vllm/xpu/ipex_llm_v1_wrapper.py,sha256=pd939vFomKIg9Qn2NO4u0OF6hPgvQpqcfJSxqBzcqhA,825
 ipex_llm/vllm/xpu/ipex_llm_wrapper.py,sha256=_CbhvBuf_KPnmLfngYKtJl5gPAHVsG2mWth3wSeaH3M,892
-ipex_llm/vllm/xpu/model_convert.py,sha256=oedafTsnysTi78PGYcjn1w5rnIBfBx4_mpZp2fF6z44,10093
+ipex_llm/vllm/xpu/model_convert.py,sha256=HZeTrQHMYfgXlz1b9KiKdAUZ57nLgpv6VhM5CkiSrUc,10416
 ipex_llm/vllm/xpu/engine/__init__.py,sha256=sOvwLx_Zj0jiRCGj9W3DgGTfcSU3hABYhgIQI7T6cxU,879
 ipex_llm/vllm/xpu/engine/engine.py,sha256=XAprw7VifjfnR915TZOaKcxe3QCFsVBgxzS8qOdn1yg,14462
 ipex_llm/vllm/xpu/entrypoints/openai/api_server.py,sha256=VlmS56hBHBZTIZ5Jhvb4TZN-h28O7uMn33hX8NiJXKk,45719
 ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py,sha256=hB398yYtKauASRzevctScdbFIjiiSGMAe1bwEuIHrhY,10893
-ipex_llm-2.3.0b20250603.data/scripts/ipex-llm-init.bat,sha256=HPtCYuDYwEatq7dAwOvdfVcHYCpAVdbj75K1qh0vQek,2578
-ipex_llm-2.3.0b20250603.data/scripts/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
-ipex_llm-2.3.0b20250603.data/scripts/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
-ipex_llm-2.3.0b20250603.dist-info/METADATA,sha256=ksoQDNkxXOZiquKGwSnn-LS4DfdIbFQ9mCbCzDg4AH4,8865
-ipex_llm-2.3.0b20250603.dist-info/WHEEL,sha256=6iYPr8vTHsyDK75jr9X0V3I9wPSVmtwr_8fdATBciGk,98
-ipex_llm-2.3.0b20250603.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
-ipex_llm-2.3.0b20250603.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
-ipex_llm-2.3.0b20250603.dist-info/RECORD,,
+ipex_llm-2.3.0b20250605.data/scripts/ipex-llm-init.bat,sha256=HPtCYuDYwEatq7dAwOvdfVcHYCpAVdbj75K1qh0vQek,2578
+ipex_llm-2.3.0b20250605.data/scripts/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
+ipex_llm-2.3.0b20250605.data/scripts/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
+ipex_llm-2.3.0b20250605.dist-info/METADATA,sha256=2CommuodTS_N2xOt4lnyS-8ZP0vWAMSEBYQOElEuSjk,8865
+ipex_llm-2.3.0b20250605.dist-info/WHEEL,sha256=6iYPr8vTHsyDK75jr9X0V3I9wPSVmtwr_8fdATBciGk,98
+ipex_llm-2.3.0b20250605.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
+ipex_llm-2.3.0b20250605.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
+ipex_llm-2.3.0b20250605.dist-info/RECORD,,