ipex-llm 2.3.0b20250603__py3-none-win_amd64.whl → 2.3.0b20250605__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. ipex_llm/ggml/quantize.py +3 -0
  2. ipex_llm/libs/bloom-api.dll +0 -0
  3. ipex_llm/libs/bloom.dll +0 -0
  4. ipex_llm/libs/gptneox-api.dll +0 -0
  5. ipex_llm/libs/gptneox.dll +0 -0
  6. ipex_llm/libs/libbloom_avx.dll +0 -0
  7. ipex_llm/libs/libbloom_vnni.dll +0 -0
  8. ipex_llm/libs/libgptneox_avx.dll +0 -0
  9. ipex_llm/libs/libgptneox_vnni.dll +0 -0
  10. ipex_llm/libs/libllama_avx.dll +0 -0
  11. ipex_llm/libs/libllama_vnni.dll +0 -0
  12. ipex_llm/libs/libstarcoder_avx.dll +0 -0
  13. ipex_llm/libs/libstarcoder_vnni.dll +0 -0
  14. ipex_llm/libs/llama-api.dll +0 -0
  15. ipex_llm/libs/llama.dll +0 -0
  16. ipex_llm/libs/main-bloom.exe +0 -0
  17. ipex_llm/libs/main-gptneox.exe +0 -0
  18. ipex_llm/libs/main-llama.exe +0 -0
  19. ipex_llm/libs/main-starcoder.exe +0 -0
  20. ipex_llm/libs/pipeline.dll +0 -0
  21. ipex_llm/libs/quantize-bloom.exe +0 -0
  22. ipex_llm/libs/quantize-bloom_vnni.exe +0 -0
  23. ipex_llm/libs/quantize-gptneox.exe +0 -0
  24. ipex_llm/libs/quantize-gptneox_vnni.exe +0 -0
  25. ipex_llm/libs/quantize-llama.exe +0 -0
  26. ipex_llm/libs/quantize-llama_vnni.exe +0 -0
  27. ipex_llm/libs/quantize-starcoder.exe +0 -0
  28. ipex_llm/libs/quantize-starcoder_vnni.exe +0 -0
  29. ipex_llm/libs/starcoder-api.dll +0 -0
  30. ipex_llm/libs/starcoder.dll +0 -0
  31. ipex_llm/transformers/low_bit_linear.py +81 -42
  32. ipex_llm/vllm/xpu/model_convert.py +7 -1
  33. {ipex_llm-2.3.0b20250603.dist-info → ipex_llm-2.3.0b20250605.dist-info}/METADATA +11 -11
  34. {ipex_llm-2.3.0b20250603.dist-info → ipex_llm-2.3.0b20250605.dist-info}/RECORD +40 -40
  35. {ipex_llm-2.3.0b20250603.data → ipex_llm-2.3.0b20250605.data}/scripts/ipex-llm-init.bat +0 -0
  36. {ipex_llm-2.3.0b20250603.data → ipex_llm-2.3.0b20250605.data}/scripts/llm-chat.ps1 +0 -0
  37. {ipex_llm-2.3.0b20250603.data → ipex_llm-2.3.0b20250605.data}/scripts/llm-cli.ps1 +0 -0
  38. {ipex_llm-2.3.0b20250603.dist-info → ipex_llm-2.3.0b20250605.dist-info}/WHEEL +0 -0
  39. {ipex_llm-2.3.0b20250603.dist-info → ipex_llm-2.3.0b20250605.dist-info}/entry_points.txt +0 -0
  40. {ipex_llm-2.3.0b20250603.dist-info → ipex_llm-2.3.0b20250605.dist-info}/top_level.txt +0 -0
ipex_llm/ggml/quantize.py CHANGED
@@ -54,6 +54,9 @@ ggml_tensor_qtype = {"sym_int4": 2, # q4_0 in ggml
                      "sym_int8_rtn": 32,
                      "asym_int4_rtn": 33,
                      "woq_int4": 34,
+                     "torch_fp8_e5m2": 35,
+                     "torch_fp8": 35,
+                     "torch_fp8_e4m3": 36
                      }
 
 # mixed precison from llama.cpp
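
Note on the new entries: "torch_fp8" is an alias that maps to the same id (35) as "torch_fp8_e5m2", so both strings select the E5M2 format, while "torch_fp8_e4m3" (36) selects E4M3. As a sketch of how these names would be used, assuming `load_in_low_bit` accepts the new strings the same way it accepts existing qtype names such as "sym_int4" (not verified against this build; the model id is illustrative):

    # Hypothetical usage sketch; the load_in_low_bit value is assumed accepted.
    from ipex_llm.transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained(
        "meta-llama/Llama-2-7b-hf",        # illustrative model id
        load_in_low_bit="torch_fp8_e5m2",  # "torch_fp8" selects the same qtype (35)
        trust_remote_code=True,
    ).to("xpu")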
ipex_llm/libs/*.dll and *.exe CHANGED
Binary files differ. All 29 prebuilt binaries listed above (bloom-api.dll through starcoder.dll) were rebuilt; their new sha256 digests appear in the RECORD diff below.
ipex_llm/transformers/low_bit_linear.py CHANGED
@@ -86,6 +86,8 @@ SYM_INT4_RTN = ggml_tensor_qtype["sym_int4_rtn"]
 SYM_INT8_RTN = ggml_tensor_qtype["sym_int8_rtn"]
 ASYM_INT4_RTN = ggml_tensor_qtype["asym_int4_rtn"]
 WOQ_INT4 = ggml_tensor_qtype["woq_int4"]
+TORCH_FP8E5 = ggml_tensor_qtype["torch_fp8_e5m2"]
+TORCH_FP8E4 = ggml_tensor_qtype["torch_fp8_e4m3"]
 RTN_DTYPE = {
     SYM_INT4_RTN: torch.uint8,
     ASYM_INT4_RTN: torch.uint8,
@@ -106,39 +108,44 @@ def ggml_convert_qtype(tensor: torch.Tensor, qtype: int,
                        imatrix: torch.Tensor=None,
                        in_features: int=None,
                        enable_scale_search: bool=False):
-    QK = ggml.ggml_qk_size(qtype)
-    block_size_in_bytes = ggml.ggml_type_size(qtype)
-
-    invalidInputError(tensor.dtype == torch.float,
-                      "Input tensor must be float32")
-    src = tensor.data.data_ptr()
-    src = ctypes.cast(src, ctypes.POINTER(ctypes.c_float))
-    n = tensor.numel()  # all elements
-    k = tensor.shape[-1]
-    invalidInputError(k % QK == 0,
-                      f"Last dim of input tensor must be multiple of {QK}")
-
-    dst_size = (n // QK) * block_size_in_bytes
-    if qtype in [SYM_INT8_RTN, SYM_INT4_RTN, ASYM_INT4_RTN]:
-        dst_tensor = torch.empty(dst_size, dtype=RTN_DTYPE[qtype],
-                                 device=device)
-        dst_tensor = dst_tensor.reshape(tensor.shape[0], tensor.shape[-1] // QK)
-        if qtype == ASYM_INT4_RTN:
-            scale = torch.empty((n // k) * 2, dtype=torch.float32,
-                                device=device)
-        else:
-            scale = torch.empty(n // k, dtype=torch.float32,
-                                device=device)
-    elif qtype == NF4:
-        # Deepspeed zero3 requires unified dtype,
-        # thus here uses bfloat16 consistent to other layers
-        # dst_size above is computed based on uint8, and for bfloat16,
-        # buffer size should be half
-        dst_tensor = torch.empty(dst_size // 2, dtype=torch.bfloat16,
-                                 device=device)
+    if qtype in [TORCH_FP8E5, TORCH_FP8E4]:
+        fp8_dtype = torch.float8_e5m2 if qtype == TORCH_FP8E5 else torch.float8_e4m3fn
+        dst_tensor = torch.empty(tensor.shape, device=device, dtype=fp8_dtype)
+        scale = torch.zeros(1, device=device, dtype=torch.float32)
     else:
-        dst_tensor = torch.empty(dst_size, dtype=torch.uint8,
-                                 device=device)
+        QK = ggml.ggml_qk_size(qtype)
+        block_size_in_bytes = ggml.ggml_type_size(qtype)
+
+        invalidInputError(tensor.dtype == torch.float,
+                          "Input tensor must be float32")
+        src = tensor.data.data_ptr()
+        src = ctypes.cast(src, ctypes.POINTER(ctypes.c_float))
+        n = tensor.numel()  # all elements
+        k = tensor.shape[-1]
+        invalidInputError(k % QK == 0,
+                          f"Last dim of input tensor must be multiple of {QK}")
+
+        dst_size = (n // QK) * block_size_in_bytes
+        if qtype in [SYM_INT8_RTN, SYM_INT4_RTN, ASYM_INT4_RTN]:
+            dst_tensor = torch.empty(dst_size, dtype=RTN_DTYPE[qtype],
+                                     device=device)
+            dst_tensor = dst_tensor.reshape(tensor.shape[0], tensor.shape[-1] // QK)
+            if qtype == ASYM_INT4_RTN:
+                scale = torch.empty((n // k) * 2, dtype=torch.float32,
+                                    device=device)
+            else:
+                scale = torch.empty(n // k, dtype=torch.float32,
+                                    device=device)
+        elif qtype == NF4:
+            # Deepspeed zero3 requires unified dtype,
+            # thus here uses bfloat16 consistent to other layers
+            # dst_size above is computed based on uint8, and for bfloat16,
+            # buffer size should be half
+            dst_tensor = torch.empty(dst_size // 2, dtype=torch.bfloat16,
+                                     device=device)
+        else:
+            dst_tensor = torch.empty(dst_size, dtype=torch.uint8,
+                                     device=device)
 
     if not convert_shape_only and device != 'meta':
         dst = ctypes.c_void_p(dst_tensor.data.data_ptr())
@@ -158,6 +165,17 @@ def ggml_convert_qtype(tensor: torch.Tensor, qtype: int,
                                               enable_scale_search,
                                               imatrix)
             return dst_tensor, scale.type(torch.float16)
+        elif qtype in [TORCH_FP8E5, TORCH_FP8E4]:
+            import xe_linear
+            tensor_device = tensor.device
+            tensor_xpu = tensor.to("xpu")
+            dst_tensor = dst_tensor.to("xpu")
+            scale = scale.to("xpu")
+
+            xe_linear.dynamic_scaled_fp8_quant(dst_tensor, tensor_xpu, scale)
+
+            # scale = scale.to(tensor_device)
+            dst_tensor = dst_tensor.to(tensor_device)
         else:
             ggml.ggml_quantize_tensor(src, dst, qtype, n, k, hist, enable_scale_search)
     else:
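
The hunk above routes FP8 quantization through `xe_linear.dynamic_scaled_fp8_quant`, an XPU kernel whose arithmetic is not shown in this diff. As a reference for reading the code, a minimal pure-PyTorch sketch of what per-tensor dynamic scaled FP8 quantization conventionally computes (an assumption about the kernel's semantics, not its actual implementation; requires torch >= 2.1 for float8 dtypes):

    import torch

    def dynamic_scaled_fp8_quant_ref(t, fp8_dtype=torch.float8_e5m2):
        # Per-tensor dynamic scale: map the tensor's absolute max onto the
        # largest representable FP8 value, then round into FP8.
        fp8_max = torch.finfo(fp8_dtype).max
        scale = t.abs().max().float() / fp8_max  # (a real kernel would guard scale == 0)
        q = (t / scale).clamp(-fp8_max, fp8_max).to(fp8_dtype)
        return q, scale

    w = torch.randn(4096, 4096)
    q, scale = dynamic_scaled_fp8_quant_ref(w)
    w_approx = q.to(torch.float32) * scale       # dequantized approximation of w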
@@ -171,6 +189,8 @@ def ggml_convert_qtype(tensor: torch.Tensor, qtype: int,
                                      hist, imatrix)
         if qtype in [SYM_INT8_RTN, SYM_INT4_RTN, ASYM_INT4_RTN]:
             return dst_tensor, scale.type(torch.float16)
+        elif qtype in [TORCH_FP8E5, TORCH_FP8E4]:
+            return dst_tensor, scale
         else:
             return dst_tensor
 
@@ -179,7 +199,7 @@ def ggml_q_format_convet_cpu2xpu(tensor: torch.Tensor, num_elem: int, qtype: int
     if qtype == NF4:
         invalidInputError(tensor.dtype == torch.bfloat16,
                           "NF4 Input tensor must be bfloat16")
-    else:
+    elif qtype not in [TORCH_FP8E5, TORCH_FP8E4]:
         invalidInputError(tensor.dtype == torch.uint8,
                           "Input tensor except NF4 must be uint8")
 
@@ -208,7 +228,7 @@ def ggml_q_format_convet_xpu2cpu(tensor: torch.Tensor, num_elem: int, qtype: int
     if qtype == NF4:
         invalidInputError(tensor.dtype == torch.bfloat16,
                           "NF4 Input tensor must be bfloat16")
-    else:
+    elif qtype not in [TORCH_FP8E5, TORCH_FP8E4]:
         invalidInputError(tensor.dtype == torch.uint8,
                           "Input tensor must be uint8")
 
@@ -319,7 +339,8 @@ class FP4Params(torch.nn.Parameter):
                 qtype=None,
                 imatrix=None,
                 in_features=None,
-                enable_scale_search=False):
+                enable_scale_search=False,
+                torch_fp8_scale=None):
         if data is None:
             data = torch.empty(0)
 
@@ -332,6 +353,7 @@ class FP4Params(torch.nn.Parameter):
         self.imatrix = imatrix
         self.in_features = in_features
         self.enable_scale_search = enable_scale_search
+        self.torch_fp8_scale = torch_fp8_scale
         return self
 
     def ggml_mse(self, w, ggml_qtype, device):
@@ -391,7 +413,11 @@ class FP4Params(torch.nn.Parameter):
                                              imatrix=self.imatrix,
                                              in_features=self.in_features,
                                              enable_scale_search=self.enable_scale_search)
-            self.data = w_quantized
+            if self.qtype in [TORCH_FP8E5, TORCH_FP8E4]:
+                self.data = w_quantized[0]
+                self.torch_fp8_scale = w_quantized[1]
+            else:
+                self.data = w_quantized
             self.quantized = True
             self._shape = w.shape
         return self
@@ -414,6 +440,8 @@ class FP4Params(torch.nn.Parameter):
 
     def to(self, *args, **kwargs):
         device, dtype, non_blocking, convert_to_format = torch._C._nn._parse_to(*args, **kwargs)
+        if self.qtype in [TORCH_FP8E5, TORCH_FP8E4]:
+            dtype = None
         if (device is not None and device.type == "cpu" and self.data.device.type == "cpu"):
             return self.quantize(device.type)
         elif device is not None and device.type == "meta" and self.data.device.type == "meta":
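
A note on the forced `dtype = None` above: `Parameter.to(dtype=...)` would otherwise cast the float8 payload (for example to float16) and silently discard the quantized representation, whereas a pure device move keeps it intact. A standalone illustration of the distinction in plain torch, not using FP4Params:

    import torch

    q = torch.randn(8).to(torch.float8_e5m2)  # quantized payload
    moved = q.to("cpu")                       # device move: dtype stays float8_e5m2
    cast = q.to(torch.float16)                # dtype cast: the payload is dequantized,
                                              # which FP4Params.to() now prevents
    print(moved.dtype, cast.dtype)            # torch.float8_e5m2 torch.float16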
@@ -424,6 +452,7 @@ class FP4Params(torch.nn.Parameter):
             self.data = ggml_q_format_convet_cpu2xpu(self.data,
                                                      reduce(mul, self._shape, 1),
                                                      self.qtype)
+            fp8_scale = None if self.torch_fp8_scale is None else self.torch_fp8_scale.to(device)
             new_param = FP4Params(super().to(device=device,
                                              dtype=dtype,
                                              non_blocking=non_blocking),
@@ -431,9 +460,11 @@ class FP4Params(torch.nn.Parameter):
                                   quantized=self.quantized,
                                   _shape=self._shape,
                                   qtype=self.qtype,
-                                  enable_scale_search=self.enable_scale_search)
+                                  enable_scale_search=self.enable_scale_search,
+                                  torch_fp8_scale=fp8_scale)
             return new_param
         elif (device is not None and device.type == "cpu" and self.data.device.type == "xpu"):
+            fp8_scale = None if self.torch_fp8_scale is None else self.torch_fp8_scale.to(device)
             new_param = FP4Params(super().to(device=device,
                                              dtype=dtype,
                                              non_blocking=non_blocking),
@@ -441,7 +472,8 @@ class FP4Params(torch.nn.Parameter):
                                   quantized=self.quantized,
                                   _shape=self._shape,
                                   qtype=self.qtype,
-                                  enable_scale_search=self.enable_scale_search)
+                                  enable_scale_search=self.enable_scale_search,
+                                  torch_fp8_scale=fp8_scale)
             ggml_xpu = new_param.data
             new_param.data = ggml_q_format_convet_xpu2cpu(ggml_xpu,
                                                           reduce(mul, new_param._shape, 1),
@@ -614,6 +646,7 @@ class LowBitLinear(nn.Linear):
         # Due to inconsistent training status in some models like Baichuan-7b-Chat,
         # we should check both self.training and torch.is_inference_mode_enabled().
         is_training = self.training and not torch.is_inference_mode_enabled()
+
         if is_training:
             # below logic is only for training
             autocast_dtype = get_autocast_dtype(x.device.type)
@@ -643,6 +676,8 @@ class LowBitLinear(nn.Linear):
 
         if self.weight.device.type == "xpu":
             if is_training and x_2d.requires_grad:
+                invalidInputError(self.weight.qtype not in [TORCH_FP8E5, TORCH_FP8E4],
+                                  "TORCH_FP8 training is not supported.")
                 result = MatMulLowBit.apply(x_2d, self.weight, self.out_len)
             else:
                 do_empty_cache = self.low_memory_mode and x_2d.shape[0] >= 1024
@@ -654,7 +689,11 @@ class LowBitLinear(nn.Linear):
                 else:
                     w = self.weight.data
 
-                if use_batch_forward(x_2d, self.weight.qtype, self.out_len) and \
+                if self.weight.qtype in [TORCH_FP8E5, TORCH_FP8E4]:
+                    import xe_linear
+                    result = xe_linear.run_linear_fp8(x_2d, w, self.bias,
+                                                      self.weight.torch_fp8_scale)
+                elif use_batch_forward(x_2d, self.weight.qtype, self.out_len) and \
                         (x_2d.dtype == torch.half or self.conver_to_half):
                     import xe_batch
                     result = xe_batch.batch_forward(x_2d, w, self.qtype)
@@ -682,13 +721,13 @@ class LowBitLinear(nn.Linear):
                 else:
                     invalidInputError(False, "mp_group is not None, but no supported backend found")
 
-            if self.bias is not None:
+            if self.bias is not None and self.weight.qtype not in [TORCH_FP8E5, TORCH_FP8E4]:
                 result += self.bias
         else:
             # CPU logic
             # todo may need to set a different number on different platforms
-            invalidInputError(self.qtype != NF3 and self.qtype != NF4 and self.qtype != FP8E4
-                              and self.qtype != FP4 and self.qtype != FP8E5,
+            invalidInputError(self.qtype not in [NF3, NF4, FP8E4, FP4, FP8E5,
+                                                 TORCH_FP8E5, TORCH_FP8E4],
                               "NF3, NF4, FP4 and FP8 quantization are currently not"
                               " supported on CPU")
             if self.training and x.requires_grad:
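
`run_linear_fp8` is another xe_linear XPU kernel not defined in this diff. Given the per-tensor scale stored at quantization time, a plausible reference semantics in plain PyTorch is dequantize-then-linear with the bias fused in, which would also explain why the generic `result += self.bias` step is skipped for these qtypes above (an assumption about the kernel, not its actual code):

    import torch
    import torch.nn.functional as F

    def run_linear_fp8_ref(x_2d, w_fp8, bias, scale):
        # Assumed semantics: dequantize the FP8 weight with its per-tensor
        # scale, then apply an ordinary linear layer with fused bias.
        w = w_fp8.to(x_2d.dtype) * scale.to(x_2d.dtype)
        return F.linear(x_2d, w, bias)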
ipex_llm/vllm/xpu/model_convert.py CHANGED
@@ -129,9 +129,15 @@ def get_load_function(low_bit):
         if "glm-4v" in self.vllm_config.model_config.model.lower() and \
                 low_bit in ("sym_int4", "woq_int4"):
             modules = ["dense_4h_to_h"]
+        if "phi4mm" in self.vllm_config.model_config.hf_config.model_type:
+            modules = ["vision_encoder", "embed_tokens_extend"]
         if low_bit == "fp16":
             # to fix qwen2.5-vl and glm-4v
-            modules = ["vision", "visual"]
+            if modules is None:
+                modules = ["vision", "visual"]
+            else:
+                modules.append("vision")
+                modules.append("visual")
         optimize_model(self.model,
                        low_bit=low_bit,
                        torch_dtype=self.vllm_config.model_config.dtype,
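
Restated outside the surrounding function for readability, the exclusion-list selection above behaves like this (a standalone sketch; `model` and `model_type` stand in for the vllm_config fields read in the diff):

    from typing import List, Optional

    def select_excluded_modules(model: str, model_type: str,
                                low_bit: str) -> Optional[List[str]]:
        # Modules named here are kept out of low-bit conversion.
        modules = None
        if "glm-4v" in model.lower() and low_bit in ("sym_int4", "woq_int4"):
            modules = ["dense_4h_to_h"]
        if "phi4mm" in model_type:
            modules = ["vision_encoder", "embed_tokens_extend"]
        if low_bit == "fp16":
            # keep vision towers in fp16 (fixes qwen2.5-vl and glm-4v)
            if modules is None:
                modules = ["vision", "visual"]
            else:
                modules.extend(["vision", "visual"])
        return modules

    # select_excluded_modules("some/phi4mm-model", "phi4mm", "fp16")
    # -> ["vision_encoder", "embed_tokens_extend", "vision", "visual"]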
{ipex_llm-2.3.0b20250603.dist-info → ipex_llm-2.3.0b20250605.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ipex-llm
-Version: 2.3.0b20250603
+Version: 2.3.0b20250605
 Summary: Large Language Model Develop Toolkit
 Home-page: https://github.com/intel-analytics/ipex-llm
 Author: BigDL Authors
@@ -27,7 +27,7 @@ Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'all'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'all'
 Provides-Extra: cpp
-Requires-Dist: bigdl-core-cpp ==2.7.0b20250603 ; extra == 'cpp'
+Requires-Dist: bigdl-core-cpp ==2.7.0b20250605 ; extra == 'cpp'
 Requires-Dist: setuptools ; extra == 'cpp'
 Requires-Dist: onednn-devel ==2025.0.1 ; (platform_system == "Windows") and extra == 'cpp'
 Requires-Dist: onednn ==2025.0.1 ; (platform_system == "Windows") and extra == 'cpp'
@@ -60,7 +60,7 @@ Requires-Dist: transformers ==4.40.0 ; extra == 'npu'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'npu'
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'npu'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'npu'
-Requires-Dist: bigdl-core-npu ==2.7.0b20250603 ; (platform_system == "Windows") and extra == 'npu'
+Requires-Dist: bigdl-core-npu ==2.7.0b20250605 ; (platform_system == "Windows") and extra == 'npu'
 Provides-Extra: serving
 Requires-Dist: py-cpuinfo ; extra == 'serving'
 Requires-Dist: fschat[model_worker,webui] ==0.2.36 ; extra == 'serving'
@@ -80,9 +80,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-21 ==2.7.0b20250603 ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-batch-21 ==2.7.0b20250603 ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-addons-21 ==2.7.0b20250603 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-21 ==2.7.0b20250605 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.7.0b20250605 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.7.0b20250605 ; extra == 'xpu'
 Provides-Extra: xpu-2-1
 Requires-Dist: py-cpuinfo ; extra == 'xpu-2-1'
 Requires-Dist: protobuf ; extra == 'xpu-2-1'
@@ -97,9 +97,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu-2-1'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-21 ==2.7.0b20250603 ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-batch-21 ==2.7.0b20250603 ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-addons-21 ==2.7.0b20250603 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-21 ==2.7.0b20250605 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.7.0b20250605 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.7.0b20250605 ; extra == 'xpu-2-1'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-1'
 Requires-Dist: dpcpp-cpp-rt ==2024.0.2 ; (platform_system == "Windows") and extra == 'xpu-2-1'
 Requires-Dist: mkl-dpcpp ==2024.0.0 ; (platform_system == "Windows") and extra == 'xpu-2-1'
@@ -117,7 +117,7 @@ Requires-Dist: setuptools ; extra == 'xpu-2-6'
 Requires-Dist: torch ==2.6.0+xpu ; extra == 'xpu-2-6'
 Requires-Dist: torchvision ==0.21.0+xpu ; extra == 'xpu-2-6'
 Requires-Dist: torchaudio ==2.6.0+xpu ; extra == 'xpu-2-6'
-Requires-Dist: bigdl-core-xe-all ==2.7.0b20250603 ; extra == 'xpu-2-6'
+Requires-Dist: bigdl-core-xe-all ==2.7.0b20250605 ; extra == 'xpu-2-6'
 Requires-Dist: onednn-devel ==2025.0.1 ; extra == 'xpu-2-6'
 Requires-Dist: onednn ==2025.0.1 ; extra == 'xpu-2-6'
 Requires-Dist: dpcpp-cpp-rt ==2025.0.2 ; extra == 'xpu-2-6'
@@ -132,7 +132,7 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-2-6-arl'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-2-6-arl'
 Requires-Dist: tabulate ; extra == 'xpu-2-6-arl'
 Requires-Dist: setuptools ; extra == 'xpu-2-6-arl'
-Requires-Dist: bigdl-core-xe-all ==2.7.0b20250603 ; extra == 'xpu-2-6-arl'
+Requires-Dist: bigdl-core-xe-all ==2.7.0b20250605 ; extra == 'xpu-2-6-arl'
 Requires-Dist: onednn-devel ==2025.0.1 ; extra == 'xpu-2-6-arl'
 Requires-Dist: onednn ==2025.0.1 ; extra == 'xpu-2-6-arl'
 Requires-Dist: dpcpp-cpp-rt ==2025.0.2 ; extra == 'xpu-2-6-arl'
{ipex_llm-2.3.0b20250603.dist-info → ipex_llm-2.3.0b20250605.dist-info}/RECORD CHANGED
@@ -9,7 +9,7 @@ ipex_llm/cli/prompts/chat-with-llm.txt,sha256=PpSyd4FQQd-T7ptfXL9jZp7dgstevu1fsx
 ipex_llm/ggml/__init__.py,sha256=FzapYBUiTdZf0LzlN9hfJI-HE1OTi_2dzaYELJ9Mw8s,1272
 ipex_llm/ggml/convert.py,sha256=xfWH1E_hivbsxVo8h00STjH1Rlu9-dZQkCLLeIs1TWA,5286
 ipex_llm/ggml/convert_model.py,sha256=t-tGK9w8ZRi9dlDLTutput3ZBKj3ji94WUJi2KG8hkA,5955
-ipex_llm/ggml/quantize.py,sha256=Cvk1R771rRDhSW7BRWcmb4ImY6TWDl_u9Vkdh7rYSuM,6367
+ipex_llm/ggml/quantize.py,sha256=3RQvkCvYz6rTRXAaXMK854JA6g1d8uq5JXZ7OZcj1eg,6490
 ipex_llm/ggml/model/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
 ipex_llm/ggml/model/bloom/__init__.py,sha256=291QHI19FMw7Z1oaKBAf2YJ0M51iYqWC4IT1ejI-OGg,900
 ipex_llm/ggml/model/bloom/bloom.py,sha256=fUxgZd_Uc4RXaMC_naYdjekwNprM1TpURmQ8VbocShc,17975
@@ -41,35 +41,35 @@ ipex_llm/langchain/llms/transformerspipelinellm.py,sha256=vm522YPPwWxxAPVvQBtxRf
 ipex_llm/langchain/vllm/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
 ipex_llm/langchain/vllm/vllm.py,sha256=6dxc-ZISZQrJilEa_HA827l75Dv9rcHpY_G6FdJ8BVs,7793
 ipex_llm/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ipex_llm/libs/bloom-api.dll,sha256=2iiONXdYbNVFXpjWnSg6ALdpUC_vXYDpDZ3kypbZkMI,36352
-ipex_llm/libs/bloom.dll,sha256=ef7cAOoa7FMrArAntBQcZ-cXdQgwkPk1Ewb8tEjNMAU,507904
-ipex_llm/libs/gptneox-api.dll,sha256=xV_cwbAdOJgbaXBeKHYPFKST91PvIubVFKPIju3dLqM,24576
-ipex_llm/libs/gptneox.dll,sha256=byWmqMhwZkeFHbSuq6uAtT3pNw3CFtJ9-EZki5qxmok,568320
-ipex_llm/libs/libbloom_avx.dll,sha256=RwPN6SlxgYSfV_IoBR0057GEAqn9qFah391CFf9_6Oc,536576
-ipex_llm/libs/libbloom_vnni.dll,sha256=CSlK6fIHt_LWUNXSDcmr07Oc3N5-I_X83v9WRXQdTMY,508416
-ipex_llm/libs/libgptneox_avx.dll,sha256=ewwZC9V6SVtqWURxBi6Ci-oMYaaC5493RNBsY2WLp8w,596992
-ipex_llm/libs/libgptneox_vnni.dll,sha256=q9isGSvnkBc0WlRj2VvseTGTSwPXZ7lKnyivodAe4DA,568832
-ipex_llm/libs/libllama_avx.dll,sha256=rA-alWTcLOP7-fHg3Ei6UxvFi4vZ2jSKTilexk0-lRU,591360
-ipex_llm/libs/libllama_vnni.dll,sha256=5TVLhryFFizH1Q-1Lt8fmwF8U2cWPHLTer1SAm-8w_k,563200
-ipex_llm/libs/libstarcoder_avx.dll,sha256=CD3s7jzP25G-pOSqZs1nE8t4ONHzHQxQflqqeKtmGBU,627712
-ipex_llm/libs/libstarcoder_vnni.dll,sha256=elHNYTRjU4Q8-IiGUKZu2AjaXpoFnCRRttSDu3fDQeE,599552
-ipex_llm/libs/llama-api.dll,sha256=pn160dmbwa7bUgA1A1rSjCNLY80GVsQlkRlPEiDe-IM,25600
-ipex_llm/libs/llama.dll,sha256=CQccqnFLy28phB3JLUwUF40FNJTC2eFaSFaam0V76bA,562688
-ipex_llm/libs/main-bloom.exe,sha256=5kZPqipliB4_a_stE1yEKD-xpMLgi0qqS-odEnR1arU,103424
-ipex_llm/libs/main-gptneox.exe,sha256=RtSnKro6TYvCxT4ibRVVqTicIQ8l1uEvdh5qLPwKrAg,98816
-ipex_llm/libs/main-llama.exe,sha256=WCAD0IsQqpB7kE4MZ-Rve0v044M1uUPEvaNwiVL8Ww8,99840
-ipex_llm/libs/main-starcoder.exe,sha256=klpzDUV0n7AKrVdMg3wvkOb5BNvZBb7ubU0FBLioYzA,157696
-ipex_llm/libs/pipeline.dll,sha256=H04OpIP5tsRZX_pxzlTN9fOhQeCfRy3VYiorrqu9-fA,73216
-ipex_llm/libs/quantize-bloom.exe,sha256=BTCZTMYlQ1h8_r2qwGa3haYmZ43KiVp0CLjAlzi4a6E,126464
-ipex_llm/libs/quantize-bloom_vnni.exe,sha256=eWeX0oiA7AeEh0uPEye-dHxutkp5bJvY78Kcc9DC0Mg,128000
-ipex_llm/libs/quantize-gptneox.exe,sha256=lZaClPPwAzgZ2CG1BO3nJQgLAfiNFVGEiHAVfLTgkIs,104448
-ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=ZV2tctKKCtlX-SEfLzpVIKJPz-3xXRPX2PEpR8Xab08,104960
-ipex_llm/libs/quantize-llama.exe,sha256=pYHtPC90SMF6667uN4291-5PMCXI5fHqhaG6AAYaw8g,110080
-ipex_llm/libs/quantize-llama_vnni.exe,sha256=pMlorWCDsD-rzeewNAH-P7NifwW7Fcu-grPJwvqw1Yo,110592
-ipex_llm/libs/quantize-starcoder.exe,sha256=1PZhBJcP3fzeGeZ9NXV53UkpWO287HgE5zaNK4zfuEM,127488
-ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=VX7MpJ--Pn-8BbFLv-C773eNXtzYQsXlg5KZVZxcsaE,128512
-ipex_llm/libs/starcoder-api.dll,sha256=0vCKcVFlGiBtDsubvgOkWHYNS57oMIj-3Zp5_ZThsvo,21504
-ipex_llm/libs/starcoder.dll,sha256=CT2jIqGrkRHQwo7WNBme0TNTBjvpJl518jlKVe68wpg,599040
+ipex_llm/libs/bloom-api.dll,sha256=C2Gy5HeAYJzx9slLgEXvTEtYSbnF0NecHsyKCKR8to4,36352
+ipex_llm/libs/bloom.dll,sha256=YYDzX4Z5hnTQyZjFXdzZoFIkplvpxwT56PrXiQnNZcE,507904
+ipex_llm/libs/gptneox-api.dll,sha256=AeWoztSA6tms5sqE4daiExLgBwzC9pKDW36U8Wy38a4,24576
+ipex_llm/libs/gptneox.dll,sha256=VihlfvAOtf32NqVl59LsIioACyt0q5YNXQMsGfez92o,568320
+ipex_llm/libs/libbloom_avx.dll,sha256=oYoPfOZ8qtCcelb9IUPEdK958_EjLqULMn0UvH9p0UM,536576
+ipex_llm/libs/libbloom_vnni.dll,sha256=RSGJVyxMpNWML4evaCVoPQFijungzIAj67FPPaJKYKQ,508416
+ipex_llm/libs/libgptneox_avx.dll,sha256=ZLH-nd2MEokE52jTNjNIGzQ6mSVlp4dRnkyXTNT8VPk,596992
+ipex_llm/libs/libgptneox_vnni.dll,sha256=j4YHCGvcyR78WZ70F9EsR9QWhK5ztOI6IgaPlo1GXI8,568832
+ipex_llm/libs/libllama_avx.dll,sha256=2jGixTIolwhmc_7H1Apm2Hwoq_yyHE4emejSFVoUq78,591360
+ipex_llm/libs/libllama_vnni.dll,sha256=8LAx0h40W4F4caV6ADFgQRc4tVfnLLPC6owjg6lZLlY,563200
+ipex_llm/libs/libstarcoder_avx.dll,sha256=bOyDjdRFnpPFmoL7bEaqE1nE_m3J_HtUJkAO5vZPM_I,627712
+ipex_llm/libs/libstarcoder_vnni.dll,sha256=CHWEQ-ApRnHc1LovlXwjO63UzfhRKCnTrUOhHxmY65o,599552
+ipex_llm/libs/llama-api.dll,sha256=Hd_JfzQjtNfd0fnnNFLO-UHo-exlFxMoxW2NgV7y1co,25600
+ipex_llm/libs/llama.dll,sha256=mn2QSGZ6IY6Z5G_m-qNO_wrwOy3eQJ1AlQ4roNpu3PE,562688
+ipex_llm/libs/main-bloom.exe,sha256=cHNHHlKQEzrHgm0QpYqjwhuBFZ6huMjbdRurTJtRtB8,103424
+ipex_llm/libs/main-gptneox.exe,sha256=MpiUlvrfmLtWbcBfEmfisU8T5FbxXD0Uk2acx7yACig,98816
+ipex_llm/libs/main-llama.exe,sha256=nqBLLBVjNnZCh72KL3XmmS4tTKiKuiTFB-vxPSFURpE,99840
+ipex_llm/libs/main-starcoder.exe,sha256=aDOKY_AhaqAhAgMJXmbAF1aIv6Pow6-mnW65R5ojwcc,157696
+ipex_llm/libs/pipeline.dll,sha256=y5x6scPUEi-8pV2SqCo6_x87WGxV_jxwRRMvLU4XoLU,73216
+ipex_llm/libs/quantize-bloom.exe,sha256=7dghzsKCkrchRMhme5PabOkuBvkOIAV191T_6Eo3FVA,126464
+ipex_llm/libs/quantize-bloom_vnni.exe,sha256=ioOn79_l1u7m1QUpvb0DWhQIVdoBdXMxAsA9mDKn_W8,128000
+ipex_llm/libs/quantize-gptneox.exe,sha256=6_wZBbgeaYie8T7LL37nh9mOCOyLc4sDi8T0oFZgFG0,104448
+ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=ZJXaAOygZRYS8efiPFc2iJ42BWKeJAQ21Gyb_qYCPAM,104960
+ipex_llm/libs/quantize-llama.exe,sha256=VRiSH4HzV8laqb2EIi6DJ07XMVy4J-N8Mx2gf8wk1Ww,110080
+ipex_llm/libs/quantize-llama_vnni.exe,sha256=kW2sdD6eRXS6IkYXQkZb0fdCHrJs_voUVlqy3pz1xMs,110592
+ipex_llm/libs/quantize-starcoder.exe,sha256=HXD4f_muhQnlqzbFpeHNbvPPf0qXk3D69bPhFps6t7I,127488
+ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=oCK-30YAEKF0DkwR1to8F97LzvWuPNeiEKqtfJ0yKCg,128512
+ipex_llm/libs/starcoder-api.dll,sha256=b8v58Pgz-gxyqSDo670JIZs7EadA2MymaARbmSFfYlk,21504
+ipex_llm/libs/starcoder.dll,sha256=IFRyTp4l9XfRgaB5yVik1EM2qHgpTEg3GQVNSciScXI,599040
 ipex_llm/llamaindex/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
 ipex_llm/llamaindex/llms/__init__.py,sha256=KP1lEdGqDuxPoxL1ZSH25Pm2kKMPJBWUTLR0ckSLMIU,1139
 ipex_llm/llamaindex/llms/bigdlllm.py,sha256=FQBzq1KOjfc6uofTXAha3O7TqpJkNfOFepXQmOVlbnI,26314
@@ -94,7 +94,7 @@ ipex_llm/transformers/kv.py,sha256=src_HcVDKFwQ1V8hdTrFQw5RIwUewM9VOR47GVTPJG4,2
 ipex_llm/transformers/lisa.py,sha256=F5WxbtXQ7RdKulj83h_2DnEIgKiKGZf7zvOmg6QBl2s,3289
 ipex_llm/transformers/loader.py,sha256=c9qfJSC6-in-mkd-iKb1igk3nHWUYS3QtyH2cOazmKc,6825
 ipex_llm/transformers/lookup.py,sha256=b6OlZ9OV10R9qeWw8mVryVpDxszkjwLkldvi7GPMJY8,19614
-ipex_llm/transformers/low_bit_linear.py,sha256=f47v3w3DUG0G65RawgiL5y9N8l_GRRz6uaCSTMga2zM,39281
+ipex_llm/transformers/low_bit_linear.py,sha256=03TMG4GZsgRPvchQC2h7eMU9IQ9XCyVcdh3Pvi7_Rew,41550
 ipex_llm/transformers/model.py,sha256=tWTzKsCz8A1P5gYEeG9KZgpxQgbP9hQ-TWAdkebA6Jg,40886
 ipex_llm/transformers/modelling_bigdl.py,sha256=7JpNVMuyq_OmtNUaMFMXdxPWZp2q0QHC02QeA-VTPOw,6709
 ipex_llm/transformers/npu_model.py,sha256=X8mdY6N9TYlxG41wmFloX44ZUjyitFzdKbhzO7TToFY,40309
@@ -253,16 +253,16 @@ ipex_llm/vllm/cpu/entrypoints/openai/cli_args.py,sha256=hB398yYtKauASRzevctScdbF
 ipex_llm/vllm/xpu/__init__.py,sha256=zBSG6nzrVF5QnpR6_f7kPhBFeowTE9gaZ7D5m98E7_w,585
 ipex_llm/vllm/xpu/ipex_llm_v1_wrapper.py,sha256=pd939vFomKIg9Qn2NO4u0OF6hPgvQpqcfJSxqBzcqhA,825
 ipex_llm/vllm/xpu/ipex_llm_wrapper.py,sha256=_CbhvBuf_KPnmLfngYKtJl5gPAHVsG2mWth3wSeaH3M,892
-ipex_llm/vllm/xpu/model_convert.py,sha256=oedafTsnysTi78PGYcjn1w5rnIBfBx4_mpZp2fF6z44,10093
+ipex_llm/vllm/xpu/model_convert.py,sha256=HZeTrQHMYfgXlz1b9KiKdAUZ57nLgpv6VhM5CkiSrUc,10416
 ipex_llm/vllm/xpu/engine/__init__.py,sha256=sOvwLx_Zj0jiRCGj9W3DgGTfcSU3hABYhgIQI7T6cxU,879
 ipex_llm/vllm/xpu/engine/engine.py,sha256=XAprw7VifjfnR915TZOaKcxe3QCFsVBgxzS8qOdn1yg,14462
 ipex_llm/vllm/xpu/entrypoints/openai/api_server.py,sha256=VlmS56hBHBZTIZ5Jhvb4TZN-h28O7uMn33hX8NiJXKk,45719
 ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py,sha256=hB398yYtKauASRzevctScdbFIjiiSGMAe1bwEuIHrhY,10893
-ipex_llm-2.3.0b20250603.data/scripts/ipex-llm-init.bat,sha256=HPtCYuDYwEatq7dAwOvdfVcHYCpAVdbj75K1qh0vQek,2578
-ipex_llm-2.3.0b20250603.data/scripts/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
-ipex_llm-2.3.0b20250603.data/scripts/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
-ipex_llm-2.3.0b20250603.dist-info/METADATA,sha256=ksoQDNkxXOZiquKGwSnn-LS4DfdIbFQ9mCbCzDg4AH4,8865
-ipex_llm-2.3.0b20250603.dist-info/WHEEL,sha256=6iYPr8vTHsyDK75jr9X0V3I9wPSVmtwr_8fdATBciGk,98
-ipex_llm-2.3.0b20250603.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
-ipex_llm-2.3.0b20250603.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
-ipex_llm-2.3.0b20250603.dist-info/RECORD,,
+ipex_llm-2.3.0b20250605.data/scripts/ipex-llm-init.bat,sha256=HPtCYuDYwEatq7dAwOvdfVcHYCpAVdbj75K1qh0vQek,2578
+ipex_llm-2.3.0b20250605.data/scripts/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
+ipex_llm-2.3.0b20250605.data/scripts/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
+ipex_llm-2.3.0b20250605.dist-info/METADATA,sha256=2CommuodTS_N2xOt4lnyS-8ZP0vWAMSEBYQOElEuSjk,8865
+ipex_llm-2.3.0b20250605.dist-info/WHEEL,sha256=6iYPr8vTHsyDK75jr9X0V3I9wPSVmtwr_8fdATBciGk,98
+ipex_llm-2.3.0b20250605.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
+ipex_llm-2.3.0b20250605.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
+ipex_llm-2.3.0b20250605.dist-info/RECORD,,