ipex-llm 2.2.0b20250106__py3-none-manylinux2010_x86_64.whl → 2.2.0b20250106.post1__py3-none-manylinux2010_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ipex_llm/transformers/convert.py +19 -158
- ipex_llm/transformers/loader.py +1 -1
- ipex_llm/transformers/lookup.py +2 -2
- ipex_llm/transformers/low_bit_linear.py +15 -29
- ipex_llm/transformers/model.py +0 -7
- ipex_llm/transformers/models/chatglm2.py +1 -192
- ipex_llm/transformers/models/minicpmv.py +2 -2
- ipex_llm/transformers/models/sd.py +2 -2
- ipex_llm/transformers/models/utils.py +16 -104
- ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py +5 -8
- ipex_llm/transformers/speculative.py +2 -14
- ipex_llm/transformers/utils.py +7 -20
- {ipex_llm-2.2.0b20250106.dist-info → ipex_llm-2.2.0b20250106.post1.dist-info}/METADATA +40 -19
- {ipex_llm-2.2.0b20250106.dist-info → ipex_llm-2.2.0b20250106.post1.dist-info}/RECORD +20 -24
- ipex_llm/transformers/models/cohere.py +0 -589
- ipex_llm/transformers/models/falcon.py +0 -829
- ipex_llm/transformers/models/gptj.py +0 -441
- ipex_llm/transformers/models/mixtral.py +0 -576
- {ipex_llm-2.2.0b20250106.data → ipex_llm-2.2.0b20250106.post1.data}/scripts/ipex-llm-init +0 -0
- {ipex_llm-2.2.0b20250106.data → ipex_llm-2.2.0b20250106.post1.data}/scripts/llm-chat +0 -0
- {ipex_llm-2.2.0b20250106.data → ipex_llm-2.2.0b20250106.post1.data}/scripts/llm-cli +0 -0
- {ipex_llm-2.2.0b20250106.dist-info → ipex_llm-2.2.0b20250106.post1.dist-info}/WHEEL +0 -0
- {ipex_llm-2.2.0b20250106.dist-info → ipex_llm-2.2.0b20250106.post1.dist-info}/entry_points.txt +0 -0
- {ipex_llm-2.2.0b20250106.dist-info → ipex_llm-2.2.0b20250106.post1.dist-info}/top_level.txt +0 -0
ipex_llm/transformers/models/utils.py CHANGED

@@ -19,7 +19,7 @@ import torch
 import warnings
 from ipex_llm.utils.common import invalidInputError
 from ipex_llm.ggml.quantize import ggml_tensor_qtype
-from ipex_llm.transformers.utils import get_ipex_version, get_xpu_device_type
+from ipex_llm.transformers.utils import get_ipex_version, get_xpu_device_name
 from ipex_llm.transformers.low_bit_linear import SYM_INT4, SYM_INT8, FP8E5, IQ2_XXS, FP4, FP8E4,\
     FP6, ASYM_INT4

@@ -85,16 +85,14 @@ def use_quantize_kv_cache(linear: torch.nn.Module, x: torch.Tensor, kv_group: in
         return os.environ["IPEX_LLM_QUANTIZE_KV_CACHE"] == "1"
     elif os.environ.get("IPEX_LLM_LOW_MEM", None) is not None:
         return os.environ["IPEX_LLM_LOW_MEM"] == "1"
+    elif linear.qtype in [ggml_tensor_qtype["fp16"], ggml_tensor_qtype["bf16"]]:
+        return False
     else:
-
-
-
-
-
-def kv_cache_device_check(x: torch.Tensor, kv_group: int) -> bool:
-    return (get_xpu_device_type(x) in ["mtl", "lnl"] and kv_group <= 1) or \
-        ((get_xpu_device_type(x) == "arc" or get_xpu_device_type(x) == "flex") and
-            1 < x.size(0) and x.size(0) <= 8)
+        device_name = get_xpu_device_name(x.device)
+        return (
+            device_name in ["mtl", "lnl", "arl"] and kv_group == 1
+            or device_name in ["arc", "bmg"] and x.size(0) > 1
+        )


 def init_fp8_kv_cache(batch_size, num_heads, current_length, head_dim, device):
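Taken together, the two hunks above replace the old kv_cache_device_check helper with an inline device/batch heuristic and add an early exit for fp16/bf16 linears. A minimal sketch of the resulting decision order, assuming only the environment variables and device names shown in the diff; `_decide_quantize_kv` is an illustrative stand-in, not the library function:

```python
import os

def _decide_quantize_kv(qtype_is_fp16_or_bf16: bool, device_name: str,
                        batch_size: int, kv_group: int) -> bool:
    # Environment overrides are consulted first, as in the hunk above.
    if os.environ.get("IPEX_LLM_QUANTIZE_KV_CACHE") is not None:
        return os.environ["IPEX_LLM_QUANTIZE_KV_CACHE"] == "1"
    if os.environ.get("IPEX_LLM_LOW_MEM") is not None:
        return os.environ["IPEX_LLM_LOW_MEM"] == "1"
    # New in this release: fp16/bf16 linears never use a quantized KV cache.
    if qtype_is_fp16_or_bf16:
        return False
    # Device/batch heuristic that replaces the removed kv_cache_device_check.
    return (device_name in ["mtl", "lnl", "arl"] and kv_group == 1
            or device_name in ["arc", "bmg"] and batch_size > 1)

print(_decide_quantize_kv(False, "arc", batch_size=4, kv_group=1))  # True
```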
@@ -170,7 +168,7 @@ def should_use_fuse_rope(hidden_states, position_ids, training):

 def apply_rotary_pos_emb(q, k, cos, sin, position_ids, model_family):
     if model_family in ["llama", "baichuan", "internlm", "aquila", "gpt_neox", "mistral",
-                        "
+                        "qwen2", "yuan", "stablelm", "qwen2_moe"]:
         # The first two dimensions of cos and sin are always 1, so we can `squeeze` them.
         cos = cos.squeeze(1).squeeze(0)  # [seq_len, dim]
         sin = sin.squeeze(1).squeeze(0)  # [seq_len, dim]
@@ -185,7 +183,7 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids, model_family):
         q_embed = (q * cos) + (rotate_half(q) * sin)
         k_embed = (k * cos) + (rotate_half(k) * sin)
         return q_embed, k_embed
-    elif model_family in ["
+    elif model_family in ["chatglm"]:
         q_embed = (q * cos) + (rotate_every_two(q) * sin)
         k_embed = (k * cos) + (rotate_every_two(k) * sin)
         return q_embed, k_embed
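Both branches of apply_rotary_pos_emb compute q * cos + rotate(q) * sin; they differ only in the rotation helper. For reference, a minimal sketch of the standard definitions of those two helpers (the library's own rotate_half/rotate_every_two live elsewhere in this module and may differ in detail):

```python
import torch

def rotate_half(x: torch.Tensor) -> torch.Tensor:
    # Llama-style rotation: negate the second half and swap the halves.
    x1, x2 = x[..., : x.shape[-1] // 2], x[..., x.shape[-1] // 2:]
    return torch.cat((-x2, x1), dim=-1)

def rotate_every_two(x: torch.Tensor) -> torch.Tensor:
    # ChatGLM-style rotation: rotate each (even, odd) channel pair.
    x1, x2 = x[..., ::2], x[..., 1::2]
    return torch.stack((-x2, x1), dim=-1).flatten(-2)

q = torch.arange(8.0).reshape(1, 1, 1, 8)
print(rotate_half(q).shape, rotate_every_two(q).shape)  # both torch.Size([1, 1, 1, 8])
```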
@@ -194,19 +192,6 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids, model_family):
                           f"{model_family} is not supported.")


-def apply_ipex_rotate_every_two(q, k, cos, sin):
-    # ipex's apply_rotary_embedding_two_qk can change the origin storage,
-    # so q/k will get the result directly.
-    from ipex_llm.transformers.utils import get_ipex_version
-    if get_ipex_version() >= "2.1.10+xpu":
-        torch.ops.torch_ipex.apply_rotary_embedding_two_qk(
-            q, k, sin, cos, q, k
-        )
-    else:
-        torch.ops.torch_ipex.apply_rotary_embedding(q, sin, cos, q)
-        torch.ops.torch_ipex.apply_rotary_embedding(k, sin, cos, k)
-
-
 def is_enough_kv_cache_room_4_36(past_key_value, idx, seq_len=1):
     # to determinate if is enough kv cache room in transformers==4.36
     # seq_len for current seq len
@@ -226,57 +211,6 @@ def is_enough_kv_cache_room_4_31(past_key_value, seq_len=1):
         (past_key_value[0].size(2) + seq_len) * past_key_value[0].size(3)


-def use_flash_attention(query, key, attention_mask=None):
-    # here we support query's shape is always [batch_size, head_num, q_len, head_dim],
-    # key's shape is always [batch_size, head_num, k_len, head_dim]
-    invalidInputError(query.dim() == 4,
-                      "Here query input of use_flash_attention should be [batch_size, "
-                      "head_num, q_len, head_dim]")
-    invalidInputError(key.dim() == 4,
-                      "Here key input of use_flash_attention should be [batch_size, "
-                      "head_num, k_len, head_dim]")
-    bsz, _, q_len, _ = query.size()
-    k_len = key.size()[2]
-    # check whether ipex flash attention can be used
-    if q_len != k_len:
-        # now only use flash attention for first token
-        # as it seems have no performance benifit for rest token now
-        return False
-    if query.device.type != "xpu":
-        # ipex flash attention only support for xpu
-        return False
-    ipex_version = get_ipex_version()
-    if ipex_version <= "2.0.110+xpu":
-        # ipex flash attention is supported from ipex 2.1
-        return False
-    if not torch.xpu.has_xetla():
-        # ipex flash attention is only supported for xetla
-        # may update this later
-        return False
-    elif get_xpu_device_type(query) != "pvc":
-        return False
-    if query.dtype not in [torch.float32, torch.float16]:
-        # only use flash attention for fp32/fp16 input
-        return False
-    if bsz > 1:
-        # as flash attention doesn't support attn_mask in ipex 2.1,
-        # so it will cause output error for padded batch input
-        if attention_mask is None:
-            return True
-        else:
-            # TODO: below logic may change for different model
-            # attention mask shape : [bsz, 1, q_len, k_len]
-            if attention_mask[0].squeeze()[0, 0].item() != 0:
-                # first batch contains padding
-                # otherwise we suppose it should be a upper triangular matrix
-                # at the same time, the diagonal is also 0
-                return False
-            elif not attention_mask.equal(attention_mask[0].repeat(bsz, 1, 1, 1)):
-                # check whether mask of every batch is the same
-                return False
-    return True
-
-
 def use_sdp(q_len, kv_len, head_dim, query_states):
     return (
         query_states.device.type == "xpu"
@@ -315,38 +249,16 @@ def mlp_fusion_check(x, qtype, training):
     if training or x.requires_grad:
         return False
     if qtype == FP6:
-        device = get_xpu_device_type(x)
-        if device in ["mtl", "lnl"]:
+        device = get_xpu_device_name(x.device)
+        if device in ["mtl", "lnl", "arl"]:
             return False
     return True


-def use_decoding_fast_path(proj,
-                           use_fuse_rope,
-                           enough_kv_room,
-                           bs,
-                           qtype_check=decoding_fast_path_qtype_check):
-    if proj is None:
-        return False
-    device = get_xpu_device_type(proj.weight)
-    if not qtype_check(proj):
-        return False
-    if not use_fuse_rope:
-        return False
-    if not enough_kv_room:
-        return False
-    if bs != 1:
-        return False
-
-    if device in ["uhd"]:
-        return False
-    return True
-
-
 def use_xmx(x: torch.Tensor, qtype: int):
-    device = get_xpu_device_type(x)
+    device = get_xpu_device_name(x.device)
     return (
-        device in ["arc", "
+        device in ["arc", "pvc"]
         and qtype in [SYM_INT4, SYM_INT8, FP8E4, FP8E5]
         and (
             (device == "pvc" and 1 < x.size(0) <= 16)
@@ -370,7 +282,7 @@ def fp16_fusion_check(proj, x, training):
         return False
     if x.requires_grad:
         return False
-    device_type = get_xpu_device_type(x)
+    device_type = get_xpu_device_name(x.device)
     if device_type != "pvc":
         return False
     return True
@@ -439,7 +351,7 @@ def should_use_compresskv(x: torch.Tensor, prompt_len: int):
     else:
         if use_compress_kv is None:
             return (
-
+                get_xpu_device_name(x.device) in ["mtl", "lnl", "arl"]
                 and prompt_len >= 1800
                 and prompt_len <= 4500
             )
ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py CHANGED

@@ -473,10 +473,6 @@ def convert_llm_for_deploy(model: torch.nn.Module,
                        "n_splits_linear": n_splits_linear,
                        "n_splits_down_proj": n_splits_down_proj,
                        "lm_head_low_bit": lm_head_low_bit}
-        model.config.update(update_dict)
-        model.config.save_pretrained(save_directory)
-        if model.can_generate():
-            model.generation_config.save_pretrained(save_directory)

         from .qwen import convert_qwen_layer, convert_fused_qwen_layer
         from .qwen import convert_lm_head_and_embedding
@@ -537,8 +533,6 @@ def convert_llm_for_deploy(model: torch.nn.Module,
                        "n_splits_linear": n_splits_linear,
                        "n_splits_down_proj": n_splits_down_proj,
                        "lm_head_low_bit": lm_head_low_bit}
-        model.config.update(update_dict)
-        model.config.save_pretrained(save_directory)

         from .llama import convert_llama_layer, convert_fused_llama_layer
         from .llama import convert_lm_head_and_embedding
@@ -577,8 +571,6 @@ def convert_llm_for_deploy(model: torch.nn.Module,
                        "n_splits_linear": n_splits_linear,
                        "n_splits_down_proj": n_splits_down_proj,
                        "lm_head_low_bit": lm_head_low_bit}
-        model.config.update(update_dict)
-        model.config.save_pretrained(save_directory)

         from .minicpm import convert_minicpm_layer, convert_fused_minicpm_layer
         from .minicpm import convert_lm_head_and_embedding
@@ -595,3 +587,8 @@ def convert_llm_for_deploy(model: torch.nn.Module,
                                      save_directory, weight_dir,
                                      convert_model=True,
                                      max_prompt_len=max_prompt_len)
+
+    model.config.update(update_dict)
+    model.config.save_pretrained(save_directory)
+    if model.can_generate():
+        model.generation_config.save_pretrained(save_directory)
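Net effect of the four hunks above: model.config is updated and saved once, at the end of convert_llm_for_deploy after the per-model layer conversion, instead of separately inside each model branch. A hypothetical helper that mirrors the relocated tail logic (names taken from the diff; in the package the code is inline, not a helper):

```python
def _finalize_npu_export(model, update_dict: dict, save_directory: str) -> None:
    # Runs once, after layer conversion, for every supported model family.
    model.config.update(update_dict)
    model.config.save_pretrained(save_directory)
    if model.can_generate():
        model.generation_config.save_pretrained(save_directory)
```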
ipex_llm/transformers/speculative.py CHANGED

@@ -432,8 +432,7 @@ def _check_and_extend_kv_cache(past_key_values, max_step_draft, kv_alloc_block_l
     from ipex_llm.transformers.models.utils import is_enough_kv_cache_room_4_31, \
         extend_kv_cache
     enough_kv_room = True
-    if model_type not in ["chatglm", "qwen", "baichuan", "llama", "mistral",
-                          "gptj", "opt"]:
+    if model_type not in ["chatglm", "qwen", "baichuan", "llama", "mistral", "opt"]:
         return past_key_values, False
     cache_k = past_key_values[0][0]
     if model_type == "chatglm":
@@ -527,7 +526,7 @@ def _crop_past_key_values(self, past_key_values, new_cache_size, _enable_ipex=Fa
              v[:-(new_cache_size), :, :, :])
             for k, v in past_key_values
         ]
-    elif self.config.model_type in ["baichuan"
+    elif self.config.model_type in ["baichuan"]:
         past_key_values = [
             (k[:, :, :-(new_cache_size), :],
              v[:, :, :-(new_cache_size), :])
@@ -796,13 +795,6 @@ def _non_cpu_ipex_verify(self, verify_input_ids, past_key_values, cur_attention_
                                    device=verify_input_ids.device)
         position_ids = position_ids.unsqueeze(0).repeat(1, 1) + past_key_value_len
         forward_args["position_ids"] = position_ids
-    elif self.config.model_type == "gptj":
-        past_length = past_key_values[0][0].size(2)
-        input_len = verify_input_ids.shape[1]
-        position_ids = torch.arange(past_length, input_len + past_length,
-                                    dtype=torch.long, device=verify_input_ids.device)
-        position_ids = position_ids.unsqueeze(0).view(-1, input_len)
-        forward_args["position_ids"] = position_ids

     return self(**forward_args)

@@ -971,10 +963,6 @@ def speculative_generate(self,
                 past_key_value_len = past_key_values[0][0].shape[0]
                 position_ids = torch.Tensor([[past_key_value_len + step_draft]]).long()
                 forward_args["position_ids"] = position_ids
-            elif self.config.model_type == "gptj":
-                past_length = draft_past_key_values[0][0].size(2)
-                position_ids = torch.Tensor([[past_length]]).long().to(self.device)
-                forward_args["position_ids"] = position_ids

             if _enable_ipex:
                 if any(keyword in self.config.model_type
ipex_llm/transformers/utils.py CHANGED

@@ -168,27 +168,14 @@ def get_ipex_version():
     return _ipex_version


-def get_xpu_device_type(x):
-    if x.device.type != "xpu":
-        return x.device.type
-    name = torch.xpu.get_device_name(x.device.index)
-    if name.startswith("Intel(R) Arc(TM) A"):
-        return "arc"
-    elif name.startswith("Intel(R) Graphics [0xe20b]"):
-        return "bmg"
-    elif name.startswith("Intel(R) Arc(TM)"):
-        if 'V' in name:
-            return "lnl"
-        else:
-            return "mtl"
-    elif name.startswith("Intel(R) Data Center GPU Flex"):
-        return "flex"
-    elif name.startswith("Intel(R) Data Center GPU Max"):
-        return "pvc"
-    elif name.startswith("Intel(R) UHD"):
-        return "uhd"
+def get_xpu_device_name(device: torch.device):
+    if device.type != "xpu":
+        return device.type
     else:
-
+        # possiable device name:
+        # ["arc", "pvc", "mtl", "lnl", "bmg", "arl", "legacy", "unknown"]
+        import xe_linear
+        return xe_linear.get_xpu_device_name(device)


 def load_imatrix_data(imatrix_file):
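The per-GPU name detection that used to be hard-coded here (get_xpu_device_type, matching on torch.xpu.get_device_name strings) is now delegated to the native xe_linear extension via get_xpu_device_name, which takes a torch.device instead of a tensor. A hedged usage sketch, assuming this post1 build of ipex-llm is installed; the XPU path additionally requires an XPU-enabled PyTorch/IPEX build that ships the xe_linear module:

```python
import torch
from ipex_llm.transformers.utils import get_xpu_device_name

# Non-XPU devices pass straight through as their device type string.
print(get_xpu_device_name(torch.device("cpu")))  # "cpu"

# On an XPU machine, a device resolves to one of the short names listed above:
# get_xpu_device_name(torch.device("xpu:0"))  -> e.g. "arc", "pvc", "mtl", "lnl", "bmg", "arl"
```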
{ipex_llm-2.2.0b20250106.dist-info → ipex_llm-2.2.0b20250106.post1.dist-info}/METADATA RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ipex-llm
-Version: 2.2.0b20250106
+Version: 2.2.0b20250106.post1
 Summary: Large Language Model Develop Toolkit
 Home-page: https://github.com/intel-analytics/ipex-llm
 Author: BigDL Authors
@@ -27,15 +27,17 @@ Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'all'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'all'
 Provides-Extra: cpp
-Requires-Dist: bigdl-core-cpp ==2.6.0b20250106 ; extra == 'cpp'
+Requires-Dist: bigdl-core-cpp ==2.6.0b20250106.post1 ; extra == 'cpp'
 Requires-Dist: setuptools ; extra == 'cpp'
 Provides-Extra: cpp-arl
-Requires-Dist: bigdl-core-cpp ==2.6.0b20250106 ; extra == 'cpp-arl'
+Requires-Dist: bigdl-core-cpp ==2.6.0b20250106.post1 ; extra == 'cpp-arl'
 Requires-Dist: setuptools ; extra == 'cpp-arl'
 Requires-Dist: onednn-devel ==2024.1.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
+Requires-Dist: onednn ==2024.1.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
 Requires-Dist: dpcpp-cpp-rt ==2024.2.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
 Requires-Dist: mkl-dpcpp ==2024.2.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
 Requires-Dist: onednn-devel ==2024.2.1 ; (platform_system == "Windows") and extra == 'cpp'
+Requires-Dist: onednn ==2024.2.1 ; (platform_system == "Windows") and extra == 'cpp'
 Requires-Dist: dpcpp-cpp-rt ==2024.2.1 ; (platform_system == "Windows") and extra == 'cpp'
 Requires-Dist: mkl-dpcpp ==2024.2.1 ; (platform_system == "Windows") and extra == 'cpp'
 Provides-Extra: llama-index
@@ -65,7 +67,7 @@ Requires-Dist: transformers ==4.40.0 ; extra == 'npu'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'npu'
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'npu'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'npu'
-Requires-Dist: bigdl-core-npu ==2.6.0b20250106 ; (platform_system == "Windows") and extra == 'npu'
+Requires-Dist: bigdl-core-npu ==2.6.0b20250106.post1 ; (platform_system == "Windows") and extra == 'npu'
 Provides-Extra: serving
 Requires-Dist: py-cpuinfo ; extra == 'serving'
 Requires-Dist: fschat[model_worker,webui] ==0.2.36 ; extra == 'serving'
@@ -85,9 +87,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250106 ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250106 ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250106 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250106.post1 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250106.post1 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250106.post1 ; extra == 'xpu'
 Provides-Extra: xpu-2-1
 Requires-Dist: py-cpuinfo ; extra == 'xpu-2-1'
 Requires-Dist: protobuf ; extra == 'xpu-2-1'
@@ -102,12 +104,28 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu-2-1'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250106 ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250106 ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250106 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250106.post1 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250106.post1 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250106.post1 ; extra == 'xpu-2-1'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-1'
 Requires-Dist: dpcpp-cpp-rt ==2024.0.2 ; (platform_system == "Windows") and extra == 'xpu-2-1'
 Requires-Dist: mkl-dpcpp ==2024.0.0 ; (platform_system == "Windows") and extra == 'xpu-2-1'
+Provides-Extra: xpu-2-6
+Requires-Dist: py-cpuinfo ; extra == 'xpu-2-6'
+Requires-Dist: protobuf ; extra == 'xpu-2-6'
+Requires-Dist: mpmath ==1.3.0 ; extra == 'xpu-2-6'
+Requires-Dist: numpy ==1.26.4 ; extra == 'xpu-2-6'
+Requires-Dist: transformers ==4.37.0 ; extra == 'xpu-2-6'
+Requires-Dist: sentencepiece ; extra == 'xpu-2-6'
+Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-2-6'
+Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-2-6'
+Requires-Dist: tabulate ; extra == 'xpu-2-6'
+Requires-Dist: setuptools ; extra == 'xpu-2-6'
+Requires-Dist: torch ==2.6.0+xpu ; extra == 'xpu-2-6'
+Requires-Dist: torchvision ==0.21.0+xpu ; extra == 'xpu-2-6'
+Requires-Dist: torchaudio ==2.6.0+xpu ; extra == 'xpu-2-6'
+Requires-Dist: bigdl-core-xe-all ==2.6.0b20250106.post1 ; extra == 'xpu-2-6'
+Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-6'
 Provides-Extra: xpu-arc
 Requires-Dist: py-cpuinfo ; extra == 'xpu-arc'
 Requires-Dist: protobuf ; extra == 'xpu-arc'
@@ -119,9 +137,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arc'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arc'
 Requires-Dist: tabulate ; extra == 'xpu-arc'
 Requires-Dist: setuptools ; extra == 'xpu-arc'
-Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106 ; extra == 'xpu-arc'
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106 ; extra == 'xpu-arc'
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106.post1 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106.post1 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106.post1 ; extra == 'xpu-arc'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arc'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
@@ -130,6 +148,7 @@ Requires-Dist: torch ==2.3.1.post0+cxx11.abi ; (platform_system == "Windows") an
 Requires-Dist: torchvision ==0.18.1.post0+cxx11.abi ; (platform_system == "Windows") and extra == 'xpu-arc'
 Requires-Dist: intel-extension-for-pytorch ==2.3.110.post0+xpu ; (platform_system == "Windows") and extra == 'xpu-arc'
 Requires-Dist: onednn-devel ==2024.1.1 ; (platform_system == "Windows") and extra == 'xpu-arc'
+Requires-Dist: onednn ==2024.1.1 ; (platform_system == "Windows") and extra == 'xpu-arc'
 Provides-Extra: xpu-arl
 Requires-Dist: py-cpuinfo ; extra == 'xpu-arl'
 Requires-Dist: protobuf ; extra == 'xpu-arl'
@@ -141,9 +160,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arl'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arl'
 Requires-Dist: tabulate ; extra == 'xpu-arl'
 Requires-Dist: setuptools ; extra == 'xpu-arl'
-Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106 ; extra == 'xpu-arl'
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106 ; extra == 'xpu-arl'
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106.post1 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106.post1 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106.post1 ; extra == 'xpu-arl'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arl'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
@@ -152,6 +171,7 @@ Requires-Dist: torch ==2.3.1.post0+cxx11.abi ; (platform_system == "Windows") an
 Requires-Dist: torchvision ==0.18.1.post0+cxx11.abi ; (platform_system == "Windows") and extra == 'xpu-arl'
 Requires-Dist: intel-extension-for-pytorch ==2.3.110.post0+xpu ; (platform_system == "Windows") and extra == 'xpu-arl'
 Requires-Dist: onednn-devel ==2024.1.1 ; (platform_system == "Windows") and extra == 'xpu-arl'
+Requires-Dist: onednn ==2024.1.1 ; (platform_system == "Windows") and extra == 'xpu-arl'
 Provides-Extra: xpu-lnl
 Requires-Dist: py-cpuinfo ; extra == 'xpu-lnl'
 Requires-Dist: protobuf ; extra == 'xpu-lnl'
@@ -163,9 +183,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-lnl'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-lnl'
 Requires-Dist: tabulate ; extra == 'xpu-lnl'
 Requires-Dist: setuptools ; extra == 'xpu-lnl'
-Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106 ; extra == 'xpu-lnl'
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106 ; extra == 'xpu-lnl'
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106.post1 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106.post1 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106.post1 ; extra == 'xpu-lnl'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-lnl'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
@@ -174,6 +194,7 @@ Requires-Dist: torch ==2.3.1.post0+cxx11.abi ; (platform_system == "Windows") an
 Requires-Dist: torchvision ==0.18.1.post0+cxx11.abi ; (platform_system == "Windows") and extra == 'xpu-lnl'
 Requires-Dist: intel-extension-for-pytorch ==2.3.110.post0+xpu ; (platform_system == "Windows") and extra == 'xpu-lnl'
 Requires-Dist: onednn-devel ==2024.1.1 ; (platform_system == "Windows") and extra == 'xpu-lnl'
+Requires-Dist: onednn ==2024.1.1 ; (platform_system == "Windows") and extra == 'xpu-lnl'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu'
 Requires-Dist: dpcpp-cpp-rt ==2024.0.2 ; (platform_system == "Windows") and extra == 'xpu'
 Requires-Dist: mkl-dpcpp ==2024.0.0 ; (platform_system == "Windows") and extra == 'xpu'
{ipex_llm-2.2.0b20250106.dist-info → ipex_llm-2.2.0b20250106.post1.dist-info}/RECORD RENAMED

@@ -94,25 +94,25 @@ ipex_llm/serving/fastchat/tgi_api_protocol.py,sha256=brT3k3-V0NJrU4fRqUwWjC0O3iO
 ipex_llm/serving/fastchat/tgi_api_server.py,sha256=agNTAEiZPSuj3dEdIdYKwkoY0cXOUDX06DiM9VP2knQ,24418
 ipex_llm/serving/fastchat/vllm_worker.py,sha256=ZLz2Q9GxJO6r_LOiP6epgCRjBGk-K4EB1SNEWSJp5DA,11091
 ipex_llm/transformers/__init__.py,sha256=l4KkMkLe-pRC7b_kj6LCfeifgE-Uo33_Av_FwN9HnFA,1074
-ipex_llm/transformers/convert.py,sha256=
+ipex_llm/transformers/convert.py,sha256=TxWdTTOSvh-j5jqokQJVWykta4U4LHupE1QJ-9udzwc,98733
 ipex_llm/transformers/convert_ipex.py,sha256=iKXo0n8fVFTOA2fNYYrByMFK0dovL-kLd2sVDk88AlQ,14334
 ipex_llm/transformers/embedding.py,sha256=bdgk59DvD4ZZyxRzewXOR7g56nThgO6uhIwk8QL7f-s,9299
 ipex_llm/transformers/kv.py,sha256=k4TU18LlA-Sbq9WNNQnfuzu3RSFBwFhmaV3BcGN5bAo,19191
 ipex_llm/transformers/lisa.py,sha256=F5WxbtXQ7RdKulj83h_2DnEIgKiKGZf7zvOmg6QBl2s,3289
-ipex_llm/transformers/loader.py,sha256=
-ipex_llm/transformers/lookup.py,sha256=
-ipex_llm/transformers/low_bit_linear.py,sha256=
-ipex_llm/transformers/model.py,sha256=
+ipex_llm/transformers/loader.py,sha256=AwjV5RpI2t2bedlv7ZhLm8cfd-QJZm5hny-XyjIvdnk,6876
+ipex_llm/transformers/lookup.py,sha256=b6OlZ9OV10R9qeWw8mVryVpDxszkjwLkldvi7GPMJY8,19614
+ipex_llm/transformers/low_bit_linear.py,sha256=lPIvDuRoS0zusiJ6vw_fOTJgK5ylh4CuD3U-qs8ih4Y,40869
+ipex_llm/transformers/model.py,sha256=fj7LBjrWtWwDJJYXnWiXsLGS4ayqqHfnh0p51dSDssE,40908
 ipex_llm/transformers/modelling_bigdl.py,sha256=7JpNVMuyq_OmtNUaMFMXdxPWZp2q0QHC02QeA-VTPOw,6709
 ipex_llm/transformers/npu_model.py,sha256=YW02GeVz-9ZGqxAeSz0AOvciS-17bo9eK5ZOBrICwSQ,39508
 ipex_llm/transformers/patches.py,sha256=halPWm__ORh2fRFSIFPiCNg3LQBfrRkTPtmtRpBJCZQ,1286
 ipex_llm/transformers/pipeline_parallel.py,sha256=uNZpOXljNmdoEYnP8U-VFiN4dRZb2piQbIf2bG9LQnE,49051
 ipex_llm/transformers/qlora.py,sha256=jtPGsvWFjbTUGzDBCdfftnCis_0nJQNRpACSwXUbbGU,14943
 ipex_llm/transformers/relora.py,sha256=-dYzUV0P-IhO2jFdnzN9-v_sFzJpRj3ZwN9eCJzOoCw,16567
-ipex_llm/transformers/speculative.py,sha256=
+ipex_llm/transformers/speculative.py,sha256=0XNLgc9dGswJHVPrXo4iM7pPxkWwfFfJMECcivJSnIc,63368
 ipex_llm/transformers/streamer.py,sha256=RrVlLblzCOtABRUpaMXAyaMnCGgLUtAi_YesLumRbww,4842
 ipex_llm/transformers/training_patch.py,sha256=oxMkUtqyvqJiprw6dE3skkYfD1HOmUlH9N0hBkbn0G0,10799
-ipex_llm/transformers/utils.py,sha256=
+ipex_llm/transformers/utils.py,sha256=7syzq4jnEo-mWmS9E2VZ2GcFrjojWI8E7Hcx0tloifg,16996
 ipex_llm/transformers/xpu_customize_fwd.py,sha256=wFpIhs5F6tkNs8gBOrLxWdhLzO3EDHovVkERPIAoAvg,7611
 ipex_llm/transformers/xpu_ops.py,sha256=H46-69pMRQhekbAEoDfNacCInLWycMHDqrgMGLvFYfI,4362
 ipex_llm/transformers/awq/__init__.py,sha256=Du5gu3-eeAkeDO_dEMBTzrDBA66DSN3uL3-rn8WGXQw,875
@@ -148,28 +148,24 @@ ipex_llm/transformers/models/baichuan.py,sha256=oJCAEENSG8oQhJ-QPN2SiapARjAGdOM6
 ipex_llm/transformers/models/bert.py,sha256=bJNic2pt1kph0kBwdK5MRGyWupFfx2Ts0V3D1L-5kWo,6085
 ipex_llm/transformers/models/bloom.py,sha256=PxfzyYT-nFn3K5rZhTQjmcEjUUzAhUFzxIN4kzRlCuc,8103
 ipex_llm/transformers/models/chatglm.py,sha256=UHai1t2AUtGmF765_eHF8LUMVQzp_oCBx8TJB21WrHk,12597
-ipex_llm/transformers/models/chatglm2.py,sha256=
+ipex_llm/transformers/models/chatglm2.py,sha256=SGCABJdYQLW0zDarEoWrEQLuWlbq9iQhYU8ZeR1-ptQ,15957
 ipex_llm/transformers/models/chatglm4.py,sha256=AAhAFFDDas5DBQPfh2Mwl7a2v7taKf6xphoeeNNFaBI,16593
 ipex_llm/transformers/models/chatglm4v.py,sha256=YRfuf9g1E0MQ_7wbHAOMvadFnO-j3LqI_k1SaRkDs0M,14055
-ipex_llm/transformers/models/cohere.py,sha256=RCUBfhI_eNuZ1NyQC6vm28eYqPmgZn6pHSvQMwFyT2A,25884
 ipex_llm/transformers/models/common.py,sha256=4obQMGF02FCiXrHnFle9Fsx7C33b1FDt37qJJ4YgxRc,11578
 ipex_llm/transformers/models/decilm.py,sha256=P-PBuDPf07GvKggLwJx_wPwIn6esN3rX8ai2JxRuZmE,5246
-ipex_llm/transformers/models/falcon.py,sha256=f5BzMbv4E-R5Pete8zBscbgiueXGIaWGs-5RbcMlUo4,33549
 ipex_llm/transformers/models/gemma.py,sha256=_E3Yw8Y45xyNVeLqyVKcpr8kjuICtETeL82cJ-bWJuU,9424
 ipex_llm/transformers/models/gemma2.py,sha256=2WZuv-FLzJyTJFaYxOuzJt47QE64M0lHnzAiO5T6ozI,8049
 ipex_llm/transformers/models/glm.py,sha256=gHYgfn20jPRL-ElXy-rUqMh6_LQcc5x7DEXSZuRA4E0,7094
 ipex_llm/transformers/models/gpt2.py,sha256=YSaNgK1uLCFDuIFqnKO0Mi-AsOZsYav-7pNf_NpKGdM,3445
 ipex_llm/transformers/models/gptbigcode.py,sha256=cP1_qGWoa43R2WacAMblShjku4QupcCZiLaPPAoOUs4,9101
-ipex_llm/transformers/models/gptj.py,sha256=TTIx461X2nOcIkrAcZhEf7d7mjJ3yvEC9KLVc1-hrpc,17973
 ipex_llm/transformers/models/gptneox.py,sha256=loRh1x_5S6BCeOr_s5xr-N_1SQHL3Y5IiUBAEyoMUqQ,6172
 ipex_llm/transformers/models/internlm.py,sha256=ZbIUMDwNRcrCeduXfbA_uq1AUEWawEt6CJRvQl3LkAg,17832
 ipex_llm/transformers/models/internvl.py,sha256=Vx0vENIEQLX2M6P398mw5TOhpks0U8xf8rtRQvy94go,8154
 ipex_llm/transformers/models/llama.py,sha256=ozwtdQ0MbanJEtW4LBFGxqs_QAq82EonhL2dL6tGyw0,8567
 ipex_llm/transformers/models/minicpm.py,sha256=ib2rJTN7Tf7znBCtVrtXsF-_Uuk2aA7KVg02xzatLiI,10103
 ipex_llm/transformers/models/minicpm3.py,sha256=FhNS6mi2rg7dSdF_QQGrao3g9EC6XLn1MTKd-kd0wF0,9191
-ipex_llm/transformers/models/minicpmv.py,sha256=
+ipex_llm/transformers/models/minicpmv.py,sha256=ZV4s48WNIyRoEkvENnlmopnx3ojZANBer0LI6bRtxrY,9826
 ipex_llm/transformers/models/mistral.py,sha256=rE1GWQxXvF6aG-buPHDR13zeynDZEDIubPF4PiVhZbM,7451
-ipex_llm/transformers/models/mixtral.py,sha256=zShaxabIoQaL3cV0Rptf7VCN4QuJsV4KBhanff1TASY,26601
 ipex_llm/transformers/models/mllama.py,sha256=ogpLmmN_OwcFUyjYB-oDC-l3uw8urFvUEc5edkjWHAk,10939
 ipex_llm/transformers/models/mpt.py,sha256=z02NwHogJZVh-Mk4sYoIzR90SFIKhoNN_-ifsD907TQ,9540
 ipex_llm/transformers/models/phi.py,sha256=E6qz4EEuHIVGvaPo-wtLC5lz3iyMqTbAE_cRlcjQRKI,6670
@@ -182,10 +178,10 @@ ipex_llm/transformers/models/qwen2_vl.py,sha256=jIm4yZSd751BkRqgj3wR1QBkDIh-TMCL
 ipex_llm/transformers/models/qwen_vl.py,sha256=j7Nzzz2Qvynu9yrCXmoEfERjw43hXof5TbXIs7Ms-oY,17105
 ipex_llm/transformers/models/rwkv4.py,sha256=H4KMtxN0JA2ZTXnonHpsUUJ5xULemo-D1Jzl0ri_UY8,6123
 ipex_llm/transformers/models/rwkv5.py,sha256=OkRNj1pCAZg1z2Fw-I0DEnxLEdZyPeRSQ6msrkxLOCs,10710
-ipex_llm/transformers/models/sd.py,sha256=
+ipex_llm/transformers/models/sd.py,sha256=VvHV5u-0k2MgHu3NL9113hPj7DgfxqctuKzEEeNfRDU,5981
 ipex_llm/transformers/models/stablelm.py,sha256=RGQCYuQhYqtZ1j3RZkYi0_QvCRnUgUIPYxfBcLnElzg,6885
 ipex_llm/transformers/models/starcoder2.py,sha256=4P3mhRYf2Kreb1ESjrQGfy1puLMmZXgV35zf-Tksvao,6462
-ipex_llm/transformers/models/utils.py,sha256=
+ipex_llm/transformers/models/utils.py,sha256=85rGIzGZvWe3JjYpWcuc1nfzI_tn_zFcdZpWivxJkl0,15457
 ipex_llm/transformers/models/yuan.py,sha256=1jRPebwAK2ENbyYokOmb4LSVo-szucWiygz9zTv-scs,7656
 ipex_llm/transformers/npu_models/__init__.py,sha256=ulEUGLjaP48LCrVeury3UxLjXxKzRi0UpSG4bYu-7f8,585
 ipex_llm/transformers/npu_models/baichuan.py,sha256=fJtd7fBrttySghRUgfZTAdxLjsSNC-XL08HISsXigLE,4685
@@ -218,7 +214,7 @@ ipex_llm/transformers/npu_models/xlm_mp.py,sha256=sj8OVun8xJprM7ZJp0XzWa55rqlSIz
 ipex_llm/transformers/npu_pipeline_model/__init__.py,sha256=b2IXvVqQ5cItki021h8s3ymW12RPu8QNPprq4Mn3bDM,586
 ipex_llm/transformers/npu_pipeline_model/baichuan.py,sha256=ICxRzFQ4OIANDkkVi2_4xOeQXmfFXYMx3H52KuE1xR4,6208
 ipex_llm/transformers/npu_pipeline_model/common.py,sha256=QxJoJESpv0BpwO_FBeAT2wKA56wNFfen8iI37PrMKuA,7838
-ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py,sha256=
+ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py,sha256=wrRgmNT13RVtQRp5gFRBxNEPJHxFMLeGqb8a58YodPQ,28512
 ipex_llm/transformers/npu_pipeline_model/llama.py,sha256=MnvHRytLt3oy5jIPUBe8AeEJ6PtPWLbhQ5a9WqjZ1TQ,19905
 ipex_llm/transformers/npu_pipeline_model/minicpm.py,sha256=MDMesYlVbECKdK0xxkt1LwHgpkJOO7ZwBExYAwMGQa0,20637
 ipex_llm/transformers/npu_pipeline_model/pipeline_cpp.py,sha256=JNmodAMg_NQvDILug3E_fGXEh6cd3wsj4bvAzcd-vaU,2749
@@ -254,11 +250,11 @@ ipex_llm/vllm/xpu/engine/__init__.py,sha256=pY_CpyuZd72fr6s32ejeKHKFW0K4vUU2rzZj
 ipex_llm/vllm/xpu/engine/engine.py,sha256=k4-D27WS_Gk3mA--w3HWAjPjb4Aiu043MVPi0ZoAUBc,5984
 ipex_llm/vllm/xpu/entrypoints/openai/api_server.py,sha256=GshTZFB8e4PWvqckfbmTOU6b0oLkNn7A-vzLuG9--j8,21544
 ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py,sha256=2rENA2ucynMaIjiZBEh2ez1o5vR32GaP514t39CD7KM,8676
-ipex_llm-2.2.0b20250106.data/scripts/ipex-llm-init,sha256=fLQsT2dRL6H5bThb4GuIWotAuqoLsIxFwA-0c2qmaO8,6672
-ipex_llm-2.2.0b20250106.data/scripts/llm-chat,sha256=TdUnUmNapzuoe1c8IzrdVOQwWEg8IqsMSBRlOD3daZM,2249
-ipex_llm-2.2.0b20250106.data/scripts/llm-cli,sha256=RXGPlLElHxcKzoUxljEMBIAXbzCDysXL-Nxw-xF-7LU,2457
-ipex_llm-2.2.0b20250106.dist-info/METADATA,sha256=
-ipex_llm-2.2.0b20250106.dist-info/WHEEL,sha256=PPJcBMAZibF_2GFE9NmOJGqiaSMPiNFbJd6QaJjdA6Y,109
-ipex_llm-2.2.0b20250106.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
-ipex_llm-2.2.0b20250106.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
-ipex_llm-2.2.0b20250106.dist-info/RECORD,,
+ipex_llm-2.2.0b20250106.post1.data/scripts/ipex-llm-init,sha256=fLQsT2dRL6H5bThb4GuIWotAuqoLsIxFwA-0c2qmaO8,6672
+ipex_llm-2.2.0b20250106.post1.data/scripts/llm-chat,sha256=TdUnUmNapzuoe1c8IzrdVOQwWEg8IqsMSBRlOD3daZM,2249
+ipex_llm-2.2.0b20250106.post1.data/scripts/llm-cli,sha256=RXGPlLElHxcKzoUxljEMBIAXbzCDysXL-Nxw-xF-7LU,2457
+ipex_llm-2.2.0b20250106.post1.dist-info/METADATA,sha256=I0vPU5mDtPZR3wpY87fYHdn6r14U0T50NWGP7EsF5s8,12825
+ipex_llm-2.2.0b20250106.post1.dist-info/WHEEL,sha256=PPJcBMAZibF_2GFE9NmOJGqiaSMPiNFbJd6QaJjdA6Y,109
+ipex_llm-2.2.0b20250106.post1.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
+ipex_llm-2.2.0b20250106.post1.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
+ipex_llm-2.2.0b20250106.post1.dist-info/RECORD,,