ipex-llm 2.2.0b20250105.post0__py3-none-win_amd64.whl → 2.2.0b20250106.post1__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. ipex_llm/libs/bloom-api.dll +0 -0
  2. ipex_llm/libs/bloom.dll +0 -0
  3. ipex_llm/libs/gptneox-api.dll +0 -0
  4. ipex_llm/libs/gptneox.dll +0 -0
  5. ipex_llm/libs/libbloom_avx.dll +0 -0
  6. ipex_llm/libs/libbloom_vnni.dll +0 -0
  7. ipex_llm/libs/libgptneox_avx.dll +0 -0
  8. ipex_llm/libs/libgptneox_vnni.dll +0 -0
  9. ipex_llm/libs/libllama_avx.dll +0 -0
  10. ipex_llm/libs/libllama_vnni.dll +0 -0
  11. ipex_llm/libs/libstarcoder_avx.dll +0 -0
  12. ipex_llm/libs/libstarcoder_vnni.dll +0 -0
  13. ipex_llm/libs/llama-api.dll +0 -0
  14. ipex_llm/libs/llama.dll +0 -0
  15. ipex_llm/libs/main-bloom.exe +0 -0
  16. ipex_llm/libs/main-gptneox.exe +0 -0
  17. ipex_llm/libs/main-llama.exe +0 -0
  18. ipex_llm/libs/main-starcoder.exe +0 -0
  19. ipex_llm/libs/pipeline.dll +0 -0
  20. ipex_llm/libs/quantize-bloom.exe +0 -0
  21. ipex_llm/libs/quantize-bloom_vnni.exe +0 -0
  22. ipex_llm/libs/quantize-gptneox.exe +0 -0
  23. ipex_llm/libs/quantize-gptneox_vnni.exe +0 -0
  24. ipex_llm/libs/quantize-llama.exe +0 -0
  25. ipex_llm/libs/quantize-llama_vnni.exe +0 -0
  26. ipex_llm/libs/quantize-starcoder.exe +0 -0
  27. ipex_llm/libs/quantize-starcoder_vnni.exe +0 -0
  28. ipex_llm/libs/starcoder-api.dll +0 -0
  29. ipex_llm/libs/starcoder.dll +0 -0
  30. ipex_llm/transformers/convert.py +2 -26
  31. ipex_llm/transformers/loader.py +1 -1
  32. ipex_llm/transformers/low_bit_linear.py +9 -23
  33. ipex_llm/transformers/model.py +0 -7
  34. ipex_llm/transformers/models/utils.py +2 -15
  35. ipex_llm/transformers/speculative.py +2 -14
  36. ipex_llm/transformers/utils.py +2 -0
  37. {ipex_llm-2.2.0b20250105.post0.dist-info → ipex_llm-2.2.0b20250106.post1.dist-info}/METADATA +20 -20
  38. {ipex_llm-2.2.0b20250105.post0.dist-info → ipex_llm-2.2.0b20250106.post1.dist-info}/RECORD +44 -45
  39. ipex_llm/transformers/models/gptj.py +0 -441
  40. {ipex_llm-2.2.0b20250105.post0.data → ipex_llm-2.2.0b20250106.post1.data}/scripts/ipex-llm-init.bat +0 -0
  41. {ipex_llm-2.2.0b20250105.post0.data → ipex_llm-2.2.0b20250106.post1.data}/scripts/llm-chat.ps1 +0 -0
  42. {ipex_llm-2.2.0b20250105.post0.data → ipex_llm-2.2.0b20250106.post1.data}/scripts/llm-cli.ps1 +0 -0
  43. {ipex_llm-2.2.0b20250105.post0.dist-info → ipex_llm-2.2.0b20250106.post1.dist-info}/WHEEL +0 -0
  44. {ipex_llm-2.2.0b20250105.post0.dist-info → ipex_llm-2.2.0b20250106.post1.dist-info}/entry_points.txt +0 -0
  45. {ipex_llm-2.2.0b20250105.post0.dist-info → ipex_llm-2.2.0b20250106.post1.dist-info}/top_level.txt +0 -0
ipex_llm/libs/bloom-api.dll CHANGED
Binary file
ipex_llm/libs/bloom.dll CHANGED
Binary file
ipex_llm/libs/gptneox-api.dll CHANGED
Binary file
ipex_llm/libs/gptneox.dll CHANGED
Binary file
ipex_llm/libs/libbloom_avx.dll CHANGED
Binary file
ipex_llm/libs/libbloom_vnni.dll CHANGED
Binary file
ipex_llm/libs/libgptneox_avx.dll CHANGED
Binary file
ipex_llm/libs/libgptneox_vnni.dll CHANGED
Binary file
ipex_llm/libs/libllama_avx.dll CHANGED
Binary file
ipex_llm/libs/libllama_vnni.dll CHANGED
Binary file
ipex_llm/libs/libstarcoder_avx.dll CHANGED
Binary file
ipex_llm/libs/libstarcoder_vnni.dll CHANGED
Binary file
ipex_llm/libs/llama-api.dll CHANGED
Binary file
ipex_llm/libs/llama.dll CHANGED
Binary file
ipex_llm/libs/main-bloom.exe CHANGED
Binary file
ipex_llm/libs/main-gptneox.exe CHANGED
Binary file
ipex_llm/libs/main-llama.exe CHANGED
Binary file
ipex_llm/libs/main-starcoder.exe CHANGED
Binary file
ipex_llm/libs/pipeline.dll CHANGED
Binary file
ipex_llm/libs/quantize-bloom.exe CHANGED
Binary file
ipex_llm/libs/quantize-bloom_vnni.exe CHANGED
Binary file
ipex_llm/libs/quantize-gptneox.exe CHANGED
Binary file
ipex_llm/libs/quantize-gptneox_vnni.exe CHANGED
Binary file
ipex_llm/libs/quantize-llama.exe CHANGED
Binary file
ipex_llm/libs/quantize-llama_vnni.exe CHANGED
Binary file
ipex_llm/libs/quantize-starcoder.exe CHANGED
Binary file
ipex_llm/libs/quantize-starcoder_vnni.exe CHANGED
Binary file
ipex_llm/libs/starcoder-api.dll CHANGED
Binary file
ipex_llm/libs/starcoder.dll CHANGED
Binary file
ipex_llm/transformers/convert.py CHANGED
@@ -680,18 +680,9 @@ def _replace_with_low_bit_linear(model, qtype, modules_to_not_convert=None,
                 optimize_lm_head=optimize_lm_head
             )
             device = module.weight.data.device
-            from ipex_llm.transformers.utils import get_ipex_version
-            if get_ipex_version() < "2.1.10+xpu":
-                new_linear._parameters['weight'] = nn.Parameter(module.weight)
-            else:
-                # only from 2.1, ipex provides matmul_bias_out
-                # so we need to transpose weight
-                new_weight = module.weight.transpose(0, 1).contiguous()
-                new_linear._parameters['weight'] = nn.Parameter(new_weight)
-                new_linear.weight_type = 2
+            new_linear._parameters['weight'] = nn.Parameter(module.weight)
             if module.bias is not None:
-                new_linear._parameters['bias'] = nn.Parameter(module.bias.data)\
-                    .to(device)
+                new_linear._parameters['bias'] = nn.Parameter(module.bias.data).to(device)
         elif qtype == ggml_tensor_qtype["bf16"]:
             module.to(torch.bfloat16)
             if _USE_VLLM:
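A minimal sketch of the effect of this hunk (illustrative only: nn.Linear stands in for FP16Linear, and the names mirror the diff rather than the full source). The fp16 path now always wraps the weight in its original [out_features, in_features] layout, with no IPEX-version branch and no transpose, so weight_type stays 1:

import torch.nn as nn

module = nn.Linear(4096, 4096)      # stand-in for the layer being replaced
new_linear = nn.Linear(4096, 4096)  # stand-in for the new FP16Linear
device = module.weight.data.device
# Weight is wrapped as-is; no transpose for matmul_bias_out anymore.
new_linear._parameters['weight'] = nn.Parameter(module.weight)
if module.bias is not None:
    new_linear._parameters['bias'] = nn.Parameter(module.bias.data).to(device)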
@@ -1452,21 +1443,6 @@ def _optimize_post(model):
                         module.MultiheadAttention,
                         mpt_multihead_attention_forward
                         )
-    elif "gptj" in model.config.model_type:
-        # dolly-v1-6b
-        modeling_module_name = model.__class__.__module__
-        module = importlib.import_module(modeling_module_name)
-        from ipex_llm.transformers.models.gptj import gptj_attention_forward, gptj_model_forward,\
-            gptj_block_forward
-        convert_forward(model,
-                        module.GPTJAttention,
-                        gptj_attention_forward)
-        convert_forward(model,
-                        module.GPTJModel,
-                        gptj_model_forward)
-        convert_forward(model,
-                        module.GPTJBlock,
-                        gptj_block_forward)
    elif "bloom" in model.config.model_type:
        modeling_module_name = model.__class__.__module__
        module = importlib.import_module(modeling_module_name)
ipex_llm/transformers/loader.py CHANGED
@@ -22,7 +22,7 @@ import time
from datetime import date
import argparse
from ipex_llm.utils.common import invalidInputError
- from transformers import AutoTokenizer, GPTJForCausalLM, LlamaTokenizer
+ from transformers import AutoTokenizer, LlamaTokenizer

LLAMA_IDS = ['llama', 'vicuna', 'merged-baize']

ipex_llm/transformers/low_bit_linear.py CHANGED
@@ -286,7 +286,7 @@ def use_batch_forward(x: torch.Tensor, qtype: int, output_len: int):
        or (
            qtype in [SYM_INT8, FP4, FP6, Q4_K, Q6_K]
            and batch_size <= 48
-           and device_name in ["arc", "pvc", "mtl", "lnl", "arl"]
+           and device_name in ["arc", "pvc", "mtl", "arl"]
            and x.shape[1] % 256 == 0
            and output_len % 32 == 0
        )
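Read as a standalone predicate, the updated gate looks roughly like the sketch below (an assumption-laden paraphrase: the qtype constants are replaced by stand-in strings, and device_name is whatever get_xpu_device_name returns; note that "lnl" no longer qualifies for this branch):

def batch_forward_eligible(qtype: str, batch_size: int, device_name: str,
                           in_features: int, output_len: int) -> bool:
    # Mirrors the condition in the hunk above; the qtype names are illustrative.
    return (qtype in ("sym_int8", "fp4", "fp6", "q4_k", "q6_k")
            and batch_size <= 48
            and device_name in ("arc", "pvc", "mtl", "arl")
            and in_features % 256 == 0
            and output_len % 32 == 0)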
@@ -759,9 +759,9 @@ class FP16Linear(nn.Linear):
        self.weight_length = self.out_len * self.in_len
        self.qtype = ggml_tensor_qtype["fp16"]
        self.mp_group = mp_group
-       # weigh_type = 1 means original weight
-       # weigh_type = 2 means weight has been transposed
-       # weigh_type = 3 means weight has been transposed by esimd method
+       # weight_type = 1 means original weight
+       # weight_type = 2 means weight has been transposed
+       # weight_type = 3 means weight has been transposed by esimd method
        self.weight_type = 1
        self.optimize_lm_head = optimize_lm_head
        self.disable_fp16_opt = False
@@ -775,28 +775,14 @@ class FP16Linear(nn.Linear):

        x = x.to(torch.float16)
        if self.bias is not None and self.bias.dtype != x.dtype:
-               self.bias.data = self.bias.data.to(x.dtype)
+           self.bias.data = self.bias.data.to(x.dtype)
        if self.weight is not None and self.weight.dtype != x.dtype:
            self.weight.data = self.weight.data.to(x.dtype)

        if not self.use_esimd_kernel(x):
-           if (
-               get_ipex_version() < "2.1.10+xpu"
-               or get_xpu_device_name(x.device) not in ["arc", "pvc"]
-               or self.disable_fp16_opt
-           ):
-               if self.weight_type == 2:
-                   self.weight = torch.nn.Parameter(self.weight.transpose(0, 1).contiguous(),
-                                                    requires_grad=False)
-                   self.weight_type = 1
-               result = F.linear(x, self.weight, self.bias)
-           else:
-               if self.weight_type == 1:
-                   self.weight = torch.nn.Parameter(self.weight.transpose(0, 1).contiguous(),
-                                                    requires_grad=False)
-                   self.weight_type = 2
-               result = torch.ops.torch_ipex.matmul_bias_out(x.contiguous(),
-                                                             self.weight, self.bias)
+           invalidInputError(self.weight_type == 1, "weight_type should be 1")
+           result = F.linear(x, self.weight, self.bias)
+
        if self.mp_group is not None:
            if get_use_vllm():
                result = self.mp_group.all_reduce(result)
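After this simplification the non-esimd fp16 path is essentially a plain F.linear. A sketch of the resulting method body (assumptions: invalidInputError raises when its condition is false, as elsewhere in ipex-llm; _esimd_path is a hypothetical placeholder for the esimd branch, and mp_group reduction is omitted):

import torch
import torch.nn.functional as F

def forward_sketch(self, x: torch.Tensor) -> torch.Tensor:
    x = x.to(torch.float16)
    if self.bias is not None and self.bias.dtype != x.dtype:
        self.bias.data = self.bias.data.to(x.dtype)
    if self.weight is not None and self.weight.dtype != x.dtype:
        self.weight.data = self.weight.data.to(x.dtype)
    if not self.use_esimd_kernel(x):
        # Weights are always kept untransposed now, so weight_type must be 1.
        invalidInputError(self.weight_type == 1, "weight_type should be 1")
        return F.linear(x, self.weight, self.bias)
    return self._esimd_path(x)  # hypothetical helper, not in the diff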
@@ -852,7 +838,7 @@ class FP16Linear(nn.Linear):
        if self.disable_fp16_opt:
            return False
        # esimd kernel can only be used for Arc and Flex
-       if gpu_type not in ["arc", "flex"]:
+       if gpu_type not in ["arc"]:
            return False
        # now esimd kernel can only be used for specific cases (llama2-7b shape)
        if self.in_len == 11008 and self.out_features == 4096:
ipex_llm/transformers/model.py CHANGED
@@ -103,12 +103,6 @@ def save_low_bit(self, *args, **kwargs):
    self.to(origin_device)


- def _load_pre():
-     from transformers import GPTJModel
-     from ipex_llm.transformers.models.gptj import gptj_model_new_init
-     GPTJModel.__init__ = gptj_model_new_init
-
-
class _BaseAutoModelClass:
    HF_MODEL = None

@@ -495,7 +489,6 @@ class _BaseAutoModelClass:
            else:
                if quant_config is not None:
                    kwargs["quantization_config"] = quant_config
-               _load_pre()
                try:
                    # To handle the input CUDA setting (such as 'device_map={"":0}'), ignore it
                    kwargs.pop('device_map', None)
ipex_llm/transformers/models/utils.py CHANGED
@@ -168,7 +168,7 @@ def should_use_fuse_rope(hidden_states, position_ids, training):

def apply_rotary_pos_emb(q, k, cos, sin, position_ids, model_family):
    if model_family in ["llama", "baichuan", "internlm", "aquila", "gpt_neox", "mistral",
-                       "mixtral", "qwen2", "yuan", "stablelm", "qwen2_moe"]:
+                       "qwen2", "yuan", "stablelm", "qwen2_moe"]:
        # The first two dimensions of cos and sin are always 1, so we can `squeeze` them.
        cos = cos.squeeze(1).squeeze(0)  # [seq_len, dim]
        sin = sin.squeeze(1).squeeze(0)  # [seq_len, dim]
@@ -183,7 +183,7 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids, model_family):
        q_embed = (q * cos) + (rotate_half(q) * sin)
        k_embed = (k * cos) + (rotate_half(k) * sin)
        return q_embed, k_embed
-   elif model_family in ["gptj", "chatglm"]:
+   elif model_family in ["chatglm"]:
        q_embed = (q * cos) + (rotate_every_two(q) * sin)
        k_embed = (k * cos) + (rotate_every_two(k) * sin)
        return q_embed, k_embed
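For reference, the two rotation styles dispatched above are the standard RoPE variants; minimal PyTorch sketches follow (my definitions for illustration, not necessarily the package's exact code):

import torch

def rotate_half(x):
    # llama-style RoPE: rotate the two halves of the last dimension
    x1, x2 = x.chunk(2, dim=-1)
    return torch.cat((-x2, x1), dim=-1)

def rotate_every_two(x):
    # gptj/chatglm-style RoPE: rotate adjacent pairs (x0, x1), (x2, x3), ...
    x1 = x[..., ::2]
    x2 = x[..., 1::2]
    return torch.stack((-x2, x1), dim=-1).flatten(-2)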
@@ -192,19 +192,6 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids, model_family):
                          f"{model_family} is not supported.")


- def apply_ipex_rotate_every_two(q, k, cos, sin):
-     # ipex's apply_rotary_embedding_two_qk can change the origin storage,
-     # so q/k will get the result directly.
-     from ipex_llm.transformers.utils import get_ipex_version
-     if get_ipex_version() >= "2.1.10+xpu":
-         torch.ops.torch_ipex.apply_rotary_embedding_two_qk(
-             q, k, sin, cos, q, k
-         )
-     else:
-         torch.ops.torch_ipex.apply_rotary_embedding(q, sin, cos, q)
-         torch.ops.torch_ipex.apply_rotary_embedding(k, sin, cos, k)
-
-
def is_enough_kv_cache_room_4_36(past_key_value, idx, seq_len=1):
    # to determinate if is enough kv cache room in transformers==4.36
    # seq_len for current seq len
ipex_llm/transformers/speculative.py CHANGED
@@ -432,8 +432,7 @@ def _check_and_extend_kv_cache(past_key_values, max_step_draft, kv_alloc_block_l
    from ipex_llm.transformers.models.utils import is_enough_kv_cache_room_4_31, \
        extend_kv_cache
    enough_kv_room = True
-   if model_type not in ["chatglm", "qwen", "baichuan", "llama", "mistral",
-                         "gptj", "opt"]:
+   if model_type not in ["chatglm", "qwen", "baichuan", "llama", "mistral", "opt"]:
        return past_key_values, False
    cache_k = past_key_values[0][0]
    if model_type == "chatglm":
@@ -527,7 +526,7 @@ def _crop_past_key_values(self, past_key_values, new_cache_size, _enable_ipex=Fa
                           v[:-(new_cache_size), :, :, :])
                          for k, v in past_key_values
                          ]
-   elif self.config.model_type in ["baichuan", "gptj"]:
+   elif self.config.model_type in ["baichuan"]:
        past_key_values = [
            (k[:, :, :-(new_cache_size), :],
             v[:, :, :-(new_cache_size), :])
@@ -796,13 +795,6 @@ def _non_cpu_ipex_verify(self, verify_input_ids, past_key_values, cur_attention_
                                    device=verify_input_ids.device)
        position_ids = position_ids.unsqueeze(0).repeat(1, 1) + past_key_value_len
        forward_args["position_ids"] = position_ids
-   elif self.config.model_type == "gptj":
-       past_length = past_key_values[0][0].size(2)
-       input_len = verify_input_ids.shape[1]
-       position_ids = torch.arange(past_length, input_len + past_length,
-                                   dtype=torch.long, device=verify_input_ids.device)
-       position_ids = position_ids.unsqueeze(0).view(-1, input_len)
-       forward_args["position_ids"] = position_ids

    return self(**forward_args)

@@ -971,10 +963,6 @@ def speculative_generate(self,
                past_key_value_len = past_key_values[0][0].shape[0]
                position_ids = torch.Tensor([[past_key_value_len + step_draft]]).long()
                forward_args["position_ids"] = position_ids
-           elif self.config.model_type == "gptj":
-               past_length = draft_past_key_values[0][0].size(2)
-               position_ids = torch.Tensor([[past_length]]).long().to(self.device)
-               forward_args["position_ids"] = position_ids

            if _enable_ipex:
                if any(keyword in self.config.model_type
ipex_llm/transformers/utils.py CHANGED
@@ -172,6 +172,8 @@ def get_xpu_device_name(device: torch.device):
    if device.type != "xpu":
        return device.type
    else:
+       # possiable device name:
+       # ["arc", "pvc", "mtl", "lnl", "bmg", "arl", "legacy", "unknown"]
        import xe_linear
        return xe_linear.get_xpu_device_name(device)
 
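A hedged usage sketch of this helper (xe_linear is Intel's binary extension and only resolves in an ipex-llm XPU environment; the dispatch below is illustrative rather than the package's actual code, and simply echoes the use_esimd_kernel change above, where only "arc" keeps the esimd path):

import torch

def pick_fp16_kernel(device: torch.device) -> str:
    name = get_xpu_device_name(device)  # e.g. "arc", "pvc", "mtl", "lnl", "bmg", ...
    return "esimd" if name == "arc" else "generic"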
{ipex_llm-2.2.0b20250105.post0.dist-info → ipex_llm-2.2.0b20250106.post1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: ipex-llm
- Version: 2.2.0b20250105.post0
+ Version: 2.2.0b20250106.post1
Summary: Large Language Model Develop Toolkit
Home-page: https://github.com/intel-analytics/ipex-llm
Author: BigDL Authors
@@ -27,10 +27,10 @@ Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine
Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'all'
Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'all'
Provides-Extra: cpp
- Requires-Dist: bigdl-core-cpp ==2.6.0b20250106.post0 ; extra == 'cpp'
+ Requires-Dist: bigdl-core-cpp ==2.6.0b20250106.post1 ; extra == 'cpp'
Requires-Dist: setuptools ; extra == 'cpp'
Provides-Extra: cpp-arl
- Requires-Dist: bigdl-core-cpp ==2.6.0b20250106.post0 ; extra == 'cpp-arl'
+ Requires-Dist: bigdl-core-cpp ==2.6.0b20250106.post1 ; extra == 'cpp-arl'
Requires-Dist: setuptools ; extra == 'cpp-arl'
Requires-Dist: onednn-devel ==2024.1.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
Requires-Dist: onednn ==2024.1.1 ; (platform_system == "Windows") and extra == 'cpp-arl'
@@ -67,7 +67,7 @@ Requires-Dist: transformers ==4.40.0 ; extra == 'npu'
Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'npu'
Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'npu'
Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'npu'
- Requires-Dist: bigdl-core-npu ==2.6.0b20250106.post0 ; (platform_system == "Windows") and extra == 'npu'
+ Requires-Dist: bigdl-core-npu ==2.6.0b20250106.post1 ; (platform_system == "Windows") and extra == 'npu'
Provides-Extra: serving
Requires-Dist: py-cpuinfo ; extra == 'serving'
Requires-Dist: fschat[model_worker,webui] ==0.2.36 ; extra == 'serving'
@@ -87,9 +87,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu'
Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu'
Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu'
Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu'
- Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250106.post0 ; extra == 'xpu'
- Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250106.post0 ; extra == 'xpu'
- Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250106.post0 ; extra == 'xpu'
+ Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250106.post1 ; extra == 'xpu'
+ Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250106.post1 ; extra == 'xpu'
+ Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250106.post1 ; extra == 'xpu'
Provides-Extra: xpu-2-1
Requires-Dist: py-cpuinfo ; extra == 'xpu-2-1'
Requires-Dist: protobuf ; extra == 'xpu-2-1'
@@ -104,9 +104,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu-2-1'
Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu-2-1'
Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu-2-1'
Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu-2-1'
- Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250106.post0 ; extra == 'xpu-2-1'
- Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250106.post0 ; extra == 'xpu-2-1'
- Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250106.post0 ; extra == 'xpu-2-1'
+ Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250106.post1 ; extra == 'xpu-2-1'
+ Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250106.post1 ; extra == 'xpu-2-1'
+ Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250106.post1 ; extra == 'xpu-2-1'
Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-1'
Requires-Dist: dpcpp-cpp-rt ==2024.0.2 ; (platform_system == "Windows") and extra == 'xpu-2-1'
Requires-Dist: mkl-dpcpp ==2024.0.0 ; (platform_system == "Windows") and extra == 'xpu-2-1'
@@ -124,7 +124,7 @@ Requires-Dist: setuptools ; extra == 'xpu-2-6'
Requires-Dist: torch ==2.6.0+xpu ; extra == 'xpu-2-6'
Requires-Dist: torchvision ==0.21.0+xpu ; extra == 'xpu-2-6'
Requires-Dist: torchaudio ==2.6.0+xpu ; extra == 'xpu-2-6'
- Requires-Dist: bigdl-core-xe-all ==2.6.0b20250106.post0 ; extra == 'xpu-2-6'
+ Requires-Dist: bigdl-core-xe-all ==2.6.0b20250106.post1 ; extra == 'xpu-2-6'
Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-6'
Provides-Extra: xpu-arc
Requires-Dist: py-cpuinfo ; extra == 'xpu-arc'
@@ -137,9 +137,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arc'
Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arc'
Requires-Dist: tabulate ; extra == 'xpu-arc'
Requires-Dist: setuptools ; extra == 'xpu-arc'
- Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106.post0 ; extra == 'xpu-arc'
- Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106.post0 ; extra == 'xpu-arc'
- Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106.post0 ; extra == 'xpu-arc'
+ Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106.post1 ; extra == 'xpu-arc'
+ Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106.post1 ; extra == 'xpu-arc'
+ Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106.post1 ; extra == 'xpu-arc'
Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arc'
Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
@@ -160,9 +160,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arl'
Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arl'
Requires-Dist: tabulate ; extra == 'xpu-arl'
Requires-Dist: setuptools ; extra == 'xpu-arl'
- Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106.post0 ; extra == 'xpu-arl'
- Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106.post0 ; extra == 'xpu-arl'
- Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106.post0 ; extra == 'xpu-arl'
+ Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106.post1 ; extra == 'xpu-arl'
+ Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106.post1 ; extra == 'xpu-arl'
+ Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106.post1 ; extra == 'xpu-arl'
Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arl'
Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
@@ -183,9 +183,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-lnl'
Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-lnl'
Requires-Dist: tabulate ; extra == 'xpu-lnl'
Requires-Dist: setuptools ; extra == 'xpu-lnl'
- Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106.post0 ; extra == 'xpu-lnl'
- Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106.post0 ; extra == 'xpu-lnl'
- Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106.post0 ; extra == 'xpu-lnl'
+ Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250106.post1 ; extra == 'xpu-lnl'
+ Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250106.post1 ; extra == 'xpu-lnl'
+ Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250106.post1 ; extra == 'xpu-lnl'
Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-lnl'
Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
{ipex_llm-2.2.0b20250105.post0.dist-info → ipex_llm-2.2.0b20250106.post1.dist-info}/RECORD CHANGED
@@ -41,35 +41,35 @@ ipex_llm/langchain/llms/transformerspipelinellm.py,sha256=vm522YPPwWxxAPVvQBtxRf
ipex_llm/langchain/vllm/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
ipex_llm/langchain/vllm/vllm.py,sha256=6dxc-ZISZQrJilEa_HA827l75Dv9rcHpY_G6FdJ8BVs,7793
ipex_llm/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- ipex_llm/libs/bloom-api.dll,sha256=91zD_S-epu_Gp_dAblCkxDwZFjSBhhfho6to8r3K3Ac,36352
- ipex_llm/libs/bloom.dll,sha256=wxdywP2aMiNAKEw3uU31aAe9RR2r1JjhIaqPuslYBZU,506880
- ipex_llm/libs/gptneox-api.dll,sha256=MNnJv8cjFUg_siCIlKPSCg8_kTVHrTFTvDPKu8SAHZw,24576
- ipex_llm/libs/gptneox.dll,sha256=d-3KIFy39GehluUVnpipGeN1qTqVkKWrz5dNT6j9SPk,567296
- ipex_llm/libs/libbloom_avx.dll,sha256=iwht0gpe7yTRMF3F5ZQ8DRs26vQi7i2D8Mafg_Yyt5Q,535040
- ipex_llm/libs/libbloom_vnni.dll,sha256=VJxjLCbNP0wZBW6Rw0owR_rc9z3PKPONHLHOIZYiZiM,506880
- ipex_llm/libs/libgptneox_avx.dll,sha256=3Ja3SkOp6wCtA1NB6GRBQLseB91bzgbhKOWRKh0o01A,595456
- ipex_llm/libs/libgptneox_vnni.dll,sha256=8hinv9vxdzjsW7PJhukAxKeLAtF2f2PQOmzwnNEzDD8,567808
- ipex_llm/libs/libllama_avx.dll,sha256=R7yRH3t4q0CXy0Sv91m8j2z0wnG6Qa_-JxEp6kTcgt0,589824
- ipex_llm/libs/libllama_vnni.dll,sha256=Cg9vrS5J4Uev0iW7p5346-flWKLjNJqyItv89g3qwRs,561664
- ipex_llm/libs/libstarcoder_avx.dll,sha256=dCgKfpmh57NN4sjs6j7bQFtO-URXcTmPS6oktgLHwsU,626688
- ipex_llm/libs/libstarcoder_vnni.dll,sha256=KZufV9B-riiEIitRMxv9nmGWxXM6Qbrv4E8ybustdQA,598528
- ipex_llm/libs/llama-api.dll,sha256=lS0rqYlsIUJMOBi1rkLqurGnCsCaDCAe4QZCHZGGeGI,25600
- ipex_llm/libs/llama.dll,sha256=2exLzMA9NFPkMldvfZeT-Ua6moJjcUDsTwv-hyLEjFg,561152
- ipex_llm/libs/main-bloom.exe,sha256=Rjc3aUPGdbJ1nkRNWZ9j637pA4nZ6fC4EBXJE3vhKkM,103424
- ipex_llm/libs/main-gptneox.exe,sha256=eIq223joAOSPSi0nKxNI0_z9CjC_9f0XGx5ZEFdMX8w,98816
- ipex_llm/libs/main-llama.exe,sha256=-jaXiwAERqW8QYjOB1VYs2-hU-HSLYoheA9R7hQnqPg,99840
- ipex_llm/libs/main-starcoder.exe,sha256=-2M-nKhMW8AMhUvwno-iN-x32G_Csb6TCK4b45-ywQE,157696
- ipex_llm/libs/pipeline.dll,sha256=J_rocSlOut7S0JZQnZlUD-IQ92D5P4pAvzbrPZFG7us,72704
- ipex_llm/libs/quantize-bloom.exe,sha256=XYSsf4HvaYKc0J8022uz6nz6AifT-8FhB7dqJRuEGoE,126464
- ipex_llm/libs/quantize-bloom_vnni.exe,sha256=4mpem2yXPOPKoSsezR32Ckng-_mVE37yHWxY4PG48mc,127488
- ipex_llm/libs/quantize-gptneox.exe,sha256=ZqX02ScTq8X90MRYbJVo5NFdI8E0MzwCpS2UGGK6mO8,104448
- ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=rr-sll3eMtxB0Rg-tXQsFUhcrHkkLnAu8HZWzeqVPJ4,104960
- ipex_llm/libs/quantize-llama.exe,sha256=XrWIjKL-5XaqThU-fNnfVS4pTc1Yp_pGlhwALHU4c-k,109568
- ipex_llm/libs/quantize-llama_vnni.exe,sha256=AuQqKGxs2U3S2Zyk-GMDFFpaTKeQpGR3ydW5gZ8FbWA,110592
- ipex_llm/libs/quantize-starcoder.exe,sha256=X3TroCzyM_i8El6D0-V2mufnRgZc5kBsW1qNoJKIdcE,127488
- ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=K1NgSQOr5-xEEOHqD25qxJe5Z6esvUivIJEkBYto6KM,128512
- ipex_llm/libs/starcoder-api.dll,sha256=YTFtKQ0jSo2cgwO7HkIfjPvD5jANUaDWSa9ykCLfU5s,21504
- ipex_llm/libs/starcoder.dll,sha256=KWX7QLhGFBGUBea-Od0udwKxnJnblsi4y8TSJl9p67g,598016
+ ipex_llm/libs/bloom-api.dll,sha256=MSVCD9yi2nlgksGF-5u7tKE660pzfRthAuTWK_7ozbE,36352
+ ipex_llm/libs/bloom.dll,sha256=DMJiNnnBbuZArLpPrSIeVgBQtgHRKK0gfqF3e_4N4Ak,506880
+ ipex_llm/libs/gptneox-api.dll,sha256=B9yLQPHTYv0o88cZ3pYCfY5ksnetr0xcWC4wKYXhsKs,24576
+ ipex_llm/libs/gptneox.dll,sha256=B8H5jlqDLVMSi4wqdPZ0ozI1smbuY7jRN-9YQXSoDQI,567296
+ ipex_llm/libs/libbloom_avx.dll,sha256=syY7Lc2kwoM7D57WPaj2XIMJCXyY1IfXClgkY8Lrs1w,535040
+ ipex_llm/libs/libbloom_vnni.dll,sha256=RtkwaaPVlbWv6OtxClKILy5CYlL98olnJYdFbe95tEM,506880
+ ipex_llm/libs/libgptneox_avx.dll,sha256=1gmdd3DdFrMJBI4pXyI0_703ndz2kopDkK8vz7uPyKI,595456
+ ipex_llm/libs/libgptneox_vnni.dll,sha256=bUKisE3_Cg4rZ2AtjGhVoXtKqNVAo4MZI7N7Q4Q_cf0,567808
+ ipex_llm/libs/libllama_avx.dll,sha256=lfcj2lw9lIgmDifxIzNnzcz6fHuYUIVYvdvwoM8ijOE,589824
+ ipex_llm/libs/libllama_vnni.dll,sha256=5xY7lEJ-sW6owTNcBO8SWRCoweo7k8N4FS4v9-lQcdc,561664
+ ipex_llm/libs/libstarcoder_avx.dll,sha256=z8P_H55-1VQFB9KcBxnBrSlkLlsLpxPghN9VfKpQulA,626688
+ ipex_llm/libs/libstarcoder_vnni.dll,sha256=rPEGDTZlUTl0hUP40uRe8idjkVhNkyYk818RYmONL1Q,598528
+ ipex_llm/libs/llama-api.dll,sha256=XJP636817YbUBOneSVGLqr9-qefV4DV6vR_8P7e8Q7k,25600
+ ipex_llm/libs/llama.dll,sha256=YF2m8rwTy7hRbCo31FG-fo1q6DStBAxenNgkz4_gkr0,561152
+ ipex_llm/libs/main-bloom.exe,sha256=lWEzFmr2zWPLnDUy3l-FBv5k-xLn7BXm1A3RSkZx890,103424
+ ipex_llm/libs/main-gptneox.exe,sha256=ex0FAncs6bLngz7-jmtCxuDNEWlZCXThBMCdveDJnEs,98816
+ ipex_llm/libs/main-llama.exe,sha256=ooEHT6YMGZTCKo3h-m3E7f8vkc6aMJPZiRH9dU_4Wu0,99840
+ ipex_llm/libs/main-starcoder.exe,sha256=r9rZZhk0V4_6L7EH7evcBdmRyWSqiZ_oNon3mjSCXEY,157696
+ ipex_llm/libs/pipeline.dll,sha256=5rhnCddgAUozj2ZQox5ykjsWivaNsK-jVcvArO9Hfy8,72704
+ ipex_llm/libs/quantize-bloom.exe,sha256=V-ht-dVMJ9dq4O9vokGlBoOyTCCIgZoZMC-Cn8UdPL4,126464
+ ipex_llm/libs/quantize-bloom_vnni.exe,sha256=q14nAfmyRJW57xLG10wKCODG-cSOZ6KP7VCIVPAqslM,127488
+ ipex_llm/libs/quantize-gptneox.exe,sha256=Mjhxhcb8FXwrGI0dMRrHXzOOWkLZwtYQkpj4i2gSTYk,104448
+ ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=_9D8mqr8zCI7sAzRZ_KlCayqDq0ctQWcwAzI4NWdH7U,104960
+ ipex_llm/libs/quantize-llama.exe,sha256=TsPfUBMnQNcKA44_QG9wrmRSEcs2bAmiSOD-ll57g6w,109568
+ ipex_llm/libs/quantize-llama_vnni.exe,sha256=1fUjt6woxEAzYQ1b0Ok3JRo1YcFcTiogB-l0RdOs_iY,110592
+ ipex_llm/libs/quantize-starcoder.exe,sha256=-wvnnZhGw_eKPnbhZiiaepceFbd_zMUY7qQwrKg4miw,127488
+ ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=iPbhV8PTL7J3JXJnJkRw1tsvaHA0kGxgwpttSzVkUHQ,128512
+ ipex_llm/libs/starcoder-api.dll,sha256=bpzxqpZZE9Q7Yghy-33_Hkvo-OLvKntfqH-DJLWfXjQ,21504
+ ipex_llm/libs/starcoder.dll,sha256=QXucnOagZLcxQF8PG_nvQ9kW4XvexngcqrmJE5N5o9s,598016
ipex_llm/llamaindex/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
ipex_llm/llamaindex/llms/__init__.py,sha256=KP1lEdGqDuxPoxL1ZSH25Pm2kKMPJBWUTLR0ckSLMIU,1139
ipex_llm/llamaindex/llms/bigdlllm.py,sha256=FQBzq1KOjfc6uofTXAha3O7TqpJkNfOFepXQmOVlbnI,26314
@@ -87,25 +87,25 @@ ipex_llm/serving/fastchat/tgi_api_protocol.py,sha256=brT3k3-V0NJrU4fRqUwWjC0O3iO
ipex_llm/serving/fastchat/tgi_api_server.py,sha256=agNTAEiZPSuj3dEdIdYKwkoY0cXOUDX06DiM9VP2knQ,24418
ipex_llm/serving/fastchat/vllm_worker.py,sha256=ZLz2Q9GxJO6r_LOiP6epgCRjBGk-K4EB1SNEWSJp5DA,11091
ipex_llm/transformers/__init__.py,sha256=l4KkMkLe-pRC7b_kj6LCfeifgE-Uo33_Av_FwN9HnFA,1074
- ipex_llm/transformers/convert.py,sha256=pFm6VlU84u_Llr2sp6-gRrEYDeNgIk2QPukolq4IE1s,99947
+ ipex_llm/transformers/convert.py,sha256=TxWdTTOSvh-j5jqokQJVWykta4U4LHupE1QJ-9udzwc,98733
ipex_llm/transformers/convert_ipex.py,sha256=iKXo0n8fVFTOA2fNYYrByMFK0dovL-kLd2sVDk88AlQ,14334
ipex_llm/transformers/embedding.py,sha256=bdgk59DvD4ZZyxRzewXOR7g56nThgO6uhIwk8QL7f-s,9299
ipex_llm/transformers/kv.py,sha256=k4TU18LlA-Sbq9WNNQnfuzu3RSFBwFhmaV3BcGN5bAo,19191
ipex_llm/transformers/lisa.py,sha256=F5WxbtXQ7RdKulj83h_2DnEIgKiKGZf7zvOmg6QBl2s,3289
- ipex_llm/transformers/loader.py,sha256=cOgX93xOC-4dt01GTJ5wyd7PjZ8S43r4mctkR2YxVuw,6893
+ ipex_llm/transformers/loader.py,sha256=AwjV5RpI2t2bedlv7ZhLm8cfd-QJZm5hny-XyjIvdnk,6876
ipex_llm/transformers/lookup.py,sha256=b6OlZ9OV10R9qeWw8mVryVpDxszkjwLkldvi7GPMJY8,19614
- ipex_llm/transformers/low_bit_linear.py,sha256=nKraUvZJ7UdXP29HSE4CJPIVxmN-TvG8dpT4gpleuyQ,41688
- ipex_llm/transformers/model.py,sha256=KcRjkauGg48BYrUBoUZaVMpg7Piuz5JrfIpVZd3EIjs,41105
+ ipex_llm/transformers/low_bit_linear.py,sha256=lPIvDuRoS0zusiJ6vw_fOTJgK5ylh4CuD3U-qs8ih4Y,40869
+ ipex_llm/transformers/model.py,sha256=fj7LBjrWtWwDJJYXnWiXsLGS4ayqqHfnh0p51dSDssE,40908
ipex_llm/transformers/modelling_bigdl.py,sha256=7JpNVMuyq_OmtNUaMFMXdxPWZp2q0QHC02QeA-VTPOw,6709
ipex_llm/transformers/npu_model.py,sha256=YW02GeVz-9ZGqxAeSz0AOvciS-17bo9eK5ZOBrICwSQ,39508
ipex_llm/transformers/patches.py,sha256=halPWm__ORh2fRFSIFPiCNg3LQBfrRkTPtmtRpBJCZQ,1286
ipex_llm/transformers/pipeline_parallel.py,sha256=uNZpOXljNmdoEYnP8U-VFiN4dRZb2piQbIf2bG9LQnE,49051
ipex_llm/transformers/qlora.py,sha256=jtPGsvWFjbTUGzDBCdfftnCis_0nJQNRpACSwXUbbGU,14943
ipex_llm/transformers/relora.py,sha256=-dYzUV0P-IhO2jFdnzN9-v_sFzJpRj3ZwN9eCJzOoCw,16567
- ipex_llm/transformers/speculative.py,sha256=Zf1nQb5GXpJQrUHBTL-H4RUBfdv3lGhfehzudHimhYk,64109
+ ipex_llm/transformers/speculative.py,sha256=0XNLgc9dGswJHVPrXo4iM7pPxkWwfFfJMECcivJSnIc,63368
ipex_llm/transformers/streamer.py,sha256=RrVlLblzCOtABRUpaMXAyaMnCGgLUtAi_YesLumRbww,4842
ipex_llm/transformers/training_patch.py,sha256=oxMkUtqyvqJiprw6dE3skkYfD1HOmUlH9N0hBkbn0G0,10799
- ipex_llm/transformers/utils.py,sha256=fXLIlr9hoBr27p3w3xzczZGPk2cCTIRbUKBkiVCGYbc,16889
+ ipex_llm/transformers/utils.py,sha256=7syzq4jnEo-mWmS9E2VZ2GcFrjojWI8E7Hcx0tloifg,16996
ipex_llm/transformers/xpu_customize_fwd.py,sha256=wFpIhs5F6tkNs8gBOrLxWdhLzO3EDHovVkERPIAoAvg,7611
ipex_llm/transformers/xpu_ops.py,sha256=H46-69pMRQhekbAEoDfNacCInLWycMHDqrgMGLvFYfI,4362
ipex_llm/transformers/awq/__init__.py,sha256=Du5gu3-eeAkeDO_dEMBTzrDBA66DSN3uL3-rn8WGXQw,875
@@ -151,7 +151,6 @@ ipex_llm/transformers/models/gemma2.py,sha256=2WZuv-FLzJyTJFaYxOuzJt47QE64M0lHnz
ipex_llm/transformers/models/glm.py,sha256=gHYgfn20jPRL-ElXy-rUqMh6_LQcc5x7DEXSZuRA4E0,7094
ipex_llm/transformers/models/gpt2.py,sha256=YSaNgK1uLCFDuIFqnKO0Mi-AsOZsYav-7pNf_NpKGdM,3445
ipex_llm/transformers/models/gptbigcode.py,sha256=cP1_qGWoa43R2WacAMblShjku4QupcCZiLaPPAoOUs4,9101
- ipex_llm/transformers/models/gptj.py,sha256=TTIx461X2nOcIkrAcZhEf7d7mjJ3yvEC9KLVc1-hrpc,17973
ipex_llm/transformers/models/gptneox.py,sha256=loRh1x_5S6BCeOr_s5xr-N_1SQHL3Y5IiUBAEyoMUqQ,6172
ipex_llm/transformers/models/internlm.py,sha256=ZbIUMDwNRcrCeduXfbA_uq1AUEWawEt6CJRvQl3LkAg,17832
ipex_llm/transformers/models/internvl.py,sha256=Vx0vENIEQLX2M6P398mw5TOhpks0U8xf8rtRQvy94go,8154
@@ -175,7 +174,7 @@ ipex_llm/transformers/models/rwkv5.py,sha256=OkRNj1pCAZg1z2Fw-I0DEnxLEdZyPeRSQ6m
ipex_llm/transformers/models/sd.py,sha256=VvHV5u-0k2MgHu3NL9113hPj7DgfxqctuKzEEeNfRDU,5981
ipex_llm/transformers/models/stablelm.py,sha256=RGQCYuQhYqtZ1j3RZkYi0_QvCRnUgUIPYxfBcLnElzg,6885
ipex_llm/transformers/models/starcoder2.py,sha256=4P3mhRYf2Kreb1ESjrQGfy1puLMmZXgV35zf-Tksvao,6462
- ipex_llm/transformers/models/utils.py,sha256=Qbz7UkYSbsM5bodH2445O0-JF50Mu3UEwW0j2ZNxHSU,15997
+ ipex_llm/transformers/models/utils.py,sha256=85rGIzGZvWe3JjYpWcuc1nfzI_tn_zFcdZpWivxJkl0,15457
ipex_llm/transformers/models/yuan.py,sha256=1jRPebwAK2ENbyYokOmb4LSVo-szucWiygz9zTv-scs,7656
ipex_llm/transformers/npu_models/__init__.py,sha256=ulEUGLjaP48LCrVeury3UxLjXxKzRi0UpSG4bYu-7f8,585
ipex_llm/transformers/npu_models/baichuan.py,sha256=fJtd7fBrttySghRUgfZTAdxLjsSNC-XL08HISsXigLE,4685
@@ -244,11 +243,11 @@ ipex_llm/vllm/xpu/engine/__init__.py,sha256=pY_CpyuZd72fr6s32ejeKHKFW0K4vUU2rzZj
ipex_llm/vllm/xpu/engine/engine.py,sha256=k4-D27WS_Gk3mA--w3HWAjPjb4Aiu043MVPi0ZoAUBc,5984
ipex_llm/vllm/xpu/entrypoints/openai/api_server.py,sha256=GshTZFB8e4PWvqckfbmTOU6b0oLkNn7A-vzLuG9--j8,21544
ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py,sha256=2rENA2ucynMaIjiZBEh2ez1o5vR32GaP514t39CD7KM,8676
- ipex_llm-2.2.0b20250105.post0.data/scripts/ipex-llm-init.bat,sha256=HPtCYuDYwEatq7dAwOvdfVcHYCpAVdbj75K1qh0vQek,2578
- ipex_llm-2.2.0b20250105.post0.data/scripts/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
- ipex_llm-2.2.0b20250105.post0.data/scripts/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
- ipex_llm-2.2.0b20250105.post0.dist-info/METADATA,sha256=-eNpo4zm9w1DQqVCTFi228urj8ylbTuXml4uNwlEP3E,12825
- ipex_llm-2.2.0b20250105.post0.dist-info/WHEEL,sha256=6iYPr8vTHsyDK75jr9X0V3I9wPSVmtwr_8fdATBciGk,98
- ipex_llm-2.2.0b20250105.post0.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
- ipex_llm-2.2.0b20250105.post0.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
- ipex_llm-2.2.0b20250105.post0.dist-info/RECORD,,
+ ipex_llm-2.2.0b20250106.post1.data/scripts/ipex-llm-init.bat,sha256=HPtCYuDYwEatq7dAwOvdfVcHYCpAVdbj75K1qh0vQek,2578
+ ipex_llm-2.2.0b20250106.post1.data/scripts/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
+ ipex_llm-2.2.0b20250106.post1.data/scripts/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
+ ipex_llm-2.2.0b20250106.post1.dist-info/METADATA,sha256=I0vPU5mDtPZR3wpY87fYHdn6r14U0T50NWGP7EsF5s8,12825
+ ipex_llm-2.2.0b20250106.post1.dist-info/WHEEL,sha256=6iYPr8vTHsyDK75jr9X0V3I9wPSVmtwr_8fdATBciGk,98
+ ipex_llm-2.2.0b20250106.post1.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
+ ipex_llm-2.2.0b20250106.post1.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
+ ipex_llm-2.2.0b20250106.post1.dist-info/RECORD,,
ipex_llm/transformers/models/gptj.py DELETED
@@ -1,441 +0,0 @@
- #
- # Copyright 2016 The BigDL Authors.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- #     http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- #
- # This file is adapted from
- # https://github.com/huggingface/transformers/blob/main/src/transformers/models/gptj/modeling_gptj.py
- #
-
- import torch
- from typing import Optional, Tuple, Union
- from ipex_llm.transformers.models.utils import init_kv_cache, extend_kv_cache, \
-     apply_rotary_pos_emb, append_kv_cache, apply_ipex_rotate_every_two
- from transformers.utils.import_utils import is_torch_fx_proxy
- from transformers.modeling_outputs import BaseModelOutputWithPast
- from transformers.models.gptj.modeling_gptj import GPTJModel
- from ipex_llm.utils.common import invalidInputError
-
- import os
-
- KV_CACHE_ALLOC_BLOCK_LENGTH = int(os.environ.get("KV_CACHE_ALLOC_BLOCK_LENGTH", 256))
-
-
- def _get_embed_positions(self, position_ids):
-     embed_positions = self.embed_positions
-     if embed_positions.device != position_ids.device:
-         embed_positions = embed_positions.to(position_ids.device)
-         self.embed_positions = embed_positions
-     return embed_positions.repeat(position_ids.shape[0], 1, 1)
-
-
- def _attn(
-     self,
-     query,
-     key,
-     value,
-     attention_mask=None,
-     head_mask=None,
- ):
-     # compute causal mask from causal mask buffer
-     query_length, key_length = query.size(-2), key.size(-2)
-     causal_mask = self.bias[:, :, key_length - query_length: key_length, :key_length]
-
-     # Keep the attention weights computation in fp32 to avoid overflow issues
-     query = query.to(torch.float32)
-     key = key.to(torch.float32)
-
-     attn_weights = torch.matmul(query, key.transpose(-1, -2))
-
-     mask_value = torch.finfo(attn_weights.dtype).min
-     # Need to be a tensor, otherwise we get error:
-     # `RuntimeError: expected scalar type float but found double`.
-     # Need to be on the same device, otherwise `RuntimeError: ..., x and y to be on the same device`
-     mask_value = torch.tensor(mask_value, dtype=attn_weights.dtype).to(attn_weights.device)
-     attn_weights = torch.where(causal_mask, attn_weights, mask_value)
-
-     attn_weights = attn_weights / self.scale_attn
-
-     if attention_mask is not None:
-         # Apply the attention mask
-         attn_weights = attn_weights + attention_mask
-
-     attn_weights = nn.functional.softmax(attn_weights, dim=-1)
-     attn_weights = attn_weights.to(value.dtype)
-     attn_weights = self.attn_dropout(attn_weights)
-
-     # Mask heads if we want to
-     if head_mask is not None:
-         attn_weights = attn_weights * head_mask
-
-     attn_output = torch.matmul(attn_weights, value)
-
-     return attn_output, attn_weights
-
-
- def gptj_attention_forward(
-     self,
-     hidden_states: torch.FloatTensor,
-     layer_past: Optional[Tuple[torch.Tensor]] = None,
-     attention_mask: Optional[torch.FloatTensor] = None,
-     position_ids: Optional[torch.LongTensor] = None,
-     head_mask: Optional[torch.FloatTensor] = None,
-     use_cache: Optional[bool] = False,
-     rotary_emb: Optional[Tuple]=None,
-     output_attentions: Optional[bool] = False,
- ) -> Union[
-     Tuple[torch.Tensor, Tuple[torch.Tensor]],
-     Optional[Tuple[torch.Tensor, Tuple[torch.Tensor], Tuple[torch.Tensor, ...]]],
- ]:
-     query = self.q_proj(hidden_states)
-     key = self.k_proj(hidden_states)
-     value = self.v_proj(hidden_states)
-
-     query = self._split_heads(query, self.num_attention_heads, self.head_dim, True)
-     key = self._split_heads(key, self.num_attention_heads, self.head_dim, True)
-     value = self._split_heads(value, self.num_attention_heads, self.head_dim, False)
-
-     sin, cos = rotary_emb
-     use_fuse_rope = hidden_states.device.type == "xpu" and not self.training
-
-     if self.rotary_dim is not None:
-         k_rot = key[:, :, :, : self.rotary_dim]
-         q_rot = query[:, :, :, : self.rotary_dim]
-
-         if use_fuse_rope:
-             apply_ipex_rotate_every_two(q_rot, k_rot, cos, sin)
-         else:
-             k_pass = key[:, :, :, self.rotary_dim:]
-             q_pass = query[:, :, :, self.rotary_dim:]
-             q_rot, k_rot = apply_rotary_pos_emb(q_rot, k_rot, cos, sin, position_ids, "gptj")
-             key = torch.cat([k_rot, k_pass], dim=-1)
-             query = torch.cat([q_rot, q_pass], dim=-1)
-     else:
-         if use_fuse_rope:
-             apply_ipex_rotate_every_two(query, key, cos, sin)
-         else:
-             query, key = apply_rotary_pos_emb(query, key, cos, sin, position_ids, "gptj")
-
-     batch_size, q_len, _ = hidden_states.size()
-
-     key = key.permute(0, 2, 1, 3).contiguous()
-     query = query.permute(0, 2, 1, 3).contiguous()
-
-     kv_seq_len = key.size(-2)
-     device = hidden_states.device
-
-     if layer_past is not None:
-         kv_seq_len += layer_past[0].size(2)
-
-     if layer_past is not None:
-         cache_k = layer_past[0]
-         cache_v = layer_past[1]
-         past_length = cache_k.size(2)
-         if cache_k.stride()[1] < kv_seq_len * cache_k.size(3):
-             new_cache_k, new_cache_v = extend_kv_cache(batch_size,
-                                                        self.num_attention_heads,
-                                                        self.head_dim,
-                                                        past_length,
-                                                        kv_seq_len + KV_CACHE_ALLOC_BLOCK_LENGTH,
-                                                        dtype=cache_v.dtype,
-                                                        device=device)
-             new_cache_k[:] = cache_k
-             new_cache_v[:] = cache_v
-             cache_k = new_cache_k
-             cache_v = new_cache_v
-         key, value = append_kv_cache(cache_k, cache_v, key, value)
-
-     elif use_cache:
-         key_cache, value_cache = init_kv_cache(batch_size,
-                                                self.num_attention_heads,
-                                                self.head_dim,
-                                                kv_seq_len,
-                                                kv_seq_len + KV_CACHE_ALLOC_BLOCK_LENGTH,
-                                                dtype=value.dtype,
-                                                device=device)
-         key_cache[:] = key
-         value_cache[:] = value
-         key = key_cache
-         value = value_cache
-
-     if use_cache is True:
-         present = (key, value)
-     else:
-         present = None
-
-     # compute self-attention: V x Softmax(QK^T)
-     attn_output, attn_weights = self._attn(query, key, value, attention_mask, head_mask)
-
-     attn_output = self._merge_heads(attn_output, self.num_attention_heads, self.head_dim)
-     attn_output = self.out_proj(attn_output)
-     attn_output = self.resid_dropout(attn_output)
-
-     outputs = (attn_output, present)
-     if output_attentions:
-         outputs += (attn_weights,)
-
-     return outputs  # a, present, (attentions)
-
-
- def gptj_block_forward(
-     self,
-     hidden_states: Optional[torch.FloatTensor],
-     layer_past: Optional[Tuple[torch.Tensor]] = None,
-     attention_mask: Optional[torch.FloatTensor] = None,
-     position_ids: Optional[torch.LongTensor] = None,
-     head_mask: Optional[torch.FloatTensor] = None,
-     use_cache: Optional[bool] = False,
-     rotary_emb: Optional[Tuple]=None,
-     output_attentions: Optional[bool] = False,
- ) -> Union[Tuple[torch.Tensor], Optional[Tuple[torch.Tensor, Tuple[torch.FloatTensor, ...]]]]:
-     residual = hidden_states
-     hidden_states = self.ln_1(hidden_states)
-     attn_outputs = self.attn(
-         hidden_states=hidden_states,
-         layer_past=layer_past,
-         attention_mask=attention_mask,
-         position_ids=position_ids,
-         head_mask=head_mask,
-         use_cache=use_cache,
-         rotary_emb=rotary_emb,
-         output_attentions=output_attentions,
-     )
-     attn_output = attn_outputs[0]  # output_attn: a, present, (attentions)
-     outputs = attn_outputs[1:]
-
-     feed_forward_hidden_states = self.mlp(hidden_states)
-     hidden_states = attn_output + feed_forward_hidden_states + residual
-
-     if use_cache:
-         outputs = (hidden_states,) + outputs
-     else:
-         outputs = (hidden_states,) + outputs[1:]
-
-     return outputs  # hidden_states, present, (attentions)
-
-
- def create_sinusoidal_positions(num_pos: int, dim: int) -> torch.Tensor:
-     inv_freq = 1.0 / (10000 ** (torch.arange(0, dim, 2) / dim))
-     sinusoid_inp = torch.einsum("i , j -> i j",
-                                 torch.arange(num_pos, dtype=torch.float), inv_freq).float()
-     return torch.cat((torch.sin(sinusoid_inp), torch.cos(sinusoid_inp)), dim=1)
-
-
- old_init = GPTJModel.__init__
-
-
- def gptj_model_new_init(self, config):
-     old_init(self, config)
-     embed_dim = config.hidden_size
-     rotary_dim = config.rotary_dim
-     pos_embd_dim = rotary_dim or embed_dim
-     max_positions = config.max_position_embeddings
-     self.embed_positions = create_sinusoidal_positions(max_positions, pos_embd_dim)
-
-
- def get_new_embed_positions(position_ids, prev_embed_positions):
-     embed_positions = prev_embed_positions
-     if embed_positions.device != position_ids.device:
-         embed_positions = embed_positions.to(position_ids.device)
-         prev_embed_positions = embed_positions
-     return embed_positions.repeat(position_ids.shape[0], 1, 1), prev_embed_positions
-
-
- def gptj_model_forward(
-     self,
-     input_ids: Optional[torch.LongTensor] = None,
-     past_key_values: Optional[Tuple[Tuple[torch.Tensor]]] = None,
-     attention_mask: Optional[torch.FloatTensor] = None,
-     token_type_ids: Optional[torch.LongTensor] = None,
-     position_ids: Optional[torch.LongTensor] = None,
-     head_mask: Optional[torch.FloatTensor] = None,
-     inputs_embeds: Optional[torch.FloatTensor] = None,
-     use_cache: Optional[bool] = None,
-     output_attentions: Optional[bool] = None,
-     output_hidden_states: Optional[bool] = None,
-     return_dict: Optional[bool] = None,
- ) -> Union[Tuple, BaseModelOutputWithPast]:
-     output_attentions = output_attentions if output_attentions is not None \
-         else self.config.output_attentions
-     output_hidden_states = (
-         output_hidden_states if output_hidden_states is not None
-         else self.config.output_hidden_states
-     )
-     use_cache = use_cache if use_cache is not None else self.config.use_cache
-     return_dict = return_dict if return_dict is not None else self.config.use_return_dict
-
-     if input_ids is not None and inputs_embeds is not None:
-         invalidInputError(False,
-                           "You cannot specify both input_ids and inputs_embeds at the same time")
-     elif input_ids is not None:
-         self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
-         input_shape = input_ids.size()
-         input_ids = input_ids.view(-1, input_shape[-1])
-         batch_size = input_ids.shape[0]
-     elif inputs_embeds is not None:
-         input_shape = inputs_embeds.size()[:-1]
-         batch_size = inputs_embeds.shape[0]
-     else:
-         invalidInputError(False, "You have to specify either input_ids or inputs_embeds")
-
-     device = input_ids.device if input_ids is not None else inputs_embeds.device
-
-     if token_type_ids is not None:
-         token_type_ids = token_type_ids.view(-1, input_shape[-1])
-
-     if past_key_values is None:
-         past_length = 0
-         past_key_values = tuple([None] * len(self.h))
-     else:
-         past_length = past_key_values[0][0].size(-2)
-
-     if position_ids is None:
-         position_ids = torch.arange(past_length, input_shape[-1] + past_length,
-                                     dtype=torch.long, device=device)
-         position_ids = position_ids.unsqueeze(0)
-
-     # Attention mask.
-     if attention_mask is not None:
-         if batch_size <= 0:
-             invalidInputError(False, "batch_size has to be defined and > 0")
-         attention_mask = attention_mask.view(batch_size, -1)
-         # We create a 3D attention mask from a 2D tensor mask.
-         # Sizes are [batch_size, 1, 1, to_seq_length]
-         # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length]
-         # this attention mask is more simple than the triangular masking of causal attention
-         # used in OpenAI GPT, we just need to prepare the broadcast dimension here.
-         attention_mask = attention_mask[:, None, None, :]
-
-         # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
-         # masked positions, this operation will create a tensor which is 0.0 for
-         # positions we want to attend and the dtype's smallest value for masked positions.
-         # Since we are adding it to the raw scores before the softmax, this is
-         # effectively the same as removing these entirely.
-         attention_mask = attention_mask.to(dtype=self.dtype)  # fp16 compatibility
-         attention_mask = (1.0 - attention_mask) * torch.finfo(self.dtype).min
-
-     # Prepare head mask if needed
-     # 1.0 in head_mask indicate we keep the head
-     # attention_probs has shape bsz x num_attention_heads x N x N
-     # head_mask has shape n_layer x batch x num_attention_heads x N x N
-     head_mask = self.get_head_mask(head_mask, self.config.n_layer)
-
-     if inputs_embeds is None:
-         inputs_embeds = self.wte(input_ids)
-
-     hidden_states = inputs_embeds
-
-     if token_type_ids is not None:
-         token_type_embeds = self.wte(token_type_ids)
-         hidden_states = hidden_states + token_type_embeds
-
-     hidden_states = self.drop(hidden_states)
-
-     output_shape = (-1,) + input_shape[1:] + (hidden_states.size(-1),)
-
-     if self.gradient_checkpointing and self.training:
-         if use_cache:
-             logger.warning_once(
-                 "`use_cache=True` is incompatible with gradient checkpointing."
-                 "Setting `use_cache=False`..."
-             )
-             use_cache = False
-
-     presents = () if use_cache else None
-     all_self_attentions = () if output_attentions else None
-     all_hidden_states = () if output_hidden_states else None
-
-     # Repeat cos sin here, call only once for each token.
-     # If put this to attension forward, it will generate too many times.
-     if is_torch_fx_proxy(position_ids) or torch.jit.is_tracing():
-         # The logic to conditionally copy to GPU could not be traced, so we do this
-         # every time in the torch.fx case
-         embed_positions = get_embed_positions(self.embed_positions, position_ids)
-     else:
-         embed_positions, self.embed_positions = get_new_embed_positions(position_ids,
-                                                                         self.embed_positions)
-
-     repeated_position_ids = position_ids.unsqueeze(-1).repeat(1, 1, embed_positions.shape[-1])
-     sincos = torch.gather(embed_positions, 1, repeated_position_ids)
-     sin, cos = torch.split(sincos, sincos.shape[-1] // 2, dim=-1)
-     sin = torch.repeat_interleave(sin[:, :, None, :], 2, 3)
-     cos = torch.repeat_interleave(cos[:, :, None, :], 2, 3)
-
-     for i, (block, layer_past) in enumerate(zip(self.h, past_key_values)):
-         # Model parallel
-         if self.model_parallel:
-             torch.cuda.set_device(hidden_states.device)
-             # Ensure layer_past is on same device as hidden_states (might not be correct)
-             if layer_past is not None:
-                 layer_past = tuple(past_state.to(hidden_states.device) for past_state in layer_past)
-             # Ensure that attention_mask is always on the same device as hidden_states
-             if attention_mask is not None:
-                 attention_mask = attention_mask.to(hidden_states.device)
-             if isinstance(head_mask, torch.Tensor):
-                 head_mask = head_mask.to(hidden_states.device)
-         if output_hidden_states:
-             all_hidden_states = all_hidden_states + (hidden_states,)
-
-         if self.gradient_checkpointing and self.training:
-             outputs = self._gradient_checkpointing_func(
-                 block.__call__,
-                 hidden_states,
-                 None,
-                 attention_mask,
-                 position_ids,
-                 head_mask[i],
-                 use_cache,
-                 output_attentions,
-             )
-         else:
-             outputs = block(
-                 hidden_states=hidden_states,
-                 layer_past=layer_past,
-                 attention_mask=attention_mask,
-                 position_ids=position_ids,
-                 head_mask=head_mask[i],
-                 use_cache=use_cache,
-                 rotary_emb=(sin, cos),
-                 output_attentions=output_attentions,
-             )
-
-         hidden_states = outputs[0]
-         if use_cache is True:
-             presents = presents + (outputs[1],)
-
-         if output_attentions:
-             all_self_attentions = all_self_attentions + (outputs[2 if use_cache else 1],)
-
-         # Model Parallel: If it's the last layer for that device, put things on the next device
-         if self.model_parallel:
-             for k, v in self.device_map.items():
-                 if i == v[-1] and "cuda:" + str(k) != self.last_device:
-                     hidden_states = hidden_states.to("cuda:" + str(k + 1))
-
-     hidden_states = self.ln_f(hidden_states)
-
-     hidden_states = hidden_states.view(output_shape)
-     # Add last hidden state
-     if output_hidden_states:
-         all_hidden_states = all_hidden_states + (hidden_states,)
-
-     if not return_dict:
-         return tuple(v for v in [hidden_states, presents, all_hidden_states, all_self_attentions]
-                      if v is not None)
-
-     return BaseModelOutputWithPast(
-         last_hidden_state=hidden_states,
-         past_key_values=presents,
-         hidden_states=all_hidden_states,
-         attentions=all_self_attentions,
-     )