ipex-llm 2.2.0b20250207__py3-none-win_amd64.whl → 2.2.0b20250208__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ipex_llm/libs/bloom-api.dll +0 -0
- ipex_llm/libs/bloom.dll +0 -0
- ipex_llm/libs/gptneox-api.dll +0 -0
- ipex_llm/libs/gptneox.dll +0 -0
- ipex_llm/libs/libbloom_avx.dll +0 -0
- ipex_llm/libs/libbloom_vnni.dll +0 -0
- ipex_llm/libs/libgptneox_avx.dll +0 -0
- ipex_llm/libs/libgptneox_vnni.dll +0 -0
- ipex_llm/libs/libllama_avx.dll +0 -0
- ipex_llm/libs/libllama_vnni.dll +0 -0
- ipex_llm/libs/libstarcoder_avx.dll +0 -0
- ipex_llm/libs/libstarcoder_vnni.dll +0 -0
- ipex_llm/libs/llama-api.dll +0 -0
- ipex_llm/libs/llama.dll +0 -0
- ipex_llm/libs/main-bloom.exe +0 -0
- ipex_llm/libs/main-gptneox.exe +0 -0
- ipex_llm/libs/main-llama.exe +0 -0
- ipex_llm/libs/main-starcoder.exe +0 -0
- ipex_llm/libs/pipeline.dll +0 -0
- ipex_llm/libs/quantize-bloom.exe +0 -0
- ipex_llm/libs/quantize-bloom_vnni.exe +0 -0
- ipex_llm/libs/quantize-gptneox.exe +0 -0
- ipex_llm/libs/quantize-gptneox_vnni.exe +0 -0
- ipex_llm/libs/quantize-llama.exe +0 -0
- ipex_llm/libs/quantize-llama_vnni.exe +0 -0
- ipex_llm/libs/quantize-starcoder.exe +0 -0
- ipex_llm/libs/quantize-starcoder_vnni.exe +0 -0
- ipex_llm/libs/starcoder-api.dll +0 -0
- ipex_llm/libs/starcoder.dll +0 -0
- ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py +23 -21
- ipex_llm/transformers/npu_pipeline_model/llama.py +11 -7
- ipex_llm/transformers/npu_pipeline_model/minicpm.py +10 -6
- ipex_llm/transformers/npu_pipeline_model/qwen.py +11 -4
- {ipex_llm-2.2.0b20250207.dist-info → ipex_llm-2.2.0b20250208.dist-info}/METADATA +19 -19
- {ipex_llm-2.2.0b20250207.dist-info → ipex_llm-2.2.0b20250208.dist-info}/RECORD +41 -41
- {ipex_llm-2.2.0b20250207.data → ipex_llm-2.2.0b20250208.data}/scripts/ipex-llm-init.bat +0 -0
- {ipex_llm-2.2.0b20250207.data → ipex_llm-2.2.0b20250208.data}/scripts/llm-chat.ps1 +0 -0
- {ipex_llm-2.2.0b20250207.data → ipex_llm-2.2.0b20250208.data}/scripts/llm-cli.ps1 +0 -0
- {ipex_llm-2.2.0b20250207.dist-info → ipex_llm-2.2.0b20250208.dist-info}/WHEEL +0 -0
- {ipex_llm-2.2.0b20250207.dist-info → ipex_llm-2.2.0b20250208.dist-info}/entry_points.txt +0 -0
- {ipex_llm-2.2.0b20250207.dist-info → ipex_llm-2.2.0b20250208.dist-info}/top_level.txt +0 -0
The following ipex_llm/libs binaries CHANGED (binary files, no textual diff shown):

ipex_llm/libs/bloom-api.dll
ipex_llm/libs/bloom.dll
ipex_llm/libs/gptneox-api.dll
ipex_llm/libs/gptneox.dll
ipex_llm/libs/libbloom_avx.dll
ipex_llm/libs/libbloom_vnni.dll
ipex_llm/libs/libgptneox_avx.dll
ipex_llm/libs/libgptneox_vnni.dll
ipex_llm/libs/libllama_avx.dll
ipex_llm/libs/libllama_vnni.dll
ipex_llm/libs/libstarcoder_avx.dll
ipex_llm/libs/libstarcoder_vnni.dll
ipex_llm/libs/llama-api.dll
ipex_llm/libs/llama.dll
ipex_llm/libs/main-bloom.exe
ipex_llm/libs/main-gptneox.exe
ipex_llm/libs/main-llama.exe
ipex_llm/libs/main-starcoder.exe
ipex_llm/libs/pipeline.dll
ipex_llm/libs/quantize-bloom.exe
ipex_llm/libs/quantize-bloom_vnni.exe
ipex_llm/libs/quantize-gptneox.exe
ipex_llm/libs/quantize-gptneox_vnni.exe
ipex_llm/libs/quantize-llama.exe
ipex_llm/libs/quantize-llama_vnni.exe
ipex_llm/libs/quantize-starcoder.exe
ipex_llm/libs/quantize-starcoder_vnni.exe
ipex_llm/libs/starcoder-api.dll
ipex_llm/libs/starcoder.dll
ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py
CHANGED

@@ -201,7 +201,7 @@ def convert_llm(model: torch.nn.Module,
 keep_ir: bool=False,
 compile_blob: bool=True):
 # whether to set layernorm weight as const
-…
+const_parameter = os.environ.get("IPEX_LLM_NPU_CONST_PARAMETER", "1") == "1"
 if group_size == 0:
 n_splits_linear = 1
 if qtype in ["sym_int8_rtn", "asym_int4_rtn"]:

@@ -240,7 +240,7 @@ def convert_llm(model: torch.nn.Module,
 for layer_idx in range(0, layer_num):
 param_list.append((model, layer_idx, n_splits_linear, n_splits_down_proj,
 temp_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-…
+const_parameter))
 with Pool() as pool:
 result = pool.starmap(convert_llama_layer, param_list)

@@ -267,7 +267,7 @@ def convert_llm(model: torch.nn.Module,
 res = InitLLMPipeline(model_type, kv_len, model.num_head, model.head_dim, layer_num,
 model.vocab_size, weight_dir, "model",
 first_blob_path, last_blob_path,
-os.path.join(temp_dir, "decoder_layer"), …
+os.path.join(temp_dir, "decoder_layer"), const_parameter)
 except:
 invalidInputError(False,
 "False to InitLLMPipeline.")

@@ -284,7 +284,7 @@ def convert_llm(model: torch.nn.Module,
 for layer_idx in range(0, layer_num):
 param_list.append((model, layer_idx, n_splits_linear, n_splits_down_proj,
 temp_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-…
+const_parameter))
 with Pool() as pool:
 result = pool.starmap(convert_baichuan_layer, param_list)

@@ -308,7 +308,7 @@ def convert_llm(model: torch.nn.Module,
 res = InitLLMPipeline("baichuan", kv_len, model.num_head, model.head_dim, layer_num,
 model.vocab_size, weight_dir, "model",
 first_blob_path, last_blob_path,
-os.path.join(temp_dir, "decoder_layer"), …
+os.path.join(temp_dir, "decoder_layer"), const_parameter)
 except:
 invalidInputError(False,
 "False to InitLLMPipeline.")

@@ -325,7 +325,7 @@ def convert_llm(model: torch.nn.Module,
 for layer_idx in range(0, layer_num):
 param_list.append((model, layer_idx, n_splits_linear, n_splits_down_proj,
 temp_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-…
+const_parameter))
 with Pool() as pool:
 result = pool.starmap(convert_minicpm_layer, param_list)

@@ -348,12 +348,12 @@ def convert_llm(model: torch.nn.Module,
 res = InitLLMPipeline("minicpm", kv_len, model.num_head, model.head_dim, layer_num,
 model.vocab_size, weight_dir, "model",
 first_blob_path, last_blob_path,
-os.path.join(temp_dir, "decoder_layer"), …
+os.path.join(temp_dir, "decoder_layer"), const_parameter)
 except:
 invalidInputError(False,
 "False to InitLLMPipeline.")
 elif model.config.model_type == "qwen2":
-…
+const_parameter = os.environ.get("IPEX_LLM_NPU_CONST_PARAMETER", "0") == "1"
 with tempfile.TemporaryDirectory() as temp_dir:
 if save_directory is not None:
 temp_dir = save_directory

@@ -371,7 +371,7 @@ def convert_llm(model: torch.nn.Module,
 for layer_idx in range(0, layer_num):
 param_list.append((model, layer_idx, n_splits_linear, n_splits_down_proj,
 temp_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-…
+const_parameter))
 with Pool() as pool:
 result = pool.starmap(convert_qwen_layer, param_list)

@@ -396,7 +396,7 @@ def convert_llm(model: torch.nn.Module,
 "head_dim": model.head_dim,
 "transpose_value_cache": transpose_value_cache,
 "max_prompt_len": max_prompt_len,
-"…
+"const_parameter": const_parameter,
 "group_size": group_size}
 model.config.update(update_dict)
 model.config.save_pretrained(save_directory)

@@ -405,7 +405,7 @@ def convert_llm(model: torch.nn.Module,
 res = InitLLMPipeline("qwen", kv_len, model.num_head, model.head_dim, layer_num,
 model.vocab_size, weight_dir, "model",
 first_blob_path, last_blob_path,
-os.path.join(temp_dir, "decoder_layer"), …
+os.path.join(temp_dir, "decoder_layer"), const_parameter)
 except:
 invalidInputError(False,
 "False to InitLLMPipeline.")

@@ -441,7 +441,9 @@ def convert_llm_for_deploy(model: torch.nn.Module,
 weight_dir = os.path.join(save_directory, "model_weights")
 if not os.path.exists(weight_dir):
 os.mkdir(weight_dir)
-…
+const_parameter = os.environ.get("IPEX_LLM_NPU_CONST_PARAMETER", "1") == "1"
+if keep_ir:
+    const_parameter = False

 lm_head_low_bit = getattr(model.config, "bigdl_transformers_low_bit", "sym_int4_rtn")
 if hasattr(model, "lm_head") and not isinstance(model.lm_head, SlicedLMHead):

@@ -472,7 +474,7 @@ def convert_llm_for_deploy(model: torch.nn.Module,
 "head_dim": model.model.layers[0].self_attn.head_dim,
 "transpose_value_cache": transpose_value_cache,
 "max_prompt_len": max_prompt_len,
-"…
+"const_parameter": const_parameter,
 "group_size": group_size,
 "fused_layers": fused_layers,
 "qkv_bias": True,

@@ -490,12 +492,12 @@ def convert_llm_for_deploy(model: torch.nn.Module,
 # save fused_layers blobs of fused decoder layers
 convert_fused_qwen_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
 save_directory, weight_dir, transpose_value_cache, kv_len,
-group_size, …
+group_size, const_parameter, "decode",
 keep_ir=keep_ir, compile_blob=compile_blob)
 # save blob of single prefill layer
 convert_qwen_layer(model, 0, n_splits_linear, n_splits_down_proj,
 save_directory, weight_dir, transpose_value_cache, max_prompt_len,
-group_size, …
+group_size, const_parameter, "prefill",
 keep_ir=keep_ir, compile_blob=compile_blob)
 # save blob of lmhead and bin of embedding
 convert_lm_head_and_embedding(model, save_directory, weight_dir, convert_model=True,

@@ -535,7 +537,7 @@ def convert_llm_for_deploy(model: torch.nn.Module,
 "head_dim": model.model.layers[0].self_attn.head_dim,
 "transpose_value_cache": transpose_value_cache,
 "max_prompt_len": max_prompt_len,
-"…
+"const_parameter": const_parameter,
 "group_size": group_size,
 "fused_layers": fused_layers,
 "qkv_bias": False,

@@ -559,12 +561,12 @@ def convert_llm_for_deploy(model: torch.nn.Module,
 # save fused_layers blobs of fused decoder layers
 convert_fused_llama_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
 save_directory, weight_dir, transpose_value_cache, kv_len,
-group_size, …
+group_size, const_parameter, "decode",
 keep_ir=keep_ir, compile_blob=compile_blob)
 # save blob of single prefill layer
 convert_llama_layer(model, 0, n_splits_linear, n_splits_down_proj,
 save_directory, weight_dir, transpose_value_cache, max_prompt_len,
-group_size, …
+group_size, const_parameter, "prefill",
 keep_ir=keep_ir, compile_blob=compile_blob)
 elif model.config.model_type == "minicpm":
 if group_size == 0:

@@ -576,7 +578,7 @@ def convert_llm_for_deploy(model: torch.nn.Module,
 "head_dim": model.model.layers[0].self_attn.head_dim,
 "transpose_value_cache": transpose_value_cache,
 "max_prompt_len": max_prompt_len,
-"…
+"const_parameter": const_parameter,
 "group_size": group_size,
 "fused_layers": fused_layers,
 "qkv_bias": False,

@@ -594,12 +596,12 @@ def convert_llm_for_deploy(model: torch.nn.Module,
 # save fused_layers blobs of fused decoder layers
 convert_fused_minicpm_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
 save_directory, weight_dir, transpose_value_cache, kv_len,
-group_size, …
+group_size, const_parameter, "decode",
 keep_ir=keep_ir, compile_blob=compile_blob)
 # save blob of single prefill layer
 convert_minicpm_layer(model, 0, n_splits_linear, n_splits_down_proj,
 save_directory, weight_dir, transpose_value_cache, max_prompt_len,
-group_size, …
+group_size, const_parameter, "prefill",
 keep_ir=keep_ir, compile_blob=compile_blob)
 # save blob of lmhead and bin of embedding and embedding_post
 convert_lm_head_and_embedding(model, n_splits_linear,
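The net effect of these convert_pipeline.py hunks is a single environment-driven switch. A minimal sketch of that logic follows; resolve_const_parameter is a hypothetical helper written only to summarize the behavior visible in the hunks above (it is not an ipex-llm function), and it folds together the defaults used by convert_llm and the keep_ir override used by convert_llm_for_deploy.

    import os

    def resolve_const_parameter(model_type, keep_ir=False):
        # convert_llm reads IPEX_LLM_NPU_CONST_PARAMETER with default "1" in the
        # llama/baichuan/minicpm branches and default "0" in the qwen2 branch;
        # convert_llm_for_deploy additionally forces the flag off when keep_ir=True.
        default = "0" if model_type == "qwen2" else "1"
        const_parameter = os.environ.get("IPEX_LLM_NPU_CONST_PARAMETER", default) == "1"
        if keep_ir:
            const_parameter = False
        return const_parameter

The resolved value is then threaded into every convert_*_layer call, passed to InitLLMPipeline, and stored in the saved config as "const_parameter", replacing what the surrounding comment describes as a layernorm-only constant flag.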
ipex_llm/transformers/npu_pipeline_model/llama.py
CHANGED

@@ -107,7 +107,7 @@ def convert_lm_head_and_embedding(model, n_splits_linear, temp_dir, weight_dir,

 def convert_llama_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
 temp_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-…
+const_parameter, mode="decode",
 keep_ir=False, compile_blob=True):
 num_heads = model.model.layers[0].self_attn.num_heads
 num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads

@@ -145,14 +145,14 @@ def convert_llama_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
 else:
 input_len = kv_len
 decoder_name = "decoder_layer_prefill"
-…
+const_parameter = False
 keep_position_ids = False
 npu_dpu_groups = 6

 single_decoder = LowBitLlamaMultiDecoderlayer(
 [1, input_len, num_heads * head_dim],
-input_layernorm_weights=[layer_norm_0] if …
-post_attn_layernorm_weights=[layer_norm_1] if …
+input_layernorm_weights=[layer_norm_0] if const_parameter else None,
+post_attn_layernorm_weights=[layer_norm_1] if const_parameter else None,
 cached_cos=cached_cos,
 cached_sin=cached_sin,
 num_heads=num_heads,

@@ -182,7 +182,7 @@ def convert_llama_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
 if mode == "decode":
 if hasattr(curr_layer.self_attn.rotary_emb, "cos_cached"):
 # llama-2-7B & llama-3-8B
-if …
+if const_parameter:
 st_idx = 5
 else:
 input_lm_bin_file = os.path.join(weight_dir, f"model_{layer_idx}_input_3.bin")

@@ -192,7 +192,7 @@ def convert_llama_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
 st_idx = 7
 else:
 # llama-3.2-3B & llama-3.2-1B
-if …
+if const_parameter:
 st_idx = 6
 else:
 input_lm_bin_file = os.path.join(weight_dir, f"model_{layer_idx}_input_4.bin")

@@ -223,7 +223,7 @@ def convert_llama_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,

 def convert_fused_llama_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
 save_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-…
+const_parameter, mode="decode",
 keep_ir=False, compile_blob=True):
 num_heads = model.model.layers[0].self_attn.num_heads
 num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads

@@ -294,6 +294,10 @@ def convert_fused_llama_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
 else:  # FP16 Linear
 np_dtype = np.float16

+if not const_parameter:
+    input_layer_norm_weights = None
+    post_attn_layernorm_weights = None
+
 fused_decoder = LowBitLlamaMultiDecoderlayer(
 [1, 1, num_heads * head_dim],
 input_layernorm_weights=input_layer_norm_weights,
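What the flag changes in practice: in convert_llama_layer (and the parallel minicpm converter below), the input and post-attention layernorm weights are baked into the decoder blob as constants only when const_parameter is True and the layer is built in decode mode; prefill always falls back to runtime inputs. A schematic sketch, with pick_layernorm_inputs as a hypothetical name used only for illustration:

    import torch

    def pick_layernorm_inputs(layer_norm_0, layer_norm_1, const_parameter, mode):
        # Mirrors the pattern above: prefill layers never embed the layernorm
        # weights, decode layers embed them only when const_parameter is set.
        if mode != "decode":
            const_parameter = False
        if const_parameter:
            return [layer_norm_0], [layer_norm_1]   # folded into the compiled blob
        return None, None                           # supplied as runtime inputs

    ln0, ln1 = torch.ones(4096), torch.ones(4096)
    print(pick_layernorm_inputs(ln0, ln1, True, "decode")[0] is not None)   # True
    print(pick_layernorm_inputs(ln0, ln1, True, "prefill"))                 # (None, None)

When the weights are not constants, the extra bookkeeping in the hunks above (the st_idx offsets and the model_{layer_idx}_input_*.bin writes) appears to handle saving them so they can be fed in at runtime instead.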
ipex_llm/transformers/npu_pipeline_model/minicpm.py
CHANGED

@@ -301,7 +301,7 @@ def convert_lm_head_and_embedding(model, n_splits_linear, temp_dir, weight_dir,

 def convert_minicpm_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
 temp_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-…
+const_parameter, mode="decode",
 keep_ir=False, compile_blob=True):
 num_heads = model.model.layers[0].self_attn.num_heads
 num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads

@@ -333,12 +333,12 @@ def convert_minicpm_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
 else:
 input_len = kv_len
 decoder_name = "decoder_layer_prefill"
-…
+const_parameter = False

 single_decoder = LowBitMinicpmMultiDecoderlayer(
 [1, input_len, num_heads * head_dim],
-input_layernorm_weights=[layer_norm_0] if …
-post_attn_layernorm_weights=[layer_norm_1] if …
+input_layernorm_weights=[layer_norm_0] if const_parameter else None,
+post_attn_layernorm_weights=[layer_norm_1] if const_parameter else None,
 cached_cos=cached_cos,
 cached_sin=cached_sin,
 num_heads=num_heads,

@@ -364,7 +364,7 @@ def convert_minicpm_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
 os.remove(os.path.join(temp_dir, decoder_name + ".bin"))

 if mode == "decode":
-if …
+if const_parameter:
 st_idx = 5
 else:
 input_lm_bin_file = os.path.join(weight_dir, f"model_{layer_idx}_input_3.bin")

@@ -394,7 +394,7 @@ def convert_minicpm_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,

 def convert_fused_minicpm_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
 save_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-…
+const_parameter, mode="decode",
 keep_ir=False, compile_blob=True):
 num_heads = model.model.layers[0].self_attn.num_heads
 num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads

@@ -461,6 +461,10 @@ def convert_fused_minicpm_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
 else:  # FP16 Linear
 np_dtype = np.float16

+if not const_parameter:
+    input_layer_norm_weights = None
+    post_attn_layernorm_weights = None
+
 fused_decoder = LowBitMinicpmMultiDecoderlayer(
 [1, 1, num_heads * head_dim],
 input_layernorm_weights=input_layer_norm_weights,
ipex_llm/transformers/npu_pipeline_model/qwen.py
CHANGED

@@ -117,7 +117,7 @@ def convert_lm_head_and_embedding(model, temp_dir, weight_dir,

 def convert_qwen_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
 temp_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-…
+const_parameter, mode="decode",
 keep_ir=False, compile_blob=True):
 num_heads = model.model.layers[0].self_attn.num_heads
 num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads

@@ -193,7 +193,7 @@ def convert_qwen_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
 # 0, 1, 2 are input_embed/attention_mask/position_id
 if mode == "decode":
 if hasattr(curr_layer.self_attn.rotary_emb, "cos_cached"):
-if …
+if const_parameter:
 st_idx = 3
 else:
 input_lm_bin_file = os.path.join(weight_dir, f"model_{layer_idx}_input_3.bin")

@@ -203,7 +203,7 @@ def convert_qwen_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
 st_idx = 5
 else:
 # transformers >= 4.45.0
-if …
+if const_parameter:
 st_idx = 4
 else:
 input_lm_bin_file = os.path.join(weight_dir, f"model_{layer_idx}_input_4.bin")

@@ -241,7 +241,7 @@ def convert_qwen_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,

 def convert_fused_qwen_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
 save_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-…
+const_parameter, mode="decode",
 keep_ir=False, compile_blob=True):
 num_heads = model.model.layers[0].self_attn.num_heads
 num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads

@@ -325,6 +325,13 @@ def convert_fused_qwen_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
 else:  # FP16 Linear
 np_dtype = np.float16

+if not const_parameter:
+    input_layer_norm_weights = None
+    post_attn_layernorm_weights = None
+    q_biases = None
+    k_biases = None
+    v_biases = None
+
 fused_decoder = LowBitQwenMultiDecoderlayer(
 [1, 1, num_heads * head_dim],
 input_layernorm_weights=input_layer_norm_weights,
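The qwen converter extends the same idea to the fused decode blob: when const_parameter is off, the QKV biases are treated the same way as the layernorm weights and become runtime inputs rather than compile-time constants. A schematic sketch (drop_fused_constants and the weights dict are illustrative only, not ipex-llm APIs):

    def drop_fused_constants(weights, const_parameter):
        # `weights` is a plain dict standing in for the locals gathered earlier in
        # convert_fused_qwen_layer (layernorm weights plus q/k/v biases). With
        # const_parameter off, every entry is cleared so the fused decoder is
        # compiled without baked-in constants.
        if not const_parameter:
            for key in ("input_layer_norm_weights", "post_attn_layernorm_weights",
                        "q_biases", "k_biases", "v_biases"):
                weights[key] = None
        return weights

    example = {k: object() for k in ("input_layer_norm_weights",
                                     "post_attn_layernorm_weights",
                                     "q_biases", "k_biases", "v_biases")}
    print(drop_fused_constants(example, const_parameter=False))  # all values become None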
{ipex_llm-2.2.0b20250207.dist-info → ipex_llm-2.2.0b20250208.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ipex-llm
-Version: 2.2.…
+Version: 2.2.0b20250208
 Summary: Large Language Model Develop Toolkit
 Home-page: https://github.com/intel-analytics/ipex-llm
 Author: BigDL Authors

@@ -27,7 +27,7 @@ Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'all'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'all'
 Provides-Extra: cpp
-Requires-Dist: bigdl-core-cpp ==2.6.…
+Requires-Dist: bigdl-core-cpp ==2.6.0b20250208 ; extra == 'cpp'
 Requires-Dist: setuptools ; extra == 'cpp'
 Requires-Dist: onednn-devel ==2025.0.1 ; (platform_system == "Windows") and extra == 'cpp'
 Requires-Dist: onednn ==2025.0.1 ; (platform_system == "Windows") and extra == 'cpp'

@@ -60,7 +60,7 @@ Requires-Dist: transformers ==4.40.0 ; extra == 'npu'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'npu'
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'npu'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'npu'
-Requires-Dist: bigdl-core-npu ==2.6.…
+Requires-Dist: bigdl-core-npu ==2.6.0b20250208 ; (platform_system == "Windows") and extra == 'npu'
 Provides-Extra: serving
 Requires-Dist: py-cpuinfo ; extra == 'serving'
 Requires-Dist: fschat[model_worker,webui] ==0.2.36 ; extra == 'serving'

@@ -80,9 +80,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-21 ==2.6.…
-Requires-Dist: bigdl-core-xe-batch-21 ==2.6.…
-Requires-Dist: bigdl-core-xe-addons-21 ==2.6.…
+Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250208 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250208 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250208 ; extra == 'xpu'
 Provides-Extra: xpu-2-1
 Requires-Dist: py-cpuinfo ; extra == 'xpu-2-1'
 Requires-Dist: protobuf ; extra == 'xpu-2-1'

@@ -97,9 +97,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu-2-1'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-21 ==2.6.…
-Requires-Dist: bigdl-core-xe-batch-21 ==2.6.…
-Requires-Dist: bigdl-core-xe-addons-21 ==2.6.…
+Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250208 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250208 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250208 ; extra == 'xpu-2-1'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-1'
 Requires-Dist: dpcpp-cpp-rt ==2024.0.2 ; (platform_system == "Windows") and extra == 'xpu-2-1'
 Requires-Dist: mkl-dpcpp ==2024.0.0 ; (platform_system == "Windows") and extra == 'xpu-2-1'

@@ -117,7 +117,7 @@ Requires-Dist: setuptools ; extra == 'xpu-2-6'
 Requires-Dist: torch ==2.6.0+xpu ; extra == 'xpu-2-6'
 Requires-Dist: torchvision ==0.21.0+xpu ; extra == 'xpu-2-6'
 Requires-Dist: torchaudio ==2.6.0+xpu ; extra == 'xpu-2-6'
-Requires-Dist: bigdl-core-xe-all ==2.6.…
+Requires-Dist: bigdl-core-xe-all ==2.6.0b20250208 ; extra == 'xpu-2-6'
 Requires-Dist: onednn-devel ==2025.0.1 ; extra == 'xpu-2-6'
 Requires-Dist: onednn ==2025.0.1 ; extra == 'xpu-2-6'
 Requires-Dist: dpcpp-cpp-rt ==2025.0.2 ; extra == 'xpu-2-6'

@@ -133,9 +133,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arc'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arc'
 Requires-Dist: tabulate ; extra == 'xpu-arc'
 Requires-Dist: setuptools ; extra == 'xpu-arc'
-Requires-Dist: bigdl-core-xe-23 ==2.6.…
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.…
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.…
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250208 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250208 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250208 ; extra == 'xpu-arc'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arc'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'

@@ -156,9 +156,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arl'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arl'
 Requires-Dist: tabulate ; extra == 'xpu-arl'
 Requires-Dist: setuptools ; extra == 'xpu-arl'
-Requires-Dist: bigdl-core-xe-23 ==2.6.…
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.…
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.…
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250208 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250208 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250208 ; extra == 'xpu-arl'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arl'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'

@@ -179,9 +179,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-lnl'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-lnl'
 Requires-Dist: tabulate ; extra == 'xpu-lnl'
 Requires-Dist: setuptools ; extra == 'xpu-lnl'
-Requires-Dist: bigdl-core-xe-23 ==2.6.…
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.…
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.…
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250208 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250208 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250208 ; extra == 'xpu-lnl'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-lnl'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
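All of the bigdl-core-* pins move in lockstep with the wheel to the same 2.6.0b20250208 nightly. If you want to confirm an existing environment stays consistent after upgrading, a small check along these lines works; it is written only for illustration, and the only assumptions are the package names taken from the Requires-Dist lines above (which of them are present depends on the extra that was installed).

    from importlib.metadata import PackageNotFoundError, version

    def nightly_suffix(dist_name):
        # "2.2.0b20250208" -> "20250208"; None if the package is not installed
        # or is not a nightly build.
        try:
            v = version(dist_name)
        except PackageNotFoundError:
            return None
        return v.rsplit("b", 1)[-1] if "b" in v else None

    base = nightly_suffix("ipex-llm")
    for core in ("bigdl-core-npu", "bigdl-core-cpp", "bigdl-core-xe-21",
                 "bigdl-core-xe-23", "bigdl-core-xe-all"):
        suffix = nightly_suffix(core)
        if suffix is not None and suffix != base:
            print(f"{core} is {suffix}, expected {base}")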
{ipex_llm-2.2.0b20250207.dist-info → ipex_llm-2.2.0b20250208.dist-info}/RECORD
CHANGED

@@ -41,35 +41,35 @@ ipex_llm/langchain/llms/transformerspipelinellm.py,sha256=vm522YPPwWxxAPVvQBtxRf
 ipex_llm/langchain/vllm/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
 ipex_llm/langchain/vllm/vllm.py,sha256=6dxc-ZISZQrJilEa_HA827l75Dv9rcHpY_G6FdJ8BVs,7793
 ipex_llm/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ipex_llm/libs/bloom-api.dll,sha256=…
-ipex_llm/libs/bloom.dll,sha256=…
-ipex_llm/libs/gptneox-api.dll,sha256=…
-ipex_llm/libs/gptneox.dll,sha256=…
-ipex_llm/libs/libbloom_avx.dll,sha256=…
-ipex_llm/libs/libbloom_vnni.dll,sha256=…
-ipex_llm/libs/libgptneox_avx.dll,sha256=…
-ipex_llm/libs/libgptneox_vnni.dll,sha256=…
-ipex_llm/libs/libllama_avx.dll,sha256=…
-ipex_llm/libs/libllama_vnni.dll,sha256=…
-ipex_llm/libs/libstarcoder_avx.dll,sha256=…
-ipex_llm/libs/libstarcoder_vnni.dll,sha256=…
-ipex_llm/libs/llama-api.dll,sha256=…
-ipex_llm/libs/llama.dll,sha256=…
-ipex_llm/libs/main-bloom.exe,sha256…
-ipex_llm/libs/main-gptneox.exe,sha256=…
-ipex_llm/libs/main-llama.exe,sha256=…
-ipex_llm/libs/main-starcoder.exe,sha256=…
-ipex_llm/libs/pipeline.dll,sha256=…
-ipex_llm/libs/quantize-bloom.exe,sha256=…
-ipex_llm/libs/quantize-bloom_vnni.exe,sha256=…
-ipex_llm/libs/quantize-gptneox.exe,sha256=…
-ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=…
-ipex_llm/libs/quantize-llama.exe,sha256=…
-ipex_llm/libs/quantize-llama_vnni.exe,sha256=…
-ipex_llm/libs/quantize-starcoder.exe,sha256=…
-ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=…
-ipex_llm/libs/starcoder-api.dll,sha256=…
-ipex_llm/libs/starcoder.dll,sha256=…
+ipex_llm/libs/bloom-api.dll,sha256=6JzFpTxSCIDgyHljiTDIwiyj8tNjyIimr00FUg18eTw,36352
+ipex_llm/libs/bloom.dll,sha256=2W5NmA0RyQMElLCyunvANpTUBTnzahAtlFwf1IQYb_k,507904
+ipex_llm/libs/gptneox-api.dll,sha256=jlJcF2GR9ZHenAOcbcKirLRejWsdiQDhUpWbl5ulVz8,24576
+ipex_llm/libs/gptneox.dll,sha256=8f1rs4aF7OGVtIjZZv-v1bSamG6C_b68G_O2sd_dzAU,568320
+ipex_llm/libs/libbloom_avx.dll,sha256=RJ-jYk9S6Jd34ktr1B3FbEL3MuPyngfekS2HLHc3nHE,536576
+ipex_llm/libs/libbloom_vnni.dll,sha256=LCvnLsZ2uisySM4nL8cDDggRC_6Bw3OFkP-2vikhTVQ,508416
+ipex_llm/libs/libgptneox_avx.dll,sha256=aDdomyYNf9unY6S9ix6CuX2ICY6uphptRm294nRm9AU,596992
+ipex_llm/libs/libgptneox_vnni.dll,sha256=GMlR-de0HcAdw8YJzcXX1JndXgv3PmZ4nipoqyeh7ls,568832
+ipex_llm/libs/libllama_avx.dll,sha256=Y7AJTriTvqWJH8hIA6gwYsL2u_uGA35NmRPoSQSkn78,591360
+ipex_llm/libs/libllama_vnni.dll,sha256=XGd5lij9PZu63CBadK3R6mhkfqpi21wRrU9eGFpReaM,563200
+ipex_llm/libs/libstarcoder_avx.dll,sha256=ZzZFh1IW_QlHrNcE79vSsqPcA9YZSDn1LCEpCwtv0oo,627712
+ipex_llm/libs/libstarcoder_vnni.dll,sha256=dF3mLHFXKAe8TiTb2HqIbjvkQoCVK2BpRwbohFSDZ_Y,599552
+ipex_llm/libs/llama-api.dll,sha256=hg0_pZC8RjgFuTtc19NF5BMLTkchmj_X4FogUC_YeDA,25600
+ipex_llm/libs/llama.dll,sha256=3q1FkqwWE3Fo_-zZ74kWR-IOM7g-SSiSYg3Ud3wmkRo,562688
+ipex_llm/libs/main-bloom.exe,sha256=tdNzTM-5XqiWe1rLTV1qslHWUNi0VKcAWJdjaRKEAzg,103424
+ipex_llm/libs/main-gptneox.exe,sha256=XtmysEP3FoLLtJz5qTIoFPaG4MZhTAZ-0Gru65bk_7Y,98816
+ipex_llm/libs/main-llama.exe,sha256=r7ZrLzVb2_phMKmD50dz5xnrPC8H9H_IPK0T8js8x2E,99840
+ipex_llm/libs/main-starcoder.exe,sha256=udG6tPP_-3aCkFzfi8WxGD6lrauKPxwlaFhv3OF-gDg,157696
+ipex_llm/libs/pipeline.dll,sha256=_CoiiDG12fYEEeIJyqh-dAi9bK3am421KItj548mlek,73216
+ipex_llm/libs/quantize-bloom.exe,sha256=DU-zbDfk4ZZ5N8pQWDM39Xqoy_4rSsYGLhvJdZmunVQ,126464
+ipex_llm/libs/quantize-bloom_vnni.exe,sha256=trXZ2CtNiCkD1_mfvXWJf7IBgNaYRxk3sNzsYNbwBgQ,128000
+ipex_llm/libs/quantize-gptneox.exe,sha256=ZDLyAS9FaMXAZiQrb805iKHY-cR43Z-E453tS-C__bE,104448
+ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=alab9U5EUVg07XwQyNdXAbjp8p3IZ_0sP56IMkpXjTU,104960
+ipex_llm/libs/quantize-llama.exe,sha256=ttvTdoGd6pWVdgA4KR9A_B_Tdg1Gt8qN1mBQrZp7JIw,110080
+ipex_llm/libs/quantize-llama_vnni.exe,sha256=6ECTUDfKLzj-G_yvohaR8z5DSPt6wwnDq2uaRwXdCDc,110592
+ipex_llm/libs/quantize-starcoder.exe,sha256=ApQMB9yxcjqUomqgjnD23rzo3IidKgG_UHXwEAQ-4EA,127488
+ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=6U5MeFLPGtJrkJA6q4HE3NyDUzcmlpBz9WpwLBXS684,128512
+ipex_llm/libs/starcoder-api.dll,sha256=fVkYZ0zZ8mhERgN0hILyFt6cUpe__Y8oBW2Dq1ePinE,21504
+ipex_llm/libs/starcoder.dll,sha256=eOv12oBIIYpxBIoId8g_BuhlIlRZ32r29yDtNQQEMAg,599040
 ipex_llm/llamaindex/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
 ipex_llm/llamaindex/llms/__init__.py,sha256=KP1lEdGqDuxPoxL1ZSH25Pm2kKMPJBWUTLR0ckSLMIU,1139
 ipex_llm/llamaindex/llms/bigdlllm.py,sha256=FQBzq1KOjfc6uofTXAha3O7TqpJkNfOFepXQmOVlbnI,26314

@@ -208,11 +208,11 @@ ipex_llm/transformers/npu_models/xlm_mp.py,sha256=sj8OVun8xJprM7ZJp0XzWa55rqlSIz
 ipex_llm/transformers/npu_pipeline_model/__init__.py,sha256=b2IXvVqQ5cItki021h8s3ymW12RPu8QNPprq4Mn3bDM,586
 ipex_llm/transformers/npu_pipeline_model/baichuan.py,sha256=ICxRzFQ4OIANDkkVi2_4xOeQXmfFXYMx3H52KuE1xR4,6208
 ipex_llm/transformers/npu_pipeline_model/common.py,sha256=faooJmM75qnVyZYuQLx9gJpVlotcVF4qXRCnOrknfk4,14776
-ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py,sha256=…
-ipex_llm/transformers/npu_pipeline_model/llama.py,sha256=…
-ipex_llm/transformers/npu_pipeline_model/minicpm.py,sha256=…
+ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py,sha256=IlvaZC9pi_ZJTWuO2dMSgCkc3V909lVhpAfktTEfSLI,29894
+ipex_llm/transformers/npu_pipeline_model/llama.py,sha256=ISJ84zj0Ce2f9vdgmoGfdxQz2LRaUMlZCEM3MV2VpoQ,14521
+ipex_llm/transformers/npu_pipeline_model/minicpm.py,sha256=Q-rUzXBlx5Ns3xemi7H6t8dnzu1q4e-MhUpJMzJmBRU,21522
 ipex_llm/transformers/npu_pipeline_model/pipeline_cpp.py,sha256=JNmodAMg_NQvDILug3E_fGXEh6cd3wsj4bvAzcd-vaU,2749
-ipex_llm/transformers/npu_pipeline_model/qwen.py,sha256=…
+ipex_llm/transformers/npu_pipeline_model/qwen.py,sha256=WEMUdGZH3INyJm-1Hfv3o41BiP037n2ftS4qPM0jaiE,16221
 ipex_llm/utils/__init__.py,sha256=LlUgrD03rfw4iY8zWPtHH6p65Gw76waVOLHaqagETw0,1425
 ipex_llm/utils/benchmark_util_4_29.py,sha256=OU1W1quiaiJGsg1pd3HM9O6PmVSaPA0HHE7R8hNTfmQ,258653
 ipex_llm/utils/benchmark_util_4_42.py,sha256=HEiClCgKDp_T64HH8ulSTly8dvt6UwPDYZfrPVYvXcc,225383

@@ -248,11 +248,11 @@ ipex_llm/vllm/xpu/engine/__init__.py,sha256=pY_CpyuZd72fr6s32ejeKHKFW0K4vUU2rzZj
 ipex_llm/vllm/xpu/engine/engine.py,sha256=k4-D27WS_Gk3mA--w3HWAjPjb4Aiu043MVPi0ZoAUBc,5984
 ipex_llm/vllm/xpu/entrypoints/openai/api_server.py,sha256=GshTZFB8e4PWvqckfbmTOU6b0oLkNn7A-vzLuG9--j8,21544
 ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py,sha256=2rENA2ucynMaIjiZBEh2ez1o5vR32GaP514t39CD7KM,8676
-ipex_llm-2.2.…
-ipex_llm-2.2.…
-ipex_llm-2.2.…
-ipex_llm-2.2.…
-ipex_llm-2.2.…
-ipex_llm-2.2.…
-ipex_llm-2.2.…
-ipex_llm-2.2.…
+ipex_llm-2.2.0b20250208.data/scripts/ipex-llm-init.bat,sha256=HPtCYuDYwEatq7dAwOvdfVcHYCpAVdbj75K1qh0vQek,2578
+ipex_llm-2.2.0b20250208.data/scripts/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
+ipex_llm-2.2.0b20250208.data/scripts/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
+ipex_llm-2.2.0b20250208.dist-info/METADATA,sha256=lvf2U7DribCbFo-qabprOhWsiYrX4yLUPfykdJZ91Dk,12369
+ipex_llm-2.2.0b20250208.dist-info/WHEEL,sha256=6iYPr8vTHsyDK75jr9X0V3I9wPSVmtwr_8fdATBciGk,98
+ipex_llm-2.2.0b20250208.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
+ipex_llm-2.2.0b20250208.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
+ipex_llm-2.2.0b20250208.dist-info/RECORD,,
Files without changes (renamed only): the {ipex_llm-2.2.0b20250207 → ipex_llm-2.2.0b20250208}.data scripts (ipex-llm-init.bat, llm-chat.ps1, llm-cli.ps1) and .dist-info files (WHEEL, entry_points.txt, top_level.txt) listed in the summary above.