ipex-llm 2.2.0b20250207__py3-none-win_amd64.whl → 2.2.0b20250209__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. ipex_llm/libs/bloom-api.dll +0 -0
  2. ipex_llm/libs/bloom.dll +0 -0
  3. ipex_llm/libs/gptneox-api.dll +0 -0
  4. ipex_llm/libs/gptneox.dll +0 -0
  5. ipex_llm/libs/libbloom_avx.dll +0 -0
  6. ipex_llm/libs/libbloom_vnni.dll +0 -0
  7. ipex_llm/libs/libgptneox_avx.dll +0 -0
  8. ipex_llm/libs/libgptneox_vnni.dll +0 -0
  9. ipex_llm/libs/libllama_avx.dll +0 -0
  10. ipex_llm/libs/libllama_vnni.dll +0 -0
  11. ipex_llm/libs/libstarcoder_avx.dll +0 -0
  12. ipex_llm/libs/libstarcoder_vnni.dll +0 -0
  13. ipex_llm/libs/llama-api.dll +0 -0
  14. ipex_llm/libs/llama.dll +0 -0
  15. ipex_llm/libs/main-bloom.exe +0 -0
  16. ipex_llm/libs/main-gptneox.exe +0 -0
  17. ipex_llm/libs/main-llama.exe +0 -0
  18. ipex_llm/libs/main-starcoder.exe +0 -0
  19. ipex_llm/libs/pipeline.dll +0 -0
  20. ipex_llm/libs/quantize-bloom.exe +0 -0
  21. ipex_llm/libs/quantize-bloom_vnni.exe +0 -0
  22. ipex_llm/libs/quantize-gptneox.exe +0 -0
  23. ipex_llm/libs/quantize-gptneox_vnni.exe +0 -0
  24. ipex_llm/libs/quantize-llama.exe +0 -0
  25. ipex_llm/libs/quantize-llama_vnni.exe +0 -0
  26. ipex_llm/libs/quantize-starcoder.exe +0 -0
  27. ipex_llm/libs/quantize-starcoder_vnni.exe +0 -0
  28. ipex_llm/libs/starcoder-api.dll +0 -0
  29. ipex_llm/libs/starcoder.dll +0 -0
  30. ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py +23 -21
  31. ipex_llm/transformers/npu_pipeline_model/llama.py +11 -7
  32. ipex_llm/transformers/npu_pipeline_model/minicpm.py +10 -6
  33. ipex_llm/transformers/npu_pipeline_model/qwen.py +11 -4
  34. {ipex_llm-2.2.0b20250207.dist-info → ipex_llm-2.2.0b20250209.dist-info}/METADATA +19 -19
  35. {ipex_llm-2.2.0b20250207.dist-info → ipex_llm-2.2.0b20250209.dist-info}/RECORD +41 -41
  36. {ipex_llm-2.2.0b20250207.data → ipex_llm-2.2.0b20250209.data}/scripts/ipex-llm-init.bat +0 -0
  37. {ipex_llm-2.2.0b20250207.data → ipex_llm-2.2.0b20250209.data}/scripts/llm-chat.ps1 +0 -0
  38. {ipex_llm-2.2.0b20250207.data → ipex_llm-2.2.0b20250209.data}/scripts/llm-cli.ps1 +0 -0
  39. {ipex_llm-2.2.0b20250207.dist-info → ipex_llm-2.2.0b20250209.dist-info}/WHEEL +0 -0
  40. {ipex_llm-2.2.0b20250207.dist-info → ipex_llm-2.2.0b20250209.dist-info}/entry_points.txt +0 -0
  41. {ipex_llm-2.2.0b20250207.dist-info → ipex_llm-2.2.0b20250209.dist-info}/top_level.txt +0 -0
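Summary of the substantive change (drawn entirely from the diffs below): apart from the routine nightly version bump from 2.2.0b20250207 to 2.2.0b20250209, the NPU pipeline converters rename the layernorm_const flag to const_parameter and the IPEX_LLM_NPU_LAYERNORM_CONST environment variable to IPEX_LLM_NPU_CONST_PARAMETER, reflecting that the flag now also governs Qwen QKV biases rather than only layernorm weights. A minimal sketch of toggling the new variable before conversion; the variable name and defaults come from the convert_pipeline.py diff, while the surrounding script is illustrative only:

    import os

    # Per the convert_pipeline.py diff: the default is "1" (fold layernorm
    # weights in as compile-time constants) for llama/baichuan/minicpm and
    # "0" for qwen2. Set it before invoking the ipex-llm NPU conversion.
    os.environ["IPEX_LLM_NPU_CONST_PARAMETER"] = "0"  # keep weights as runtime inputs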
Binary file changes: all 29 binary artifacts under ipex_llm/libs/ (files 1-29 in the list above: the bloom/gptneox/llama/starcoder DLLs and their avx/vnni variants, pipeline.dll, and the main-* and quantize-* executables) changed between the two builds. The viewer shows no content for binary files, so each entry reduces to "Binary file ... CHANGED".
ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py CHANGED
@@ -201,7 +201,7 @@ def convert_llm(model: torch.nn.Module,
                 keep_ir: bool=False,
                 compile_blob: bool=True):
     # whether to set layernorm weight as const
-    layernorm_const = os.environ.get("IPEX_LLM_NPU_LAYERNORM_CONST", "1") == "1"
+    const_parameter = os.environ.get("IPEX_LLM_NPU_CONST_PARAMETER", "1") == "1"
     if group_size == 0:
         n_splits_linear = 1
         if qtype in ["sym_int8_rtn", "asym_int4_rtn"]:
@@ -240,7 +240,7 @@ def convert_llm(model: torch.nn.Module,
             for layer_idx in range(0, layer_num):
                 param_list.append((model, layer_idx, n_splits_linear, n_splits_down_proj,
                                    temp_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-                                   layernorm_const))
+                                   const_parameter))
             with Pool() as pool:
                 result = pool.starmap(convert_llama_layer, param_list)

@@ -267,7 +267,7 @@ def convert_llm(model: torch.nn.Module,
                 res = InitLLMPipeline(model_type, kv_len, model.num_head, model.head_dim, layer_num,
                                       model.vocab_size, weight_dir, "model",
                                       first_blob_path, last_blob_path,
-                                      os.path.join(temp_dir, "decoder_layer"), layernorm_const)
+                                      os.path.join(temp_dir, "decoder_layer"), const_parameter)
             except:
                 invalidInputError(False,
                                   "False to InitLLMPipeline.")
@@ -284,7 +284,7 @@ def convert_llm(model: torch.nn.Module,
             for layer_idx in range(0, layer_num):
                 param_list.append((model, layer_idx, n_splits_linear, n_splits_down_proj,
                                    temp_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-                                   layernorm_const))
+                                   const_parameter))
             with Pool() as pool:
                 result = pool.starmap(convert_baichuan_layer, param_list)

@@ -308,7 +308,7 @@ def convert_llm(model: torch.nn.Module,
                 res = InitLLMPipeline("baichuan", kv_len, model.num_head, model.head_dim, layer_num,
                                       model.vocab_size, weight_dir, "model",
                                       first_blob_path, last_blob_path,
-                                      os.path.join(temp_dir, "decoder_layer"), layernorm_const)
+                                      os.path.join(temp_dir, "decoder_layer"), const_parameter)
             except:
                 invalidInputError(False,
                                   "False to InitLLMPipeline.")
@@ -325,7 +325,7 @@ def convert_llm(model: torch.nn.Module,
             for layer_idx in range(0, layer_num):
                 param_list.append((model, layer_idx, n_splits_linear, n_splits_down_proj,
                                    temp_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-                                   layernorm_const))
+                                   const_parameter))
             with Pool() as pool:
                 result = pool.starmap(convert_minicpm_layer, param_list)

@@ -348,12 +348,12 @@ def convert_llm(model: torch.nn.Module,
                 res = InitLLMPipeline("minicpm", kv_len, model.num_head, model.head_dim, layer_num,
                                       model.vocab_size, weight_dir, "model",
                                       first_blob_path, last_blob_path,
-                                      os.path.join(temp_dir, "decoder_layer"), layernorm_const)
+                                      os.path.join(temp_dir, "decoder_layer"), const_parameter)
             except:
                 invalidInputError(False,
                                   "False to InitLLMPipeline.")
     elif model.config.model_type == "qwen2":
-        layernorm_const = os.environ.get("IPEX_LLM_NPU_LAYERNORM_CONST", "0") == "1"
+        const_parameter = os.environ.get("IPEX_LLM_NPU_CONST_PARAMETER", "0") == "1"
         with tempfile.TemporaryDirectory() as temp_dir:
             if save_directory is not None:
                 temp_dir = save_directory
@@ -371,7 +371,7 @@ def convert_llm(model: torch.nn.Module,
             for layer_idx in range(0, layer_num):
                 param_list.append((model, layer_idx, n_splits_linear, n_splits_down_proj,
                                    temp_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-                                   layernorm_const))
+                                   const_parameter))
             with Pool() as pool:
                 result = pool.starmap(convert_qwen_layer, param_list)

@@ -396,7 +396,7 @@ def convert_llm(model: torch.nn.Module,
                                "head_dim": model.head_dim,
                                "transpose_value_cache": transpose_value_cache,
                                "max_prompt_len": max_prompt_len,
-                               "layernorm_const": layernorm_const,
+                               "const_parameter": const_parameter,
                                "group_size": group_size}
                 model.config.update(update_dict)
                 model.config.save_pretrained(save_directory)
@@ -405,7 +405,7 @@ def convert_llm(model: torch.nn.Module,
                 res = InitLLMPipeline("qwen", kv_len, model.num_head, model.head_dim, layer_num,
                                       model.vocab_size, weight_dir, "model",
                                       first_blob_path, last_blob_path,
-                                      os.path.join(temp_dir, "decoder_layer"), layernorm_const)
+                                      os.path.join(temp_dir, "decoder_layer"), const_parameter)
             except:
                 invalidInputError(False,
                                   "False to InitLLMPipeline.")
@@ -441,7 +441,9 @@ def convert_llm_for_deploy(model: torch.nn.Module,
     weight_dir = os.path.join(save_directory, "model_weights")
     if not os.path.exists(weight_dir):
         os.mkdir(weight_dir)
-    layernorm_const = os.environ.get("IPEX_LLM_NPU_LAYERNORM_CONST", "1") == "1"
+    const_parameter = os.environ.get("IPEX_LLM_NPU_CONST_PARAMETER", "1") == "1"
+    if keep_ir:
+        const_parameter = False

     lm_head_low_bit = getattr(model.config, "bigdl_transformers_low_bit", "sym_int4_rtn")
     if hasattr(model, "lm_head") and not isinstance(model.lm_head, SlicedLMHead):
@@ -472,7 +474,7 @@ def convert_llm_for_deploy(model: torch.nn.Module,
                        "head_dim": model.model.layers[0].self_attn.head_dim,
                        "transpose_value_cache": transpose_value_cache,
                        "max_prompt_len": max_prompt_len,
-                       "layernorm_const": layernorm_const,
+                       "const_parameter": const_parameter,
                        "group_size": group_size,
                        "fused_layers": fused_layers,
                        "qkv_bias": True,
@@ -490,12 +492,12 @@ def convert_llm_for_deploy(model: torch.nn.Module,
         # save fused_layers blobs of fused decoder layers
         convert_fused_qwen_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
                                  save_directory, weight_dir, transpose_value_cache, kv_len,
-                                 group_size, layernorm_const, "decode",
+                                 group_size, const_parameter, "decode",
                                  keep_ir=keep_ir, compile_blob=compile_blob)
         # save blob of single prefill layer
         convert_qwen_layer(model, 0, n_splits_linear, n_splits_down_proj,
                            save_directory, weight_dir, transpose_value_cache, max_prompt_len,
-                           group_size, layernorm_const, "prefill",
+                           group_size, const_parameter, "prefill",
                            keep_ir=keep_ir, compile_blob=compile_blob)
         # save blob of lmhead and bin of embedding
         convert_lm_head_and_embedding(model, save_directory, weight_dir, convert_model=True,
@@ -535,7 +537,7 @@ def convert_llm_for_deploy(model: torch.nn.Module,
                        "head_dim": model.model.layers[0].self_attn.head_dim,
                        "transpose_value_cache": transpose_value_cache,
                        "max_prompt_len": max_prompt_len,
-                       "layernorm_const": layernorm_const,
+                       "const_parameter": const_parameter,
                        "group_size": group_size,
                        "fused_layers": fused_layers,
                        "qkv_bias": False,
@@ -559,12 +561,12 @@ def convert_llm_for_deploy(model: torch.nn.Module,
         # save fused_layers blobs of fused decoder layers
         convert_fused_llama_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
                                   save_directory, weight_dir, transpose_value_cache, kv_len,
-                                  group_size, layernorm_const, "decode",
+                                  group_size, const_parameter, "decode",
                                   keep_ir=keep_ir, compile_blob=compile_blob)
         # save blob of single prefill layer
         convert_llama_layer(model, 0, n_splits_linear, n_splits_down_proj,
                             save_directory, weight_dir, transpose_value_cache, max_prompt_len,
-                            group_size, layernorm_const, "prefill",
+                            group_size, const_parameter, "prefill",
                             keep_ir=keep_ir, compile_blob=compile_blob)
     elif model.config.model_type == "minicpm":
         if group_size == 0:
@@ -576,7 +578,7 @@ def convert_llm_for_deploy(model: torch.nn.Module,
                        "head_dim": model.model.layers[0].self_attn.head_dim,
                        "transpose_value_cache": transpose_value_cache,
                        "max_prompt_len": max_prompt_len,
-                       "layernorm_const": layernorm_const,
+                       "const_parameter": const_parameter,
                        "group_size": group_size,
                        "fused_layers": fused_layers,
                        "qkv_bias": False,
@@ -594,12 +596,12 @@ def convert_llm_for_deploy(model: torch.nn.Module,
         # save fused_layers blobs of fused decoder layers
         convert_fused_minicpm_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
                                     save_directory, weight_dir, transpose_value_cache, kv_len,
-                                    group_size, layernorm_const, "decode",
+                                    group_size, const_parameter, "decode",
                                     keep_ir=keep_ir, compile_blob=compile_blob)
         # save blob of single prefill layer
         convert_minicpm_layer(model, 0, n_splits_linear, n_splits_down_proj,
                               save_directory, weight_dir, transpose_value_cache, max_prompt_len,
-                              group_size, layernorm_const, "prefill",
+                              group_size, const_parameter, "prefill",
                               keep_ir=keep_ir, compile_blob=compile_blob)
         # save blob of lmhead and bin of embedding and embedding_post
         convert_lm_head_and_embedding(model, n_splits_linear,
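One behavioral change rides along with the rename, visible in the @@ -441,7 +441,9 hunk above: convert_llm_for_deploy now forces const_parameter off whenever keep_ir is set. A hedged restatement of that new logic (the rationale in the comment is an inference, not taken from the source):

    const_parameter = os.environ.get("IPEX_LLM_NPU_CONST_PARAMETER", "1") == "1"
    if keep_ir:
        # presumably so the retained OpenVINO IR keeps these weights as
        # inputs rather than baking them in as constants
        const_parameter = False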
ipex_llm/transformers/npu_pipeline_model/llama.py CHANGED
@@ -107,7 +107,7 @@ def convert_lm_head_and_embedding(model, n_splits_linear, temp_dir, weight_dir,

 def convert_llama_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
                         temp_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-                        layernorm_const, mode="decode",
+                        const_parameter, mode="decode",
                         keep_ir=False, compile_blob=True):
     num_heads = model.model.layers[0].self_attn.num_heads
     num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads
@@ -145,14 +145,14 @@ def convert_llama_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
     else:
         input_len = kv_len
         decoder_name = "decoder_layer_prefill"
-        layernorm_const = False
+        const_parameter = False
        keep_position_ids = False
        npu_dpu_groups = 6

     single_decoder = LowBitLlamaMultiDecoderlayer(
         [1, input_len, num_heads * head_dim],
-        input_layernorm_weights=[layer_norm_0] if layernorm_const else None,
-        post_attn_layernorm_weights=[layer_norm_1] if layernorm_const else None,
+        input_layernorm_weights=[layer_norm_0] if const_parameter else None,
+        post_attn_layernorm_weights=[layer_norm_1] if const_parameter else None,
         cached_cos=cached_cos,
         cached_sin=cached_sin,
         num_heads=num_heads,
@@ -182,7 +182,7 @@ def convert_llama_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
     if mode == "decode":
         if hasattr(curr_layer.self_attn.rotary_emb, "cos_cached"):
             # llama-2-7B & llama-3-8B
-            if layernorm_const:
+            if const_parameter:
                 st_idx = 5
             else:
                 input_lm_bin_file = os.path.join(weight_dir, f"model_{layer_idx}_input_3.bin")
@@ -192,7 +192,7 @@ def convert_llama_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
                 st_idx = 7
         else:
             # llama-3.2-3B & llama-3.2-1B
-            if layernorm_const:
+            if const_parameter:
                 st_idx = 6
             else:
                 input_lm_bin_file = os.path.join(weight_dir, f"model_{layer_idx}_input_4.bin")
@@ -223,7 +223,7 @@ def convert_llama_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,

 def convert_fused_llama_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
                               save_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-                              layernorm_const, mode="decode",
+                              const_parameter, mode="decode",
                               keep_ir=False, compile_blob=True):
     num_heads = model.model.layers[0].self_attn.num_heads
     num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads
@@ -294,6 +294,10 @@ def convert_fused_llama_layer(model, fused_layers, n_splits_linear, n_splits_dow
             else:  # FP16 Linear
                 np_dtype = np.float16

+            if not const_parameter:
+                input_layer_norm_weights = None
+                post_attn_layernorm_weights = None
+
             fused_decoder = LowBitLlamaMultiDecoderlayer(
                 [1, 1, num_heads * head_dim],
                 input_layernorm_weights=input_layer_norm_weights,
ipex_llm/transformers/npu_pipeline_model/minicpm.py CHANGED
@@ -301,7 +301,7 @@ def convert_lm_head_and_embedding(model, n_splits_linear, temp_dir, weight_dir,

 def convert_minicpm_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
                           temp_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-                          layernorm_const, mode="decode",
+                          const_parameter, mode="decode",
                           keep_ir=False, compile_blob=True):
     num_heads = model.model.layers[0].self_attn.num_heads
     num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads
@@ -333,12 +333,12 @@ def convert_minicpm_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
     else:
         input_len = kv_len
         decoder_name = "decoder_layer_prefill"
-        layernorm_const = False
+        const_parameter = False

     single_decoder = LowBitMinicpmMultiDecoderlayer(
         [1, input_len, num_heads * head_dim],
-        input_layernorm_weights=[layer_norm_0] if layernorm_const else None,
-        post_attn_layernorm_weights=[layer_norm_1] if layernorm_const else None,
+        input_layernorm_weights=[layer_norm_0] if const_parameter else None,
+        post_attn_layernorm_weights=[layer_norm_1] if const_parameter else None,
         cached_cos=cached_cos,
         cached_sin=cached_sin,
         num_heads=num_heads,
@@ -364,7 +364,7 @@ def convert_minicpm_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
         os.remove(os.path.join(temp_dir, decoder_name + ".bin"))

     if mode == "decode":
-        if layernorm_const:
+        if const_parameter:
             st_idx = 5
         else:
             input_lm_bin_file = os.path.join(weight_dir, f"model_{layer_idx}_input_3.bin")
@@ -394,7 +394,7 @@ def convert_minicpm_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,

 def convert_fused_minicpm_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
                                 save_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-                                layernorm_const, mode="decode",
+                                const_parameter, mode="decode",
                                 keep_ir=False, compile_blob=True):
     num_heads = model.model.layers[0].self_attn.num_heads
     num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads
@@ -461,6 +461,10 @@ def convert_fused_minicpm_layer(model, fused_layers, n_splits_linear, n_splits_d
             else:  # FP16 Linear
                 np_dtype = np.float16

+            if not const_parameter:
+                input_layer_norm_weights = None
+                post_attn_layernorm_weights = None
+
             fused_decoder = LowBitMinicpmMultiDecoderlayer(
                 [1, 1, num_heads * head_dim],
                 input_layernorm_weights=input_layer_norm_weights,
ipex_llm/transformers/npu_pipeline_model/qwen.py CHANGED
@@ -117,7 +117,7 @@ def convert_lm_head_and_embedding(model, temp_dir, weight_dir,

 def convert_qwen_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
                        temp_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-                       layernorm_const, mode="decode",
+                       const_parameter, mode="decode",
                        keep_ir=False, compile_blob=True):
     num_heads = model.model.layers[0].self_attn.num_heads
     num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads
@@ -193,7 +193,7 @@ def convert_qwen_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
     # 0, 1, 2 are input_embed/attention_mask/position_id
     if mode == "decode":
         if hasattr(curr_layer.self_attn.rotary_emb, "cos_cached"):
-            if layernorm_const:
+            if const_parameter:
                 st_idx = 3
             else:
                 input_lm_bin_file = os.path.join(weight_dir, f"model_{layer_idx}_input_3.bin")
@@ -203,7 +203,7 @@ def convert_qwen_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
                 st_idx = 5
         else:
             # transformers >= 4.45.0
-            if layernorm_const:
+            if const_parameter:
                 st_idx = 4
             else:
                 input_lm_bin_file = os.path.join(weight_dir, f"model_{layer_idx}_input_4.bin")
@@ -241,7 +241,7 @@ def convert_qwen_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,

 def convert_fused_qwen_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
                              save_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-                             layernorm_const, mode="decode",
+                             const_parameter, mode="decode",
                              keep_ir=False, compile_blob=True):
     num_heads = model.model.layers[0].self_attn.num_heads
     num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads
@@ -325,6 +325,13 @@ def convert_fused_qwen_layer(model, fused_layers, n_splits_linear, n_splits_down
             else:  # FP16 Linear
                 np_dtype = np.float16

+            if not const_parameter:
+                input_layer_norm_weights = None
+                post_attn_layernorm_weights = None
+                q_biases = None
+                k_biases = None
+                v_biases = None
+
             fused_decoder = LowBitQwenMultiDecoderlayer(
                 [1, 1, num_heads * head_dim],
                 input_layernorm_weights=input_layer_norm_weights,
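The fused-layer converters in llama.py, minicpm.py, and qwen.py all gain the same guard: when const_parameter is false, the collected layernorm weights are reset to None before the multi-decoder layer is constructed, so they become runtime inputs instead of compiled-in constants. The Qwen variant, condensed below from the @@ -325,6 +325,13 hunk above, additionally resets the attention biases:

    if not const_parameter:
        input_layer_norm_weights = None
        post_attn_layernorm_weights = None
        q_biases = None  # the bias reset is Qwen-specific; llama and
        k_biases = None  # minicpm reset only the two layernorm weight
        v_biases = None  # lists (see their @@ -294 and @@ -461 hunks)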
{ipex_llm-2.2.0b20250207.dist-info → ipex_llm-2.2.0b20250209.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ipex-llm
-Version: 2.2.0b20250207
+Version: 2.2.0b20250209
 Summary: Large Language Model Develop Toolkit
 Home-page: https://github.com/intel-analytics/ipex-llm
 Author: BigDL Authors
@@ -27,7 +27,7 @@ Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'all'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'all'
 Provides-Extra: cpp
-Requires-Dist: bigdl-core-cpp ==2.6.0b20250207 ; extra == 'cpp'
+Requires-Dist: bigdl-core-cpp ==2.6.0b20250209 ; extra == 'cpp'
 Requires-Dist: setuptools ; extra == 'cpp'
 Requires-Dist: onednn-devel ==2025.0.1 ; (platform_system == "Windows") and extra == 'cpp'
 Requires-Dist: onednn ==2025.0.1 ; (platform_system == "Windows") and extra == 'cpp'
@@ -60,7 +60,7 @@ Requires-Dist: transformers ==4.40.0 ; extra == 'npu'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'npu'
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'npu'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'npu'
-Requires-Dist: bigdl-core-npu ==2.6.0b20250207 ; (platform_system == "Windows") and extra == 'npu'
+Requires-Dist: bigdl-core-npu ==2.6.0b20250209 ; (platform_system == "Windows") and extra == 'npu'
 Provides-Extra: serving
 Requires-Dist: py-cpuinfo ; extra == 'serving'
 Requires-Dist: fschat[model_worker,webui] ==0.2.36 ; extra == 'serving'
@@ -80,9 +80,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250207 ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250207 ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250207 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250209 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250209 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250209 ; extra == 'xpu'
 Provides-Extra: xpu-2-1
 Requires-Dist: py-cpuinfo ; extra == 'xpu-2-1'
 Requires-Dist: protobuf ; extra == 'xpu-2-1'
@@ -97,9 +97,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu-2-1'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250207 ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250207 ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250207 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250209 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250209 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250209 ; extra == 'xpu-2-1'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-1'
 Requires-Dist: dpcpp-cpp-rt ==2024.0.2 ; (platform_system == "Windows") and extra == 'xpu-2-1'
 Requires-Dist: mkl-dpcpp ==2024.0.0 ; (platform_system == "Windows") and extra == 'xpu-2-1'
@@ -117,7 +117,7 @@ Requires-Dist: setuptools ; extra == 'xpu-2-6'
 Requires-Dist: torch ==2.6.0+xpu ; extra == 'xpu-2-6'
 Requires-Dist: torchvision ==0.21.0+xpu ; extra == 'xpu-2-6'
 Requires-Dist: torchaudio ==2.6.0+xpu ; extra == 'xpu-2-6'
-Requires-Dist: bigdl-core-xe-all ==2.6.0b20250207 ; extra == 'xpu-2-6'
+Requires-Dist: bigdl-core-xe-all ==2.6.0b20250209 ; extra == 'xpu-2-6'
 Requires-Dist: onednn-devel ==2025.0.1 ; extra == 'xpu-2-6'
 Requires-Dist: onednn ==2025.0.1 ; extra == 'xpu-2-6'
 Requires-Dist: dpcpp-cpp-rt ==2025.0.2 ; extra == 'xpu-2-6'
@@ -133,9 +133,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arc'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arc'
 Requires-Dist: tabulate ; extra == 'xpu-arc'
 Requires-Dist: setuptools ; extra == 'xpu-arc'
-Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250207 ; extra == 'xpu-arc'
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250207 ; extra == 'xpu-arc'
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250207 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250209 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250209 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250209 ; extra == 'xpu-arc'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arc'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
@@ -156,9 +156,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arl'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arl'
 Requires-Dist: tabulate ; extra == 'xpu-arl'
 Requires-Dist: setuptools ; extra == 'xpu-arl'
-Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250207 ; extra == 'xpu-arl'
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250207 ; extra == 'xpu-arl'
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250207 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250209 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250209 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250209 ; extra == 'xpu-arl'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arl'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
@@ -179,9 +179,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-lnl'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-lnl'
 Requires-Dist: tabulate ; extra == 'xpu-lnl'
 Requires-Dist: setuptools ; extra == 'xpu-lnl'
-Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250207 ; extra == 'xpu-lnl'
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250207 ; extra == 'xpu-lnl'
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250207 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250209 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250209 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250209 ; extra == 'xpu-lnl'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-lnl'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
{ipex_llm-2.2.0b20250207.dist-info → ipex_llm-2.2.0b20250209.dist-info}/RECORD CHANGED
@@ -41,35 +41,35 @@ ipex_llm/langchain/llms/transformerspipelinellm.py,sha256=vm522YPPwWxxAPVvQBtxRf
 ipex_llm/langchain/vllm/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
 ipex_llm/langchain/vllm/vllm.py,sha256=6dxc-ZISZQrJilEa_HA827l75Dv9rcHpY_G6FdJ8BVs,7793
 ipex_llm/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ipex_llm/libs/bloom-api.dll,sha256=R0zcv1M0D8y8inrrCUO2xCSTRb0IChVyLa6YQo9zne8,36352
-ipex_llm/libs/bloom.dll,sha256=eBzUhLMeOAb9InMPp9_KC5VhJC9F-YKNlJn6HyfOAb0,507904
-ipex_llm/libs/gptneox-api.dll,sha256=9_mq8IntnMiU7-_kDxiLojnEc1nu3rrxZZAIes7Nd4k,24576
-ipex_llm/libs/gptneox.dll,sha256=kR3dyhN7tNUxVIWoqudW57V0MIGqr-Mxkmw7kwR8VWs,568320
-ipex_llm/libs/libbloom_avx.dll,sha256=0iRHd_QIzEG_NI0RkFKmCX_HG-3E21t33sxrmbCpQwo,536576
-ipex_llm/libs/libbloom_vnni.dll,sha256=dL1TzKzoki8KDsCmka6QfzBH24T06WokxT3F4M5a3lk,508416
-ipex_llm/libs/libgptneox_avx.dll,sha256=SPi9xXxB5jLp63CfgVhmMA-rCoyCCji2nuWz-rv5y3E,596992
-ipex_llm/libs/libgptneox_vnni.dll,sha256=NV3xykgHJGxNTDWAA_yhwlBG_dbHPX0__5s9uHCPmfc,568832
-ipex_llm/libs/libllama_avx.dll,sha256=EbZ-lpHHtM-zS9aiuDU8cBVueVAtRi3UqerARH41qC8,591360
-ipex_llm/libs/libllama_vnni.dll,sha256=67XqNSyXI1nuaA1-xcSOhYIHZaH7aZBvwMetGpTriIk,563200
-ipex_llm/libs/libstarcoder_avx.dll,sha256=kAqXHfoZfmyqIbNbGpzQjXNCMz9pkG5KVRECzEDEwhM,627712
-ipex_llm/libs/libstarcoder_vnni.dll,sha256=c02B9jpBvST282jRXJtkRwJKkZnzhkz5MLdFfjH9T8I,599552
-ipex_llm/libs/llama-api.dll,sha256=SA2frHXocsnAN9z3LZfWT_FjY1waSMS26bHM6ot_07c,25600
-ipex_llm/libs/llama.dll,sha256=Ls7CKimo2SNy-uJt6lLz16yz1O9E358dRgP8E0svF98,562688
-ipex_llm/libs/main-bloom.exe,sha256=-HCik31DRGrozp_Uy420O1l-Sk_7e9V1bjg4XaLPFvA,103424
-ipex_llm/libs/main-gptneox.exe,sha256=pqxQCGKBrsoDtvuKhCwk6uOAGt4GGvzoAdQbHB9qrFI,98816
-ipex_llm/libs/main-llama.exe,sha256=sPKj3WRmI97jyNhO4A5Lz4eF-tsZZojv6z2VaNzAKAU,99840
-ipex_llm/libs/main-starcoder.exe,sha256=7vyW8v2qO1J_fkRq4uzk44UsV4AhDGmcWHUwMiez8WY,157696
-ipex_llm/libs/pipeline.dll,sha256=vHFtLO6vUZQVwtzXICv1Q5Ork32Dw5Ipqa8pbr6TtmM,72704
-ipex_llm/libs/quantize-bloom.exe,sha256=8rUxXU7Z4AZ7mFHI3sGpwGG18_DkapunwTzzUTjCCbo,126464
-ipex_llm/libs/quantize-bloom_vnni.exe,sha256=gA9kKUkmFOIzT_CmFFvG-fG6d6bZuEWSTeyPvhCsDLs,128000
-ipex_llm/libs/quantize-gptneox.exe,sha256=YsrviyLjQU9uxD1p6TfdBAPXG72-QzZFGpt7lDmK_gM,104448
-ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=mYmUHza3rZztjTogXv9FxuIM20z0gHfyjbF6b6ADEK0,104960
-ipex_llm/libs/quantize-llama.exe,sha256=h-7nbo0uIswViTdxf_vHmE3sZdnQ79dDMUHzqjtyMKs,110080
-ipex_llm/libs/quantize-llama_vnni.exe,sha256=OEPzGySIaa-O9IhPY-u2slHnhMDzp6mL8e_Qr2WUgKc,110592
-ipex_llm/libs/quantize-starcoder.exe,sha256=4U-jT0MC4Iz4kP_6WpKkMOSk_hTlqAwgSVlGLGa-imA,127488
-ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=Xc4jW9KH_RNSfJIYJinDRIx-BbWmqxx4h-kc9jowZpk,128512
-ipex_llm/libs/starcoder-api.dll,sha256=2lF73SE1AyICwtpQSZUfkiAbE1WJQ5gEbikL1Lsvzhg,21504
-ipex_llm/libs/starcoder.dll,sha256=NBh51OQS90ppaqMAJAFCa6HptcUnnPx7tUL1J95QwMk,599040
+ipex_llm/libs/bloom-api.dll,sha256=U0-Bvu1Hx3YpOrmi7FYbHzyWPh7PTcq7ccJKvi8iPaY,36352
+ipex_llm/libs/bloom.dll,sha256=ILK_LrDzq7d3B3QzvuWhEkyvrupN84z2epVTyO5PqTw,507904
+ipex_llm/libs/gptneox-api.dll,sha256=4_aUAtRpJjp91ojQsJwQWjtaaotdK80JUmwe24fyWbs,24576
+ipex_llm/libs/gptneox.dll,sha256=984aCoROSwrqlfmWpvYjW6rTGR8jak__GcQ-litzfMc,568320
+ipex_llm/libs/libbloom_avx.dll,sha256=N_Vc658b8oW_pNh7IegV7EoHVWkgplhUEQByekuFXqU,536576
+ipex_llm/libs/libbloom_vnni.dll,sha256=o02kqjBOT6o4iyRwJXZIO_WBcilfRtRjR9grrftAjss,508416
+ipex_llm/libs/libgptneox_avx.dll,sha256=OFSy6yHu6r0AylVjXBFjONqhLShQc2seYTxZPxDdsTc,596992
+ipex_llm/libs/libgptneox_vnni.dll,sha256=YCjfvELLTSk7e6xxi1BBrDb7X8Hn1-6O5A7zbkqtOco,568832
+ipex_llm/libs/libllama_avx.dll,sha256=XZJykjVJWQeABU7Hid_Q6eC9FQnlc9rcgmHj01X32to,591360
+ipex_llm/libs/libllama_vnni.dll,sha256=XKzC0BeBR5bgJJrU0PAMITveO3IbPnfAYkVZd_xi3Q0,563200
+ipex_llm/libs/libstarcoder_avx.dll,sha256=0tmhjR_F9jpbFj20m-Dl3zOtbuWQrcSm6VgHX4zb-jU,627712
+ipex_llm/libs/libstarcoder_vnni.dll,sha256=_BE49_uj7VSU7HvxzAq-AmPSR8VccQv4nR_rRDPy_aY,599552
+ipex_llm/libs/llama-api.dll,sha256=5rmig5u2sTHNXuxt70eP_JiKnOy4vy2YcJ3uBHBpvBc,25600
+ipex_llm/libs/llama.dll,sha256=Dkg70FUB9zUp9MGi0cRZQlAVr5BesmaqF9oAQxa6oTg,562688
+ipex_llm/libs/main-bloom.exe,sha256=hz7H1LCpHeucnGEVvSErvTbQftbqbOarXj_gSWySqSo,103424
+ipex_llm/libs/main-gptneox.exe,sha256=513Le9X5XFQpq6-SsVL1ivmoaIuJIzruO-vycciSjVI,98816
+ipex_llm/libs/main-llama.exe,sha256=aavlQia_sz2RRI7yUjKWLUITQ1uDpSaSldVNm7jpj1g,99840
+ipex_llm/libs/main-starcoder.exe,sha256=caoMY-ajVNLmARZtyehbGN-8hjGntm2HUtVisGAneVc,157696
+ipex_llm/libs/pipeline.dll,sha256=QdP2K6m0ewc19729A0bZnVB_F6DhT78CDKyZxOGs7-I,73216
+ipex_llm/libs/quantize-bloom.exe,sha256=mUcTDN9q00jgtEE4r_RW63CfDF0s9uA-lf4OF65NVoY,126464
+ipex_llm/libs/quantize-bloom_vnni.exe,sha256=bpSywuXfBy3u7SyxQjaErj861P4BxZ0u0_izGuXi92c,128000
+ipex_llm/libs/quantize-gptneox.exe,sha256=AsDSisOhNsiFixDbY6fwjR_Q7OmsoIKU9HvSBVkiDMQ,104448
+ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=K53Aq2y_WeVxcHCoIcYU5IqNXqABEZLHTbF_xLGXRV8,104960
+ipex_llm/libs/quantize-llama.exe,sha256=RitAwRaB321vPwl0tYx8XIlv4IYoLcM3JK7svhP14Gw,110080
+ipex_llm/libs/quantize-llama_vnni.exe,sha256=kdVy8SUimLmyeKTcLR9jIEYkvqgO2tl0RIN-i47X7ls,110592
+ipex_llm/libs/quantize-starcoder.exe,sha256=JjlJnC9tJezSrv9RwU1n7XN_ZJKlzM5qNTSfnCSD13Y,127488
+ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=D9tnS-eqS8QS6WfmXaJ3U0LJ-mo9wPfRnGM0sQkeTHY,128512
+ipex_llm/libs/starcoder-api.dll,sha256=eqAZM8d_VTuk1ewVRAyxw6BDprvWCevMP69euUswEHs,21504
+ipex_llm/libs/starcoder.dll,sha256=rUf75bDnOps6nckM9OP-ecCDDnflPrM3OW2l1jlmxso,599040
 ipex_llm/llamaindex/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
 ipex_llm/llamaindex/llms/__init__.py,sha256=KP1lEdGqDuxPoxL1ZSH25Pm2kKMPJBWUTLR0ckSLMIU,1139
 ipex_llm/llamaindex/llms/bigdlllm.py,sha256=FQBzq1KOjfc6uofTXAha3O7TqpJkNfOFepXQmOVlbnI,26314
@@ -208,11 +208,11 @@ ipex_llm/transformers/npu_models/xlm_mp.py,sha256=sj8OVun8xJprM7ZJp0XzWa55rqlSIz
 ipex_llm/transformers/npu_pipeline_model/__init__.py,sha256=b2IXvVqQ5cItki021h8s3ymW12RPu8QNPprq4Mn3bDM,586
 ipex_llm/transformers/npu_pipeline_model/baichuan.py,sha256=ICxRzFQ4OIANDkkVi2_4xOeQXmfFXYMx3H52KuE1xR4,6208
 ipex_llm/transformers/npu_pipeline_model/common.py,sha256=faooJmM75qnVyZYuQLx9gJpVlotcVF4qXRCnOrknfk4,14776
-ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py,sha256=_l4RFmyBMbREo8vzKpHXAMtE202JVQ41Y2lPg1qCOMI,29846
-ipex_llm/transformers/npu_pipeline_model/llama.py,sha256=j2sipfFSrzV2VgLKPOClMHwWIDXqDsL1jIQJK25hneo,14397
-ipex_llm/transformers/npu_pipeline_model/minicpm.py,sha256=H7j_UaHj-IwEBriQ-bunle0-8s2NmvqnL9eYuixnmFc,21398
+ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py,sha256=IlvaZC9pi_ZJTWuO2dMSgCkc3V909lVhpAfktTEfSLI,29894
+ipex_llm/transformers/npu_pipeline_model/llama.py,sha256=ISJ84zj0Ce2f9vdgmoGfdxQz2LRaUMlZCEM3MV2VpoQ,14521
+ipex_llm/transformers/npu_pipeline_model/minicpm.py,sha256=Q-rUzXBlx5Ns3xemi7H6t8dnzu1q4e-MhUpJMzJmBRU,21522
 ipex_llm/transformers/npu_pipeline_model/pipeline_cpp.py,sha256=JNmodAMg_NQvDILug3E_fGXEh6cd3wsj4bvAzcd-vaU,2749
-ipex_llm/transformers/npu_pipeline_model/qwen.py,sha256=6MNtCL1CXoR19B4tKZSgv2e5gtma9bqDG7DOYMCnPt0,16013
+ipex_llm/transformers/npu_pipeline_model/qwen.py,sha256=WEMUdGZH3INyJm-1Hfv3o41BiP037n2ftS4qPM0jaiE,16221
 ipex_llm/utils/__init__.py,sha256=LlUgrD03rfw4iY8zWPtHH6p65Gw76waVOLHaqagETw0,1425
 ipex_llm/utils/benchmark_util_4_29.py,sha256=OU1W1quiaiJGsg1pd3HM9O6PmVSaPA0HHE7R8hNTfmQ,258653
 ipex_llm/utils/benchmark_util_4_42.py,sha256=HEiClCgKDp_T64HH8ulSTly8dvt6UwPDYZfrPVYvXcc,225383
@@ -248,11 +248,11 @@ ipex_llm/vllm/xpu/engine/__init__.py,sha256=pY_CpyuZd72fr6s32ejeKHKFW0K4vUU2rzZj
 ipex_llm/vllm/xpu/engine/engine.py,sha256=k4-D27WS_Gk3mA--w3HWAjPjb4Aiu043MVPi0ZoAUBc,5984
 ipex_llm/vllm/xpu/entrypoints/openai/api_server.py,sha256=GshTZFB8e4PWvqckfbmTOU6b0oLkNn7A-vzLuG9--j8,21544
 ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py,sha256=2rENA2ucynMaIjiZBEh2ez1o5vR32GaP514t39CD7KM,8676
-ipex_llm-2.2.0b20250207.data/scripts/ipex-llm-init.bat,sha256=HPtCYuDYwEatq7dAwOvdfVcHYCpAVdbj75K1qh0vQek,2578
-ipex_llm-2.2.0b20250207.data/scripts/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
-ipex_llm-2.2.0b20250207.data/scripts/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
-ipex_llm-2.2.0b20250207.dist-info/METADATA,sha256=d1hx5hE5Xeb3lHGWqeF35SK9GZOX6syXJ_Syu5b35IU,12369
-ipex_llm-2.2.0b20250207.dist-info/WHEEL,sha256=6iYPr8vTHsyDK75jr9X0V3I9wPSVmtwr_8fdATBciGk,98
-ipex_llm-2.2.0b20250207.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
-ipex_llm-2.2.0b20250207.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
-ipex_llm-2.2.0b20250207.dist-info/RECORD,,
+ipex_llm-2.2.0b20250209.data/scripts/ipex-llm-init.bat,sha256=HPtCYuDYwEatq7dAwOvdfVcHYCpAVdbj75K1qh0vQek,2578
+ipex_llm-2.2.0b20250209.data/scripts/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
+ipex_llm-2.2.0b20250209.data/scripts/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
+ipex_llm-2.2.0b20250209.dist-info/METADATA,sha256=KZwqL3P6WfVX5YAbcMk7oSjq7fwR221_ZHLnNz1e6xI,12369
+ipex_llm-2.2.0b20250209.dist-info/WHEEL,sha256=6iYPr8vTHsyDK75jr9X0V3I9wPSVmtwr_8fdATBciGk,98
+ipex_llm-2.2.0b20250209.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
+ipex_llm-2.2.0b20250209.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
+ipex_llm-2.2.0b20250209.dist-info/RECORD,,