ipex-llm 2.2.0b20250207__py3-none-win_amd64.whl → 2.2.0b20250208__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ipex_llm/libs/bloom-api.dll +0 -0
- ipex_llm/libs/bloom.dll +0 -0
- ipex_llm/libs/gptneox-api.dll +0 -0
- ipex_llm/libs/gptneox.dll +0 -0
- ipex_llm/libs/libbloom_avx.dll +0 -0
- ipex_llm/libs/libbloom_vnni.dll +0 -0
- ipex_llm/libs/libgptneox_avx.dll +0 -0
- ipex_llm/libs/libgptneox_vnni.dll +0 -0
- ipex_llm/libs/libllama_avx.dll +0 -0
- ipex_llm/libs/libllama_vnni.dll +0 -0
- ipex_llm/libs/libstarcoder_avx.dll +0 -0
- ipex_llm/libs/libstarcoder_vnni.dll +0 -0
- ipex_llm/libs/llama-api.dll +0 -0
- ipex_llm/libs/llama.dll +0 -0
- ipex_llm/libs/main-bloom.exe +0 -0
- ipex_llm/libs/main-gptneox.exe +0 -0
- ipex_llm/libs/main-llama.exe +0 -0
- ipex_llm/libs/main-starcoder.exe +0 -0
- ipex_llm/libs/pipeline.dll +0 -0
- ipex_llm/libs/quantize-bloom.exe +0 -0
- ipex_llm/libs/quantize-bloom_vnni.exe +0 -0
- ipex_llm/libs/quantize-gptneox.exe +0 -0
- ipex_llm/libs/quantize-gptneox_vnni.exe +0 -0
- ipex_llm/libs/quantize-llama.exe +0 -0
- ipex_llm/libs/quantize-llama_vnni.exe +0 -0
- ipex_llm/libs/quantize-starcoder.exe +0 -0
- ipex_llm/libs/quantize-starcoder_vnni.exe +0 -0
- ipex_llm/libs/starcoder-api.dll +0 -0
- ipex_llm/libs/starcoder.dll +0 -0
- ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py +23 -21
- ipex_llm/transformers/npu_pipeline_model/llama.py +11 -7
- ipex_llm/transformers/npu_pipeline_model/minicpm.py +10 -6
- ipex_llm/transformers/npu_pipeline_model/qwen.py +11 -4
- {ipex_llm-2.2.0b20250207.dist-info → ipex_llm-2.2.0b20250208.dist-info}/METADATA +19 -19
- {ipex_llm-2.2.0b20250207.dist-info → ipex_llm-2.2.0b20250208.dist-info}/RECORD +41 -41
- {ipex_llm-2.2.0b20250207.data → ipex_llm-2.2.0b20250208.data}/scripts/ipex-llm-init.bat +0 -0
- {ipex_llm-2.2.0b20250207.data → ipex_llm-2.2.0b20250208.data}/scripts/llm-chat.ps1 +0 -0
- {ipex_llm-2.2.0b20250207.data → ipex_llm-2.2.0b20250208.data}/scripts/llm-cli.ps1 +0 -0
- {ipex_llm-2.2.0b20250207.dist-info → ipex_llm-2.2.0b20250208.dist-info}/WHEEL +0 -0
- {ipex_llm-2.2.0b20250207.dist-info → ipex_llm-2.2.0b20250208.dist-info}/entry_points.txt +0 -0
- {ipex_llm-2.2.0b20250207.dist-info → ipex_llm-2.2.0b20250208.dist-info}/top_level.txt +0 -0
The following ipex_llm/libs binaries CHANGED (binary files, no textual diff shown):

ipex_llm/libs/bloom-api.dll
ipex_llm/libs/bloom.dll
ipex_llm/libs/gptneox-api.dll
ipex_llm/libs/gptneox.dll
ipex_llm/libs/libbloom_avx.dll
ipex_llm/libs/libbloom_vnni.dll
ipex_llm/libs/libgptneox_avx.dll
ipex_llm/libs/libgptneox_vnni.dll
ipex_llm/libs/libllama_avx.dll
ipex_llm/libs/libllama_vnni.dll
ipex_llm/libs/libstarcoder_avx.dll
ipex_llm/libs/libstarcoder_vnni.dll
ipex_llm/libs/llama-api.dll
ipex_llm/libs/llama.dll
ipex_llm/libs/main-bloom.exe
ipex_llm/libs/main-gptneox.exe
ipex_llm/libs/main-llama.exe
ipex_llm/libs/main-starcoder.exe
ipex_llm/libs/pipeline.dll
ipex_llm/libs/quantize-bloom.exe
ipex_llm/libs/quantize-bloom_vnni.exe
ipex_llm/libs/quantize-gptneox.exe
ipex_llm/libs/quantize-gptneox_vnni.exe
ipex_llm/libs/quantize-llama.exe
ipex_llm/libs/quantize-llama_vnni.exe
ipex_llm/libs/quantize-starcoder.exe
ipex_llm/libs/quantize-starcoder_vnni.exe
ipex_llm/libs/starcoder-api.dll
ipex_llm/libs/starcoder.dll
ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py
CHANGED

@@ -201,7 +201,7 @@ def convert_llm(model: torch.nn.Module,
 keep_ir: bool=False,
 compile_blob: bool=True):
 # whether to set layernorm weight as const
-…
+const_parameter = os.environ.get("IPEX_LLM_NPU_CONST_PARAMETER", "1") == "1"
 if group_size == 0:
 n_splits_linear = 1
 if qtype in ["sym_int8_rtn", "asym_int4_rtn"]:

@@ -240,7 +240,7 @@ def convert_llm(model: torch.nn.Module,
 for layer_idx in range(0, layer_num):
 param_list.append((model, layer_idx, n_splits_linear, n_splits_down_proj,
 temp_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-…
+const_parameter))
 with Pool() as pool:
 result = pool.starmap(convert_llama_layer, param_list)

@@ -267,7 +267,7 @@ def convert_llm(model: torch.nn.Module,
 res = InitLLMPipeline(model_type, kv_len, model.num_head, model.head_dim, layer_num,
 model.vocab_size, weight_dir, "model",
 first_blob_path, last_blob_path,
-os.path.join(temp_dir, "decoder_layer"), …
+os.path.join(temp_dir, "decoder_layer"), const_parameter)
 except:
 invalidInputError(False,
 "False to InitLLMPipeline.")

@@ -284,7 +284,7 @@ def convert_llm(model: torch.nn.Module,
 for layer_idx in range(0, layer_num):
 param_list.append((model, layer_idx, n_splits_linear, n_splits_down_proj,
 temp_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-…
+const_parameter))
 with Pool() as pool:
 result = pool.starmap(convert_baichuan_layer, param_list)

@@ -308,7 +308,7 @@ def convert_llm(model: torch.nn.Module,
 res = InitLLMPipeline("baichuan", kv_len, model.num_head, model.head_dim, layer_num,
 model.vocab_size, weight_dir, "model",
 first_blob_path, last_blob_path,
-os.path.join(temp_dir, "decoder_layer"), …
+os.path.join(temp_dir, "decoder_layer"), const_parameter)
 except:
 invalidInputError(False,
 "False to InitLLMPipeline.")

@@ -325,7 +325,7 @@ def convert_llm(model: torch.nn.Module,
 for layer_idx in range(0, layer_num):
 param_list.append((model, layer_idx, n_splits_linear, n_splits_down_proj,
 temp_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-…
+const_parameter))
 with Pool() as pool:
 result = pool.starmap(convert_minicpm_layer, param_list)

@@ -348,12 +348,12 @@ def convert_llm(model: torch.nn.Module,
 res = InitLLMPipeline("minicpm", kv_len, model.num_head, model.head_dim, layer_num,
 model.vocab_size, weight_dir, "model",
 first_blob_path, last_blob_path,
-os.path.join(temp_dir, "decoder_layer"), …
+os.path.join(temp_dir, "decoder_layer"), const_parameter)
 except:
 invalidInputError(False,
 "False to InitLLMPipeline.")
 elif model.config.model_type == "qwen2":
-…
+const_parameter = os.environ.get("IPEX_LLM_NPU_CONST_PARAMETER", "0") == "1"
 with tempfile.TemporaryDirectory() as temp_dir:
 if save_directory is not None:
 temp_dir = save_directory

@@ -371,7 +371,7 @@ def convert_llm(model: torch.nn.Module,
 for layer_idx in range(0, layer_num):
 param_list.append((model, layer_idx, n_splits_linear, n_splits_down_proj,
 temp_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-…
+const_parameter))
 with Pool() as pool:
 result = pool.starmap(convert_qwen_layer, param_list)

@@ -396,7 +396,7 @@ def convert_llm(model: torch.nn.Module,
 "head_dim": model.head_dim,
 "transpose_value_cache": transpose_value_cache,
 "max_prompt_len": max_prompt_len,
-"…
+"const_parameter": const_parameter,
 "group_size": group_size}
 model.config.update(update_dict)
 model.config.save_pretrained(save_directory)

@@ -405,7 +405,7 @@ def convert_llm(model: torch.nn.Module,
 res = InitLLMPipeline("qwen", kv_len, model.num_head, model.head_dim, layer_num,
 model.vocab_size, weight_dir, "model",
 first_blob_path, last_blob_path,
-os.path.join(temp_dir, "decoder_layer"), …
+os.path.join(temp_dir, "decoder_layer"), const_parameter)
 except:
 invalidInputError(False,
 "False to InitLLMPipeline.")

@@ -441,7 +441,9 @@ def convert_llm_for_deploy(model: torch.nn.Module,
 weight_dir = os.path.join(save_directory, "model_weights")
 if not os.path.exists(weight_dir):
 os.mkdir(weight_dir)
-…
+const_parameter = os.environ.get("IPEX_LLM_NPU_CONST_PARAMETER", "1") == "1"
+if keep_ir:
+    const_parameter = False

 lm_head_low_bit = getattr(model.config, "bigdl_transformers_low_bit", "sym_int4_rtn")
 if hasattr(model, "lm_head") and not isinstance(model.lm_head, SlicedLMHead):

@@ -472,7 +474,7 @@ def convert_llm_for_deploy(model: torch.nn.Module,
 "head_dim": model.model.layers[0].self_attn.head_dim,
 "transpose_value_cache": transpose_value_cache,
 "max_prompt_len": max_prompt_len,
-"…
+"const_parameter": const_parameter,
 "group_size": group_size,
 "fused_layers": fused_layers,
 "qkv_bias": True,

@@ -490,12 +492,12 @@ def convert_llm_for_deploy(model: torch.nn.Module,
 # save fused_layers blobs of fused decoder layers
 convert_fused_qwen_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
 save_directory, weight_dir, transpose_value_cache, kv_len,
-group_size, …
+group_size, const_parameter, "decode",
 keep_ir=keep_ir, compile_blob=compile_blob)
 # save blob of single prefill layer
 convert_qwen_layer(model, 0, n_splits_linear, n_splits_down_proj,
 save_directory, weight_dir, transpose_value_cache, max_prompt_len,
-group_size, …
+group_size, const_parameter, "prefill",
 keep_ir=keep_ir, compile_blob=compile_blob)
 # save blob of lmhead and bin of embedding
 convert_lm_head_and_embedding(model, save_directory, weight_dir, convert_model=True,

@@ -535,7 +537,7 @@ def convert_llm_for_deploy(model: torch.nn.Module,
 "head_dim": model.model.layers[0].self_attn.head_dim,
 "transpose_value_cache": transpose_value_cache,
 "max_prompt_len": max_prompt_len,
-"…
+"const_parameter": const_parameter,
 "group_size": group_size,
 "fused_layers": fused_layers,
 "qkv_bias": False,

@@ -559,12 +561,12 @@ def convert_llm_for_deploy(model: torch.nn.Module,
 # save fused_layers blobs of fused decoder layers
 convert_fused_llama_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
 save_directory, weight_dir, transpose_value_cache, kv_len,
-group_size, …
+group_size, const_parameter, "decode",
 keep_ir=keep_ir, compile_blob=compile_blob)
 # save blob of single prefill layer
 convert_llama_layer(model, 0, n_splits_linear, n_splits_down_proj,
 save_directory, weight_dir, transpose_value_cache, max_prompt_len,
-group_size, …
+group_size, const_parameter, "prefill",
 keep_ir=keep_ir, compile_blob=compile_blob)
 elif model.config.model_type == "minicpm":
 if group_size == 0:

@@ -576,7 +578,7 @@ def convert_llm_for_deploy(model: torch.nn.Module,
 "head_dim": model.model.layers[0].self_attn.head_dim,
 "transpose_value_cache": transpose_value_cache,
 "max_prompt_len": max_prompt_len,
-"…
+"const_parameter": const_parameter,
 "group_size": group_size,
 "fused_layers": fused_layers,
 "qkv_bias": False,

@@ -594,12 +596,12 @@ def convert_llm_for_deploy(model: torch.nn.Module,
 # save fused_layers blobs of fused decoder layers
 convert_fused_minicpm_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
 save_directory, weight_dir, transpose_value_cache, kv_len,
-group_size, …
+group_size, const_parameter, "decode",
 keep_ir=keep_ir, compile_blob=compile_blob)
 # save blob of single prefill layer
 convert_minicpm_layer(model, 0, n_splits_linear, n_splits_down_proj,
 save_directory, weight_dir, transpose_value_cache, max_prompt_len,
-group_size, …
+group_size, const_parameter, "prefill",
 keep_ir=keep_ir, compile_blob=compile_blob)
 # save blob of lmhead and bin of embedding and embedding_post
 convert_lm_head_and_embedding(model, n_splits_linear,
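The net effect of these convert_pipeline.py hunks is a single environment-driven switch. A minimal sketch of that logic follows; resolve_const_parameter is a hypothetical helper written only to summarize the behavior visible in the hunks above (it is not an ipex-llm function), and it folds together the defaults used by convert_llm and the keep_ir override used by convert_llm_for_deploy.

    import os

    def resolve_const_parameter(model_type, keep_ir=False):
        # convert_llm reads IPEX_LLM_NPU_CONST_PARAMETER with default "1" in the
        # llama/baichuan/minicpm branches and default "0" in the qwen2 branch;
        # convert_llm_for_deploy additionally forces the flag off when keep_ir=True.
        default = "0" if model_type == "qwen2" else "1"
        const_parameter = os.environ.get("IPEX_LLM_NPU_CONST_PARAMETER", default) == "1"
        if keep_ir:
            const_parameter = False
        return const_parameter

The resolved value is then threaded into every convert_*_layer call, passed to InitLLMPipeline, and stored in the saved config as "const_parameter", replacing what the surrounding comment describes as a layernorm-only constant flag.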
ipex_llm/transformers/npu_pipeline_model/llama.py
CHANGED

@@ -107,7 +107,7 @@ def convert_lm_head_and_embedding(model, n_splits_linear, temp_dir, weight_dir,

 def convert_llama_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
 temp_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-…
+const_parameter, mode="decode",
 keep_ir=False, compile_blob=True):
 num_heads = model.model.layers[0].self_attn.num_heads
 num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads

@@ -145,14 +145,14 @@ def convert_llama_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
 else:
 input_len = kv_len
 decoder_name = "decoder_layer_prefill"
-…
+const_parameter = False
 keep_position_ids = False
 npu_dpu_groups = 6

 single_decoder = LowBitLlamaMultiDecoderlayer(
 [1, input_len, num_heads * head_dim],
-input_layernorm_weights=[layer_norm_0] if …
-post_attn_layernorm_weights=[layer_norm_1] if …
+input_layernorm_weights=[layer_norm_0] if const_parameter else None,
+post_attn_layernorm_weights=[layer_norm_1] if const_parameter else None,
 cached_cos=cached_cos,
 cached_sin=cached_sin,
 num_heads=num_heads,

@@ -182,7 +182,7 @@ def convert_llama_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
 if mode == "decode":
 if hasattr(curr_layer.self_attn.rotary_emb, "cos_cached"):
 # llama-2-7B & llama-3-8B
-if …
+if const_parameter:
 st_idx = 5
 else:
 input_lm_bin_file = os.path.join(weight_dir, f"model_{layer_idx}_input_3.bin")

@@ -192,7 +192,7 @@ def convert_llama_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
 st_idx = 7
 else:
 # llama-3.2-3B & llama-3.2-1B
-if …
+if const_parameter:
 st_idx = 6
 else:
 input_lm_bin_file = os.path.join(weight_dir, f"model_{layer_idx}_input_4.bin")

@@ -223,7 +223,7 @@ def convert_llama_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,

 def convert_fused_llama_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
 save_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-…
+const_parameter, mode="decode",
 keep_ir=False, compile_blob=True):
 num_heads = model.model.layers[0].self_attn.num_heads
 num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads

@@ -294,6 +294,10 @@ def convert_fused_llama_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
 else:  # FP16 Linear
 np_dtype = np.float16

+if not const_parameter:
+    input_layer_norm_weights = None
+    post_attn_layernorm_weights = None
+
 fused_decoder = LowBitLlamaMultiDecoderlayer(
 [1, 1, num_heads * head_dim],
 input_layernorm_weights=input_layer_norm_weights,
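What the flag changes in practice: in convert_llama_layer (and the parallel minicpm converter below), the input and post-attention layernorm weights are baked into the decoder blob as constants only when const_parameter is True and the layer is built in decode mode; prefill always falls back to runtime inputs. A schematic sketch, with pick_layernorm_inputs as a hypothetical name used only for illustration:

    import torch

    def pick_layernorm_inputs(layer_norm_0, layer_norm_1, const_parameter, mode):
        # Mirrors the pattern above: prefill layers never embed the layernorm
        # weights, decode layers embed them only when const_parameter is set.
        if mode != "decode":
            const_parameter = False
        if const_parameter:
            return [layer_norm_0], [layer_norm_1]   # folded into the compiled blob
        return None, None                           # supplied as runtime inputs

    ln0, ln1 = torch.ones(4096), torch.ones(4096)
    print(pick_layernorm_inputs(ln0, ln1, True, "decode")[0] is not None)   # True
    print(pick_layernorm_inputs(ln0, ln1, True, "prefill"))                 # (None, None)

When the weights are not constants, the extra bookkeeping in the hunks above (the st_idx offsets and the model_{layer_idx}_input_*.bin writes) appears to handle saving them so they can be fed in at runtime instead.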
ipex_llm/transformers/npu_pipeline_model/minicpm.py
CHANGED

@@ -301,7 +301,7 @@ def convert_lm_head_and_embedding(model, n_splits_linear, temp_dir, weight_dir,

 def convert_minicpm_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
 temp_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-…
+const_parameter, mode="decode",
 keep_ir=False, compile_blob=True):
 num_heads = model.model.layers[0].self_attn.num_heads
 num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads

@@ -333,12 +333,12 @@ def convert_minicpm_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
 else:
 input_len = kv_len
 decoder_name = "decoder_layer_prefill"
-…
+const_parameter = False

 single_decoder = LowBitMinicpmMultiDecoderlayer(
 [1, input_len, num_heads * head_dim],
-input_layernorm_weights=[layer_norm_0] if …
-post_attn_layernorm_weights=[layer_norm_1] if …
+input_layernorm_weights=[layer_norm_0] if const_parameter else None,
+post_attn_layernorm_weights=[layer_norm_1] if const_parameter else None,
 cached_cos=cached_cos,
 cached_sin=cached_sin,
 num_heads=num_heads,

@@ -364,7 +364,7 @@ def convert_minicpm_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
 os.remove(os.path.join(temp_dir, decoder_name + ".bin"))

 if mode == "decode":
-if …
+if const_parameter:
 st_idx = 5
 else:
 input_lm_bin_file = os.path.join(weight_dir, f"model_{layer_idx}_input_3.bin")

@@ -394,7 +394,7 @@ def convert_minicpm_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,

 def convert_fused_minicpm_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
 save_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-…
+const_parameter, mode="decode",
 keep_ir=False, compile_blob=True):
 num_heads = model.model.layers[0].self_attn.num_heads
 num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads

@@ -461,6 +461,10 @@ def convert_fused_minicpm_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
 else:  # FP16 Linear
 np_dtype = np.float16

+if not const_parameter:
+    input_layer_norm_weights = None
+    post_attn_layernorm_weights = None
+
 fused_decoder = LowBitMinicpmMultiDecoderlayer(
 [1, 1, num_heads * head_dim],
 input_layernorm_weights=input_layer_norm_weights,
ipex_llm/transformers/npu_pipeline_model/qwen.py
CHANGED

@@ -117,7 +117,7 @@ def convert_lm_head_and_embedding(model, temp_dir, weight_dir,

 def convert_qwen_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
 temp_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-…
+const_parameter, mode="decode",
 keep_ir=False, compile_blob=True):
 num_heads = model.model.layers[0].self_attn.num_heads
 num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads

@@ -193,7 +193,7 @@ def convert_qwen_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
 # 0, 1, 2 are input_embed/attention_mask/position_id
 if mode == "decode":
 if hasattr(curr_layer.self_attn.rotary_emb, "cos_cached"):
-if …
+if const_parameter:
 st_idx = 3
 else:
 input_lm_bin_file = os.path.join(weight_dir, f"model_{layer_idx}_input_3.bin")

@@ -203,7 +203,7 @@ def convert_qwen_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,
 st_idx = 5
 else:
 # transformers >= 4.45.0
-if …
+if const_parameter:
 st_idx = 4
 else:
 input_lm_bin_file = os.path.join(weight_dir, f"model_{layer_idx}_input_4.bin")

@@ -241,7 +241,7 @@ def convert_qwen_layer(model, layer_idx, n_splits_linear, n_splits_down_proj,

 def convert_fused_qwen_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
 save_dir, weight_dir, transpose_value_cache, kv_len, group_size,
-…
+const_parameter, mode="decode",
 keep_ir=False, compile_blob=True):
 num_heads = model.model.layers[0].self_attn.num_heads
 num_key_value_heads = model.model.layers[0].self_attn.num_key_value_heads

@@ -325,6 +325,13 @@ def convert_fused_qwen_layer(model, fused_layers, n_splits_linear, n_splits_down_proj,
 else:  # FP16 Linear
 np_dtype = np.float16

+if not const_parameter:
+    input_layer_norm_weights = None
+    post_attn_layernorm_weights = None
+    q_biases = None
+    k_biases = None
+    v_biases = None
+
 fused_decoder = LowBitQwenMultiDecoderlayer(
 [1, 1, num_heads * head_dim],
 input_layernorm_weights=input_layer_norm_weights,
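The qwen converter extends the same idea to the fused decode blob: when const_parameter is off, the QKV biases are treated the same way as the layernorm weights and become runtime inputs rather than compile-time constants. A schematic sketch (drop_fused_constants and the weights dict are illustrative only, not ipex-llm APIs):

    def drop_fused_constants(weights, const_parameter):
        # `weights` is a plain dict standing in for the locals gathered earlier in
        # convert_fused_qwen_layer (layernorm weights plus q/k/v biases). With
        # const_parameter off, every entry is cleared so the fused decoder is
        # compiled without baked-in constants.
        if not const_parameter:
            for key in ("input_layer_norm_weights", "post_attn_layernorm_weights",
                        "q_biases", "k_biases", "v_biases"):
                weights[key] = None
        return weights

    example = {k: object() for k in ("input_layer_norm_weights",
                                     "post_attn_layernorm_weights",
                                     "q_biases", "k_biases", "v_biases")}
    print(drop_fused_constants(example, const_parameter=False))  # all values become None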
{ipex_llm-2.2.0b20250207.dist-info → ipex_llm-2.2.0b20250208.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ipex-llm
-Version: 2.2.…
+Version: 2.2.0b20250208
 Summary: Large Language Model Develop Toolkit
 Home-page: https://github.com/intel-analytics/ipex-llm
 Author: BigDL Authors

@@ -27,7 +27,7 @@ Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'all'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'all'
 Provides-Extra: cpp
-Requires-Dist: bigdl-core-cpp ==2.6.…
+Requires-Dist: bigdl-core-cpp ==2.6.0b20250208 ; extra == 'cpp'
 Requires-Dist: setuptools ; extra == 'cpp'
 Requires-Dist: onednn-devel ==2025.0.1 ; (platform_system == "Windows") and extra == 'cpp'
 Requires-Dist: onednn ==2025.0.1 ; (platform_system == "Windows") and extra == 'cpp'

@@ -60,7 +60,7 @@ Requires-Dist: transformers ==4.40.0 ; extra == 'npu'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'npu'
 Requires-Dist: torch ==2.1.2+cpu ; (platform_system == "Linux") and extra == 'npu'
 Requires-Dist: torch ==2.1.2 ; (platform_system == "Windows") and extra == 'npu'
-Requires-Dist: bigdl-core-npu ==2.6.…
+Requires-Dist: bigdl-core-npu ==2.6.0b20250208 ; (platform_system == "Windows") and extra == 'npu'
 Provides-Extra: serving
 Requires-Dist: py-cpuinfo ; extra == 'serving'
 Requires-Dist: fschat[model_worker,webui] ==0.2.36 ; extra == 'serving'

@@ -80,9 +80,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu'
-Requires-Dist: bigdl-core-xe-21 ==2.6.…
-Requires-Dist: bigdl-core-xe-batch-21 ==2.6.…
-Requires-Dist: bigdl-core-xe-addons-21 ==2.6.…
+Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250208 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250208 ; extra == 'xpu'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250208 ; extra == 'xpu'
 Provides-Extra: xpu-2-1
 Requires-Dist: py-cpuinfo ; extra == 'xpu-2-1'
 Requires-Dist: protobuf ; extra == 'xpu-2-1'

@@ -97,9 +97,9 @@ Requires-Dist: setuptools <70.0.0 ; extra == 'xpu-2-1'
 Requires-Dist: torch ==2.1.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: torchvision ==0.16.0a0 ; extra == 'xpu-2-1'
 Requires-Dist: intel-extension-for-pytorch ==2.1.10+xpu ; extra == 'xpu-2-1'
-Requires-Dist: bigdl-core-xe-21 ==2.6.…
-Requires-Dist: bigdl-core-xe-batch-21 ==2.6.…
-Requires-Dist: bigdl-core-xe-addons-21 ==2.6.…
+Requires-Dist: bigdl-core-xe-21 ==2.6.0b20250208 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-batch-21 ==2.6.0b20250208 ; extra == 'xpu-2-1'
+Requires-Dist: bigdl-core-xe-addons-21 ==2.6.0b20250208 ; extra == 'xpu-2-1'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-2-1'
 Requires-Dist: dpcpp-cpp-rt ==2024.0.2 ; (platform_system == "Windows") and extra == 'xpu-2-1'
 Requires-Dist: mkl-dpcpp ==2024.0.0 ; (platform_system == "Windows") and extra == 'xpu-2-1'

@@ -117,7 +117,7 @@ Requires-Dist: setuptools ; extra == 'xpu-2-6'
 Requires-Dist: torch ==2.6.0+xpu ; extra == 'xpu-2-6'
 Requires-Dist: torchvision ==0.21.0+xpu ; extra == 'xpu-2-6'
 Requires-Dist: torchaudio ==2.6.0+xpu ; extra == 'xpu-2-6'
-Requires-Dist: bigdl-core-xe-all ==2.6.…
+Requires-Dist: bigdl-core-xe-all ==2.6.0b20250208 ; extra == 'xpu-2-6'
 Requires-Dist: onednn-devel ==2025.0.1 ; extra == 'xpu-2-6'
 Requires-Dist: onednn ==2025.0.1 ; extra == 'xpu-2-6'
 Requires-Dist: dpcpp-cpp-rt ==2025.0.2 ; extra == 'xpu-2-6'

@@ -133,9 +133,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arc'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arc'
 Requires-Dist: tabulate ; extra == 'xpu-arc'
 Requires-Dist: setuptools ; extra == 'xpu-arc'
-Requires-Dist: bigdl-core-xe-23 ==2.6.…
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.…
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.…
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250208 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250208 ; extra == 'xpu-arc'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250208 ; extra == 'xpu-arc'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arc'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arc'

@@ -156,9 +156,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-arl'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-arl'
 Requires-Dist: tabulate ; extra == 'xpu-arl'
 Requires-Dist: setuptools ; extra == 'xpu-arl'
-Requires-Dist: bigdl-core-xe-23 ==2.6.…
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.…
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.…
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250208 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250208 ; extra == 'xpu-arl'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250208 ; extra == 'xpu-arl'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-arl'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-arl'

@@ -179,9 +179,9 @@ Requires-Dist: tokenizers ==0.15.2 ; extra == 'xpu-lnl'
 Requires-Dist: accelerate ==0.23.0 ; extra == 'xpu-lnl'
 Requires-Dist: tabulate ; extra == 'xpu-lnl'
 Requires-Dist: setuptools ; extra == 'xpu-lnl'
-Requires-Dist: bigdl-core-xe-23 ==2.6.…
-Requires-Dist: bigdl-core-xe-batch-23 ==2.6.…
-Requires-Dist: bigdl-core-xe-addons-23 ==2.6.…
+Requires-Dist: bigdl-core-xe-23 ==2.6.0b20250208 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-batch-23 ==2.6.0b20250208 ; extra == 'xpu-lnl'
+Requires-Dist: bigdl-core-xe-addons-23 ==2.6.0b20250208 ; extra == 'xpu-lnl'
 Requires-Dist: intel-openmp ; (platform_machine == "x86_64" or platform_machine == "AMD64") and extra == 'xpu-lnl'
 Requires-Dist: torch ==2.3.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
 Requires-Dist: torchvision ==0.18.1+cxx11.abi ; (platform_system == "Linux") and extra == 'xpu-lnl'
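All of the bigdl-core-* pins move in lockstep with the wheel to the same 2.6.0b20250208 nightly. If you want to confirm an existing environment stays consistent after upgrading, a small check along these lines works; it is written only for illustration, and the only assumptions are the package names taken from the Requires-Dist lines above (which of them are present depends on the extra that was installed).

    from importlib.metadata import PackageNotFoundError, version

    def nightly_suffix(dist_name):
        # "2.2.0b20250208" -> "20250208"; None if the package is not installed
        # or is not a nightly build.
        try:
            v = version(dist_name)
        except PackageNotFoundError:
            return None
        return v.rsplit("b", 1)[-1] if "b" in v else None

    base = nightly_suffix("ipex-llm")
    for core in ("bigdl-core-npu", "bigdl-core-cpp", "bigdl-core-xe-21",
                 "bigdl-core-xe-23", "bigdl-core-xe-all"):
        suffix = nightly_suffix(core)
        if suffix is not None and suffix != base:
            print(f"{core} is {suffix}, expected {base}")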
{ipex_llm-2.2.0b20250207.dist-info → ipex_llm-2.2.0b20250208.dist-info}/RECORD
CHANGED

@@ -41,35 +41,35 @@ ipex_llm/langchain/llms/transformerspipelinellm.py,sha256=vm522YPPwWxxAPVvQBtxRf
 ipex_llm/langchain/vllm/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
 ipex_llm/langchain/vllm/vllm.py,sha256=6dxc-ZISZQrJilEa_HA827l75Dv9rcHpY_G6FdJ8BVs,7793
 ipex_llm/libs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ipex_llm/libs/bloom-api.dll,sha256=…
-ipex_llm/libs/bloom.dll,sha256=…
-ipex_llm/libs/gptneox-api.dll,sha256=…
-ipex_llm/libs/gptneox.dll,sha256=…
-ipex_llm/libs/libbloom_avx.dll,sha256=…
-ipex_llm/libs/libbloom_vnni.dll,sha256=…
-ipex_llm/libs/libgptneox_avx.dll,sha256=…
-ipex_llm/libs/libgptneox_vnni.dll,sha256=…
-ipex_llm/libs/libllama_avx.dll,sha256=…
-ipex_llm/libs/libllama_vnni.dll,sha256=…
-ipex_llm/libs/libstarcoder_avx.dll,sha256=…
-ipex_llm/libs/libstarcoder_vnni.dll,sha256=…
-ipex_llm/libs/llama-api.dll,sha256=…
-ipex_llm/libs/llama.dll,sha256=…
-ipex_llm/libs/main-bloom.exe,sha256…
-ipex_llm/libs/main-gptneox.exe,sha256=…
-ipex_llm/libs/main-llama.exe,sha256=…
-ipex_llm/libs/main-starcoder.exe,sha256=…
-ipex_llm/libs/pipeline.dll,sha256=…
-ipex_llm/libs/quantize-bloom.exe,sha256=…
-ipex_llm/libs/quantize-bloom_vnni.exe,sha256=…
-ipex_llm/libs/quantize-gptneox.exe,sha256=…
-ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=…
-ipex_llm/libs/quantize-llama.exe,sha256=…
-ipex_llm/libs/quantize-llama_vnni.exe,sha256=…
-ipex_llm/libs/quantize-starcoder.exe,sha256=…
-ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=…
-ipex_llm/libs/starcoder-api.dll,sha256=…
-ipex_llm/libs/starcoder.dll,sha256=…
+ipex_llm/libs/bloom-api.dll,sha256=6JzFpTxSCIDgyHljiTDIwiyj8tNjyIimr00FUg18eTw,36352
+ipex_llm/libs/bloom.dll,sha256=2W5NmA0RyQMElLCyunvANpTUBTnzahAtlFwf1IQYb_k,507904
+ipex_llm/libs/gptneox-api.dll,sha256=jlJcF2GR9ZHenAOcbcKirLRejWsdiQDhUpWbl5ulVz8,24576
+ipex_llm/libs/gptneox.dll,sha256=8f1rs4aF7OGVtIjZZv-v1bSamG6C_b68G_O2sd_dzAU,568320
+ipex_llm/libs/libbloom_avx.dll,sha256=RJ-jYk9S6Jd34ktr1B3FbEL3MuPyngfekS2HLHc3nHE,536576
+ipex_llm/libs/libbloom_vnni.dll,sha256=LCvnLsZ2uisySM4nL8cDDggRC_6Bw3OFkP-2vikhTVQ,508416
+ipex_llm/libs/libgptneox_avx.dll,sha256=aDdomyYNf9unY6S9ix6CuX2ICY6uphptRm294nRm9AU,596992
+ipex_llm/libs/libgptneox_vnni.dll,sha256=GMlR-de0HcAdw8YJzcXX1JndXgv3PmZ4nipoqyeh7ls,568832
+ipex_llm/libs/libllama_avx.dll,sha256=Y7AJTriTvqWJH8hIA6gwYsL2u_uGA35NmRPoSQSkn78,591360
+ipex_llm/libs/libllama_vnni.dll,sha256=XGd5lij9PZu63CBadK3R6mhkfqpi21wRrU9eGFpReaM,563200
+ipex_llm/libs/libstarcoder_avx.dll,sha256=ZzZFh1IW_QlHrNcE79vSsqPcA9YZSDn1LCEpCwtv0oo,627712
+ipex_llm/libs/libstarcoder_vnni.dll,sha256=dF3mLHFXKAe8TiTb2HqIbjvkQoCVK2BpRwbohFSDZ_Y,599552
+ipex_llm/libs/llama-api.dll,sha256=hg0_pZC8RjgFuTtc19NF5BMLTkchmj_X4FogUC_YeDA,25600
+ipex_llm/libs/llama.dll,sha256=3q1FkqwWE3Fo_-zZ74kWR-IOM7g-SSiSYg3Ud3wmkRo,562688
+ipex_llm/libs/main-bloom.exe,sha256=tdNzTM-5XqiWe1rLTV1qslHWUNi0VKcAWJdjaRKEAzg,103424
+ipex_llm/libs/main-gptneox.exe,sha256=XtmysEP3FoLLtJz5qTIoFPaG4MZhTAZ-0Gru65bk_7Y,98816
+ipex_llm/libs/main-llama.exe,sha256=r7ZrLzVb2_phMKmD50dz5xnrPC8H9H_IPK0T8js8x2E,99840
+ipex_llm/libs/main-starcoder.exe,sha256=udG6tPP_-3aCkFzfi8WxGD6lrauKPxwlaFhv3OF-gDg,157696
+ipex_llm/libs/pipeline.dll,sha256=_CoiiDG12fYEEeIJyqh-dAi9bK3am421KItj548mlek,73216
+ipex_llm/libs/quantize-bloom.exe,sha256=DU-zbDfk4ZZ5N8pQWDM39Xqoy_4rSsYGLhvJdZmunVQ,126464
+ipex_llm/libs/quantize-bloom_vnni.exe,sha256=trXZ2CtNiCkD1_mfvXWJf7IBgNaYRxk3sNzsYNbwBgQ,128000
+ipex_llm/libs/quantize-gptneox.exe,sha256=ZDLyAS9FaMXAZiQrb805iKHY-cR43Z-E453tS-C__bE,104448
+ipex_llm/libs/quantize-gptneox_vnni.exe,sha256=alab9U5EUVg07XwQyNdXAbjp8p3IZ_0sP56IMkpXjTU,104960
+ipex_llm/libs/quantize-llama.exe,sha256=ttvTdoGd6pWVdgA4KR9A_B_Tdg1Gt8qN1mBQrZp7JIw,110080
+ipex_llm/libs/quantize-llama_vnni.exe,sha256=6ECTUDfKLzj-G_yvohaR8z5DSPt6wwnDq2uaRwXdCDc,110592
+ipex_llm/libs/quantize-starcoder.exe,sha256=ApQMB9yxcjqUomqgjnD23rzo3IidKgG_UHXwEAQ-4EA,127488
+ipex_llm/libs/quantize-starcoder_vnni.exe,sha256=6U5MeFLPGtJrkJA6q4HE3NyDUzcmlpBz9WpwLBXS684,128512
+ipex_llm/libs/starcoder-api.dll,sha256=fVkYZ0zZ8mhERgN0hILyFt6cUpe__Y8oBW2Dq1ePinE,21504
+ipex_llm/libs/starcoder.dll,sha256=eOv12oBIIYpxBIoId8g_BuhlIlRZ32r29yDtNQQEMAg,599040
 ipex_llm/llamaindex/__init__.py,sha256=T-EbRT6GJ_8RCu-iLmSzcftOimXSPQf2d5X72AUAy2Y,874
 ipex_llm/llamaindex/llms/__init__.py,sha256=KP1lEdGqDuxPoxL1ZSH25Pm2kKMPJBWUTLR0ckSLMIU,1139
 ipex_llm/llamaindex/llms/bigdlllm.py,sha256=FQBzq1KOjfc6uofTXAha3O7TqpJkNfOFepXQmOVlbnI,26314

@@ -208,11 +208,11 @@ ipex_llm/transformers/npu_models/xlm_mp.py,sha256=sj8OVun8xJprM7ZJp0XzWa55rqlSIz
 ipex_llm/transformers/npu_pipeline_model/__init__.py,sha256=b2IXvVqQ5cItki021h8s3ymW12RPu8QNPprq4Mn3bDM,586
 ipex_llm/transformers/npu_pipeline_model/baichuan.py,sha256=ICxRzFQ4OIANDkkVi2_4xOeQXmfFXYMx3H52KuE1xR4,6208
 ipex_llm/transformers/npu_pipeline_model/common.py,sha256=faooJmM75qnVyZYuQLx9gJpVlotcVF4qXRCnOrknfk4,14776
-ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py,sha256=…
-ipex_llm/transformers/npu_pipeline_model/llama.py,sha256=…
-ipex_llm/transformers/npu_pipeline_model/minicpm.py,sha256=…
+ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py,sha256=IlvaZC9pi_ZJTWuO2dMSgCkc3V909lVhpAfktTEfSLI,29894
+ipex_llm/transformers/npu_pipeline_model/llama.py,sha256=ISJ84zj0Ce2f9vdgmoGfdxQz2LRaUMlZCEM3MV2VpoQ,14521
+ipex_llm/transformers/npu_pipeline_model/minicpm.py,sha256=Q-rUzXBlx5Ns3xemi7H6t8dnzu1q4e-MhUpJMzJmBRU,21522
 ipex_llm/transformers/npu_pipeline_model/pipeline_cpp.py,sha256=JNmodAMg_NQvDILug3E_fGXEh6cd3wsj4bvAzcd-vaU,2749
-ipex_llm/transformers/npu_pipeline_model/qwen.py,sha256=…
+ipex_llm/transformers/npu_pipeline_model/qwen.py,sha256=WEMUdGZH3INyJm-1Hfv3o41BiP037n2ftS4qPM0jaiE,16221
 ipex_llm/utils/__init__.py,sha256=LlUgrD03rfw4iY8zWPtHH6p65Gw76waVOLHaqagETw0,1425
 ipex_llm/utils/benchmark_util_4_29.py,sha256=OU1W1quiaiJGsg1pd3HM9O6PmVSaPA0HHE7R8hNTfmQ,258653
 ipex_llm/utils/benchmark_util_4_42.py,sha256=HEiClCgKDp_T64HH8ulSTly8dvt6UwPDYZfrPVYvXcc,225383

@@ -248,11 +248,11 @@ ipex_llm/vllm/xpu/engine/__init__.py,sha256=pY_CpyuZd72fr6s32ejeKHKFW0K4vUU2rzZj
 ipex_llm/vllm/xpu/engine/engine.py,sha256=k4-D27WS_Gk3mA--w3HWAjPjb4Aiu043MVPi0ZoAUBc,5984
 ipex_llm/vllm/xpu/entrypoints/openai/api_server.py,sha256=GshTZFB8e4PWvqckfbmTOU6b0oLkNn7A-vzLuG9--j8,21544
 ipex_llm/vllm/xpu/entrypoints/openai/cli_args.py,sha256=2rENA2ucynMaIjiZBEh2ez1o5vR32GaP514t39CD7KM,8676
-ipex_llm-2.2.…
-ipex_llm-2.2.…
-ipex_llm-2.2.…
-ipex_llm-2.2.…
-ipex_llm-2.2.…
-ipex_llm-2.2.…
-ipex_llm-2.2.…
-ipex_llm-2.2.…
+ipex_llm-2.2.0b20250208.data/scripts/ipex-llm-init.bat,sha256=HPtCYuDYwEatq7dAwOvdfVcHYCpAVdbj75K1qh0vQek,2578
+ipex_llm-2.2.0b20250208.data/scripts/llm-chat.ps1,sha256=6qrs-hGVAV8IKh7Jx8nq_XrnZcjd7qGU5wndArM7Yag,2769
+ipex_llm-2.2.0b20250208.data/scripts/llm-cli.ps1,sha256=3qBtTLs_EjYDnM8YyCpJhzLnGCKTEGssu9UNqfkjVXs,3009
+ipex_llm-2.2.0b20250208.dist-info/METADATA,sha256=lvf2U7DribCbFo-qabprOhWsiYrX4yLUPfykdJZ91Dk,12369
+ipex_llm-2.2.0b20250208.dist-info/WHEEL,sha256=6iYPr8vTHsyDK75jr9X0V3I9wPSVmtwr_8fdATBciGk,98
+ipex_llm-2.2.0b20250208.dist-info/entry_points.txt,sha256=TiUyBB2MRmfF3ko-pyAEzqeBCRnyhu27bNOAsWPp3e8,61
+ipex_llm-2.2.0b20250208.dist-info/top_level.txt,sha256=CGCMHM-SyqUabU4h8RqJ2KTYckQUO3LvIWwmUQ6Qbzw,9
+ipex_llm-2.2.0b20250208.dist-info/RECORD,,
Files without changes (renamed only): the {ipex_llm-2.2.0b20250207 → ipex_llm-2.2.0b20250208}.data scripts (ipex-llm-init.bat, llm-chat.ps1, llm-cli.ps1) and .dist-info files (WHEEL, entry_points.txt, top_level.txt) listed in the summary above.