bigdl-core-cpp 2.6.0b20250203__py3-none-win_amd64.whl → 2.6.0b20250204.post0__py3-none-win_amd64.whl
Sign up to get free protection for your applications and to get access to all the features.
- bigdl/cpp/convert_hf_to_gguf.py +99 -44
- bigdl/cpp/convert_hf_to_gguf_update.py +4 -1
- bigdl/cpp/convert_lora_to_gguf.py +41 -11
- bigdl/cpp/gguf-py/gguf/constants.py +79 -18
- bigdl/cpp/gguf-py/gguf/gguf.py +1 -1
- bigdl/cpp/gguf-py/gguf/gguf_reader.py +5 -6
- bigdl/cpp/gguf-py/gguf/gguf_writer.py +36 -12
- bigdl/cpp/gguf-py/gguf/metadata.py +131 -19
- bigdl/cpp/gguf-py/gguf/tensor_mapping.py +17 -15
- bigdl/cpp/gguf-py/gguf/vocab.py +24 -2
- bigdl/cpp/libs/common.lib +0 -0
- bigdl/cpp/libs/ggml-cpu.dll +0 -0
- bigdl/cpp/libs/ggml-sycl.dll +0 -0
- bigdl/cpp/libs/ggml.dll +0 -0
- bigdl/cpp/libs/llama-batched.exe +0 -0
- bigdl/cpp/libs/llama-bench.exe +0 -0
- bigdl/cpp/libs/llama-cli.exe +0 -0
- bigdl/cpp/libs/llama-embedding.exe +0 -0
- bigdl/cpp/libs/llama-gguf.exe +0 -0
- bigdl/cpp/libs/llama-llava-cli.exe +0 -0
- bigdl/cpp/libs/llama-lookup.exe +0 -0
- bigdl/cpp/libs/llama-ls-sycl-device.exe +0 -0
- bigdl/cpp/libs/llama-minicpmv-cli.exe +0 -0
- bigdl/cpp/libs/llama-perplexity.exe +0 -0
- bigdl/cpp/libs/llama-quantize.exe +0 -0
- bigdl/cpp/libs/llama-server.exe +0 -0
- bigdl/cpp/libs/llama-simple.exe +0 -0
- bigdl/cpp/libs/llama-speculative.exe +0 -0
- bigdl/cpp/libs/llama-tokenize.exe +0 -0
- bigdl/cpp/libs/llama.dll +0 -0
- bigdl/cpp/libs/llava_shared.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-base.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-cpu.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-sycl.dll +0 -0
- bigdl/cpp/libs/ollama.exe +0 -0
- bigdl/cpp/libs/ollama_ggml.dll +0 -0
- bigdl/cpp/libs/ollama_llama.dll +0 -0
- bigdl/cpp/libs/ollama_llava_shared.dll +0 -0
- {bigdl_core_cpp-2.6.0b20250203.data → bigdl_core_cpp-2.6.0b20250204.post0.data}/scripts/init-ollama.bat +1 -1
- {bigdl_core_cpp-2.6.0b20250203.dist-info → bigdl_core_cpp-2.6.0b20250204.post0.dist-info}/METADATA +1 -1
- bigdl_core_cpp-2.6.0b20250204.post0.dist-info/RECORD +54 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/ipex_llm/ollama_llama_server.exe +0 -0
- bigdl_core_cpp-2.6.0b20250203.dist-info/RECORD +0 -50
- {bigdl_core_cpp-2.6.0b20250203.data → bigdl_core_cpp-2.6.0b20250204.post0.data}/scripts/init-llama-cpp.bat +0 -0
- {bigdl_core_cpp-2.6.0b20250203.data → bigdl_core_cpp-2.6.0b20250204.post0.data}/scripts/init-llama-cpp.ps1 +0 -0
- {bigdl_core_cpp-2.6.0b20250203.dist-info → bigdl_core_cpp-2.6.0b20250204.post0.dist-info}/WHEEL +0 -0
- {bigdl_core_cpp-2.6.0b20250203.dist-info → bigdl_core_cpp-2.6.0b20250204.post0.dist-info}/top_level.txt +0 -0
@@ -568,6 +568,9 @@ class GGUFWriter:
|
|
568
568
|
def add_base_model_organization(self, source_id: int, organization: str) -> None:
|
569
569
|
self.add_string(Keys.General.BASE_MODEL_ORGANIZATION.format(id=source_id), organization)
|
570
570
|
|
571
|
+
def add_base_model_description(self, source_id: int, description: str) -> None:
|
572
|
+
self.add_string(Keys.General.BASE_MODEL_DESCRIPTION.format(id=source_id), description)
|
573
|
+
|
571
574
|
def add_base_model_url(self, source_id: int, url: str) -> None:
|
572
575
|
self.add_string(Keys.General.BASE_MODEL_URL.format(id=source_id), url)
|
573
576
|
|
@@ -580,15 +583,42 @@ class GGUFWriter:
|
|
580
583
|
def add_base_model_repo_url(self, source_id: int, repo_url: str) -> None:
|
581
584
|
self.add_string(Keys.General.BASE_MODEL_REPO_URL.format(id=source_id), repo_url)
|
582
585
|
|
586
|
+
def add_dataset_count(self, source_count: int) -> None:
|
587
|
+
self.add_uint32(Keys.General.DATASET_COUNT, source_count)
|
588
|
+
|
589
|
+
def add_dataset_name(self, source_id: int, name: str) -> None:
|
590
|
+
self.add_string(Keys.General.DATASET_NAME.format(id=source_id), name)
|
591
|
+
|
592
|
+
def add_dataset_author(self, source_id: int, author: str) -> None:
|
593
|
+
self.add_string(Keys.General.DATASET_AUTHOR.format(id=source_id), author)
|
594
|
+
|
595
|
+
def add_dataset_version(self, source_id: int, version: str) -> None:
|
596
|
+
self.add_string(Keys.General.DATASET_VERSION.format(id=source_id), version)
|
597
|
+
|
598
|
+
def add_dataset_organization(self, source_id: int, organization: str) -> None:
|
599
|
+
self.add_string(Keys.General.DATASET_ORGANIZATION.format(id=source_id), organization)
|
600
|
+
|
601
|
+
def add_dataset_description(self, source_id: int, description: str) -> None:
|
602
|
+
self.add_string(Keys.General.DATASET_DESCRIPTION.format(id=source_id), description)
|
603
|
+
|
604
|
+
def add_dataset_url(self, source_id: int, url: str) -> None:
|
605
|
+
self.add_string(Keys.General.DATASET_URL.format(id=source_id), url)
|
606
|
+
|
607
|
+
def add_dataset_doi(self, source_id: int, doi: str) -> None:
|
608
|
+
self.add_string(Keys.General.DATASET_DOI.format(id=source_id), doi)
|
609
|
+
|
610
|
+
def add_dataset_uuid(self, source_id: int, uuid: str) -> None:
|
611
|
+
self.add_string(Keys.General.DATASET_UUID.format(id=source_id), uuid)
|
612
|
+
|
613
|
+
def add_dataset_repo_url(self, source_id: int, repo_url: str) -> None:
|
614
|
+
self.add_string(Keys.General.DATASET_REPO_URL.format(id=source_id), repo_url)
|
615
|
+
|
583
616
|
def add_tags(self, tags: Sequence[str]) -> None:
|
584
617
|
self.add_array(Keys.General.TAGS, tags)
|
585
618
|
|
586
619
|
def add_languages(self, languages: Sequence[str]) -> None:
|
587
620
|
self.add_array(Keys.General.LANGUAGES, languages)
|
588
621
|
|
589
|
-
def add_datasets(self, datasets: Sequence[str]) -> None:
|
590
|
-
self.add_array(Keys.General.DATASETS, datasets)
|
591
|
-
|
592
622
|
def add_tensor_data_layout(self, layout: str) -> None:
|
593
623
|
self.add_string(Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)
|
594
624
|
|
@@ -721,6 +751,9 @@ class GGUFWriter:
|
|
721
751
|
def add_rope_dimension_count(self, count: int) -> None:
|
722
752
|
self.add_uint32(Keys.Rope.DIMENSION_COUNT.format(arch=self.arch), count)
|
723
753
|
|
754
|
+
def add_rope_dimension_sections(self, dims: Sequence[int]) -> None:
|
755
|
+
self.add_array(Keys.Rope.DIMENSION_SECTIONS.format(arch=self.arch), dims)
|
756
|
+
|
724
757
|
def add_rope_freq_base(self, value: float) -> None:
|
725
758
|
self.add_float32(Keys.Rope.FREQ_BASE.format(arch=self.arch), value)
|
726
759
|
|
@@ -843,15 +876,6 @@ class GGUFWriter:
|
|
843
876
|
|
844
877
|
self.add_string(Keys.Tokenizer.CHAT_TEMPLATE, value)
|
845
878
|
|
846
|
-
def add_prefix_token_id(self, id: int) -> None:
|
847
|
-
self.add_uint32(Keys.Tokenizer.PREFIX_ID, id)
|
848
|
-
|
849
|
-
def add_suffix_token_id(self, id: int) -> None:
|
850
|
-
self.add_uint32(Keys.Tokenizer.SUFFIX_ID, id)
|
851
|
-
|
852
|
-
def add_middle_token_id(self, id: int) -> None:
|
853
|
-
self.add_uint32(Keys.Tokenizer.MIDDLE_ID, id)
|
854
|
-
|
855
879
|
def add_eot_token_id(self, id: int) -> None:
|
856
880
|
self.add_uint32(Keys.Tokenizer.EOT_ID, id)
|
857
881
|
|
@@ -41,7 +41,7 @@ class Metadata:
|
|
41
41
|
base_models: Optional[list[dict]] = None
|
42
42
|
tags: Optional[list[str]] = None
|
43
43
|
languages: Optional[list[str]] = None
|
44
|
-
datasets: Optional[list[
|
44
|
+
datasets: Optional[list[dict]] = None
|
45
45
|
|
46
46
|
@staticmethod
|
47
47
|
def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Path] = None, model_name: Optional[str] = None, total_params: int = 0) -> Metadata:
|
@@ -91,9 +91,11 @@ class Metadata:
|
|
91
91
|
# Base Models is received here as an array of models
|
92
92
|
metadata.base_models = metadata_override.get("general.base_models", metadata.base_models)
|
93
93
|
|
94
|
+
# Datasets is received here as an array of datasets
|
95
|
+
metadata.datasets = metadata_override.get("general.datasets", metadata.datasets)
|
96
|
+
|
94
97
|
metadata.tags = metadata_override.get(Keys.General.TAGS, metadata.tags)
|
95
98
|
metadata.languages = metadata_override.get(Keys.General.LANGUAGES, metadata.languages)
|
96
|
-
metadata.datasets = metadata_override.get(Keys.General.DATASETS, metadata.datasets)
|
97
99
|
|
98
100
|
# Direct Metadata Override (via direct cli argument)
|
99
101
|
if model_name is not None:
|
@@ -346,12 +348,12 @@ class Metadata:
|
|
346
348
|
use_model_card_metadata("author", "model_creator")
|
347
349
|
use_model_card_metadata("basename", "model_type")
|
348
350
|
|
349
|
-
if "base_model" in model_card:
|
351
|
+
if "base_model" in model_card or "base_models" in model_card or "base_model_sources" in model_card:
|
350
352
|
# This represents the parent models that this is based on
|
351
353
|
# Example: stabilityai/stable-diffusion-xl-base-1.0. Can also be a list (for merges)
|
352
354
|
# Example of merges: https://huggingface.co/EmbeddedLLM/Mistral-7B-Merge-14-v0.1/blob/main/README.md
|
353
355
|
metadata_base_models = []
|
354
|
-
base_model_value = model_card.get("base_model", None)
|
356
|
+
base_model_value = model_card.get("base_model", model_card.get("base_models", model_card.get("base_model_sources", None)))
|
355
357
|
|
356
358
|
if base_model_value is not None:
|
357
359
|
if isinstance(base_model_value, str):
|
@@ -364,18 +366,106 @@ class Metadata:
|
|
364
366
|
|
365
367
|
for model_id in metadata_base_models:
|
366
368
|
# NOTE: model size of base model is assumed to be similar to the size of the current model
|
367
|
-
model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params)
|
368
369
|
base_model = {}
|
369
|
-
if
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
370
|
+
if isinstance(model_id, str):
|
371
|
+
if model_id.startswith("http://") or model_id.startswith("https://") or model_id.startswith("ssh://"):
|
372
|
+
base_model["repo_url"] = model_id
|
373
|
+
|
374
|
+
# Check if Hugging Face ID is present in URL
|
375
|
+
if "huggingface.co" in model_id:
|
376
|
+
match = re.match(r"https?://huggingface.co/([^/]+/[^/]+)$", model_id)
|
377
|
+
if match:
|
378
|
+
model_id_component = match.group(1)
|
379
|
+
model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id_component, total_params)
|
380
|
+
|
381
|
+
# Populate model dictionary with extracted components
|
382
|
+
if model_full_name_component is not None:
|
383
|
+
base_model["name"] = Metadata.id_to_title(model_full_name_component)
|
384
|
+
if org_component is not None:
|
385
|
+
base_model["organization"] = Metadata.id_to_title(org_component)
|
386
|
+
if version is not None:
|
387
|
+
base_model["version"] = version
|
388
|
+
|
389
|
+
else:
|
390
|
+
# Likely a Hugging Face ID
|
391
|
+
model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params)
|
392
|
+
|
393
|
+
# Populate model dictionary with extracted components
|
394
|
+
if model_full_name_component is not None:
|
395
|
+
base_model["name"] = Metadata.id_to_title(model_full_name_component)
|
396
|
+
if org_component is not None:
|
397
|
+
base_model["organization"] = Metadata.id_to_title(org_component)
|
398
|
+
if version is not None:
|
399
|
+
base_model["version"] = version
|
400
|
+
if org_component is not None and model_full_name_component is not None:
|
401
|
+
base_model["repo_url"] = f"https://huggingface.co/{org_component}/{model_full_name_component}"
|
402
|
+
|
403
|
+
elif isinstance(model_id, dict):
|
404
|
+
base_model = model_id
|
405
|
+
|
406
|
+
else:
|
407
|
+
logger.error(f"base model entry '{str(model_id)}' not in a known format")
|
408
|
+
|
377
409
|
metadata.base_models.append(base_model)
|
378
410
|
|
411
|
+
if "datasets" in model_card or "dataset" in model_card or "dataset_sources" in model_card:
|
412
|
+
# This represents the datasets that this was trained from
|
413
|
+
metadata_datasets = []
|
414
|
+
dataset_value = model_card.get("datasets", model_card.get("dataset", model_card.get("dataset_sources", None)))
|
415
|
+
|
416
|
+
if dataset_value is not None:
|
417
|
+
if isinstance(dataset_value, str):
|
418
|
+
metadata_datasets.append(dataset_value)
|
419
|
+
elif isinstance(dataset_value, list):
|
420
|
+
metadata_datasets.extend(dataset_value)
|
421
|
+
|
422
|
+
if metadata.datasets is None:
|
423
|
+
metadata.datasets = []
|
424
|
+
|
425
|
+
for dataset_id in metadata_datasets:
|
426
|
+
# NOTE: model size of base model is assumed to be similar to the size of the current model
|
427
|
+
dataset = {}
|
428
|
+
if isinstance(dataset_id, str):
|
429
|
+
if dataset_id.startswith(("http://", "https://", "ssh://")):
|
430
|
+
dataset["repo_url"] = dataset_id
|
431
|
+
|
432
|
+
# Check if Hugging Face ID is present in URL
|
433
|
+
if "huggingface.co" in dataset_id:
|
434
|
+
match = re.match(r"https?://huggingface.co/([^/]+/[^/]+)$", dataset_id)
|
435
|
+
if match:
|
436
|
+
dataset_id_component = match.group(1)
|
437
|
+
dataset_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(dataset_id_component, total_params)
|
438
|
+
|
439
|
+
# Populate dataset dictionary with extracted components
|
440
|
+
if dataset_name_component is not None:
|
441
|
+
dataset["name"] = Metadata.id_to_title(dataset_name_component)
|
442
|
+
if org_component is not None:
|
443
|
+
dataset["organization"] = Metadata.id_to_title(org_component)
|
444
|
+
if version is not None:
|
445
|
+
dataset["version"] = version
|
446
|
+
|
447
|
+
else:
|
448
|
+
# Likely a Hugging Face ID
|
449
|
+
dataset_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(dataset_id, total_params)
|
450
|
+
|
451
|
+
# Populate dataset dictionary with extracted components
|
452
|
+
if dataset_name_component is not None:
|
453
|
+
dataset["name"] = Metadata.id_to_title(dataset_name_component)
|
454
|
+
if org_component is not None:
|
455
|
+
dataset["organization"] = Metadata.id_to_title(org_component)
|
456
|
+
if version is not None:
|
457
|
+
dataset["version"] = version
|
458
|
+
if org_component is not None and dataset_name_component is not None:
|
459
|
+
dataset["repo_url"] = f"https://huggingface.co/{org_component}/{dataset_name_component}"
|
460
|
+
|
461
|
+
elif isinstance(dataset_id, dict):
|
462
|
+
dataset = dataset_id
|
463
|
+
|
464
|
+
else:
|
465
|
+
logger.error(f"dataset entry '{str(dataset_id)}' not in a known format")
|
466
|
+
|
467
|
+
metadata.datasets.append(dataset)
|
468
|
+
|
379
469
|
use_model_card_metadata("license", "license")
|
380
470
|
use_model_card_metadata("license_name", "license_name")
|
381
471
|
use_model_card_metadata("license_link", "license_link")
|
@@ -386,9 +476,6 @@ class Metadata:
|
|
386
476
|
use_array_model_card_metadata("languages", "languages")
|
387
477
|
use_array_model_card_metadata("languages", "language")
|
388
478
|
|
389
|
-
use_array_model_card_metadata("datasets", "datasets")
|
390
|
-
use_array_model_card_metadata("datasets", "dataset")
|
391
|
-
|
392
479
|
# Hugging Face Parameter Heuristics
|
393
480
|
####################################
|
394
481
|
|
@@ -458,7 +545,10 @@ class Metadata:
|
|
458
545
|
gguf_writer.add_size_label(self.size_label)
|
459
546
|
|
460
547
|
if self.license is not None:
|
461
|
-
|
548
|
+
if isinstance(self.license, list):
|
549
|
+
gguf_writer.add_license(",".join(self.license))
|
550
|
+
else:
|
551
|
+
gguf_writer.add_license(self.license)
|
462
552
|
if self.license_name is not None:
|
463
553
|
gguf_writer.add_license_name(self.license_name)
|
464
554
|
if self.license_link is not None:
|
@@ -493,6 +583,8 @@ class Metadata:
|
|
493
583
|
gguf_writer.add_base_model_version(key, base_model_entry["version"])
|
494
584
|
if "organization" in base_model_entry:
|
495
585
|
gguf_writer.add_base_model_organization(key, base_model_entry["organization"])
|
586
|
+
if "description" in base_model_entry:
|
587
|
+
gguf_writer.add_base_model_description(key, base_model_entry["description"])
|
496
588
|
if "url" in base_model_entry:
|
497
589
|
gguf_writer.add_base_model_url(key, base_model_entry["url"])
|
498
590
|
if "doi" in base_model_entry:
|
@@ -502,9 +594,29 @@ class Metadata:
|
|
502
594
|
if "repo_url" in base_model_entry:
|
503
595
|
gguf_writer.add_base_model_repo_url(key, base_model_entry["repo_url"])
|
504
596
|
|
597
|
+
if self.datasets is not None:
|
598
|
+
gguf_writer.add_dataset_count(len(self.datasets))
|
599
|
+
for key, dataset_entry in enumerate(self.datasets):
|
600
|
+
if "name" in dataset_entry:
|
601
|
+
gguf_writer.add_dataset_name(key, dataset_entry["name"])
|
602
|
+
if "author" in dataset_entry:
|
603
|
+
gguf_writer.add_dataset_author(key, dataset_entry["author"])
|
604
|
+
if "version" in dataset_entry:
|
605
|
+
gguf_writer.add_dataset_version(key, dataset_entry["version"])
|
606
|
+
if "organization" in dataset_entry:
|
607
|
+
gguf_writer.add_dataset_organization(key, dataset_entry["organization"])
|
608
|
+
if "description" in dataset_entry:
|
609
|
+
gguf_writer.add_dataset_description(key, dataset_entry["description"])
|
610
|
+
if "url" in dataset_entry:
|
611
|
+
gguf_writer.add_dataset_url(key, dataset_entry["url"])
|
612
|
+
if "doi" in dataset_entry:
|
613
|
+
gguf_writer.add_dataset_doi(key, dataset_entry["doi"])
|
614
|
+
if "uuid" in dataset_entry:
|
615
|
+
gguf_writer.add_dataset_uuid(key, dataset_entry["uuid"])
|
616
|
+
if "repo_url" in dataset_entry:
|
617
|
+
gguf_writer.add_dataset_repo_url(key, dataset_entry["repo_url"])
|
618
|
+
|
505
619
|
if self.tags is not None:
|
506
620
|
gguf_writer.add_tags(self.tags)
|
507
621
|
if self.languages is not None:
|
508
622
|
gguf_writer.add_languages(self.languages)
|
509
|
-
if self.datasets is not None:
|
510
|
-
gguf_writer.add_datasets(self.datasets)
|
@@ -13,7 +13,7 @@ class TensorNameMap:
|
|
13
13
|
"transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais exaone
|
14
14
|
"transformer.word_embeddings", # falcon
|
15
15
|
"word_embeddings", # bloom
|
16
|
-
"model.embed_tokens", # llama-hf nemotron olmoe
|
16
|
+
"model.embed_tokens", # llama-hf nemotron olmoe olmo2
|
17
17
|
"tok_embeddings", # llama-pth
|
18
18
|
"embeddings.word_embeddings", # bert nomic-bert
|
19
19
|
"language_model.embedding.word_embeddings", # persimmon
|
@@ -54,7 +54,7 @@ class TensorNameMap:
|
|
54
54
|
# Output
|
55
55
|
MODEL_TENSOR.OUTPUT: (
|
56
56
|
"embed_out", # gptneox
|
57
|
-
"lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe
|
57
|
+
"lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2
|
58
58
|
"output", # llama-pth bloom internlm2
|
59
59
|
"word_embeddings_for_head", # persimmon
|
60
60
|
"lm_head.linear", # phi2
|
@@ -66,7 +66,7 @@ class TensorNameMap:
|
|
66
66
|
MODEL_TENSOR.OUTPUT_NORM: (
|
67
67
|
"gpt_neox.final_layer_norm", # gptneox
|
68
68
|
"transformer.ln_f", # gpt2 gpt-j falcon jais exaone
|
69
|
-
"model.norm", # llama-hf baichuan internlm2 olmoe
|
69
|
+
"model.norm", # llama-hf baichuan internlm2 olmoe olmo2
|
70
70
|
"norm", # llama-pth
|
71
71
|
"transformer.norm_f", # mpt dbrx
|
72
72
|
"ln_f", # refact bloom qwen gpt2
|
@@ -145,7 +145,8 @@ class TensorNameMap:
|
|
145
145
|
|
146
146
|
# Attention query
|
147
147
|
MODEL_TENSOR.ATTN_Q: (
|
148
|
-
"model.layers.{bid}.self_attn.q_proj", # llama-hf nemotron olmoe
|
148
|
+
"model.layers.{bid}.self_attn.q_proj", # llama-hf nemotron olmoe olmo2
|
149
|
+
"model.layers.{bid}.self_attn.q_proj_no_perm", # llama-custom
|
149
150
|
"layers.{bid}.attention.wq", # llama-pth
|
150
151
|
"encoder.layer.{bid}.attention.self.query", # bert
|
151
152
|
"transformer.h.{bid}.attn.q_proj", # gpt-j
|
@@ -157,7 +158,8 @@ class TensorNameMap:
|
|
157
158
|
|
158
159
|
# Attention key
|
159
160
|
MODEL_TENSOR.ATTN_K: (
|
160
|
-
"model.layers.{bid}.self_attn.k_proj", # llama-hf nemotron olmoe
|
161
|
+
"model.layers.{bid}.self_attn.k_proj", # llama-hf nemotron olmoe olmo2
|
162
|
+
"model.layers.{bid}.self_attn.k_proj_no_perm", # llama-custom
|
161
163
|
"layers.{bid}.attention.wk", # llama-pth
|
162
164
|
"encoder.layer.{bid}.attention.self.key", # bert
|
163
165
|
"transformer.h.{bid}.attn.k_proj", # gpt-j
|
@@ -170,7 +172,7 @@ class TensorNameMap:
|
|
170
172
|
|
171
173
|
# Attention value
|
172
174
|
MODEL_TENSOR.ATTN_V: (
|
173
|
-
"model.layers.{bid}.self_attn.v_proj", # llama-hf nemotron olmoe
|
175
|
+
"model.layers.{bid}.self_attn.v_proj", # llama-hf nemotron olmoe olmo2
|
174
176
|
"layers.{bid}.attention.wv", # llama-pth
|
175
177
|
"encoder.layer.{bid}.attention.self.value", # bert
|
176
178
|
"transformer.h.{bid}.attn.v_proj", # gpt-j
|
@@ -188,7 +190,7 @@ class TensorNameMap:
|
|
188
190
|
"transformer.blocks.{bid}.attn.out_proj", # mpt
|
189
191
|
"transformer.h.{bid}.self_attention.dense", # falcon
|
190
192
|
"h.{bid}.self_attention.dense", # bloom
|
191
|
-
"model.layers.{bid}.self_attn.o_proj", # llama-hf nemotron olmoe
|
193
|
+
"model.layers.{bid}.self_attn.o_proj", # llama-hf nemotron olmoe olmo2
|
192
194
|
"layers.{bid}.attention.wo", # llama-pth
|
193
195
|
"encoder.layer.{bid}.attention.output.dense", # bert
|
194
196
|
"transformer.h.{bid}.attn.out_proj", # gpt-j
|
@@ -215,7 +217,7 @@ class TensorNameMap:
|
|
215
217
|
),
|
216
218
|
|
217
219
|
MODEL_TENSOR.ATTN_POST_NORM: (
|
218
|
-
"model.layers.{bid}.post_attention_layernorm", # gemma2
|
220
|
+
"model.layers.{bid}.post_attention_layernorm", # gemma2 olmo2
|
219
221
|
),
|
220
222
|
|
221
223
|
# Rotary embeddings
|
@@ -250,7 +252,7 @@ class TensorNameMap:
|
|
250
252
|
|
251
253
|
# Post feed-forward norm
|
252
254
|
MODEL_TENSOR.FFN_POST_NORM: (
|
253
|
-
"model.layers.{bid}.post_feedforward_layernorm", # gemma2
|
255
|
+
"model.layers.{bid}.post_feedforward_layernorm", # gemma2 olmo2
|
254
256
|
),
|
255
257
|
|
256
258
|
MODEL_TENSOR.FFN_GATE_INP: (
|
@@ -273,7 +275,7 @@ class TensorNameMap:
|
|
273
275
|
"transformer.blocks.{bid}.ffn.up_proj", # mpt
|
274
276
|
"transformer.h.{bid}.mlp.dense_h_to_4h", # falcon
|
275
277
|
"h.{bid}.mlp.dense_h_to_4h", # bloom
|
276
|
-
"model.layers.{bid}.mlp.up_proj", # llama-hf refact nemotron
|
278
|
+
"model.layers.{bid}.mlp.up_proj", # llama-hf refact nemotron olmo2
|
277
279
|
"layers.{bid}.feed_forward.w3", # llama-pth
|
278
280
|
"encoder.layer.{bid}.intermediate.dense", # bert
|
279
281
|
"transformer.h.{bid}.mlp.fc_in", # gpt-j
|
@@ -314,7 +316,7 @@ class TensorNameMap:
|
|
314
316
|
|
315
317
|
# Feed-forward gate
|
316
318
|
MODEL_TENSOR.FFN_GATE: (
|
317
|
-
"model.layers.{bid}.mlp.gate_proj", # llama-hf refact
|
319
|
+
"model.layers.{bid}.mlp.gate_proj", # llama-hf refact olmo2
|
318
320
|
"layers.{bid}.feed_forward.w1", # llama-pth
|
319
321
|
"transformer.h.{bid}.mlp.w2", # qwen
|
320
322
|
"transformer.h.{bid}.mlp.c_fc2", # jais
|
@@ -346,7 +348,7 @@ class TensorNameMap:
|
|
346
348
|
"transformer.blocks.{bid}.ffn.down_proj", # mpt
|
347
349
|
"transformer.h.{bid}.mlp.dense_4h_to_h", # falcon
|
348
350
|
"h.{bid}.mlp.dense_4h_to_h", # bloom
|
349
|
-
"model.layers.{bid}.mlp.down_proj", # llama-hf nemotron
|
351
|
+
"model.layers.{bid}.mlp.down_proj", # llama-hf nemotron olmo2
|
350
352
|
"layers.{bid}.feed_forward.w2", # llama-pth
|
351
353
|
"encoder.layer.{bid}.output.dense", # bert
|
352
354
|
"transformer.h.{bid}.mlp.fc_out", # gpt-j
|
@@ -383,7 +385,7 @@ class TensorNameMap:
|
|
383
385
|
MODEL_TENSOR.ATTN_Q_NORM: (
|
384
386
|
"language_model.encoder.layers.{bid}.self_attention.q_layernorm",
|
385
387
|
"model.layers.{bid}.self_attn.q_layernorm", # persimmon
|
386
|
-
"model.layers.{bid}.self_attn.q_norm", # cohere olmoe chameleon
|
388
|
+
"model.layers.{bid}.self_attn.q_norm", # cohere olmoe chameleon olmo2
|
387
389
|
"transformer.blocks.{bid}.attn.q_ln", # sea-lion
|
388
390
|
"encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
|
389
391
|
"transformer.layers.{bid}.attn.q_norm", # openelm
|
@@ -392,7 +394,7 @@ class TensorNameMap:
|
|
392
394
|
MODEL_TENSOR.ATTN_K_NORM: (
|
393
395
|
"language_model.encoder.layers.{bid}.self_attention.k_layernorm",
|
394
396
|
"model.layers.{bid}.self_attn.k_layernorm", # persimmon
|
395
|
-
"model.layers.{bid}.self_attn.k_norm", # cohere olmoe chameleon
|
397
|
+
"model.layers.{bid}.self_attn.k_norm", # cohere olmoe chameleon olmo2
|
396
398
|
"transformer.blocks.{bid}.attn.k_ln", # sea-lion
|
397
399
|
"encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
|
398
400
|
"transformer.layers.{bid}.attn.k_norm", # openelm
|
@@ -766,4 +768,4 @@ class TensorNameMap:
|
|
766
768
|
|
767
769
|
|
768
770
|
def get_tensor_name_map(arch: MODEL_ARCH, n_blocks: int) -> TensorNameMap:
|
769
|
-
return TensorNameMap(arch, n_blocks)
|
771
|
+
return TensorNameMap(arch, n_blocks)
|
bigdl/cpp/gguf-py/gguf/vocab.py
CHANGED
@@ -122,8 +122,30 @@ class SpecialVocab:
|
|
122
122
|
tokenizer = json.load(f)
|
123
123
|
if self.load_merges:
|
124
124
|
merges = tokenizer.get('model', {}).get('merges')
|
125
|
-
if isinstance(merges, list) and merges
|
126
|
-
|
125
|
+
if isinstance(merges, list) and merges:
|
126
|
+
if isinstance(merges[0], str):
|
127
|
+
self.merges = merges
|
128
|
+
elif isinstance(merges[0], list) and len(merges[0]) == 2 and isinstance(merges[0][0], str):
|
129
|
+
# New format since transformers 4.45 to support spaces in merges
|
130
|
+
# ref: https://github.com/ggerganov/llama.cpp/issues/9692
|
131
|
+
# TODO: internally store as the new format instead of converting to old
|
132
|
+
if any(' ' in s for pair in merges for s in pair):
|
133
|
+
logger.warning(f'Spaces in merges detected, encoding as {chr(ord(" ") + 256)!r}')
|
134
|
+
self.merges = [
|
135
|
+
' '.join(
|
136
|
+
[
|
137
|
+
# ensure the spaces are properly encoded
|
138
|
+
''.join(
|
139
|
+
chr(ord(c) + 256) if c == ' ' else c
|
140
|
+
for c in part
|
141
|
+
)
|
142
|
+
for part in pair
|
143
|
+
]
|
144
|
+
)
|
145
|
+
for pair in merges
|
146
|
+
]
|
147
|
+
else:
|
148
|
+
raise ValueError("Unknown tokenizer merges format")
|
127
149
|
added_tokens = tokenizer.get('added_tokens', {})
|
128
150
|
else:
|
129
151
|
added_tokens = {}
|
bigdl/cpp/libs/common.lib
CHANGED
Binary file
|
Binary file
|
Binary file
|
bigdl/cpp/libs/ggml.dll
CHANGED
Binary file
|
bigdl/cpp/libs/llama-batched.exe
CHANGED
Binary file
|
bigdl/cpp/libs/llama-bench.exe
CHANGED
Binary file
|
bigdl/cpp/libs/llama-cli.exe
CHANGED
Binary file
|
Binary file
|
bigdl/cpp/libs/llama-gguf.exe
CHANGED
Binary file
|
Binary file
|
bigdl/cpp/libs/llama-lookup.exe
CHANGED
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
bigdl/cpp/libs/llama-server.exe
CHANGED
Binary file
|
bigdl/cpp/libs/llama-simple.exe
CHANGED
Binary file
|
Binary file
|
Binary file
|
bigdl/cpp/libs/llama.dll
CHANGED
Binary file
|
bigdl/cpp/libs/llava_shared.dll
CHANGED
Binary file
|
Binary file
|
Binary file
|
Binary file
|
bigdl/cpp/libs/ollama.exe
CHANGED
Binary file
|
bigdl/cpp/libs/ollama_ggml.dll
CHANGED
Binary file
|
bigdl/cpp/libs/ollama_llama.dll
CHANGED
Binary file
|
Binary file
|
@@ -6,7 +6,7 @@ set "cpp_dir=%cpp_dir:~0,-1%"
|
|
6
6
|
set "lib_dir=%cpp_dir%\libs"
|
7
7
|
|
8
8
|
:: Create symlinks for DLLs and EXE
|
9
|
-
for %%f in (ollama.exe ollama_llama.dll ollama_ggml.dll ollama_llava_shared.dll libc++.dll) do (
|
9
|
+
for %%f in (ollama.exe ollama_llama.dll ollama_ggml.dll ollama_llava_shared.dll ollama-ggml-base.dll ollama-ggml-cpu.dll ollama-ggml-sycl.dll libc++.dll) do (
|
10
10
|
if exist "%cd%\%%f" del /f "%cd%\%%f"
|
11
11
|
mklink "%cd%\%%f" "%lib_dir%\%%f"
|
12
12
|
)
|
@@ -0,0 +1,54 @@
|
|
1
|
+
bigdl/cpp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
+
bigdl/cpp/convert_hf_to_gguf.py,sha256=HVJ6axht-K3DAmmkVkC1T1_vZJ8FprUwfNpXJKnLsUQ,210336
|
3
|
+
bigdl/cpp/convert_hf_to_gguf_update.py,sha256=4A9Q4oLh5tZAovmgKgS7bVlcCqcVm4j0SIwGK_lww9s,17004
|
4
|
+
bigdl/cpp/convert_llama_ggml_to_gguf.py,sha256=0dKjRhmFzvWV4e-cuLmaeW14JrWUtZwerBmz8mYyMvI,19556
|
5
|
+
bigdl/cpp/convert_lora_to_gguf.py,sha256=b2CUmTK-ztrJE_50DzsXK3SRZshr_LYFyUPn0UDnkiA,17270
|
6
|
+
bigdl/cpp/gguf-py/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
+
bigdl/cpp/gguf-py/gguf/__init__.py,sha256=h5GWs6SMXYR8giWZ7MTZzAc3hYsIJF-HAkdxtgXLOPo,228
|
8
|
+
bigdl/cpp/gguf-py/gguf/constants.py,sha256=hN1QPaL74Ef8iJXFE2hlRJmG1w43a2E6HwB0xCxp8ic,61425
|
9
|
+
bigdl/cpp/gguf-py/gguf/gguf.py,sha256=QpLc-xU055W2d7CEFvJp2gLIfGO63bdM24ZndZCH6rw,493
|
10
|
+
bigdl/cpp/gguf-py/gguf/gguf_reader.py,sha256=PUrx08ZwaUOz1gLw5JQ459Hi7JIeCdlHgZX7wXcTqbI,12702
|
11
|
+
bigdl/cpp/gguf-py/gguf/gguf_writer.py,sha256=e-8gwsdq0sipd8zzrXvvtVWV7mCaQD9fRsCn6_67CNs,38541
|
12
|
+
bigdl/cpp/gguf-py/gguf/lazy.py,sha256=YIYxGBWD-oKXU4HOvpHs9eiEn81HUgeSmt1mmHJlbdM,8814
|
13
|
+
bigdl/cpp/gguf-py/gguf/metadata.py,sha256=oBTb4DXi_h1L_gYm8x_JRVuEPR4GHlVHuM-iN0OxWoY,33244
|
14
|
+
bigdl/cpp/gguf-py/gguf/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
15
|
+
bigdl/cpp/gguf-py/gguf/quants.py,sha256=2z6vcK-kBefqZbYNmSEVmdZF_tXHeVb5NC6jCbBdgKc,62040
|
16
|
+
bigdl/cpp/gguf-py/gguf/tensor_mapping.py,sha256=tUkpDyeMXwf9M1Cx1HL2g6mMavngsj0UYkeWMaOPjQU,35561
|
17
|
+
bigdl/cpp/gguf-py/gguf/utility.py,sha256=LAwLstUlosYQ1oX9LlJZ-1uCmwyNtOFcJfXpkLnju0k,3003
|
18
|
+
bigdl/cpp/gguf-py/gguf/vocab.py,sha256=ulUC8XudFDuZC2SNKGNQpsU2KFP_YhkyNLIWV16jG6I,20816
|
19
|
+
bigdl/cpp/libs/common.lib,sha256=ELuS4xoqHJYsjeta8zWKSOKDZHqrnAwuWa3blD7O4F8,3955904
|
20
|
+
bigdl/cpp/libs/ggml-cpu.dll,sha256=WxBkX7Smps1FOcCaDfzxMAKlUgLTqfKvVb7kEt2GAdo,475648
|
21
|
+
bigdl/cpp/libs/ggml-sycl.dll,sha256=y0xMUN5r3kn1idTnWep_HOSp3OEMByiq-MYfFUl5Fms,5280768
|
22
|
+
bigdl/cpp/libs/ggml.dll,sha256=A8buG4Wm9lJ4byLJAaqfrjacjigfq5aLuoOMU3Gs0Lw,113152
|
23
|
+
bigdl/cpp/libs/libc++.dll,sha256=U0TVK2WfFQIJPP6Bz9SeJmgskm2iqZWJorx_DGdfKIw,1561600
|
24
|
+
bigdl/cpp/libs/llama-batched.exe,sha256=OXRvTB7SgBF3kSzluFaZ9T6AYLcRIJ-3fDGBQYMSADM,847872
|
25
|
+
bigdl/cpp/libs/llama-bench.exe,sha256=MZM1q--5V9DMr_rQjRpe54Hjbj1hzh6B87QqA_cHS0s,277504
|
26
|
+
bigdl/cpp/libs/llama-cli.exe,sha256=SbG_NSl6VY7ChmwRD2RYgxO-QI8tiTfzeoj3tq8WkHE,922624
|
27
|
+
bigdl/cpp/libs/llama-embedding.exe,sha256=msgusVBW43uLEBxq26Vjg8KKoiHPBGNiCvQR4s7HnfE,870400
|
28
|
+
bigdl/cpp/libs/llama-gguf.exe,sha256=ijuXVYdZSeRcDr35M-DmKS5sJQ0Ab_n_b0z5SQU_x9k,58880
|
29
|
+
bigdl/cpp/libs/llama-llava-cli.exe,sha256=fqgtEg_c2uXUTgmGQWJ9pFP0ie75_GkswP56qBWGZ4w,1109504
|
30
|
+
bigdl/cpp/libs/llama-lookup.exe,sha256=QCPdyiI8AsFFXjAnX1GohxOiFKqPRLoL7_G9K2ihN7Y,904704
|
31
|
+
bigdl/cpp/libs/llama-ls-sycl-device.exe,sha256=IfpDycbUBZgmohCtMUNI3k5hDpg1QkqimyVuYPMk_dw,10240
|
32
|
+
bigdl/cpp/libs/llama-minicpmv-cli.exe,sha256=j-Tk3T7Kb6N0ccR2F-UTBPuxjFYHfSs5WbfSc9HAGhQ,1107456
|
33
|
+
bigdl/cpp/libs/llama-perplexity.exe,sha256=ZEEjyN3JnTjLBwb2DtM4SOHFbgH-HzdfJlKZX_sQIhw,990720
|
34
|
+
bigdl/cpp/libs/llama-quantize.exe,sha256=5bKWjuDw3kbjCuwN0iPOobph6rfU-j0os_dYOFG0zHI,122880
|
35
|
+
bigdl/cpp/libs/llama-server.exe,sha256=32cg0YZkZTJQ24Vst7kO3RhCDN1BoOrUQUwBVPJgUes,2148864
|
36
|
+
bigdl/cpp/libs/llama-simple.exe,sha256=nIQ7VJ_I1Fqc9gsCDt_BVx8M-lIiLh_byycNPzH6lqw,62464
|
37
|
+
bigdl/cpp/libs/llama-speculative.exe,sha256=xeDoKmoJPae-r1FBTApS-z5K1AhWA13lbpUQdqGjSpg,907264
|
38
|
+
bigdl/cpp/libs/llama-tokenize.exe,sha256=cJLtVEuYfli42CJYGet0beV6m6pW7r2m44QtgOqlRt0,88576
|
39
|
+
bigdl/cpp/libs/llama.dll,sha256=2Wgwo2UBczq2s6SBP9TBZMxJMmr_P8CfobB0NTlSkis,1404928
|
40
|
+
bigdl/cpp/libs/llava_shared.dll,sha256=vBVJuzZbXjf0W14a5ZO415Wj1-qiwT_Qcex-v71RjVQ,365056
|
41
|
+
bigdl/cpp/libs/ollama-ggml-base.dll,sha256=2yGQxfoge3KDesOWcwa8ncFMvyvg7Jd8pl_pvMN6w5w,459776
|
42
|
+
bigdl/cpp/libs/ollama-ggml-cpu.dll,sha256=iQEVXQK_tHmGY4UpMrPPj6G9ssbPxHeJQR0s5rqG6cU,475648
|
43
|
+
bigdl/cpp/libs/ollama-ggml-sycl.dll,sha256=Y_0PbkDf48ypCVA71QLXap_Wgbd5n76m4gKQEt64NFk,5280768
|
44
|
+
bigdl/cpp/libs/ollama.exe,sha256=tTdj_U3k2XHx4R2e7Z_tTOaqnOtQAJQqWNdo6oxTnHU,25979904
|
45
|
+
bigdl/cpp/libs/ollama_ggml.dll,sha256=TPiPOCTK2Lfcf4X03NIVwiYTMPeYAKX9VTmeI9b3RKc,113152
|
46
|
+
bigdl/cpp/libs/ollama_llama.dll,sha256=UGD_a9ok5m0vnLGtF2cSHwnQJRAV6XaGDBMMdIT0g00,1427968
|
47
|
+
bigdl/cpp/libs/ollama_llava_shared.dll,sha256=3e7S_cVWEvfUUEUYWUmTpeDsMS7Wr5_Hofc4lAUmK_w,365056
|
48
|
+
bigdl_core_cpp-2.6.0b20250204.post0.data/scripts/init-llama-cpp.bat,sha256=U0h6RifZxL3GGJp-0dxdZapQIvXUATSj644CURJL-lg,751
|
49
|
+
bigdl_core_cpp-2.6.0b20250204.post0.data/scripts/init-llama-cpp.ps1,sha256=JFOylLxO4MKpllHhdbPuJ1xHi9azxDpzdJns8JtZpkU,501
|
50
|
+
bigdl_core_cpp-2.6.0b20250204.post0.data/scripts/init-ollama.bat,sha256=DKWd9wFMT7hcNVQViMxG3CnqF4iZXCh2BmuS3zQjAuw,642
|
51
|
+
bigdl_core_cpp-2.6.0b20250204.post0.dist-info/METADATA,sha256=mLLb2aUYHIfuJCKqAxZ2Wy1s8hPohkWVoh8m2gGrn90,756
|
52
|
+
bigdl_core_cpp-2.6.0b20250204.post0.dist-info/WHEEL,sha256=2wr--P33L_Xt79Mrb57-zn6CrTlNaEVHEwbOduMxJRg,97
|
53
|
+
bigdl_core_cpp-2.6.0b20250204.post0.dist-info/top_level.txt,sha256=iGuLfZARD_qANcIMfy0tbbrC3EtCg6BSiH8icc3dLWs,6
|
54
|
+
bigdl_core_cpp-2.6.0b20250204.post0.dist-info/RECORD,,
|