bigdl-core-cpp 2.6.0b20250203__py3-none-win_amd64.whl → 2.6.0b20250204.post0__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bigdl/cpp/convert_hf_to_gguf.py +99 -44
- bigdl/cpp/convert_hf_to_gguf_update.py +4 -1
- bigdl/cpp/convert_lora_to_gguf.py +41 -11
- bigdl/cpp/gguf-py/gguf/constants.py +79 -18
- bigdl/cpp/gguf-py/gguf/gguf.py +1 -1
- bigdl/cpp/gguf-py/gguf/gguf_reader.py +5 -6
- bigdl/cpp/gguf-py/gguf/gguf_writer.py +36 -12
- bigdl/cpp/gguf-py/gguf/metadata.py +131 -19
- bigdl/cpp/gguf-py/gguf/tensor_mapping.py +17 -15
- bigdl/cpp/gguf-py/gguf/vocab.py +24 -2
- bigdl/cpp/libs/common.lib +0 -0
- bigdl/cpp/libs/ggml-cpu.dll +0 -0
- bigdl/cpp/libs/ggml-sycl.dll +0 -0
- bigdl/cpp/libs/ggml.dll +0 -0
- bigdl/cpp/libs/llama-batched.exe +0 -0
- bigdl/cpp/libs/llama-bench.exe +0 -0
- bigdl/cpp/libs/llama-cli.exe +0 -0
- bigdl/cpp/libs/llama-embedding.exe +0 -0
- bigdl/cpp/libs/llama-gguf.exe +0 -0
- bigdl/cpp/libs/llama-llava-cli.exe +0 -0
- bigdl/cpp/libs/llama-lookup.exe +0 -0
- bigdl/cpp/libs/llama-ls-sycl-device.exe +0 -0
- bigdl/cpp/libs/llama-minicpmv-cli.exe +0 -0
- bigdl/cpp/libs/llama-perplexity.exe +0 -0
- bigdl/cpp/libs/llama-quantize.exe +0 -0
- bigdl/cpp/libs/llama-server.exe +0 -0
- bigdl/cpp/libs/llama-simple.exe +0 -0
- bigdl/cpp/libs/llama-speculative.exe +0 -0
- bigdl/cpp/libs/llama-tokenize.exe +0 -0
- bigdl/cpp/libs/llama.dll +0 -0
- bigdl/cpp/libs/llava_shared.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-base.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-cpu.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-sycl.dll +0 -0
- bigdl/cpp/libs/ollama.exe +0 -0
- bigdl/cpp/libs/ollama_ggml.dll +0 -0
- bigdl/cpp/libs/ollama_llama.dll +0 -0
- bigdl/cpp/libs/ollama_llava_shared.dll +0 -0
- {bigdl_core_cpp-2.6.0b20250203.data → bigdl_core_cpp-2.6.0b20250204.post0.data}/scripts/init-ollama.bat +1 -1
- {bigdl_core_cpp-2.6.0b20250203.dist-info → bigdl_core_cpp-2.6.0b20250204.post0.dist-info}/METADATA +1 -1
- bigdl_core_cpp-2.6.0b20250204.post0.dist-info/RECORD +54 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/ipex_llm/ollama_llama_server.exe +0 -0
- bigdl_core_cpp-2.6.0b20250203.dist-info/RECORD +0 -50
- {bigdl_core_cpp-2.6.0b20250203.data → bigdl_core_cpp-2.6.0b20250204.post0.data}/scripts/init-llama-cpp.bat +0 -0
- {bigdl_core_cpp-2.6.0b20250203.data → bigdl_core_cpp-2.6.0b20250204.post0.data}/scripts/init-llama-cpp.ps1 +0 -0
- {bigdl_core_cpp-2.6.0b20250203.dist-info → bigdl_core_cpp-2.6.0b20250204.post0.dist-info}/WHEEL +0 -0
- {bigdl_core_cpp-2.6.0b20250203.dist-info → bigdl_core_cpp-2.6.0b20250204.post0.dist-info}/top_level.txt +0 -0
@@ -568,6 +568,9 @@ class GGUFWriter:
|
|
568
568
|
def add_base_model_organization(self, source_id: int, organization: str) -> None:
|
569
569
|
self.add_string(Keys.General.BASE_MODEL_ORGANIZATION.format(id=source_id), organization)
|
570
570
|
|
571
|
+
def add_base_model_description(self, source_id: int, description: str) -> None:
|
572
|
+
self.add_string(Keys.General.BASE_MODEL_DESCRIPTION.format(id=source_id), description)
|
573
|
+
|
571
574
|
def add_base_model_url(self, source_id: int, url: str) -> None:
|
572
575
|
self.add_string(Keys.General.BASE_MODEL_URL.format(id=source_id), url)
|
573
576
|
|
@@ -580,15 +583,42 @@ class GGUFWriter:
|
|
580
583
|
def add_base_model_repo_url(self, source_id: int, repo_url: str) -> None:
|
581
584
|
self.add_string(Keys.General.BASE_MODEL_REPO_URL.format(id=source_id), repo_url)
|
582
585
|
|
586
|
+
def add_dataset_count(self, source_count: int) -> None:
|
587
|
+
self.add_uint32(Keys.General.DATASET_COUNT, source_count)
|
588
|
+
|
589
|
+
def add_dataset_name(self, source_id: int, name: str) -> None:
|
590
|
+
self.add_string(Keys.General.DATASET_NAME.format(id=source_id), name)
|
591
|
+
|
592
|
+
def add_dataset_author(self, source_id: int, author: str) -> None:
|
593
|
+
self.add_string(Keys.General.DATASET_AUTHOR.format(id=source_id), author)
|
594
|
+
|
595
|
+
def add_dataset_version(self, source_id: int, version: str) -> None:
|
596
|
+
self.add_string(Keys.General.DATASET_VERSION.format(id=source_id), version)
|
597
|
+
|
598
|
+
def add_dataset_organization(self, source_id: int, organization: str) -> None:
|
599
|
+
self.add_string(Keys.General.DATASET_ORGANIZATION.format(id=source_id), organization)
|
600
|
+
|
601
|
+
def add_dataset_description(self, source_id: int, description: str) -> None:
|
602
|
+
self.add_string(Keys.General.DATASET_DESCRIPTION.format(id=source_id), description)
|
603
|
+
|
604
|
+
def add_dataset_url(self, source_id: int, url: str) -> None:
|
605
|
+
self.add_string(Keys.General.DATASET_URL.format(id=source_id), url)
|
606
|
+
|
607
|
+
def add_dataset_doi(self, source_id: int, doi: str) -> None:
|
608
|
+
self.add_string(Keys.General.DATASET_DOI.format(id=source_id), doi)
|
609
|
+
|
610
|
+
def add_dataset_uuid(self, source_id: int, uuid: str) -> None:
|
611
|
+
self.add_string(Keys.General.DATASET_UUID.format(id=source_id), uuid)
|
612
|
+
|
613
|
+
def add_dataset_repo_url(self, source_id: int, repo_url: str) -> None:
|
614
|
+
self.add_string(Keys.General.DATASET_REPO_URL.format(id=source_id), repo_url)
|
615
|
+
|
583
616
|
def add_tags(self, tags: Sequence[str]) -> None:
|
584
617
|
self.add_array(Keys.General.TAGS, tags)
|
585
618
|
|
586
619
|
def add_languages(self, languages: Sequence[str]) -> None:
|
587
620
|
self.add_array(Keys.General.LANGUAGES, languages)
|
588
621
|
|
589
|
-
def add_datasets(self, datasets: Sequence[str]) -> None:
|
590
|
-
self.add_array(Keys.General.DATASETS, datasets)
|
591
|
-
|
592
622
|
def add_tensor_data_layout(self, layout: str) -> None:
|
593
623
|
self.add_string(Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)
|
594
624
|
|
@@ -721,6 +751,9 @@ class GGUFWriter:
|
|
721
751
|
def add_rope_dimension_count(self, count: int) -> None:
|
722
752
|
self.add_uint32(Keys.Rope.DIMENSION_COUNT.format(arch=self.arch), count)
|
723
753
|
|
754
|
+
def add_rope_dimension_sections(self, dims: Sequence[int]) -> None:
|
755
|
+
self.add_array(Keys.Rope.DIMENSION_SECTIONS.format(arch=self.arch), dims)
|
756
|
+
|
724
757
|
def add_rope_freq_base(self, value: float) -> None:
|
725
758
|
self.add_float32(Keys.Rope.FREQ_BASE.format(arch=self.arch), value)
|
726
759
|
|
@@ -843,15 +876,6 @@ class GGUFWriter:
|
|
843
876
|
|
844
877
|
self.add_string(Keys.Tokenizer.CHAT_TEMPLATE, value)
|
845
878
|
|
846
|
-
def add_prefix_token_id(self, id: int) -> None:
|
847
|
-
self.add_uint32(Keys.Tokenizer.PREFIX_ID, id)
|
848
|
-
|
849
|
-
def add_suffix_token_id(self, id: int) -> None:
|
850
|
-
self.add_uint32(Keys.Tokenizer.SUFFIX_ID, id)
|
851
|
-
|
852
|
-
def add_middle_token_id(self, id: int) -> None:
|
853
|
-
self.add_uint32(Keys.Tokenizer.MIDDLE_ID, id)
|
854
|
-
|
855
879
|
def add_eot_token_id(self, id: int) -> None:
|
856
880
|
self.add_uint32(Keys.Tokenizer.EOT_ID, id)
|
857
881
|
|
@@ -41,7 +41,7 @@ class Metadata:
|
|
41
41
|
base_models: Optional[list[dict]] = None
|
42
42
|
tags: Optional[list[str]] = None
|
43
43
|
languages: Optional[list[str]] = None
|
44
|
-
datasets: Optional[list[str]] = None
|
44
|
+
datasets: Optional[list[dict]] = None
|
45
45
|
|
46
46
|
@staticmethod
|
47
47
|
def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Path] = None, model_name: Optional[str] = None, total_params: int = 0) -> Metadata:
|
@@ -91,9 +91,11 @@ class Metadata:
|
|
91
91
|
# Base Models is received here as an array of models
|
92
92
|
metadata.base_models = metadata_override.get("general.base_models", metadata.base_models)
|
93
93
|
|
94
|
+
# Datasets is received here as an array of datasets
|
95
|
+
metadata.datasets = metadata_override.get("general.datasets", metadata.datasets)
|
96
|
+
|
94
97
|
metadata.tags = metadata_override.get(Keys.General.TAGS, metadata.tags)
|
95
98
|
metadata.languages = metadata_override.get(Keys.General.LANGUAGES, metadata.languages)
|
96
|
-
metadata.datasets = metadata_override.get(Keys.General.DATASETS, metadata.datasets)
|
97
99
|
|
98
100
|
# Direct Metadata Override (via direct cli argument)
|
99
101
|
if model_name is not None:
|
@@ -346,12 +348,12 @@ class Metadata:
|
|
346
348
|
use_model_card_metadata("author", "model_creator")
|
347
349
|
use_model_card_metadata("basename", "model_type")
|
348
350
|
|
349
|
-
if "base_model" in model_card:
|
351
|
+
if "base_model" in model_card or "base_models" in model_card or "base_model_sources" in model_card:
|
350
352
|
# This represents the parent models that this is based on
|
351
353
|
# Example: stabilityai/stable-diffusion-xl-base-1.0. Can also be a list (for merges)
|
352
354
|
# Example of merges: https://huggingface.co/EmbeddedLLM/Mistral-7B-Merge-14-v0.1/blob/main/README.md
|
353
355
|
metadata_base_models = []
|
354
|
-
base_model_value = model_card.get("base_model", None)
|
356
|
+
base_model_value = model_card.get("base_model", model_card.get("base_models", model_card.get("base_model_sources", None)))
|
355
357
|
|
356
358
|
if base_model_value is not None:
|
357
359
|
if isinstance(base_model_value, str):
|
@@ -364,18 +366,106 @@ class Metadata:
|
|
364
366
|
|
365
367
|
for model_id in metadata_base_models:
|
366
368
|
# NOTE: model size of base model is assumed to be similar to the size of the current model
|
367
|
-
model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params)
|
368
369
|
base_model = {}
|
369
|
-
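if model_full_name_component is not None:
|
370
|
-
base_model["name"] = Metadata.id_to_title(model_full_name_component)
|
371
|
-
if org_component is not None:
|
372
|
-
base_model["organization"] = Metadata.id_to_title(org_component)
|
373
|
-
if version is not None:
|
374
|
-
base_model["version"] = version
|
375
|
-
if org_component is not None and model_full_name_component is not None:
|
376
|
-
base_model["repo_url"] = f"https://huggingface.co/{org_component}/{model_full_name_component}"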
|
370
|
+
if isinstance(model_id, str):
|
371
|
+
if model_id.startswith("http://") or model_id.startswith("https://") or model_id.startswith("ssh://"):
|
372
|
+
base_model["repo_url"] = model_id
|
373
|
+
|
374
|
+
# Check if Hugging Face ID is present in URL
|
375
|
+
if "huggingface.co" in model_id:
|
376
|
+
match = re.match(r"https?://huggingface.co/([^/]+/[^/]+)$", model_id)
|
377
|
+
if match:
|
378
|
+
model_id_component = match.group(1)
|
379
|
+
model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id_component, total_params)
|
380
|
+
|
381
|
+
# Populate model dictionary with extracted components
|
382
|
+
if model_full_name_component is not None:
|
383
|
+
base_model["name"] = Metadata.id_to_title(model_full_name_component)
|
384
|
+
if org_component is not None:
|
385
|
+
base_model["organization"] = Metadata.id_to_title(org_component)
|
386
|
+
if version is not None:
|
387
|
+
base_model["version"] = version
|
388
|
+
|
389
|
+
else:
|
390
|
+
# Likely a Hugging Face ID
|
391
|
+
model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params)
|
392
|
+
|
393
|
+
# Populate model dictionary with extracted components
|
394
|
+
if model_full_name_component is not None:
|
395
|
+
base_model["name"] = Metadata.id_to_title(model_full_name_component)
|
396
|
+
if org_component is not None:
|
397
|
+
base_model["organization"] = Metadata.id_to_title(org_component)
|
398
|
+
if version is not None:
|
399
|
+
base_model["version"] = version
|
400
|
+
if org_component is not None and model_full_name_component is not None:
|
401
|
+
base_model["repo_url"] = f"https://huggingface.co/{org_component}/{model_full_name_component}"
|
402
|
+
|
403
|
+
elif isinstance(model_id, dict):
|
404
|
+
base_model = model_id
|
405
|
+
|
406
|
+
else:
|
407
|
+
logger.error(f"base model entry '{str(model_id)}' not in a known format")
|
408
|
+
|
377
409
|
metadata.base_models.append(base_model)
|
378
410
|
|
411
|
+
if "datasets" in model_card or "dataset" in model_card or "dataset_sources" in model_card:
|
412
|
+
# This represents the datasets that this was trained from
|
413
|
+
metadata_datasets = []
|
414
|
+
dataset_value = model_card.get("datasets", model_card.get("dataset", model_card.get("dataset_sources", None)))
|
415
|
+
|
416
|
+
if dataset_value is not None:
|
417
|
+
if isinstance(dataset_value, str):
|
418
|
+
metadata_datasets.append(dataset_value)
|
419
|
+
elif isinstance(dataset_value, list):
|
420
|
+
metadata_datasets.extend(dataset_value)
|
421
|
+
|
422
|
+
if metadata.datasets is None:
|
423
|
+
metadata.datasets = []
|
424
|
+
|
425
|
+
for dataset_id in metadata_datasets:
|
426
|
+
# NOTE: model size of base model is assumed to be similar to the size of the current model
|
427
|
+
dataset = {}
|
428
|
+
if isinstance(dataset_id, str):
|
429
|
+
if dataset_id.startswith(("http://", "https://", "ssh://")):
|
430
|
+
dataset["repo_url"] = dataset_id
|
431
|
+
|
432
|
+
# Check if Hugging Face ID is present in URL
|
433
|
+
if "huggingface.co" in dataset_id:
|
434
|
+
match = re.match(r"https?://huggingface.co/([^/]+/[^/]+)$", dataset_id)
|
435
|
+
if match:
|
436
|
+
dataset_id_component = match.group(1)
|
437
|
+
dataset_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(dataset_id_component, total_params)
|
438
|
+
|
439
|
+
# Populate dataset dictionary with extracted components
|
440
|
+
if dataset_name_component is not None:
|
441
|
+
dataset["name"] = Metadata.id_to_title(dataset_name_component)
|
442
|
+
if org_component is not None:
|
443
|
+
dataset["organization"] = Metadata.id_to_title(org_component)
|
444
|
+
if version is not None:
|
445
|
+
dataset["version"] = version
|
446
|
+
|
447
|
+
else:
|
448
|
+
# Likely a Hugging Face ID
|
449
|
+
dataset_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(dataset_id, total_params)
|
450
|
+
|
451
|
+
# Populate dataset dictionary with extracted components
|
452
|
+
if dataset_name_component is not None:
|
453
|
+
dataset["name"] = Metadata.id_to_title(dataset_name_component)
|
454
|
+
if org_component is not None:
|
455
|
+
dataset["organization"] = Metadata.id_to_title(org_component)
|
456
|
+
if version is not None:
|
457
|
+
dataset["version"] = version
|
458
|
+
if org_component is not None and dataset_name_component is not None:
|
459
|
+
dataset["repo_url"] = f"https://huggingface.co/{org_component}/{dataset_name_component}"
|
460
|
+
|
461
|
+
elif isinstance(dataset_id, dict):
|
462
|
+
dataset = dataset_id
|
463
|
+
|
464
|
+
else:
|
465
|
+
logger.error(f"dataset entry '{str(dataset_id)}' not in a known format")
|
466
|
+
|
467
|
+
metadata.datasets.append(dataset)
|
468
|
+
|
379
469
|
use_model_card_metadata("license", "license")
|
380
470
|
use_model_card_metadata("license_name", "license_name")
|
381
471
|
use_model_card_metadata("license_link", "license_link")
|
@@ -386,9 +476,6 @@ class Metadata:
|
|
386
476
|
use_array_model_card_metadata("languages", "languages")
|
387
477
|
use_array_model_card_metadata("languages", "language")
|
388
478
|
|
389
|
-
use_array_model_card_metadata("datasets", "datasets")
|
390
|
-
use_array_model_card_metadata("datasets", "dataset")
|
391
|
-
|
392
479
|
# Hugging Face Parameter Heuristics
|
393
480
|
####################################
|
394
481
|
|
@@ -458,7 +545,10 @@ class Metadata:
|
|
458
545
|
gguf_writer.add_size_label(self.size_label)
|
459
546
|
|
460
547
|
if self.license is not None:
|
461
|
-
gguf_writer.add_license(self.license)
|
548
|
+
if isinstance(self.license, list):
|
549
|
+
gguf_writer.add_license(",".join(self.license))
|
550
|
+
else:
|
551
|
+
gguf_writer.add_license(self.license)
|
462
552
|
if self.license_name is not None:
|
463
553
|
gguf_writer.add_license_name(self.license_name)
|
464
554
|
if self.license_link is not None:
|
@@ -493,6 +583,8 @@ class Metadata:
|
|
493
583
|
gguf_writer.add_base_model_version(key, base_model_entry["version"])
|
494
584
|
if "organization" in base_model_entry:
|
495
585
|
gguf_writer.add_base_model_organization(key, base_model_entry["organization"])
|
586
|
+
if "description" in base_model_entry:
|
587
|
+
gguf_writer.add_base_model_description(key, base_model_entry["description"])
|
496
588
|
if "url" in base_model_entry:
|
497
589
|
gguf_writer.add_base_model_url(key, base_model_entry["url"])
|
498
590
|
if "doi" in base_model_entry:
|
@@ -502,9 +594,29 @@ class Metadata:
|
|
502
594
|
if "repo_url" in base_model_entry:
|
503
595
|
gguf_writer.add_base_model_repo_url(key, base_model_entry["repo_url"])
|
504
596
|
|
597
|
+
if self.datasets is not None:
|
598
|
+
gguf_writer.add_dataset_count(len(self.datasets))
|
599
|
+
for key, dataset_entry in enumerate(self.datasets):
|
600
|
+
if "name" in dataset_entry:
|
601
|
+
gguf_writer.add_dataset_name(key, dataset_entry["name"])
|
602
|
+
if "author" in dataset_entry:
|
603
|
+
gguf_writer.add_dataset_author(key, dataset_entry["author"])
|
604
|
+
if "version" in dataset_entry:
|
605
|
+
gguf_writer.add_dataset_version(key, dataset_entry["version"])
|
606
|
+
if "organization" in dataset_entry:
|
607
|
+
gguf_writer.add_dataset_organization(key, dataset_entry["organization"])
|
608
|
+
if "description" in dataset_entry:
|
609
|
+
gguf_writer.add_dataset_description(key, dataset_entry["description"])
|
610
|
+
if "url" in dataset_entry:
|
611
|
+
gguf_writer.add_dataset_url(key, dataset_entry["url"])
|
612
|
+
if "doi" in dataset_entry:
|
613
|
+
gguf_writer.add_dataset_doi(key, dataset_entry["doi"])
|
614
|
+
if "uuid" in dataset_entry:
|
615
|
+
gguf_writer.add_dataset_uuid(key, dataset_entry["uuid"])
|
616
|
+
if "repo_url" in dataset_entry:
|
617
|
+
gguf_writer.add_dataset_repo_url(key, dataset_entry["repo_url"])
|
618
|
+
|
505
619
|
if self.tags is not None:
|
506
620
|
gguf_writer.add_tags(self.tags)
|
507
621
|
if self.languages is not None:
|
508
622
|
gguf_writer.add_languages(self.languages)
|
509
|
-
if self.datasets is not None:
|
510
|
-
gguf_writer.add_datasets(self.datasets)
|
@@ -13,7 +13,7 @@ class TensorNameMap:
|
|
13
13
|
"transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais exaone
|
14
14
|
"transformer.word_embeddings", # falcon
|
15
15
|
"word_embeddings", # bloom
|
16
|
-
"model.embed_tokens", # llama-hf nemotron olmoe
|
16
|
+
"model.embed_tokens", # llama-hf nemotron olmoe olmo2
|
17
17
|
"tok_embeddings", # llama-pth
|
18
18
|
"embeddings.word_embeddings", # bert nomic-bert
|
19
19
|
"language_model.embedding.word_embeddings", # persimmon
|
@@ -54,7 +54,7 @@ class TensorNameMap:
|
|
54
54
|
# Output
|
55
55
|
MODEL_TENSOR.OUTPUT: (
|
56
56
|
"embed_out", # gptneox
|
57
|
-
"lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe
|
57
|
+
"lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2
|
58
58
|
"output", # llama-pth bloom internlm2
|
59
59
|
"word_embeddings_for_head", # persimmon
|
60
60
|
"lm_head.linear", # phi2
|
@@ -66,7 +66,7 @@ class TensorNameMap:
|
|
66
66
|
MODEL_TENSOR.OUTPUT_NORM: (
|
67
67
|
"gpt_neox.final_layer_norm", # gptneox
|
68
68
|
"transformer.ln_f", # gpt2 gpt-j falcon jais exaone
|
69
|
-
"model.norm", # llama-hf baichuan internlm2 olmoe
|
69
|
+
"model.norm", # llama-hf baichuan internlm2 olmoe olmo2
|
70
70
|
"norm", # llama-pth
|
71
71
|
"transformer.norm_f", # mpt dbrx
|
72
72
|
"ln_f", # refact bloom qwen gpt2
|
@@ -145,7 +145,8 @@ class TensorNameMap:
|
|
145
145
|
|
146
146
|
# Attention query
|
147
147
|
MODEL_TENSOR.ATTN_Q: (
|
148
|
-
"model.layers.{bid}.self_attn.q_proj", # llama-hf nemotron olmoe
|
148
|
+
"model.layers.{bid}.self_attn.q_proj", # llama-hf nemotron olmoe olmo2
|
149
|
+
"model.layers.{bid}.self_attn.q_proj_no_perm", # llama-custom
|
149
150
|
"layers.{bid}.attention.wq", # llama-pth
|
150
151
|
"encoder.layer.{bid}.attention.self.query", # bert
|
151
152
|
"transformer.h.{bid}.attn.q_proj", # gpt-j
|
@@ -157,7 +158,8 @@ class TensorNameMap:
|
|
157
158
|
|
158
159
|
# Attention key
|
159
160
|
MODEL_TENSOR.ATTN_K: (
|
160
|
-
"model.layers.{bid}.self_attn.k_proj", # llama-hf nemotron olmoe
|
161
|
+
"model.layers.{bid}.self_attn.k_proj", # llama-hf nemotron olmoe olmo2
|
162
|
+
"model.layers.{bid}.self_attn.k_proj_no_perm", # llama-custom
|
161
163
|
"layers.{bid}.attention.wk", # llama-pth
|
162
164
|
"encoder.layer.{bid}.attention.self.key", # bert
|
163
165
|
"transformer.h.{bid}.attn.k_proj", # gpt-j
|
@@ -170,7 +172,7 @@ class TensorNameMap:
|
|
170
172
|
|
171
173
|
# Attention value
|
172
174
|
MODEL_TENSOR.ATTN_V: (
|
173
|
-
"model.layers.{bid}.self_attn.v_proj", # llama-hf nemotron olmoe
|
175
|
+
"model.layers.{bid}.self_attn.v_proj", # llama-hf nemotron olmoe olmo2
|
174
176
|
"layers.{bid}.attention.wv", # llama-pth
|
175
177
|
"encoder.layer.{bid}.attention.self.value", # bert
|
176
178
|
"transformer.h.{bid}.attn.v_proj", # gpt-j
|
@@ -188,7 +190,7 @@ class TensorNameMap:
|
|
188
190
|
"transformer.blocks.{bid}.attn.out_proj", # mpt
|
189
191
|
"transformer.h.{bid}.self_attention.dense", # falcon
|
190
192
|
"h.{bid}.self_attention.dense", # bloom
|
191
|
-
"model.layers.{bid}.self_attn.o_proj", # llama-hf nemotron olmoe
|
193
|
+
"model.layers.{bid}.self_attn.o_proj", # llama-hf nemotron olmoe olmo2
|
192
194
|
"layers.{bid}.attention.wo", # llama-pth
|
193
195
|
"encoder.layer.{bid}.attention.output.dense", # bert
|
194
196
|
"transformer.h.{bid}.attn.out_proj", # gpt-j
|
@@ -215,7 +217,7 @@ class TensorNameMap:
|
|
215
217
|
),
|
216
218
|
|
217
219
|
MODEL_TENSOR.ATTN_POST_NORM: (
|
218
|
-
"model.layers.{bid}.post_attention_layernorm", # gemma2
|
220
|
+
"model.layers.{bid}.post_attention_layernorm", # gemma2 olmo2
|
219
221
|
),
|
220
222
|
|
221
223
|
# Rotary embeddings
|
@@ -250,7 +252,7 @@ class TensorNameMap:
|
|
250
252
|
|
251
253
|
# Post feed-forward norm
|
252
254
|
MODEL_TENSOR.FFN_POST_NORM: (
|
253
|
-
"model.layers.{bid}.post_feedforward_layernorm", # gemma2
|
255
|
+
"model.layers.{bid}.post_feedforward_layernorm", # gemma2 olmo2
|
254
256
|
),
|
255
257
|
|
256
258
|
MODEL_TENSOR.FFN_GATE_INP: (
|
@@ -273,7 +275,7 @@ class TensorNameMap:
|
|
273
275
|
"transformer.blocks.{bid}.ffn.up_proj", # mpt
|
274
276
|
"transformer.h.{bid}.mlp.dense_h_to_4h", # falcon
|
275
277
|
"h.{bid}.mlp.dense_h_to_4h", # bloom
|
276
|
-
"model.layers.{bid}.mlp.up_proj", # llama-hf refact nemotron
|
278
|
+
"model.layers.{bid}.mlp.up_proj", # llama-hf refact nemotron olmo2
|
277
279
|
"layers.{bid}.feed_forward.w3", # llama-pth
|
278
280
|
"encoder.layer.{bid}.intermediate.dense", # bert
|
279
281
|
"transformer.h.{bid}.mlp.fc_in", # gpt-j
|
@@ -314,7 +316,7 @@ class TensorNameMap:
|
|
314
316
|
|
315
317
|
# Feed-forward gate
|
316
318
|
MODEL_TENSOR.FFN_GATE: (
|
317
|
-
"model.layers.{bid}.mlp.gate_proj", # llama-hf refact
|
319
|
+
"model.layers.{bid}.mlp.gate_proj", # llama-hf refact olmo2
|
318
320
|
"layers.{bid}.feed_forward.w1", # llama-pth
|
319
321
|
"transformer.h.{bid}.mlp.w2", # qwen
|
320
322
|
"transformer.h.{bid}.mlp.c_fc2", # jais
|
@@ -346,7 +348,7 @@ class TensorNameMap:
|
|
346
348
|
"transformer.blocks.{bid}.ffn.down_proj", # mpt
|
347
349
|
"transformer.h.{bid}.mlp.dense_4h_to_h", # falcon
|
348
350
|
"h.{bid}.mlp.dense_4h_to_h", # bloom
|
349
|
-
"model.layers.{bid}.mlp.down_proj", # llama-hf nemotron
|
351
|
+
"model.layers.{bid}.mlp.down_proj", # llama-hf nemotron olmo2
|
350
352
|
"layers.{bid}.feed_forward.w2", # llama-pth
|
351
353
|
"encoder.layer.{bid}.output.dense", # bert
|
352
354
|
"transformer.h.{bid}.mlp.fc_out", # gpt-j
|
@@ -383,7 +385,7 @@ class TensorNameMap:
|
|
383
385
|
MODEL_TENSOR.ATTN_Q_NORM: (
|
384
386
|
"language_model.encoder.layers.{bid}.self_attention.q_layernorm",
|
385
387
|
"model.layers.{bid}.self_attn.q_layernorm", # persimmon
|
386
|
-
"model.layers.{bid}.self_attn.q_norm", # cohere olmoe chameleon
|
388
|
+
"model.layers.{bid}.self_attn.q_norm", # cohere olmoe chameleon olmo2
|
387
389
|
"transformer.blocks.{bid}.attn.q_ln", # sea-lion
|
388
390
|
"encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
|
389
391
|
"transformer.layers.{bid}.attn.q_norm", # openelm
|
@@ -392,7 +394,7 @@ class TensorNameMap:
|
|
392
394
|
MODEL_TENSOR.ATTN_K_NORM: (
|
393
395
|
"language_model.encoder.layers.{bid}.self_attention.k_layernorm",
|
394
396
|
"model.layers.{bid}.self_attn.k_layernorm", # persimmon
|
395
|
-
"model.layers.{bid}.self_attn.k_norm", # cohere olmoe chameleon
|
397
|
+
"model.layers.{bid}.self_attn.k_norm", # cohere olmoe chameleon olmo2
|
396
398
|
"transformer.blocks.{bid}.attn.k_ln", # sea-lion
|
397
399
|
"encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
|
398
400
|
"transformer.layers.{bid}.attn.k_norm", # openelm
|
@@ -766,4 +768,4 @@ class TensorNameMap:
|
|
766
768
|
|
767
769
|
|
768
770
|
def get_tensor_name_map(arch: MODEL_ARCH, n_blocks: int) -> TensorNameMap:
|
769
|
-
return TensorNameMap(arch, n_blocks)
|
771
|
+
return TensorNameMap(arch, n_blocks)
|
bigdl/cpp/gguf-py/gguf/vocab.py
CHANGED
@@ -122,8 +122,30 @@ class SpecialVocab:
|
|
122
122
|
tokenizer = json.load(f)
|
123
123
|
if self.load_merges:
|
124
124
|
merges = tokenizer.get('model', {}).get('merges')
|
125
|
-
if isinstance(merges, list) and merges and isinstance(merges[0], str):
|
126
|
-
self.merges = merges
|
125
|
+
if isinstance(merges, list) and merges:
|
126
|
+
if isinstance(merges[0], str):
|
127
|
+
self.merges = merges
|
128
|
+
elif isinstance(merges[0], list) and len(merges[0]) == 2 and isinstance(merges[0][0], str):
|
129
|
+
# New format since transformers 4.45 to support spaces in merges
|
130
|
+
# ref: https://github.com/ggerganov/llama.cpp/issues/9692
|
131
|
+
# TODO: internally store as the new format instead of converting to old
|
132
|
+
if any(' ' in s for pair in merges for s in pair):
|
133
|
+
logger.warning(f'Spaces in merges detected, encoding as {chr(ord(" ") + 256)!r}')
|
134
|
+
self.merges = [
|
135
|
+
' '.join(
|
136
|
+
[
|
137
|
+
# ensure the spaces are properly encoded
|
138
|
+
''.join(
|
139
|
+
chr(ord(c) + 256) if c == ' ' else c
|
140
|
+
for c in part
|
141
|
+
)
|
142
|
+
for part in pair
|
143
|
+
]
|
144
|
+
)
|
145
|
+
for pair in merges
|
146
|
+
]
|
147
|
+
else:
|
148
|
+
raise ValueError("Unknown tokenizer merges format")
|
127
149
|
added_tokens = tokenizer.get('added_tokens', {})
|
128
150
|
else:
|
129
151
|
added_tokens = {}
|
bigdl/cpp/libs/common.lib
CHANGED
Binary file
|
Binary file
|
Binary file
|
bigdl/cpp/libs/ggml.dll
CHANGED
Binary file
|
bigdl/cpp/libs/llama-batched.exe
CHANGED
Binary file
|
bigdl/cpp/libs/llama-bench.exe
CHANGED
Binary file
|
bigdl/cpp/libs/llama-cli.exe
CHANGED
Binary file
|
Binary file
|
bigdl/cpp/libs/llama-gguf.exe
CHANGED
Binary file
|
Binary file
|
bigdl/cpp/libs/llama-lookup.exe
CHANGED
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
bigdl/cpp/libs/llama-server.exe
CHANGED
Binary file
|
bigdl/cpp/libs/llama-simple.exe
CHANGED
Binary file
|
Binary file
|
Binary file
|
bigdl/cpp/libs/llama.dll
CHANGED
Binary file
|
bigdl/cpp/libs/llava_shared.dll
CHANGED
Binary file
|
Binary file
|
Binary file
|
Binary file
|
bigdl/cpp/libs/ollama.exe
CHANGED
Binary file
|
bigdl/cpp/libs/ollama_ggml.dll
CHANGED
Binary file
|
bigdl/cpp/libs/ollama_llama.dll
CHANGED
Binary file
|
Binary file
|
@@ -6,7 +6,7 @@ set "cpp_dir=%cpp_dir:~0,-1%"
|
|
6
6
|
set "lib_dir=%cpp_dir%\libs"
|
7
7
|
|
8
8
|
:: Create symlinks for DLLs and EXE
|
9
|
-
for %%f in (ollama.exe ollama_llama.dll ollama_ggml.dll ollama_llava_shared.dll libc++.dll) do (
|
9
|
+
for %%f in (ollama.exe ollama_llama.dll ollama_ggml.dll ollama_llava_shared.dll ollama-ggml-base.dll ollama-ggml-cpu.dll ollama-ggml-sycl.dll libc++.dll) do (
|
10
10
|
if exist "%cd%\%%f" del /f "%cd%\%%f"
|
11
11
|
mklink "%cd%\%%f" "%lib_dir%\%%f"
|
12
12
|
)
|
@@ -0,0 +1,54 @@
|
|
1
|
+
bigdl/cpp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
+
bigdl/cpp/convert_hf_to_gguf.py,sha256=HVJ6axht-K3DAmmkVkC1T1_vZJ8FprUwfNpXJKnLsUQ,210336
|
3
|
+
bigdl/cpp/convert_hf_to_gguf_update.py,sha256=4A9Q4oLh5tZAovmgKgS7bVlcCqcVm4j0SIwGK_lww9s,17004
|
4
|
+
bigdl/cpp/convert_llama_ggml_to_gguf.py,sha256=0dKjRhmFzvWV4e-cuLmaeW14JrWUtZwerBmz8mYyMvI,19556
|
5
|
+
bigdl/cpp/convert_lora_to_gguf.py,sha256=b2CUmTK-ztrJE_50DzsXK3SRZshr_LYFyUPn0UDnkiA,17270
|
6
|
+
bigdl/cpp/gguf-py/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
+
bigdl/cpp/gguf-py/gguf/__init__.py,sha256=h5GWs6SMXYR8giWZ7MTZzAc3hYsIJF-HAkdxtgXLOPo,228
|
8
|
+
bigdl/cpp/gguf-py/gguf/constants.py,sha256=hN1QPaL74Ef8iJXFE2hlRJmG1w43a2E6HwB0xCxp8ic,61425
|
9
|
+
bigdl/cpp/gguf-py/gguf/gguf.py,sha256=QpLc-xU055W2d7CEFvJp2gLIfGO63bdM24ZndZCH6rw,493
|
10
|
+
bigdl/cpp/gguf-py/gguf/gguf_reader.py,sha256=PUrx08ZwaUOz1gLw5JQ459Hi7JIeCdlHgZX7wXcTqbI,12702
|
11
|
+
bigdl/cpp/gguf-py/gguf/gguf_writer.py,sha256=e-8gwsdq0sipd8zzrXvvtVWV7mCaQD9fRsCn6_67CNs,38541
|
12
|
+
bigdl/cpp/gguf-py/gguf/lazy.py,sha256=YIYxGBWD-oKXU4HOvpHs9eiEn81HUgeSmt1mmHJlbdM,8814
|
13
|
+
bigdl/cpp/gguf-py/gguf/metadata.py,sha256=oBTb4DXi_h1L_gYm8x_JRVuEPR4GHlVHuM-iN0OxWoY,33244
|
14
|
+
bigdl/cpp/gguf-py/gguf/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
15
|
+
bigdl/cpp/gguf-py/gguf/quants.py,sha256=2z6vcK-kBefqZbYNmSEVmdZF_tXHeVb5NC6jCbBdgKc,62040
|
16
|
+
bigdl/cpp/gguf-py/gguf/tensor_mapping.py,sha256=tUkpDyeMXwf9M1Cx1HL2g6mMavngsj0UYkeWMaOPjQU,35561
|
17
|
+
bigdl/cpp/gguf-py/gguf/utility.py,sha256=LAwLstUlosYQ1oX9LlJZ-1uCmwyNtOFcJfXpkLnju0k,3003
|
18
|
+
bigdl/cpp/gguf-py/gguf/vocab.py,sha256=ulUC8XudFDuZC2SNKGNQpsU2KFP_YhkyNLIWV16jG6I,20816
|
19
|
+
bigdl/cpp/libs/common.lib,sha256=ELuS4xoqHJYsjeta8zWKSOKDZHqrnAwuWa3blD7O4F8,3955904
|
20
|
+
bigdl/cpp/libs/ggml-cpu.dll,sha256=WxBkX7Smps1FOcCaDfzxMAKlUgLTqfKvVb7kEt2GAdo,475648
|
21
|
+
bigdl/cpp/libs/ggml-sycl.dll,sha256=y0xMUN5r3kn1idTnWep_HOSp3OEMByiq-MYfFUl5Fms,5280768
|
22
|
+
bigdl/cpp/libs/ggml.dll,sha256=A8buG4Wm9lJ4byLJAaqfrjacjigfq5aLuoOMU3Gs0Lw,113152
|
23
|
+
bigdl/cpp/libs/libc++.dll,sha256=U0TVK2WfFQIJPP6Bz9SeJmgskm2iqZWJorx_DGdfKIw,1561600
|
24
|
+
bigdl/cpp/libs/llama-batched.exe,sha256=OXRvTB7SgBF3kSzluFaZ9T6AYLcRIJ-3fDGBQYMSADM,847872
|
25
|
+
bigdl/cpp/libs/llama-bench.exe,sha256=MZM1q--5V9DMr_rQjRpe54Hjbj1hzh6B87QqA_cHS0s,277504
|
26
|
+
bigdl/cpp/libs/llama-cli.exe,sha256=SbG_NSl6VY7ChmwRD2RYgxO-QI8tiTfzeoj3tq8WkHE,922624
|
27
|
+
bigdl/cpp/libs/llama-embedding.exe,sha256=msgusVBW43uLEBxq26Vjg8KKoiHPBGNiCvQR4s7HnfE,870400
|
28
|
+
bigdl/cpp/libs/llama-gguf.exe,sha256=ijuXVYdZSeRcDr35M-DmKS5sJQ0Ab_n_b0z5SQU_x9k,58880
|
29
|
+
bigdl/cpp/libs/llama-llava-cli.exe,sha256=fqgtEg_c2uXUTgmGQWJ9pFP0ie75_GkswP56qBWGZ4w,1109504
|
30
|
+
bigdl/cpp/libs/llama-lookup.exe,sha256=QCPdyiI8AsFFXjAnX1GohxOiFKqPRLoL7_G9K2ihN7Y,904704
|
31
|
+
bigdl/cpp/libs/llama-ls-sycl-device.exe,sha256=IfpDycbUBZgmohCtMUNI3k5hDpg1QkqimyVuYPMk_dw,10240
|
32
|
+
bigdl/cpp/libs/llama-minicpmv-cli.exe,sha256=j-Tk3T7Kb6N0ccR2F-UTBPuxjFYHfSs5WbfSc9HAGhQ,1107456
|
33
|
+
bigdl/cpp/libs/llama-perplexity.exe,sha256=ZEEjyN3JnTjLBwb2DtM4SOHFbgH-HzdfJlKZX_sQIhw,990720
|
34
|
+
bigdl/cpp/libs/llama-quantize.exe,sha256=5bKWjuDw3kbjCuwN0iPOobph6rfU-j0os_dYOFG0zHI,122880
|
35
|
+
bigdl/cpp/libs/llama-server.exe,sha256=32cg0YZkZTJQ24Vst7kO3RhCDN1BoOrUQUwBVPJgUes,2148864
|
36
|
+
bigdl/cpp/libs/llama-simple.exe,sha256=nIQ7VJ_I1Fqc9gsCDt_BVx8M-lIiLh_byycNPzH6lqw,62464
|
37
|
+
bigdl/cpp/libs/llama-speculative.exe,sha256=xeDoKmoJPae-r1FBTApS-z5K1AhWA13lbpUQdqGjSpg,907264
|
38
|
+
bigdl/cpp/libs/llama-tokenize.exe,sha256=cJLtVEuYfli42CJYGet0beV6m6pW7r2m44QtgOqlRt0,88576
|
39
|
+
bigdl/cpp/libs/llama.dll,sha256=2Wgwo2UBczq2s6SBP9TBZMxJMmr_P8CfobB0NTlSkis,1404928
|
40
|
+
bigdl/cpp/libs/llava_shared.dll,sha256=vBVJuzZbXjf0W14a5ZO415Wj1-qiwT_Qcex-v71RjVQ,365056
|
41
|
+
bigdl/cpp/libs/ollama-ggml-base.dll,sha256=2yGQxfoge3KDesOWcwa8ncFMvyvg7Jd8pl_pvMN6w5w,459776
|
42
|
+
bigdl/cpp/libs/ollama-ggml-cpu.dll,sha256=iQEVXQK_tHmGY4UpMrPPj6G9ssbPxHeJQR0s5rqG6cU,475648
|
43
|
+
bigdl/cpp/libs/ollama-ggml-sycl.dll,sha256=Y_0PbkDf48ypCVA71QLXap_Wgbd5n76m4gKQEt64NFk,5280768
|
44
|
+
bigdl/cpp/libs/ollama.exe,sha256=tTdj_U3k2XHx4R2e7Z_tTOaqnOtQAJQqWNdo6oxTnHU,25979904
|
45
|
+
bigdl/cpp/libs/ollama_ggml.dll,sha256=TPiPOCTK2Lfcf4X03NIVwiYTMPeYAKX9VTmeI9b3RKc,113152
|
46
|
+
bigdl/cpp/libs/ollama_llama.dll,sha256=UGD_a9ok5m0vnLGtF2cSHwnQJRAV6XaGDBMMdIT0g00,1427968
|
47
|
+
bigdl/cpp/libs/ollama_llava_shared.dll,sha256=3e7S_cVWEvfUUEUYWUmTpeDsMS7Wr5_Hofc4lAUmK_w,365056
|
48
|
+
bigdl_core_cpp-2.6.0b20250204.post0.data/scripts/init-llama-cpp.bat,sha256=U0h6RifZxL3GGJp-0dxdZapQIvXUATSj644CURJL-lg,751
|
49
|
+
bigdl_core_cpp-2.6.0b20250204.post0.data/scripts/init-llama-cpp.ps1,sha256=JFOylLxO4MKpllHhdbPuJ1xHi9azxDpzdJns8JtZpkU,501
|
50
|
+
bigdl_core_cpp-2.6.0b20250204.post0.data/scripts/init-ollama.bat,sha256=DKWd9wFMT7hcNVQViMxG3CnqF4iZXCh2BmuS3zQjAuw,642
|
51
|
+
bigdl_core_cpp-2.6.0b20250204.post0.dist-info/METADATA,sha256=mLLb2aUYHIfuJCKqAxZ2Wy1s8hPohkWVoh8m2gGrn90,756
|
52
|
+
bigdl_core_cpp-2.6.0b20250204.post0.dist-info/WHEEL,sha256=2wr--P33L_Xt79Mrb57-zn6CrTlNaEVHEwbOduMxJRg,97
|
53
|
+
bigdl_core_cpp-2.6.0b20250204.post0.dist-info/top_level.txt,sha256=iGuLfZARD_qANcIMfy0tbbrC3EtCg6BSiH8icc3dLWs,6
|
54
|
+
bigdl_core_cpp-2.6.0b20250204.post0.dist-info/RECORD,,
|