bigdl-core-cpp 2.6.0b20250204__py3-none-win_amd64.whl → 2.6.0b20250204.post0__py3-none-win_amd64.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. bigdl/cpp/convert_hf_to_gguf.py +99 -44
  2. bigdl/cpp/convert_hf_to_gguf_update.py +4 -1
  3. bigdl/cpp/convert_lora_to_gguf.py +41 -11
  4. bigdl/cpp/gguf-py/gguf/constants.py +79 -18
  5. bigdl/cpp/gguf-py/gguf/gguf.py +1 -1
  6. bigdl/cpp/gguf-py/gguf/gguf_reader.py +5 -6
  7. bigdl/cpp/gguf-py/gguf/gguf_writer.py +36 -12
  8. bigdl/cpp/gguf-py/gguf/metadata.py +131 -19
  9. bigdl/cpp/gguf-py/gguf/tensor_mapping.py +17 -15
  10. bigdl/cpp/gguf-py/gguf/vocab.py +24 -2
  11. bigdl/cpp/libs/common.lib +0 -0
  12. bigdl/cpp/libs/ggml-cpu.dll +0 -0
  13. bigdl/cpp/libs/ggml-sycl.dll +0 -0
  14. bigdl/cpp/libs/ggml.dll +0 -0
  15. bigdl/cpp/libs/llama-batched.exe +0 -0
  16. bigdl/cpp/libs/llama-bench.exe +0 -0
  17. bigdl/cpp/libs/llama-cli.exe +0 -0
  18. bigdl/cpp/libs/llama-embedding.exe +0 -0
  19. bigdl/cpp/libs/llama-gguf.exe +0 -0
  20. bigdl/cpp/libs/llama-llava-cli.exe +0 -0
  21. bigdl/cpp/libs/llama-lookup.exe +0 -0
  22. bigdl/cpp/libs/llama-ls-sycl-device.exe +0 -0
  23. bigdl/cpp/libs/llama-minicpmv-cli.exe +0 -0
  24. bigdl/cpp/libs/llama-perplexity.exe +0 -0
  25. bigdl/cpp/libs/llama-quantize.exe +0 -0
  26. bigdl/cpp/libs/llama-server.exe +0 -0
  27. bigdl/cpp/libs/llama-simple.exe +0 -0
  28. bigdl/cpp/libs/llama-speculative.exe +0 -0
  29. bigdl/cpp/libs/llama-tokenize.exe +0 -0
  30. bigdl/cpp/libs/llama.dll +0 -0
  31. bigdl/cpp/libs/llava_shared.dll +0 -0
  32. bigdl/cpp/libs/ollama-ggml-base.dll +0 -0
  33. bigdl/cpp/libs/ollama-ggml-cpu.dll +0 -0
  34. bigdl/cpp/libs/ollama-ggml-sycl.dll +0 -0
  35. bigdl/cpp/libs/ollama.exe +0 -0
  36. bigdl/cpp/libs/ollama_ggml.dll +0 -0
  37. bigdl/cpp/libs/ollama_llama.dll +0 -0
  38. bigdl/cpp/libs/ollama_llava_shared.dll +0 -0
  39. {bigdl_core_cpp-2.6.0b20250204.data → bigdl_core_cpp-2.6.0b20250204.post0.data}/scripts/init-ollama.bat +1 -1
  40. {bigdl_core_cpp-2.6.0b20250204.dist-info → bigdl_core_cpp-2.6.0b20250204.post0.dist-info}/METADATA +1 -1
  41. bigdl_core_cpp-2.6.0b20250204.post0.dist-info/RECORD +54 -0
  42. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/ipex_llm/ollama_llama_server.exe +0 -0
  43. bigdl_core_cpp-2.6.0b20250204.dist-info/RECORD +0 -50
  44. {bigdl_core_cpp-2.6.0b20250204.data → bigdl_core_cpp-2.6.0b20250204.post0.data}/scripts/init-llama-cpp.bat +0 -0
  45. {bigdl_core_cpp-2.6.0b20250204.data → bigdl_core_cpp-2.6.0b20250204.post0.data}/scripts/init-llama-cpp.ps1 +0 -0
  46. {bigdl_core_cpp-2.6.0b20250204.dist-info → bigdl_core_cpp-2.6.0b20250204.post0.dist-info}/WHEEL +0 -0
  47. {bigdl_core_cpp-2.6.0b20250204.dist-info → bigdl_core_cpp-2.6.0b20250204.post0.dist-info}/top_level.txt +0 -0
@@ -568,6 +568,9 @@ class GGUFWriter:
568
568
  def add_base_model_organization(self, source_id: int, organization: str) -> None:
569
569
  self.add_string(Keys.General.BASE_MODEL_ORGANIZATION.format(id=source_id), organization)
570
570
 
571
+ def add_base_model_description(self, source_id: int, description: str) -> None:
572
+ self.add_string(Keys.General.BASE_MODEL_DESCRIPTION.format(id=source_id), description)
573
+
571
574
  def add_base_model_url(self, source_id: int, url: str) -> None:
572
575
  self.add_string(Keys.General.BASE_MODEL_URL.format(id=source_id), url)
573
576
 
@@ -580,15 +583,42 @@ class GGUFWriter:
580
583
  def add_base_model_repo_url(self, source_id: int, repo_url: str) -> None:
581
584
  self.add_string(Keys.General.BASE_MODEL_REPO_URL.format(id=source_id), repo_url)
582
585
 
586
+ def add_dataset_count(self, source_count: int) -> None:
587
+ self.add_uint32(Keys.General.DATASET_COUNT, source_count)
588
+
589
+ def add_dataset_name(self, source_id: int, name: str) -> None:
590
+ self.add_string(Keys.General.DATASET_NAME.format(id=source_id), name)
591
+
592
+ def add_dataset_author(self, source_id: int, author: str) -> None:
593
+ self.add_string(Keys.General.DATASET_AUTHOR.format(id=source_id), author)
594
+
595
+ def add_dataset_version(self, source_id: int, version: str) -> None:
596
+ self.add_string(Keys.General.DATASET_VERSION.format(id=source_id), version)
597
+
598
+ def add_dataset_organization(self, source_id: int, organization: str) -> None:
599
+ self.add_string(Keys.General.DATASET_ORGANIZATION.format(id=source_id), organization)
600
+
601
+ def add_dataset_description(self, source_id: int, description: str) -> None:
602
+ self.add_string(Keys.General.DATASET_DESCRIPTION.format(id=source_id), description)
603
+
604
+ def add_dataset_url(self, source_id: int, url: str) -> None:
605
+ self.add_string(Keys.General.DATASET_URL.format(id=source_id), url)
606
+
607
+ def add_dataset_doi(self, source_id: int, doi: str) -> None:
608
+ self.add_string(Keys.General.DATASET_DOI.format(id=source_id), doi)
609
+
610
+ def add_dataset_uuid(self, source_id: int, uuid: str) -> None:
611
+ self.add_string(Keys.General.DATASET_UUID.format(id=source_id), uuid)
612
+
613
+ def add_dataset_repo_url(self, source_id: int, repo_url: str) -> None:
614
+ self.add_string(Keys.General.DATASET_REPO_URL.format(id=source_id), repo_url)
615
+
583
616
  def add_tags(self, tags: Sequence[str]) -> None:
584
617
  self.add_array(Keys.General.TAGS, tags)
585
618
 
586
619
  def add_languages(self, languages: Sequence[str]) -> None:
587
620
  self.add_array(Keys.General.LANGUAGES, languages)
588
621
 
589
- def add_datasets(self, datasets: Sequence[str]) -> None:
590
- self.add_array(Keys.General.DATASETS, datasets)
591
-
592
622
  def add_tensor_data_layout(self, layout: str) -> None:
593
623
  self.add_string(Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)
594
624
 
@@ -721,6 +751,9 @@ class GGUFWriter:
721
751
  def add_rope_dimension_count(self, count: int) -> None:
722
752
  self.add_uint32(Keys.Rope.DIMENSION_COUNT.format(arch=self.arch), count)
723
753
 
754
+ def add_rope_dimension_sections(self, dims: Sequence[int]) -> None:
755
+ self.add_array(Keys.Rope.DIMENSION_SECTIONS.format(arch=self.arch), dims)
756
+
724
757
  def add_rope_freq_base(self, value: float) -> None:
725
758
  self.add_float32(Keys.Rope.FREQ_BASE.format(arch=self.arch), value)
726
759
 
@@ -843,15 +876,6 @@ class GGUFWriter:
843
876
 
844
877
  self.add_string(Keys.Tokenizer.CHAT_TEMPLATE, value)
845
878
 
846
- def add_prefix_token_id(self, id: int) -> None:
847
- self.add_uint32(Keys.Tokenizer.PREFIX_ID, id)
848
-
849
- def add_suffix_token_id(self, id: int) -> None:
850
- self.add_uint32(Keys.Tokenizer.SUFFIX_ID, id)
851
-
852
- def add_middle_token_id(self, id: int) -> None:
853
- self.add_uint32(Keys.Tokenizer.MIDDLE_ID, id)
854
-
855
879
  def add_eot_token_id(self, id: int) -> None:
856
880
  self.add_uint32(Keys.Tokenizer.EOT_ID, id)
857
881
 
@@ -41,7 +41,7 @@ class Metadata:
41
41
  base_models: Optional[list[dict]] = None
42
42
  tags: Optional[list[str]] = None
43
43
  languages: Optional[list[str]] = None
44
- datasets: Optional[list[str]] = None
44
+ datasets: Optional[list[dict]] = None
45
45
 
46
46
  @staticmethod
47
47
  def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Path] = None, model_name: Optional[str] = None, total_params: int = 0) -> Metadata:
@@ -91,9 +91,11 @@ class Metadata:
91
91
  # Base Models is received here as an array of models
92
92
  metadata.base_models = metadata_override.get("general.base_models", metadata.base_models)
93
93
 
94
+ # Datasets is received here as an array of datasets
95
+ metadata.datasets = metadata_override.get("general.datasets", metadata.datasets)
96
+
94
97
  metadata.tags = metadata_override.get(Keys.General.TAGS, metadata.tags)
95
98
  metadata.languages = metadata_override.get(Keys.General.LANGUAGES, metadata.languages)
96
- metadata.datasets = metadata_override.get(Keys.General.DATASETS, metadata.datasets)
97
99
 
98
100
  # Direct Metadata Override (via direct cli argument)
99
101
  if model_name is not None:
@@ -346,12 +348,12 @@ class Metadata:
346
348
  use_model_card_metadata("author", "model_creator")
347
349
  use_model_card_metadata("basename", "model_type")
348
350
 
349
- if "base_model" in model_card:
351
+ if "base_model" in model_card or "base_models" in model_card or "base_model_sources" in model_card:
350
352
  # This represents the parent models that this is based on
351
353
  # Example: stabilityai/stable-diffusion-xl-base-1.0. Can also be a list (for merges)
352
354
  # Example of merges: https://huggingface.co/EmbeddedLLM/Mistral-7B-Merge-14-v0.1/blob/main/README.md
353
355
  metadata_base_models = []
354
- base_model_value = model_card.get("base_model", None)
356
+ base_model_value = model_card.get("base_model", model_card.get("base_models", model_card.get("base_model_sources", None)))
355
357
 
356
358
  if base_model_value is not None:
357
359
  if isinstance(base_model_value, str):
@@ -364,18 +366,106 @@ class Metadata:
364
366
 
365
367
  for model_id in metadata_base_models:
366
368
  # NOTE: model size of base model is assumed to be similar to the size of the current model
367
- model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params)
368
369
  base_model = {}
369
- if model_full_name_component is not None:
370
- base_model["name"] = Metadata.id_to_title(model_full_name_component)
371
- if org_component is not None:
372
- base_model["organization"] = Metadata.id_to_title(org_component)
373
- if version is not None:
374
- base_model["version"] = version
375
- if org_component is not None and model_full_name_component is not None:
376
- base_model["repo_url"] = f"https://huggingface.co/{org_component}/{model_full_name_component}"
370
+ if isinstance(model_id, str):
371
+ if model_id.startswith("http://") or model_id.startswith("https://") or model_id.startswith("ssh://"):
372
+ base_model["repo_url"] = model_id
373
+
374
+ # Check if Hugging Face ID is present in URL
375
+ if "huggingface.co" in model_id:
376
+ match = re.match(r"https?://huggingface.co/([^/]+/[^/]+)$", model_id)
377
+ if match:
378
+ model_id_component = match.group(1)
379
+ model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id_component, total_params)
380
+
381
+ # Populate model dictionary with extracted components
382
+ if model_full_name_component is not None:
383
+ base_model["name"] = Metadata.id_to_title(model_full_name_component)
384
+ if org_component is not None:
385
+ base_model["organization"] = Metadata.id_to_title(org_component)
386
+ if version is not None:
387
+ base_model["version"] = version
388
+
389
+ else:
390
+ # Likely a Hugging Face ID
391
+ model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params)
392
+
393
+ # Populate model dictionary with extracted components
394
+ if model_full_name_component is not None:
395
+ base_model["name"] = Metadata.id_to_title(model_full_name_component)
396
+ if org_component is not None:
397
+ base_model["organization"] = Metadata.id_to_title(org_component)
398
+ if version is not None:
399
+ base_model["version"] = version
400
+ if org_component is not None and model_full_name_component is not None:
401
+ base_model["repo_url"] = f"https://huggingface.co/{org_component}/{model_full_name_component}"
402
+
403
+ elif isinstance(model_id, dict):
404
+ base_model = model_id
405
+
406
+ else:
407
+ logger.error(f"base model entry '{str(model_id)}' not in a known format")
408
+
377
409
  metadata.base_models.append(base_model)
378
410
 
411
+ if "datasets" in model_card or "dataset" in model_card or "dataset_sources" in model_card:
412
+ # This represents the datasets that this was trained from
413
+ metadata_datasets = []
414
+ dataset_value = model_card.get("datasets", model_card.get("dataset", model_card.get("dataset_sources", None)))
415
+
416
+ if dataset_value is not None:
417
+ if isinstance(dataset_value, str):
418
+ metadata_datasets.append(dataset_value)
419
+ elif isinstance(dataset_value, list):
420
+ metadata_datasets.extend(dataset_value)
421
+
422
+ if metadata.datasets is None:
423
+ metadata.datasets = []
424
+
425
+ for dataset_id in metadata_datasets:
426
+ # NOTE: model size of base model is assumed to be similar to the size of the current model
427
+ dataset = {}
428
+ if isinstance(dataset_id, str):
429
+ if dataset_id.startswith(("http://", "https://", "ssh://")):
430
+ dataset["repo_url"] = dataset_id
431
+
432
+ # Check if Hugging Face ID is present in URL
433
+ if "huggingface.co" in dataset_id:
434
+ match = re.match(r"https?://huggingface.co/([^/]+/[^/]+)$", dataset_id)
435
+ if match:
436
+ dataset_id_component = match.group(1)
437
+ dataset_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(dataset_id_component, total_params)
438
+
439
+ # Populate dataset dictionary with extracted components
440
+ if dataset_name_component is not None:
441
+ dataset["name"] = Metadata.id_to_title(dataset_name_component)
442
+ if org_component is not None:
443
+ dataset["organization"] = Metadata.id_to_title(org_component)
444
+ if version is not None:
445
+ dataset["version"] = version
446
+
447
+ else:
448
+ # Likely a Hugging Face ID
449
+ dataset_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(dataset_id, total_params)
450
+
451
+ # Populate dataset dictionary with extracted components
452
+ if dataset_name_component is not None:
453
+ dataset["name"] = Metadata.id_to_title(dataset_name_component)
454
+ if org_component is not None:
455
+ dataset["organization"] = Metadata.id_to_title(org_component)
456
+ if version is not None:
457
+ dataset["version"] = version
458
+ if org_component is not None and dataset_name_component is not None:
459
+ dataset["repo_url"] = f"https://huggingface.co/{org_component}/{dataset_name_component}"
460
+
461
+ elif isinstance(dataset_id, dict):
462
+ dataset = dataset_id
463
+
464
+ else:
465
+ logger.error(f"dataset entry '{str(dataset_id)}' not in a known format")
466
+
467
+ metadata.datasets.append(dataset)
468
+
379
469
  use_model_card_metadata("license", "license")
380
470
  use_model_card_metadata("license_name", "license_name")
381
471
  use_model_card_metadata("license_link", "license_link")
@@ -386,9 +476,6 @@ class Metadata:
386
476
  use_array_model_card_metadata("languages", "languages")
387
477
  use_array_model_card_metadata("languages", "language")
388
478
 
389
- use_array_model_card_metadata("datasets", "datasets")
390
- use_array_model_card_metadata("datasets", "dataset")
391
-
392
479
  # Hugging Face Parameter Heuristics
393
480
  ####################################
394
481
 
@@ -458,7 +545,10 @@ class Metadata:
458
545
  gguf_writer.add_size_label(self.size_label)
459
546
 
460
547
  if self.license is not None:
461
- gguf_writer.add_license(self.license)
548
+ if isinstance(self.license, list):
549
+ gguf_writer.add_license(",".join(self.license))
550
+ else:
551
+ gguf_writer.add_license(self.license)
462
552
  if self.license_name is not None:
463
553
  gguf_writer.add_license_name(self.license_name)
464
554
  if self.license_link is not None:
@@ -493,6 +583,8 @@ class Metadata:
493
583
  gguf_writer.add_base_model_version(key, base_model_entry["version"])
494
584
  if "organization" in base_model_entry:
495
585
  gguf_writer.add_base_model_organization(key, base_model_entry["organization"])
586
+ if "description" in base_model_entry:
587
+ gguf_writer.add_base_model_description(key, base_model_entry["description"])
496
588
  if "url" in base_model_entry:
497
589
  gguf_writer.add_base_model_url(key, base_model_entry["url"])
498
590
  if "doi" in base_model_entry:
@@ -502,9 +594,29 @@ class Metadata:
502
594
  if "repo_url" in base_model_entry:
503
595
  gguf_writer.add_base_model_repo_url(key, base_model_entry["repo_url"])
504
596
 
597
+ if self.datasets is not None:
598
+ gguf_writer.add_dataset_count(len(self.datasets))
599
+ for key, dataset_entry in enumerate(self.datasets):
600
+ if "name" in dataset_entry:
601
+ gguf_writer.add_dataset_name(key, dataset_entry["name"])
602
+ if "author" in dataset_entry:
603
+ gguf_writer.add_dataset_author(key, dataset_entry["author"])
604
+ if "version" in dataset_entry:
605
+ gguf_writer.add_dataset_version(key, dataset_entry["version"])
606
+ if "organization" in dataset_entry:
607
+ gguf_writer.add_dataset_organization(key, dataset_entry["organization"])
608
+ if "description" in dataset_entry:
609
+ gguf_writer.add_dataset_description(key, dataset_entry["description"])
610
+ if "url" in dataset_entry:
611
+ gguf_writer.add_dataset_url(key, dataset_entry["url"])
612
+ if "doi" in dataset_entry:
613
+ gguf_writer.add_dataset_doi(key, dataset_entry["doi"])
614
+ if "uuid" in dataset_entry:
615
+ gguf_writer.add_dataset_uuid(key, dataset_entry["uuid"])
616
+ if "repo_url" in dataset_entry:
617
+ gguf_writer.add_dataset_repo_url(key, dataset_entry["repo_url"])
618
+
505
619
  if self.tags is not None:
506
620
  gguf_writer.add_tags(self.tags)
507
621
  if self.languages is not None:
508
622
  gguf_writer.add_languages(self.languages)
509
- if self.datasets is not None:
510
- gguf_writer.add_datasets(self.datasets)
@@ -13,7 +13,7 @@ class TensorNameMap:
13
13
  "transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais exaone
14
14
  "transformer.word_embeddings", # falcon
15
15
  "word_embeddings", # bloom
16
- "model.embed_tokens", # llama-hf nemotron olmoe
16
+ "model.embed_tokens", # llama-hf nemotron olmoe olmo2
17
17
  "tok_embeddings", # llama-pth
18
18
  "embeddings.word_embeddings", # bert nomic-bert
19
19
  "language_model.embedding.word_embeddings", # persimmon
@@ -54,7 +54,7 @@ class TensorNameMap:
54
54
  # Output
55
55
  MODEL_TENSOR.OUTPUT: (
56
56
  "embed_out", # gptneox
57
- "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe
57
+ "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2
58
58
  "output", # llama-pth bloom internlm2
59
59
  "word_embeddings_for_head", # persimmon
60
60
  "lm_head.linear", # phi2
@@ -66,7 +66,7 @@ class TensorNameMap:
66
66
  MODEL_TENSOR.OUTPUT_NORM: (
67
67
  "gpt_neox.final_layer_norm", # gptneox
68
68
  "transformer.ln_f", # gpt2 gpt-j falcon jais exaone
69
- "model.norm", # llama-hf baichuan internlm2 olmoe
69
+ "model.norm", # llama-hf baichuan internlm2 olmoe olmo2
70
70
  "norm", # llama-pth
71
71
  "transformer.norm_f", # mpt dbrx
72
72
  "ln_f", # refact bloom qwen gpt2
@@ -145,7 +145,8 @@ class TensorNameMap:
145
145
 
146
146
  # Attention query
147
147
  MODEL_TENSOR.ATTN_Q: (
148
- "model.layers.{bid}.self_attn.q_proj", # llama-hf nemotron olmoe
148
+ "model.layers.{bid}.self_attn.q_proj", # llama-hf nemotron olmoe olmo2
149
+ "model.layers.{bid}.self_attn.q_proj_no_perm", # llama-custom
149
150
  "layers.{bid}.attention.wq", # llama-pth
150
151
  "encoder.layer.{bid}.attention.self.query", # bert
151
152
  "transformer.h.{bid}.attn.q_proj", # gpt-j
@@ -157,7 +158,8 @@ class TensorNameMap:
157
158
 
158
159
  # Attention key
159
160
  MODEL_TENSOR.ATTN_K: (
160
- "model.layers.{bid}.self_attn.k_proj", # llama-hf nemotron olmoe
161
+ "model.layers.{bid}.self_attn.k_proj", # llama-hf nemotron olmoe olmo2
162
+ "model.layers.{bid}.self_attn.k_proj_no_perm", # llama-custom
161
163
  "layers.{bid}.attention.wk", # llama-pth
162
164
  "encoder.layer.{bid}.attention.self.key", # bert
163
165
  "transformer.h.{bid}.attn.k_proj", # gpt-j
@@ -170,7 +172,7 @@ class TensorNameMap:
170
172
 
171
173
  # Attention value
172
174
  MODEL_TENSOR.ATTN_V: (
173
- "model.layers.{bid}.self_attn.v_proj", # llama-hf nemotron olmoe
175
+ "model.layers.{bid}.self_attn.v_proj", # llama-hf nemotron olmoe olmo2
174
176
  "layers.{bid}.attention.wv", # llama-pth
175
177
  "encoder.layer.{bid}.attention.self.value", # bert
176
178
  "transformer.h.{bid}.attn.v_proj", # gpt-j
@@ -188,7 +190,7 @@ class TensorNameMap:
188
190
  "transformer.blocks.{bid}.attn.out_proj", # mpt
189
191
  "transformer.h.{bid}.self_attention.dense", # falcon
190
192
  "h.{bid}.self_attention.dense", # bloom
191
- "model.layers.{bid}.self_attn.o_proj", # llama-hf nemotron olmoe
193
+ "model.layers.{bid}.self_attn.o_proj", # llama-hf nemotron olmoe olmo2
192
194
  "layers.{bid}.attention.wo", # llama-pth
193
195
  "encoder.layer.{bid}.attention.output.dense", # bert
194
196
  "transformer.h.{bid}.attn.out_proj", # gpt-j
@@ -215,7 +217,7 @@ class TensorNameMap:
215
217
  ),
216
218
 
217
219
  MODEL_TENSOR.ATTN_POST_NORM: (
218
- "model.layers.{bid}.post_attention_layernorm", # gemma2
220
+ "model.layers.{bid}.post_attention_layernorm", # gemma2 olmo2
219
221
  ),
220
222
 
221
223
  # Rotary embeddings
@@ -250,7 +252,7 @@ class TensorNameMap:
250
252
 
251
253
  # Post feed-forward norm
252
254
  MODEL_TENSOR.FFN_POST_NORM: (
253
- "model.layers.{bid}.post_feedforward_layernorm", # gemma2
255
+ "model.layers.{bid}.post_feedforward_layernorm", # gemma2 olmo2
254
256
  ),
255
257
 
256
258
  MODEL_TENSOR.FFN_GATE_INP: (
@@ -273,7 +275,7 @@ class TensorNameMap:
273
275
  "transformer.blocks.{bid}.ffn.up_proj", # mpt
274
276
  "transformer.h.{bid}.mlp.dense_h_to_4h", # falcon
275
277
  "h.{bid}.mlp.dense_h_to_4h", # bloom
276
- "model.layers.{bid}.mlp.up_proj", # llama-hf refact nemotron
278
+ "model.layers.{bid}.mlp.up_proj", # llama-hf refact nemotron olmo2
277
279
  "layers.{bid}.feed_forward.w3", # llama-pth
278
280
  "encoder.layer.{bid}.intermediate.dense", # bert
279
281
  "transformer.h.{bid}.mlp.fc_in", # gpt-j
@@ -314,7 +316,7 @@ class TensorNameMap:
314
316
 
315
317
  # Feed-forward gate
316
318
  MODEL_TENSOR.FFN_GATE: (
317
- "model.layers.{bid}.mlp.gate_proj", # llama-hf refact
319
+ "model.layers.{bid}.mlp.gate_proj", # llama-hf refact olmo2
318
320
  "layers.{bid}.feed_forward.w1", # llama-pth
319
321
  "transformer.h.{bid}.mlp.w2", # qwen
320
322
  "transformer.h.{bid}.mlp.c_fc2", # jais
@@ -346,7 +348,7 @@ class TensorNameMap:
346
348
  "transformer.blocks.{bid}.ffn.down_proj", # mpt
347
349
  "transformer.h.{bid}.mlp.dense_4h_to_h", # falcon
348
350
  "h.{bid}.mlp.dense_4h_to_h", # bloom
349
- "model.layers.{bid}.mlp.down_proj", # llama-hf nemotron
351
+ "model.layers.{bid}.mlp.down_proj", # llama-hf nemotron olmo2
350
352
  "layers.{bid}.feed_forward.w2", # llama-pth
351
353
  "encoder.layer.{bid}.output.dense", # bert
352
354
  "transformer.h.{bid}.mlp.fc_out", # gpt-j
@@ -383,7 +385,7 @@ class TensorNameMap:
383
385
  MODEL_TENSOR.ATTN_Q_NORM: (
384
386
  "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
385
387
  "model.layers.{bid}.self_attn.q_layernorm", # persimmon
386
- "model.layers.{bid}.self_attn.q_norm", # cohere olmoe chameleon
388
+ "model.layers.{bid}.self_attn.q_norm", # cohere olmoe chameleon olmo2
387
389
  "transformer.blocks.{bid}.attn.q_ln", # sea-lion
388
390
  "encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
389
391
  "transformer.layers.{bid}.attn.q_norm", # openelm
@@ -392,7 +394,7 @@ class TensorNameMap:
392
394
  MODEL_TENSOR.ATTN_K_NORM: (
393
395
  "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
394
396
  "model.layers.{bid}.self_attn.k_layernorm", # persimmon
395
- "model.layers.{bid}.self_attn.k_norm", # cohere olmoe chameleon
397
+ "model.layers.{bid}.self_attn.k_norm", # cohere olmoe chameleon olmo2
396
398
  "transformer.blocks.{bid}.attn.k_ln", # sea-lion
397
399
  "encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
398
400
  "transformer.layers.{bid}.attn.k_norm", # openelm
@@ -766,4 +768,4 @@ class TensorNameMap:
766
768
 
767
769
 
768
770
  def get_tensor_name_map(arch: MODEL_ARCH, n_blocks: int) -> TensorNameMap:
769
- return TensorNameMap(arch, n_blocks)
771
+ return TensorNameMap(arch, n_blocks)
@@ -122,8 +122,30 @@ class SpecialVocab:
122
122
  tokenizer = json.load(f)
123
123
  if self.load_merges:
124
124
  merges = tokenizer.get('model', {}).get('merges')
125
- if isinstance(merges, list) and merges and isinstance(merges[0], str):
126
- self.merges = merges
125
+ if isinstance(merges, list) and merges:
126
+ if isinstance(merges[0], str):
127
+ self.merges = merges
128
+ elif isinstance(merges[0], list) and len(merges[0]) == 2 and isinstance(merges[0][0], str):
129
+ # New format since transformers 4.45 to support spaces in merges
130
+ # ref: https://github.com/ggerganov/llama.cpp/issues/9692
131
+ # TODO: internally store as the new format instead of converting to old
132
+ if any(' ' in s for pair in merges for s in pair):
133
+ logger.warning(f'Spaces in merges detected, encoding as {chr(ord(" ") + 256)!r}')
134
+ self.merges = [
135
+ ' '.join(
136
+ [
137
+ # ensure the spaces are properly encoded
138
+ ''.join(
139
+ chr(ord(c) + 256) if c == ' ' else c
140
+ for c in part
141
+ )
142
+ for part in pair
143
+ ]
144
+ )
145
+ for pair in merges
146
+ ]
147
+ else:
148
+ raise ValueError("Unknown tokenizer merges format")
127
149
  added_tokens = tokenizer.get('added_tokens', {})
128
150
  else:
129
151
  added_tokens = {}
bigdl/cpp/libs/common.lib CHANGED
Binary file
Binary file
Binary file
bigdl/cpp/libs/ggml.dll CHANGED
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
bigdl/cpp/libs/llama.dll CHANGED
Binary file
Binary file
Binary file
Binary file
Binary file
bigdl/cpp/libs/ollama.exe CHANGED
Binary file
Binary file
Binary file
Binary file
@@ -6,7 +6,7 @@ set "cpp_dir=%cpp_dir:~0,-1%"
6
6
  set "lib_dir=%cpp_dir%\libs"
7
7
 
8
8
  :: Create symlinks for DLLs and EXE
9
- for %%f in (ollama.exe ollama_llama.dll ollama_ggml.dll ollama_llava_shared.dll libc++.dll) do (
9
+ for %%f in (ollama.exe ollama_llama.dll ollama_ggml.dll ollama_llava_shared.dll ollama-ggml-base.dll ollama-ggml-cpu.dll ollama-ggml-sycl.dll libc++.dll) do (
10
10
  if exist "%cd%\%%f" del /f "%cd%\%%f"
11
11
  mklink "%cd%\%%f" "%lib_dir%\%%f"
12
12
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: bigdl-core-cpp
3
- Version: 2.6.0b20250204
3
+ Version: 2.6.0b20250204.post0
4
4
  Summary: Large Language Model Develop Toolkit
5
5
  Author: BigDL Authors
6
6
  License: Apache License, Version 2.0
@@ -0,0 +1,54 @@
1
+ bigdl/cpp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ bigdl/cpp/convert_hf_to_gguf.py,sha256=HVJ6axht-K3DAmmkVkC1T1_vZJ8FprUwfNpXJKnLsUQ,210336
3
+ bigdl/cpp/convert_hf_to_gguf_update.py,sha256=4A9Q4oLh5tZAovmgKgS7bVlcCqcVm4j0SIwGK_lww9s,17004
4
+ bigdl/cpp/convert_llama_ggml_to_gguf.py,sha256=0dKjRhmFzvWV4e-cuLmaeW14JrWUtZwerBmz8mYyMvI,19556
5
+ bigdl/cpp/convert_lora_to_gguf.py,sha256=b2CUmTK-ztrJE_50DzsXK3SRZshr_LYFyUPn0UDnkiA,17270
6
+ bigdl/cpp/gguf-py/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ bigdl/cpp/gguf-py/gguf/__init__.py,sha256=h5GWs6SMXYR8giWZ7MTZzAc3hYsIJF-HAkdxtgXLOPo,228
8
+ bigdl/cpp/gguf-py/gguf/constants.py,sha256=hN1QPaL74Ef8iJXFE2hlRJmG1w43a2E6HwB0xCxp8ic,61425
9
+ bigdl/cpp/gguf-py/gguf/gguf.py,sha256=QpLc-xU055W2d7CEFvJp2gLIfGO63bdM24ZndZCH6rw,493
10
+ bigdl/cpp/gguf-py/gguf/gguf_reader.py,sha256=PUrx08ZwaUOz1gLw5JQ459Hi7JIeCdlHgZX7wXcTqbI,12702
11
+ bigdl/cpp/gguf-py/gguf/gguf_writer.py,sha256=e-8gwsdq0sipd8zzrXvvtVWV7mCaQD9fRsCn6_67CNs,38541
12
+ bigdl/cpp/gguf-py/gguf/lazy.py,sha256=YIYxGBWD-oKXU4HOvpHs9eiEn81HUgeSmt1mmHJlbdM,8814
13
+ bigdl/cpp/gguf-py/gguf/metadata.py,sha256=oBTb4DXi_h1L_gYm8x_JRVuEPR4GHlVHuM-iN0OxWoY,33244
14
+ bigdl/cpp/gguf-py/gguf/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
+ bigdl/cpp/gguf-py/gguf/quants.py,sha256=2z6vcK-kBefqZbYNmSEVmdZF_tXHeVb5NC6jCbBdgKc,62040
16
+ bigdl/cpp/gguf-py/gguf/tensor_mapping.py,sha256=tUkpDyeMXwf9M1Cx1HL2g6mMavngsj0UYkeWMaOPjQU,35561
17
+ bigdl/cpp/gguf-py/gguf/utility.py,sha256=LAwLstUlosYQ1oX9LlJZ-1uCmwyNtOFcJfXpkLnju0k,3003
18
+ bigdl/cpp/gguf-py/gguf/vocab.py,sha256=ulUC8XudFDuZC2SNKGNQpsU2KFP_YhkyNLIWV16jG6I,20816
19
+ bigdl/cpp/libs/common.lib,sha256=ELuS4xoqHJYsjeta8zWKSOKDZHqrnAwuWa3blD7O4F8,3955904
20
+ bigdl/cpp/libs/ggml-cpu.dll,sha256=WxBkX7Smps1FOcCaDfzxMAKlUgLTqfKvVb7kEt2GAdo,475648
21
+ bigdl/cpp/libs/ggml-sycl.dll,sha256=y0xMUN5r3kn1idTnWep_HOSp3OEMByiq-MYfFUl5Fms,5280768
22
+ bigdl/cpp/libs/ggml.dll,sha256=A8buG4Wm9lJ4byLJAaqfrjacjigfq5aLuoOMU3Gs0Lw,113152
23
+ bigdl/cpp/libs/libc++.dll,sha256=U0TVK2WfFQIJPP6Bz9SeJmgskm2iqZWJorx_DGdfKIw,1561600
24
+ bigdl/cpp/libs/llama-batched.exe,sha256=OXRvTB7SgBF3kSzluFaZ9T6AYLcRIJ-3fDGBQYMSADM,847872
25
+ bigdl/cpp/libs/llama-bench.exe,sha256=MZM1q--5V9DMr_rQjRpe54Hjbj1hzh6B87QqA_cHS0s,277504
26
+ bigdl/cpp/libs/llama-cli.exe,sha256=SbG_NSl6VY7ChmwRD2RYgxO-QI8tiTfzeoj3tq8WkHE,922624
27
+ bigdl/cpp/libs/llama-embedding.exe,sha256=msgusVBW43uLEBxq26Vjg8KKoiHPBGNiCvQR4s7HnfE,870400
28
+ bigdl/cpp/libs/llama-gguf.exe,sha256=ijuXVYdZSeRcDr35M-DmKS5sJQ0Ab_n_b0z5SQU_x9k,58880
29
+ bigdl/cpp/libs/llama-llava-cli.exe,sha256=fqgtEg_c2uXUTgmGQWJ9pFP0ie75_GkswP56qBWGZ4w,1109504
30
+ bigdl/cpp/libs/llama-lookup.exe,sha256=QCPdyiI8AsFFXjAnX1GohxOiFKqPRLoL7_G9K2ihN7Y,904704
31
+ bigdl/cpp/libs/llama-ls-sycl-device.exe,sha256=IfpDycbUBZgmohCtMUNI3k5hDpg1QkqimyVuYPMk_dw,10240
32
+ bigdl/cpp/libs/llama-minicpmv-cli.exe,sha256=j-Tk3T7Kb6N0ccR2F-UTBPuxjFYHfSs5WbfSc9HAGhQ,1107456
33
+ bigdl/cpp/libs/llama-perplexity.exe,sha256=ZEEjyN3JnTjLBwb2DtM4SOHFbgH-HzdfJlKZX_sQIhw,990720
34
+ bigdl/cpp/libs/llama-quantize.exe,sha256=5bKWjuDw3kbjCuwN0iPOobph6rfU-j0os_dYOFG0zHI,122880
35
+ bigdl/cpp/libs/llama-server.exe,sha256=32cg0YZkZTJQ24Vst7kO3RhCDN1BoOrUQUwBVPJgUes,2148864
36
+ bigdl/cpp/libs/llama-simple.exe,sha256=nIQ7VJ_I1Fqc9gsCDt_BVx8M-lIiLh_byycNPzH6lqw,62464
37
+ bigdl/cpp/libs/llama-speculative.exe,sha256=xeDoKmoJPae-r1FBTApS-z5K1AhWA13lbpUQdqGjSpg,907264
38
+ bigdl/cpp/libs/llama-tokenize.exe,sha256=cJLtVEuYfli42CJYGet0beV6m6pW7r2m44QtgOqlRt0,88576
39
+ bigdl/cpp/libs/llama.dll,sha256=2Wgwo2UBczq2s6SBP9TBZMxJMmr_P8CfobB0NTlSkis,1404928
40
+ bigdl/cpp/libs/llava_shared.dll,sha256=vBVJuzZbXjf0W14a5ZO415Wj1-qiwT_Qcex-v71RjVQ,365056
41
+ bigdl/cpp/libs/ollama-ggml-base.dll,sha256=2yGQxfoge3KDesOWcwa8ncFMvyvg7Jd8pl_pvMN6w5w,459776
42
+ bigdl/cpp/libs/ollama-ggml-cpu.dll,sha256=iQEVXQK_tHmGY4UpMrPPj6G9ssbPxHeJQR0s5rqG6cU,475648
43
+ bigdl/cpp/libs/ollama-ggml-sycl.dll,sha256=Y_0PbkDf48ypCVA71QLXap_Wgbd5n76m4gKQEt64NFk,5280768
44
+ bigdl/cpp/libs/ollama.exe,sha256=tTdj_U3k2XHx4R2e7Z_tTOaqnOtQAJQqWNdo6oxTnHU,25979904
45
+ bigdl/cpp/libs/ollama_ggml.dll,sha256=TPiPOCTK2Lfcf4X03NIVwiYTMPeYAKX9VTmeI9b3RKc,113152
46
+ bigdl/cpp/libs/ollama_llama.dll,sha256=UGD_a9ok5m0vnLGtF2cSHwnQJRAV6XaGDBMMdIT0g00,1427968
47
+ bigdl/cpp/libs/ollama_llava_shared.dll,sha256=3e7S_cVWEvfUUEUYWUmTpeDsMS7Wr5_Hofc4lAUmK_w,365056
48
+ bigdl_core_cpp-2.6.0b20250204.post0.data/scripts/init-llama-cpp.bat,sha256=U0h6RifZxL3GGJp-0dxdZapQIvXUATSj644CURJL-lg,751
49
+ bigdl_core_cpp-2.6.0b20250204.post0.data/scripts/init-llama-cpp.ps1,sha256=JFOylLxO4MKpllHhdbPuJ1xHi9azxDpzdJns8JtZpkU,501
50
+ bigdl_core_cpp-2.6.0b20250204.post0.data/scripts/init-ollama.bat,sha256=DKWd9wFMT7hcNVQViMxG3CnqF4iZXCh2BmuS3zQjAuw,642
51
+ bigdl_core_cpp-2.6.0b20250204.post0.dist-info/METADATA,sha256=mLLb2aUYHIfuJCKqAxZ2Wy1s8hPohkWVoh8m2gGrn90,756
52
+ bigdl_core_cpp-2.6.0b20250204.post0.dist-info/WHEEL,sha256=2wr--P33L_Xt79Mrb57-zn6CrTlNaEVHEwbOduMxJRg,97
53
+ bigdl_core_cpp-2.6.0b20250204.post0.dist-info/top_level.txt,sha256=iGuLfZARD_qANcIMfy0tbbrC3EtCg6BSiH8icc3dLWs,6
54
+ bigdl_core_cpp-2.6.0b20250204.post0.dist-info/RECORD,,