bigdl-core-cpp 2.6.0b20250320__py3-none-win_amd64.whl → 2.6.0b20250322__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. bigdl/cpp/convert_hf_to_gguf.py +687 -60
  2. bigdl/cpp/convert_hf_to_gguf_update.py +46 -41
  3. bigdl/cpp/convert_lora_to_gguf.py +33 -5
  4. bigdl/cpp/gguf-py/gguf/constants.py +306 -123
  5. bigdl/cpp/gguf-py/gguf/gguf_writer.py +31 -3
  6. bigdl/cpp/gguf-py/gguf/tensor_mapping.py +122 -25
  7. bigdl/cpp/gguf-py/gguf/utility.py +1 -1
  8. bigdl/cpp/gguf-py/gguf/vocab.py +1 -1
  9. bigdl/cpp/libs/common.lib +0 -0
  10. bigdl/cpp/libs/ggml-base.dll +0 -0
  11. bigdl/cpp/libs/ggml-cpu.dll +0 -0
  12. bigdl/cpp/libs/ggml-sycl.dll +0 -0
  13. bigdl/cpp/libs/ggml.dll +0 -0
  14. bigdl/cpp/libs/llama-batched.exe +0 -0
  15. bigdl/cpp/libs/llama-bench.exe +0 -0
  16. bigdl/cpp/libs/llama-cli.exe +0 -0
  17. bigdl/cpp/libs/llama-embedding.exe +0 -0
  18. bigdl/cpp/libs/llama-gemma3-cli.exe +0 -0
  19. bigdl/cpp/libs/llama-gguf.exe +0 -0
  20. bigdl/cpp/libs/llama-llava-cli.exe +0 -0
  21. bigdl/cpp/libs/llama-lookup.exe +0 -0
  22. bigdl/cpp/libs/llama-ls-sycl-device.exe +0 -0
  23. bigdl/cpp/libs/llama-minicpmv-cli.exe +0 -0
  24. bigdl/cpp/libs/llama-perplexity.exe +0 -0
  25. bigdl/cpp/libs/llama-quantize.exe +0 -0
  26. bigdl/cpp/libs/llama-server.exe +0 -0
  27. bigdl/cpp/libs/llama-simple.exe +0 -0
  28. bigdl/cpp/libs/llama-speculative.exe +0 -0
  29. bigdl/cpp/libs/llama-tokenize.exe +0 -0
  30. bigdl/cpp/libs/llama.dll +0 -0
  31. bigdl/cpp/libs/llava_shared.dll +0 -0
  32. bigdl/cpp/libs/ollama-ggml-base.dll +0 -0
  33. bigdl/cpp/libs/ollama-ggml-cpu.dll +0 -0
  34. bigdl/cpp/libs/ollama-ggml-sycl.dll +0 -0
  35. bigdl/cpp/libs/ollama-lib.exe +0 -0
  36. bigdl/cpp/libs/ollama.exe +0 -0
  37. bigdl/cpp/libs/ollama_ggml.dll +0 -0
  38. bigdl/cpp/libs/ollama_llama.dll +0 -0
  39. bigdl/cpp/libs/ollama_llava_shared.dll +0 -0
  40. {bigdl_core_cpp-2.6.0b20250320.dist-info → bigdl_core_cpp-2.6.0b20250322.dist-info}/METADATA +1 -1
  41. bigdl_core_cpp-2.6.0b20250322.dist-info/RECORD +57 -0
  42. {bigdl_core_cpp-2.6.0b20250320.dist-info → bigdl_core_cpp-2.6.0b20250322.dist-info}/WHEEL +1 -1
  43. bigdl_core_cpp-2.6.0b20250320.dist-info/RECORD +0 -57
  44. {bigdl_core_cpp-2.6.0b20250320.data → bigdl_core_cpp-2.6.0b20250322.data}/scripts/init-llama-cpp.bat +0 -0
  45. {bigdl_core_cpp-2.6.0b20250320.data → bigdl_core_cpp-2.6.0b20250322.data}/scripts/init-llama-cpp.ps1 +0 -0
  46. {bigdl_core_cpp-2.6.0b20250320.data → bigdl_core_cpp-2.6.0b20250322.data}/scripts/init-ollama.bat +0 -0
  47. {bigdl_core_cpp-2.6.0b20250320.dist-info → bigdl_core_cpp-2.6.0b20250322.dist-info}/top_level.txt +0 -0
bigdl/cpp/convert_hf_to_gguf_update.py
@@ -8,7 +8,7 @@
 # provide the necessary information to llama.cpp via the GGUF header in order to implement
 # the same pre-tokenizer.
 #
-# ref: https://github.com/ggerganov/llama.cpp/pull/6920
+# ref: https://github.com/ggml-org/llama.cpp/pull/6920
 #
 # Instructions:
 #
@@ -65,45 +65,50 @@ else:
 
 # TODO: add models here, base models preferred
 models = [
-    {"name": "llama-spm", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/meta-llama/Llama-2-7b-hf", },
-    {"name": "llama-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Meta-Llama-3-8B", },
-    {"name": "phi-3", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct", },
-    {"name": "deepseek-llm", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-llm-7b-base", },
-    {"name": "deepseek-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base", },
-    {"name": "falcon", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/falcon-7b", },
-    {"name": "bert-bge", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/BAAI/bge-small-en-v1.5", },
-    {"name": "bert-bge-large", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/BAAI/bge-large-zh-v1.5", },
-    {"name": "mpt", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mosaicml/mpt-7b", },
-    {"name": "starcoder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigcode/starcoder2-3b", },
-    {"name": "gpt-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/openai-community/gpt2", },
-    {"name": "stablelm2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/stabilityai/stablelm-2-zephyr-1_6b", },
-    {"name": "refact", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/smallcloudai/Refact-1_6-base", },
-    {"name": "command-r", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/CohereForAI/c4ai-command-r-v01", },
-    {"name": "qwen2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen1.5-7B", },
-    {"name": "olmo", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/allenai/OLMo-1.7-7B-hf", },
-    {"name": "dbrx", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/databricks/dbrx-base", },
-    {"name": "jina-v1-en", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-reranker-v1-tiny-en", },
-    {"name": "jina-v2-en", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-en", }, # WPM!
-    {"name": "jina-v2-es", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-es", },
-    {"name": "jina-v2-de", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-de", },
-    {"name": "smaug-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/abacusai/Smaug-Llama-3-70B-Instruct", },
-    {"name": "poro-chat", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LumiOpen/Poro-34B-chat", },
-    {"name": "jina-v2-code", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-code", },
-    {"name": "viking", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LumiOpen/Viking-7B", }, # Also used for Viking 13B and 33B
-    {"name": "gemma", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2b", },
-    {"name": "gemma-2", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2-9b", },
-    {"name": "jais", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/core42/jais-13b", },
-    {"name": "t5", "tokt": TOKENIZER_TYPE.UGM, "repo": "https://huggingface.co/google-t5/t5-small", },
-    {"name": "codeshell", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/WisdomShell/CodeShell-7B", },
-    {"name": "tekken", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mistralai/Mistral-Nemo-Base-2407", },
-    {"name": "smollm", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/HuggingFaceTB/SmolLM-135M", },
-    {'name': "bloom", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigscience/bloom", },
-    {'name': "gpt3-finnish", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/TurkuNLP/gpt3-finnish-small", },
-    {"name": "exaone", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct", },
-    {"name": "phi-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/microsoft/phi-2", },
-    {"name": "chameleon", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/facebook/chameleon-7b", },
-    {"name": "minerva-7b", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0", },
-    {"name": "roberta-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sentence-transformers/stsb-roberta-base"},
+    {"name": "llama-spm", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/meta-llama/Llama-2-7b-hf", },
+    {"name": "llama-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Meta-Llama-3-8B", },
+    {"name": "phi-3", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct", },
+    {"name": "deepseek-llm", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-llm-7b-base", },
+    {"name": "deepseek-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base", },
+    {"name": "falcon", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/falcon-7b", },
+    {"name": "bert-bge", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/BAAI/bge-small-en-v1.5", },
+    {"name": "falcon3", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon3-7B-Base", },
+    {"name": "bert-bge-large", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/BAAI/bge-large-zh-v1.5", },
+    {"name": "mpt", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mosaicml/mpt-7b", },
+    {"name": "starcoder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigcode/starcoder2-3b", },
+    {"name": "gpt-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/openai-community/gpt2", },
+    {"name": "stablelm2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/stabilityai/stablelm-2-zephyr-1_6b", },
+    {"name": "refact", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/smallcloudai/Refact-1_6-base", },
+    {"name": "command-r", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/CohereForAI/c4ai-command-r-v01", },
+    {"name": "qwen2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen1.5-7B", },
+    {"name": "olmo", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/allenai/OLMo-1.7-7B-hf", },
+    {"name": "dbrx", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/databricks/dbrx-base", },
+    {"name": "jina-v1-en", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-reranker-v1-tiny-en", },
+    {"name": "jina-v2-en", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-en", }, # WPM!
+    {"name": "jina-v2-es", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-es", },
+    {"name": "jina-v2-de", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-de", },
+    {"name": "smaug-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/abacusai/Smaug-Llama-3-70B-Instruct", },
+    {"name": "poro-chat", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LumiOpen/Poro-34B-chat", },
+    {"name": "jina-v2-code", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-code", },
+    {"name": "viking", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LumiOpen/Viking-7B", }, # Also used for Viking 13B and 33B
+    {"name": "gemma", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2b", },
+    {"name": "gemma-2", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2-9b", },
+    {"name": "jais", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/core42/jais-13b", },
+    {"name": "t5", "tokt": TOKENIZER_TYPE.UGM, "repo": "https://huggingface.co/google-t5/t5-small", },
+    {"name": "codeshell", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/WisdomShell/CodeShell-7B", },
+    {"name": "tekken", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mistralai/Mistral-Nemo-Base-2407", },
+    {"name": "smollm", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/HuggingFaceTB/SmolLM-135M", },
+    {'name': "bloom", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigscience/bloom", },
+    {'name': "gpt3-finnish", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/TurkuNLP/gpt3-finnish-small", },
+    {"name": "exaone", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct", },
+    {"name": "phi-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/microsoft/phi-2", },
+    {"name": "chameleon", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/facebook/chameleon-7b", },
+    {"name": "minerva-7b", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0", },
+    {"name": "roberta-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sentence-transformers/stsb-roberta-base"},
+    {"name": "gigachat", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ai-sage/GigaChat-20B-A3B-instruct"},
+    {"name": "megrez", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Infinigence/Megrez-3B-Instruct"},
+    {"name": "deepseek-v3", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-V3"},
+    {"name": "deepseek-r1-qwen", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"},
 ]
 
@@ -241,7 +246,7 @@ src_func = f"""
             logger.warning("** - the model has not been added to convert_hf_to_gguf_update.py yet")
             logger.warning("** - the pre-tokenization config has changed upstream")
             logger.warning("** Check your model files and convert_hf_to_gguf_update.py and update them accordingly.")
-            logger.warning("** ref: https://github.com/ggerganov/llama.cpp/pull/6920")
+            logger.warning("** ref: https://github.com/ggml-org/llama.cpp/pull/6920")
             logger.warning("**")
             logger.warning(f"** chkhsh: {{chkhsh}}")
             logger.warning("**************************************************************************************")
bigdl/cpp/convert_lora_to_gguf.py
@@ -226,6 +226,9 @@ def get_base_tensor_name(lora_tensor_name: str) -> str:
     base_name = lora_tensor_name.replace("base_model.model.", "")
     base_name = base_name.replace(".lora_A.weight", ".weight")
     base_name = base_name.replace(".lora_B.weight", ".weight")
+    # models produced by mergekit-extract-lora have token embeddings in the adapter
+    base_name = base_name.replace(".lora_embedding_A", ".weight")
+    base_name = base_name.replace(".lora_embedding_B", ".weight")
     return base_name
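The two added replacements make mergekit-extract-lora's embedding tensors resolve to the same base-model tensor name as ordinary A/B pairs. Given the function above, for example (the tensor names below are typical PEFT naming, not taken from a specific adapter):

    # regular PEFT naming
    get_base_tensor_name("base_model.model.model.layers.0.self_attn.q_proj.lora_A.weight")
    # -> "model.layers.0.self_attn.q_proj.weight"

    # mergekit-extract-lora embedding naming (no ".weight" suffix in the adapter)
    get_base_tensor_name("base_model.model.model.embed_tokens.lora_embedding_A")
    # -> "model.embed_tokens.weight"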
@@ -260,6 +263,10 @@ def parse_args() -> argparse.Namespace:
         "--base", type=Path,
         help="directory containing Hugging Face model config files (config.json, tokenizer.json) for the base model that the adapter is based on - only config is needed, actual model weights are not required. If base model is unspecified, it will be loaded from Hugging Face hub based on the adapter config",
     )
+    parser.add_argument(
+        "--base-model-id", type=str,
+        help="the model ID of the base model, if it is not available locally or in the adapter config. If specified, it will ignore --base and load the base model config from the Hugging Face hub (Example: 'meta-llama/Llama-3.2-1B-Instruct')",
+    )
     parser.add_argument(
         "lora_path", type=Path,
         help="directory containing Hugging Face PEFT LoRA config (adapter_model.json) and weights (adapter_model.safetensors or adapter_model.bin)",
@@ -290,6 +297,7 @@ if __name__ == '__main__':
 
     dir_base_model: Path | None = args.base
     dir_lora: Path = args.lora_path
+    base_model_id: str | None = args.base_model_id
     lora_config = dir_lora / "adapter_config.json"
     input_model = dir_lora / "adapter_model.safetensors"
 
@@ -313,7 +321,10 @@
         lparams: dict[str, Any] = json.load(f)
 
     # load base model
-    if dir_base_model is None:
+    if base_model_id is not None:
+        logger.info(f"Loading base model from Hugging Face: {base_model_id}")
+        hparams = load_hparams_from_hf(base_model_id)
+    elif dir_base_model is None:
         if "base_model_name_or_path" in lparams:
             model_id = lparams["base_model_name_or_path"]
             logger.info(f"Loading base model from Hugging Face: {model_id}")
@@ -371,15 +382,20 @@
                     if self.lazy:
                         tensor = LazyTorchTensor.from_eager(tensor)
                     base_name = get_base_tensor_name(name)
-                    is_lora_a = ".lora_A.weight" in name
-                    is_lora_b = ".lora_B.weight" in name
+                    # note: mergekit-extract-lora also adds token embeddings to the adapter
+                    is_lora_a = ".lora_A.weight" in name or ".lora_embedding_A" in name
+                    is_lora_b = ".lora_B.weight" in name or ".lora_embedding_B" in name
                     if not is_lora_a and not is_lora_b:
                         if ".base_layer.weight" in name:
                             continue
+                        # mergekit-extract-lora add these layernorm to the adapter, we need to keep them
+                        if "_layernorm" in name or ".norm" in name:
+                            yield (base_name, tensor)
+                            continue
                         logger.error(f"Unexpected name '{name}': Not a lora_A or lora_B tensor")
                         if ".embed_tokens.weight" in name or ".lm_head.weight" in name:
                             logger.error("Embeddings is present in the adapter. This can be due to new tokens added during fine tuning")
-                            logger.error("Please refer to https://github.com/ggerganov/llama.cpp/pull/9948")
+                            logger.error("Please refer to https://github.com/ggml-org/llama.cpp/pull/9948")
                         sys.exit(1)
 
                     if base_name in tensor_map:
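The branch above now distinguishes three kinds of adapter tensors: low-rank A/B pairs (including mergekit-extract-lora's lora_embedding_A/B naming for token embeddings), full 1-D norm vectors that are passed through unchanged, and anything else, which is a hard error. A standalone sketch of the same classification (tensor names are illustrative):

    def classify_adapter_tensor(name: str) -> str:
        # mergekit-extract-lora uses ".lora_embedding_A/B" for token embeddings
        if ".lora_A.weight" in name or ".lora_embedding_A" in name:
            return "lora_a"
        if ".lora_B.weight" in name or ".lora_embedding_B" in name:
            return "lora_b"
        if ".base_layer.weight" in name:
            return "skip"  # frozen base weights bundled with some adapters
        # full layernorm vectors are carried into the GGUF adapter as-is
        if "_layernorm" in name or ".norm" in name:
            return "norm"
        return "unexpected"

    assert classify_adapter_tensor("model.layers.0.self_attn.q_proj.lora_A.weight") == "lora_a"
    assert classify_adapter_tensor("model.embed_tokens.lora_embedding_B") == "lora_b"
    assert classify_adapter_tensor("model.layers.0.input_layernorm.weight") == "norm"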
@@ -403,13 +419,25 @@
                 # some archs may have the same tensor for lm_head and output (tie word embeddings)
                 # in this case, adapters targeting lm_head will fail when using llama-export-lora
                 # therefore, we ignore them for now
-                # see: https://github.com/ggerganov/llama.cpp/issues/9065
+                # see: https://github.com/ggml-org/llama.cpp/issues/9065
                 if name == "lm_head.weight" and len(dest) == 0:
                     raise ValueError("lm_head is present in adapter, but is ignored in base model")
                 for dest_name, dest_data in dest:
+                    # mergekit-extract-lora add these layernorm to the adapter
+                    if "_norm" in dest_name:
+                        assert dest_data.dim() == 1
+                        yield (dest_name, dest_data)
+                        continue
+
+                    # otherwise, we must get the lora_A and lora_B tensors
                     assert isinstance(dest_data, LoraTorchTensor)
                     lora_a, lora_b = dest_data.get_lora_A_B()
 
+                    # note: mergekit-extract-lora flip and transpose A and B
+                    # here we only need to transpose token_embd.lora_a, see llm_build_inp_embd()
+                    if "token_embd.weight" in dest_name:
+                        lora_a = lora_a.T
+
                     yield (dest_name + ".lora_a", lora_a)
                     yield (dest_name + ".lora_b", lora_b)