bigdl_core_cpp-2.5.0rc1-py3-none-win_amd64.whl → bigdl_core_cpp-2.6.0b2-py3-none-win_amd64.whl

This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (85)
  1. bigdl/cpp/{convert-hf-to-gguf.py → convert_hf_to_gguf.py} +413 -67
  2. bigdl/cpp/convert_hf_to_gguf_update.py +354 -0
  3. bigdl/cpp/convert_llama_ggml_to_gguf.py +454 -0
  4. bigdl/cpp/convert_lora_to_gguf.py +393 -0
  5. bigdl/cpp/gguf-py/gguf/__init__.py +1 -1
  6. bigdl/cpp/gguf-py/gguf/constants.py +71 -2
  7. bigdl/cpp/gguf-py/gguf/gguf_writer.py +16 -1
  8. bigdl/cpp/gguf-py/gguf/lazy.py +4 -1
  9. bigdl/cpp/gguf-py/gguf/metadata.py +70 -63
  10. bigdl/cpp/gguf-py/gguf/quants.py +1129 -64
  11. bigdl/cpp/gguf-py/gguf/tensor_mapping.py +23 -15
  12. bigdl/cpp/gguf-py/gguf/utility.py +1 -1
  13. bigdl/cpp/gguf-py/gguf/vocab.py +301 -1
  14. bigdl/cpp/libs/common.lib +0 -0
  15. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/ggml.dll +0 -0
  16. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/llama.dll +0 -0
  17. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/ollama_llama_server.exe +0 -0
  18. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/ggml.dll +0 -0
  19. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/llama.dll +0 -0
  20. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/ollama_llama_server.exe +0 -0
  21. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/ggml.dll +0 -0
  22. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/llama.dll +0 -0
  23. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/ollama_llama_server.exe +0 -0
  24. bigdl/cpp/libs/ggml.dll +0 -0
  25. bigdl/cpp/libs/llama-batched.exe +0 -0
  26. bigdl/cpp/libs/llama-bench.exe +0 -0
  27. bigdl/cpp/libs/llama-cli.exe +0 -0
  28. bigdl/cpp/libs/llama-embedding.exe +0 -0
  29. bigdl/cpp/libs/llama-gguf.exe +0 -0
  30. bigdl/cpp/libs/llama-llava-cli.exe +0 -0
  31. bigdl/cpp/libs/llama-lookup.exe +0 -0
  32. bigdl/cpp/libs/{ls-sycl-device.exe → llama-ls-sycl-device.exe} +0 -0
  33. bigdl/cpp/libs/llama-minicpmv-cli.exe +0 -0
  34. bigdl/cpp/libs/llama-perplexity.exe +0 -0
  35. bigdl/cpp/libs/llama-quantize.exe +0 -0
  36. bigdl/cpp/libs/llama-server.exe +0 -0
  37. bigdl/cpp/libs/llama-simple.exe +0 -0
  38. bigdl/cpp/libs/llama-speculative.exe +0 -0
  39. bigdl/cpp/libs/llama-tokenize.exe +0 -0
  40. bigdl/cpp/libs/llama.dll +0 -0
  41. bigdl/cpp/libs/llava_shared.dll +0 -0
  42. bigdl/cpp/libs/ollama.exe +0 -0
  43. {bigdl_core_cpp-2.5.0rc1.data → bigdl_core_cpp-2.6.0b2.data}/scripts/init-llama-cpp.bat +7 -2
  44. {bigdl_core_cpp-2.5.0rc1.data → bigdl_core_cpp-2.6.0b2.data}/scripts/init-ollama.bat +6 -0
  45. {bigdl_core_cpp-2.5.0rc1.dist-info → bigdl_core_cpp-2.6.0b2.dist-info}/METADATA +3 -3
  46. bigdl_core_cpp-2.6.0b2.dist-info/RECORD +54 -0
  47. {bigdl_core_cpp-2.5.0rc1.dist-info → bigdl_core_cpp-2.6.0b2.dist-info}/WHEEL +1 -1
  48. bigdl/cpp/convert.py +0 -1714
  49. bigdl/cpp/libs/baby-llama.exe +0 -0
  50. bigdl/cpp/libs/batched-bench.exe +0 -0
  51. bigdl/cpp/libs/batched.exe +0 -0
  52. bigdl/cpp/libs/beam-search.exe +0 -0
  53. bigdl/cpp/libs/benchmark.exe +0 -0
  54. bigdl/cpp/libs/convert-llama2c-to-ggml.exe +0 -0
  55. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu/ollama_llama_server.exe +0 -0
  56. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx/ollama_llama_server.exe +0 -0
  57. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx2/ollama_llama_server.exe +0 -0
  58. bigdl/cpp/libs/embedding.exe +0 -0
  59. bigdl/cpp/libs/export-lora.exe +0 -0
  60. bigdl/cpp/libs/finetune.exe +0 -0
  61. bigdl/cpp/libs/ggml_shared.dll +0 -0
  62. bigdl/cpp/libs/gguf.exe +0 -0
  63. bigdl/cpp/libs/gritlm.exe +0 -0
  64. bigdl/cpp/libs/imatrix.exe +0 -0
  65. bigdl/cpp/libs/infill.exe +0 -0
  66. bigdl/cpp/libs/llava-cli.exe +0 -0
  67. bigdl/cpp/libs/lookahead.exe +0 -0
  68. bigdl/cpp/libs/lookup.exe +0 -0
  69. bigdl/cpp/libs/main.exe +0 -0
  70. bigdl/cpp/libs/parallel.exe +0 -0
  71. bigdl/cpp/libs/passkey.exe +0 -0
  72. bigdl/cpp/libs/perplexity.exe +0 -0
  73. bigdl/cpp/libs/q8dot.exe +0 -0
  74. bigdl/cpp/libs/quantize-stats.exe +0 -0
  75. bigdl/cpp/libs/quantize.exe +0 -0
  76. bigdl/cpp/libs/save-load-state.exe +0 -0
  77. bigdl/cpp/libs/server.exe +0 -0
  78. bigdl/cpp/libs/simple.exe +0 -0
  79. bigdl/cpp/libs/speculative.exe +0 -0
  80. bigdl/cpp/libs/tokenize.exe +0 -0
  81. bigdl/cpp/libs/train-text-from-scratch.exe +0 -0
  82. bigdl/cpp/libs/vdot.exe +0 -0
  83. bigdl_core_cpp-2.5.0rc1.dist-info/RECORD +0 -63
  84. {bigdl_core_cpp-2.5.0rc1.data → bigdl_core_cpp-2.6.0b2.data}/scripts/init-llama-cpp.ps1 +0 -0
  85. {bigdl_core_cpp-2.5.0rc1.dist-info → bigdl_core_cpp-2.6.0b2.dist-info}/top_level.txt +0 -0
bigdl/cpp/gguf-py/gguf/tensor_mapping.py CHANGED
@@ -10,10 +10,10 @@ class TensorNameMap:
     # Token embeddings
     MODEL_TENSOR.TOKEN_EMBD: (
         "gpt_neox.embed_in", # gptneox
-        "transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais
+        "transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais exaone
         "transformer.word_embeddings", # falcon
         "word_embeddings", # bloom
-        "model.embed_tokens", # llama-hf
+        "model.embed_tokens", # llama-hf nemotron
         "tok_embeddings", # llama-pth
         "embeddings.word_embeddings", # bert nomic-bert
         "language_model.embedding.word_embeddings", # persimmon
@@ -52,7 +52,7 @@ class TensorNameMap:
     # Output
     MODEL_TENSOR.OUTPUT: (
         "embed_out", # gptneox
-        "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais
+        "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone
         "output", # llama-pth bloom internlm2
         "word_embeddings_for_head", # persimmon
         "lm_head.linear", # phi2
@@ -62,7 +62,7 @@ class TensorNameMap:
     # Output norm
     MODEL_TENSOR.OUTPUT_NORM: (
         "gpt_neox.final_layer_norm", # gptneox
-        "transformer.ln_f", # gpt2 gpt-j falcon jais
+        "transformer.ln_f", # gpt2 gpt-j falcon jais exaone
         "model.norm", # llama-hf baichuan internlm2
         "norm", # llama-pth
         "transformer.norm_f", # mpt dbrx
@@ -75,6 +75,7 @@ class TensorNameMap:
         "transformer.rms_norm", # Grok
         "encoder.final_layernorm", # chatglm
         "transformer.norm", # openelm
+        "model.norm", # nemotron
     ),

     # Rope frequencies
@@ -88,12 +89,12 @@ class TensorNameMap:
     # Attention norm
     MODEL_TENSOR.ATTN_NORM: (
         "gpt_neox.layers.{bid}.input_layernorm", # gptneox
-        "transformer.h.{bid}.ln_1", # gpt2 gpt-j refact qwen jais
+        "transformer.h.{bid}.ln_1", # gpt2 gpt-j refact qwen jais exaone
         "transformer.blocks.{bid}.norm_1", # mpt
         "transformer.h.{bid}.input_layernorm", # falcon7b
         "h.{bid}.input_layernorm", # bloom
         "transformer.h.{bid}.ln_mlp", # falcon40b
-        "model.layers.{bid}.input_layernorm", # llama-hf
+        "model.layers.{bid}.input_layernorm", # llama-hf nemotron
         "layers.{bid}.attention_norm", # llama-pth
         "language_model.encoder.layers.{bid}.input_layernorm", # persimmon
         "model.layers.{bid}.ln1", # yi
@@ -135,18 +136,19 @@ class TensorNameMap:

     # Attention query
     MODEL_TENSOR.ATTN_Q: (
-        "model.layers.{bid}.self_attn.q_proj", # llama-hf
+        "model.layers.{bid}.self_attn.q_proj", # llama-hf nemotron
         "layers.{bid}.attention.wq", # llama-pth
         "encoder.layer.{bid}.attention.self.query", # bert
         "transformer.h.{bid}.attn.q_proj", # gpt-j
         "model.layers.layers.{bid}.self_attn.q_proj", # plamo
         "model.layers.{bid}.attention.wq", # internlm2
         "transformer.decoder_layer.{bid}.multi_head_attention.query", # Grok
+        "transformer.h.{bid}.attn.attention.q_proj", # exaone
     ),

     # Attention key
     MODEL_TENSOR.ATTN_K: (
-        "model.layers.{bid}.self_attn.k_proj", # llama-hf
+        "model.layers.{bid}.self_attn.k_proj", # llama-hf nemotron
         "layers.{bid}.attention.wk", # llama-pth
         "encoder.layer.{bid}.attention.self.key", # bert
         "transformer.h.{bid}.attn.k_proj", # gpt-j
@@ -154,18 +156,20 @@ class TensorNameMap:
         "model.layers.layers.{bid}.self_attn.k_proj", # plamo
         "model.layers.{bid}.attention.wk", # internlm2
         "transformer.decoder_layer.{bid}.multi_head_attention.key", # Grok
+        "transformer.h.{bid}.attn.attention.k_proj", # exaone
     ),

     # Attention value
     MODEL_TENSOR.ATTN_V: (
-        "model.layers.{bid}.self_attn.v_proj", # llama-hf
+        "model.layers.{bid}.self_attn.v_proj", # llama-hf nemotron
         "layers.{bid}.attention.wv", # llama-pth
         "encoder.layer.{bid}.attention.self.value", # bert
         "transformer.h.{bid}.attn.v_proj", # gpt-j
         "transformer.h.{bid}.attn.v", # refact
         "model.layers.layers.{bid}.self_attn.v_proj", # plamo
         "model.layers.{bid}.attention.wv", # internlm2
-        "transformer.decoder_layer.{bid}.multi_head_attention.value" # Grok
+        "transformer.decoder_layer.{bid}.multi_head_attention.value", # Grok
+        "transformer.h.{bid}.attn.attention.v_proj", # exaone
     ),

     # Attention output
@@ -175,7 +179,7 @@ class TensorNameMap:
         "transformer.blocks.{bid}.attn.out_proj", # mpt
         "transformer.h.{bid}.self_attention.dense", # falcon
         "h.{bid}.self_attention.dense", # bloom
-        "model.layers.{bid}.self_attn.o_proj", # llama-hf
+        "model.layers.{bid}.self_attn.o_proj", # llama-hf nemotron
         "layers.{bid}.attention.wo", # llama-pth
         "encoder.layer.{bid}.attention.output.dense", # bert
         "transformer.h.{bid}.attn.out_proj", # gpt-j
@@ -190,6 +194,7 @@ class TensorNameMap:
         "transformer.blocks.{bid}.norm_attn_norm.attn.out_proj", # dbrx
         "encoder.layers.{bid}.self_attention.dense", # chatglm
         "transformer.layers.{bid}.attn.out_proj", # openelm
+        "transformer.h.{bid}.attn.attention.out_proj", # exaone
     ),

     # Attention output norm
@@ -215,10 +220,10 @@ class TensorNameMap:
     # Feed-forward norm
     MODEL_TENSOR.FFN_NORM: (
         "gpt_neox.layers.{bid}.post_attention_layernorm", # gptneox
-        "transformer.h.{bid}.ln_2", # gpt2 refact qwen jais
+        "transformer.h.{bid}.ln_2", # gpt2 refact qwen jais exaone
         "h.{bid}.post_attention_layernorm", # bloom
         "transformer.blocks.{bid}.norm_2", # mpt
-        "model.layers.{bid}.post_attention_layernorm", # llama-hf
+        "model.layers.{bid}.post_attention_layernorm", # llama-hf nemotron
         "layers.{bid}.ffn_norm", # llama-pth
         "language_model.encoder.layers.{bid}.post_attention_layernorm", # persimmon
         "model.layers.{bid}.ln2", # yi
@@ -258,7 +263,7 @@ class TensorNameMap:
         "transformer.blocks.{bid}.ffn.up_proj", # mpt
         "transformer.h.{bid}.mlp.dense_h_to_4h", # falcon
         "h.{bid}.mlp.dense_h_to_4h", # bloom
-        "model.layers.{bid}.mlp.up_proj", # llama-hf refact
+        "model.layers.{bid}.mlp.up_proj", # llama-hf refact nemotron
         "layers.{bid}.feed_forward.w3", # llama-pth
         "encoder.layer.{bid}.intermediate.dense", # bert
         "transformer.h.{bid}.mlp.fc_in", # gpt-j
@@ -277,6 +282,7 @@ class TensorNameMap:
         "encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2
         "model.layers.{bid}.residual_mlp.w3", # arctic
         "encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
+        "transformer.h.{bid}.mlp.c_fc_1", # exaone
     ),

     MODEL_TENSOR.FFN_UP_EXP: (
@@ -308,6 +314,7 @@ class TensorNameMap:
         "encoder.layer.{bid}.mlp.gated_layers_w", # jina-bert-v2
         "transformer.h.{bid}.mlp.linear_1", # refact
         "model.layers.{bid}.residual_mlp.w1", # arctic
+        "transformer.h.{bid}.mlp.c_fc_0", # exaone
     ),

     MODEL_TENSOR.FFN_GATE_EXP: (
@@ -329,7 +336,7 @@ class TensorNameMap:
         "transformer.blocks.{bid}.ffn.down_proj", # mpt
         "transformer.h.{bid}.mlp.dense_4h_to_h", # falcon
         "h.{bid}.mlp.dense_4h_to_h", # bloom
-        "model.layers.{bid}.mlp.down_proj", # llama-hf
+        "model.layers.{bid}.mlp.down_proj", # llama-hf nemotron
         "layers.{bid}.feed_forward.w2", # llama-pth
         "encoder.layer.{bid}.output.dense", # bert
         "transformer.h.{bid}.mlp.fc_out", # gpt-j
@@ -347,6 +354,7 @@ class TensorNameMap:
         "model.layers.{bid}.residual_mlp.w2", # arctic
         "encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2
         "encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
+        "model.layers.h.{bid}.mlp.c_proj", # exaone
     ),

     MODEL_TENSOR.FFN_DOWN_EXP: (
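
These mapping changes are easiest to read from the consumer side: convert_hf_to_gguf.py builds a TensorNameMap for the target architecture and asks it to translate each Hugging Face tensor name into the corresponding GGUF name. The following is a minimal illustrative sketch, not taken from the diff, assuming the bundled gguf-py is importable and exposes the usual get_tensor_name_map helper (which architecture enum members exist depends on the bundled constants.py):

import gguf

# Build the name map for a 32-block llama-style checkpoint (hypothetical block count).
tensor_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.LLAMA, 32)

# "model.layers.0.self_attn.q_proj" is the llama-hf spelling that, after this change,
# also covers nemotron checkpoints; it is expected to resolve to something like
# "blk.0.attn_q.weight" once the tried suffix is re-attached.
print(tensor_map.get_name("model.layers.0.self_attn.q_proj.weight",
                          try_suffixes=(".weight", ".bias")))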
bigdl/cpp/gguf-py/gguf/utility.py CHANGED
@@ -66,4 +66,4 @@ def naming_convention(model_name: str | None, base_name: str | None, finetune_st

     kind = f"-{model_type.strip().replace(' ', '-')}" if model_type is not None else ""

-    return f"{name}{parameters}{finetune}{version}{encoding}{kind}"
+    return f"{name}{parameters}{finetune}{version}{encoding}{kind}"
bigdl/cpp/gguf-py/gguf/vocab.py CHANGED
@@ -1,10 +1,15 @@
 from __future__ import annotations

+import re
 import logging
 import json
 import os
 from pathlib import Path
-from typing import Any, Callable, Sequence, Mapping, Iterable
+from typing import Any, Callable, Sequence, Mapping, Iterable, Protocol, ClassVar, runtime_checkable
+
+from sentencepiece import SentencePieceProcessor
+
+import gguf

 from .gguf_writer import GGUFWriter

@@ -163,3 +168,298 @@ class SpecialVocab:
         for typ in self.special_token_types:
             self._set_special_token(typ, config.get(f'{typ}_token_id'))
         return True
+
+
+@runtime_checkable
+class BaseVocab(Protocol):
+    tokenizer_model: ClassVar[str]
+    name: ClassVar[str]
+
+
+@runtime_checkable
+class Vocab(BaseVocab, Protocol):
+    vocab_size: int
+    added_tokens_dict: dict[str, int]
+    added_tokens_list: list[str]
+    fname_tokenizer: Path
+
+    def __init__(self, base_path: Path): ...
+    def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]: ...
+
+
+class NoVocab(BaseVocab):
+    tokenizer_model = "no_vocab"
+    name = "no_vocab"
+
+    def __repr__(self) -> str:
+        return "<NoVocab for a model without integrated vocabulary>"
+
+
+class BpeVocab(Vocab):
+    tokenizer_model = "gpt2"
+    name = "bpe"
+
+    def __init__(self, base_path: Path):
+        added_tokens: dict[str, int] = {}
+
+        if (fname_tokenizer := base_path / 'vocab.json').exists():
+            # "slow" tokenizer
+            with open(fname_tokenizer, encoding="utf-8") as f:
+                self.vocab = json.load(f)
+
+            try:
+                # FIXME: Verify that added tokens here _cannot_ overlap with the main vocab.
+                with open(base_path / 'added_tokens.json', encoding="utf-8") as f:
+                    added_tokens = json.load(f)
+            except FileNotFoundError:
+                pass
+        else:
+            # "fast" tokenizer
+            fname_tokenizer = base_path / 'tokenizer.json'
+
+            # if this fails, FileNotFoundError propagates to caller
+            with open(fname_tokenizer, encoding="utf-8") as f:
+                tokenizer_json = json.load(f)
+
+            tokenizer_model: dict[str, Any] = tokenizer_json['model']
+            if (
+                tokenizer_model['type'] != 'BPE' or tokenizer_model.get('byte_fallback', False)
+                or tokenizer_json['decoder']['type'] != 'ByteLevel'
+            ):
+                raise FileNotFoundError('Cannot find GPT-2 BPE tokenizer')
+
+            self.vocab = tokenizer_model["vocab"]
+
+            if (added := tokenizer_json.get('added_tokens')) is not None:
+                # Added tokens here can be duplicates of the main vocabulary.
+                added_tokens = {item['content']: item['id']
+                                for item in added
+                                if item['content'] not in self.vocab}
+
+        vocab_size = len(self.vocab)
+        expected_ids = list(range(vocab_size, vocab_size + len(added_tokens)))
+        actual_ids = sorted(added_tokens.values())
+        if expected_ids != actual_ids:
+            expected_end_id = vocab_size + len(actual_ids) - 1
+            raise ValueError(f"Expected the {len(actual_ids)} added token ID(s) to be sequential in the range "
+                             f"{vocab_size} - {expected_end_id}; got {actual_ids}")
+
+        items = sorted(added_tokens.items(), key=lambda text_idx: text_idx[1])
+        self.added_tokens_dict = added_tokens
+        self.added_tokens_list = [text for (text, idx) in items]
+        self.vocab_size_base = vocab_size
+        self.vocab_size = self.vocab_size_base + len(self.added_tokens_list)
+        self.fname_tokenizer = fname_tokenizer
+
+    def bpe_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
+        reverse_vocab = {id: encoded_tok for encoded_tok, id in self.vocab.items()}
+
+        for i, _ in enumerate(self.vocab):
+            yield reverse_vocab[i], 0.0, gguf.TokenType.NORMAL
+
+    def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
+        for text in self.added_tokens_list:
+            score = -1000.0
+            yield text.encode("utf-8"), score, gguf.TokenType.CONTROL
+
+    def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
+        yield from self.bpe_tokens()
+        yield from self.added_tokens()
+
+    def __repr__(self) -> str:
+        return f"<BpeVocab with {self.vocab_size_base} base tokens and {len(self.added_tokens_list)} added tokens>"
+
+
+class SentencePieceVocab(Vocab):
+    tokenizer_model = "llama"
+    name = "spm"
+
+    def __init__(self, base_path: Path):
+        added_tokens: dict[str, int] = {}
+        if (fname_tokenizer := base_path / 'tokenizer.model').exists():
+            # normal location
+            try:
+                with open(base_path / 'added_tokens.json', encoding="utf-8") as f:
+                    added_tokens = json.load(f)
+            except FileNotFoundError:
+                pass
+        elif not (fname_tokenizer := base_path.parent / 'tokenizer.model').exists():
+            # not found in alternate location either
+            raise FileNotFoundError('Cannot find tokenizer.model')
+
+        self.sentencepiece_tokenizer = SentencePieceProcessor()
+        self.sentencepiece_tokenizer.LoadFromFile(str(fname_tokenizer))
+        vocab_size = self.sentencepiece_tokenizer.vocab_size()
+
+        new_tokens = {id: piece for piece, id in added_tokens.items() if id >= vocab_size}
+        expected_new_ids = list(range(vocab_size, vocab_size + len(new_tokens)))
+        actual_new_ids = sorted(new_tokens.keys())
+
+        if expected_new_ids != actual_new_ids:
+            raise ValueError(f"Expected new token IDs {expected_new_ids} to be sequential; got {actual_new_ids}")
+
+        # Token pieces that were added to the base vocabulary.
+        self.added_tokens_dict = added_tokens
+        self.added_tokens_list = [new_tokens[id] for id in actual_new_ids]
+        self.vocab_size_base = vocab_size
+        self.vocab_size = self.vocab_size_base + len(self.added_tokens_list)
+        self.fname_tokenizer = fname_tokenizer
+
+    def sentencepiece_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
+        tokenizer = self.sentencepiece_tokenizer
+        for i in range(tokenizer.vocab_size()):
+            piece = tokenizer.IdToPiece(i)
+            text = piece.encode("utf-8")
+            score: float = tokenizer.GetScore(i)
+
+            toktype = gguf.TokenType.NORMAL
+            if tokenizer.IsUnknown(i):
+                toktype = gguf.TokenType.UNKNOWN
+            if tokenizer.IsControl(i):
+                toktype = gguf.TokenType.CONTROL
+
+            # NOTE: I think added_tokens are user defined.
+            # ref: https://github.com/google/sentencepiece/blob/master/src/sentencepiece_model.proto
+            # if tokenizer.is_user_defined(i): toktype = gguf.TokenType.USER_DEFINED
+
+            if tokenizer.IsUnused(i):
+                toktype = gguf.TokenType.UNUSED
+            if tokenizer.IsByte(i):
+                toktype = gguf.TokenType.BYTE
+
+            yield text, score, toktype
+
+    def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
+        for text in self.added_tokens_list:
+            score = -1000.0
+            yield text.encode("utf-8"), score, gguf.TokenType.USER_DEFINED
+
+    def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
+        yield from self.sentencepiece_tokens()
+        yield from self.added_tokens()
+
+    def __repr__(self) -> str:
+        return f"<SentencePieceVocab with {self.vocab_size_base} base tokens and {len(self.added_tokens_list)} added tokens>"
+
+
+class LlamaHfVocab(Vocab):
+    tokenizer_model = "llama"
+    name = "hfft"
+
+    def __init__(self, base_path: Path):
+        fname_tokenizer = base_path / 'tokenizer.json'
+        # if this fails, FileNotFoundError propagates to caller
+        with open(fname_tokenizer, encoding='utf-8') as f:
+            tokenizer_json = json.load(f)
+
+        # pre-check so we know if we need transformers
+        tokenizer_model: dict[str, Any] = tokenizer_json['model']
+        is_llama3 = (
+            tokenizer_model['type'] == 'BPE' and tokenizer_model.get('ignore_merges', False)
+            and not tokenizer_model.get('byte_fallback', True)
+        )
+        if is_llama3:
+            raise TypeError('Llama 3 must be converted with BpeVocab')
+
+        if not is_llama3 and (
+            tokenizer_model['type'] != 'BPE' or not tokenizer_model.get('byte_fallback', False)
+            or tokenizer_json['decoder']['type'] != 'Sequence'
+        ):
+            raise FileNotFoundError('Cannot find Llama BPE tokenizer')
+
+        try:
+            from transformers import AutoTokenizer
+        except ImportError as e:
+            raise ImportError(
+                "To use LlamaHfVocab, please install the `transformers` package. "
+                "You can install it with `pip install transformers`."
+            ) from e
+
+        # Allow the tokenizer to default to slow or fast versions.
+        # Explicitly set tokenizer to use local paths.
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            base_path,
+            cache_dir=base_path,
+            local_files_only=True,
+        )
+        assert self.tokenizer.is_fast  # assume tokenizer.json is used
+
+        # Initialize lists and dictionaries for added tokens
+        self.added_tokens_list = []
+        self.added_tokens_dict = dict()
+        self.added_tokens_ids = set()
+
+        # Process added tokens
+        for tok, tokidx in sorted(
+            self.tokenizer.get_added_vocab().items(), key=lambda x: x[1]
+        ):
+            # Only consider added tokens that are not in the base vocabulary
+            if tokidx >= self.tokenizer.vocab_size:
+                self.added_tokens_list.append(tok)
+                self.added_tokens_dict[tok] = tokidx
+                self.added_tokens_ids.add(tokidx)
+
+        # Store special tokens and their IDs
+        self.specials = {
+            tok: self.tokenizer.get_vocab()[tok]
+            for tok in self.tokenizer.all_special_tokens
+        }
+        self.special_ids = set(self.tokenizer.all_special_ids)
+
+        # Set vocabulary sizes
+        self.vocab_size_base = self.tokenizer.vocab_size
+        self.vocab_size = self.vocab_size_base + len(self.added_tokens_list)
+
+        self.fname_tokenizer = fname_tokenizer
+
+    def hf_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
+        reverse_vocab = {
+            id: encoded_tok for encoded_tok, id in self.tokenizer.get_vocab().items()
+        }
+
+        for token_id in range(self.vocab_size_base):
+            # Skip processing added tokens here
+            if token_id in self.added_tokens_ids:
+                continue
+
+            # Convert token text to bytes
+            token_text = reverse_vocab[token_id].encode("utf-8")
+
+            # Yield token text, score, and type
+            yield token_text, self.get_token_score(token_id), self.get_token_type(
+                token_id, token_text, self.special_ids  # Reuse already stored special IDs
+            )
+
+    def get_token_type(self, token_id: int, token_text: bytes, special_ids: set[int]) -> gguf.TokenType:
+        # Special case for byte tokens
+        if re.fullmatch(br"<0x[0-9A-Fa-f]{2}>", token_text):
+            return gguf.TokenType.BYTE
+
+        # Determine token type based on whether it's a special token
+        return gguf.TokenType.CONTROL if token_id in special_ids else gguf.TokenType.NORMAL
+
+    def get_token_score(self, token_id: int) -> float:
+        # Placeholder for actual logic to determine the token's score
+        # This needs to be implemented based on specific requirements
+        return -1000.0  # Default score
+
+    def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
+        for text in self.added_tokens_list:
+            if text in self.specials:
+                toktype = self.get_token_type(self.specials[text], b'', self.special_ids)
+                score = self.get_token_score(self.specials[text])
+            else:
+                toktype = gguf.TokenType.USER_DEFINED
+                score = -1000.0
+
+            yield text.encode("utf-8"), score, toktype
+
+    def has_newline_token(self):
+        return "<0x0A>" in self.tokenizer.vocab or "\n" in self.tokenizer.vocab
+
+    def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
+        yield from self.hf_tokens()
+        yield from self.added_tokens()
+
+    def __repr__(self) -> str:
+        return f"<LlamaHfVocab with {self.vocab_size_base} base tokens and {len(self.added_tokens_list)} added tokens>"
bigdl/cpp/libs/common.lib CHANGED
Binary file
bigdl/cpp/libs/llama.dll CHANGED
Binary file
bigdl/cpp/libs/ollama.exe CHANGED
Binary file
{bigdl_core_cpp-2.5.0rc1.data → bigdl_core_cpp-2.6.0b2.data}/scripts/init-llama-cpp.bat CHANGED
@@ -9,11 +9,16 @@ set "destination_folder=%cd%"
 pushd "%lib_dir%"
 for %%f in (*) do (
     if not "%%f"=="ollama.exe" (
+        if exist "%destination_folder%\%%~nxf" (
+            del /f "%destination_folder%\%%~nxf"
+        )
         mklink "%destination_folder%\%%~nxf" "%%~ff"
     )
 )
 popd

-copy "%cpp_dir%\convert.py" .
-copy "%cpp_dir%\convert-hf-to-gguf.py" .
+copy "%cpp_dir%\convert_hf_to_gguf.py" .
+copy "%cpp_dir%\convert_hf_to_gguf_update.py" .
+copy "%cpp_dir%\convert_llama_ggml_to_gguf.py" .
+copy "%cpp_dir%\convert_lora_to_gguf.py" .
 xcopy /E /I "%cpp_dir%\gguf-py\" .\gguf-py
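
Once init-llama-cpp.bat has linked the binaries and copied the four conversion scripts plus gguf-py into the working directory, a checkpoint can be converted from that same directory. A hedged sketch (the model path and output name are placeholders, and --outfile is assumed to behave as in the upstream llama.cpp converter the script is copied from):

import subprocess
import sys

# Run the copied converter against a local Hugging Face checkpoint (placeholder path).
subprocess.run(
    [sys.executable, "convert_hf_to_gguf.py", "models/my-llama-hf",
     "--outfile", "my-llama.gguf"],
    check=True,
)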
{bigdl_core_cpp-2.5.0rc1.data → bigdl_core_cpp-2.6.0b2.data}/scripts/init-ollama.bat CHANGED
@@ -9,5 +9,11 @@ set "target_path=%cd%\ollama.exe"
 set "source_dist_dir=%lib_dir%\dist"
 set "target_dist_dir=%cd%\dist"

+if exist "%target_path%" (
+    del /f "%target_path%"
+)
 mklink "%target_path%" "%source_path%"
+if exist "%target_dist_dir%" (
+    rmdir /s /q "%target_dist_dir%"
+)
 mklink /D "%target_dist_dir%" "%source_dist_dir%"
{bigdl_core_cpp-2.5.0rc1.dist-info → bigdl_core_cpp-2.6.0b2.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: bigdl-core-cpp
-Version: 2.5.0rc1
+Version: 2.6.0b2
 Summary: Large Language Model Develop Toolkit
 Author: BigDL Authors
 License: Apache License, Version 2.0
@@ -10,9 +10,9 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: Implementation :: CPython
 Requires-Dist: torch==2.2.0
 Requires-Dist: numpy==1.26.4
-Requires-Dist: transformers<5.0.0,>=4.35.2
+Requires-Dist: transformers==4.44.2
 Requires-Dist: sentencepiece~=0.1.98
-Requires-Dist: accelerate==0.21.0
+Requires-Dist: accelerate==0.33.0
 Requires-Dist: protobuf<5.0.0,>=4.21.0
 Requires-Dist: gguf>=0.1.0
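
The dependency pins move from ranges to exact versions (transformers 4.44.2, accelerate 0.33.0). A small illustrative check, not part of the package, that an installed environment matches the pins declared in the new METADATA:

from importlib.metadata import version

pinned = {
    "torch": "2.2.0",
    "numpy": "1.26.4",
    "transformers": "4.44.2",
    "accelerate": "0.33.0",
}

for package, expected in pinned.items():
    installed = version(package)  # raises PackageNotFoundError if the package is missing
    status = "OK" if installed == expected else f"MISMATCH (pinned {expected})"
    print(f"{package}: {installed} {status}")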