bigdl-core-cpp 2.5.0rc1__py3-none-win_amd64.whl → 2.6.0b1__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bigdl/cpp/{convert-hf-to-gguf.py → convert_hf_to_gguf.py} +413 -67
- bigdl/cpp/convert_hf_to_gguf_update.py +354 -0
- bigdl/cpp/convert_llama_ggml_to_gguf.py +454 -0
- bigdl/cpp/convert_lora_to_gguf.py +393 -0
- bigdl/cpp/gguf-py/gguf/__init__.py +1 -1
- bigdl/cpp/gguf-py/gguf/constants.py +71 -2
- bigdl/cpp/gguf-py/gguf/gguf_writer.py +16 -1
- bigdl/cpp/gguf-py/gguf/lazy.py +4 -1
- bigdl/cpp/gguf-py/gguf/metadata.py +70 -63
- bigdl/cpp/gguf-py/gguf/quants.py +1129 -64
- bigdl/cpp/gguf-py/gguf/tensor_mapping.py +23 -15
- bigdl/cpp/gguf-py/gguf/utility.py +1 -1
- bigdl/cpp/gguf-py/gguf/vocab.py +301 -1
- bigdl/cpp/libs/common.lib +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/ggml.dll +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/llama.dll +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/ollama_llama_server.exe +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/ggml.dll +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/llama.dll +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/ollama_llama_server.exe +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/ggml.dll +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/llama.dll +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/ollama_llama_server.exe +0 -0
- bigdl/cpp/libs/ggml.dll +0 -0
- bigdl/cpp/libs/llama-batched.exe +0 -0
- bigdl/cpp/libs/llama-bench.exe +0 -0
- bigdl/cpp/libs/llama-cli.exe +0 -0
- bigdl/cpp/libs/llama-embedding.exe +0 -0
- bigdl/cpp/libs/llama-gguf.exe +0 -0
- bigdl/cpp/libs/llama-llava-cli.exe +0 -0
- bigdl/cpp/libs/llama-lookup.exe +0 -0
- bigdl/cpp/libs/{ls-sycl-device.exe → llama-ls-sycl-device.exe} +0 -0
- bigdl/cpp/libs/llama-minicpmv-cli.exe +0 -0
- bigdl/cpp/libs/llama-perplexity.exe +0 -0
- bigdl/cpp/libs/llama-quantize.exe +0 -0
- bigdl/cpp/libs/llama-server.exe +0 -0
- bigdl/cpp/libs/llama-simple.exe +0 -0
- bigdl/cpp/libs/llama-speculative.exe +0 -0
- bigdl/cpp/libs/llama-tokenize.exe +0 -0
- bigdl/cpp/libs/llama.dll +0 -0
- bigdl/cpp/libs/llava_shared.dll +0 -0
- bigdl/cpp/libs/ollama.exe +0 -0
- {bigdl_core_cpp-2.5.0rc1.data → bigdl_core_cpp-2.6.0b1.data}/scripts/init-llama-cpp.bat +7 -2
- {bigdl_core_cpp-2.5.0rc1.data → bigdl_core_cpp-2.6.0b1.data}/scripts/init-ollama.bat +6 -0
- {bigdl_core_cpp-2.5.0rc1.dist-info → bigdl_core_cpp-2.6.0b1.dist-info}/METADATA +3 -3
- bigdl_core_cpp-2.6.0b1.dist-info/RECORD +54 -0
- {bigdl_core_cpp-2.5.0rc1.dist-info → bigdl_core_cpp-2.6.0b1.dist-info}/WHEEL +1 -1
- bigdl/cpp/convert.py +0 -1714
- bigdl/cpp/libs/baby-llama.exe +0 -0
- bigdl/cpp/libs/batched-bench.exe +0 -0
- bigdl/cpp/libs/batched.exe +0 -0
- bigdl/cpp/libs/beam-search.exe +0 -0
- bigdl/cpp/libs/benchmark.exe +0 -0
- bigdl/cpp/libs/convert-llama2c-to-ggml.exe +0 -0
- bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu/ollama_llama_server.exe +0 -0
- bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx/ollama_llama_server.exe +0 -0
- bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx2/ollama_llama_server.exe +0 -0
- bigdl/cpp/libs/embedding.exe +0 -0
- bigdl/cpp/libs/export-lora.exe +0 -0
- bigdl/cpp/libs/finetune.exe +0 -0
- bigdl/cpp/libs/ggml_shared.dll +0 -0
- bigdl/cpp/libs/gguf.exe +0 -0
- bigdl/cpp/libs/gritlm.exe +0 -0
- bigdl/cpp/libs/imatrix.exe +0 -0
- bigdl/cpp/libs/infill.exe +0 -0
- bigdl/cpp/libs/llava-cli.exe +0 -0
- bigdl/cpp/libs/lookahead.exe +0 -0
- bigdl/cpp/libs/lookup.exe +0 -0
- bigdl/cpp/libs/main.exe +0 -0
- bigdl/cpp/libs/parallel.exe +0 -0
- bigdl/cpp/libs/passkey.exe +0 -0
- bigdl/cpp/libs/perplexity.exe +0 -0
- bigdl/cpp/libs/q8dot.exe +0 -0
- bigdl/cpp/libs/quantize-stats.exe +0 -0
- bigdl/cpp/libs/quantize.exe +0 -0
- bigdl/cpp/libs/save-load-state.exe +0 -0
- bigdl/cpp/libs/server.exe +0 -0
- bigdl/cpp/libs/simple.exe +0 -0
- bigdl/cpp/libs/speculative.exe +0 -0
- bigdl/cpp/libs/tokenize.exe +0 -0
- bigdl/cpp/libs/train-text-from-scratch.exe +0 -0
- bigdl/cpp/libs/vdot.exe +0 -0
- bigdl_core_cpp-2.5.0rc1.dist-info/RECORD +0 -63
- {bigdl_core_cpp-2.5.0rc1.data → bigdl_core_cpp-2.6.0b1.data}/scripts/init-llama-cpp.ps1 +0 -0
- {bigdl_core_cpp-2.5.0rc1.dist-info → bigdl_core_cpp-2.6.0b1.dist-info}/top_level.txt +0 -0
bigdl/cpp/gguf-py/gguf/tensor_mapping.py
CHANGED
@@ -10,10 +10,10 @@ class TensorNameMap:
         # Token embeddings
         MODEL_TENSOR.TOKEN_EMBD: (
             "gpt_neox.embed_in",  # gptneox
-            "transformer.wte",  # gpt2 gpt-j mpt refact qwen dbrx jais
+            "transformer.wte",  # gpt2 gpt-j mpt refact qwen dbrx jais exaone
             "transformer.word_embeddings",  # falcon
             "word_embeddings",  # bloom
-            "model.embed_tokens",  # llama-hf
+            "model.embed_tokens",  # llama-hf nemotron
             "tok_embeddings",  # llama-pth
             "embeddings.word_embeddings",  # bert nomic-bert
             "language_model.embedding.word_embeddings",  # persimmon
@@ -52,7 +52,7 @@ class TensorNameMap:
         # Output
         MODEL_TENSOR.OUTPUT: (
             "embed_out",  # gptneox
-            "lm_head",  # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais
+            "lm_head",  # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone
             "output",  # llama-pth bloom internlm2
             "word_embeddings_for_head",  # persimmon
             "lm_head.linear",  # phi2
@@ -62,7 +62,7 @@ class TensorNameMap:
         # Output norm
         MODEL_TENSOR.OUTPUT_NORM: (
             "gpt_neox.final_layer_norm",  # gptneox
-            "transformer.ln_f",  # gpt2 gpt-j falcon jais
+            "transformer.ln_f",  # gpt2 gpt-j falcon jais exaone
             "model.norm",  # llama-hf baichuan internlm2
             "norm",  # llama-pth
             "transformer.norm_f",  # mpt dbrx
@@ -75,6 +75,7 @@ class TensorNameMap:
             "transformer.rms_norm",  # Grok
             "encoder.final_layernorm",  # chatglm
             "transformer.norm",  # openelm
+            "model.norm",  # nemotron
         ),

         # Rope frequencies
@@ -88,12 +89,12 @@ class TensorNameMap:
         # Attention norm
         MODEL_TENSOR.ATTN_NORM: (
             "gpt_neox.layers.{bid}.input_layernorm",  # gptneox
-            "transformer.h.{bid}.ln_1",  # gpt2 gpt-j refact qwen jais
+            "transformer.h.{bid}.ln_1",  # gpt2 gpt-j refact qwen jais exaone
             "transformer.blocks.{bid}.norm_1",  # mpt
             "transformer.h.{bid}.input_layernorm",  # falcon7b
             "h.{bid}.input_layernorm",  # bloom
             "transformer.h.{bid}.ln_mlp",  # falcon40b
-            "model.layers.{bid}.input_layernorm",  # llama-hf
+            "model.layers.{bid}.input_layernorm",  # llama-hf nemotron
             "layers.{bid}.attention_norm",  # llama-pth
             "language_model.encoder.layers.{bid}.input_layernorm",  # persimmon
             "model.layers.{bid}.ln1",  # yi
@@ -135,18 +136,19 @@ class TensorNameMap:

         # Attention query
         MODEL_TENSOR.ATTN_Q: (
-            "model.layers.{bid}.self_attn.q_proj",  # llama-hf
+            "model.layers.{bid}.self_attn.q_proj",  # llama-hf nemotron
             "layers.{bid}.attention.wq",  # llama-pth
             "encoder.layer.{bid}.attention.self.query",  # bert
             "transformer.h.{bid}.attn.q_proj",  # gpt-j
             "model.layers.layers.{bid}.self_attn.q_proj",  # plamo
             "model.layers.{bid}.attention.wq",  # internlm2
             "transformer.decoder_layer.{bid}.multi_head_attention.query",  # Grok
+            "transformer.h.{bid}.attn.attention.q_proj",  # exaone
         ),

         # Attention key
         MODEL_TENSOR.ATTN_K: (
-            "model.layers.{bid}.self_attn.k_proj",  # llama-hf
+            "model.layers.{bid}.self_attn.k_proj",  # llama-hf nemotron
             "layers.{bid}.attention.wk",  # llama-pth
             "encoder.layer.{bid}.attention.self.key",  # bert
             "transformer.h.{bid}.attn.k_proj",  # gpt-j
@@ -154,18 +156,20 @@ class TensorNameMap:
             "model.layers.layers.{bid}.self_attn.k_proj",  # plamo
             "model.layers.{bid}.attention.wk",  # internlm2
             "transformer.decoder_layer.{bid}.multi_head_attention.key",  # Grok
+            "transformer.h.{bid}.attn.attention.k_proj",  # exaone
         ),

         # Attention value
         MODEL_TENSOR.ATTN_V: (
-            "model.layers.{bid}.self_attn.v_proj",  # llama-hf
+            "model.layers.{bid}.self_attn.v_proj",  # llama-hf nemotron
             "layers.{bid}.attention.wv",  # llama-pth
             "encoder.layer.{bid}.attention.self.value",  # bert
             "transformer.h.{bid}.attn.v_proj",  # gpt-j
             "transformer.h.{bid}.attn.v",  # refact
             "model.layers.layers.{bid}.self_attn.v_proj",  # plamo
             "model.layers.{bid}.attention.wv",  # internlm2
-            "transformer.decoder_layer.{bid}.multi_head_attention.value"
+            "transformer.decoder_layer.{bid}.multi_head_attention.value",  # Grok
+            "transformer.h.{bid}.attn.attention.v_proj",  # exaone
         ),

         # Attention output
@@ -175,7 +179,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.attn.out_proj",  # mpt
             "transformer.h.{bid}.self_attention.dense",  # falcon
             "h.{bid}.self_attention.dense",  # bloom
-            "model.layers.{bid}.self_attn.o_proj",  # llama-hf
+            "model.layers.{bid}.self_attn.o_proj",  # llama-hf nemotron
             "layers.{bid}.attention.wo",  # llama-pth
             "encoder.layer.{bid}.attention.output.dense",  # bert
             "transformer.h.{bid}.attn.out_proj",  # gpt-j
@@ -190,6 +194,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.norm_attn_norm.attn.out_proj",  # dbrx
             "encoder.layers.{bid}.self_attention.dense",  # chatglm
             "transformer.layers.{bid}.attn.out_proj",  # openelm
+            "transformer.h.{bid}.attn.attention.out_proj",  # exaone
         ),

         # Attention output norm
@@ -215,10 +220,10 @@ class TensorNameMap:
         # Feed-forward norm
         MODEL_TENSOR.FFN_NORM: (
             "gpt_neox.layers.{bid}.post_attention_layernorm",  # gptneox
-            "transformer.h.{bid}.ln_2",  # gpt2 refact qwen jais
+            "transformer.h.{bid}.ln_2",  # gpt2 refact qwen jais exaone
             "h.{bid}.post_attention_layernorm",  # bloom
             "transformer.blocks.{bid}.norm_2",  # mpt
-            "model.layers.{bid}.post_attention_layernorm",  # llama-hf
+            "model.layers.{bid}.post_attention_layernorm",  # llama-hf nemotron
             "layers.{bid}.ffn_norm",  # llama-pth
             "language_model.encoder.layers.{bid}.post_attention_layernorm",  # persimmon
             "model.layers.{bid}.ln2",  # yi
@@ -258,7 +263,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.ffn.up_proj",  # mpt
             "transformer.h.{bid}.mlp.dense_h_to_4h",  # falcon
             "h.{bid}.mlp.dense_h_to_4h",  # bloom
-            "model.layers.{bid}.mlp.up_proj",  # llama-hf refact
+            "model.layers.{bid}.mlp.up_proj",  # llama-hf refact nemotron
             "layers.{bid}.feed_forward.w3",  # llama-pth
             "encoder.layer.{bid}.intermediate.dense",  # bert
             "transformer.h.{bid}.mlp.fc_in",  # gpt-j
@@ -277,6 +282,7 @@ class TensorNameMap:
             "encoder.layer.{bid}.mlp.gated_layers_v",  # jina-bert-v2
             "model.layers.{bid}.residual_mlp.w3",  # arctic
             "encoder.layers.{bid}.mlp.dense_h_to_4h",  # chatglm
+            "transformer.h.{bid}.mlp.c_fc_1",  # exaone
         ),

         MODEL_TENSOR.FFN_UP_EXP: (
@@ -308,6 +314,7 @@ class TensorNameMap:
             "encoder.layer.{bid}.mlp.gated_layers_w",  # jina-bert-v2
             "transformer.h.{bid}.mlp.linear_1",  # refact
             "model.layers.{bid}.residual_mlp.w1",  # arctic
+            "transformer.h.{bid}.mlp.c_fc_0",  # exaone
         ),

         MODEL_TENSOR.FFN_GATE_EXP: (
@@ -329,7 +336,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.ffn.down_proj",  # mpt
             "transformer.h.{bid}.mlp.dense_4h_to_h",  # falcon
             "h.{bid}.mlp.dense_4h_to_h",  # bloom
-            "model.layers.{bid}.mlp.down_proj",  # llama-hf
+            "model.layers.{bid}.mlp.down_proj",  # llama-hf nemotron
             "layers.{bid}.feed_forward.w2",  # llama-pth
             "encoder.layer.{bid}.output.dense",  # bert
             "transformer.h.{bid}.mlp.fc_out",  # gpt-j
@@ -347,6 +354,7 @@ class TensorNameMap:
             "model.layers.{bid}.residual_mlp.w2",  # arctic
             "encoder.layer.{bid}.mlp.down_layer",  # jina-bert-v2
             "encoder.layers.{bid}.mlp.dense_4h_to_h",  # chatglm
+            "model.layers.h.{bid}.mlp.c_proj",  # exaone
         ),

         MODEL_TENSOR.FFN_DOWN_EXP: (
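The tuples above are the per-architecture candidate names that gguf-py's TensorNameMap uses to translate checkpoint tensor names (with {bid} standing for the block index) into canonical GGUF tensor names during conversion; this hunk adds the exaone and nemotron spellings. A minimal illustrative sketch of that lookup is below; the resolve() helper, the canonical "blk.{bid}.<key>" strings, and the two-entry table are hypothetical and not the gguf-py implementation.

from __future__ import annotations

# Two of the candidate lists touched in the diff above, keyed by an illustrative name.
EXAONE_CANDIDATES = {
    "attn_q": (
        "model.layers.{bid}.self_attn.q_proj",        # llama-hf nemotron
        "transformer.h.{bid}.attn.attention.q_proj",  # exaone (added above)
    ),
    "ffn_up": (
        "model.layers.{bid}.mlp.up_proj",             # llama-hf refact nemotron
        "transformer.h.{bid}.mlp.c_fc_1",             # exaone (added above)
    ),
}

def resolve(name: str, n_blocks: int) -> str | None:
    # Try every candidate pattern for every block index and return a canonical key.
    for key, patterns in EXAONE_CANDIDATES.items():
        for pattern in patterns:
            for bid in range(n_blocks):
                if name == pattern.format(bid=bid):
                    return f"blk.{bid}.{key}"
    return None

print(resolve("transformer.h.3.attn.attention.q_proj", n_blocks=32))  # -> blk.3.attn_q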
bigdl/cpp/gguf-py/gguf/utility.py
CHANGED
@@ -66,4 +66,4 @@ def naming_convention(model_name: str | None, base_name: str | None, finetune_st
 
     kind = f"-{model_type.strip().replace(' ', '-')}" if model_type is not None else ""
 
-    return f"{name}{parameters}{finetune}{version}{encoding}{kind}"
+    return f"{name}{parameters}{finetune}{version}{encoding}{kind}"
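For illustration, the return statement above concatenates pre-formatted name fragments, with the optional model type appended via kind. The values below are hypothetical; the real naming_convention() derives each fragment from its arguments.

# Hypothetical fragment values, shown only to illustrate the resulting file stem.
name, parameters, finetune, version, encoding = "Mistral", "-7B", "-Instruct", "-v0.2", "-F16"
model_type = "LoRA"
kind = f"-{model_type.strip().replace(' ', '-')}" if model_type is not None else ""
print(f"{name}{parameters}{finetune}{version}{encoding}{kind}")  # Mistral-7B-Instruct-v0.2-F16-LoRA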
bigdl/cpp/gguf-py/gguf/vocab.py
CHANGED
@@ -1,10 +1,15 @@
 from __future__ import annotations
 
+import re
 import logging
 import json
 import os
 from pathlib import Path
-from typing import Any, Callable, Sequence, Mapping, Iterable
+from typing import Any, Callable, Sequence, Mapping, Iterable, Protocol, ClassVar, runtime_checkable
+
+from sentencepiece import SentencePieceProcessor
+
+import gguf
 
 from .gguf_writer import GGUFWriter
 
@@ -163,3 +168,298 @@ class SpecialVocab:
         for typ in self.special_token_types:
             self._set_special_token(typ, config.get(f'{typ}_token_id'))
         return True
+
+
+@runtime_checkable
+class BaseVocab(Protocol):
+    tokenizer_model: ClassVar[str]
+    name: ClassVar[str]
+
+
+@runtime_checkable
+class Vocab(BaseVocab, Protocol):
+    vocab_size: int
+    added_tokens_dict: dict[str, int]
+    added_tokens_list: list[str]
+    fname_tokenizer: Path
+
+    def __init__(self, base_path: Path): ...
+    def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]: ...
+
+
+class NoVocab(BaseVocab):
+    tokenizer_model = "no_vocab"
+    name = "no_vocab"
+
+    def __repr__(self) -> str:
+        return "<NoVocab for a model without integrated vocabulary>"
+
+
+class BpeVocab(Vocab):
+    tokenizer_model = "gpt2"
+    name = "bpe"
+
+    def __init__(self, base_path: Path):
+        added_tokens: dict[str, int] = {}
+
+        if (fname_tokenizer := base_path / 'vocab.json').exists():
+            # "slow" tokenizer
+            with open(fname_tokenizer, encoding="utf-8") as f:
+                self.vocab = json.load(f)
+
+            try:
+                # FIXME: Verify that added tokens here _cannot_ overlap with the main vocab.
+                with open(base_path / 'added_tokens.json', encoding="utf-8") as f:
+                    added_tokens = json.load(f)
+            except FileNotFoundError:
+                pass
+        else:
+            # "fast" tokenizer
+            fname_tokenizer = base_path / 'tokenizer.json'
+
+            # if this fails, FileNotFoundError propagates to caller
+            with open(fname_tokenizer, encoding="utf-8") as f:
+                tokenizer_json = json.load(f)
+
+            tokenizer_model: dict[str, Any] = tokenizer_json['model']
+            if (
+                tokenizer_model['type'] != 'BPE' or tokenizer_model.get('byte_fallback', False)
+                or tokenizer_json['decoder']['type'] != 'ByteLevel'
+            ):
+                raise FileNotFoundError('Cannot find GPT-2 BPE tokenizer')
+
+            self.vocab = tokenizer_model["vocab"]
+
+            if (added := tokenizer_json.get('added_tokens')) is not None:
+                # Added tokens here can be duplicates of the main vocabulary.
+                added_tokens = {item['content']: item['id']
+                                for item in added
+                                if item['content'] not in self.vocab}
+
+        vocab_size = len(self.vocab)
+        expected_ids = list(range(vocab_size, vocab_size + len(added_tokens)))
+        actual_ids = sorted(added_tokens.values())
+        if expected_ids != actual_ids:
+            expected_end_id = vocab_size + len(actual_ids) - 1
+            raise ValueError(f"Expected the {len(actual_ids)} added token ID(s) to be sequential in the range "
+                             f"{vocab_size} - {expected_end_id}; got {actual_ids}")
+
+        items = sorted(added_tokens.items(), key=lambda text_idx: text_idx[1])
+        self.added_tokens_dict = added_tokens
+        self.added_tokens_list = [text for (text, idx) in items]
+        self.vocab_size_base = vocab_size
+        self.vocab_size = self.vocab_size_base + len(self.added_tokens_list)
+        self.fname_tokenizer = fname_tokenizer
+
+    def bpe_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
+        reverse_vocab = {id: encoded_tok for encoded_tok, id in self.vocab.items()}
+
+        for i, _ in enumerate(self.vocab):
+            yield reverse_vocab[i], 0.0, gguf.TokenType.NORMAL
+
+    def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
+        for text in self.added_tokens_list:
+            score = -1000.0
+            yield text.encode("utf-8"), score, gguf.TokenType.CONTROL
+
+    def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
+        yield from self.bpe_tokens()
+        yield from self.added_tokens()
+
+    def __repr__(self) -> str:
+        return f"<BpeVocab with {self.vocab_size_base} base tokens and {len(self.added_tokens_list)} added tokens>"
+
+
+class SentencePieceVocab(Vocab):
+    tokenizer_model = "llama"
+    name = "spm"
+
+    def __init__(self, base_path: Path):
+        added_tokens: dict[str, int] = {}
+        if (fname_tokenizer := base_path / 'tokenizer.model').exists():
+            # normal location
+            try:
+                with open(base_path / 'added_tokens.json', encoding="utf-8") as f:
+                    added_tokens = json.load(f)
+            except FileNotFoundError:
+                pass
+        elif not (fname_tokenizer := base_path.parent / 'tokenizer.model').exists():
+            # not found in alternate location either
+            raise FileNotFoundError('Cannot find tokenizer.model')
+
+        self.sentencepiece_tokenizer = SentencePieceProcessor()
+        self.sentencepiece_tokenizer.LoadFromFile(str(fname_tokenizer))
+        vocab_size = self.sentencepiece_tokenizer.vocab_size()
+
+        new_tokens = {id: piece for piece, id in added_tokens.items() if id >= vocab_size}
+        expected_new_ids = list(range(vocab_size, vocab_size + len(new_tokens)))
+        actual_new_ids = sorted(new_tokens.keys())
+
+        if expected_new_ids != actual_new_ids:
+            raise ValueError(f"Expected new token IDs {expected_new_ids} to be sequential; got {actual_new_ids}")
+
+        # Token pieces that were added to the base vocabulary.
+        self.added_tokens_dict = added_tokens
+        self.added_tokens_list = [new_tokens[id] for id in actual_new_ids]
+        self.vocab_size_base = vocab_size
+        self.vocab_size = self.vocab_size_base + len(self.added_tokens_list)
+        self.fname_tokenizer = fname_tokenizer
+
+    def sentencepiece_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
+        tokenizer = self.sentencepiece_tokenizer
+        for i in range(tokenizer.vocab_size()):
+            piece = tokenizer.IdToPiece(i)
+            text = piece.encode("utf-8")
+            score: float = tokenizer.GetScore(i)
+
+            toktype = gguf.TokenType.NORMAL
+            if tokenizer.IsUnknown(i):
+                toktype = gguf.TokenType.UNKNOWN
+            if tokenizer.IsControl(i):
+                toktype = gguf.TokenType.CONTROL
+
+            # NOTE: I think added_tokens are user defined.
+            # ref: https://github.com/google/sentencepiece/blob/master/src/sentencepiece_model.proto
+            # if tokenizer.is_user_defined(i): toktype = gguf.TokenType.USER_DEFINED
+
+            if tokenizer.IsUnused(i):
+                toktype = gguf.TokenType.UNUSED
+            if tokenizer.IsByte(i):
+                toktype = gguf.TokenType.BYTE
+
+            yield text, score, toktype
+
+    def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
+        for text in self.added_tokens_list:
+            score = -1000.0
+            yield text.encode("utf-8"), score, gguf.TokenType.USER_DEFINED
+
+    def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
+        yield from self.sentencepiece_tokens()
+        yield from self.added_tokens()
+
+    def __repr__(self) -> str:
+        return f"<SentencePieceVocab with {self.vocab_size_base} base tokens and {len(self.added_tokens_list)} added tokens>"
+
+
+class LlamaHfVocab(Vocab):
+    tokenizer_model = "llama"
+    name = "hfft"
+
+    def __init__(self, base_path: Path):
+        fname_tokenizer = base_path / 'tokenizer.json'
+        # if this fails, FileNotFoundError propagates to caller
+        with open(fname_tokenizer, encoding='utf-8') as f:
+            tokenizer_json = json.load(f)
+
+        # pre-check so we know if we need transformers
+        tokenizer_model: dict[str, Any] = tokenizer_json['model']
+        is_llama3 = (
+            tokenizer_model['type'] == 'BPE' and tokenizer_model.get('ignore_merges', False)
+            and not tokenizer_model.get('byte_fallback', True)
+        )
+        if is_llama3:
+            raise TypeError('Llama 3 must be converted with BpeVocab')
+
+        if not is_llama3 and (
+            tokenizer_model['type'] != 'BPE' or not tokenizer_model.get('byte_fallback', False)
+            or tokenizer_json['decoder']['type'] != 'Sequence'
+        ):
+            raise FileNotFoundError('Cannot find Llama BPE tokenizer')
+
+        try:
+            from transformers import AutoTokenizer
+        except ImportError as e:
+            raise ImportError(
+                "To use LlamaHfVocab, please install the `transformers` package. "
+                "You can install it with `pip install transformers`."
+            ) from e
+
+        # Allow the tokenizer to default to slow or fast versions.
+        # Explicitly set tokenizer to use local paths.
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            base_path,
+            cache_dir=base_path,
+            local_files_only=True,
+        )
+        assert self.tokenizer.is_fast  # assume tokenizer.json is used
+
+        # Initialize lists and dictionaries for added tokens
+        self.added_tokens_list = []
+        self.added_tokens_dict = dict()
+        self.added_tokens_ids = set()
+
+        # Process added tokens
+        for tok, tokidx in sorted(
+            self.tokenizer.get_added_vocab().items(), key=lambda x: x[1]
+        ):
+            # Only consider added tokens that are not in the base vocabulary
+            if tokidx >= self.tokenizer.vocab_size:
+                self.added_tokens_list.append(tok)
+                self.added_tokens_dict[tok] = tokidx
+                self.added_tokens_ids.add(tokidx)
+
+        # Store special tokens and their IDs
+        self.specials = {
+            tok: self.tokenizer.get_vocab()[tok]
+            for tok in self.tokenizer.all_special_tokens
+        }
+        self.special_ids = set(self.tokenizer.all_special_ids)
+
+        # Set vocabulary sizes
+        self.vocab_size_base = self.tokenizer.vocab_size
+        self.vocab_size = self.vocab_size_base + len(self.added_tokens_list)
+
+        self.fname_tokenizer = fname_tokenizer
+
+    def hf_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
+        reverse_vocab = {
+            id: encoded_tok for encoded_tok, id in self.tokenizer.get_vocab().items()
+        }
+
+        for token_id in range(self.vocab_size_base):
+            # Skip processing added tokens here
+            if token_id in self.added_tokens_ids:
+                continue
+
+            # Convert token text to bytes
+            token_text = reverse_vocab[token_id].encode("utf-8")
+
+            # Yield token text, score, and type
+            yield token_text, self.get_token_score(token_id), self.get_token_type(
+                token_id, token_text, self.special_ids  # Reuse already stored special IDs
+            )
+
+    def get_token_type(self, token_id: int, token_text: bytes, special_ids: set[int]) -> gguf.TokenType:
+        # Special case for byte tokens
+        if re.fullmatch(br"<0x[0-9A-Fa-f]{2}>", token_text):
+            return gguf.TokenType.BYTE
+
+        # Determine token type based on whether it's a special token
+        return gguf.TokenType.CONTROL if token_id in special_ids else gguf.TokenType.NORMAL
+
+    def get_token_score(self, token_id: int) -> float:
+        # Placeholder for actual logic to determine the token's score
+        # This needs to be implemented based on specific requirements
+        return -1000.0  # Default score
+
+    def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
+        for text in self.added_tokens_list:
+            if text in self.specials:
+                toktype = self.get_token_type(self.specials[text], b'', self.special_ids)
+                score = self.get_token_score(self.specials[text])
+            else:
+                toktype = gguf.TokenType.USER_DEFINED
+                score = -1000.0
+
+            yield text.encode("utf-8"), score, toktype
+
+    def has_newline_token(self):
+        return "<0x0A>" in self.tokenizer.vocab or "\n" in self.tokenizer.vocab
+
+    def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
+        yield from self.hf_tokens()
+        yield from self.added_tokens()
+
+    def __repr__(self) -> str:
+        return f"<LlamaHfVocab with {self.vocab_size_base} base tokens and {len(self.added_tokens_list)} added tokens>"
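The vocab classes added above all implement the same Vocab protocol: the constructor takes the model directory and all_tokens() yields (bytes, score, gguf.TokenType) triples for writing into a GGUF file. A minimal usage sketch follows; it assumes the bundled gguf-py package is importable, the load_vocab() helper is hypothetical, and "./my-model" is a placeholder model directory.

from pathlib import Path

from gguf.vocab import BpeVocab, LlamaHfVocab, SentencePieceVocab


def load_vocab(model_dir: Path):
    # Each constructor raises FileNotFoundError when its expected tokenizer
    # files are missing, so the flavours can simply be tried in turn.
    for cls in (SentencePieceVocab, BpeVocab, LlamaHfVocab):
        try:
            return cls(model_dir)
        except FileNotFoundError:
            continue
    raise FileNotFoundError(f"no supported tokenizer found in {model_dir}")


vocab = load_vocab(Path("./my-model"))   # hypothetical model directory
print(vocab)                             # e.g. <BpeVocab with N base tokens and M added tokens>
for text, score, toktype in vocab.all_tokens():
    pass                                 # each item is (bytes, float, gguf.TokenType)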
bigdl/cpp/libs/common.lib
CHANGED
Binary file
bigdl/cpp/libs/ggml.dll
ADDED
Binary file
bigdl/cpp/libs/llama-bench.exe
CHANGED
Binary file
bigdl/cpp/libs/llama.dll
CHANGED
Binary file
bigdl/cpp/libs/llava_shared.dll
CHANGED
Binary file
bigdl/cpp/libs/ollama.exe
CHANGED
Binary file
{bigdl_core_cpp-2.5.0rc1.data → bigdl_core_cpp-2.6.0b1.data}/scripts/init-llama-cpp.bat
CHANGED
@@ -9,11 +9,16 @@ set "destination_folder=%cd%"
 pushd "%lib_dir%"
 for %%f in (*) do (
     if not "%%f"=="ollama.exe" (
+        if exist "%destination_folder%\%%~nxf" (
+            del /f "%destination_folder%\%%~nxf"
+        )
         mklink "%destination_folder%\%%~nxf" "%%~ff"
     )
 )
 popd
 
-copy "%cpp_dir%\
-copy "%cpp_dir%\
+copy "%cpp_dir%\convert_hf_to_gguf.py" .
+copy "%cpp_dir%\convert_hf_to_gguf_update.py" .
+copy "%cpp_dir%\convert_llama_ggml_to_gguf.py" .
+copy "%cpp_dir%\convert_lora_to_gguf.py" .
 xcopy /E /I "%cpp_dir%\gguf-py\" .\gguf-py
{bigdl_core_cpp-2.5.0rc1.data → bigdl_core_cpp-2.6.0b1.data}/scripts/init-ollama.bat
CHANGED
@@ -9,5 +9,11 @@ set "target_path=%cd%\ollama.exe"
 set "source_dist_dir=%lib_dir%\dist"
 set "target_dist_dir=%cd%\dist"
 
+if exist "%target_path%" (
+    del /f "%target_path%"
+)
 mklink "%target_path%" "%source_path%"
+if exist "%target_dist_dir%" (
+    rmdir /s /q "%target_dist_dir%"
+)
 mklink /D "%target_dist_dir%" "%source_dist_dir%"
{bigdl_core_cpp-2.5.0rc1.dist-info → bigdl_core_cpp-2.6.0b1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: bigdl-core-cpp
-Version: 2.5.0rc1
+Version: 2.6.0b1
 Summary: Large Language Model Develop Toolkit
 Author: BigDL Authors
 License: Apache License, Version 2.0
@@ -10,9 +10,9 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: Implementation :: CPython
 Requires-Dist: torch==2.2.0
 Requires-Dist: numpy==1.26.4
-Requires-Dist: transformers
+Requires-Dist: transformers==4.44.2
 Requires-Dist: sentencepiece~=0.1.98
-Requires-Dist: accelerate==0.
+Requires-Dist: accelerate==0.33.0
 Requires-Dist: protobuf<5.0.0,>=4.21.0
 Requires-Dist: gguf>=0.1.0
 