codon-model 0.0.4__tar.gz → 0.0.5a2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codon_model-0.0.4/codon_model.egg-info → codon_model-0.0.5a2}/PKG-INFO +1 -1
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/__init__.py +1 -1
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/base.py +73 -11
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/motif/base.py +4 -8
- codon_model-0.0.5a2/codon/motif/data/__init__.py +5 -0
- codon_model-0.0.5a2/codon/motif/data/prev1.py +85 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/motif/motif_a1.py +2 -2
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/ops/attention.py +9 -15
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/ops/pixelshuffle.py +0 -1
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/tokens.py +2 -5
- {codon_model-0.0.4 → codon_model-0.0.5a2/codon_model.egg-info}/PKG-INFO +1 -1
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon_model.egg-info/SOURCES.txt +2 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/LICENSE +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/block/__init__.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/block/attention.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/block/bio/__init__.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/block/bio/hebian.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/block/bio/predictive.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/block/codebook.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/block/conv.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/block/embedding.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/block/film.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/block/fusion.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/block/lora.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/block/manifold.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/block/mlp.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/block/moe.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/block/pixelshuffle.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/block/transformer.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/exp/__init__.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/exp/block/__init__.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/exp/block/bio.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/exp/block/manifold.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/exp/block/moe.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/exp/ops/__init__.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/kit/__init__.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/kit/train/__init__.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/kit/train/vision.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/model/__init__.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/model/patch_disc.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/model/resnet.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/model/tcn.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/motif/__init__.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/motif/motif_v1.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/ops/__init__.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/ops/bio.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/ops/manifold/__init__.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/ops/manifold/conv.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/ops/manifold/linear.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/__init__.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/dataset/__init__.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/dataset/base.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/dataset/conflux/__init__.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/dataset/conflux/base.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/dataset/conflux/reader.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/dataset/conflux/writer.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/dataset/corpus.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/dataset/dataviewer.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/dataset/flatdata.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/dataset/image.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/eval/__init__.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/eval/activation.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/eval/base.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/eval/boundary.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/eval/cka.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/eval/confusion.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/eval/gradcam.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/eval/layer_rsa.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/eval/rsa.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/eval/selectivity.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/eval/similarity.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/eval/tsne.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/info.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/layer/__init__.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/layer/lora.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/layer/manifold.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/mask.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/safecode.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/seed.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/split.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/theta.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon/utils/transforms.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon_model.egg-info/dependency_links.txt +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon_model.egg-info/requires.txt +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/codon_model.egg-info/top_level.txt +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/setup.cfg +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/setup.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/test/test_conflux_dataset.py +0 -0
- {codon_model-0.0.4 → codon_model-0.0.5a2}/test/test_motifv1_train.py +0 -0
|
@@ -5,6 +5,7 @@ import torch.nn.functional as F
|
|
|
5
5
|
from typing import Callable, Any, Iterator, Union
|
|
6
6
|
|
|
7
7
|
from safetensors.torch import save_model as safe_save_model
|
|
8
|
+
from safetensors.torch import save_file as safe_save_file
|
|
8
9
|
from safetensors.torch import load_model as safe_load_model
|
|
9
10
|
|
|
10
11
|
|
|
@@ -133,41 +134,102 @@ class BasicModel(nn.Module):
|
|
|
133
134
|
|
|
134
135
|
return total
|
|
135
136
|
|
|
136
|
-
def load_pretrained(self, path: str) -> 'BasicModel':
|
|
137
|
+
def load_pretrained(self, path: str, strict: bool = False) -> 'BasicModel':
|
|
137
138
|
'''
|
|
138
139
|
Load a pretrained model from a file.
|
|
139
|
-
|
|
140
140
|
Args:
|
|
141
141
|
path (str): The path to the model file.
|
|
142
|
+
strict (bool, optional): Whether to strictly enforce that the keys
|
|
143
|
+
in state_dict match. Defaults to False.
|
|
142
144
|
'''
|
|
143
145
|
if path.endswith('.safetensors'):
|
|
144
|
-
safe_load_model(self, path)
|
|
146
|
+
safe_load_model(self, path, strict=strict)
|
|
145
147
|
return self
|
|
146
|
-
|
|
147
148
|
state_dict = torch.load(path, map_location=self.device)
|
|
148
|
-
|
|
149
149
|
if isinstance(state_dict, dict):
|
|
150
150
|
if 'model_state_dict' in state_dict:
|
|
151
151
|
state_dict = state_dict['model_state_dict']
|
|
152
152
|
elif 'state_dict' in state_dict:
|
|
153
153
|
state_dict = state_dict['state_dict']
|
|
154
154
|
|
|
155
|
-
self.load_state_dict(state_dict)
|
|
156
|
-
|
|
155
|
+
self.load_state_dict(state_dict, strict=strict)
|
|
157
156
|
return self
|
|
158
157
|
|
|
159
|
-
def save_pretrained(
|
|
158
|
+
def save_pretrained(
|
|
159
|
+
self,
|
|
160
|
+
path: str,
|
|
161
|
+
trainable_only: bool = False,
|
|
162
|
+
include_buffer: bool = True,
|
|
163
|
+
exclude_modules: list[Union[type, nn.Module]] = None,
|
|
164
|
+
only: list[str] = None,
|
|
165
|
+
exclude: list[str] = None
|
|
166
|
+
) -> 'BasicModel':
|
|
160
167
|
'''
|
|
161
168
|
Save the model to a file.
|
|
162
169
|
|
|
163
170
|
Args:
|
|
164
171
|
path (str): The path to save the model file.
|
|
165
|
-
|
|
172
|
+
trainable_only (bool, optional): If True, only save parameters that require gradients.
|
|
173
|
+
include_buffer (bool, optional): If False, exclude registered buffers from the saved file.
|
|
174
|
+
exclude_modules (list[Union[type, nn.Module]], optional): Module types or instances to exclude.
|
|
175
|
+
only (list[str], optional): If provided, only save parameters whose keys contain ANY of these strings.
|
|
176
|
+
exclude (list[str], optional): If provided, exclude parameters whose keys contain ANY of these strings.
|
|
177
|
+
'''
|
|
178
|
+
state_dict = self.state_dict()
|
|
179
|
+
is_modified = False
|
|
180
|
+
|
|
181
|
+
exclude_prefixes = []
|
|
182
|
+
if exclude_modules:
|
|
183
|
+
exclude_types = tuple(t for t in exclude_modules if isinstance(t, type))
|
|
184
|
+
exclude_instances = set(m for m in exclude_modules if not isinstance(m, type))
|
|
185
|
+
|
|
186
|
+
for name, module in self.named_modules():
|
|
187
|
+
if module in exclude_instances or (exclude_types and isinstance(module, exclude_types)):
|
|
188
|
+
if name != '': exclude_prefixes.append(name + '.')
|
|
189
|
+
exclude_prefixes = tuple(exclude_prefixes)
|
|
190
|
+
|
|
191
|
+
has_filter = trainable_only or not include_buffer or exclude_prefixes or only or exclude
|
|
192
|
+
|
|
193
|
+
if has_filter:
|
|
194
|
+
trainable_names = {name for name, p in self.named_parameters() if p.requires_grad}
|
|
195
|
+
buffer_names = {name for name, _ in self.named_buffers()}
|
|
196
|
+
|
|
197
|
+
filtered_dict = {}
|
|
198
|
+
for key, tensor in state_dict.items():
|
|
199
|
+
keep = True
|
|
200
|
+
|
|
201
|
+
if exclude_prefixes and key.startswith(exclude_prefixes):
|
|
202
|
+
keep = False
|
|
203
|
+
|
|
204
|
+
elif exclude and any(kw in key for kw in exclude):
|
|
205
|
+
keep = False
|
|
206
|
+
|
|
207
|
+
elif only and not any(kw in key for kw in only):
|
|
208
|
+
keep = False
|
|
209
|
+
|
|
210
|
+
else:
|
|
211
|
+
is_buffer = key in buffer_names
|
|
212
|
+
if not include_buffer and is_buffer:
|
|
213
|
+
keep = False
|
|
214
|
+
elif trainable_only and not is_buffer and key not in trainable_names:
|
|
215
|
+
keep = False
|
|
216
|
+
|
|
217
|
+
if keep:
|
|
218
|
+
filtered_dict[key] = tensor
|
|
219
|
+
else:
|
|
220
|
+
is_modified = True
|
|
221
|
+
|
|
222
|
+
if is_modified:
|
|
223
|
+
state_dict = filtered_dict
|
|
224
|
+
|
|
166
225
|
if path.endswith('.safetensors'):
|
|
167
|
-
|
|
226
|
+
if not is_modified:
|
|
227
|
+
safe_save_model(self, path)
|
|
228
|
+
else:
|
|
229
|
+
safe_save_file(state_dict, path)
|
|
168
230
|
else:
|
|
169
|
-
state_dict = self.state_dict()
|
|
170
231
|
torch.save(state_dict, path)
|
|
232
|
+
|
|
171
233
|
return self
|
|
172
234
|
|
|
173
235
|
def freeze(self) -> 'BasicModel':
|
|
@@ -1,12 +1,8 @@
|
|
|
1
|
-
import
|
|
2
|
-
import torch.nn as nn
|
|
3
|
-
import torch.nn.functional as F
|
|
1
|
+
from codon.base import *
|
|
4
2
|
|
|
5
|
-
from typing import
|
|
3
|
+
from typing import Optional, List, Tuple
|
|
6
4
|
from dataclasses import dataclass
|
|
7
5
|
|
|
8
|
-
from codon.base import BasicModel
|
|
9
|
-
|
|
10
6
|
|
|
11
7
|
@dataclass
|
|
12
8
|
class AutoVisionEncoderOutput:
|
|
@@ -77,7 +73,7 @@ class CausalLanguageModel(BasicModel):
|
|
|
77
73
|
self,
|
|
78
74
|
input_ids: torch.Tensor,
|
|
79
75
|
max_new_tokens: int = 100,
|
|
80
|
-
temperature: float =
|
|
76
|
+
temperature: float = 0.7,
|
|
81
77
|
top_k: int = None,
|
|
82
78
|
eos_token_id: int = None
|
|
83
79
|
) -> torch.Tensor:
|
|
@@ -88,7 +84,7 @@ class CausalLanguageModel(BasicModel):
|
|
|
88
84
|
input_ids (torch.Tensor): Input token IDs with shape [batch, seq_len].
|
|
89
85
|
max_new_tokens (int): Maximum number of new tokens to generate. Defaults to 100.
|
|
90
86
|
temperature (float): Sampling temperature. Higher values increase randomness.
|
|
91
|
-
Defaults to
|
|
87
|
+
Defaults to 0.7.
|
|
92
88
|
top_k (int, optional): If set, sample only from top k tokens. Defaults to None.
|
|
93
89
|
eos_token_id (int, optional): End-of-sequence token ID. If None, generation
|
|
94
90
|
stops after max_new_tokens. Defaults to None.
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
from codon.utils.dataset.base import CodonDataset
|
|
2
|
+
from transformers import PreTrainedTokenizerFast
|
|
3
|
+
from codon.utils.tokens import PackedTokenizer
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import bisect
|
|
7
|
+
import pyarrow.parquet as pq
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any, Optional, Dict, Union
|
|
10
|
+
from tqdm import tqdm
|
|
11
|
+
|
|
12
|
+
class MotifPrev1(CodonDataset):
|
|
13
|
+
'''
|
|
14
|
+
A concrete implementation of CodonDataset for loading Motif data from a directory.
|
|
15
|
+
Optimized with O(1) lazy loading via Parquet metadata or full memory mapping initialization.
|
|
16
|
+
'''
|
|
17
|
+
|
|
18
|
+
def __init__(self, path: str, mode: str = 'lazy') -> None:
|
|
19
|
+
if not os.path.isdir(path):
|
|
20
|
+
raise NotADirectoryError(f'{path}')
|
|
21
|
+
|
|
22
|
+
if mode not in ['lazy', 'full']:
|
|
23
|
+
raise ValueError('')
|
|
24
|
+
|
|
25
|
+
self.path = path
|
|
26
|
+
self.mode = mode
|
|
27
|
+
self.tokenizer: Optional[PreTrainedTokenizerFast] = None
|
|
28
|
+
|
|
29
|
+
self.file_paths = sorted(list(Path(path).glob('*.parquet')))
|
|
30
|
+
if not self.file_paths:
|
|
31
|
+
raise FileNotFoundError(f'{path}')
|
|
32
|
+
|
|
33
|
+
self.cum_sizes = []
|
|
34
|
+
self._table_cache: Dict[int, Any] = {}
|
|
35
|
+
current_total = 0
|
|
36
|
+
|
|
37
|
+
with tqdm(total=len(self.file_paths), desc=f'Loading Dataset ({mode})', leave=False) as pbar:
|
|
38
|
+
for idx, fp in enumerate(self.file_paths):
|
|
39
|
+
if mode == 'lazy':
|
|
40
|
+
meta = pq.read_metadata(fp)
|
|
41
|
+
current_total += meta.num_rows
|
|
42
|
+
elif mode == 'full':
|
|
43
|
+
table = pq.read_table(fp, memory_map=True)
|
|
44
|
+
self._table_cache[idx] = table
|
|
45
|
+
current_total += table.num_rows
|
|
46
|
+
|
|
47
|
+
self.cum_sizes.append(current_total)
|
|
48
|
+
pbar.update(1)
|
|
49
|
+
|
|
50
|
+
self.total_rows = current_total
|
|
51
|
+
|
|
52
|
+
def set_tokenizer(self, tokenizer: Union[PreTrainedTokenizerFast, PackedTokenizer]) -> 'MotifPrev1':
|
|
53
|
+
if isinstance(tokenizer, PackedTokenizer):
|
|
54
|
+
tokenizer = tokenizer.fast_tokenizer
|
|
55
|
+
self.tokenizer = tokenizer
|
|
56
|
+
return self
|
|
57
|
+
|
|
58
|
+
def __len__(self) -> int:
|
|
59
|
+
return self.total_rows
|
|
60
|
+
|
|
61
|
+
def __getitem__(self, idx: int) -> Dict[str, Any]:
|
|
62
|
+
if idx < 0 or idx >= self.total_rows:
|
|
63
|
+
raise IndexError(f'Index {idx} out of bounds for dataset length {self.total_rows}.')
|
|
64
|
+
|
|
65
|
+
file_idx = bisect.bisect_right(self.cum_sizes, idx)
|
|
66
|
+
local_idx = idx if file_idx == 0 else idx - self.cum_sizes[file_idx - 1]
|
|
67
|
+
|
|
68
|
+
if file_idx not in self._table_cache:
|
|
69
|
+
self._table_cache[file_idx] = pq.read_table(self.file_paths[file_idx], memory_map=True)
|
|
70
|
+
|
|
71
|
+
table = self._table_cache[file_idx]
|
|
72
|
+
|
|
73
|
+
content_str = table.column('content')[local_idx].as_py()
|
|
74
|
+
|
|
75
|
+
record: Dict[str, Any] = {
|
|
76
|
+
'content': content_str
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
if 'tag' in table.column_names:
|
|
80
|
+
record['tag'] = table.column('tag')[local_idx].as_py()
|
|
81
|
+
|
|
82
|
+
if self.tokenizer is not None:
|
|
83
|
+
record['input_ids'] = self.tokenizer.encode(content_str)
|
|
84
|
+
|
|
85
|
+
return record
|
|
@@ -5,7 +5,6 @@ import math
|
|
|
5
5
|
from dataclasses import dataclass
|
|
6
6
|
from typing import Optional, Tuple
|
|
7
7
|
|
|
8
|
-
from torch.nn.attention import SDPBackend, sdpa_kernel
|
|
9
8
|
|
|
10
9
|
@dataclass
|
|
11
10
|
class AttentionOutput:
|
|
@@ -77,21 +76,16 @@ def apply_attention(
|
|
|
77
76
|
is_causal = True
|
|
78
77
|
|
|
79
78
|
try:
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
attn_mask=attention_mask,
|
|
89
|
-
is_causal=is_causal,
|
|
90
|
-
dropout_p=dropout
|
|
91
|
-
)
|
|
79
|
+
output = F.scaled_dot_product_attention(
|
|
80
|
+
query_states,
|
|
81
|
+
key_states,
|
|
82
|
+
value_states,
|
|
83
|
+
attn_mask=attention_mask,
|
|
84
|
+
is_causal=is_causal,
|
|
85
|
+
dropout_p=dropout
|
|
86
|
+
)
|
|
92
87
|
return AttentionOutput(output=output, attention_weights=None)
|
|
93
|
-
except RuntimeError:
|
|
94
|
-
pass
|
|
88
|
+
except RuntimeError: pass
|
|
95
89
|
# Manual Fallback Path
|
|
96
90
|
d_k = query_states.size(-1)
|
|
97
91
|
scores = torch.matmul(query_states, key_states.transpose(-2, -1)) / math.sqrt(d_k)
|
|
@@ -37,10 +37,10 @@ class TokenizerTrainerResult:
|
|
|
37
37
|
core_tokens = ['[pad]', '[unk]', '[sep]', '[cls]']
|
|
38
38
|
chat_tokens = [
|
|
39
39
|
'[im_start]', '[im_end]',
|
|
40
|
-
'[system]', '[user]', '[model]', '[tool]',
|
|
40
|
+
'[system]', '[user]', '[model]', '[tool]',
|
|
41
41
|
'[interruption]', '[fim]',
|
|
42
42
|
]
|
|
43
|
-
reasoning_tokens = ['[cot_start]', '[cot_end]'
|
|
43
|
+
reasoning_tokens = ['[cot_start]', '[cot_end]']
|
|
44
44
|
code_tokens = ['[fim_pre]', '[fim_mid]', '[fim_suf]']
|
|
45
45
|
tool_tokens = ['[tool_start]', '[tool_name]', '[tool_args]', '[tool_end]']
|
|
46
46
|
|
|
@@ -59,7 +59,6 @@ base_special_tokens = (
|
|
|
59
59
|
)
|
|
60
60
|
|
|
61
61
|
base_special_tokens += [f'[unused_{i}]' for i in range(len(base_special_tokens), 64)]
|
|
62
|
-
base_special_tokens += [f'[mask_{i}]' for i in range(32)]
|
|
63
62
|
|
|
64
63
|
chat_template = (
|
|
65
64
|
"{% for message in messages %}"
|
|
@@ -83,8 +82,6 @@ chat_template = (
|
|
|
83
82
|
"{{ '[model]' }}"
|
|
84
83
|
"{% elif message['role'] == 'tool' %}"
|
|
85
84
|
"{{ '[tool]' }}"
|
|
86
|
-
"{% elif message['role'] == 'train' %}"
|
|
87
|
-
"{{ '[train]' }}"
|
|
88
85
|
"{% else %}"
|
|
89
86
|
"{{ message['role'] }}"
|
|
90
87
|
"{% endif %}"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|