ommlds-0.0.0.dev472-py3-none-any.whl → ommlds-0.0.0.dev473-py3-none-any.whl

This diff shows the content of publicly available package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.

Potentially problematic release: this version of ommlds has been flagged as possibly problematic.

ommlds/.omlish-manifests.json CHANGED
@@ -468,7 +468,7 @@
   "!.minichain.registries.manifests.RegistryManifest": {
    "module": "ommlds.minichain.backends.impls.tinygrad.chat",
    "attr": "TinygradLlama3ChatChoicesService",
-   "name": "tinygrad_llama3",
+   "name": "tinygrad-llama3",
    "aliases": null,
    "type": "ChatChoicesService"
   }
@@ -483,7 +483,7 @@
   "!.minichain.registries.manifests.RegistryManifest": {
    "module": "ommlds.minichain.backends.impls.tinygrad.chat",
    "attr": "TinygradLlama3ChatChoicesStreamService",
-   "name": "tinygrad_llama3",
+   "name": "tinygrad-llama3",
    "aliases": null,
    "type": "ChatChoicesStreamService"
   }
@@ -500,7 +500,7 @@
    "ChatChoicesService",
    "ChatChoicesStreamService"
   ],
-  "backend_name": "tinygrad_llama3",
+  "backend_name": "tinygrad-llama3",
   "model_names": null
  }
 }
ommlds/__about__.py CHANGED
@@ -41,6 +41,10 @@ class Project(ProjectBase):
         'datasets ~= 4.3',
     ],
 
+    'nanochat': [
+        'regex >= 2025.0',
+    ],
+
     'numpy': [
         'numpy >= 1.26',
     ],
@@ -76,6 +80,8 @@ class Project(ProjectBase):
 
 
 class Setuptools(SetuptoolsBase):
+    rs = True
+
     find_packages = {
         'include': [Project.name, f'{Project.name}.*'],
         'exclude': [*SetuptoolsBase.find_packages['exclude']],
ommlds/cli/main.py CHANGED
@@ -191,7 +191,7 @@ def _main(args: ta.Any = None) -> None:
             _a_main,
             args,
         ),
-        backend='asyncio',
+        # backend='trio',
     )  # noqa
 
 
ommlds/cli/sessions/chat/backends/inject.py CHANGED
@@ -2,6 +2,7 @@ import typing as ta
 
 from omlish import inject as inj
 from omlish import lang
+from omlish import typedvalues as tv
 
 from ..... import minichain as mc
 from .injection import backend_configs
@@ -41,7 +42,7 @@ def bind_backends(
     async def inner(be: 'mc.BackendCatalog.Backend', cfgs: _types.BackendConfigs | None) -> ta.Any:
         kwt = inj.build_kwargs_target(be.factory, non_strict=True)
         kw = await injector.provide_kwargs(kwt)
-        return be.factory(*cfgs or [], **kw)
+        return be.factory(*tv.collect(*(be.configs or []), *(cfgs or []), override=True), **kw)
 
     return _catalog.CatalogBackendProvider.Instantiator(inner)
 
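The rewritten factory call now folds the backend's own registered configs together with any caller-supplied ones, letting the later (caller) values win. A minimal sketch of that override-merge behavior in plain Python, rather than the real omlish.typedvalues API; the Temperature/MaxTokens classes and the collect_override helper are hypothetical, for illustration only:

import dataclasses


@dataclasses.dataclass(frozen=True)
class Temperature:  # hypothetical typed config value
    v: float


@dataclasses.dataclass(frozen=True)
class MaxTokens:  # hypothetical typed config value
    v: int


def collect_override(*vals):
    by_type = {}
    for val in vals:
        by_type[type(val)] = val  # later values replace earlier ones of the same type
    return list(by_type.values())


# backend-registered defaults first, then caller-supplied configs, so the caller wins:
merged = collect_override(Temperature(0.0), MaxTokens(256), Temperature(0.7))
assert merged == [Temperature(0.7), MaxTokens(256)]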
ommlds/minichain/backends/impls/anthropic/stream.py CHANGED
@@ -52,7 +52,7 @@ class AnthropicChatChoicesStreamService:
         self._model_name = cc.pop(AnthropicChatChoicesService.DEFAULT_MODEL_NAME)
         self._api_key = check.not_none(ApiKey.pop_secret(cc, env='ANTHROPIC_API_KEY'))
 
-    READ_CHUNK_SIZE = 64 * 1024
+    READ_CHUNK_SIZE: ta.ClassVar[int] = -1
 
     async def invoke(
         self,
ommlds/minichain/backends/impls/google/stream.py CHANGED
@@ -125,7 +125,7 @@ class GoogleChatChoicesStreamService:
         AiMessage: 'model',
     }
 
-    READ_CHUNK_SIZE = 64 * 1024
+    READ_CHUNK_SIZE: ta.ClassVar[int] = -1
 
     async def invoke(
         self,
ommlds/minichain/backends/impls/ollama/chat.py CHANGED
@@ -146,7 +146,7 @@ class OllamaChatChoicesService(BaseOllamaChatChoicesService):
 # )
 @static_check_is_chat_choices_stream_service
 class OllamaChatChoicesStreamService(BaseOllamaChatChoicesService):
-    READ_CHUNK_SIZE = 64 * 1024
+    READ_CHUNK_SIZE: ta.ClassVar[int] = -1
 
     async def invoke(
         self,
ommlds/minichain/backends/impls/openai/stream.py CHANGED
@@ -54,7 +54,7 @@ class OpenaiChatChoicesStreamService:
         self._model_name = cc.pop(OpenaiChatChoicesService.DEFAULT_MODEL_NAME)
         self._api_key = ApiKey.pop_secret(cc, env='OPENAI_API_KEY')
 
-    READ_CHUNK_SIZE = 64 * 1024
+    READ_CHUNK_SIZE: ta.ClassVar[int] = -1
 
     async def invoke(self, request: ChatChoicesStreamRequest) -> ChatChoicesStreamResponse:
         # check.isinstance(request, ChatRequest)
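All four streaming backends change READ_CHUNK_SIZE from a fixed 64 KiB to -1. The read loops themselves are not shown in this diff, so the following is only an assumed reading, based on the common io convention where read(-1) means read to EOF:

import io


def iter_chunks(stream, chunk_size: int):
    # a positive size reads bounded chunks; a non-positive size defers to the
    # stream's read(-1), which per the standard io protocol means "read to EOF"
    while True:
        data = stream.read(chunk_size if chunk_size > 0 else -1)
        if not data:
            return
        yield data


for chunk in iter_chunks(io.BytesIO(b'abc' * 10), -1):
    print(len(chunk))  # one 30-byte chunk, rather than a 64KiB-bounded series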
ommlds/minichain/backends/impls/tinygrad/chat.py CHANGED
@@ -113,7 +113,7 @@ class BaseTinygradLlama3ChatService(lang.ExitStacked, lang.Abstract):
 
 
 # @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
-#     name='tinygrad_llama3',
+#     name='tinygrad-llama3',
 #     type='ChatChoicesService',
 # )
 @static_check_is_chat_choices_service
@@ -133,7 +133,7 @@ class TinygradLlama3ChatChoicesService(BaseTinygradLlama3ChatService):
 
 
 # @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
-#     name='tinygrad_llama3',
+#     name='tinygrad-llama3',
 #     type='ChatChoicesStreamService',
 # )
 @static_check_is_chat_choices_stream_service
@@ -168,5 +168,5 @@ class TinygradLlama3ChatChoicesStreamService(BaseTinygradLlama3ChatService):
 #         'ChatChoicesService',
 #         'ChatChoicesStreamService',
 #     ],
-#     'tinygrad_llama3',
+#     'tinygrad-llama3',
 # )
ommlds/minichain/backends/strings/parsing.py CHANGED
@@ -56,7 +56,7 @@ _REPO_MODEL_PAT = re.compile(
 def parse_backend_string(s: str) -> ParsedBackendString:
     backend: str | None
     if ':' in s:
-        backend, s = s.split(':')
+        backend, _, s = s.partition(':')
     else:
         backend = None
 
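The partition fix matters when the remainder itself contains a colon: the old split(':') produces more than two fields and the two-name unpack raises ValueError, while partition(':') splits only at the first colon. A quick illustration (the backend string is hypothetical):

s = 'openai:gpt-4o:2024-08-06'  # hypothetical backend string with a colon in the model part

backend, _, rest = s.partition(':')
assert (backend, rest) == ('openai', 'gpt-4o:2024-08-06')

try:
    backend, rest = s.split(':')  # the old code path
except ValueError as e:
    print(e)  # too many values to unpack (expected 2)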
ommlds/minichain/backends/strings/resolving.py CHANGED
@@ -108,7 +108,10 @@ class ManifestBackendStringResolver(BackendStringResolver):
 
         mn: str | None = mdl.name
 
-        if mn == m.backend_name:
+        if args.parsed.backend == m.backend_name and mn is not None:
+            pass
+
+        elif mn == m.backend_name:
             if m.model_names is not None:
                 mn = m.model_names.resolved_default
             else:
ommlds/minichain/stream/services.py CHANGED
@@ -125,17 +125,18 @@ class _StreamServiceResponse(StreamResponseIterator[V, OutputT]):
             return
         if self._cr.cr_running or self._cr.cr_suspended:
             cex = StreamServiceCancelledError()
-            for i in itertools.count():
+            i = None
+            for n in itertools.count():
                 try:
-                    if not i:
+                    if not n:
                         x = self._g.throw(cex)
                     else:
-                        x = self._g.send(None)
+                        x = self._g.send(i)
                 except StreamServiceCancelledError as cex2:
                     if cex2 is cex:
                         break
                     raise
-                yield x
+                i = yield x
         if self._cr.cr_running:
             raise RuntimeError(f'Coroutine {self._cr!r} not terminated')
         if self._g is not self._a:
@@ -155,9 +156,10 @@ class _StreamServiceResponse(StreamResponseIterator[V, OutputT]):
     @types.coroutine
     def _anext(self):
         check.state(self._state == 'running')
+        i = None
         while True:
             try:
-                x = self._g.send(None)
+                x = self._g.send(i)
             except StopIteration as e:
                 if e.value is not None:
                     self._outputs = tv.TypedValues(*check.isinstance(e.value, ta.Sequence))
@@ -170,7 +172,7 @@ class _StreamServiceResponse(StreamResponseIterator[V, OutputT]):
                 x.done = True
                 return x.value
 
-            yield x
+            i = yield x
 
     async def __anext__(self) -> V:
         return await self._anext()
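Taken together, these changes make the stream wrapper a true two-way generator: values passed to the iterator's send() now reach the wrapped generator instead of being discarded. A standalone sketch of the round-trip (names are illustrative, not the minichain API):

def inner():
    reply = yield 'first'
    # the wrapped generator now actually sees what the caller sent in
    yield f'got {reply!r}'


def wrapper(g):
    i = None
    while True:
        try:
            x = g.send(i)  # forward the caller's last send() into the inner generator
        except StopIteration:
            return
        i = yield x  # capture what the caller sends back


w = wrapper(inner())
print(next(w))          # 'first'
print(w.send('hello'))  # "got 'hello'"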
ommlds/nanochat/LICENSE ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 Andrej Karpathy
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
ommlds/nanochat/rustbpe/LICENSE ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 Andrej Karpathy
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
ommlds/nanochat/tokenizers.py ADDED
@@ -0,0 +1,406 @@
+# https://github.com/karpathy/nanochat/tree/9467d83cf23dcc9a9b4ca6e35103142f48a55b27
+"""
+BPE Tokenizer in the style of GPT-4.
+
+Two implementations are available:
+1) HuggingFace Tokenizer that can do both training and inference but is really confusing
+2) Our own RustBPE Tokenizer for training and tiktoken for efficient inference
+"""
+import copy
+import os
+import pickle
+import typing as ta
+
+from omlish import check
+from omlish import collections as col
+from omlish import lang
+
+
+with lang.auto_proxy_import(globals()):
+    import tiktoken
+    import tokenizers
+
+
+rustbpe: ta.Any = lang.proxy_import('.rustbpe', __package__)
+
+
+##
+
+
+SPECIAL_TOKENS = [
+    # every document begins with the Beginning of Sequence (BOS) token that delimits documents
+    '<|bos|>',
+    # tokens below are only used during finetuning to render Conversations into token ids
+    '<|user_start|>',  # user messages
+    '<|user_end|>',
+    '<|assistant_start|>',  # assistant messages
+    '<|assistant_end|>',
+    '<|python_start|>',  # assistant invokes python REPL tool
+    '<|python_end|>',
+    '<|output_start|>',  # python REPL outputs back to assistant
+    '<|output_end|>',
+]
+
+
+# NOTE: this split pattern deviates from GPT-4 in that we use \p{N}{1,2} instead of \p{N}{1,3}
+# I did this because I didn't want to "waste" too many tokens on numbers for smaller vocab sizes.
+# I haven't validated that this is actually a good idea, TODO.
+SPLIT_PATTERN = r"""'(?i:[sdmt]|ll|ve|re)|[^\r\n\p{L}\p{N}]?+\p{L}+|\p{N}{1,2}| ?[^\s\p{L}\p{N}]++[\r\n]*|\s*[\r\n]|\s+(?!\S)|\s+"""  # noqa
+
+
+# -----------------------------------------------------------------------------
+# Generic GPT-4-style tokenizer based on HuggingFace Tokenizer
+
+
+class HuggingFaceTokenizer:
+    """Light wrapper around HuggingFace Tokenizer for some utilities"""
+
+    def __init__(self, tokenizer):
+        self.tokenizer = tokenizer
+
+    @classmethod
+    def from_pretrained(cls, hf_path):
+        # init from a HuggingFace pretrained tokenizer (e.g. "gpt2")
+        tokenizer = tokenizers.Tokenizer.from_pretrained(hf_path)
+        return cls(tokenizer)
+
+    @classmethod
+    def from_directory(cls, tokenizer_dir):
+        # init from a local directory on disk (e.g. "out/tokenizer")
+        tokenizer_path = os.path.join(tokenizer_dir, 'tokenizer.json')
+        tokenizer = tokenizers.Tokenizer.from_file(tokenizer_path)
+        return cls(tokenizer)
+
+    @classmethod
+    def train_from_iterator(
+            cls,
+            text_iterator,
+            vocab_size,
+            *,
+            split_pattern=SPLIT_PATTERN,
+            special_tokens=SPECIAL_TOKENS,
+    ):
+        # train from an iterator of text
+        # Configure the HuggingFace Tokenizer
+        tokenizer = tokenizers.Tokenizer(tokenizers.models.BPE(
+            byte_fallback=True,  # needed!
+            unk_token=None,
+            fuse_unk=False,
+        ))
+        # Normalizer: None
+        tokenizer.normalizer = None
+        # Pre-tokenizer: GPT-4 style
+        # the regex pattern used by GPT-4 to split text into groups before BPE
+        # NOTE: The pattern was changed from \p{N}{1,3} to \p{N}{1,2} because I suspect it is harmful to
+        # very small models and smaller vocab sizes, because it is a little bit wasteful in the token space.
+        # (but I haven't validated this! TODO)
+        gpt4_split_regex = tokenizers.Regex(split_pattern)  # huggingface demands that you wrap it in Regex!!
+        tokenizer.pre_tokenizer = tokenizers.pre_tokenizers.Sequence([
+            tokenizers.pre_tokenizers.Split(pattern=gpt4_split_regex, behavior='isolated', invert=False),
+            tokenizers.pre_tokenizers.ByteLevel(add_prefix_space=False, use_regex=False),
+        ])
+        # Decoder: ByteLevel (it pairs together with the ByteLevel pre-tokenizer)
+        tokenizer.decoder = tokenizers.decoders.ByteLevel()
+        # Post-processor: None
+        tokenizer.post_processor = None
+        # Trainer: BPE
+        trainer = tokenizers.trainers.BpeTrainer(
+            vocab_size=vocab_size,
+            show_progress=True,
+            min_frequency=0,  # no minimum frequency
+            initial_alphabet=tokenizers.pre_tokenizers.ByteLevel.alphabet(),
+            special_tokens=special_tokens,
+        )
+        # Kick off the training
+        tokenizer.train_from_iterator(text_iterator, trainer)
+        return cls(tokenizer)
+
+    def encode_ordinary(self, text):
+        ids = self.tokenizer.encode(text, add_special_tokens=False).ids
+        return ids
+
+    def get_vocab_size(self):
+        return self.tokenizer.get_vocab_size()
+
+    def get_special_tokens(self):
+        special_tokens_map = self.tokenizer.get_added_tokens_decoder()
+        special_tokens = [w.content for w in special_tokens_map.values()]
+        return special_tokens
+
+    def id_to_token(self, id):  # noqa
+        return self.tokenizer.id_to_token(id)
+
+    def _encode_one(self, text, prepend=None, append=None):
+        # encode a single string
+        # prepend/append can be either a string of a special token or a token id directly.
+        check.isinstance(text, str)
+        ids = []
+        if prepend is not None:
+            prepend_id = prepend if isinstance(prepend, int) else self.encode_special(prepend)
+            ids.append(prepend_id)
+        ids.extend(self.tokenizer.encode(text, add_special_tokens=False).ids)
+        if append is not None:
+            append_id = append if isinstance(append, int) else self.encode_special(append)
+            ids.append(append_id)
+        return ids
+
+    def encode_special(self, text):
+        # encode a single special token via exact match
+        return self.tokenizer.token_to_id(text)
+
+    def get_bos_token_id(self):
+        bos = self.encode_special('<|bos|>')
+        return bos
+
+    def encode(self, text, *args, **kwargs):
+        if isinstance(text, str):
+            return self._encode_one(text, *args, **kwargs)
+        elif isinstance(text, list):
+            return [self._encode_one(t, *args, **kwargs) for t in text]
+        else:
+            raise ValueError(f'Invalid input type: {type(text)}')  # noqa
+
+    def __call__(self, *args, **kwargs):
+        return self.encode(*args, **kwargs)
+
+    def decode(self, ids):
+        return self.tokenizer.decode(ids, skip_special_tokens=False)
+
+    def save(self, tokenizer_dir):
+        # save the tokenizer to disk
+        os.makedirs(tokenizer_dir, exist_ok=True)
+        tokenizer_path = os.path.join(tokenizer_dir, 'tokenizer.json')
+        self.tokenizer.save(tokenizer_path)
+        print(f'Saved tokenizer to {tokenizer_path}')
+
+
+# -----------------------------------------------------------------------------
+# Tokenizer based on rustbpe + tiktoken combo
+
+
+class RustBPETokenizer:
+    """Light wrapper around tiktoken (for efficient inference) but train with rustbpe"""
+
+    def __init__(self, enc, bos_token):
+        self.enc = enc
+        self.bos_token_id = self.encode_special(bos_token)
+
+    @classmethod
+    def train_from_iterator(cls, text_iterator, vocab_size):
+        # 1) train using rustbpe
+        tokenizer = rustbpe.Tokenizer()
+        # the special tokens are inserted later in __init__, we don't train them here
+        vocab_size_no_special = vocab_size - len(SPECIAL_TOKENS)
+        check.state(vocab_size_no_special >= 256, f'vocab_size_no_special must be at least 256, got {vocab_size_no_special}')  # noqa
+        tokenizer.train_from_iterator(text_iterator, vocab_size_no_special, pattern=SPLIT_PATTERN)
+        # 2) construct the associated tiktoken encoding for inference
+        pattern = tokenizer.get_pattern()
+        mergeable_ranks_list = tokenizer.get_mergeable_ranks()
+        mergeable_ranks = {bytes(k): v for k, v in mergeable_ranks_list}
+        tokens_offset = len(mergeable_ranks)
+        special_tokens = {name: tokens_offset + i for i, name in enumerate(SPECIAL_TOKENS)}
+        enc = tiktoken.Encoding(
+            name='rustbpe',
+            pat_str=pattern,
+            mergeable_ranks=mergeable_ranks,  # dict[bytes, int] (token bytes -> merge priority rank)
+            special_tokens=special_tokens,  # dict[str, int] (special token name -> token id)
+        )
+        return cls(enc, '<|bos|>')
+
+    @classmethod
+    def from_directory(cls, tokenizer_dir):
+        pickle_path = os.path.join(tokenizer_dir, 'tokenizer.pkl')
+        with open(pickle_path, 'rb') as f:
+            enc = pickle.load(f)  # noqa
+        return cls(enc, '<|bos|>')
+
+    @classmethod
+    def from_pretrained(cls, tiktoken_name):
+        # https://github.com/openai/tiktoken/blob/eedc8563/tiktoken_ext/openai_public.py
+        enc = tiktoken.get_encoding(tiktoken_name)
+        # tiktoken calls the special document delimiter token "<|endoftext|>"
+        # yes this is confusing because this token is almost always PREPENDED to the beginning of the document
+        # it most often is used to signal the start of a new sequence to the LLM during inference etc.
+        # so in nanoChat we always use "<|bos|>" short for "beginning of sequence", but historically it is often called
+        # "<|endoftext|>".
+        return cls(enc, '<|endoftext|>')
+
+    def get_vocab_size(self):
+        return self.enc.n_vocab
+
+    def get_special_tokens(self):
+        return self.enc.special_tokens_set
+
+    def id_to_token(self, id):  # noqa
+        return self.enc.decode([id])
+
+    @col.cache.cache(max_size=32)
+    def encode_special(self, text):
+        return self.enc.encode_single_token(text)
+
+    def get_bos_token_id(self):
+        return self.bos_token_id
+
+    def encode(self, text, prepend=None, append=None, num_threads=8):
+        # text can be either a string or a list of strings
+
+        if prepend is not None:
+            prepend_id = prepend if isinstance(prepend, int) else self.encode_special(prepend)
+        if append is not None:
+            append_id = append if isinstance(append, int) else self.encode_special(append)
+
+        if isinstance(text, str):
+            ids = self.enc.encode_ordinary(text)
+            if prepend is not None:
+                ids.insert(0, prepend_id)  # TODO: slightly inefficient here? :( hmm
+            if append is not None:
+                ids.append(append_id)
+        elif isinstance(text, list):
+            ids = self.enc.encode_ordinary_batch(text, num_threads=num_threads)
+            if prepend is not None:
+                for ids_row in ids:
+                    ids_row.insert(0, prepend_id)  # TODO: same
+            if append is not None:
+                for ids_row in ids:
+                    ids_row.append(append_id)
+        else:
+            raise ValueError(f'Invalid input type: {type(text)}')  # noqa
+
+        return ids
+
+    def __call__(self, *args, **kwargs):
+        return self.encode(*args, **kwargs)
+
+    def decode(self, ids):
+        return self.enc.decode(ids)
+
+    def save(self, tokenizer_dir):
+        # save the encoding object to disk
+        os.makedirs(tokenizer_dir, exist_ok=True)
+        pickle_path = os.path.join(tokenizer_dir, 'tokenizer.pkl')
+        with open(pickle_path, 'wb') as f:
+            pickle.dump(self.enc, f)
+        print(f'Saved tokenizer encoding to {pickle_path}')
+
+    def render_conversation(self, conversation, max_tokens=2048):
+        """
+        Tokenize a single Chat conversation (which we call a "doc" or "document" here).
+        Returns:
+        - ids: list[int] is a list of token ids of this rendered conversation
+        - mask: list[int] of same length, mask = 1 for tokens that the Assistant is expected to train on.
+        """
+
+        # ids, masks that we will return and a helper function to help build them up.
+        ids, mask = [], []
+
+        def add_tokens(token_ids, mask_val):
+            if isinstance(token_ids, int):
+                token_ids = [token_ids]
+            ids.extend(token_ids)
+            mask.extend([mask_val] * len(token_ids))
+
+        # sometimes the first message is a system message...
+        # => just merge it with the second (user) message
+        if conversation['messages'][0]['role'] == 'system':
+            # some conversation surgery is necessary here for now...
+            conversation = copy.deepcopy(conversation)  # avoid mutating the original
+            messages = conversation['messages']
+            check.state(messages[1]['role'] == 'user', 'System message must be followed by a user message')
+            messages[1]['content'] = messages[0]['content'] + '\n\n' + messages[1]['content']
+            messages = messages[1:]
+        else:
+            messages = conversation['messages']
+        check.state(len(messages) >= 1, f'Conversation has less than 1 message: {messages}')
+
+        # fetch all the special tokens we need
+        bos = self.get_bos_token_id()
+        user_start, user_end = self.encode_special('<|user_start|>'), self.encode_special('<|user_end|>')
+        assistant_start, assistant_end = self.encode_special('<|assistant_start|>'), self.encode_special('<|assistant_end|>')  # noqa
+        python_start, python_end = self.encode_special('<|python_start|>'), self.encode_special('<|python_end|>')
+        output_start, output_end = self.encode_special('<|output_start|>'), self.encode_special('<|output_end|>')
+
+        # now we can tokenize the conversation
+        add_tokens(bos, 0)
+        for i, message in enumerate(messages):
+            # some sanity checking here around assumptions, to prevent footguns
+            must_be_from = 'user' if i % 2 == 0 else 'assistant'
+            check.state(message['role'] == must_be_from, f"Message {i} is from {message['role']} but should be from {must_be_from}")  # noqa
+
+            # content can be either a simple string or a list of parts (e.g. containing tool calls)
+            content = message['content']
+
+            if message['role'] == 'user':
+                check.state(isinstance(content, str), 'User messages are simply expected to be strings')
+                value_ids = self.encode(content)
+                add_tokens(user_start, 0)
+                add_tokens(value_ids, 0)
+                add_tokens(user_end, 0)
+            elif message['role'] == 'assistant':
+                add_tokens(assistant_start, 0)
+                if isinstance(content, str):
+                    # simple string => simply add the tokens
+                    value_ids = self.encode(content)
+                    add_tokens(value_ids, 1)
+                elif isinstance(content, list):
+                    for part in content:
+                        value_ids = self.encode(part['text'])
+                        if part['type'] == 'text':
+                            # string part => simply add the tokens
+                            add_tokens(value_ids, 1)
+                        elif part['type'] == 'python':
+                            # python tool call => add the tokens inside <|python_start|> and <|python_end|>
+                            add_tokens(python_start, 1)
+                            add_tokens(value_ids, 1)
+                            add_tokens(python_end, 1)
+                        elif part['type'] == 'python_output':
+                            # python output => add the tokens inside <|output_start|> and <|output_end|>
+                            # none of these tokens are supervised because the tokens come from Python at test time
+                            add_tokens(output_start, 0)
+                            add_tokens(value_ids, 0)
+                            add_tokens(output_end, 0)
+                        else:
+                            raise ValueError(f"Unknown part type: {part['type']}")
+                else:
+                    raise ValueError(f'Unknown content type: {type(content)}')
+                add_tokens(assistant_end, 1)
+
+        # truncate to max_tokens tokens MAX (helps prevent OOMs)
+        ids = ids[:max_tokens]
+        mask = mask[:max_tokens]
+        return ids, mask
+
+    def visualize_tokenization(self, ids, mask, with_token_id=False):
+        """Small helper function useful in debugging: visualize the tokenization of render_conversation"""
+
+        red = '\033[91m'
+        green = '\033[92m'
+        reset = '\033[0m'
+        gray = '\033[90m'
+        tokens = []
+        for i, (token_id, mask_val) in enumerate(zip(ids, mask)):  # noqa
+            token_str = self.decode([token_id])
+            color = green if mask_val == 1 else red
+            tokens.append(f'{color}{token_str}{reset}')
+            if with_token_id:
+                tokens.append(f'{gray}({token_id}){reset}')
+        return '|'.join(tokens)
+
+    def render_for_completion(self, conversation):
+        """
+        Used during Reinforcement Learning. In that setting, we want to render the conversation priming the Assistant
+        for a completion. Unlike the Chat SFT case, we don't need to return the mask.
+        """
+
+        # We have some surgery to do: we need to pop the last message (of the Assistant)
+        conversation = copy.deepcopy(conversation)  # avoid mutating the original
+        messages = conversation['messages']
+        check.state(messages[-1]['role'] == 'assistant', 'Last message must be from the Assistant')
+        messages.pop()  # remove the last message (of the Assistant) inplace
+
+        # Now tokenize the conversation
+        ids, mask = self.render_conversation(conversation)
+
+        # Finally, to prime the Assistant for a completion, append the Assistant start token
+        assistant_start = self.encode_special('<|assistant_start|>')
+        ids.append(assistant_start)
+        return ids
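For orientation, a short usage sketch of the added module, exercising only methods defined in the file above; the 'gpt2' identifier is just an example of a stock tiktoken encoding:

from ommlds.nanochat.tokenizers import RustBPETokenizer

tok = RustBPETokenizer.from_pretrained('gpt2')  # wraps tiktoken's stock GPT-2 encoding
ids = tok.encode('hello world', prepend='<|endoftext|>')  # GPT-2's BOS-equivalent token
assert ids[0] == tok.get_bos_token_id()
print(tok.decode(ids))  # '<|endoftext|>hello world'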
ommlds-0.0.0.dev472.dist-info/METADATA → ommlds-0.0.0.dev473.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ommlds
-Version: 0.0.0.dev472
+Version: 0.0.0.dev473
 Summary: ommlds
 Author: wrmsr
 License-Expression: BSD-3-Clause
@@ -14,8 +14,8 @@ Classifier: Programming Language :: Python :: 3.13
 Requires-Python: >=3.13
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: omdev==0.0.0.dev472
-Requires-Dist: omlish==0.0.0.dev472
+Requires-Dist: omdev==0.0.0.dev473
+Requires-Dist: omlish==0.0.0.dev473
 Provides-Extra: all
 Requires-Dist: llama-cpp-python~=0.3; extra == "all"
 Requires-Dist: mlx~=0.29; extra == "all"
@@ -28,6 +28,7 @@ Requires-Dist: transformers~=4.57; extra == "all"
 Requires-Dist: sentence-transformers~=5.1; extra == "all"
 Requires-Dist: huggingface-hub~=0.36; extra == "all"
 Requires-Dist: datasets~=4.3; extra == "all"
+Requires-Dist: regex>=2025.0; extra == "all"
 Requires-Dist: numpy>=1.26; extra == "all"
 Requires-Dist: pytesseract~=0.3; extra == "all"
 Requires-Dist: rapidocr-onnxruntime~=1.4; extra == "all"
@@ -49,6 +50,8 @@ Requires-Dist: sentence-transformers~=5.1; extra == "backends"
 Provides-Extra: huggingface
 Requires-Dist: huggingface-hub~=0.36; extra == "huggingface"
 Requires-Dist: datasets~=4.3; extra == "huggingface"
+Provides-Extra: nanochat
+Requires-Dist: regex>=2025.0; extra == "nanochat"
 Provides-Extra: numpy
 Requires-Dist: numpy>=1.26; extra == "numpy"
 Provides-Extra: ocr
ommlds-0.0.0.dev472.dist-info/RECORD → ommlds-0.0.0.dev473.dist-info/RECORD RENAMED
@@ -1,5 +1,5 @@
-ommlds/.omlish-manifests.json,sha256=nVhqSv9iQ2zu3MpOtJC2mFH5ORyzIC2K8kCAwDEdzGE,21555
-ommlds/__about__.py,sha256=t2rQF0yXpWFcCb2dvgzGR3I35HKGvGSn-EfhaUWVl5s,1759
+ommlds/.omlish-manifests.json,sha256=jsNWNqNQTjpxr-irqzy0dWpqeZH9CsD0SFY3OgIGuZ4,21555
+ommlds/__about__.py,sha256=v41mIGxCcZVApXGD-CSqkYDT9wXorPjnU611ejWJjaQ,1839
 ommlds/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ommlds/_hacks/__init__.py,sha256=ajfw7dMKH8UuloeQ5MSxWwgAmdWf2v8gm-K3uLP9wtY,196
 ommlds/_hacks/funcs.py,sha256=8XseIblP7yolDUD7WQSGn1LP90IQzByVejSzphAPDyM,2861
@@ -86,7 +86,7 @@ ommlds/backends/transformers/streamers.py,sha256=Hu_9lp_kUilKjOfs7Ixqr2NoA5FuRn2
 ommlds/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ommlds/cli/__main__.py,sha256=1ffCb0fcUOJMzxROJmJRXQ8PSOVYv7KrcuBtT95cf0c,140
 ommlds/cli/inject.py,sha256=WhTDabJz9b1NRRHVH-UyVN5nj6UncvIeTvgkGrcE9vc,666
-ommlds/cli/main.py,sha256=dTBJSxWdsS1pEqDY-vT_g2PC-aUkatVhq0XEW8zirQ0,5802
+ommlds/cli/main.py,sha256=-cMFxZqK4XTHxaFxDk4fvtWLB6LZ6UGlmbN74L2ir-g,5801
 ommlds/cli/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ommlds/cli/backends/inject.py,sha256=OVstNsoeVnprM9PBL_zP0N46KkoDg3_Wz90BWcQ7km4,1734
 ommlds/cli/backends/standard.py,sha256=HnammWyAXJHeqXJrAMBdarcT4Nyt2CxudZdD2fW_Y9M,631
@@ -100,7 +100,7 @@ ommlds/cli/sessions/chat/inject.py,sha256=7Yg6wUs2Oej4UjNZCAWCJCEsDJZWvT4G8XvkvV
 ommlds/cli/sessions/chat/session.py,sha256=eqwelLE74JFC-fBpk_hdwMD2nP4pLv3ZPwUn99200B8,521
 ommlds/cli/sessions/chat/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ommlds/cli/sessions/chat/backends/catalog.py,sha256=hIY0L1zewuJX0_xxcMcy4gylSLiQENB3YxgYJEoKgrU,2109
-ommlds/cli/sessions/chat/backends/inject.py,sha256=v_kw6lOMo5XoAf2dyIld2oBjg7lVbS2ndmdPQv4F464,1558
+ommlds/cli/sessions/chat/backends/inject.py,sha256=er03V-84B-wWt86o2HXXqCb6uFRKbfXfUZsB6GrzYOA,1646
 ommlds/cli/sessions/chat/backends/injection.py,sha256=GCn5OvNIEowgB70kQVuU84z3i8lLA4vOVkTZlQG8s0o,327
 ommlds/cli/sessions/chat/backends/types.py,sha256=5eImYHXLKqbC5MDrN443eMGamP9snCmV1n7LtAsqgPk,696
 ommlds/cli/sessions/chat/chat/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -180,14 +180,14 @@ ommlds/minichain/backends/impls/anthropic/__init__.py,sha256=47DEQpj8HBSa-_TImW-
 ommlds/minichain/backends/impls/anthropic/chat.py,sha256=-qGr_DZgGe-dr1AKb6WLtCq_I2E9635X1rQZSJqOb04,4318
 ommlds/minichain/backends/impls/anthropic/names.py,sha256=GPPeYt0CcDcDCR8I6BMd7bMjC_Zk_bjnLLpF9ClwXcg,1099
 ommlds/minichain/backends/impls/anthropic/protocol.py,sha256=whPVYuKShKiMCzasHl77sCIiymhzXj8mFZXEyhZvld8,3292
-ommlds/minichain/backends/impls/anthropic/stream.py,sha256=ePgAz1QyuUcqSLiJTmP2IeW7jZagOMYceWmnvTTHBa8,8779
+ommlds/minichain/backends/impls/anthropic/stream.py,sha256=NNBFb0sMId9yWua3fkAMZ-qYhQN9nLrXiO4DViR77YI,8790
 ommlds/minichain/backends/impls/duckduckgo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ommlds/minichain/backends/impls/duckduckgo/search.py,sha256=igzeU9P9b1MMiu4KAJVS9H6KLIoPm68wXi4Kx3_DHyQ,940
 ommlds/minichain/backends/impls/google/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ommlds/minichain/backends/impls/google/chat.py,sha256=lGb5blGLlcBlt9xeDZJvbh5SlV7fgfezd5_As_SPBXo,6499
 ommlds/minichain/backends/impls/google/names.py,sha256=HxHJ31HeKZg6aW1C_Anqp-gamCXpq9pOdKj8_yVgE8Y,871
 ommlds/minichain/backends/impls/google/search.py,sha256=y5_6seSRU8CFnLA_Ja8XEMbIBWSgwBzE1iBf-qyz0tA,3427
-ommlds/minichain/backends/impls/google/stream.py,sha256=lAxPZkFLkeBB8rNvnv5wEvK6aPqAfyAssukMA8QQz7s,8002
+ommlds/minichain/backends/impls/google/stream.py,sha256=ITownhKSOJB4IG23wWZJepUImSM6vJsDMOM9W1STpwU,8013
 ommlds/minichain/backends/impls/google/tools.py,sha256=Tty0gsyx7-PbeoNqMuql_ewQ6q-ZsDaDdsD5ShinGVY,5089
 ommlds/minichain/backends/impls/huggingface/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ommlds/minichain/backends/impls/huggingface/configs.py,sha256=6jsBtPNXOP57PcpxNTVLGWLc-18Iwn_lDbGouwCJTIQ,258
@@ -200,18 +200,18 @@ ommlds/minichain/backends/impls/llamacpp/stream.py,sha256=uzrXr2HhshgFe3Z0g8KTPc
 ommlds/minichain/backends/impls/mlx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ommlds/minichain/backends/impls/mlx/chat.py,sha256=sMlhgiFZrxAC-kKkLSJ6c-2uJn0IHZXH4EiPET_-CKI,7458
 ommlds/minichain/backends/impls/ollama/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ommlds/minichain/backends/impls/ollama/chat.py,sha256=3fuIAsIW20aEAOLLfM21d5ju27igr6N-3Lf9nNUWcoY,6598
+ommlds/minichain/backends/impls/ollama/chat.py,sha256=agnJcOwJGebSiV5TG0UmVFYGCc7hEpt7FM73rtyF8gk,6609
 ommlds/minichain/backends/impls/openai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ommlds/minichain/backends/impls/openai/chat.py,sha256=QcQZO78p4UUzI4QU1K-057OEZGKIYxjXENhkifsSuaI,2841
 ommlds/minichain/backends/impls/openai/completion.py,sha256=4Mi4Zvrq5fCqUd0asL3WiCbCdmxOdo0NFkoZMfdsYXY,1939
 ommlds/minichain/backends/impls/openai/embedding.py,sha256=BNtvKYLTsnQwQR9Tv3Fr8zCYN1kr1UNdJ15lcsjz6X0,1765
 ommlds/minichain/backends/impls/openai/format.py,sha256=teGX8mNU3sXNWP4YWGD8d59M4X9_r75ImSzfTJgtNCM,7351
 ommlds/minichain/backends/impls/openai/names.py,sha256=b74t8FwSbGEveVtVz4SqM5tiRDyTKNlUKlseV6AX3Yo,1211
-ommlds/minichain/backends/impls/openai/stream.py,sha256=pm-iUNjw5o94LrnrbAWttfbpxStnq0vDvrE7FV9fdsM,5399
+ommlds/minichain/backends/impls/openai/stream.py,sha256=1kh_V_eu8QAY_i4ulfm22kbZeMiIDLwmDPJf7aaIikI,5410
 ommlds/minichain/backends/impls/sentencepiece/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ommlds/minichain/backends/impls/sentencepiece/tokens.py,sha256=tUEBKyBgkTowssS_AdcAuPkyFzfyDfE935x4JG8PXM0,1602
 ommlds/minichain/backends/impls/tinygrad/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ommlds/minichain/backends/impls/tinygrad/chat.py,sha256=EsxNflUhrc_ouFyIvpEhKxx-0DTZKRi-BJhyD7bAfmc,4916
+ommlds/minichain/backends/impls/tinygrad/chat.py,sha256=Y3Lp08Sb0YUPAxEciexOUm0uyoJnhbH5pWT9buclx6Y,4916
 ommlds/minichain/backends/impls/tokenizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ommlds/minichain/backends/impls/tokenizers/tokens.py,sha256=_8Q49k5YroG5wQI0cuK6kOJ3XYwjhpaAS04ejhzBsWw,1500
 ommlds/minichain/backends/impls/transformers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -220,8 +220,8 @@ ommlds/minichain/backends/impls/transformers/tokens.py,sha256=uS3-IWOJRUMBfPDVRr
 ommlds/minichain/backends/impls/transformers/transformers.py,sha256=laM8G2SAE6jUjnHkeZsbWxS2KJF4efi-35aBlRBzIsE,9053
 ommlds/minichain/backends/strings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ommlds/minichain/backends/strings/manifests.py,sha256=kmlanVUAZqIh0P95Mm8H20e8ib3gEgYHHUlkCXDQGFk,413
-ommlds/minichain/backends/strings/parsing.py,sha256=2wChk9Z8fhqJTk8_91f8QFjKcSZygOQM_rVk-P4NnKw,1772
-ommlds/minichain/backends/strings/resolving.py,sha256=CVn0RDnOnw1BNQFWbSfKaDxm2C6j3FT755nT3z_Q254,5760
+ommlds/minichain/backends/strings/parsing.py,sha256=Etmk04BnKvCMtGg4AgbvxsPGvfRcLldLxpdpxcozdNk,1779
+ommlds/minichain/backends/strings/resolving.py,sha256=q0qMdIvFZH-yScpXNX8GHE_yyQC_eEf1eptiUnNolUI,5849
 ommlds/minichain/chat/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ommlds/minichain/chat/_marshal.py,sha256=M3p093nxzxITbznc--P-tyCXuWDHrq4JFKTZAx6XWdk,740
 ommlds/minichain/chat/formats.py,sha256=LmlU7iu8PMJuroFTmyWfP4tXvLjj5VNxdAp1Us9MSAA,562
@@ -318,7 +318,7 @@ ommlds/minichain/services/requests.py,sha256=VAfKbYu4T0CZTWVQmZ2LUmYU7DNm6IerYMN
 ommlds/minichain/services/responses.py,sha256=4W6Z4Fx4_GFqKgle27OeLr0zzjVTA0pkZrlsZiFQNdo,1534
 ommlds/minichain/services/services.py,sha256=WjkQNYIp87SflLSReOHMkG2qIVAOem6vsrs_2NxWN_M,325
 ommlds/minichain/stream/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ommlds/minichain/stream/services.py,sha256=fFR1klP_PZJ3Pqmqx_SGap8gRDuthJah1fyoke6G9Ww,5328
+ommlds/minichain/stream/services.py,sha256=Mx0FmoEuXBRgwTJDfcTd14F4HdvpDOwuCCNWUvDqbKE,5368
 ommlds/minichain/stream/wrap.py,sha256=nQC0aCi49I18nF0Yx8qiiLkhIAECV6s6o4pvOy5Kx98,2041
 ommlds/minichain/text/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ommlds/minichain/text/applypatch.py,sha256=YIN5JChJ0FXyK1I6OiAHQmE7BT-exHfaAMM9ay7ylyc,17705
@@ -356,6 +356,10 @@ ommlds/minichain/vectors/search.py,sha256=27MTUiVT2xmSnmgJTAR09oQaiNRh1ixj0mGZVu
 ommlds/minichain/vectors/similarity.py,sha256=etqSswPH7ERThueqnCUHULsM3rpsVslRFua0m_ps_F4,1308
 ommlds/minichain/vectors/stores.py,sha256=etbLCS0RXAEmqcCdqiys8twa8R7Y_DcjQ_VqnEnRF4s,530
 ommlds/minichain/vectors/types.py,sha256=xSAK1Xfkubqf95QgJhSHrwBu_C5quuye3wZAASmxJkM,3473
+ommlds/nanochat/LICENSE,sha256=QrsJ8zmor4uQ07SWm29uS6Sv87XGFBA7Ax_M33HI93I,1072
+ommlds/nanochat/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ommlds/nanochat/tokenizers.py,sha256=cU6ld0qdMG1T41_ijRD8EsbFMLLCpSNLDjgQOBi6RdM,17502
+ommlds/nanochat/rustbpe/LICENSE,sha256=QrsJ8zmor4uQ07SWm29uS6Sv87XGFBA7Ax_M33HI93I,1072
 ommlds/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ommlds/server/__main__.py,sha256=morlItVl-0_MDK6xk2VKhqOtA8oQk0SoWOWEqcgqXTw,155
 ommlds/server/cli.py,sha256=gCN__45IXjCtk-tWwO2hr8vs5K-R0e1auNWdIc7d6_U,1825
@@ -377,9 +381,9 @@ ommlds/wiki/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
 ommlds/wiki/utils/io.py,sha256=UKgDJGtmpnWvIqVd2mJc2QNPOqlToEY1GEveNp6_pMo,7088
 ommlds/wiki/utils/progress.py,sha256=EhvKcMFYtsarCQhIahlO6f0SboyAKP3UwUyrnVnP-Vk,3222
 ommlds/wiki/utils/xml.py,sha256=vVV8Ctn13aaRM9eYfs9Wd6rHn5WOCEUzQ44fIhOvJdg,3754
-ommlds-0.0.0.dev472.dist-info/licenses/LICENSE,sha256=B_hVtavaA8zCYDW99DYdcpDLKz1n3BBRjZrcbv8uG8c,1451
-ommlds-0.0.0.dev472.dist-info/METADATA,sha256=1pgQO5hxkugLK5ik5YuManJYKtc1SgPl96SZdHo1k04,3224
-ommlds-0.0.0.dev472.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-ommlds-0.0.0.dev472.dist-info/entry_points.txt,sha256=Z5YWtX7ClfiCKdW-dd_CSVvM0h4yQpJPi-2G3q6gNFo,35
-ommlds-0.0.0.dev472.dist-info/top_level.txt,sha256=Rbnk5d5wi58vnAXx13WFZqdQ4VX8hBCS2hEL3WeXOhY,7
-ommlds-0.0.0.dev472.dist-info/RECORD,,
+ommlds-0.0.0.dev473.dist-info/licenses/LICENSE,sha256=B_hVtavaA8zCYDW99DYdcpDLKz1n3BBRjZrcbv8uG8c,1451
+ommlds-0.0.0.dev473.dist-info/METADATA,sha256=0l14jYSJstdmoZV9kjXuIwuF0AGrEufXx2Ixqm6-wJ4,3344
+ommlds-0.0.0.dev473.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ommlds-0.0.0.dev473.dist-info/entry_points.txt,sha256=Z5YWtX7ClfiCKdW-dd_CSVvM0h4yQpJPi-2G3q6gNFo,35
+ommlds-0.0.0.dev473.dist-info/top_level.txt,sha256=Rbnk5d5wi58vnAXx13WFZqdQ4VX8hBCS2hEL3WeXOhY,7
+ommlds-0.0.0.dev473.dist-info/RECORD,,