ommlds 0.0.0.dev332__py3-none-any.whl → 0.0.0.dev334__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ommlds/backends/tinygrad/models/llama3/__main__.py +1 -1
- ommlds/backends/tinygrad/models/llama3/attention.py +62 -27
- ommlds/backends/tinygrad/models/llama3/{repl.py → cli.py} +56 -22
- ommlds/backends/tinygrad/models/llama3/loading.py +25 -20
- ommlds/backends/tinygrad/models/llama3/transformer.py +5 -7
- ommlds/cli/main.py +1 -1
- ommlds/cli/sessions/chat.py +1 -2
- ommlds/cli/state.py +1 -1
- ommlds/minichain/__init__.py +91 -46
- ommlds/minichain/_typedvalues.py +93 -0
- ommlds/minichain/backends/tinygrad/chat.py +1 -1
- ommlds/minichain/backends/transformers/sentence.py +2 -2
- ommlds/minichain/chat/messages.py +44 -2
- ommlds/minichain/chat/metadata.py +16 -0
- ommlds/minichain/content/{marshal.py → _marshal.py} +17 -6
- ommlds/minichain/content/content.py +0 -6
- ommlds/minichain/content/images.py +2 -2
- ommlds/minichain/content/list.py +15 -0
- ommlds/minichain/content/metadata.py +16 -0
- ommlds/minichain/content/simple.py +38 -0
- ommlds/minichain/content/text.py +12 -0
- ommlds/minichain/content/transforms.py +16 -2
- ommlds/minichain/metadata.py +48 -0
- ommlds/minichain/services/_marshal.py +48 -6
- ommlds/minichain/services/_typedvalues.py +0 -33
- ommlds/minichain/services/requests.py +5 -2
- ommlds/minichain/services/responses.py +5 -2
- {ommlds-0.0.0.dev332.dist-info → ommlds-0.0.0.dev334.dist-info}/METADATA +3 -3
- {ommlds-0.0.0.dev332.dist-info → ommlds-0.0.0.dev334.dist-info}/RECORD +36 -29
- /ommlds/minichain/chat/{marshal.py → _marshal.py} +0 -0
- /ommlds/minichain/llms/{marshal.py → _marshal.py} +0 -0
- /ommlds/minichain/tools/{marshal.py → _marshal.py} +0 -0
- {ommlds-0.0.0.dev332.dist-info → ommlds-0.0.0.dev334.dist-info}/WHEEL +0 -0
- {ommlds-0.0.0.dev332.dist-info → ommlds-0.0.0.dev334.dist-info}/entry_points.txt +0 -0
- {ommlds-0.0.0.dev332.dist-info → ommlds-0.0.0.dev334.dist-info}/licenses/LICENSE +0 -0
- {ommlds-0.0.0.dev332.dist-info → ommlds-0.0.0.dev334.dist-info}/top_level.txt +0 -0
|
@@ -57,8 +57,8 @@ class Attention:
|
|
|
57
57
|
self,
|
|
58
58
|
dim,
|
|
59
59
|
n_heads,
|
|
60
|
-
n_kv_heads,
|
|
61
|
-
max_context,
|
|
60
|
+
n_kv_heads=None,
|
|
61
|
+
max_context=0,
|
|
62
62
|
linear=nn.Linear,
|
|
63
63
|
qk_norm: float | None = None,
|
|
64
64
|
) -> None:
|
|
@@ -85,7 +85,7 @@ class Attention:
|
|
|
85
85
|
x: Tensor,
|
|
86
86
|
start_pos: Variable_ | int,
|
|
87
87
|
freqs_cis: Tensor,
|
|
88
|
-
mask: Tensor | None,
|
|
88
|
+
mask: Tensor | None = None,
|
|
89
89
|
) -> Tensor:
|
|
90
90
|
if getenv('WQKV'):
|
|
91
91
|
if not hasattr(self, 'wqkv'):
|
|
@@ -114,36 +114,71 @@ class Attention:
|
|
|
114
114
|
bsz, seqlen, _, _ = xq.shape
|
|
115
115
|
|
|
116
116
|
# create kv cache
|
|
117
|
-
if not hasattr(self, 'cache_kv'):
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
117
|
+
# if not hasattr(self, 'cache_kv'):
|
|
118
|
+
# self.cache_kv = (
|
|
119
|
+
# Tensor.zeros(
|
|
120
|
+
# 2,
|
|
121
|
+
# bsz,
|
|
122
|
+
# self.max_context,
|
|
123
|
+
# self.n_kv_heads,
|
|
124
|
+
# self.head_dim,
|
|
125
|
+
# dtype=x.dtype,
|
|
126
|
+
# )
|
|
127
|
+
# .contiguous()
|
|
128
|
+
# .realize()
|
|
129
|
+
# )
|
|
130
|
+
# if isinstance(x.device, tuple):
|
|
131
|
+
# # TODO: instead of specifying how to shard, it can follow how xk and xv are being sharded
|
|
132
|
+
# self.cache_kv.shard_(
|
|
133
|
+
# (x.device), axis=3 if getenv('SHARD_KVCACHE') else None,
|
|
134
|
+
# ).realize()
|
|
135
|
+
#
|
|
136
|
+
# # update the cache
|
|
137
|
+
# check.state(xk.dtype == xv.dtype == self.cache_kv.dtype, f'{xk.dtype=}, {xv.dtype=}, {self.cache_kv.dtype=}')
|
|
138
|
+
#
|
|
139
|
+
# self.cache_kv[:, :, start_pos:start_pos + seqlen, :, :].assign(Tensor.stack(xk, xv)).realize()
|
|
140
|
+
#
|
|
141
|
+
# keys = self.cache_kv[0, :, 0:start_pos + seqlen, :, :]
|
|
142
|
+
# values = self.cache_kv[1, :, 0:start_pos + seqlen, :, :]
|
|
143
|
+
|
|
144
|
+
if self.max_context:
|
|
145
|
+
if not hasattr(self, 'cache_kv'):
|
|
146
|
+
self.cache_kv = (
|
|
147
|
+
Tensor.zeros(
|
|
148
|
+
2,
|
|
149
|
+
bsz,
|
|
150
|
+
self.max_context,
|
|
151
|
+
self.n_kv_heads,
|
|
152
|
+
self.head_dim,
|
|
153
|
+
dtype=x.dtype,
|
|
154
|
+
)
|
|
155
|
+
.contiguous()
|
|
156
|
+
.realize()
|
|
126
157
|
)
|
|
127
|
-
.
|
|
128
|
-
|
|
158
|
+
if isinstance(x.device, tuple):
|
|
159
|
+
# TODO: instead of specifying how to shard, it can follow how xk and xv are being sharded
|
|
160
|
+
self.cache_kv.shard_(
|
|
161
|
+
(x.device),
|
|
162
|
+
axis=3 if getenv('SHARD_KVCACHE') else None,
|
|
163
|
+
).realize()
|
|
164
|
+
|
|
165
|
+
# update the cache
|
|
166
|
+
check.state(
|
|
167
|
+
xk.dtype == xv.dtype == self.cache_kv.dtype,
|
|
168
|
+
f'{xk.dtype=}, {xv.dtype=}, {self.cache_kv.dtype=}',
|
|
129
169
|
)
|
|
130
|
-
|
|
131
|
-
# TODO: instead of specifying how to shard, it can follow how xk and xv are being sharded
|
|
132
|
-
self.cache_kv.shard_(
|
|
133
|
-
(x.device), axis=3 if getenv('SHARD_KVCACHE') else None,
|
|
134
|
-
).realize()
|
|
170
|
+
self.cache_kv[:, :, start_pos:start_pos + seqlen, :, :].assign(Tensor.stack(xk, xv)).realize()
|
|
135
171
|
|
|
136
|
-
|
|
137
|
-
|
|
172
|
+
keys = self.cache_kv[0, :, 0:start_pos + seqlen, :, :]
|
|
173
|
+
values = self.cache_kv[1, :, 0:start_pos + seqlen, :, :]
|
|
138
174
|
|
|
139
|
-
|
|
140
|
-
(
|
|
141
|
-
|
|
175
|
+
else:
|
|
176
|
+
check.state(start_pos == 0)
|
|
177
|
+
keys, values = xk, xv
|
|
142
178
|
|
|
143
|
-
keys =
|
|
144
|
-
values =
|
|
179
|
+
keys = repeat_kv(keys, self.n_rep)
|
|
180
|
+
values = repeat_kv(values, self.n_rep)
|
|
145
181
|
|
|
146
|
-
keys, values = repeat_kv(keys, self.n_rep), repeat_kv(values, self.n_rep)
|
|
147
182
|
xq, keys, values = (
|
|
148
183
|
xq.transpose(1, 2),
|
|
149
184
|
keys.transpose(1, 2),
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
import pathlib
|
|
3
|
+
import typing as ta
|
|
3
4
|
|
|
4
5
|
from tinygrad import Tensor
|
|
5
6
|
|
|
@@ -12,33 +13,60 @@ from .llm import Llama3Llm
|
|
|
12
13
|
##
|
|
13
14
|
|
|
14
15
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
*llm.encode_message('system', 'You are an helpful assistant.'),
|
|
19
|
-
]
|
|
16
|
+
class _RunToStopResult(ta.NamedTuple):
|
|
17
|
+
start_pos: int
|
|
18
|
+
last_tok: int
|
|
20
19
|
|
|
21
|
-
|
|
20
|
+
|
|
21
|
+
def _run_to_stop(llm: Llama3Llm, start_pos: int, last_tok: int) -> _RunToStopResult:
|
|
22
22
|
while True:
|
|
23
|
-
|
|
23
|
+
tok = llm.feed(
|
|
24
|
+
[last_tok],
|
|
25
|
+
start_pos,
|
|
26
|
+
)
|
|
27
|
+
tok = tok.item()
|
|
28
|
+
|
|
29
|
+
start_pos += 1
|
|
30
|
+
last_tok = tok
|
|
31
|
+
if tok in llm.tokenizer.stop_tokens:
|
|
32
|
+
break
|
|
33
|
+
|
|
34
|
+
print(llm.tokenizer.decode([tok]), end='', flush=True)
|
|
24
35
|
|
|
25
|
-
|
|
26
|
-
last_tok = toks[-1]
|
|
27
|
-
while True:
|
|
28
|
-
tok = llm.feed(
|
|
29
|
-
[last_tok],
|
|
30
|
-
start_pos,
|
|
31
|
-
)
|
|
32
|
-
tok = tok.item()
|
|
36
|
+
print(flush=True)
|
|
33
37
|
|
|
34
|
-
|
|
35
|
-
last_tok = tok
|
|
36
|
-
if tok in llm.tokenizer.stop_tokens:
|
|
37
|
-
break
|
|
38
|
+
return _RunToStopResult(start_pos, last_tok)
|
|
38
39
|
|
|
39
|
-
print(llm.tokenizer.decode([tok]), end='', flush=True)
|
|
40
40
|
|
|
41
|
-
|
|
41
|
+
def _run_new_toks(llm: Llama3Llm, toks: list[int], start_pos: int = 0) -> int:
|
|
42
|
+
start_pos = llm.prefill(toks[:-1], start_pos=start_pos)
|
|
43
|
+
last_tok = toks[-1]
|
|
44
|
+
return _run_to_stop(llm, start_pos, last_tok).start_pos
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
#
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def run_prompt(llm: Llama3Llm, prompt: str) -> None:
|
|
51
|
+
_run_new_toks(llm,[
|
|
52
|
+
llm.tokenizer.bos_id,
|
|
53
|
+
*llm.encode_message('system', 'You are an helpful assistant.'),
|
|
54
|
+
*llm.encode_message('user', prompt),
|
|
55
|
+
*llm.encode_role('assistant'),
|
|
56
|
+
])
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def run_repl(llm: Llama3Llm) -> None:
|
|
60
|
+
start_pos = llm.prefill([
|
|
61
|
+
llm.tokenizer.bos_id,
|
|
62
|
+
*llm.encode_message('system', 'You are an helpful assistant.'),
|
|
63
|
+
])
|
|
64
|
+
|
|
65
|
+
while True:
|
|
66
|
+
start_pos = _run_new_toks(llm, [
|
|
67
|
+
*llm.encode_message('user', input('Q: ')),
|
|
68
|
+
*llm.encode_role('assistant'),
|
|
69
|
+
], start_pos)
|
|
42
70
|
|
|
43
71
|
|
|
44
72
|
##
|
|
@@ -84,6 +112,9 @@ def _build_arg_parser() -> argparse.ArgumentParser:
|
|
|
84
112
|
default=0.85,
|
|
85
113
|
help='Temperature',
|
|
86
114
|
)
|
|
115
|
+
parser.add_argument(
|
|
116
|
+
'--prompt',
|
|
117
|
+
)
|
|
87
118
|
return parser
|
|
88
119
|
|
|
89
120
|
|
|
@@ -112,7 +143,10 @@ def _main() -> None:
|
|
|
112
143
|
temperature=args.temperature,
|
|
113
144
|
)
|
|
114
145
|
|
|
115
|
-
|
|
146
|
+
if (prompt := args.prompt) is not None:
|
|
147
|
+
run_prompt(llm, prompt)
|
|
148
|
+
else:
|
|
149
|
+
run_repl(llm)
|
|
116
150
|
|
|
117
151
|
|
|
118
152
|
if __name__ == '__main__':
|
|
@@ -27,7 +27,7 @@ from .transformer import Transformer
|
|
|
27
27
|
# TODO: model shouldn't be an input here, and n_kv_heads should support None
|
|
28
28
|
def convert_from_huggingface(
|
|
29
29
|
weights: dict[str, Tensor],
|
|
30
|
-
|
|
30
|
+
n_layers: int,
|
|
31
31
|
n_heads: int,
|
|
32
32
|
n_kv_heads: int,
|
|
33
33
|
permute_layers: bool = True,
|
|
@@ -50,35 +50,35 @@ def convert_from_huggingface(
|
|
|
50
50
|
'model.embed_tokens.weight': 'tok_embeddings.weight',
|
|
51
51
|
**{
|
|
52
52
|
f'model.layers.{l}.input_layernorm.weight': f'layers.{l}.attention_norm.weight'
|
|
53
|
-
for l in range(
|
|
53
|
+
for l in range(n_layers)
|
|
54
54
|
},
|
|
55
55
|
**{
|
|
56
56
|
f'model.layers.{l}.self_attn.{x}_norm.weight': f'layers.{l}.attention.{x}_norm.weight'
|
|
57
57
|
for x in ['q', 'k']
|
|
58
|
-
for l in range(
|
|
58
|
+
for l in range(n_layers)
|
|
59
59
|
},
|
|
60
60
|
**{
|
|
61
61
|
f'model.layers.{l}.self_attn.{x}_proj.weight': f'layers.{l}.attention.w{x}.weight'
|
|
62
62
|
for x in ['q', 'k', 'v', 'o']
|
|
63
|
-
for l in range(
|
|
63
|
+
for l in range(n_layers)
|
|
64
64
|
},
|
|
65
65
|
**{
|
|
66
66
|
f'model.layers.{l}.self_attn.{x}_proj.bias': f'layers.{l}.attention.w{x}.bias'
|
|
67
67
|
for x in ['q', 'k', 'v', 'o']
|
|
68
|
-
for l in range(
|
|
68
|
+
for l in range(n_layers)
|
|
69
69
|
},
|
|
70
70
|
**{
|
|
71
71
|
f'model.layers.{l}.post_attention_layernorm.weight': f'layers.{l}.ffn_norm.weight'
|
|
72
|
-
for l in range(
|
|
72
|
+
for l in range(n_layers)
|
|
73
73
|
},
|
|
74
74
|
**{
|
|
75
75
|
f'model.layers.{l}.mlp.{x}_proj.weight': f'layers.{l}.feed_forward.w{y}.weight'
|
|
76
76
|
for x, y in {'gate': '1', 'down': '2', 'up': '3'}.items()
|
|
77
|
-
for l in range(
|
|
77
|
+
for l in range(n_layers)
|
|
78
78
|
},
|
|
79
79
|
**{
|
|
80
80
|
f'model.layers.{l}.mlp.gate.weight': f'layers.{l}.feed_forward.gate.weight'
|
|
81
|
-
for l in range(
|
|
81
|
+
for l in range(n_layers)
|
|
82
82
|
},
|
|
83
83
|
'model.norm.weight': 'norm.weight',
|
|
84
84
|
'lm_head.weight': 'output.weight',
|
|
@@ -107,31 +107,31 @@ def convert_from_huggingface(
|
|
|
107
107
|
|
|
108
108
|
def convert_from_gguf(
|
|
109
109
|
weights: dict[str, Tensor],
|
|
110
|
-
|
|
110
|
+
n_layers: int,
|
|
111
111
|
):
|
|
112
112
|
keymap = {
|
|
113
113
|
'token_embd.weight': 'tok_embeddings.weight',
|
|
114
114
|
**{
|
|
115
115
|
f'blk.{l}.attn_norm.weight': f'layers.{l}.attention_norm.weight'
|
|
116
|
-
for l in range(
|
|
116
|
+
for l in range(n_layers)
|
|
117
117
|
},
|
|
118
118
|
**{
|
|
119
119
|
f'blk.{l}.attn_{x}.weight': f'layers.{l}.attention.w{x}.weight'
|
|
120
120
|
for x in ['q', 'k', 'v']
|
|
121
|
-
for l in range(
|
|
121
|
+
for l in range(n_layers)
|
|
122
122
|
},
|
|
123
123
|
**{
|
|
124
124
|
f'blk.{l}.attn_output.weight': f'layers.{l}.attention.wo.weight'
|
|
125
|
-
for l in range(
|
|
125
|
+
for l in range(n_layers)
|
|
126
126
|
},
|
|
127
127
|
**{
|
|
128
128
|
f'blk.{l}.ffn_norm.weight': f'layers.{l}.ffn_norm.weight'
|
|
129
|
-
for l in range(
|
|
129
|
+
for l in range(n_layers)
|
|
130
130
|
},
|
|
131
131
|
**{
|
|
132
132
|
f'blk.{l}.ffn_{x}.weight': f'layers.{l}.feed_forward.w{y}.weight'
|
|
133
133
|
for x, y in {'gate': '1', 'down': '2', 'up': '3'}.items()
|
|
134
|
-
for l in range(
|
|
134
|
+
for l in range(n_layers)
|
|
135
135
|
},
|
|
136
136
|
'output_norm.weight': 'norm.weight',
|
|
137
137
|
'rope_freqs.weight': 'rope_freqs.weight',
|
|
@@ -269,8 +269,10 @@ def build_transformer(
|
|
|
269
269
|
else:
|
|
270
270
|
linear, embedding, quantize_embeds = nn.Linear, nn.Embedding, False
|
|
271
271
|
|
|
272
|
+
model_params = MODEL_PARAMS[model_size]
|
|
273
|
+
|
|
272
274
|
model = Transformer(
|
|
273
|
-
**
|
|
275
|
+
**model_params['args'],
|
|
274
276
|
linear=linear,
|
|
275
277
|
embedding=embedding,
|
|
276
278
|
max_context=max_context,
|
|
@@ -292,7 +294,7 @@ def build_transformer(
|
|
|
292
294
|
weights = concat_weights(
|
|
293
295
|
[
|
|
294
296
|
load(str(model_path / f'consolidated.{i:02d}.pth'))
|
|
295
|
-
for i in range(
|
|
297
|
+
for i in range(model_params['files'])
|
|
296
298
|
],
|
|
297
299
|
device[0] if isinstance(device, tuple) else device,
|
|
298
300
|
)
|
|
@@ -303,13 +305,16 @@ def build_transformer(
|
|
|
303
305
|
if 'model.embed_tokens.weight' in weights:
|
|
304
306
|
weights = convert_from_huggingface(
|
|
305
307
|
weights,
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
308
|
+
model_params['args']['n_layers'],
|
|
309
|
+
model_params['args']['n_heads'],
|
|
310
|
+
model_params['args']['n_kv_heads'],
|
|
309
311
|
)
|
|
310
312
|
|
|
311
313
|
elif 'token_embd.weight' in weights:
|
|
312
|
-
weights = convert_from_gguf(
|
|
314
|
+
weights = convert_from_gguf(
|
|
315
|
+
weights,
|
|
316
|
+
model_params['args']['n_layers'],
|
|
317
|
+
)
|
|
313
318
|
|
|
314
319
|
weights = fix_bf16(weights)
|
|
315
320
|
|
|
@@ -130,10 +130,8 @@ class Transformer:
|
|
|
130
130
|
_bsz, seqlen = tokens.shape
|
|
131
131
|
h = self.tok_embeddings(tokens)
|
|
132
132
|
|
|
133
|
-
self.freqs_cis = self.freqs_cis.cast(h.dtype).
|
|
134
|
-
freqs_cis = self.freqs_cis
|
|
135
|
-
(None, (start_pos, start_pos + seqlen), None, None, None),
|
|
136
|
-
)
|
|
133
|
+
self.freqs_cis = self.freqs_cis.cast(h.dtype).kernelize()
|
|
134
|
+
freqs_cis = self.freqs_cis[:, start_pos:start_pos + seqlen, :, :, :]
|
|
137
135
|
|
|
138
136
|
mask = (
|
|
139
137
|
Tensor.full(
|
|
@@ -143,7 +141,7 @@ class Transformer:
|
|
|
143
141
|
device=h.device,
|
|
144
142
|
)
|
|
145
143
|
.triu(start_pos + 1)
|
|
146
|
-
.
|
|
144
|
+
.kernelize()
|
|
147
145
|
) if seqlen > 1 else None
|
|
148
146
|
|
|
149
147
|
for layer in self.layers:
|
|
@@ -152,7 +150,7 @@ class Transformer:
|
|
|
152
150
|
|
|
153
151
|
return sample(
|
|
154
152
|
logits.flatten(), temperature, top_k, top_p, alpha_f, alpha_p,
|
|
155
|
-
).
|
|
153
|
+
).kernelize()
|
|
156
154
|
|
|
157
155
|
def __call__(
|
|
158
156
|
self,
|
|
@@ -172,7 +170,7 @@ class Transformer:
|
|
|
172
170
|
):
|
|
173
171
|
return self.forward_jit(
|
|
174
172
|
tokens,
|
|
175
|
-
Variable('start_pos', 1, self.max_context).bind(start_pos),
|
|
173
|
+
Variable('start_pos', 1, self.max_context - 1).bind(start_pos),
|
|
176
174
|
temperature,
|
|
177
175
|
top_k,
|
|
178
176
|
top_p,
|
ommlds/cli/main.py
CHANGED
|
@@ -70,7 +70,7 @@ def _main() -> None:
|
|
|
70
70
|
content: mc.Content
|
|
71
71
|
|
|
72
72
|
if args.image:
|
|
73
|
-
content = mc.
|
|
73
|
+
content = mc.ImageContent(pimg.open(check.non_empty_str(check.single(args.prompt))))
|
|
74
74
|
|
|
75
75
|
elif args.editor:
|
|
76
76
|
check.arg(not args.prompt)
|
ommlds/cli/sessions/chat.py
CHANGED
ommlds/cli/state.py
CHANGED
|
@@ -42,7 +42,7 @@ class StateStorage(lang.Abstract):
|
|
|
42
42
|
ms = msh.unmarshal(obj, MarshaledState)
|
|
43
43
|
if ms.version < self._version:
|
|
44
44
|
return None
|
|
45
|
-
return msh.unmarshal(ms.payload, ty)
|
|
45
|
+
return msh.unmarshal(ms.payload, ty)
|
|
46
46
|
|
|
47
47
|
def marshal_state(self, obj: ta.Any, ty: type | None = None) -> ta.Any:
|
|
48
48
|
ms = MarshaledState(
|
ommlds/minichain/__init__.py
CHANGED
|
@@ -1,10 +1,6 @@
|
|
|
1
1
|
# fmt: off
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
register_type,
|
|
5
|
-
registry_new,
|
|
6
|
-
registry_of,
|
|
7
|
-
)
|
|
3
|
+
##
|
|
8
4
|
|
|
9
5
|
from .chat.formats import ( # noqa
|
|
10
6
|
JSON_RESPONSE_FORMAT,
|
|
@@ -71,23 +67,7 @@ from .chat.types import ( # noqa
|
|
|
71
67
|
ChatResponseOutput,
|
|
72
68
|
)
|
|
73
69
|
|
|
74
|
-
|
|
75
|
-
CompletionRequestOption,
|
|
76
|
-
CompletionRequestOptions,
|
|
77
|
-
CompletionRequest,
|
|
78
|
-
|
|
79
|
-
CompletionResponseOutput,
|
|
80
|
-
CompletionResponseOutputs,
|
|
81
|
-
CompletionResponse,
|
|
82
|
-
|
|
83
|
-
CompletionService,
|
|
84
|
-
)
|
|
85
|
-
|
|
86
|
-
from .configs import ( # noqa
|
|
87
|
-
Config,
|
|
88
|
-
|
|
89
|
-
consume_configs,
|
|
90
|
-
)
|
|
70
|
+
##
|
|
91
71
|
|
|
92
72
|
from .content.content import ( # noqa
|
|
93
73
|
Content,
|
|
@@ -95,13 +75,26 @@ from .content.content import ( # noqa
|
|
|
95
75
|
)
|
|
96
76
|
|
|
97
77
|
from .content.images import ( # noqa
|
|
98
|
-
|
|
78
|
+
ImageContent,
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
from .content.list import ( # noqa
|
|
82
|
+
ListContent,
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
from .content.metadata import ( # noqa
|
|
86
|
+
ContentMetadata,
|
|
87
|
+
ContentMetadatas,
|
|
99
88
|
)
|
|
100
89
|
|
|
101
90
|
from .content.rendering import ( # noqa
|
|
102
91
|
StringRenderer,
|
|
103
92
|
)
|
|
104
93
|
|
|
94
|
+
from .content.text import ( # noqa
|
|
95
|
+
TextContent,
|
|
96
|
+
)
|
|
97
|
+
|
|
105
98
|
from .content.transforms import ( # noqa
|
|
106
99
|
ContentTransform,
|
|
107
100
|
|
|
@@ -109,10 +102,7 @@ from .content.transforms import ( # noqa
|
|
|
109
102
|
transform_content_strings,
|
|
110
103
|
)
|
|
111
104
|
|
|
112
|
-
|
|
113
|
-
Env,
|
|
114
|
-
EnvKey,
|
|
115
|
-
)
|
|
105
|
+
##
|
|
116
106
|
|
|
117
107
|
from .llms.tokens import ( # noqa
|
|
118
108
|
Token,
|
|
@@ -135,6 +125,8 @@ from .llms.services import ( # noqa
|
|
|
135
125
|
TokenUsageOutput,
|
|
136
126
|
)
|
|
137
127
|
|
|
128
|
+
##
|
|
129
|
+
|
|
138
130
|
from .services import ( # noqa
|
|
139
131
|
Request,
|
|
140
132
|
RequestOption,
|
|
@@ -144,21 +136,7 @@ from .services import ( # noqa
|
|
|
144
136
|
ServiceFacade,
|
|
145
137
|
)
|
|
146
138
|
|
|
147
|
-
|
|
148
|
-
ModelSpecifier,
|
|
149
|
-
ModelName,
|
|
150
|
-
ModelPath,
|
|
151
|
-
|
|
152
|
-
ApiKey,
|
|
153
|
-
|
|
154
|
-
DefaultRequestOptions,
|
|
155
|
-
)
|
|
156
|
-
|
|
157
|
-
from .streaming import ( # noqa
|
|
158
|
-
ResponseGenerator,
|
|
159
|
-
|
|
160
|
-
StreamResponse,
|
|
161
|
-
)
|
|
139
|
+
##
|
|
162
140
|
|
|
163
141
|
from .tools.jsonschema import ( # noqa
|
|
164
142
|
build_tool_spec_json_schema,
|
|
@@ -189,6 +167,8 @@ from .tools.types import ( # noqa
|
|
|
189
167
|
ToolExecRequest,
|
|
190
168
|
)
|
|
191
169
|
|
|
170
|
+
##
|
|
171
|
+
|
|
192
172
|
from .vectors.embeddings import ( # noqa
|
|
193
173
|
EmbeddingRequest,
|
|
194
174
|
EmbeddingRequestOption,
|
|
@@ -235,13 +215,78 @@ from .vectors.types import ( # noqa
|
|
|
235
215
|
Vectorable,
|
|
236
216
|
)
|
|
237
217
|
|
|
218
|
+
##
|
|
219
|
+
|
|
220
|
+
from .completion import ( # noqa
|
|
221
|
+
CompletionRequestOption,
|
|
222
|
+
CompletionRequestOptions,
|
|
223
|
+
CompletionRequest,
|
|
224
|
+
|
|
225
|
+
CompletionResponseOutput,
|
|
226
|
+
CompletionResponseOutputs,
|
|
227
|
+
CompletionResponse,
|
|
228
|
+
|
|
229
|
+
CompletionService,
|
|
230
|
+
)
|
|
231
|
+
|
|
232
|
+
from .configs import ( # noqa
|
|
233
|
+
Config,
|
|
234
|
+
|
|
235
|
+
consume_configs,
|
|
236
|
+
)
|
|
237
|
+
|
|
238
|
+
from .envs import ( # noqa
|
|
239
|
+
Env,
|
|
240
|
+
EnvKey,
|
|
241
|
+
)
|
|
242
|
+
|
|
243
|
+
from .metadata import ( # noqa
|
|
244
|
+
Metadata,
|
|
245
|
+
|
|
246
|
+
MetadataContainer,
|
|
247
|
+
|
|
248
|
+
CommonMetadata,
|
|
249
|
+
|
|
250
|
+
Uuid,
|
|
251
|
+
)
|
|
252
|
+
|
|
253
|
+
from .registry import ( # noqa
|
|
254
|
+
register_type,
|
|
255
|
+
registry_new,
|
|
256
|
+
registry_of,
|
|
257
|
+
)
|
|
258
|
+
|
|
259
|
+
from .resources import ( # noqa
|
|
260
|
+
ResourcesRef,
|
|
261
|
+
ResourcesRefNotRegisteredError,
|
|
262
|
+
Resources,
|
|
263
|
+
|
|
264
|
+
ResourceManaged,
|
|
265
|
+
)
|
|
266
|
+
|
|
267
|
+
from .standard import ( # noqa
|
|
268
|
+
ModelSpecifier,
|
|
269
|
+
ModelName,
|
|
270
|
+
ModelPath,
|
|
271
|
+
|
|
272
|
+
ApiKey,
|
|
273
|
+
|
|
274
|
+
DefaultRequestOptions,
|
|
275
|
+
)
|
|
276
|
+
|
|
277
|
+
from .streaming import ( # noqa
|
|
278
|
+
ResponseGenerator,
|
|
279
|
+
|
|
280
|
+
StreamResponse,
|
|
281
|
+
)
|
|
282
|
+
|
|
238
283
|
|
|
239
284
|
##
|
|
240
285
|
|
|
241
286
|
|
|
242
287
|
from omlish.lang.imports import _register_conditional_import # noqa
|
|
243
288
|
|
|
244
|
-
_register_conditional_import('omlish.marshal', '.chat.marshal', __package__)
|
|
245
|
-
_register_conditional_import('omlish.marshal', '.content.marshal', __package__)
|
|
246
|
-
_register_conditional_import('omlish.marshal', '.llms.marshal', __package__)
|
|
247
|
-
_register_conditional_import('omlish.marshal', '.tools.marshal', __package__)
|
|
289
|
+
_register_conditional_import('omlish.marshal', '.chat._marshal', __package__)
|
|
290
|
+
_register_conditional_import('omlish.marshal', '.content._marshal', __package__)
|
|
291
|
+
_register_conditional_import('omlish.marshal', '.llms._marshal', __package__)
|
|
292
|
+
_register_conditional_import('omlish.marshal', '.tools._marshal', __package__)
|