python-fastllm 0.0.2__tar.gz → 0.0.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_fastllm-0.0.2 → python_fastllm-0.0.4}/PKG-INFO +1 -1
- python_fastllm-0.0.4/fastllm/__init__.py +1 -0
- {python_fastllm-0.0.2 → python_fastllm-0.0.4}/fastllm/_modidx.py +14 -4
- {python_fastllm-0.0.2 → python_fastllm-0.0.4}/fastllm/acomplete.py +1 -3
- {python_fastllm-0.0.2 → python_fastllm-0.0.4}/fastllm/chat.py +112 -30
- {python_fastllm-0.0.2 → python_fastllm-0.0.4}/fastllm/streaming.py +52 -67
- {python_fastllm-0.0.2 → python_fastllm-0.0.4}/fastllm/types.py +30 -14
- {python_fastllm-0.0.2 → python_fastllm-0.0.4}/pyproject.toml +2 -0
- {python_fastllm-0.0.2 → python_fastllm-0.0.4}/python_fastllm.egg-info/PKG-INFO +1 -1
- python_fastllm-0.0.2/fastllm/__init__.py +0 -1
- {python_fastllm-0.0.2 → python_fastllm-0.0.4}/README.md +0 -0
- {python_fastllm-0.0.2 → python_fastllm-0.0.4}/fastllm/anthropic.py +0 -0
- {python_fastllm-0.0.2 → python_fastllm-0.0.4}/fastllm/gemini.py +0 -0
- {python_fastllm-0.0.2 → python_fastllm-0.0.4}/fastllm/openai_chat.py +0 -0
- {python_fastllm-0.0.2 → python_fastllm-0.0.4}/fastllm/openai_responses.py +0 -0
- {python_fastllm-0.0.2 → python_fastllm-0.0.4}/fastllm/specs/anthropic.json +0 -0
- {python_fastllm-0.0.2 → python_fastllm-0.0.4}/fastllm/specs/anthropic.yml +0 -0
- {python_fastllm-0.0.2 → python_fastllm-0.0.4}/fastllm/specs/gemini.json +0 -0
- {python_fastllm-0.0.2 → python_fastllm-0.0.4}/fastllm/specs/openai.with-code-samples.json +0 -0
- {python_fastllm-0.0.2 → python_fastllm-0.0.4}/fastllm/specs/openai.with-code-samples.yml +0 -0
- {python_fastllm-0.0.2 → python_fastllm-0.0.4}/fastllm/specs/spec_manifest.json +0 -0
- {python_fastllm-0.0.2 → python_fastllm-0.0.4}/python_fastllm.egg-info/SOURCES.txt +0 -0
- {python_fastllm-0.0.2 → python_fastllm-0.0.4}/python_fastllm.egg-info/dependency_links.txt +0 -0
- {python_fastllm-0.0.2 → python_fastllm-0.0.4}/python_fastllm.egg-info/entry_points.txt +0 -0
- {python_fastllm-0.0.2 → python_fastllm-0.0.4}/python_fastllm.egg-info/requires.txt +0 -0
- {python_fastllm-0.0.2 → python_fastllm-0.0.4}/python_fastllm.egg-info/top_level.txt +0 -0
- {python_fastllm-0.0.2 → python_fastllm-0.0.4}/setup.cfg +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.0.4"
|
|
@@ -51,7 +51,11 @@ d = { 'settings': { 'branch': 'main',
|
|
|
51
51
|
'fastllm.chat.AsyncStreamFormatter': ('chat.html#asyncstreamformatter', 'fastllm/chat.py'),
|
|
52
52
|
'fastllm.chat.AsyncStreamFormatter.format_stream': ( 'chat.html#asyncstreamformatter.format_stream',
|
|
53
53
|
'fastllm/chat.py'),
|
|
54
|
+
'fastllm.chat.FenceToolStop': ('chat.html#fencetoolstop', 'fastllm/chat.py'),
|
|
55
|
+
'fastllm.chat.FenceToolStop.__call__': ('chat.html#fencetoolstop.__call__', 'fastllm/chat.py'),
|
|
56
|
+
'fastllm.chat.FenceToolStop.__init__': ('chat.html#fencetoolstop.__init__', 'fastllm/chat.py'),
|
|
54
57
|
'fastllm.chat.FullResponse': ('chat.html#fullresponse', 'fastllm/chat.py'),
|
|
58
|
+
'fastllm.chat.Msg.text': ('chat.html#msg.text', 'fastllm/chat.py'),
|
|
55
59
|
'fastllm.chat.StopResponse': ('chat.html#stopresponse', 'fastllm/chat.py'),
|
|
56
60
|
'fastllm.chat.StreamFormatter': ('chat.html#streamformatter', 'fastllm/chat.py'),
|
|
57
61
|
'fastllm.chat.StreamFormatter.__init__': ('chat.html#streamformatter.__init__', 'fastllm/chat.py'),
|
|
@@ -65,6 +69,7 @@ d = { 'settings': { 'branch': 'main',
|
|
|
65
69
|
'fastllm.chat.UsageStats.__repr__': ('chat.html#usagestats.__repr__', 'fastllm/chat.py'),
|
|
66
70
|
'fastllm.chat.UsageStats.fmt': ('chat.html#usagestats.fmt', 'fastllm/chat.py'),
|
|
67
71
|
'fastllm.chat.UsageStats.from_response': ('chat.html#usagestats.from_response', 'fastllm/chat.py'),
|
|
72
|
+
'fastllm.chat._active_fence_langs': ('chat.html#_active_fence_langs', 'fastllm/chat.py'),
|
|
68
73
|
'fastllm.chat._add_cache_control': ('chat.html#_add_cache_control', 'fastllm/chat.py'),
|
|
69
74
|
'fastllm.chat._alite_call_func': ('chat.html#_alite_call_func', 'fastllm/chat.py'),
|
|
70
75
|
'fastllm.chat._apply_cache_idxs': ('chat.html#_apply_cache_idxs', 'fastllm/chat.py'),
|
|
@@ -79,7 +84,10 @@ d = { 'settings': { 'branch': 'main',
|
|
|
79
84
|
'fastllm.chat._lite_call_func': ('chat.html#_lite_call_func', 'fastllm/chat.py'),
|
|
80
85
|
'fastllm.chat._mk_content': ('chat.html#_mk_content', 'fastllm/chat.py'),
|
|
81
86
|
'fastllm.chat._mk_prefill': ('chat.html#_mk_prefill', 'fastllm/chat.py'),
|
|
87
|
+
'fastllm.chat._mk_result_fence': ('chat.html#_mk_result_fence', 'fastllm/chat.py'),
|
|
82
88
|
'fastllm.chat._mk_tool_result': ('chat.html#_mk_tool_result', 'fastllm/chat.py'),
|
|
89
|
+
'fastllm.chat._split_fence_msgs': ('chat.html#_split_fence_msgs', 'fastllm/chat.py'),
|
|
90
|
+
'fastllm.chat._split_msg_on_fences': ('chat.html#_split_msg_on_fences', 'fastllm/chat.py'),
|
|
83
91
|
'fastllm.chat._srv_tc_summary': ('chat.html#_srv_tc_summary', 'fastllm/chat.py'),
|
|
84
92
|
'fastllm.chat._srvtools': ('chat.html#_srvtools', 'fastllm/chat.py'),
|
|
85
93
|
'fastllm.chat._tc_summary': ('chat.html#_tc_summary', 'fastllm/chat.py'),
|
|
@@ -93,6 +101,7 @@ d = { 'settings': { 'branch': 'main',
|
|
|
93
101
|
'fastllm.chat.astream_with_complete': ('chat.html#astream_with_complete', 'fastllm/chat.py'),
|
|
94
102
|
'fastllm.chat.cite_footnote': ('chat.html#cite_footnote', 'fastllm/chat.py'),
|
|
95
103
|
'fastllm.chat.contents': ('chat.html#contents', 'fastllm/chat.py'),
|
|
104
|
+
'fastllm.chat.extract_fence_call': ('chat.html#extract_fence_call', 'fastllm/chat.py'),
|
|
96
105
|
'fastllm.chat.fmt2hist': ('chat.html#fmt2hist', 'fastllm/chat.py'),
|
|
97
106
|
'fastllm.chat.lite_mk_func': ('chat.html#lite_mk_func', 'fastllm/chat.py'),
|
|
98
107
|
'fastllm.chat.mk_msg': ('chat.html#mk_msg', 'fastllm/chat.py'),
|
|
@@ -101,9 +110,11 @@ d = { 'settings': { 'branch': 'main',
|
|
|
101
110
|
'fastllm.chat.mk_tr_details': ('chat.html#mk_tr_details', 'fastllm/chat.py'),
|
|
102
111
|
'fastllm.chat.postproc': ('chat.html#postproc', 'fastllm/chat.py'),
|
|
103
112
|
'fastllm.chat.remove_cache_ckpts': ('chat.html#remove_cache_ckpts', 'fastllm/chat.py'),
|
|
113
|
+
'fastllm.chat.run_fence_tool': ('chat.html#run_fence_tool', 'fastllm/chat.py'),
|
|
104
114
|
'fastllm.chat.search_count': ('chat.html#search_count', 'fastllm/chat.py'),
|
|
105
115
|
'fastllm.chat.split_tools': ('chat.html#split_tools', 'fastllm/chat.py'),
|
|
106
116
|
'fastllm.chat.stop_reason': ('chat.html#stop_reason', 'fastllm/chat.py'),
|
|
117
|
+
'fastllm.chat.stop_sequences': ('chat.html#stop_sequences', 'fastllm/chat.py'),
|
|
107
118
|
'fastllm.chat.structured': ('chat.html#structured', 'fastllm/chat.py')},
|
|
108
119
|
'fastllm.gemini': { 'fastllm.gemini._gem_filter_sch': ('gemini.html#_gem_filter_sch', 'fastllm/gemini.py'),
|
|
109
120
|
'fastllm.gemini._gem_part_type': ('gemini.html#_gem_part_type', 'fastllm/gemini.py'),
|
|
@@ -208,13 +219,12 @@ d = { 'settings': { 'branch': 'main',
|
|
|
208
219
|
'fastllm.streaming.PartAccum': ('streaming.html#partaccum', 'fastllm/streaming.py'),
|
|
209
220
|
'fastllm.streaming.PartAccum.append': ('streaming.html#partaccum.append', 'fastllm/streaming.py'),
|
|
210
221
|
'fastllm.streaming.PartAccum.finalize': ('streaming.html#partaccum.finalize', 'fastllm/streaming.py'),
|
|
222
|
+
'fastllm.streaming.PartAccum.get_merged': ( 'streaming.html#partaccum.get_merged',
|
|
223
|
+
'fastllm/streaming.py'),
|
|
211
224
|
'fastllm.streaming._trim_delta': ('streaming.html#_trim_delta', 'fastllm/streaming.py'),
|
|
212
|
-
'fastllm.streaming.accum_completion': ('streaming.html#accum_completion', 'fastllm/streaming.py'),
|
|
213
|
-
'fastllm.streaming.completion_text': ('streaming.html#completion_text', 'fastllm/streaming.py'),
|
|
214
|
-
'fastllm.streaming.fake_stream': ('streaming.html#fake_stream', 'fastllm/streaming.py'),
|
|
215
225
|
'fastllm.streaming.mk_acollect_stream': ('streaming.html#mk_acollect_stream', 'fastllm/streaming.py'),
|
|
216
226
|
'fastllm.streaming.norm_and_yield': ('streaming.html#norm_and_yield', 'fastllm/streaming.py'),
|
|
217
|
-
'fastllm.streaming.
|
|
227
|
+
'fastllm.streaming.stop_and_trim': ('streaming.html#stop_and_trim', 'fastllm/streaming.py')},
|
|
218
228
|
'fastllm.types': { 'fastllm.types.APIRegistry': ('types.html#apiregistry', 'fastllm/types.py'),
|
|
219
229
|
'fastllm.types.APIRegistry.__init__': ('types.html#apiregistry.__init__', 'fastllm/types.py'),
|
|
220
230
|
'fastllm.types.APIRegistry.register': ('types.html#apiregistry.register', 'fastllm/types.py'),
|
|
@@ -18,7 +18,6 @@ from fastspec.errors import APIError
|
|
|
18
18
|
from .types import *
|
|
19
19
|
from .streaming import *
|
|
20
20
|
from .openai_responses import *
|
|
21
|
-
from .streaming import stop_sequences as _stop_sequences
|
|
22
21
|
from .openai_chat import *
|
|
23
22
|
from .anthropic import *
|
|
24
23
|
from .gemini import *
|
|
@@ -30,7 +29,7 @@ oai_spec = SpecParser.from_openapi(dict2obj(json.loads((specs_path/'openai.with
|
|
|
30
29
|
gem_spec = SpecParser.from_discovery(dict2obj(json.loads((specs_path/'gemini.json').read_text())))
|
|
31
30
|
|
|
32
31
|
# %% ../nbs/06_acomplete.ipynb #32ee2546
|
|
33
|
-
_codex_json = '~/.codex/auth.json', 'tokens','access_token'
|
|
32
|
+
_codex_json = '~/.codex/auth.json', ('tokens','access_token')
|
|
34
33
|
vendor_mapping = {
|
|
35
34
|
"openai": ('openai', 'https://api.openai.com/v1', 'OPENAI_API_KEY'),
|
|
36
35
|
"anthropic": ('anthropic', 'https://api.anthropic.com', 'ANTHROPIC_API_KEY'),
|
|
@@ -106,7 +105,6 @@ async def acomplete(msgs, model, api_name=None, vendor_name=None, api_key=None,
|
|
|
106
105
|
"Unified completion across different APIs."
|
|
107
106
|
cli, api_name, vendor_name = mk_client(model, vendor_name, api_name, api_key, base_url, xtra_hdrs)
|
|
108
107
|
api = api_registry.apis[api_name]
|
|
109
|
-
if stop_sequences: stop_callables = L(stop_callables) + [_stop_sequences(stop_sequences)]
|
|
110
108
|
payload = api.mk_payload(msgs, model, stream=stream, stop_callables=stop_callables, **kwargs)
|
|
111
109
|
payload = merge(payload, ifnone(xtra_body, {}))
|
|
112
110
|
if vendor_name == 'codex':
|
|
@@ -3,12 +3,11 @@
|
|
|
3
3
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/07_chat.ipynb.
|
|
4
4
|
|
|
5
5
|
# %% auto #0
|
|
6
|
-
__all__ = ['
|
|
7
|
-
'
|
|
8
|
-
'
|
|
9
|
-
'
|
|
10
|
-
'
|
|
11
|
-
'adisplay_stream']
|
|
6
|
+
__all__ = ['tool_dtls_tag', 're_tools', 'token_dtls_tag', 're_token', 'effort', 'remove_cache_ckpts', 'contents', 'stop_reason',
|
|
7
|
+
'mk_msg', 'FenceToolStop', 'extract_fence_call', 'stop_sequences', 'split_tools', 'fmt2hist', 'mk_msgs',
|
|
8
|
+
'cite_footnote', 'postproc', 'lite_mk_func', 'ToolResponse', 'structured', 'StopResponse', 'FullResponse',
|
|
9
|
+
'search_count', 'UsageStats', 'AsyncChat', 'add_warning', 'astream_with_complete', 'run_fence_tool',
|
|
10
|
+
'mk_tr_details', 'mk_srv_tc_details', 'StreamFormatter', 'AsyncStreamFormatter', 'adisplay_stream']
|
|
12
11
|
|
|
13
12
|
# %% ../nbs/07_chat.ipynb #d5a3bc1f
|
|
14
13
|
import asyncio, base64, json, mimetypes, random, string, ast, warnings
|
|
@@ -21,19 +20,8 @@ from fastcore import imghdr
|
|
|
21
20
|
from fastcore.xml import Safe
|
|
22
21
|
from dataclasses import dataclass
|
|
23
22
|
|
|
23
|
+
from .types import *
|
|
24
24
|
from .acomplete import *
|
|
25
|
-
from .acomplete import Msg, Part, PartType, ToolCall, Completion, mk_tool_res_msg, get_model_info
|
|
26
|
-
|
|
27
|
-
# %% ../nbs/07_chat.ipynb #c4b8f12b
|
|
28
|
-
haik45 = "claude-haiku-4-5"
|
|
29
|
-
sonn45 = "claude-sonnet-4-5"
|
|
30
|
-
sonn = sonn46 = "claude-sonnet-4-6"
|
|
31
|
-
opus46 = "claude-opus-4-6"
|
|
32
|
-
opus = "claude-opus-4-7"
|
|
33
|
-
gpt54 = "gpt-5.4"
|
|
34
|
-
gpt54m = "gpt-5.4-mini"
|
|
35
|
-
codex54 = "gpt-5.4"
|
|
36
|
-
codex55 = "gpt-5.5"
|
|
37
25
|
|
|
38
26
|
# %% ../nbs/07_chat.ipynb #90f55ad4
|
|
39
27
|
def _bytes2content(data):
|
|
@@ -106,6 +94,65 @@ token_dtls_tag = "<details class='token-usage-details'>"
|
|
|
106
94
|
re_token = re.compile(fr"^{re.escape(token_dtls_tag)}<summary>.*?</summary>\n*\n*`.*?`\n*\n*</details>\n?",
|
|
107
95
|
flags=re.DOTALL|re.MULTILINE)
|
|
108
96
|
|
|
97
|
+
# %% ../nbs/07_chat.ipynb #be998131
|
|
98
|
+
_fence_back = '`````'
|
|
99
|
+
_fence_re = re.compile(f'^{_fence_back}(py|bash)\n(.*?)\n{_fence_back}', re.DOTALL | re.MULTILINE)
|
|
100
|
+
_result_re = re.compile(f'\n{_fence_back}result\n(.*?)\n{_fence_back}\n', re.DOTALL)
|
|
101
|
+
_lang2tool = dict(py='python', bash='bash')
|
|
102
|
+
|
|
103
|
+
class FenceToolStop:
|
|
104
|
+
def __init__(self, langs): self.langs = langs
|
|
105
|
+
def __call__(self, text):
|
|
106
|
+
"Return trim result if complete fence detected in active lang"
|
|
107
|
+
m = _fence_re.search(text)
|
|
108
|
+
if m and m.group(1) in self.langs: return m.group(0)
|
|
109
|
+
|
|
110
|
+
# %% ../nbs/07_chat.ipynb #e6360e96
|
|
111
|
+
def extract_fence_call(text):
|
|
112
|
+
"Return (lang, code) if text ends with terminated py/bash fence, else None"
|
|
113
|
+
ms = list(_fence_re.finditer(text))
|
|
114
|
+
if not ms: return None
|
|
115
|
+
m = ms[-1]
|
|
116
|
+
if not text[m.end():].strip(): return m.group(1), m.group(2)
|
|
117
|
+
|
|
118
|
+
# %% ../nbs/07_chat.ipynb #215183bf
|
|
119
|
+
@patch(as_prop=True)
|
|
120
|
+
def text(self:Msg): return ''.join(p.text or '' for p in self.content if p.type == PartType.text)
|
|
121
|
+
|
|
122
|
+
# %% ../nbs/07_chat.ipynb #1de7e4d2
|
|
123
|
+
def _mk_result_fence(output): return f"\n{_fence_back}result\n{output}\n{_fence_back}\n"
|
|
124
|
+
|
|
125
|
+
def _split_msg_on_fences(msg):
|
|
126
|
+
"Split an assistant Msg on result fences, return list of Msgs"
|
|
127
|
+
if msg.role != 'assistant': return [msg]
|
|
128
|
+
if not _result_re.search(msg.text): return [msg]
|
|
129
|
+
res, asst_parts, tool_parts = [], [], []
|
|
130
|
+
for msg_part in msg.content:
|
|
131
|
+
if msg_part.type == PartType.thinking: asst_parts.append(msg_part)
|
|
132
|
+
elif msg_part.type == PartType.tool_use: tool_parts.append(msg_part)
|
|
133
|
+
elif parts := _result_re.split(msg_part.text or ''):
|
|
134
|
+
for i,p in enumerate(parts):
|
|
135
|
+
if not p: continue
|
|
136
|
+
if i % 2 == 0: res.append(Msg(role='assistant', content=asst_parts+[Part(type=PartType.text, text=p.strip())]))
|
|
137
|
+
else: res.append(Msg(role='user', content=[Part(type=PartType.text, text=_mk_result_fence(p))]))
|
|
138
|
+
if tool_parts: res.append(Msg(role='assistant', content=tool_parts))
|
|
139
|
+
return res
|
|
140
|
+
|
|
141
|
+
def _split_fence_msgs(msgs):
|
|
142
|
+
"Split all assistant msgs on result fences for wire protocol"
|
|
143
|
+
res = []
|
|
144
|
+
for m in msgs: res.extend(_split_msg_on_fences(m))
|
|
145
|
+
return res
|
|
146
|
+
|
|
147
|
+
# %% ../nbs/07_chat.ipynb #b161ca9e
|
|
148
|
+
def stop_sequences(seqs):
|
|
149
|
+
"Stop when any sequence appears in the accumulated completion text."
|
|
150
|
+
seqs = L(seqs)
|
|
151
|
+
def _stop(text):
|
|
152
|
+
for s in seqs:
|
|
153
|
+
if s in text: return text[:text.find(s)+len(s)]
|
|
154
|
+
return _stop
|
|
155
|
+
|
|
109
156
|
# %% ../nbs/07_chat.ipynb #45ada210
|
|
110
157
|
def _extract_tool_parts(text:str):
|
|
111
158
|
"Extract (tool_use_part, tool_result_part) from <details> json block"
|
|
@@ -122,10 +169,13 @@ def split_tools(s):
|
|
|
122
169
|
"Split formatted output into (text, summary, tooljson) chunks"
|
|
123
170
|
return [(txt,summ,tj) for txt,_,summ,tj in chunked(re_tools.split(s.strip()), 4, pad=True)]
|
|
124
171
|
|
|
172
|
+
# %% ../nbs/07_chat.ipynb #44060a78
|
|
125
173
|
def fmt2hist(outp:str)->list[Msg]:
|
|
126
174
|
"Transform a formatted output string into fastllm canonical Msgs"
|
|
127
175
|
if token_dtls_tag in outp: outp = re_token.sub('', outp)
|
|
128
|
-
if tool_dtls_tag not in outp:
|
|
176
|
+
if tool_dtls_tag not in outp:
|
|
177
|
+
msg = Msg(role='assistant', content=[Part(type=PartType.text, text=outp.strip())])
|
|
178
|
+
return _split_msg_on_fences(msg)
|
|
129
179
|
hist, asst_parts, tool_parts = [], [], []
|
|
130
180
|
def flush():
|
|
131
181
|
if tool_parts:
|
|
@@ -134,17 +184,18 @@ def fmt2hist(outp:str)->list[Msg]:
|
|
|
134
184
|
asst_parts.clear(); tool_parts.clear()
|
|
135
185
|
for txt,_,tj in split_tools(outp):
|
|
136
186
|
if txt and txt.strip():
|
|
137
|
-
if tool_parts: flush()
|
|
187
|
+
if tool_parts: flush()
|
|
138
188
|
asst_parts.append(Part(type=PartType.text, text=txt.strip()))
|
|
139
189
|
if tj and (tp := _extract_tool_parts(tj)):
|
|
140
190
|
asst_parts.append(tp[0])
|
|
141
191
|
tool_parts.append(tp[1])
|
|
142
192
|
flush()
|
|
143
193
|
if asst_parts: hist.append(Msg(role='assistant', content=asst_parts))
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
194
|
+
result = []
|
|
195
|
+
for msg in hist:
|
|
196
|
+
if msg.role == 'assistant': result.extend(_split_msg_on_fences(msg))
|
|
197
|
+
else: result.append(msg)
|
|
198
|
+
return result
|
|
148
199
|
|
|
149
200
|
# %% ../nbs/07_chat.ipynb #8de5ce8d
|
|
150
201
|
def _apply_cache_idxs(msgs, cache_idxs=[-1], ttl=None):
|
|
@@ -321,6 +372,13 @@ def _inject_tool_reminder(msgs, reminder):
|
|
|
321
372
|
msgs[i] = m
|
|
322
373
|
return msgs
|
|
323
374
|
|
|
375
|
+
# %% ../nbs/07_chat.ipynb #e7eb2032
|
|
376
|
+
def _active_fence_langs(tool_schemas):
|
|
377
|
+
"Return set of active fence langs whose mapped tool is registered"
|
|
378
|
+
if not tool_schemas: return set()
|
|
379
|
+
names = {nested_idx(t, 'function', 'name') for t in tool_schemas}
|
|
380
|
+
return {lang for lang, tname in _lang2tool.items() if tname in names}
|
|
381
|
+
|
|
324
382
|
# %% ../nbs/07_chat.ipynb #e9a14051
|
|
325
383
|
class AsyncChat:
|
|
326
384
|
def __init__(
|
|
@@ -364,6 +422,7 @@ class AsyncChat:
|
|
|
364
422
|
self.hist = mk_msgs(self.hist, self.cache and 'claude' in self.model, cache_idxs, self.ttl)
|
|
365
423
|
msgs = self.hist
|
|
366
424
|
if prefill: msgs = self.hist + [Msg(role='assistant', content=[Part(PartType.text, prefill)])]
|
|
425
|
+
msgs = _split_fence_msgs(msgs)
|
|
367
426
|
if self.tool_reminder: msgs = _inject_tool_reminder(msgs, self.tool_reminder)
|
|
368
427
|
if 'deepseek' in self.model:
|
|
369
428
|
# The `reasoning_content` in the thinking mode must be passed back to the API.
|
|
@@ -431,8 +490,12 @@ def _prep_call(self:AsyncChat, prefill, search, max_tokens, kwargs, stream=False
|
|
|
431
490
|
if self.base_url: kwargs['base_url'] = self.base_url
|
|
432
491
|
if self.extra_headers: kwargs['xtra_headers'] = self.extra_headers
|
|
433
492
|
kwargs.update(_think_kw(self.model, think, self.vendor_name))
|
|
493
|
+
if (langs := _active_fence_langs(self.tool_schemas)):
|
|
494
|
+
if not any(isinstance(s, FenceToolStop) for s in kwargs.get('stop_callables', [])):
|
|
495
|
+
kwargs['stop_callables'] = kwargs.get('stop_callables', []) + [FenceToolStop(langs)]
|
|
434
496
|
return prefill, max_tokens
|
|
435
497
|
|
|
498
|
+
|
|
436
499
|
# %% ../nbs/07_chat.ipynb #07951b77
|
|
437
500
|
@patch
|
|
438
501
|
def print_hist(self:AsyncChat):
|
|
@@ -486,16 +549,27 @@ async def _call(self:AsyncChat, msg=None, prefill=None, temp=None, think=None, s
|
|
|
486
549
|
self._track(res)
|
|
487
550
|
yield res
|
|
488
551
|
|
|
552
|
+
toolloop, prompt = False, None
|
|
553
|
+
if (langs := _active_fence_langs(self.tool_schemas)):
|
|
554
|
+
if m := last(self.hist, lambda o: o.role == 'assistant'):
|
|
555
|
+
if fence := extract_fence_call(m.text):
|
|
556
|
+
lang, code = fence
|
|
557
|
+
out = await run_fence_tool(lang, code, self.ns)
|
|
558
|
+
for p in reversed(m.content):
|
|
559
|
+
if p.type == PartType.text: p.text += out; break
|
|
560
|
+
if stream: yield {'text': out}
|
|
561
|
+
toolloop = True
|
|
489
562
|
if stcs:= _srvtools(res.tool_calls):
|
|
490
563
|
for tc in stcs: yield tc
|
|
491
564
|
if tcs := _usrtools(res.tool_calls):
|
|
492
565
|
tres = await parallel_async(_alite_call_func, tcs, timeout=tc_timeout, n_workers=n_workers, pause=pause, **self.tcdict)
|
|
493
566
|
tmsg = mk_tool_res_msg(tcs, tres)
|
|
494
|
-
# TODO: We yield tool calls at the end with their results, fastllm doesn't yield streaming tool calls during streaming as once the collation is done for simplicity, but it can
|
|
495
567
|
for r in tmsg.content: yield r
|
|
496
568
|
self.hist.append(tmsg)
|
|
497
569
|
if step>=max_steps-1 or _has_stop(tmsg.content): prompt,tool_choice,search = mk_msg(final_prompt),'none',False
|
|
498
|
-
|
|
570
|
+
toolloop = True
|
|
571
|
+
|
|
572
|
+
if toolloop and step <= max_steps:
|
|
499
573
|
try:
|
|
500
574
|
async for result in self._call(
|
|
501
575
|
prompt, prefill, temp, think, search, stream, max_steps, step+1,
|
|
@@ -507,6 +581,14 @@ async def _call(self:AsyncChat, msg=None, prefill=None, temp=None, think=None, s
|
|
|
507
581
|
prompt, prefill, temp, think, search, stream, max_steps, step+1,
|
|
508
582
|
final_prompt, tool_choice='none', **kwargs): yield result
|
|
509
583
|
|
|
584
|
+
# %% ../nbs/07_chat.ipynb #4dc002da
|
|
585
|
+
async def run_fence_tool(lang, code, ns):
|
|
586
|
+
"Run the mapped tool for `lang` with the code, return result fence"
|
|
587
|
+
tname = _lang2tool[lang]
|
|
588
|
+
arg = dict(code=code) if lang == 'py' else dict(command=code)
|
|
589
|
+
res = _mk_tool_result(await call_func_async(tname, arg, ns=ns, raise_on_err=False))
|
|
590
|
+
return _mk_result_fence(_trunc_str(str(res)))
|
|
591
|
+
|
|
510
592
|
# %% ../nbs/07_chat.ipynb #1361515a
|
|
511
593
|
@patch
|
|
512
594
|
@delegates(AsyncChat._call)
|
|
@@ -608,15 +690,15 @@ class AsyncStreamFormatter(StreamFormatter):
|
|
|
608
690
|
"Format the response stream for markdown display."
|
|
609
691
|
async for o in rs: yield self.format_item(o)
|
|
610
692
|
|
|
611
|
-
# %% ../nbs/07_chat.ipynb #
|
|
693
|
+
# %% ../nbs/07_chat.ipynb #944bcd25
|
|
612
694
|
@delegates(AsyncStreamFormatter)
|
|
613
695
|
async def adisplay_stream(rs, **kwargs):
|
|
614
696
|
"Use IPython.display to markdown display the response stream."
|
|
615
697
|
try: from IPython.display import display, Markdown
|
|
616
698
|
except ModuleNotFoundError: raise ModuleNotFoundError("This function requires ipython. Please run `pip install ipython` to use.")
|
|
617
699
|
fmt = AsyncStreamFormatter(**kwargs)
|
|
618
|
-
md = ''
|
|
700
|
+
md,h = '',display(Markdown(' '), display_id=True)
|
|
619
701
|
async for o in fmt.format_stream(rs):
|
|
620
|
-
md+=o
|
|
621
|
-
|
|
702
|
+
md += o
|
|
703
|
+
if md: h.update(Markdown(md))
|
|
622
704
|
return fmt
|
|
@@ -3,11 +3,10 @@
|
|
|
3
3
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/01_streaming.ipynb.
|
|
4
4
|
|
|
5
5
|
# %% auto #0
|
|
6
|
-
__all__ = ['Delta', 'norm_and_yield', 'PartAccum', '
|
|
7
|
-
'fake_stream']
|
|
6
|
+
__all__ = ['Delta', 'norm_and_yield', 'PartAccum', 'stop_and_trim', 'mk_acollect_stream']
|
|
8
7
|
|
|
9
8
|
# %% ../nbs/01_streaming.ipynb #0df5c926
|
|
10
|
-
import json
|
|
9
|
+
import json,copy
|
|
11
10
|
from dataclasses import dataclass, field, fields
|
|
12
11
|
from fastcore.utils import *
|
|
13
12
|
from fastcore.meta import delegates
|
|
@@ -56,54 +55,44 @@ class PartAccum:
|
|
|
56
55
|
# anthropic citations have matching idx
|
|
57
56
|
self.parts[index].data['citations'].extend(citations or [])
|
|
58
57
|
|
|
59
|
-
def
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
58
|
+
def get_merged(self, with_tools=True):
|
|
59
|
+
tmp_parts = copy.deepcopy(self.parts)
|
|
60
|
+
tool_calls = []
|
|
61
|
+
if with_tools:
|
|
62
|
+
for idx,tc in tmp_parts.items():
|
|
63
|
+
if isinstance(tc, ToolCall):
|
|
64
|
+
if isinstance(tc.arguments, str): tc.arguments = json.loads(tc.arguments) if tc.arguments else {}
|
|
65
|
+
tool_calls.append(tc)
|
|
66
|
+
data = {**tc.extra, 'id':tc.id, 'name':tc.name, 'arguments':tc.arguments, 'server':tc.server}
|
|
67
|
+
tmp_parts[idx] = Part(type=PartType.tool_use, data=data)
|
|
66
68
|
|
|
67
69
|
merged = []
|
|
68
|
-
for p in
|
|
70
|
+
for p in tmp_parts.values():
|
|
71
|
+
if isinstance(p, ToolCall) and not with_tools: continue
|
|
69
72
|
if merged and merged[-1].type == p.type and p.type in (PartType.text, PartType.thinking): merged[-1].text += p.text
|
|
70
|
-
else: merged.append(p)
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
"Build a Completion snapshot from in-progress PartAccum state"
|
|
76
|
-
parts = [p for p in pa.parts.values() if isinstance(p, Part)]
|
|
77
|
-
if delta and delta.text:
|
|
78
|
-
parts = parts.copy()
|
|
79
|
-
if parts and parts[-1].type==PartType.text:
|
|
80
|
-
p = parts[-1]
|
|
81
|
-
parts[-1] = Part(type=p.type, text=(p.text or '') + delta.text, data=p.data)
|
|
82
|
-
else: parts.append(Part(type=PartType.text, text=delta.text))
|
|
83
|
-
return Completion(raw.get('model', model), Msg(role="assistant", content=parts),
|
|
84
|
-
fin, usg, api_name=api_name, vendor_name=vendor_name, raw={'deltas':deltas})
|
|
85
|
-
|
|
86
|
-
# %% ../nbs/01_streaming.ipynb #c28f706f
|
|
87
|
-
def completion_text(c):
|
|
88
|
-
"Combined text from a Completion."
|
|
89
|
-
return ''.join(p.text or '' for p in c.message.content if p.type==PartType.text)
|
|
90
|
-
|
|
91
|
-
# %% ../nbs/01_streaming.ipynb #b2b9f7ca
|
|
92
|
-
def stop_sequences(seqs):
|
|
93
|
-
"Stop when any sequence appears in the accumulated completion text."
|
|
94
|
-
seqs = L(seqs)
|
|
95
|
-
def _stop(c):
|
|
96
|
-
txt = completion_text(c)
|
|
97
|
-
for s in seqs:
|
|
98
|
-
if s in txt: return s
|
|
99
|
-
return _stop
|
|
73
|
+
else: merged.append(p)
|
|
74
|
+
return merged, tool_calls
|
|
75
|
+
|
|
76
|
+
def finalize(self):
|
|
77
|
+
self.parts, self.tool_calls = self.get_merged()
|
|
100
78
|
|
|
101
|
-
# %% ../nbs/01_streaming.ipynb #
|
|
102
|
-
def _trim_delta(d,
|
|
79
|
+
# %% ../nbs/01_streaming.ipynb #f11ea80a
|
|
80
|
+
def _trim_delta(d, txt, s):
|
|
103
81
|
"Trim `d.text` so the accumulated text `txt` ends right after the first occurrence of `s`."
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
82
|
+
idx = len(txt) - (txt.find(s) + len(s))
|
|
83
|
+
if idx>0: d.text = d.text[:-idx]
|
|
84
|
+
|
|
85
|
+
# %% ../nbs/01_streaming.ipynb #efbf96d7
|
|
86
|
+
def stop_and_trim(part_accum, d, stop_callables):
|
|
87
|
+
'Stop based on the accumulated text so far, and trim current delta'
|
|
88
|
+
parts,_ = part_accum.get_merged(with_tools=False)
|
|
89
|
+
prev = parts[-1].text if parts and parts[-1].type == PartType.text else ''
|
|
90
|
+
txt = prev + (d.text or '')
|
|
91
|
+
for f in stop_callables:
|
|
92
|
+
if res:=f(txt):
|
|
93
|
+
if isinstance(res, str): _trim_delta(d, txt, res)
|
|
94
|
+
return True
|
|
95
|
+
return False
|
|
107
96
|
|
|
108
97
|
# %% ../nbs/01_streaming.ipynb #fc71790b
|
|
109
98
|
async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_name=None, stop_callables=None):
|
|
@@ -120,18 +109,22 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
|
|
|
120
109
|
idx = _fidx(d, name, pt)
|
|
121
110
|
part_accum.append(typ, idx, **(ret or {kw: val}))
|
|
122
111
|
return ret or {name: val}
|
|
112
|
+
def _yield_parts(d):
|
|
113
|
+
for args in [('text',), ('thinking',), ('citations', 'text', 'citations')]:
|
|
114
|
+
if (r := _proc(d, args[0], pt=args[1] if len(args)>1 else None, kw=args[2] if len(args)>2 else 'txt')):
|
|
115
|
+
yield r
|
|
116
|
+
stop, stop_yielded = False, False
|
|
123
117
|
async for d in it:
|
|
124
|
-
stop
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
for
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
if (r:=_proc(d, 'citations', pt='text', kw='citations')): yield r
|
|
118
|
+
# Check stop condition and yield stop delta
|
|
119
|
+
stop = stop_and_trim(part_accum, d, stop_callables)
|
|
120
|
+
if stop and not stop_yielded:
|
|
121
|
+
for r in _yield_parts(d): yield r
|
|
122
|
+
stop_yielded = True
|
|
123
|
+
# If stopped, the remaining deltas are yielded as 'processing'
|
|
124
|
+
if stop: yield {'thinking':'processing'}
|
|
125
|
+
else:
|
|
126
|
+
for r in _yield_parts(d): yield r
|
|
127
|
+
# Rest incl. tools, finish reason, usage is processed independently
|
|
135
128
|
for tc in d.tool_calls:
|
|
136
129
|
args = tc.arguments.get('_delta', tc.arguments)
|
|
137
130
|
_proc(d, 'tool_use', ret=dict(id=tc.id, name=tc.name, arguments=args, server=tc.server, extra=tc.extra))
|
|
@@ -143,20 +136,12 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
|
|
|
143
136
|
if d.usage: usg = d.usage
|
|
144
137
|
last_typ = typ
|
|
145
138
|
deltas.append(d)
|
|
146
|
-
if stop:
|
|
147
|
-
fin = fin or FinishReason.stop
|
|
148
|
-
await it.aclose()
|
|
149
|
-
break
|
|
150
139
|
part_accum.finalize()
|
|
151
|
-
# need to recheck for tool calls post collation for streaming
|
|
152
140
|
tcs = part_accum.tool_calls
|
|
153
|
-
fin = FinishReason.
|
|
141
|
+
if stop: fin = FinishReason.stop
|
|
142
|
+
fin = FinishReason.tool_calls if fin==FinishReason.stop and any(~L(tcs).attrgot('server')) else fin # recheck tool calls post collation
|
|
154
143
|
# tool calls and non-anthropic citations are yielded at the end
|
|
155
144
|
yield Completion(d.raw.get('model', model),
|
|
156
145
|
message=Msg(role="assistant", content=part_accum.parts),
|
|
157
146
|
finish_reason=fin, usage=usg, tool_calls=tcs, api_name=api_name, vendor_name=vendor_name,
|
|
158
147
|
raw={'deltas':deltas})
|
|
159
|
-
|
|
160
|
-
# %% ../nbs/01_streaming.ipynb #f79d3b99
|
|
161
|
-
async def fake_stream(*ss):
|
|
162
|
-
for s in ss: yield Delta(text=s, raw={'model':'fake'})
|
|
@@ -3,7 +3,8 @@
|
|
|
3
3
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/00_types.ipynb.
|
|
4
4
|
|
|
5
5
|
# %% auto #0
|
|
6
|
-
__all__ = ['PartType', 'FinishReason', 'api_registry', 'model_prices_url', '
|
|
6
|
+
__all__ = ['PartType', 'FinishReason', 'api_registry', 'model_prices_url', 'haik45', 'sonn45', 'sonn', 'sonn46', 'opus46', 'opus',
|
|
7
|
+
'gpt54', 'gpt54m', 'codex54', 'codex55', 'codex53spark', 'codex_pricing', 'Part', 'Msg', 'ToolCall',
|
|
7
8
|
'display_list', 'Usage', 'Completion', 'APIRegistry', 'mk_completion', 'mk_tool_res_msg', 'fn_schema',
|
|
8
9
|
'sys_text', 'part_txt', 'data_url', 'url_mime', 'payload_kwargs', 'get_api_key', 'model_prices_meta',
|
|
9
10
|
'infer_api_name', 'get_model_meta', 'get_model_info']
|
|
@@ -151,7 +152,6 @@ class APIRegistry:
|
|
|
151
152
|
api_registry = APIRegistry()
|
|
152
153
|
|
|
153
154
|
# %% ../nbs/00_types.ipynb #d58a5f96
|
|
154
|
-
#COMMON
|
|
155
155
|
def mk_completion(resp, model, api_name, vendor_name):
|
|
156
156
|
"Normalize an api response into Completion."
|
|
157
157
|
api = api_registry.apis[api_name]
|
|
@@ -167,7 +167,6 @@ def mk_completion(resp, model, api_name, vendor_name):
|
|
|
167
167
|
raw=resp)
|
|
168
168
|
|
|
169
169
|
# %% ../nbs/00_types.ipynb #d5322db5
|
|
170
|
-
#COMMON
|
|
171
170
|
def mk_tool_res_msg(tool_calls:list[ToolCall], results:list[str|list]):
|
|
172
171
|
'A util to prepare parallel tool call with str or media list results'
|
|
173
172
|
parts = []
|
|
@@ -177,7 +176,6 @@ def mk_tool_res_msg(tool_calls:list[ToolCall], results:list[str|list]):
|
|
|
177
176
|
return Msg(role="tool", content=parts)
|
|
178
177
|
|
|
179
178
|
# %% ../nbs/00_types.ipynb #8a8e468b
|
|
180
|
-
#COMMON
|
|
181
179
|
def fn_schema(t):
|
|
182
180
|
"Extract (name, description, parameters) from any tool format."
|
|
183
181
|
if not isinstance(t, dict): return None
|
|
@@ -188,7 +186,6 @@ def fn_schema(t):
|
|
|
188
186
|
return None
|
|
189
187
|
|
|
190
188
|
# %% ../nbs/00_types.ipynb #d1d48d91
|
|
191
|
-
#COMMON
|
|
192
189
|
def sys_text(system):
|
|
193
190
|
"Extract text from system (str or Part)."
|
|
194
191
|
if system is None: return None
|
|
@@ -197,7 +194,6 @@ def sys_text(system):
|
|
|
197
194
|
def part_txt(p): return p.text if isinstance(p,Part) else p
|
|
198
195
|
|
|
199
196
|
# %% ../nbs/00_types.ipynb #dc2b75a0
|
|
200
|
-
#COMMON
|
|
201
197
|
_ext_mime = {
|
|
202
198
|
'.jpg':'image/jpeg', '.jpeg':'image/jpeg', '.png':'image/png', '.gif':'image/gif', '.webp':'image/webp',
|
|
203
199
|
'.pdf':'application/pdf',
|
|
@@ -250,7 +246,19 @@ def get_model_meta(model, vendor_name=None, tfm=noop):
|
|
|
250
246
|
elif vendor_name: key = f"{vendor_name}/{model}"
|
|
251
247
|
return dict2obj(tfm(mp.get(key), model, vendor_name))
|
|
252
248
|
|
|
253
|
-
# %% ../nbs/00_types.ipynb #
|
|
249
|
+
# %% ../nbs/00_types.ipynb #60607e23
|
|
250
|
+
haik45 = "claude-haiku-4-5"
|
|
251
|
+
sonn45 = "claude-sonnet-4-5"
|
|
252
|
+
sonn = sonn46 = "claude-sonnet-4-6"
|
|
253
|
+
opus46 = "claude-opus-4-6"
|
|
254
|
+
opus = "claude-opus-4-7"
|
|
255
|
+
gpt54 = "gpt-5.4"
|
|
256
|
+
gpt54m = "gpt-5.4-mini"
|
|
257
|
+
codex54 = "gpt-5.4"
|
|
258
|
+
codex55 = "gpt-5.5"
|
|
259
|
+
codex53spark = "gpt-5.3-codex-spark"
|
|
260
|
+
|
|
261
|
+
# %% ../nbs/00_types.ipynb #d6d5b98c
|
|
254
262
|
codex_pricing = {
|
|
255
263
|
"input_cost_per_token": 0.10 / 1_000_000,
|
|
256
264
|
"cache_creation_input_token_cost": 0.10 / 1_000_000,
|
|
@@ -258,23 +266,31 @@ codex_pricing = {
|
|
|
258
266
|
"output_cost_per_token": 0.50 / 1_000_000,
|
|
259
267
|
}
|
|
260
268
|
|
|
269
|
+
_codex_overrides = {
|
|
270
|
+
codex53spark: dict(
|
|
271
|
+
supports_vision=False, supports_image_input=False, supports_web_search=True, supports_reasoning=True,
|
|
272
|
+
max_tokens=128000, max_input_tokens=128000, max_output_tokens=128000)
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
# %% ../nbs/00_types.ipynb #fbfdeb0a
|
|
261
276
|
def get_model_info(mn, vendor_name=None):
|
|
262
|
-
info = get_model_meta(mn, vendor_name)
|
|
277
|
+
info = get_model_meta(mn, 'chatgpt' if vendor_name=='codex' else vendor_name)
|
|
263
278
|
# anthropic web search
|
|
264
|
-
if 'search_context_cost_per_query' in info:
|
|
265
|
-
|
|
266
|
-
# add reasoning to kimi
|
|
279
|
+
if 'search_context_cost_per_query' in info: info['supports_web_search'] = True
|
|
280
|
+
# kimi
|
|
267
281
|
if 'kimi' in mn:
|
|
268
282
|
if 'k2p6' in mn: info = get_model_meta(mn.replace('k2p6', 'k2p5'), vendor_name)
|
|
269
283
|
info['supports_reasoning'] = True
|
|
270
284
|
info['supports_vision'] = True
|
|
271
285
|
if vendor_name == 'moonshot': info['supports_assistant_prefill'] = True
|
|
272
|
-
#
|
|
286
|
+
# gpt web search
|
|
273
287
|
if mn in ("gpt-5.4", "gpt-5.4-mini"):
|
|
274
288
|
info['supports_web_search'] = True
|
|
275
289
|
info.pop('mode', None)
|
|
276
|
-
# codex
|
|
277
|
-
if vendor_name == 'codex':
|
|
290
|
+
# codex updates
|
|
291
|
+
if vendor_name == 'codex':
|
|
292
|
+
info = merge(info, codex_pricing)
|
|
293
|
+
info |= _codex_overrides.get(mn, {})
|
|
278
294
|
# deepseek v4
|
|
279
295
|
if vendor_name == 'deepseek' and mn in ("deepseek-v4-flash", "deepseek-v4-pro"):
|
|
280
296
|
info = dict(get_model_meta("deepseek/deepseek-v3.2"))
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.0.2"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|