python-fastllm 0.0.3__tar.gz → 0.0.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_fastllm-0.0.3 → python_fastllm-0.0.4}/PKG-INFO +1 -1
- python_fastllm-0.0.4/fastllm/__init__.py +1 -0
- {python_fastllm-0.0.3 → python_fastllm-0.0.4}/fastllm/_modidx.py +14 -4
- {python_fastllm-0.0.3 → python_fastllm-0.0.4}/fastllm/acomplete.py +1 -3
- {python_fastllm-0.0.3 → python_fastllm-0.0.4}/fastllm/chat.py +110 -16
- {python_fastllm-0.0.3 → python_fastllm-0.0.4}/fastllm/streaming.py +52 -67
- {python_fastllm-0.0.3 → python_fastllm-0.0.4}/fastllm/types.py +3 -3
- {python_fastllm-0.0.3 → python_fastllm-0.0.4}/python_fastllm.egg-info/PKG-INFO +1 -1
- python_fastllm-0.0.3/fastllm/__init__.py +0 -1
- {python_fastllm-0.0.3 → python_fastllm-0.0.4}/README.md +0 -0
- {python_fastllm-0.0.3 → python_fastllm-0.0.4}/fastllm/anthropic.py +0 -0
- {python_fastllm-0.0.3 → python_fastllm-0.0.4}/fastllm/gemini.py +0 -0
- {python_fastllm-0.0.3 → python_fastllm-0.0.4}/fastllm/openai_chat.py +0 -0
- {python_fastllm-0.0.3 → python_fastllm-0.0.4}/fastllm/openai_responses.py +0 -0
- {python_fastllm-0.0.3 → python_fastllm-0.0.4}/fastllm/specs/anthropic.json +0 -0
- {python_fastllm-0.0.3 → python_fastllm-0.0.4}/fastllm/specs/anthropic.yml +0 -0
- {python_fastllm-0.0.3 → python_fastllm-0.0.4}/fastllm/specs/gemini.json +0 -0
- {python_fastllm-0.0.3 → python_fastllm-0.0.4}/fastllm/specs/openai.with-code-samples.json +0 -0
- {python_fastllm-0.0.3 → python_fastllm-0.0.4}/fastllm/specs/openai.with-code-samples.yml +0 -0
- {python_fastllm-0.0.3 → python_fastllm-0.0.4}/fastllm/specs/spec_manifest.json +0 -0
- {python_fastllm-0.0.3 → python_fastllm-0.0.4}/pyproject.toml +0 -0
- {python_fastllm-0.0.3 → python_fastllm-0.0.4}/python_fastllm.egg-info/SOURCES.txt +0 -0
- {python_fastllm-0.0.3 → python_fastllm-0.0.4}/python_fastllm.egg-info/dependency_links.txt +0 -0
- {python_fastllm-0.0.3 → python_fastllm-0.0.4}/python_fastllm.egg-info/entry_points.txt +0 -0
- {python_fastllm-0.0.3 → python_fastllm-0.0.4}/python_fastllm.egg-info/requires.txt +0 -0
- {python_fastllm-0.0.3 → python_fastllm-0.0.4}/python_fastllm.egg-info/top_level.txt +0 -0
- {python_fastllm-0.0.3 → python_fastllm-0.0.4}/setup.cfg +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.0.4"
|
|
@@ -51,7 +51,11 @@ d = { 'settings': { 'branch': 'main',
|
|
|
51
51
|
'fastllm.chat.AsyncStreamFormatter': ('chat.html#asyncstreamformatter', 'fastllm/chat.py'),
|
|
52
52
|
'fastllm.chat.AsyncStreamFormatter.format_stream': ( 'chat.html#asyncstreamformatter.format_stream',
|
|
53
53
|
'fastllm/chat.py'),
|
|
54
|
+
'fastllm.chat.FenceToolStop': ('chat.html#fencetoolstop', 'fastllm/chat.py'),
|
|
55
|
+
'fastllm.chat.FenceToolStop.__call__': ('chat.html#fencetoolstop.__call__', 'fastllm/chat.py'),
|
|
56
|
+
'fastllm.chat.FenceToolStop.__init__': ('chat.html#fencetoolstop.__init__', 'fastllm/chat.py'),
|
|
54
57
|
'fastllm.chat.FullResponse': ('chat.html#fullresponse', 'fastllm/chat.py'),
|
|
58
|
+
'fastllm.chat.Msg.text': ('chat.html#msg.text', 'fastllm/chat.py'),
|
|
55
59
|
'fastllm.chat.StopResponse': ('chat.html#stopresponse', 'fastllm/chat.py'),
|
|
56
60
|
'fastllm.chat.StreamFormatter': ('chat.html#streamformatter', 'fastllm/chat.py'),
|
|
57
61
|
'fastllm.chat.StreamFormatter.__init__': ('chat.html#streamformatter.__init__', 'fastllm/chat.py'),
|
|
@@ -65,6 +69,7 @@ d = { 'settings': { 'branch': 'main',
|
|
|
65
69
|
'fastllm.chat.UsageStats.__repr__': ('chat.html#usagestats.__repr__', 'fastllm/chat.py'),
|
|
66
70
|
'fastllm.chat.UsageStats.fmt': ('chat.html#usagestats.fmt', 'fastllm/chat.py'),
|
|
67
71
|
'fastllm.chat.UsageStats.from_response': ('chat.html#usagestats.from_response', 'fastllm/chat.py'),
|
|
72
|
+
'fastllm.chat._active_fence_langs': ('chat.html#_active_fence_langs', 'fastllm/chat.py'),
|
|
68
73
|
'fastllm.chat._add_cache_control': ('chat.html#_add_cache_control', 'fastllm/chat.py'),
|
|
69
74
|
'fastllm.chat._alite_call_func': ('chat.html#_alite_call_func', 'fastllm/chat.py'),
|
|
70
75
|
'fastllm.chat._apply_cache_idxs': ('chat.html#_apply_cache_idxs', 'fastllm/chat.py'),
|
|
@@ -79,7 +84,10 @@ d = { 'settings': { 'branch': 'main',
|
|
|
79
84
|
'fastllm.chat._lite_call_func': ('chat.html#_lite_call_func', 'fastllm/chat.py'),
|
|
80
85
|
'fastllm.chat._mk_content': ('chat.html#_mk_content', 'fastllm/chat.py'),
|
|
81
86
|
'fastllm.chat._mk_prefill': ('chat.html#_mk_prefill', 'fastllm/chat.py'),
|
|
87
|
+
'fastllm.chat._mk_result_fence': ('chat.html#_mk_result_fence', 'fastllm/chat.py'),
|
|
82
88
|
'fastllm.chat._mk_tool_result': ('chat.html#_mk_tool_result', 'fastllm/chat.py'),
|
|
89
|
+
'fastllm.chat._split_fence_msgs': ('chat.html#_split_fence_msgs', 'fastllm/chat.py'),
|
|
90
|
+
'fastllm.chat._split_msg_on_fences': ('chat.html#_split_msg_on_fences', 'fastllm/chat.py'),
|
|
83
91
|
'fastllm.chat._srv_tc_summary': ('chat.html#_srv_tc_summary', 'fastllm/chat.py'),
|
|
84
92
|
'fastllm.chat._srvtools': ('chat.html#_srvtools', 'fastllm/chat.py'),
|
|
85
93
|
'fastllm.chat._tc_summary': ('chat.html#_tc_summary', 'fastllm/chat.py'),
|
|
@@ -93,6 +101,7 @@ d = { 'settings': { 'branch': 'main',
|
|
|
93
101
|
'fastllm.chat.astream_with_complete': ('chat.html#astream_with_complete', 'fastllm/chat.py'),
|
|
94
102
|
'fastllm.chat.cite_footnote': ('chat.html#cite_footnote', 'fastllm/chat.py'),
|
|
95
103
|
'fastllm.chat.contents': ('chat.html#contents', 'fastllm/chat.py'),
|
|
104
|
+
'fastllm.chat.extract_fence_call': ('chat.html#extract_fence_call', 'fastllm/chat.py'),
|
|
96
105
|
'fastllm.chat.fmt2hist': ('chat.html#fmt2hist', 'fastllm/chat.py'),
|
|
97
106
|
'fastllm.chat.lite_mk_func': ('chat.html#lite_mk_func', 'fastllm/chat.py'),
|
|
98
107
|
'fastllm.chat.mk_msg': ('chat.html#mk_msg', 'fastllm/chat.py'),
|
|
@@ -101,9 +110,11 @@ d = { 'settings': { 'branch': 'main',
|
|
|
101
110
|
'fastllm.chat.mk_tr_details': ('chat.html#mk_tr_details', 'fastllm/chat.py'),
|
|
102
111
|
'fastllm.chat.postproc': ('chat.html#postproc', 'fastllm/chat.py'),
|
|
103
112
|
'fastllm.chat.remove_cache_ckpts': ('chat.html#remove_cache_ckpts', 'fastllm/chat.py'),
|
|
113
|
+
'fastllm.chat.run_fence_tool': ('chat.html#run_fence_tool', 'fastllm/chat.py'),
|
|
104
114
|
'fastllm.chat.search_count': ('chat.html#search_count', 'fastllm/chat.py'),
|
|
105
115
|
'fastllm.chat.split_tools': ('chat.html#split_tools', 'fastllm/chat.py'),
|
|
106
116
|
'fastllm.chat.stop_reason': ('chat.html#stop_reason', 'fastllm/chat.py'),
|
|
117
|
+
'fastllm.chat.stop_sequences': ('chat.html#stop_sequences', 'fastllm/chat.py'),
|
|
107
118
|
'fastllm.chat.structured': ('chat.html#structured', 'fastllm/chat.py')},
|
|
108
119
|
'fastllm.gemini': { 'fastllm.gemini._gem_filter_sch': ('gemini.html#_gem_filter_sch', 'fastllm/gemini.py'),
|
|
109
120
|
'fastllm.gemini._gem_part_type': ('gemini.html#_gem_part_type', 'fastllm/gemini.py'),
|
|
@@ -208,13 +219,12 @@ d = { 'settings': { 'branch': 'main',
|
|
|
208
219
|
'fastllm.streaming.PartAccum': ('streaming.html#partaccum', 'fastllm/streaming.py'),
|
|
209
220
|
'fastllm.streaming.PartAccum.append': ('streaming.html#partaccum.append', 'fastllm/streaming.py'),
|
|
210
221
|
'fastllm.streaming.PartAccum.finalize': ('streaming.html#partaccum.finalize', 'fastllm/streaming.py'),
|
|
222
|
+
'fastllm.streaming.PartAccum.get_merged': ( 'streaming.html#partaccum.get_merged',
|
|
223
|
+
'fastllm/streaming.py'),
|
|
211
224
|
'fastllm.streaming._trim_delta': ('streaming.html#_trim_delta', 'fastllm/streaming.py'),
|
|
212
|
-
'fastllm.streaming.accum_completion': ('streaming.html#accum_completion', 'fastllm/streaming.py'),
|
|
213
|
-
'fastllm.streaming.completion_text': ('streaming.html#completion_text', 'fastllm/streaming.py'),
|
|
214
|
-
'fastllm.streaming.fake_stream': ('streaming.html#fake_stream', 'fastllm/streaming.py'),
|
|
215
225
|
'fastllm.streaming.mk_acollect_stream': ('streaming.html#mk_acollect_stream', 'fastllm/streaming.py'),
|
|
216
226
|
'fastllm.streaming.norm_and_yield': ('streaming.html#norm_and_yield', 'fastllm/streaming.py'),
|
|
217
|
-
'fastllm.streaming.
|
|
227
|
+
'fastllm.streaming.stop_and_trim': ('streaming.html#stop_and_trim', 'fastllm/streaming.py')},
|
|
218
228
|
'fastllm.types': { 'fastllm.types.APIRegistry': ('types.html#apiregistry', 'fastllm/types.py'),
|
|
219
229
|
'fastllm.types.APIRegistry.__init__': ('types.html#apiregistry.__init__', 'fastllm/types.py'),
|
|
220
230
|
'fastllm.types.APIRegistry.register': ('types.html#apiregistry.register', 'fastllm/types.py'),
|
|
@@ -18,7 +18,6 @@ from fastspec.errors import APIError
|
|
|
18
18
|
from .types import *
|
|
19
19
|
from .streaming import *
|
|
20
20
|
from .openai_responses import *
|
|
21
|
-
from .streaming import stop_sequences as _stop_sequences
|
|
22
21
|
from .openai_chat import *
|
|
23
22
|
from .anthropic import *
|
|
24
23
|
from .gemini import *
|
|
@@ -30,7 +29,7 @@ oai_spec = SpecParser.from_openapi(dict2obj(json.loads((specs_path/'openai.with
|
|
|
30
29
|
gem_spec = SpecParser.from_discovery(dict2obj(json.loads((specs_path/'gemini.json').read_text())))
|
|
31
30
|
|
|
32
31
|
# %% ../nbs/06_acomplete.ipynb #32ee2546
|
|
33
|
-
_codex_json = '~/.codex/auth.json', 'tokens','access_token'
|
|
32
|
+
_codex_json = '~/.codex/auth.json', ('tokens','access_token')
|
|
34
33
|
vendor_mapping = {
|
|
35
34
|
"openai": ('openai', 'https://api.openai.com/v1', 'OPENAI_API_KEY'),
|
|
36
35
|
"anthropic": ('anthropic', 'https://api.anthropic.com', 'ANTHROPIC_API_KEY'),
|
|
@@ -106,7 +105,6 @@ async def acomplete(msgs, model, api_name=None, vendor_name=None, api_key=None,
|
|
|
106
105
|
"Unified completion across different APIs."
|
|
107
106
|
cli, api_name, vendor_name = mk_client(model, vendor_name, api_name, api_key, base_url, xtra_hdrs)
|
|
108
107
|
api = api_registry.apis[api_name]
|
|
109
|
-
if stop_sequences: stop_callables = L(stop_callables) + [_stop_sequences(stop_sequences)]
|
|
110
108
|
payload = api.mk_payload(msgs, model, stream=stream, stop_callables=stop_callables, **kwargs)
|
|
111
109
|
payload = merge(payload, ifnone(xtra_body, {}))
|
|
112
110
|
if vendor_name == 'codex':
|
|
@@ -4,10 +4,10 @@
|
|
|
4
4
|
|
|
5
5
|
# %% auto #0
|
|
6
6
|
__all__ = ['tool_dtls_tag', 're_tools', 'token_dtls_tag', 're_token', 'effort', 'remove_cache_ckpts', 'contents', 'stop_reason',
|
|
7
|
-
'mk_msg', '
|
|
8
|
-
'
|
|
9
|
-
'
|
|
10
|
-
'adisplay_stream']
|
|
7
|
+
'mk_msg', 'FenceToolStop', 'extract_fence_call', 'stop_sequences', 'split_tools', 'fmt2hist', 'mk_msgs',
|
|
8
|
+
'cite_footnote', 'postproc', 'lite_mk_func', 'ToolResponse', 'structured', 'StopResponse', 'FullResponse',
|
|
9
|
+
'search_count', 'UsageStats', 'AsyncChat', 'add_warning', 'astream_with_complete', 'run_fence_tool',
|
|
10
|
+
'mk_tr_details', 'mk_srv_tc_details', 'StreamFormatter', 'AsyncStreamFormatter', 'adisplay_stream']
|
|
11
11
|
|
|
12
12
|
# %% ../nbs/07_chat.ipynb #d5a3bc1f
|
|
13
13
|
import asyncio, base64, json, mimetypes, random, string, ast, warnings
|
|
@@ -94,6 +94,65 @@ token_dtls_tag = "<details class='token-usage-details'>"
|
|
|
94
94
|
re_token = re.compile(fr"^{re.escape(token_dtls_tag)}<summary>.*?</summary>\n*\n*`.*?`\n*\n*</details>\n?",
|
|
95
95
|
flags=re.DOTALL|re.MULTILINE)
|
|
96
96
|
|
|
97
|
+
# %% ../nbs/07_chat.ipynb #be998131
|
|
98
|
+
_fence_back = '`````'
|
|
99
|
+
_fence_re = re.compile(f'^{_fence_back}(py|bash)\n(.*?)\n{_fence_back}', re.DOTALL | re.MULTILINE)
|
|
100
|
+
_result_re = re.compile(f'\n{_fence_back}result\n(.*?)\n{_fence_back}\n', re.DOTALL)
|
|
101
|
+
_lang2tool = dict(py='python', bash='bash')
|
|
102
|
+
|
|
103
|
+
class FenceToolStop:
|
|
104
|
+
def __init__(self, langs): self.langs = langs
|
|
105
|
+
def __call__(self, text):
|
|
106
|
+
"Return trim result if complete fence detected in active lang"
|
|
107
|
+
m = _fence_re.search(text)
|
|
108
|
+
if m and m.group(1) in self.langs: return m.group(0)
|
|
109
|
+
|
|
110
|
+
# %% ../nbs/07_chat.ipynb #e6360e96
|
|
111
|
+
def extract_fence_call(text):
|
|
112
|
+
"Return (lang, code) if text ends with terminated py/bash fence, else None"
|
|
113
|
+
ms = list(_fence_re.finditer(text))
|
|
114
|
+
if not ms: return None
|
|
115
|
+
m = ms[-1]
|
|
116
|
+
if not text[m.end():].strip(): return m.group(1), m.group(2)
|
|
117
|
+
|
|
118
|
+
# %% ../nbs/07_chat.ipynb #215183bf
|
|
119
|
+
@patch(as_prop=True)
|
|
120
|
+
def text(self:Msg): return ''.join(p.text or '' for p in self.content if p.type == PartType.text)
|
|
121
|
+
|
|
122
|
+
# %% ../nbs/07_chat.ipynb #1de7e4d2
|
|
123
|
+
def _mk_result_fence(output): return f"\n{_fence_back}result\n{output}\n{_fence_back}\n"
|
|
124
|
+
|
|
125
|
+
def _split_msg_on_fences(msg):
|
|
126
|
+
"Split an assistant Msg on result fences, return list of Msgs"
|
|
127
|
+
if msg.role != 'assistant': return [msg]
|
|
128
|
+
if not _result_re.search(msg.text): return [msg]
|
|
129
|
+
res, asst_parts, tool_parts = [], [], []
|
|
130
|
+
for msg_part in msg.content:
|
|
131
|
+
if msg_part.type == PartType.thinking: asst_parts.append(msg_part)
|
|
132
|
+
elif msg_part.type == PartType.tool_use: tool_parts.append(msg_part)
|
|
133
|
+
elif parts := _result_re.split(msg_part.text or ''):
|
|
134
|
+
for i,p in enumerate(parts):
|
|
135
|
+
if not p: continue
|
|
136
|
+
if i % 2 == 0: res.append(Msg(role='assistant', content=asst_parts+[Part(type=PartType.text, text=p.strip())]))
|
|
137
|
+
else: res.append(Msg(role='user', content=[Part(type=PartType.text, text=_mk_result_fence(p))]))
|
|
138
|
+
if tool_parts: res.append(Msg(role='assistant', content=tool_parts))
|
|
139
|
+
return res
|
|
140
|
+
|
|
141
|
+
def _split_fence_msgs(msgs):
|
|
142
|
+
"Split all assistant msgs on result fences for wire protocol"
|
|
143
|
+
res = []
|
|
144
|
+
for m in msgs: res.extend(_split_msg_on_fences(m))
|
|
145
|
+
return res
|
|
146
|
+
|
|
147
|
+
# %% ../nbs/07_chat.ipynb #b161ca9e
|
|
148
|
+
def stop_sequences(seqs):
|
|
149
|
+
"Stop when any sequence appears in the accumulated completion text."
|
|
150
|
+
seqs = L(seqs)
|
|
151
|
+
def _stop(text):
|
|
152
|
+
for s in seqs:
|
|
153
|
+
if s in text: return text[:text.find(s)+len(s)]
|
|
154
|
+
return _stop
|
|
155
|
+
|
|
97
156
|
# %% ../nbs/07_chat.ipynb #45ada210
|
|
98
157
|
def _extract_tool_parts(text:str):
|
|
99
158
|
"Extract (tool_use_part, tool_result_part) from <details> json block"
|
|
@@ -110,10 +169,13 @@ def split_tools(s):
|
|
|
110
169
|
"Split formatted output into (text, summary, tooljson) chunks"
|
|
111
170
|
return [(txt,summ,tj) for txt,_,summ,tj in chunked(re_tools.split(s.strip()), 4, pad=True)]
|
|
112
171
|
|
|
172
|
+
# %% ../nbs/07_chat.ipynb #44060a78
|
|
113
173
|
def fmt2hist(outp:str)->list[Msg]:
|
|
114
174
|
"Transform a formatted output string into fastllm canonical Msgs"
|
|
115
175
|
if token_dtls_tag in outp: outp = re_token.sub('', outp)
|
|
116
|
-
if tool_dtls_tag not in outp:
|
|
176
|
+
if tool_dtls_tag not in outp:
|
|
177
|
+
msg = Msg(role='assistant', content=[Part(type=PartType.text, text=outp.strip())])
|
|
178
|
+
return _split_msg_on_fences(msg)
|
|
117
179
|
hist, asst_parts, tool_parts = [], [], []
|
|
118
180
|
def flush():
|
|
119
181
|
if tool_parts:
|
|
@@ -122,17 +184,18 @@ def fmt2hist(outp:str)->list[Msg]:
|
|
|
122
184
|
asst_parts.clear(); tool_parts.clear()
|
|
123
185
|
for txt,_,tj in split_tools(outp):
|
|
124
186
|
if txt and txt.strip():
|
|
125
|
-
if tool_parts: flush()
|
|
187
|
+
if tool_parts: flush()
|
|
126
188
|
asst_parts.append(Part(type=PartType.text, text=txt.strip()))
|
|
127
189
|
if tj and (tp := _extract_tool_parts(tj)):
|
|
128
190
|
asst_parts.append(tp[0])
|
|
129
191
|
tool_parts.append(tp[1])
|
|
130
192
|
flush()
|
|
131
193
|
if asst_parts: hist.append(Msg(role='assistant', content=asst_parts))
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
194
|
+
result = []
|
|
195
|
+
for msg in hist:
|
|
196
|
+
if msg.role == 'assistant': result.extend(_split_msg_on_fences(msg))
|
|
197
|
+
else: result.append(msg)
|
|
198
|
+
return result
|
|
136
199
|
|
|
137
200
|
# %% ../nbs/07_chat.ipynb #8de5ce8d
|
|
138
201
|
def _apply_cache_idxs(msgs, cache_idxs=[-1], ttl=None):
|
|
@@ -309,6 +372,13 @@ def _inject_tool_reminder(msgs, reminder):
|
|
|
309
372
|
msgs[i] = m
|
|
310
373
|
return msgs
|
|
311
374
|
|
|
375
|
+
# %% ../nbs/07_chat.ipynb #e7eb2032
|
|
376
|
+
def _active_fence_langs(tool_schemas):
|
|
377
|
+
"Return set of active fence langs whose mapped tool is registered"
|
|
378
|
+
if not tool_schemas: return set()
|
|
379
|
+
names = {nested_idx(t, 'function', 'name') for t in tool_schemas}
|
|
380
|
+
return {lang for lang, tname in _lang2tool.items() if tname in names}
|
|
381
|
+
|
|
312
382
|
# %% ../nbs/07_chat.ipynb #e9a14051
|
|
313
383
|
class AsyncChat:
|
|
314
384
|
def __init__(
|
|
@@ -352,6 +422,7 @@ class AsyncChat:
|
|
|
352
422
|
self.hist = mk_msgs(self.hist, self.cache and 'claude' in self.model, cache_idxs, self.ttl)
|
|
353
423
|
msgs = self.hist
|
|
354
424
|
if prefill: msgs = self.hist + [Msg(role='assistant', content=[Part(PartType.text, prefill)])]
|
|
425
|
+
msgs = _split_fence_msgs(msgs)
|
|
355
426
|
if self.tool_reminder: msgs = _inject_tool_reminder(msgs, self.tool_reminder)
|
|
356
427
|
if 'deepseek' in self.model:
|
|
357
428
|
# The `reasoning_content` in the thinking mode must be passed back to the API.
|
|
@@ -419,8 +490,12 @@ def _prep_call(self:AsyncChat, prefill, search, max_tokens, kwargs, stream=False
|
|
|
419
490
|
if self.base_url: kwargs['base_url'] = self.base_url
|
|
420
491
|
if self.extra_headers: kwargs['xtra_headers'] = self.extra_headers
|
|
421
492
|
kwargs.update(_think_kw(self.model, think, self.vendor_name))
|
|
493
|
+
if (langs := _active_fence_langs(self.tool_schemas)):
|
|
494
|
+
if not any(isinstance(s, FenceToolStop) for s in kwargs.get('stop_callables', [])):
|
|
495
|
+
kwargs['stop_callables'] = kwargs.get('stop_callables', []) + [FenceToolStop(langs)]
|
|
422
496
|
return prefill, max_tokens
|
|
423
497
|
|
|
498
|
+
|
|
424
499
|
# %% ../nbs/07_chat.ipynb #07951b77
|
|
425
500
|
@patch
|
|
426
501
|
def print_hist(self:AsyncChat):
|
|
@@ -474,16 +549,27 @@ async def _call(self:AsyncChat, msg=None, prefill=None, temp=None, think=None, s
|
|
|
474
549
|
self._track(res)
|
|
475
550
|
yield res
|
|
476
551
|
|
|
552
|
+
toolloop, prompt = False, None
|
|
553
|
+
if (langs := _active_fence_langs(self.tool_schemas)):
|
|
554
|
+
if m := last(self.hist, lambda o: o.role == 'assistant'):
|
|
555
|
+
if fence := extract_fence_call(m.text):
|
|
556
|
+
lang, code = fence
|
|
557
|
+
out = await run_fence_tool(lang, code, self.ns)
|
|
558
|
+
for p in reversed(m.content):
|
|
559
|
+
if p.type == PartType.text: p.text += out; break
|
|
560
|
+
if stream: yield {'text': out}
|
|
561
|
+
toolloop = True
|
|
477
562
|
if stcs:= _srvtools(res.tool_calls):
|
|
478
563
|
for tc in stcs: yield tc
|
|
479
564
|
if tcs := _usrtools(res.tool_calls):
|
|
480
565
|
tres = await parallel_async(_alite_call_func, tcs, timeout=tc_timeout, n_workers=n_workers, pause=pause, **self.tcdict)
|
|
481
566
|
tmsg = mk_tool_res_msg(tcs, tres)
|
|
482
|
-
# TODO: We yield tool calls at the end with their results, fastllm doesn't yield streaming tool calls during streaming as once the collation is done for simplicity, but it can
|
|
483
567
|
for r in tmsg.content: yield r
|
|
484
568
|
self.hist.append(tmsg)
|
|
485
569
|
if step>=max_steps-1 or _has_stop(tmsg.content): prompt,tool_choice,search = mk_msg(final_prompt),'none',False
|
|
486
|
-
|
|
570
|
+
toolloop = True
|
|
571
|
+
|
|
572
|
+
if toolloop and step <= max_steps:
|
|
487
573
|
try:
|
|
488
574
|
async for result in self._call(
|
|
489
575
|
prompt, prefill, temp, think, search, stream, max_steps, step+1,
|
|
@@ -495,6 +581,14 @@ async def _call(self:AsyncChat, msg=None, prefill=None, temp=None, think=None, s
|
|
|
495
581
|
prompt, prefill, temp, think, search, stream, max_steps, step+1,
|
|
496
582
|
final_prompt, tool_choice='none', **kwargs): yield result
|
|
497
583
|
|
|
584
|
+
# %% ../nbs/07_chat.ipynb #4dc002da
|
|
585
|
+
async def run_fence_tool(lang, code, ns):
|
|
586
|
+
"Run the mapped tool for `lang` with the code, return result fence"
|
|
587
|
+
tname = _lang2tool[lang]
|
|
588
|
+
arg = dict(code=code) if lang == 'py' else dict(command=code)
|
|
589
|
+
res = _mk_tool_result(await call_func_async(tname, arg, ns=ns, raise_on_err=False))
|
|
590
|
+
return _mk_result_fence(_trunc_str(str(res)))
|
|
591
|
+
|
|
498
592
|
# %% ../nbs/07_chat.ipynb #1361515a
|
|
499
593
|
@patch
|
|
500
594
|
@delegates(AsyncChat._call)
|
|
@@ -596,15 +690,15 @@ class AsyncStreamFormatter(StreamFormatter):
|
|
|
596
690
|
"Format the response stream for markdown display."
|
|
597
691
|
async for o in rs: yield self.format_item(o)
|
|
598
692
|
|
|
599
|
-
# %% ../nbs/07_chat.ipynb #
|
|
693
|
+
# %% ../nbs/07_chat.ipynb #944bcd25
|
|
600
694
|
@delegates(AsyncStreamFormatter)
|
|
601
695
|
async def adisplay_stream(rs, **kwargs):
|
|
602
696
|
"Use IPython.display to markdown display the response stream."
|
|
603
697
|
try: from IPython.display import display, Markdown
|
|
604
698
|
except ModuleNotFoundError: raise ModuleNotFoundError("This function requires ipython. Please run `pip install ipython` to use.")
|
|
605
699
|
fmt = AsyncStreamFormatter(**kwargs)
|
|
606
|
-
md = ''
|
|
700
|
+
md,h = '',display(Markdown(' '), display_id=True)
|
|
607
701
|
async for o in fmt.format_stream(rs):
|
|
608
|
-
md+=o
|
|
609
|
-
|
|
702
|
+
md += o
|
|
703
|
+
if md: h.update(Markdown(md))
|
|
610
704
|
return fmt
|
|
@@ -3,11 +3,10 @@
|
|
|
3
3
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/01_streaming.ipynb.
|
|
4
4
|
|
|
5
5
|
# %% auto #0
|
|
6
|
-
__all__ = ['Delta', 'norm_and_yield', 'PartAccum', '
|
|
7
|
-
'fake_stream']
|
|
6
|
+
__all__ = ['Delta', 'norm_and_yield', 'PartAccum', 'stop_and_trim', 'mk_acollect_stream']
|
|
8
7
|
|
|
9
8
|
# %% ../nbs/01_streaming.ipynb #0df5c926
|
|
10
|
-
import json
|
|
9
|
+
import json,copy
|
|
11
10
|
from dataclasses import dataclass, field, fields
|
|
12
11
|
from fastcore.utils import *
|
|
13
12
|
from fastcore.meta import delegates
|
|
@@ -56,54 +55,44 @@ class PartAccum:
|
|
|
56
55
|
# anthropic citations have matching idx
|
|
57
56
|
self.parts[index].data['citations'].extend(citations or [])
|
|
58
57
|
|
|
59
|
-
def
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
58
|
+
def get_merged(self, with_tools=True):
|
|
59
|
+
tmp_parts = copy.deepcopy(self.parts)
|
|
60
|
+
tool_calls = []
|
|
61
|
+
if with_tools:
|
|
62
|
+
for idx,tc in tmp_parts.items():
|
|
63
|
+
if isinstance(tc, ToolCall):
|
|
64
|
+
if isinstance(tc.arguments, str): tc.arguments = json.loads(tc.arguments) if tc.arguments else {}
|
|
65
|
+
tool_calls.append(tc)
|
|
66
|
+
data = {**tc.extra, 'id':tc.id, 'name':tc.name, 'arguments':tc.arguments, 'server':tc.server}
|
|
67
|
+
tmp_parts[idx] = Part(type=PartType.tool_use, data=data)
|
|
66
68
|
|
|
67
69
|
merged = []
|
|
68
|
-
for p in
|
|
70
|
+
for p in tmp_parts.values():
|
|
71
|
+
if isinstance(p, ToolCall) and not with_tools: continue
|
|
69
72
|
if merged and merged[-1].type == p.type and p.type in (PartType.text, PartType.thinking): merged[-1].text += p.text
|
|
70
|
-
else: merged.append(p)
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
"Build a Completion snapshot from in-progress PartAccum state"
|
|
76
|
-
parts = [p for p in pa.parts.values() if isinstance(p, Part)]
|
|
77
|
-
if delta and delta.text:
|
|
78
|
-
parts = parts.copy()
|
|
79
|
-
if parts and parts[-1].type==PartType.text:
|
|
80
|
-
p = parts[-1]
|
|
81
|
-
parts[-1] = Part(type=p.type, text=(p.text or '') + delta.text, data=p.data)
|
|
82
|
-
else: parts.append(Part(type=PartType.text, text=delta.text))
|
|
83
|
-
return Completion(raw.get('model', model), Msg(role="assistant", content=parts),
|
|
84
|
-
fin, usg, api_name=api_name, vendor_name=vendor_name, raw={'deltas':deltas})
|
|
85
|
-
|
|
86
|
-
# %% ../nbs/01_streaming.ipynb #c28f706f
|
|
87
|
-
def completion_text(c):
|
|
88
|
-
"Combined text from a Completion."
|
|
89
|
-
return ''.join(p.text or '' for p in c.message.content if p.type==PartType.text)
|
|
90
|
-
|
|
91
|
-
# %% ../nbs/01_streaming.ipynb #b2b9f7ca
|
|
92
|
-
def stop_sequences(seqs):
|
|
93
|
-
"Stop when any sequence appears in the accumulated completion text."
|
|
94
|
-
seqs = L(seqs)
|
|
95
|
-
def _stop(c):
|
|
96
|
-
txt = completion_text(c)
|
|
97
|
-
for s in seqs:
|
|
98
|
-
if s in txt: return s
|
|
99
|
-
return _stop
|
|
73
|
+
else: merged.append(p)
|
|
74
|
+
return merged, tool_calls
|
|
75
|
+
|
|
76
|
+
def finalize(self):
|
|
77
|
+
self.parts, self.tool_calls = self.get_merged()
|
|
100
78
|
|
|
101
|
-
# %% ../nbs/01_streaming.ipynb #
|
|
102
|
-
def _trim_delta(d,
|
|
79
|
+
# %% ../nbs/01_streaming.ipynb #f11ea80a
|
|
80
|
+
def _trim_delta(d, txt, s):
|
|
103
81
|
"Trim `d.text` so accumulated text in `cur` stops just before stop sequence `s`."
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
82
|
+
idx = len(txt) - (txt.find(s) + len(s))
|
|
83
|
+
if idx>0: d.text = d.text[:-idx]
|
|
84
|
+
|
|
85
|
+
# %% ../nbs/01_streaming.ipynb #efbf96d7
|
|
86
|
+
def stop_and_trim(part_accum, d, stop_callables):
|
|
87
|
+
'Stop based on the accumulated text so far, and trim current delta'
|
|
88
|
+
parts,_ = part_accum.get_merged(with_tools=False)
|
|
89
|
+
prev = parts[-1].text if parts and parts[-1].type == PartType.text else ''
|
|
90
|
+
txt = prev + (d.text or '')
|
|
91
|
+
for f in stop_callables:
|
|
92
|
+
if res:=f(txt):
|
|
93
|
+
if isinstance(res, str): _trim_delta(d, txt, res)
|
|
94
|
+
return True
|
|
95
|
+
return False
|
|
107
96
|
|
|
108
97
|
# %% ../nbs/01_streaming.ipynb #fc71790b
|
|
109
98
|
async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_name=None, stop_callables=None):
|
|
@@ -120,18 +109,22 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
|
|
|
120
109
|
idx = _fidx(d, name, pt)
|
|
121
110
|
part_accum.append(typ, idx, **(ret or {kw: val}))
|
|
122
111
|
return ret or {name: val}
|
|
112
|
+
def _yield_parts(d):
|
|
113
|
+
for args in [('text',), ('thinking',), ('citations', 'text', 'citations')]:
|
|
114
|
+
if (r := _proc(d, args[0], pt=args[1] if len(args)>1 else None, kw=args[2] if len(args)>2 else 'txt')):
|
|
115
|
+
yield r
|
|
116
|
+
stop, stop_yielded = False, False
|
|
123
117
|
async for d in it:
|
|
124
|
-
stop
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
for
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
if (r:=_proc(d, 'citations', pt='text', kw='citations')): yield r
|
|
118
|
+
# Check stop condition and yield stop delta
|
|
119
|
+
stop = stop_and_trim(part_accum, d, stop_callables)
|
|
120
|
+
if stop and not stop_yielded:
|
|
121
|
+
for r in _yield_parts(d): yield r
|
|
122
|
+
stop_yielded = True
|
|
123
|
+
# If stop the remaining deltas are yielded as processing
|
|
124
|
+
if stop: yield {'thinking':'processing'}
|
|
125
|
+
else:
|
|
126
|
+
for r in _yield_parts(d): yield r
|
|
127
|
+
# Rest incl. tools, finish reason, usage is processed independently
|
|
135
128
|
for tc in d.tool_calls:
|
|
136
129
|
args = tc.arguments.get('_delta', tc.arguments)
|
|
137
130
|
_proc(d, 'tool_use', ret=dict(id=tc.id, name=tc.name, arguments=args, server=tc.server, extra=tc.extra))
|
|
@@ -143,20 +136,12 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
|
|
|
143
136
|
if d.usage: usg = d.usage
|
|
144
137
|
last_typ = typ
|
|
145
138
|
deltas.append(d)
|
|
146
|
-
if stop:
|
|
147
|
-
fin = fin or FinishReason.stop
|
|
148
|
-
await it.aclose()
|
|
149
|
-
break
|
|
150
139
|
part_accum.finalize()
|
|
151
|
-
# need to recheck for tool calls post collation for streaming
|
|
152
140
|
tcs = part_accum.tool_calls
|
|
153
|
-
fin = FinishReason.
|
|
141
|
+
if stop: fin = FinishReason.stop
|
|
142
|
+
fin = FinishReason.tool_calls if fin==FinishReason.stop and any(~L(tcs).attrgot('server')) else fin # recheck tool calls post collation
|
|
154
143
|
# tool calls and non-anthropic citations are yielded at the end
|
|
155
144
|
yield Completion(d.raw.get('model', model),
|
|
156
145
|
message=Msg(role="assistant", content=part_accum.parts),
|
|
157
146
|
finish_reason=fin, usage=usg, tool_calls=tcs, api_name=api_name, vendor_name=vendor_name,
|
|
158
147
|
raw={'deltas':deltas})
|
|
159
|
-
|
|
160
|
-
# %% ../nbs/01_streaming.ipynb #f79d3b99
|
|
161
|
-
async def fake_stream(*ss):
|
|
162
|
-
for s in ss: yield Delta(text=s, raw={'model':'fake'})
|
|
@@ -268,13 +268,13 @@ codex_pricing = {
|
|
|
268
268
|
|
|
269
269
|
_codex_overrides = {
|
|
270
270
|
codex53spark: dict(
|
|
271
|
-
supports_vision=False, supports_image_input=False, supports_web_search=True,
|
|
271
|
+
supports_vision=False, supports_image_input=False, supports_web_search=True, supports_reasoning=True,
|
|
272
272
|
max_tokens=128000, max_input_tokens=128000, max_output_tokens=128000)
|
|
273
273
|
}
|
|
274
274
|
|
|
275
275
|
# %% ../nbs/00_types.ipynb #fbfdeb0a
|
|
276
276
|
def get_model_info(mn, vendor_name=None):
|
|
277
|
-
info = get_model_meta(mn, vendor_name)
|
|
277
|
+
info = get_model_meta(mn, 'chatgpt' if vendor_name=='codex' else vendor_name)
|
|
278
278
|
# anthropic web search
|
|
279
279
|
if 'search_context_cost_per_query' in info: info['supports_web_search'] = True
|
|
280
280
|
# kimi
|
|
@@ -288,7 +288,7 @@ def get_model_info(mn, vendor_name=None):
|
|
|
288
288
|
info['supports_web_search'] = True
|
|
289
289
|
info.pop('mode', None)
|
|
290
290
|
# codex updates
|
|
291
|
-
if vendor_name == 'codex':
|
|
291
|
+
if vendor_name == 'codex':
|
|
292
292
|
info = merge(info, codex_pricing)
|
|
293
293
|
info |= _codex_overrides.get(mn, {})
|
|
294
294
|
# deepseek v4
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.0.3"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|