python-fastllm 0.0.6__py3-none-any.whl → 0.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fastllm/__init__.py +1 -1
- fastllm/_modidx.py +32 -0
- fastllm/acomplete.py +17 -1
- fastllm/anthropic.py +17 -13
- fastllm/chat.py +203 -122
- fastllm/codex.py +7 -0
- fastllm/streaming.py +3 -1
- fastllm/types.py +10 -6
- {python_fastllm-0.0.6.dist-info → python_fastllm-0.0.8.dist-info}/METADATA +1 -1
- python_fastllm-0.0.8.dist-info/RECORD +22 -0
- python_fastllm-0.0.6.dist-info/RECORD +0 -21
- {python_fastllm-0.0.6.dist-info → python_fastllm-0.0.8.dist-info}/WHEEL +0 -0
- {python_fastllm-0.0.6.dist-info → python_fastllm-0.0.8.dist-info}/entry_points.txt +0 -0
- {python_fastllm-0.0.6.dist-info → python_fastllm-0.0.8.dist-info}/top_level.txt +0 -0
fastllm/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.0.6"
|
|
1
|
+
__version__ = "0.0.8"
|
fastllm/_modidx.py
CHANGED
|
@@ -10,6 +10,7 @@ d = { 'settings': { 'branch': 'main',
|
|
|
10
10
|
'fastllm.acomplete._classify_error': ('acomplete.html#_classify_error', 'fastllm/acomplete.py'),
|
|
11
11
|
'fastllm.acomplete._classify_error_stream': ( 'acomplete.html#_classify_error_stream',
|
|
12
12
|
'fastllm/acomplete.py'),
|
|
13
|
+
'fastllm.acomplete._debug_print': ('acomplete.html#_debug_print', 'fastllm/acomplete.py'),
|
|
13
14
|
'fastllm.acomplete._is_ctx_exceeded': ('acomplete.html#_is_ctx_exceeded', 'fastllm/acomplete.py'),
|
|
14
15
|
'fastllm.acomplete.acomplete': ('acomplete.html#acomplete', 'fastllm/acomplete.py'),
|
|
15
16
|
'fastllm.acomplete.mk_client': ('acomplete.html#mk_client', 'fastllm/acomplete.py')},
|
|
@@ -31,6 +32,7 @@ d = { 'settings': { 'branch': 'main',
|
|
|
31
32
|
'fastllm.anthropic.denorm_tool_use': ('anthropic.html#denorm_tool_use', 'fastllm/anthropic.py'),
|
|
32
33
|
'fastllm.anthropic.denorm_user': ('anthropic.html#denorm_user', 'fastllm/anthropic.py'),
|
|
33
34
|
'fastllm.anthropic.denorm_web_search': ('anthropic.html#denorm_web_search', 'fastllm/anthropic.py'),
|
|
35
|
+
'fastllm.anthropic.finalize_usage': ('anthropic.html#finalize_usage', 'fastllm/anthropic.py'),
|
|
34
36
|
'fastllm.anthropic.get_hdrs': ('anthropic.html#get_hdrs', 'fastllm/anthropic.py'),
|
|
35
37
|
'fastllm.anthropic.mk_payload': ('anthropic.html#mk_payload', 'fastllm/anthropic.py'),
|
|
36
38
|
'fastllm.anthropic.norm_finish': ('anthropic.html#norm_finish', 'fastllm/anthropic.py'),
|
|
@@ -43,24 +45,53 @@ d = { 'settings': { 'branch': 'main',
|
|
|
43
45
|
'fastllm.chat.AsyncChat.__call__': ('chat.html#asyncchat.__call__', 'fastllm/chat.py'),
|
|
44
46
|
'fastllm.chat.AsyncChat.__init__': ('chat.html#asyncchat.__init__', 'fastllm/chat.py'),
|
|
45
47
|
'fastllm.chat.AsyncChat._call': ('chat.html#asyncchat._call', 'fastllm/chat.py'),
|
|
48
|
+
'fastllm.chat.AsyncChat._call_cbs': ('chat.html#asyncchat._call_cbs', 'fastllm/chat.py'),
|
|
46
49
|
'fastllm.chat.AsyncChat._prep_call': ('chat.html#asyncchat._prep_call', 'fastllm/chat.py'),
|
|
47
50
|
'fastllm.chat.AsyncChat._prep_msg': ('chat.html#asyncchat._prep_msg', 'fastllm/chat.py'),
|
|
48
51
|
'fastllm.chat.AsyncChat._track': ('chat.html#asyncchat._track', 'fastllm/chat.py'),
|
|
52
|
+
'fastllm.chat.AsyncChat.add_cb': ('chat.html#asyncchat.add_cb', 'fastllm/chat.py'),
|
|
53
|
+
'fastllm.chat.AsyncChat.add_cbs': ('chat.html#asyncchat.add_cbs', 'fastllm/chat.py'),
|
|
49
54
|
'fastllm.chat.AsyncChat.print_hist': ('chat.html#asyncchat.print_hist', 'fastllm/chat.py'),
|
|
50
55
|
'fastllm.chat.AsyncChat.tcdict': ('chat.html#asyncchat.tcdict', 'fastllm/chat.py'),
|
|
51
56
|
'fastllm.chat.AsyncStreamFormatter': ('chat.html#asyncstreamformatter', 'fastllm/chat.py'),
|
|
52
57
|
'fastllm.chat.AsyncStreamFormatter.format_stream': ( 'chat.html#asyncstreamformatter.format_stream',
|
|
53
58
|
'fastllm/chat.py'),
|
|
59
|
+
'fastllm.chat.ChatCallback': ('chat.html#chatcallback', 'fastllm/chat.py'),
|
|
60
|
+
'fastllm.chat.ChatCallback.__repr__': ('chat.html#chatcallback.__repr__', 'fastllm/chat.py'),
|
|
61
|
+
'fastllm.chat.DeepseekMsgsCallback': ('chat.html#deepseekmsgscallback', 'fastllm/chat.py'),
|
|
62
|
+
'fastllm.chat.DeepseekMsgsCallback.after_msgs': ( 'chat.html#deepseekmsgscallback.after_msgs',
|
|
63
|
+
'fastllm/chat.py'),
|
|
64
|
+
'fastllm.chat.DeepseekPrefillCallback': ('chat.html#deepseekprefillcallback', 'fastllm/chat.py'),
|
|
65
|
+
'fastllm.chat.DeepseekPrefillCallback.before_acomplete': ( 'chat.html#deepseekprefillcallback.before_acomplete',
|
|
66
|
+
'fastllm/chat.py'),
|
|
67
|
+
'fastllm.chat.FenceToolCallback': ('chat.html#fencetoolcallback', 'fastllm/chat.py'),
|
|
68
|
+
'fastllm.chat.FenceToolCallback.after_msgs': ('chat.html#fencetoolcallback.after_msgs', 'fastllm/chat.py'),
|
|
69
|
+
'fastllm.chat.FenceToolCallback.before_acomplete': ( 'chat.html#fencetoolcallback.before_acomplete',
|
|
70
|
+
'fastllm/chat.py'),
|
|
71
|
+
'fastllm.chat.FenceToolCallback.before_tool_calls': ( 'chat.html#fencetoolcallback.before_tool_calls',
|
|
72
|
+
'fastllm/chat.py'),
|
|
54
73
|
'fastllm.chat.FenceToolStop': ('chat.html#fencetoolstop', 'fastllm/chat.py'),
|
|
55
74
|
'fastllm.chat.FenceToolStop.__call__': ('chat.html#fencetoolstop.__call__', 'fastllm/chat.py'),
|
|
56
75
|
'fastllm.chat.FenceToolStop.__init__': ('chat.html#fencetoolstop.__init__', 'fastllm/chat.py'),
|
|
57
76
|
'fastllm.chat.FullResponse': ('chat.html#fullresponse', 'fastllm/chat.py'),
|
|
58
77
|
'fastllm.chat.Msg.text': ('chat.html#msg.text', 'fastllm/chat.py'),
|
|
78
|
+
'fastllm.chat.StopReasonCallback': ('chat.html#stopreasoncallback', 'fastllm/chat.py'),
|
|
79
|
+
'fastllm.chat.StopReasonCallback.after_acomplete': ( 'chat.html#stopreasoncallback.after_acomplete',
|
|
80
|
+
'fastllm/chat.py'),
|
|
59
81
|
'fastllm.chat.StopResponse': ('chat.html#stopresponse', 'fastllm/chat.py'),
|
|
82
|
+
'fastllm.chat.StopSequencesCallback': ('chat.html#stopsequencescallback', 'fastllm/chat.py'),
|
|
83
|
+
'fastllm.chat.StopSequencesCallback.__init__': ( 'chat.html#stopsequencescallback.__init__',
|
|
84
|
+
'fastllm/chat.py'),
|
|
85
|
+
'fastllm.chat.StopSequencesCallback.before_acomplete': ( 'chat.html#stopsequencescallback.before_acomplete',
|
|
86
|
+
'fastllm/chat.py'),
|
|
60
87
|
'fastllm.chat.StreamFormatter': ('chat.html#streamformatter', 'fastllm/chat.py'),
|
|
61
88
|
'fastllm.chat.StreamFormatter.__init__': ('chat.html#streamformatter.__init__', 'fastllm/chat.py'),
|
|
62
89
|
'fastllm.chat.StreamFormatter.format_item': ('chat.html#streamformatter.format_item', 'fastllm/chat.py'),
|
|
63
90
|
'fastllm.chat.StreamFormatter.format_stream': ('chat.html#streamformatter.format_stream', 'fastllm/chat.py'),
|
|
91
|
+
'fastllm.chat.ToolReminderCallback': ('chat.html#toolremindercallback', 'fastllm/chat.py'),
|
|
92
|
+
'fastllm.chat.ToolReminderCallback.__init__': ('chat.html#toolremindercallback.__init__', 'fastllm/chat.py'),
|
|
93
|
+
'fastllm.chat.ToolReminderCallback.after_msgs': ( 'chat.html#toolremindercallback.after_msgs',
|
|
94
|
+
'fastllm/chat.py'),
|
|
64
95
|
'fastllm.chat.ToolResponse': ('chat.html#toolresponse', 'fastllm/chat.py'),
|
|
65
96
|
'fastllm.chat.UsageStats': ('chat.html#usagestats', 'fastllm/chat.py'),
|
|
66
97
|
'fastllm.chat.UsageStats.__add__': ('chat.html#usagestats.__add__', 'fastllm/chat.py'),
|
|
@@ -116,6 +147,7 @@ d = { 'settings': { 'branch': 'main',
|
|
|
116
147
|
'fastllm.chat.stop_reason': ('chat.html#stop_reason', 'fastllm/chat.py'),
|
|
117
148
|
'fastllm.chat.stop_sequences': ('chat.html#stop_sequences', 'fastllm/chat.py'),
|
|
118
149
|
'fastllm.chat.structured': ('chat.html#structured', 'fastllm/chat.py')},
|
|
150
|
+
'fastllm.codex': {},
|
|
119
151
|
'fastllm.gemini': { 'fastllm.gemini._gem_filter_sch': ('gemini.html#_gem_filter_sch', 'fastllm/gemini.py'),
|
|
120
152
|
'fastllm.gemini._gem_part_type': ('gemini.html#_gem_part_type', 'fastllm/gemini.py'),
|
|
121
153
|
'fastllm.gemini.acollect_stream': ('gemini.html#acollect_stream', 'fastllm/gemini.py'),
|
fastllm/acomplete.py
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/06_acomplete.ipynb.
|
|
4
4
|
|
|
5
5
|
# %% auto #0
|
|
6
|
-
__all__ = ['specs_path', 'ant_spec', 'oai_spec', 'gem_spec', 'vendor_mapping', 'api2spec', 'mk_client',
|
|
6
|
+
__all__ = ['specs_path', 'ant_spec', 'oai_spec', 'gem_spec', 'vendor_mapping', 'api2spec', 'defaults', 'mk_client',
|
|
7
7
|
'ContextWindowExceededError', 'acomplete']
|
|
8
8
|
|
|
9
9
|
# %% ../nbs/06_acomplete.ipynb #f2f57253
|
|
@@ -98,6 +98,21 @@ async def _classify_error_stream(gen):
|
|
|
98
98
|
async for x in gen: yield x
|
|
99
99
|
except APIError as e: raise _classify_error(e) from e
|
|
100
100
|
|
|
101
|
+
# %% ../nbs/06_acomplete.ipynb #f626a4e1
|
|
102
|
+
defaults = SimpleNamespace(debug_mode=False)
|
|
103
|
+
|
|
104
|
+
def _debug_print(model, api_name, vendor_name, payload, func):
|
|
105
|
+
"Pretty-print acomplete inputs when defaults.debug_mode is set"
|
|
106
|
+
from pprint import pformat
|
|
107
|
+
p = dict(payload)
|
|
108
|
+
if defaults.debug_mode == 'brief' and 'tools' in p:
|
|
109
|
+
p['tools'] = '; '.join(o.get('name', o.get('type', o)) for o in p['tools'])
|
|
110
|
+
print('━'*60)
|
|
111
|
+
print(f"\033[1;36mfastllm debug\033[0m model={model} vendor={vendor_name} api={api_name} base_url={func.base_url} path={func.path}")
|
|
112
|
+
print('─'*60)
|
|
113
|
+
print(f"\033[1;33mpayload:\033[0m\n{pformat(p, width=120, sort_dicts=False)}")
|
|
114
|
+
print('━'*60)
|
|
115
|
+
|
|
101
116
|
# %% ../nbs/06_acomplete.ipynb #2379ec94
|
|
102
117
|
@delegates(payload_kwargs)
|
|
103
118
|
async def acomplete(msgs, model, api_name=None, vendor_name=None, api_key=None, base_url=None, xtra_body=None, xtra_hdrs=None,
|
|
@@ -114,6 +129,7 @@ async def acomplete(msgs, model, api_name=None, vendor_name=None, api_key=None,
|
|
|
114
129
|
if vendor_name == 'deepseek' and 'v4' in model: payload['messages'][-1]['prefix'] = True
|
|
115
130
|
if vendor_name == 'moonshot' and 'kimi' in model: payload['messages'][-1]['partial'] = True
|
|
116
131
|
func = attrgetter(api.op_path[stream])(cli)
|
|
132
|
+
if defaults.debug_mode: _debug_print(model, api_name, vendor_name, payload, func)
|
|
117
133
|
try: resp = await func(**payload)
|
|
118
134
|
except APIError as e: raise _classify_error(e) from e
|
|
119
135
|
if stream: return _classify_error_stream(api.acollect_stream(resp, model=model, vendor_name=vendor_name, stop_callables=stop_callables))
|
fastllm/anthropic.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/04_anthropic.ipynb.
|
|
2
2
|
|
|
3
3
|
# %% auto #0
|
|
4
|
-
__all__ = ['ant_tc_types', '
|
|
4
|
+
__all__ = ['ant_tc_types', 'norm_tool_call', 'norm_tool_calls', 'norm_usage', 'finalize_usage', 'norm_finish', 'norm_parts',
|
|
5
5
|
'norm_sse_event', 'delta_index_fn', 'acollect_stream', 'denorm_tool_use', 'denorm_assistant', 'denorm_tool',
|
|
6
6
|
'denorm_msgs', 'denorm_tool_schs', 'denorm_tool_choice', 'denorm_reasoning', 'denorm_web_search',
|
|
7
7
|
'denorm_system', 'denorm_user', 'denorm_image', 'denorm_file', 'denorm_tool_result', 'mk_payload',
|
|
@@ -42,7 +42,18 @@ def norm_usage(resp):
|
|
|
42
42
|
pt = int(usg.get("input_tokens", 0) or 0) + cached + cache_creation
|
|
43
43
|
ct = int(usg.get("output_tokens", 0) or 0)
|
|
44
44
|
return Usage(prompt_tokens=pt, completion_tokens=ct, total_tokens=pt + ct,
|
|
45
|
-
cached_tokens=cached, cache_creation_tokens=cache_creation, raw=usg)
|
|
45
|
+
cached_tokens=cached, cache_creation_tokens=cache_creation, reasoning_tokens=0, raw=usg)
|
|
46
|
+
|
|
47
|
+
def finalize_usage(usg, parts):
|
|
48
|
+
"Adjust usage using finalized Anthropic content parts."
|
|
49
|
+
if not usg: return usg
|
|
50
|
+
rc = '\n'.join(p.text or '' for p in parts if p.type == PartType.thinking)
|
|
51
|
+
ct = int(usg.raw.get('output_tokens', usg.completion_tokens) or 0)
|
|
52
|
+
rt = min(int(len(rc.split())*1.5), ct) if rc else 0
|
|
53
|
+
res = Usage(prompt_tokens=usg.prompt_tokens, completion_tokens=ct-rt, total_tokens=usg.prompt_tokens+ct,
|
|
54
|
+
cached_tokens=usg.cached_tokens, cache_creation_tokens=usg.cache_creation_tokens, reasoning_tokens=rt, raw=usg.raw)
|
|
55
|
+
print(res)
|
|
56
|
+
return res
|
|
46
57
|
|
|
47
58
|
# %% ../nbs/04_anthropic.ipynb #7a8b1f8f
|
|
48
59
|
def norm_finish(resp, tcs=None):
|
|
@@ -197,7 +208,7 @@ def denorm_reasoning(v):
|
|
|
197
208
|
def denorm_web_search(v):
|
|
198
209
|
"Map canonical web_search_options to Anthropic hosted web_search tool."
|
|
199
210
|
_max_uses = {"low": 1, "medium": 5, "high": 10}
|
|
200
|
-
t = {"type": "
|
|
211
|
+
t = {"type": "web_search_20250305", "name": "web_search"}
|
|
201
212
|
if (typ := (v or {}).get("type")): t["type"] = typ
|
|
202
213
|
if (s := (v or {}).get("search_context_size")):
|
|
203
214
|
t["max_uses"] = _max_uses.get(s, 5)
|
|
@@ -286,13 +297,6 @@ def cost(usage, m):
|
|
|
286
297
|
return cost
|
|
287
298
|
|
|
288
299
|
# %% ../nbs/04_anthropic.ipynb #f7c0b989
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
norm_usage=norm_usage,
|
|
293
|
-
acollect_stream=acollect_stream,
|
|
294
|
-
mk_payload=mk_payload,
|
|
295
|
-
cost=cost,
|
|
296
|
-
get_hdrs=get_hdrs,
|
|
297
|
-
op_path=('messages.messages_post','messages.messages_post'))
|
|
298
|
-
api_registry.register('anthropic', **api_ns)
|
|
300
|
+
api_registry.register('anthropic', norm_tool_calls=norm_tool_calls, norm_parts=norm_parts, norm_finish=norm_finish, norm_usage=norm_usage,
|
|
301
|
+
finalize_usage=finalize_usage, acollect_stream=acollect_stream, mk_payload=mk_payload, cost=cost, get_hdrs=get_hdrs,
|
|
302
|
+
op_path=('messages.messages_post','messages.messages_post'))
|
fastllm/chat.py
CHANGED
|
@@ -4,10 +4,12 @@
|
|
|
4
4
|
|
|
5
5
|
# %% auto #0
|
|
6
6
|
__all__ = ['tool_dtls_tag', 're_tools', 'token_dtls_tag', 're_token', 'effort', 'remove_cache_ckpts', 'contents', 'stop_reason',
|
|
7
|
-
'mk_msg', 'FenceToolStop', 'extract_fence_call', '
|
|
8
|
-
'
|
|
9
|
-
'
|
|
10
|
-
'
|
|
7
|
+
'mk_msg', 'FenceToolStop', 'extract_fence_call', 'split_tools', 'fmt2hist', 'mk_msgs', 'cite_footnote',
|
|
8
|
+
'postproc', 'lite_mk_func', 'ToolResponse', 'structured', 'StopResponse', 'FullResponse', 'search_count',
|
|
9
|
+
'UsageStats', 'AsyncChat', 'astream_with_complete', 'ChatCallback', 'DeepseekMsgsCallback',
|
|
10
|
+
'DeepseekPrefillCallback', 'add_warning', 'StopReasonCallback', 'run_fence_tool', 'FenceToolCallback',
|
|
11
|
+
'ToolReminderCallback', 'stop_sequences', 'StopSequencesCallback', 'mk_tr_details', 'mk_srv_tc_details',
|
|
12
|
+
'StreamFormatter', 'AsyncStreamFormatter', 'adisplay_stream']
|
|
11
13
|
|
|
12
14
|
# %% ../nbs/07_chat.ipynb #d5a3bc1f
|
|
13
15
|
import asyncio, base64, json, mimetypes, random, string, ast, warnings
|
|
@@ -55,7 +57,7 @@ def remove_cache_ckpts(msg):
|
|
|
55
57
|
return msg
|
|
56
58
|
|
|
57
59
|
def _mk_content(o):
|
|
58
|
-
if isinstance(o, str): return Part(type=PartType.text, text=o
|
|
60
|
+
if isinstance(o, str): return Part(type=PartType.text, text=o)
|
|
59
61
|
elif isinstance(o,bytes): return _bytes2content(o)
|
|
60
62
|
return o
|
|
61
63
|
|
|
@@ -87,16 +89,16 @@ def mk_msg(
|
|
|
87
89
|
return _add_cache_control(msg, ttl=ttl) if cache else msg
|
|
88
90
|
|
|
89
91
|
# %% ../nbs/07_chat.ipynb #db466e1c
|
|
90
|
-
tool_dtls_tag = "<details class='tool-usage-details'>"
|
|
92
|
+
tool_dtls_tag = "<details class='tool-usage-details' markdown='1'>"
|
|
91
93
|
re_tools = re.compile(fr"^({tool_dtls_tag}\n*(?:<summary>(?P<summary>.*?)</summary>\n*)?\n*```json\n+(.*?)\n+```\n+</details>)",
|
|
92
94
|
flags=re.DOTALL|re.MULTILINE)
|
|
93
|
-
token_dtls_tag = "<details class='token-usage-details'>"
|
|
94
|
-
re_token = re.compile(fr"^{re.escape(token_dtls_tag)}
|
|
95
|
+
token_dtls_tag = "<details class='token-usage-details' markdown='1'>"
|
|
96
|
+
re_token = re.compile(fr"^{re.escape(token_dtls_tag)}\n*<summary>.*?</summary>\n*\n*`.*?`\n*\n*</details>\n?",
|
|
95
97
|
flags=re.DOTALL|re.MULTILINE)
|
|
96
98
|
|
|
97
99
|
# %% ../nbs/07_chat.ipynb #be998131
|
|
98
100
|
_fence_back = '`````'
|
|
99
|
-
_fence_re = re.compile(f'^{_fence_back}(py|bash)\n(.*?)\n{_fence_back}', re.DOTALL | re.MULTILINE)
|
|
101
|
+
_fence_re = re.compile(f'^{_fence_back}(py|bash)\n(.*?)\n{_fence_back}$', re.DOTALL | re.MULTILINE)
|
|
100
102
|
_result_re = re.compile(f'\n{_fence_back}result\n(.*?)\n{_fence_back}\n', re.DOTALL)
|
|
101
103
|
_lang2tool = dict(py='python', bash='bash')
|
|
102
104
|
|
|
@@ -144,15 +146,6 @@ def _split_fence_msgs(msgs):
|
|
|
144
146
|
for m in msgs: res.extend(_split_msg_on_fences(m))
|
|
145
147
|
return res
|
|
146
148
|
|
|
147
|
-
# %% ../nbs/07_chat.ipynb #b161ca9e
|
|
148
|
-
def stop_sequences(seqs):
|
|
149
|
-
"Stop when any sequence appears in the accumulated completion text."
|
|
150
|
-
seqs = L(seqs)
|
|
151
|
-
def _stop(text):
|
|
152
|
-
for s in seqs:
|
|
153
|
-
if s in text: return text[:text.find(s)+len(s)]
|
|
154
|
-
return _stop
|
|
155
|
-
|
|
156
149
|
# %% ../nbs/07_chat.ipynb #45ada210
|
|
157
150
|
def _extract_tool_parts(text:str):
|
|
158
151
|
"Extract (tool_use_part, tool_result_part) from <details> json block"
|
|
@@ -216,7 +209,9 @@ def mk_msgs(
|
|
|
216
209
|
"Create a list of fastllm canonical Msgs."
|
|
217
210
|
if not msgs: return []
|
|
218
211
|
if not isinstance(msgs, list): msgs = [msgs]
|
|
219
|
-
msgs = L(msgs).map(lambda m:
|
|
212
|
+
msgs = L(msgs).map(lambda m:
|
|
213
|
+
fmt2hist(m) if isinstance(m,str) and (tool_dtls_tag in m or token_dtls_tag in m) else [m]
|
|
214
|
+
).concat()
|
|
220
215
|
res, role = [], 'user'
|
|
221
216
|
for m in msgs:
|
|
222
217
|
res.append(msg := remove_cache_ckpts(mk_msg(m, role=role)))
|
|
@@ -304,9 +299,11 @@ def _has_stop(tres_parts): return any(isinstance(p.text, StopResponse) for p in
|
|
|
304
299
|
def _trunc_str(s, mx=2000, skip=10, replace="TRUNCATED"):
|
|
305
300
|
"Truncate `s` to `mx` chars max, adding `replace` if truncated"
|
|
306
301
|
if not isinstance(s, str): s = str(s)
|
|
307
|
-
|
|
302
|
+
s = s.rstrip()
|
|
303
|
+
if len(s)>2 and s[0]=='𝍁' and s[-1]=='𝍁':
|
|
304
|
+
s = s[1:-1]
|
|
305
|
+
if replace: return s
|
|
308
306
|
if isinstance_str(s, ('FullResponse','Safe')): return s
|
|
309
|
-
s = str(s).strip()
|
|
310
307
|
if len(s)<=mx: return s
|
|
311
308
|
s = s[skip:mx-skip]
|
|
312
309
|
ss = s.split(' ')
|
|
@@ -362,24 +359,7 @@ class UsageStats:
|
|
|
362
359
|
summ = f"${self.cost:.4f}" if self.cost else f"{self.total_tokens:,} tokens"
|
|
363
360
|
return f"\n\n{token_dtls_tag}<summary>{summ}</summary>\n\n`{self!r}`\n\n</details>\n"
|
|
364
361
|
|
|
365
|
-
# %% ../nbs/07_chat.ipynb #
|
|
366
|
-
def _inject_tool_reminder(msgs, reminder):
|
|
367
|
-
i = len(msgs)
|
|
368
|
-
while i>0 and msgs[i-1].role=='tool': i-=1
|
|
369
|
-
if i>=len(msgs): return msgs
|
|
370
|
-
msgs,m = list(msgs),msgs[i]
|
|
371
|
-
m.content.append(Part(type=PartType.text, text=reminder))
|
|
372
|
-
msgs[i] = m
|
|
373
|
-
return msgs
|
|
374
|
-
|
|
375
|
-
# %% ../nbs/07_chat.ipynb #e7eb2032
|
|
376
|
-
def _active_fence_langs(tool_schemas):
|
|
377
|
-
"Return set of active fence langs whose mapped tool is registered"
|
|
378
|
-
if not tool_schemas: return set()
|
|
379
|
-
names = {nested_idx(t, 'function', 'name') for t in tool_schemas}
|
|
380
|
-
return {lang for lang, tname in _lang2tool.items() if tname in names}
|
|
381
|
-
|
|
382
|
-
# %% ../nbs/07_chat.ipynb #e9a14051
|
|
362
|
+
# %% ../nbs/07_chat.ipynb #cb3d7e77
|
|
383
363
|
class AsyncChat:
|
|
384
364
|
def __init__(
|
|
385
365
|
self,
|
|
@@ -399,7 +379,8 @@ class AsyncChat:
|
|
|
399
379
|
base_url=None, # API base url when model can't be resolved or vendor_name is not known
|
|
400
380
|
extra_headers=None, # Extra HTTP headers for custom providers
|
|
401
381
|
markup=0, # Cost markup multiplier (e.g. 0.5 for 50%)
|
|
402
|
-
|
|
382
|
+
cbs:list=None, # Chat callbacks
|
|
383
|
+
default_cbs=True # Whether to include default callbacks
|
|
403
384
|
):
|
|
404
385
|
"LiteLLM chat client."
|
|
405
386
|
self.model = model
|
|
@@ -408,7 +389,10 @@ class AsyncChat:
|
|
|
408
389
|
elif ns is None: ns = globals()
|
|
409
390
|
self.tool_schemas = [lite_mk_func(t) for t in tools] if tools else None
|
|
410
391
|
self.use = UsageStats()
|
|
411
|
-
store_attr()
|
|
392
|
+
store_attr(but='cbs')
|
|
393
|
+
self.cbs = L()
|
|
394
|
+
if default_cbs: self.add_cbs(defaults.chat_callbacks)
|
|
395
|
+
self.add_cbs(cbs)
|
|
412
396
|
|
|
413
397
|
def _prep_msg(self, msg=None, prefill=None):
|
|
414
398
|
"Prepare the system prompt and messages list for the API call"
|
|
@@ -422,14 +406,6 @@ class AsyncChat:
|
|
|
422
406
|
self.hist = mk_msgs(self.hist, self.cache and 'claude' in self.model, cache_idxs, self.ttl)
|
|
423
407
|
msgs = self.hist
|
|
424
408
|
if prefill: msgs = self.hist + [Msg(role='assistant', content=[Part(PartType.text, prefill)])]
|
|
425
|
-
msgs = _split_fence_msgs(msgs)
|
|
426
|
-
if self.tool_reminder: msgs = _inject_tool_reminder(msgs, self.tool_reminder)
|
|
427
|
-
if 'deepseek' in self.model:
|
|
428
|
-
# The `reasoning_content` in the thinking mode must be passed back to the API.
|
|
429
|
-
for m in msgs:
|
|
430
|
-
if m.role=='assistant':
|
|
431
|
-
if not any(p.type==PartType.thinking for p in m.content):
|
|
432
|
-
m.content.append(Part(PartType.thinking, ''))
|
|
433
409
|
return sp, msgs
|
|
434
410
|
|
|
435
411
|
@property
|
|
@@ -439,39 +415,35 @@ class AsyncChat:
|
|
|
439
415
|
u.cost *= (1 + self.markup)
|
|
440
416
|
self.use += u
|
|
441
417
|
|
|
418
|
+
def add_cb(self, cb):
|
|
419
|
+
if isinstance(cb, type): cb = cb()
|
|
420
|
+
cb.chat = self
|
|
421
|
+
self.cbs.append(cb)
|
|
422
|
+
return self
|
|
423
|
+
|
|
424
|
+
def add_cbs(self, cbs):
|
|
425
|
+
if cbs is None: return self
|
|
426
|
+
L(cbs).map(self.add_cb)
|
|
427
|
+
return self
|
|
428
|
+
|
|
442
429
|
# %% ../nbs/07_chat.ipynb #2e469ea1
|
|
443
430
|
def _srvtools(tcs): return L(tcs).filter(lambda o: o.server) if tcs else None
|
|
444
431
|
def _usrtools(tcs): return L(tcs).filter(lambda o: not o.server) if tcs else None
|
|
445
432
|
|
|
446
|
-
# %% ../nbs/07_chat.ipynb #a2e70fbb
|
|
447
|
-
def add_warning(r, msg):
|
|
448
|
-
wrn = Part(PartType.text, f"<warning>{msg}</warning>")
|
|
449
|
-
if r.message.content: r.message.content.append(wrn)
|
|
450
|
-
else: r.message.content = [wrn]
|
|
451
|
-
|
|
452
|
-
# %% ../nbs/07_chat.ipynb #e16195f9
|
|
453
|
-
def _handle_stop_reason(res):
|
|
454
|
-
"Returns (action, warning_msg) - action is 'warning', 'pause', or None"
|
|
455
|
-
sr = stop_reason(res)
|
|
456
|
-
if sr == 'length': return 'warning', 'Response was cut off at token limit.'
|
|
457
|
-
if sr == 'refusal': return 'warning', 'AI server provider content filter was applied to this request'
|
|
458
|
-
if sr == 'content_filter': return 'warning', 'AI server provider content filter was applied to this request.'
|
|
459
|
-
# if sr == 'pause_turn': return 'retry', None # TODO: Not a canonical finish reason
|
|
460
|
-
return None, None
|
|
461
|
-
|
|
462
433
|
# %% ../nbs/07_chat.ipynb #19b87f53
|
|
463
434
|
def _think_kw(model, think, vendor_name):
|
|
464
435
|
if not think: return {}
|
|
465
436
|
if 'opus-4-7' in model:
|
|
466
437
|
e = 'xhigh' if think=='h' else effort.get(think)
|
|
467
|
-
|
|
438
|
+
eff = dict(thinking={"type":"adaptive", "display":"summarized"}, output_config={"effort":e})
|
|
439
|
+
return dict(reasoning_effort=eff)
|
|
468
440
|
try: xhigh = get_model_info(model, vendor_name).get('supports_xhigh_reasoning_effort')
|
|
469
441
|
except: xhigh = False
|
|
470
442
|
eff = effort.get(think) if think!='x' else 'xhigh' if xhigh else 'high'
|
|
471
443
|
if vendor_name == 'codex': return dict(reasoning_effort={'effort':eff, 'summary':'auto'})
|
|
472
444
|
return dict(reasoning_effort=eff)
|
|
473
445
|
|
|
474
|
-
# %% ../nbs/07_chat.ipynb #
|
|
446
|
+
# %% ../nbs/07_chat.ipynb #06e898fd
|
|
475
447
|
@patch
|
|
476
448
|
def _prep_call(self:AsyncChat, prefill, search, max_tokens, kwargs, stream=False, think=None):
|
|
477
449
|
"Prepare model info, prefill, search, and provider kwargs for a completion call"
|
|
@@ -483,19 +455,14 @@ def _prep_call(self:AsyncChat, prefill, search, max_tokens, kwargs, stream=False
|
|
|
483
455
|
kwargs['web_search_options']['search_context_size'] = effort[s]
|
|
484
456
|
if self.vendor_name == 'codex': kwargs['web_search_options']['type'] = 'web_search'
|
|
485
457
|
else: kwargs.pop('web_search_options', None)
|
|
486
|
-
# kwargs['additional_drop_params'] = ['temperature'] # TODO: What is this for?
|
|
487
458
|
if self.api_name: kwargs['api_name'] = self.api_name
|
|
488
459
|
if self.vendor_name: kwargs['vendor_name'] = self.vendor_name
|
|
489
460
|
if self.api_key: kwargs['api_key'] = self.api_key
|
|
490
461
|
if self.base_url: kwargs['base_url'] = self.base_url
|
|
491
462
|
if self.extra_headers: kwargs['xtra_headers'] = self.extra_headers
|
|
492
463
|
kwargs.update(_think_kw(self.model, think, self.vendor_name))
|
|
493
|
-
if (langs := _active_fence_langs(self.tool_schemas)):
|
|
494
|
-
if not any(isinstance(s, FenceToolStop) for s in kwargs.get('stop_callables', [])):
|
|
495
|
-
kwargs['stop_callables'] = kwargs.get('stop_callables', []) + [FenceToolStop(langs)]
|
|
496
464
|
return prefill, max_tokens
|
|
497
465
|
|
|
498
|
-
|
|
499
466
|
# %% ../nbs/07_chat.ipynb #07951b77
|
|
500
467
|
@patch
|
|
501
468
|
def print_hist(self:AsyncChat):
|
|
@@ -515,50 +482,35 @@ async def astream_with_complete(self, agen, postproc=noop):
|
|
|
515
482
|
if not isinstance(chunk, Completion): yield postproc(chunk)
|
|
516
483
|
self.value = chunk
|
|
517
484
|
|
|
518
|
-
# %% ../nbs/07_chat.ipynb #
|
|
485
|
+
# %% ../nbs/07_chat.ipynb #a049cf52
|
|
519
486
|
@patch
|
|
520
487
|
@delegates(acomplete)
|
|
521
488
|
async def _call(self:AsyncChat, msg=None, prefill=None, temp=None, think=None, search=None, stream=False, max_steps=2, step=1,
|
|
522
489
|
final_prompt=None, tool_choice=None, max_tokens=None, n_workers=8, pause=0.001, tc_timeout=7200, **kwargs):
|
|
523
490
|
if step>max_steps+1: return
|
|
524
|
-
prefill, max_tokens = self._prep_call(prefill, search, max_tokens, kwargs, stream=stream, think=think)
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
res = await acomplete(
|
|
491
|
+
self.prefill, max_tokens = self._prep_call(prefill, search, max_tokens, kwargs, stream=stream, think=think)
|
|
492
|
+
self.turn_sysp, self.turn_msgs = self._prep_msg(msg, prefill)
|
|
493
|
+
async for o in self._call_cbs('after_msgs'): yield o
|
|
494
|
+
|
|
495
|
+
self.turn_kwargs, self.stream = kwargs, stream
|
|
496
|
+
async for o in self._call_cbs('before_acomplete'): yield o
|
|
497
|
+
res = await acomplete(self.turn_msgs, self.model, system=self.turn_sysp, stream=stream,
|
|
531
498
|
tools=self.tool_schemas, tool_choice=tool_choice, max_tokens=int(max_tokens),
|
|
532
|
-
temperature=None if think else ifnone(temp,self.temp), **
|
|
499
|
+
temperature=None if think else ifnone(temp,self.temp), **self.turn_kwargs)
|
|
533
500
|
if stream:
|
|
534
|
-
if prefill: yield _mk_prefill(prefill)
|
|
501
|
+
if self.prefill: yield _mk_prefill(self.prefill)
|
|
535
502
|
res = astream_with_complete(res, postproc=postproc)
|
|
536
503
|
async for chunk in res: yield chunk
|
|
537
504
|
res = res.value
|
|
538
|
-
|
|
539
|
-
if prefill:
|
|
540
|
-
self.hist.append(
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
elif action == 'retry':
|
|
544
|
-
async for result in self._call(
|
|
545
|
-
None, prefill, temp, think, search, stream, max_steps, step,
|
|
546
|
-
final_prompt, tool_choice, **kwargs): yield result
|
|
547
|
-
self.hist.pop(-2) # rm incomplete srvtoolu_
|
|
548
|
-
return
|
|
549
|
-
self._track(res)
|
|
505
|
+
self.turn_res, self.turn_msg = res, contents(res)
|
|
506
|
+
if self.prefill: self.turn_msg.content[0].text = self.prefill + self.turn_msg.content[0].text
|
|
507
|
+
self.hist.append(self.turn_msg)
|
|
508
|
+
async for o in self._call_cbs('after_acomplete'): yield o
|
|
509
|
+
self._track(self.turn_res)
|
|
550
510
|
yield res
|
|
551
511
|
|
|
552
|
-
toolloop, prompt = False, None
|
|
553
|
-
|
|
554
|
-
if m := last(self.hist, lambda o: o.role == 'assistant'):
|
|
555
|
-
if fence := extract_fence_call(m.text):
|
|
556
|
-
lang, code = fence
|
|
557
|
-
out = await run_fence_tool(lang, code, self.ns)
|
|
558
|
-
for p in reversed(m.content):
|
|
559
|
-
if p.type == PartType.text: p.text += out; break
|
|
560
|
-
if stream: yield {'text': out}
|
|
561
|
-
toolloop = True
|
|
512
|
+
self.toolloop, self.prompt, tmsg = False, None, None
|
|
513
|
+
async for o in self._call_cbs('before_tool_calls'): yield o
|
|
562
514
|
if stcs:= _srvtools(res.tool_calls):
|
|
563
515
|
for tc in stcs: yield tc
|
|
564
516
|
if tcs := _usrtools(res.tool_calls):
|
|
@@ -566,29 +518,23 @@ async def _call(self:AsyncChat, msg=None, prefill=None, temp=None, think=None, s
|
|
|
566
518
|
tmsg = mk_tool_res_msg(tcs, tres)
|
|
567
519
|
for r in tmsg.content: yield r
|
|
568
520
|
self.hist.append(tmsg)
|
|
569
|
-
if step>=max_steps-1 or _has_stop(tmsg.content): prompt,tool_choice,search = mk_msg(final_prompt),'none',False
|
|
570
|
-
toolloop = True
|
|
521
|
+
if step>=max_steps-1 or _has_stop(tmsg.content): self.prompt,tool_choice,search = mk_msg(final_prompt),'none',False
|
|
522
|
+
self.toolloop = True
|
|
571
523
|
|
|
572
|
-
|
|
524
|
+
async for o in self._call_cbs('after_tool_calls'): yield o
|
|
525
|
+
if self.toolloop and step <= max_steps:
|
|
573
526
|
try:
|
|
574
527
|
async for result in self._call(
|
|
575
|
-
prompt,
|
|
528
|
+
self.prompt, None, temp, think, search, stream, max_steps, step+1,
|
|
576
529
|
final_prompt, tool_choice=tool_choice, **kwargs): yield result
|
|
577
530
|
except ContextWindowExceededError:
|
|
578
|
-
|
|
579
|
-
|
|
531
|
+
if tmsg is not None:
|
|
532
|
+
for p in tmsg.content:
|
|
533
|
+
if len(p.text)>1000: p.text = _cwe_msg + _trunc_str(p.text, mx=1000)
|
|
580
534
|
async for result in self._call(
|
|
581
|
-
prompt,
|
|
535
|
+
self.prompt, None, temp, think, search, stream, max_steps, step+1,
|
|
582
536
|
final_prompt, tool_choice='none', **kwargs): yield result
|
|
583
537
|
|
|
584
|
-
# %% ../nbs/07_chat.ipynb #4dc002da
|
|
585
|
-
async def run_fence_tool(lang, code, ns):
|
|
586
|
-
"Run the mapped tool for `lang` with the code, return result fence"
|
|
587
|
-
tname = _lang2tool[lang]
|
|
588
|
-
arg = dict(code=code) if lang == 'py' else dict(command=code)
|
|
589
|
-
res = _mk_tool_result(await call_func_async(tname, arg, ns=ns, raise_on_err=False))
|
|
590
|
-
return _mk_result_fence(_trunc_str(str(res)))
|
|
591
|
-
|
|
592
538
|
# %% ../nbs/07_chat.ipynb #1361515a
|
|
593
539
|
@patch
|
|
594
540
|
@delegates(AsyncChat._call)
|
|
@@ -611,11 +557,146 @@ async def __call__(
|
|
|
611
557
|
async for res in result_gen: pass
|
|
612
558
|
return res # normal chat behavior only return last msg
|
|
613
559
|
|
|
560
|
+
# %% ../nbs/07_chat.ipynb #a4bbd2ce
|
|
561
|
+
class ChatCallback(GetAttr):
|
|
562
|
+
order,_default,chat,run = 0,'chat',None,True
|
|
563
|
+
def __repr__(self): return type(self).__name__
|
|
564
|
+
|
|
565
|
+
# %% ../nbs/07_chat.ipynb #2f02135c
|
|
566
|
+
@patch
|
|
567
|
+
async def _call_cbs(self:AsyncChat, event):
|
|
568
|
+
for cb in self.cbs.sorted('order'):
|
|
569
|
+
if cb.run and hasattr(cb, event):
|
|
570
|
+
async for o in getattr(cb, event)(): yield o
|
|
571
|
+
|
|
572
|
+
# %% ../nbs/07_chat.ipynb #cf3f064c
|
|
573
|
+
class DeepseekMsgsCallback(ChatCallback):
    "Ensure assistant messages carry a thinking part when the model name contains 'deepseek'"
    order = 10

    async def after_msgs(self):
        "Append an empty thinking `Part` to each assistant message in `turn_msgs` that has none"
        if 'deepseek' in self.model:
            for msg in self.turn_msgs:
                if not msg.role=='assistant': continue
                if any(p.type==PartType.thinking for p in msg.content): continue
                msg.content.append(Part(PartType.thinking, ''))
        # Unreachable `yield`: makes this an async generator so `_call_cbs` can `async for` over it
        if False: yield
|
|
581
|
+
|
|
582
|
+
# %% ../nbs/07_chat.ipynb #14baac3e
|
|
583
|
+
class DeepseekPrefillCallback(ChatCallback):
    "Point prefilled DeepSeek requests at the DeepSeek beta base URL"
    order = 10

    async def before_acomplete(self):
        "Set `chat.turn_kwargs['base_url']` when a prefill is used with vendor 'deepseek' and a `deepseek-` model"
        wants_beta = self.prefill and self.vendor_name == 'deepseek' and self.model.startswith("deepseek-")
        if wants_beta:
            self.chat.turn_kwargs['base_url'] = 'https://api.deepseek.com/beta'
        # Unreachable `yield`: makes this an async generator so `_call_cbs` can `async for` over it
        if False: yield
|
|
589
|
+
|
|
590
|
+
# %% ../nbs/07_chat.ipynb #ce47dc4a
|
|
591
|
+
def add_warning(r, msg):
    "Add a text part holding `msg` wrapped in `<warning>` tags to `r.message.content`"
    part = Part(PartType.text, f"<warning>{msg}</warning>")
    # Falsy content (e.g. empty or `None`) is replaced by a new one-item list
    if not r.message.content:
        r.message.content = [part]
        return
    r.message.content.append(part)
|
|
595
|
+
|
|
596
|
+
# %% ../nbs/07_chat.ipynb #b6ea161d
|
|
597
|
+
def _handle_stop_reason(res):
    "Return `(action, warning_msg)`: `('warning', msg)` for length, refusal and content_filter stops, else `(None, None)`"
    sr = stop_reason(res)
    warnings = (
        ('length', 'Response was cut off at token limit.'),
        ('refusal', 'AI server provider content filter was applied to this request'),
        ('content_filter', 'AI server provider content filter was applied to this request.'),
    )
    for reason, text in warnings:
        if sr == reason: return 'warning', text
    # if sr == 'pause_turn': return 'retry', None # TODO: Not a canonical finish reason
    return None, None
|
|
605
|
+
|
|
606
|
+
# %% ../nbs/07_chat.ipynb #daf876f4
|
|
607
|
+
class StopReasonCallback(ChatCallback):
    "Add a warning to the turn result when `_handle_stop_reason` asks for one"
    order = 40

    async def after_acomplete(self):
        "Call `add_warning` on `chat.turn_res` if the action for `turn_res` is 'warning'"
        action, msg = _handle_stop_reason(self.turn_res)
        needs_warning = action == 'warning'
        if needs_warning: add_warning(self.chat.turn_res, msg)
        # Unreachable `yield`: makes this an async generator so `_call_cbs` can `async for` over it
        if False: yield
|
|
613
|
+
|
|
614
|
+
# %% ../nbs/07_chat.ipynb #aa7630b2
|
|
615
|
+
def _active_fence_langs(tool_schemas):
    "Set of langs in `_lang2tool` whose tool name appears in `tool_schemas`; empty set when there are no schemas"
    if not tool_schemas: return set()
    # Names are read from each schema at 'function' -> 'name'
    registered = set()
    for schema in tool_schemas: registered.add(nested_idx(schema, 'function', 'name'))
    active = set()
    for lang, tname in _lang2tool.items():
        if tname in registered: active.add(lang)
    return active
|
|
620
|
+
|
|
621
|
+
# %% ../nbs/07_chat.ipynb #72274cdc
|
|
622
|
+
async def run_fence_tool(lang, code, ns):
    "Call the tool mapped to `lang` with `code` and return the truncated, stringified result as a result fence"
    tool_name = _lang2tool[lang]
    # 'py' passes the source as `code`; any other lang passes it as `command`
    arg_name = 'code' if lang == 'py' else 'command'
    raw = await call_func_async(tool_name, {arg_name: code}, ns=ns, raise_on_err=False)
    res = _mk_tool_result(raw)
    return _mk_result_fence(_trunc_str(str(res)))
|
|
628
|
+
|
|
629
|
+
# %% ../nbs/07_chat.ipynb #740ee3a4
|
|
630
|
+
class FenceToolCallback(ChatCallback):
    "Handle tool calls that the assistant writes as code fences"
    order = 20

    async def after_msgs(self):
        "Replace `chat.turn_msgs` with the output of `_split_fence_msgs`"
        self.chat.turn_msgs = _split_fence_msgs(self.turn_msgs)
        # Unreachable `yield`: makes this an async generator so `_call_cbs` can `async for` over it
        if False: yield

    async def before_acomplete(self):
        "Add a `FenceToolStop` for the active fence langs to `stop_callables`, unless one is already present"
        langs = _active_fence_langs(self.tool_schemas)
        if langs:
            current = self.turn_kwargs.get('stop_callables', [])
            already = any(isinstance(s, FenceToolStop) for s in current)
            if not already: self.chat.turn_kwargs['stop_callables'] = current + [FenceToolStop(langs)]
        if False: yield

    async def before_tool_calls(self):
        "Run the fence call found in the last assistant message and append its output to that message"
        if not _active_fence_langs(self.tool_schemas): return
        m = last(self.hist, lambda o: o.role == 'assistant')
        if not m: return
        fence = extract_fence_call(m.text)
        if not fence: return
        lang, code = fence
        out = await run_fence_tool(lang, code, self.ns)
        # Attach the output to the last text part of the message
        for part in reversed(m.content):
            if part.type == PartType.text:
                part.text += out
                break
        # NOTE(review): assumes `toolloop` is only set when a fence call was found — confirm
        self.chat.toolloop = True
        if self.stream: yield {'text': out}
|
|
653
|
+
|
|
654
|
+
# %% ../nbs/07_chat.ipynb #1897aea2
|
|
655
|
+
def _inject_tool_reminder(msgs, reminder):
|
|
656
|
+
i = len(msgs)
|
|
657
|
+
while i>0 and msgs[i-1].role=='tool': i-=1
|
|
658
|
+
if i>=len(msgs): return msgs
|
|
659
|
+
msgs,m = list(msgs),msgs[i]
|
|
660
|
+
m.content.append(Part(type=PartType.text, text=reminder))
|
|
661
|
+
msgs[i] = m
|
|
662
|
+
return msgs
|
|
663
|
+
|
|
664
|
+
# %% ../nbs/07_chat.ipynb #1b404e0f
|
|
665
|
+
# Default `tool_reminder` text for `ToolReminderCallback`, which passes it to `_inject_tool_reminder`
_tool_reminder = '\n<system-reminder>After *EVERY* tool call result, no matter how small, briefly summarise in prose what you found, before continuing or calling another tool.</system-reminder>'
|
|
666
|
+
|
|
667
|
+
# %% ../nbs/07_chat.ipynb #fab308b7
|
|
668
|
+
class ToolReminderCallback(ChatCallback):
    "Inject a tool-use reminder into the turn messages"
    order = 30

    def __init__(self, tool_reminder=_tool_reminder):
        # presumably stores `tool_reminder` on the instance (read back in `after_msgs`) — confirm against fastcore
        store_attr()

    async def after_msgs(self):
        "Replace `chat.turn_msgs` with the result of `_inject_tool_reminder`"
        updated = _inject_tool_reminder(self.turn_msgs, self.tool_reminder)
        self.chat.turn_msgs = updated
        # Unreachable `yield`: makes this an async generator so `_call_cbs` can `async for` over it
        if False: yield
|
|
674
|
+
|
|
675
|
+
# %% ../nbs/07_chat.ipynb #423caa31
|
|
676
|
+
def stop_sequences(seqs):
    "Stop when any sequence appears in the accumulated completion text."
    seqs = L(seqs)
    def _stop(text):
        "Return `text` cut just after the stop sequence that completes earliest, or `None` if none occurs"
        cut = None
        for s in seqs:
            idx = text.find(s)
            if idx < 0: continue
            end = idx + len(s)
            # Keep the earliest end position, so the result does not depend on the order of `seqs`
            if cut is None or end < cut: cut = end
        if cut is None: return None
        return text[:cut]
    return _stop
|
|
683
|
+
|
|
684
|
+
# %% ../nbs/07_chat.ipynb #663eee29
|
|
685
|
+
class StopSequencesCallback(ChatCallback):
    "Register a `stop_sequences` callable for `seqs` before each completion"
    order = 30

    def __init__(self, seqs):
        self.seqs = L(seqs)

    async def before_acomplete(self):
        "Append a `stop_sequences` callable to `chat.turn_kwargs['stop_callables']`"
        existing = self.turn_kwargs.get('stop_callables', [])
        self.chat.turn_kwargs['stop_callables'] = existing + [stop_sequences(self.seqs)]
        # Unreachable `yield`: makes this an async generator so `_call_cbs` can `async for` over it
        if False: yield
|
|
691
|
+
|
|
692
|
+
# %% ../nbs/07_chat.ipynb #318ee856
|
|
693
|
+
# Default callback classes (their `order` values are 10, 20, 30 and 40 respectively).
# `DeepseekMsgsCallback` and `StopSequencesCallback` are defined above but not part of this default set.
# NOTE(review): presumably read when a chat is constructed — confirm where `defaults.chat_callbacks` is consumed.
defaults.chat_callbacks = [DeepseekPrefillCallback, FenceToolCallback, ToolReminderCallback, StopReasonCallback]
|
|
694
|
+
|
|
614
695
|
# %% ../nbs/07_chat.ipynb #115fd94f
|
|
615
696
|
def _trunc_param(v, mx=40):
|
|
616
697
|
"Truncate and escape param value for display"
|
|
617
698
|
tp = _trunc_str(str(v).replace('`', r'\`'), mx=mx, replace=None, skip=0)
|
|
618
|
-
try: return
|
|
699
|
+
try: return dumps(tp, ensure_ascii=False)
|
|
619
700
|
except Exception: return repr(tp).replace('\\\\', '\\')
|
|
620
701
|
|
|
621
702
|
# %% ../nbs/07_chat.ipynb #80c0abdb
|
|
@@ -645,7 +726,7 @@ def mk_tr_details(tr, mx=2000):
|
|
|
645
726
|
'call':{'function': tr.data['name'], 'arguments': args},
|
|
646
727
|
'result':_trunc_content(tr.text, mx=mx),}
|
|
647
728
|
summ = f"<summary>{_tc_summary(tr)}</summary>"
|
|
648
|
-
return f"\n\n{tool_dtls_tag}\n{summ}\n\n```json\n{dumps(res, indent=2)}\n```\n\n</details>\n\n"
|
|
729
|
+
return f"\n\n{tool_dtls_tag}\n{summ}\n\n```json\n{dumps(res, indent=2, ensure_ascii=False)}\n```\n\n</details>\n\n"
|
|
649
730
|
|
|
650
731
|
# %% ../nbs/07_chat.ipynb #3049001c
|
|
651
732
|
def mk_srv_tc_details(tc, mx=2000):
|
|
@@ -653,7 +734,7 @@ def mk_srv_tc_details(tc, mx=2000):
|
|
|
653
734
|
args = {k:_trunc_str(v, mx=mx*5) for k,v in tc.arguments.items()}
|
|
654
735
|
res = {'id':tc.id, 'server':True, 'call':{'function': tc.name, 'arguments': args}, 'result':"Server tool call executed."}
|
|
655
736
|
summ = f"<summary>{_srv_tc_summary(tc)}</summary>"
|
|
656
|
-
return f"\n\n{tool_dtls_tag}\n{summ}\n\n```json\n{dumps(res, indent=2)}\n```\n\n</details>\n\n"
|
|
737
|
+
return f"\n\n{tool_dtls_tag}\n{summ}\n\n```json\n{dumps(res, indent=2, ensure_ascii=False)}\n```\n\n</details>\n\n"
|
|
657
738
|
|
|
658
739
|
# %% ../nbs/07_chat.ipynb #f0d984ec
|
|
659
740
|
# status_re = re.compile(r'^- ⏳ <code>(.*)</code> ⏳$|^🧠+$', re.MULTILINE) # TODO: Need to yield tool calls as they are done collated in fastllm `_acollect_stream`
|
fastllm/codex.py
ADDED
fastllm/streaming.py
CHANGED
|
@@ -116,7 +116,7 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
|
|
|
116
116
|
stop, stop_yielded = False, False
|
|
117
117
|
async for d in it:
|
|
118
118
|
# Check stop condition and yield stop delta
|
|
119
|
-
stop = stop_and_trim(part_accum, d, stop_callables)
|
|
119
|
+
if not stop: stop = stop_and_trim(part_accum, d, stop_callables)
|
|
120
120
|
if stop and not stop_yielded:
|
|
121
121
|
for r in _yield_parts(d): yield r
|
|
122
122
|
stop_yielded = True
|
|
@@ -138,6 +138,7 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
|
|
|
138
138
|
deltas.append(d)
|
|
139
139
|
part_accum.finalize()
|
|
140
140
|
tcs = part_accum.tool_calls
|
|
141
|
+
if api_name: usg = api_registry.apis[api_name].finalize_usage(usg, part_accum.parts)
|
|
141
142
|
if stop: fin = FinishReason.stop
|
|
142
143
|
fin = FinishReason.tool_calls if fin==FinishReason.stop and any(~L(tcs).attrgot('server')) else fin # recheck tool calls post collation
|
|
143
144
|
# tool calls and non-anthropic citations are yielded at the end
|
|
@@ -145,3 +146,4 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
|
|
|
145
146
|
message=Msg(role="assistant", content=part_accum.parts),
|
|
146
147
|
finish_reason=fin, usage=usg, tool_calls=tcs, api_name=api_name, vendor_name=vendor_name,
|
|
147
148
|
raw={'deltas':deltas})
|
|
149
|
+
|
fastllm/types.py
CHANGED
|
@@ -40,7 +40,7 @@ def _repr_markdown_(self: Part):
|
|
|
40
40
|
|
|
41
41
|
{body}
|
|
42
42
|
|
|
43
|
-
<details>
|
|
43
|
+
<details markdown='1'>
|
|
44
44
|
|
|
45
45
|
- data: `{data}`
|
|
46
46
|
|
|
@@ -80,7 +80,7 @@ def _repr_markdown_(self: ToolCall):
|
|
|
80
80
|
extra = _trunc_strs(self.extra)
|
|
81
81
|
return f"""🔧 **{self.name}**(`{self.arguments}`)
|
|
82
82
|
|
|
83
|
-
<details>
|
|
83
|
+
<details markdown='1'>
|
|
84
84
|
|
|
85
85
|
- id: `{self.id}`
|
|
86
86
|
- server: `{self.server}`
|
|
@@ -135,7 +135,7 @@ def _repr_markdown_(self: Completion):
|
|
|
135
135
|
det_str = '\n- '.join(details)
|
|
136
136
|
return f"""{content}
|
|
137
137
|
|
|
138
|
-
<details>
|
|
138
|
+
<details markdown='1'>
|
|
139
139
|
|
|
140
140
|
- {det_str}
|
|
141
141
|
|
|
@@ -147,25 +147,29 @@ FinishReason = str_enum('finish_reason', 'stop', 'tool_calls', 'length', 'conten
|
|
|
147
147
|
# %% ../nbs/00_types.ipynb #fc681c52
|
|
148
148
|
class APIRegistry:
|
|
149
149
|
def __init__(self): self.apis = {}
|
|
150
|
-
def register(self, name, **kwargs): self.apis[name] = SimpleNamespace(**kwargs)
|
|
150
|
+
def register(self, name, finalize_usage=noop, **kwargs): self.apis[name] = SimpleNamespace(finalize_usage=finalize_usage, **kwargs)
|
|
151
151
|
|
|
152
152
|
api_registry = APIRegistry()
|
|
153
153
|
|
|
154
|
+
|
|
154
155
|
# %% ../nbs/00_types.ipynb #d58a5f96
|
|
155
156
|
def mk_completion(resp, model, api_name, vendor_name):
|
|
156
157
|
"Normalize an api response into Completion."
|
|
157
158
|
api = api_registry.apis[api_name]
|
|
158
159
|
tcs = api.norm_tool_calls(resp)
|
|
160
|
+
parts = api.norm_parts(resp)
|
|
161
|
+
usg = api.finalize_usage(api.norm_usage(resp), parts)
|
|
159
162
|
return Completion(
|
|
160
163
|
model=resp.get("model") or model,
|
|
161
|
-
message=Msg(role="assistant", content=
|
|
164
|
+
message=Msg(role="assistant", content=parts),
|
|
162
165
|
finish_reason=api.norm_finish(resp, tcs),
|
|
163
|
-
usage=
|
|
166
|
+
usage=usg,
|
|
164
167
|
tool_calls=tcs,
|
|
165
168
|
api_name=api_name,
|
|
166
169
|
vendor_name=vendor_name,
|
|
167
170
|
raw=resp)
|
|
168
171
|
|
|
172
|
+
|
|
169
173
|
# %% ../nbs/00_types.ipynb #d5322db5
|
|
170
174
|
def mk_tool_res_msg(tool_calls:list[ToolCall], results:list[str|list]):
|
|
171
175
|
'A util to prepare parallel tool call with str or media list results'
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
fastllm/__init__.py,sha256=wOJN3HxAgnSon5vWYU3Txm2UZ_7tBHDKXUKZIH-mXX8,22
|
|
2
|
+
fastllm/_modidx.py,sha256=DJ-zrb77RwJiCGXzGtCoMBxZOfSEaX_ftBNC_qIgIpo,33303
|
|
3
|
+
fastllm/acomplete.py,sha256=jSQDotocHp5dK9c18_MxqZkT626XoLNCftlTkKZrqhY,7440
|
|
4
|
+
fastllm/anthropic.py,sha256=sjxLcJyhdRPc6K7hlKjl4n073RkfbhOhh-QMZweTizU,15128
|
|
5
|
+
fastllm/chat.py,sha256=LKqOczYFRM-s82seTbxBWFW5G16oqnG7nQO5oi3E_T4,35856
|
|
6
|
+
fastllm/codex.py,sha256=HZchfrGUgdf8ayhtOFbIRmh9YmIqfQBwqviAEeir4Uo,161
|
|
7
|
+
fastllm/gemini.py,sha256=E1EYMfV8IMpC_-WzlDrkhz_CJQmzmxvaVUucNgPOqSA,14947
|
|
8
|
+
fastllm/openai_chat.py,sha256=wZ0HI0m9ipy9XVhqmYBXf-BmkVAOipUVwqu9NGB_rJU,10941
|
|
9
|
+
fastllm/openai_responses.py,sha256=Nk5bfTCF2-a17nwvIsf-u39j539v9KIduVfScECItKk,13052
|
|
10
|
+
fastllm/streaming.py,sha256=aBEa1cFbK2XbFuEP_EG8FW0hp4T-FzZ2ZlwdPqGLpw8,6755
|
|
11
|
+
fastllm/types.py,sha256=LOM2KN3uVpwv5WFYc8oaof5Dh_N4jpz_gxfdEpVTnpY,12456
|
|
12
|
+
fastllm/specs/anthropic.json,sha256=VCgTjM2_HoDpCkeu3q_TCOEZLMHriJZLAG3LnDBAgGM,541035
|
|
13
|
+
fastllm/specs/anthropic.yml,sha256=3S3NAKdXB1Nwp-Sn9Gmh4tBnwhGGhMO3DXkGqPXPUYs,724122
|
|
14
|
+
fastllm/specs/gemini.json,sha256=zJGOdvZ2BvCiTENZt0-BDEvNBMl8h6EBmEskle_WBto,309331
|
|
15
|
+
fastllm/specs/openai.with-code-samples.json,sha256=Kto19AW1u8MfxVDJ4cFVBIdZQOIyy8NWylswo57eABU,1995929
|
|
16
|
+
fastllm/specs/openai.with-code-samples.yml,sha256=DlcWGdaeP4k7smVjt6UbyehJ-2XGU3rn3nCIBMDRfYU,2553630
|
|
17
|
+
fastllm/specs/spec_manifest.json,sha256=9tVFwojXFnNqsAxQzCRTP1lgSIM0fXixnrXdv4Cmb0c,653
|
|
18
|
+
python_fastllm-0.0.8.dist-info/METADATA,sha256=2uAYn9BpQnovifcMF8xwqXO2ZarEoc6Vf-0J7vF4_nk,19546
|
|
19
|
+
python_fastllm-0.0.8.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
20
|
+
python_fastllm-0.0.8.dist-info/entry_points.txt,sha256=dq0chsiRjJYStCOXweFW9L6LpyMTjWu2AabKCbTSbuI,36
|
|
21
|
+
python_fastllm-0.0.8.dist-info/top_level.txt,sha256=F8qodL7nEGUHGmzzqfhNKCTIr1i0D6cvudOnm3z7o0Y,8
|
|
22
|
+
python_fastllm-0.0.8.dist-info/RECORD,,
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
fastllm/__init__.py,sha256=QiiYsv0kcJaB8wCWyT-FnI2b6be87HA-CrrIUn8LQhg,22
|
|
2
|
-
fastllm/_modidx.py,sha256=oWXxw9eciYsnR2YAhla6c7HCPdXSFb9jV7WQ71fZXPQ,29551
|
|
3
|
-
fastllm/acomplete.py,sha256=p6g_LASZz5u4vhFjh-vJKw1ImhBLW090_Y-TJYmVyDo,6649
|
|
4
|
-
fastllm/anthropic.py,sha256=fG20kOv3d3wGKQe8rD5pFWgZHKe-vT-9QJ3nPXh2twY,14615
|
|
5
|
-
fastllm/chat.py,sha256=3zDfqcMt_v8NNDiOyhNz8czd4vlOzA7r0_GL1zNt-uI,32937
|
|
6
|
-
fastllm/gemini.py,sha256=E1EYMfV8IMpC_-WzlDrkhz_CJQmzmxvaVUucNgPOqSA,14947
|
|
7
|
-
fastllm/openai_chat.py,sha256=wZ0HI0m9ipy9XVhqmYBXf-BmkVAOipUVwqu9NGB_rJU,10941
|
|
8
|
-
fastllm/openai_responses.py,sha256=Nk5bfTCF2-a17nwvIsf-u39j539v9KIduVfScECItKk,13052
|
|
9
|
-
fastllm/streaming.py,sha256=FYG4-rt7mfGKrUPxCMp_Z3kkxGLKKAVPev8ifD3YFlQ,6652
|
|
10
|
-
fastllm/types.py,sha256=JQdMrAdkqYOIeDtuNC18CFtsXjp12FqTp2_3OlOroV8,12304
|
|
11
|
-
fastllm/specs/anthropic.json,sha256=VCgTjM2_HoDpCkeu3q_TCOEZLMHriJZLAG3LnDBAgGM,541035
|
|
12
|
-
fastllm/specs/anthropic.yml,sha256=3S3NAKdXB1Nwp-Sn9Gmh4tBnwhGGhMO3DXkGqPXPUYs,724122
|
|
13
|
-
fastllm/specs/gemini.json,sha256=zJGOdvZ2BvCiTENZt0-BDEvNBMl8h6EBmEskle_WBto,309331
|
|
14
|
-
fastllm/specs/openai.with-code-samples.json,sha256=Kto19AW1u8MfxVDJ4cFVBIdZQOIyy8NWylswo57eABU,1995929
|
|
15
|
-
fastllm/specs/openai.with-code-samples.yml,sha256=DlcWGdaeP4k7smVjt6UbyehJ-2XGU3rn3nCIBMDRfYU,2553630
|
|
16
|
-
fastllm/specs/spec_manifest.json,sha256=9tVFwojXFnNqsAxQzCRTP1lgSIM0fXixnrXdv4Cmb0c,653
|
|
17
|
-
python_fastllm-0.0.6.dist-info/METADATA,sha256=Ye3J3OCznSy7NOGLAHIs9e3G0vvVSLCbL9J1oqNJkww,19546
|
|
18
|
-
python_fastllm-0.0.6.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
19
|
-
python_fastllm-0.0.6.dist-info/entry_points.txt,sha256=dq0chsiRjJYStCOXweFW9L6LpyMTjWu2AabKCbTSbuI,36
|
|
20
|
-
python_fastllm-0.0.6.dist-info/top_level.txt,sha256=F8qodL7nEGUHGmzzqfhNKCTIr1i0D6cvudOnm3z7o0Y,8
|
|
21
|
-
python_fastllm-0.0.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|