python-fastllm 0.0.5__py3-none-any.whl → 0.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fastllm/__init__.py +1 -1
- fastllm/_modidx.py +29 -0
- fastllm/acomplete.py +1 -1
- fastllm/chat.py +190 -114
- fastllm/streaming.py +1 -1
- fastllm/types.py +3 -3
- {python_fastllm-0.0.5.dist-info → python_fastllm-0.0.7.dist-info}/METADATA +1 -1
- {python_fastllm-0.0.5.dist-info → python_fastllm-0.0.7.dist-info}/RECORD +11 -11
- {python_fastllm-0.0.5.dist-info → python_fastllm-0.0.7.dist-info}/WHEEL +0 -0
- {python_fastllm-0.0.5.dist-info → python_fastllm-0.0.7.dist-info}/entry_points.txt +0 -0
- {python_fastllm-0.0.5.dist-info → python_fastllm-0.0.7.dist-info}/top_level.txt +0 -0
fastllm/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.0.5"
|
|
1
|
+
__version__ = "0.0.7"
|
fastllm/_modidx.py
CHANGED
|
@@ -43,24 +43,53 @@ d = { 'settings': { 'branch': 'main',
|
|
|
43
43
|
'fastllm.chat.AsyncChat.__call__': ('chat.html#asyncchat.__call__', 'fastllm/chat.py'),
|
|
44
44
|
'fastllm.chat.AsyncChat.__init__': ('chat.html#asyncchat.__init__', 'fastllm/chat.py'),
|
|
45
45
|
'fastllm.chat.AsyncChat._call': ('chat.html#asyncchat._call', 'fastllm/chat.py'),
|
|
46
|
+
'fastllm.chat.AsyncChat._call_cbs': ('chat.html#asyncchat._call_cbs', 'fastllm/chat.py'),
|
|
46
47
|
'fastllm.chat.AsyncChat._prep_call': ('chat.html#asyncchat._prep_call', 'fastllm/chat.py'),
|
|
47
48
|
'fastllm.chat.AsyncChat._prep_msg': ('chat.html#asyncchat._prep_msg', 'fastllm/chat.py'),
|
|
48
49
|
'fastllm.chat.AsyncChat._track': ('chat.html#asyncchat._track', 'fastllm/chat.py'),
|
|
50
|
+
'fastllm.chat.AsyncChat.add_cb': ('chat.html#asyncchat.add_cb', 'fastllm/chat.py'),
|
|
51
|
+
'fastllm.chat.AsyncChat.add_cbs': ('chat.html#asyncchat.add_cbs', 'fastllm/chat.py'),
|
|
49
52
|
'fastllm.chat.AsyncChat.print_hist': ('chat.html#asyncchat.print_hist', 'fastllm/chat.py'),
|
|
50
53
|
'fastllm.chat.AsyncChat.tcdict': ('chat.html#asyncchat.tcdict', 'fastllm/chat.py'),
|
|
51
54
|
'fastllm.chat.AsyncStreamFormatter': ('chat.html#asyncstreamformatter', 'fastllm/chat.py'),
|
|
52
55
|
'fastllm.chat.AsyncStreamFormatter.format_stream': ( 'chat.html#asyncstreamformatter.format_stream',
|
|
53
56
|
'fastllm/chat.py'),
|
|
57
|
+
'fastllm.chat.ChatCallback': ('chat.html#chatcallback', 'fastllm/chat.py'),
|
|
58
|
+
'fastllm.chat.ChatCallback.__repr__': ('chat.html#chatcallback.__repr__', 'fastllm/chat.py'),
|
|
59
|
+
'fastllm.chat.DeepseekMsgsCallback': ('chat.html#deepseekmsgscallback', 'fastllm/chat.py'),
|
|
60
|
+
'fastllm.chat.DeepseekMsgsCallback.after_msgs': ( 'chat.html#deepseekmsgscallback.after_msgs',
|
|
61
|
+
'fastllm/chat.py'),
|
|
62
|
+
'fastllm.chat.DeepseekPrefillCallback': ('chat.html#deepseekprefillcallback', 'fastllm/chat.py'),
|
|
63
|
+
'fastllm.chat.DeepseekPrefillCallback.before_acomplete': ( 'chat.html#deepseekprefillcallback.before_acomplete',
|
|
64
|
+
'fastllm/chat.py'),
|
|
65
|
+
'fastllm.chat.FenceToolCallback': ('chat.html#fencetoolcallback', 'fastllm/chat.py'),
|
|
66
|
+
'fastllm.chat.FenceToolCallback.after_msgs': ('chat.html#fencetoolcallback.after_msgs', 'fastllm/chat.py'),
|
|
67
|
+
'fastllm.chat.FenceToolCallback.before_acomplete': ( 'chat.html#fencetoolcallback.before_acomplete',
|
|
68
|
+
'fastllm/chat.py'),
|
|
69
|
+
'fastllm.chat.FenceToolCallback.before_tool_calls': ( 'chat.html#fencetoolcallback.before_tool_calls',
|
|
70
|
+
'fastllm/chat.py'),
|
|
54
71
|
'fastllm.chat.FenceToolStop': ('chat.html#fencetoolstop', 'fastllm/chat.py'),
|
|
55
72
|
'fastllm.chat.FenceToolStop.__call__': ('chat.html#fencetoolstop.__call__', 'fastllm/chat.py'),
|
|
56
73
|
'fastllm.chat.FenceToolStop.__init__': ('chat.html#fencetoolstop.__init__', 'fastllm/chat.py'),
|
|
57
74
|
'fastllm.chat.FullResponse': ('chat.html#fullresponse', 'fastllm/chat.py'),
|
|
58
75
|
'fastllm.chat.Msg.text': ('chat.html#msg.text', 'fastllm/chat.py'),
|
|
76
|
+
'fastllm.chat.StopReasonCallback': ('chat.html#stopreasoncallback', 'fastllm/chat.py'),
|
|
77
|
+
'fastllm.chat.StopReasonCallback.after_acomplete': ( 'chat.html#stopreasoncallback.after_acomplete',
|
|
78
|
+
'fastllm/chat.py'),
|
|
59
79
|
'fastllm.chat.StopResponse': ('chat.html#stopresponse', 'fastllm/chat.py'),
|
|
80
|
+
'fastllm.chat.StopSequencesCallback': ('chat.html#stopsequencescallback', 'fastllm/chat.py'),
|
|
81
|
+
'fastllm.chat.StopSequencesCallback.__init__': ( 'chat.html#stopsequencescallback.__init__',
|
|
82
|
+
'fastllm/chat.py'),
|
|
83
|
+
'fastllm.chat.StopSequencesCallback.before_acomplete': ( 'chat.html#stopsequencescallback.before_acomplete',
|
|
84
|
+
'fastllm/chat.py'),
|
|
60
85
|
'fastllm.chat.StreamFormatter': ('chat.html#streamformatter', 'fastllm/chat.py'),
|
|
61
86
|
'fastllm.chat.StreamFormatter.__init__': ('chat.html#streamformatter.__init__', 'fastllm/chat.py'),
|
|
62
87
|
'fastllm.chat.StreamFormatter.format_item': ('chat.html#streamformatter.format_item', 'fastllm/chat.py'),
|
|
63
88
|
'fastllm.chat.StreamFormatter.format_stream': ('chat.html#streamformatter.format_stream', 'fastllm/chat.py'),
|
|
89
|
+
'fastllm.chat.ToolReminderCallback': ('chat.html#toolremindercallback', 'fastllm/chat.py'),
|
|
90
|
+
'fastllm.chat.ToolReminderCallback.__init__': ('chat.html#toolremindercallback.__init__', 'fastllm/chat.py'),
|
|
91
|
+
'fastllm.chat.ToolReminderCallback.after_msgs': ( 'chat.html#toolremindercallback.after_msgs',
|
|
92
|
+
'fastllm/chat.py'),
|
|
64
93
|
'fastllm.chat.ToolResponse': ('chat.html#toolresponse', 'fastllm/chat.py'),
|
|
65
94
|
'fastllm.chat.UsageStats': ('chat.html#usagestats', 'fastllm/chat.py'),
|
|
66
95
|
'fastllm.chat.UsageStats.__add__': ('chat.html#usagestats.__add__', 'fastllm/chat.py'),
|
fastllm/acomplete.py
CHANGED
|
@@ -49,7 +49,7 @@ api2spec = {'openai':oai_spec, 'openai_chat':oai_spec, 'anthropic':ant_spec, 'ge
|
|
|
49
49
|
|
|
50
50
|
# %% ../nbs/06_acomplete.ipynb #79075d95
|
|
51
51
|
@flexicache()
|
|
52
|
-
def mk_client(model, vendor_name=None, api_name=None, api_key=None, base_url=None, xtra_hdrs=None):
|
|
52
|
+
def mk_client(model=None, vendor_name=None, api_name=None, api_key=None, base_url=None, xtra_hdrs=None):
|
|
53
53
|
err_msg = f"please pass a valid one vendor: {', '.join(list(vendor_mapping))} or pass `api_name`,`base_url` and `api_key`"
|
|
54
54
|
if vendor_name:
|
|
55
55
|
override_base_url = base_url
|
fastllm/chat.py
CHANGED
|
@@ -4,10 +4,12 @@
|
|
|
4
4
|
|
|
5
5
|
# %% auto #0
|
|
6
6
|
__all__ = ['tool_dtls_tag', 're_tools', 'token_dtls_tag', 're_token', 'effort', 'remove_cache_ckpts', 'contents', 'stop_reason',
|
|
7
|
-
'mk_msg', 'FenceToolStop', 'extract_fence_call', '
|
|
8
|
-
'
|
|
9
|
-
'
|
|
10
|
-
'
|
|
7
|
+
'mk_msg', 'FenceToolStop', 'extract_fence_call', 'split_tools', 'fmt2hist', 'mk_msgs', 'cite_footnote',
|
|
8
|
+
'postproc', 'lite_mk_func', 'ToolResponse', 'structured', 'StopResponse', 'FullResponse', 'search_count',
|
|
9
|
+
'UsageStats', 'AsyncChat', 'astream_with_complete', 'ChatCallback', 'DeepseekMsgsCallback',
|
|
10
|
+
'DeepseekPrefillCallback', 'add_warning', 'StopReasonCallback', 'run_fence_tool', 'FenceToolCallback',
|
|
11
|
+
'ToolReminderCallback', 'stop_sequences', 'StopSequencesCallback', 'mk_tr_details', 'mk_srv_tc_details',
|
|
12
|
+
'StreamFormatter', 'AsyncStreamFormatter', 'adisplay_stream']
|
|
11
13
|
|
|
12
14
|
# %% ../nbs/07_chat.ipynb #d5a3bc1f
|
|
13
15
|
import asyncio, base64, json, mimetypes, random, string, ast, warnings
|
|
@@ -55,7 +57,7 @@ def remove_cache_ckpts(msg):
|
|
|
55
57
|
return msg
|
|
56
58
|
|
|
57
59
|
def _mk_content(o):
|
|
58
|
-
if isinstance(o, str): return Part(type=PartType.text, text=o
|
|
60
|
+
if isinstance(o, str): return Part(type=PartType.text, text=o)
|
|
59
61
|
elif isinstance(o,bytes): return _bytes2content(o)
|
|
60
62
|
return o
|
|
61
63
|
|
|
@@ -87,16 +89,16 @@ def mk_msg(
|
|
|
87
89
|
return _add_cache_control(msg, ttl=ttl) if cache else msg
|
|
88
90
|
|
|
89
91
|
# %% ../nbs/07_chat.ipynb #db466e1c
|
|
90
|
-
tool_dtls_tag = "<details class='tool-usage-details'>"
|
|
92
|
+
tool_dtls_tag = "<details class='tool-usage-details' markdown='1'>"
|
|
91
93
|
re_tools = re.compile(fr"^({tool_dtls_tag}\n*(?:<summary>(?P<summary>.*?)</summary>\n*)?\n*```json\n+(.*?)\n+```\n+</details>)",
|
|
92
94
|
flags=re.DOTALL|re.MULTILINE)
|
|
93
|
-
token_dtls_tag = "<details class='token-usage-details'>"
|
|
95
|
+
token_dtls_tag = "<details class='token-usage-details' markdown='1'>"
|
|
94
96
|
re_token = re.compile(fr"^{re.escape(token_dtls_tag)}<summary>.*?</summary>\n*\n*`.*?`\n*\n*</details>\n?",
|
|
95
97
|
flags=re.DOTALL|re.MULTILINE)
|
|
96
98
|
|
|
97
99
|
# %% ../nbs/07_chat.ipynb #be998131
|
|
98
100
|
_fence_back = '`````'
|
|
99
|
-
_fence_re = re.compile(f'^{_fence_back}(py|bash)\n(.*?)\n{_fence_back}', re.DOTALL | re.MULTILINE)
|
|
101
|
+
_fence_re = re.compile(f'^{_fence_back}(py|bash)\n(.*?)\n{_fence_back}$', re.DOTALL | re.MULTILINE)
|
|
100
102
|
_result_re = re.compile(f'\n{_fence_back}result\n(.*?)\n{_fence_back}\n', re.DOTALL)
|
|
101
103
|
_lang2tool = dict(py='python', bash='bash')
|
|
102
104
|
|
|
@@ -144,15 +146,6 @@ def _split_fence_msgs(msgs):
|
|
|
144
146
|
for m in msgs: res.extend(_split_msg_on_fences(m))
|
|
145
147
|
return res
|
|
146
148
|
|
|
147
|
-
# %% ../nbs/07_chat.ipynb #b161ca9e
|
|
148
|
-
def stop_sequences(seqs):
|
|
149
|
-
"Stop when any sequence appears in the accumulated completion text."
|
|
150
|
-
seqs = L(seqs)
|
|
151
|
-
def _stop(text):
|
|
152
|
-
for s in seqs:
|
|
153
|
-
if s in text: return text[:text.find(s)+len(s)]
|
|
154
|
-
return _stop
|
|
155
|
-
|
|
156
149
|
# %% ../nbs/07_chat.ipynb #45ada210
|
|
157
150
|
def _extract_tool_parts(text:str):
|
|
158
151
|
"Extract (tool_use_part, tool_result_part) from <details> json block"
|
|
@@ -362,24 +355,7 @@ class UsageStats:
|
|
|
362
355
|
summ = f"${self.cost:.4f}" if self.cost else f"{self.total_tokens:,} tokens"
|
|
363
356
|
return f"\n\n{token_dtls_tag}<summary>{summ}</summary>\n\n`{self!r}`\n\n</details>\n"
|
|
364
357
|
|
|
365
|
-
# %% ../nbs/07_chat.ipynb #
|
|
366
|
-
def _inject_tool_reminder(msgs, reminder):
|
|
367
|
-
i = len(msgs)
|
|
368
|
-
while i>0 and msgs[i-1].role=='tool': i-=1
|
|
369
|
-
if i>=len(msgs): return msgs
|
|
370
|
-
msgs,m = list(msgs),msgs[i]
|
|
371
|
-
m.content.append(Part(type=PartType.text, text=reminder))
|
|
372
|
-
msgs[i] = m
|
|
373
|
-
return msgs
|
|
374
|
-
|
|
375
|
-
# %% ../nbs/07_chat.ipynb #e7eb2032
|
|
376
|
-
def _active_fence_langs(tool_schemas):
|
|
377
|
-
"Return set of active fence langs whose mapped tool is registered"
|
|
378
|
-
if not tool_schemas: return set()
|
|
379
|
-
names = {nested_idx(t, 'function', 'name') for t in tool_schemas}
|
|
380
|
-
return {lang for lang, tname in _lang2tool.items() if tname in names}
|
|
381
|
-
|
|
382
|
-
# %% ../nbs/07_chat.ipynb #e9a14051
|
|
358
|
+
# %% ../nbs/07_chat.ipynb #cb3d7e77
|
|
383
359
|
class AsyncChat:
|
|
384
360
|
def __init__(
|
|
385
361
|
self,
|
|
@@ -399,7 +375,8 @@ class AsyncChat:
|
|
|
399
375
|
base_url=None, # API base url when model can't be resolved or vendor_name is not known
|
|
400
376
|
extra_headers=None, # Extra HTTP headers for custom providers
|
|
401
377
|
markup=0, # Cost markup multiplier (e.g. 0.5 for 50%)
|
|
402
|
-
|
|
378
|
+
cbs:list=None, # Chat callbacks
|
|
379
|
+
default_cbs=True # Whether to include default callbacks
|
|
403
380
|
):
|
|
404
381
|
"LiteLLM chat client."
|
|
405
382
|
self.model = model
|
|
@@ -408,7 +385,10 @@ class AsyncChat:
|
|
|
408
385
|
elif ns is None: ns = globals()
|
|
409
386
|
self.tool_schemas = [lite_mk_func(t) for t in tools] if tools else None
|
|
410
387
|
self.use = UsageStats()
|
|
411
|
-
store_attr()
|
|
388
|
+
store_attr(but='cbs')
|
|
389
|
+
self.cbs = L()
|
|
390
|
+
if default_cbs: self.add_cbs(defaults.chat_callbacks)
|
|
391
|
+
self.add_cbs(cbs)
|
|
412
392
|
|
|
413
393
|
def _prep_msg(self, msg=None, prefill=None):
|
|
414
394
|
"Prepare the system prompt and messages list for the API call"
|
|
@@ -422,14 +402,6 @@ class AsyncChat:
|
|
|
422
402
|
self.hist = mk_msgs(self.hist, self.cache and 'claude' in self.model, cache_idxs, self.ttl)
|
|
423
403
|
msgs = self.hist
|
|
424
404
|
if prefill: msgs = self.hist + [Msg(role='assistant', content=[Part(PartType.text, prefill)])]
|
|
425
|
-
msgs = _split_fence_msgs(msgs)
|
|
426
|
-
if self.tool_reminder: msgs = _inject_tool_reminder(msgs, self.tool_reminder)
|
|
427
|
-
if 'deepseek' in self.model:
|
|
428
|
-
# The `reasoning_content` in the thinking mode must be passed back to the API.
|
|
429
|
-
for m in msgs:
|
|
430
|
-
if m.role=='assistant':
|
|
431
|
-
if not any(p.type==PartType.thinking for p in m.content):
|
|
432
|
-
m.content.append(Part(PartType.thinking, ''))
|
|
433
405
|
return sp, msgs
|
|
434
406
|
|
|
435
407
|
@property
|
|
@@ -439,26 +411,21 @@ class AsyncChat:
|
|
|
439
411
|
u.cost *= (1 + self.markup)
|
|
440
412
|
self.use += u
|
|
441
413
|
|
|
414
|
+
def add_cb(self, cb):
|
|
415
|
+
if isinstance(cb, type): cb = cb()
|
|
416
|
+
cb.chat = self
|
|
417
|
+
self.cbs.append(cb)
|
|
418
|
+
return self
|
|
419
|
+
|
|
420
|
+
def add_cbs(self, cbs):
|
|
421
|
+
if cbs is None: return self
|
|
422
|
+
L(cbs).map(self.add_cb)
|
|
423
|
+
return self
|
|
424
|
+
|
|
442
425
|
# %% ../nbs/07_chat.ipynb #2e469ea1
|
|
443
426
|
def _srvtools(tcs): return L(tcs).filter(lambda o: o.server) if tcs else None
|
|
444
427
|
def _usrtools(tcs): return L(tcs).filter(lambda o: not o.server) if tcs else None
|
|
445
428
|
|
|
446
|
-
# %% ../nbs/07_chat.ipynb #a2e70fbb
|
|
447
|
-
def add_warning(r, msg):
|
|
448
|
-
wrn = Part(PartType.text, f"<warning>{msg}</warning>")
|
|
449
|
-
if r.message.content: r.message.content.append(wrn)
|
|
450
|
-
else: r.message.content = [wrn]
|
|
451
|
-
|
|
452
|
-
# %% ../nbs/07_chat.ipynb #e16195f9
|
|
453
|
-
def _handle_stop_reason(res):
|
|
454
|
-
"Returns (action, warning_msg) - action is 'warning', 'pause', or None"
|
|
455
|
-
sr = stop_reason(res)
|
|
456
|
-
if sr == 'length': return 'warning', 'Response was cut off at token limit.'
|
|
457
|
-
if sr == 'refusal': return 'warning', 'AI server provider content filter was applied to this request'
|
|
458
|
-
if sr == 'content_filter': return 'warning', 'AI server provider content filter was applied to this request.'
|
|
459
|
-
# if sr == 'pause_turn': return 'retry', None # TODO: Not a canonical finish reason
|
|
460
|
-
return None, None
|
|
461
|
-
|
|
462
429
|
# %% ../nbs/07_chat.ipynb #19b87f53
|
|
463
430
|
def _think_kw(model, think, vendor_name):
|
|
464
431
|
if not think: return {}
|
|
@@ -471,7 +438,7 @@ def _think_kw(model, think, vendor_name):
|
|
|
471
438
|
if vendor_name == 'codex': return dict(reasoning_effort={'effort':eff, 'summary':'auto'})
|
|
472
439
|
return dict(reasoning_effort=eff)
|
|
473
440
|
|
|
474
|
-
# %% ../nbs/07_chat.ipynb #
|
|
441
|
+
# %% ../nbs/07_chat.ipynb #06e898fd
|
|
475
442
|
@patch
|
|
476
443
|
def _prep_call(self:AsyncChat, prefill, search, max_tokens, kwargs, stream=False, think=None):
|
|
477
444
|
"Prepare model info, prefill, search, and provider kwargs for a completion call"
|
|
@@ -483,19 +450,14 @@ def _prep_call(self:AsyncChat, prefill, search, max_tokens, kwargs, stream=False
|
|
|
483
450
|
kwargs['web_search_options']['search_context_size'] = effort[s]
|
|
484
451
|
if self.vendor_name == 'codex': kwargs['web_search_options']['type'] = 'web_search'
|
|
485
452
|
else: kwargs.pop('web_search_options', None)
|
|
486
|
-
# kwargs['additional_drop_params'] = ['temperature'] # TODO: What is this for?
|
|
487
453
|
if self.api_name: kwargs['api_name'] = self.api_name
|
|
488
454
|
if self.vendor_name: kwargs['vendor_name'] = self.vendor_name
|
|
489
455
|
if self.api_key: kwargs['api_key'] = self.api_key
|
|
490
456
|
if self.base_url: kwargs['base_url'] = self.base_url
|
|
491
457
|
if self.extra_headers: kwargs['xtra_headers'] = self.extra_headers
|
|
492
458
|
kwargs.update(_think_kw(self.model, think, self.vendor_name))
|
|
493
|
-
if (langs := _active_fence_langs(self.tool_schemas)):
|
|
494
|
-
if not any(isinstance(s, FenceToolStop) for s in kwargs.get('stop_callables', [])):
|
|
495
|
-
kwargs['stop_callables'] = kwargs.get('stop_callables', []) + [FenceToolStop(langs)]
|
|
496
459
|
return prefill, max_tokens
|
|
497
460
|
|
|
498
|
-
|
|
499
461
|
# %% ../nbs/07_chat.ipynb #07951b77
|
|
500
462
|
@patch
|
|
501
463
|
def print_hist(self:AsyncChat):
|
|
@@ -515,50 +477,35 @@ async def astream_with_complete(self, agen, postproc=noop):
|
|
|
515
477
|
if not isinstance(chunk, Completion): yield postproc(chunk)
|
|
516
478
|
self.value = chunk
|
|
517
479
|
|
|
518
|
-
# %% ../nbs/07_chat.ipynb #
|
|
480
|
+
# %% ../nbs/07_chat.ipynb #a049cf52
|
|
519
481
|
@patch
|
|
520
482
|
@delegates(acomplete)
|
|
521
483
|
async def _call(self:AsyncChat, msg=None, prefill=None, temp=None, think=None, search=None, stream=False, max_steps=2, step=1,
|
|
522
484
|
final_prompt=None, tool_choice=None, max_tokens=None, n_workers=8, pause=0.001, tc_timeout=7200, **kwargs):
|
|
523
485
|
if step>max_steps+1: return
|
|
524
|
-
prefill, max_tokens = self._prep_call(prefill, search, max_tokens, kwargs, stream=stream, think=think)
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
res = await acomplete(
|
|
486
|
+
self.prefill, max_tokens = self._prep_call(prefill, search, max_tokens, kwargs, stream=stream, think=think)
|
|
487
|
+
self.turn_sysp, self.turn_msgs = self._prep_msg(msg, prefill)
|
|
488
|
+
async for o in self._call_cbs('after_msgs'): yield o
|
|
489
|
+
|
|
490
|
+
self.turn_kwargs, self.stream = kwargs, stream
|
|
491
|
+
async for o in self._call_cbs('before_acomplete'): yield o
|
|
492
|
+
res = await acomplete(self.turn_msgs, self.model, system=self.turn_sysp, stream=stream,
|
|
531
493
|
tools=self.tool_schemas, tool_choice=tool_choice, max_tokens=int(max_tokens),
|
|
532
|
-
temperature=None if think else ifnone(temp,self.temp), **kwargs)
|
|
494
|
+
temperature=None if think else ifnone(temp,self.temp), **self.turn_kwargs)
|
|
533
495
|
if stream:
|
|
534
|
-
if prefill: yield _mk_prefill(prefill)
|
|
496
|
+
if self.prefill: yield _mk_prefill(self.prefill)
|
|
535
497
|
res = astream_with_complete(res, postproc=postproc)
|
|
536
498
|
async for chunk in res: yield chunk
|
|
537
499
|
res = res.value
|
|
538
|
-
|
|
539
|
-
if prefill:
|
|
540
|
-
self.hist.append(
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
elif action == 'retry':
|
|
544
|
-
async for result in self._call(
|
|
545
|
-
None, prefill, temp, think, search, stream, max_steps, step,
|
|
546
|
-
final_prompt, tool_choice, **kwargs): yield result
|
|
547
|
-
self.hist.pop(-2) # rm incomplete srvtoolu_
|
|
548
|
-
return
|
|
549
|
-
self._track(res)
|
|
500
|
+
self.turn_res, self.turn_msg = res, contents(res)
|
|
501
|
+
if self.prefill: self.turn_msg.content[0].text = self.prefill + self.turn_msg.content[0].text
|
|
502
|
+
self.hist.append(self.turn_msg)
|
|
503
|
+
async for o in self._call_cbs('after_acomplete'): yield o
|
|
504
|
+
self._track(self.turn_res)
|
|
550
505
|
yield res
|
|
551
506
|
|
|
552
|
-
toolloop, prompt = False, None
|
|
553
|
-
|
|
554
|
-
if m := last(self.hist, lambda o: o.role == 'assistant'):
|
|
555
|
-
if fence := extract_fence_call(m.text):
|
|
556
|
-
lang, code = fence
|
|
557
|
-
out = await run_fence_tool(lang, code, self.ns)
|
|
558
|
-
for p in reversed(m.content):
|
|
559
|
-
if p.type == PartType.text: p.text += out; break
|
|
560
|
-
if stream: yield {'text': out}
|
|
561
|
-
toolloop = True
|
|
507
|
+
self.toolloop, self.prompt, tmsg = False, None, None
|
|
508
|
+
async for o in self._call_cbs('before_tool_calls'): yield o
|
|
562
509
|
if stcs:= _srvtools(res.tool_calls):
|
|
563
510
|
for tc in stcs: yield tc
|
|
564
511
|
if tcs := _usrtools(res.tool_calls):
|
|
@@ -566,29 +513,23 @@ async def _call(self:AsyncChat, msg=None, prefill=None, temp=None, think=None, s
|
|
|
566
513
|
tmsg = mk_tool_res_msg(tcs, tres)
|
|
567
514
|
for r in tmsg.content: yield r
|
|
568
515
|
self.hist.append(tmsg)
|
|
569
|
-
if step>=max_steps-1 or _has_stop(tmsg.content): prompt,tool_choice,search = mk_msg(final_prompt),'none',False
|
|
570
|
-
toolloop = True
|
|
516
|
+
if step>=max_steps-1 or _has_stop(tmsg.content): self.prompt,tool_choice,search = mk_msg(final_prompt),'none',False
|
|
517
|
+
self.toolloop = True
|
|
571
518
|
|
|
572
|
-
|
|
519
|
+
async for o in self._call_cbs('after_tool_calls'): yield o
|
|
520
|
+
if self.toolloop and step <= max_steps:
|
|
573
521
|
try:
|
|
574
522
|
async for result in self._call(
|
|
575
|
-
prompt,
|
|
523
|
+
self.prompt, None, temp, think, search, stream, max_steps, step+1,
|
|
576
524
|
final_prompt, tool_choice=tool_choice, **kwargs): yield result
|
|
577
525
|
except ContextWindowExceededError:
|
|
578
|
-
|
|
579
|
-
|
|
526
|
+
if tmsg is not None:
|
|
527
|
+
for p in tmsg.content:
|
|
528
|
+
if len(p.text)>1000: p.text = _cwe_msg + _trunc_str(p.text, mx=1000)
|
|
580
529
|
async for result in self._call(
|
|
581
|
-
prompt,
|
|
530
|
+
self.prompt, None, temp, think, search, stream, max_steps, step+1,
|
|
582
531
|
final_prompt, tool_choice='none', **kwargs): yield result
|
|
583
532
|
|
|
584
|
-
# %% ../nbs/07_chat.ipynb #4dc002da
|
|
585
|
-
async def run_fence_tool(lang, code, ns):
|
|
586
|
-
"Run the mapped tool for `lang` with the code, return result fence"
|
|
587
|
-
tname = _lang2tool[lang]
|
|
588
|
-
arg = dict(code=code) if lang == 'py' else dict(command=code)
|
|
589
|
-
res = _mk_tool_result(await call_func_async(tname, arg, ns=ns, raise_on_err=False))
|
|
590
|
-
return _mk_result_fence(_trunc_str(str(res)))
|
|
591
|
-
|
|
592
533
|
# %% ../nbs/07_chat.ipynb #1361515a
|
|
593
534
|
@patch
|
|
594
535
|
@delegates(AsyncChat._call)
|
|
@@ -611,6 +552,141 @@ async def __call__(
|
|
|
611
552
|
async for res in result_gen: pass
|
|
612
553
|
return res # normal chat behavior only return last msg
|
|
613
554
|
|
|
555
|
+
# %% ../nbs/07_chat.ipynb #a4bbd2ce
|
|
556
|
+
class ChatCallback(GetAttr):
|
|
557
|
+
order,_default,chat,run = 0,'chat',None,True
|
|
558
|
+
def __repr__(self): return type(self).__name__
|
|
559
|
+
|
|
560
|
+
# %% ../nbs/07_chat.ipynb #2f02135c
|
|
561
|
+
@patch
|
|
562
|
+
async def _call_cbs(self:AsyncChat, event):
|
|
563
|
+
for cb in self.cbs.sorted('order'):
|
|
564
|
+
if cb.run and hasattr(cb, event):
|
|
565
|
+
async for o in getattr(cb, event)(): yield o
|
|
566
|
+
|
|
567
|
+
# %% ../nbs/07_chat.ipynb #cf3f064c
|
|
568
|
+
class DeepseekMsgsCallback(ChatCallback):
|
|
569
|
+
order = 10
|
|
570
|
+
async def after_msgs(self):
|
|
571
|
+
if 'deepseek' not in self.model: return
|
|
572
|
+
for m in self.turn_msgs:
|
|
573
|
+
if m.role=='assistant' and not any(p.type==PartType.thinking for p in m.content):
|
|
574
|
+
m.content.append(Part(PartType.thinking, ''))
|
|
575
|
+
if False: yield
|
|
576
|
+
|
|
577
|
+
# %% ../nbs/07_chat.ipynb #14baac3e
|
|
578
|
+
class DeepseekPrefillCallback(ChatCallback):
|
|
579
|
+
order = 10
|
|
580
|
+
async def before_acomplete(self):
|
|
581
|
+
if self.prefill and self.vendor_name == 'deepseek' and self.model.startswith("deepseek-"):
|
|
582
|
+
self.chat.turn_kwargs['base_url'] = 'https://api.deepseek.com/beta'
|
|
583
|
+
if False: yield
|
|
584
|
+
|
|
585
|
+
# %% ../nbs/07_chat.ipynb #ce47dc4a
|
|
586
|
+
def add_warning(r, msg):
|
|
587
|
+
wrn = Part(PartType.text, f"<warning>{msg}</warning>")
|
|
588
|
+
if r.message.content: r.message.content.append(wrn)
|
|
589
|
+
else: r.message.content = [wrn]
|
|
590
|
+
|
|
591
|
+
# %% ../nbs/07_chat.ipynb #b6ea161d
|
|
592
|
+
def _handle_stop_reason(res):
|
|
593
|
+
"Returns (action, warning_msg) - action is 'warning', 'pause', or None"
|
|
594
|
+
sr = stop_reason(res)
|
|
595
|
+
if sr == 'length': return 'warning', 'Response was cut off at token limit.'
|
|
596
|
+
if sr == 'refusal': return 'warning', 'AI server provider content filter was applied to this request'
|
|
597
|
+
if sr == 'content_filter': return 'warning', 'AI server provider content filter was applied to this request.'
|
|
598
|
+
# if sr == 'pause_turn': return 'retry', None # TODO: Not a canonical finish reason
|
|
599
|
+
return None, None
|
|
600
|
+
|
|
601
|
+
# %% ../nbs/07_chat.ipynb #daf876f4
|
|
602
|
+
class StopReasonCallback(ChatCallback):
|
|
603
|
+
order = 40
|
|
604
|
+
async def after_acomplete(self):
|
|
605
|
+
action, msg = _handle_stop_reason(self.turn_res)
|
|
606
|
+
if action == 'warning': add_warning(self.chat.turn_res, msg)
|
|
607
|
+
if False: yield
|
|
608
|
+
|
|
609
|
+
# %% ../nbs/07_chat.ipynb #aa7630b2
|
|
610
|
+
def _active_fence_langs(tool_schemas):
|
|
611
|
+
"Return set of active fence langs whose mapped tool is registered"
|
|
612
|
+
if not tool_schemas: return set()
|
|
613
|
+
names = {nested_idx(t, 'function', 'name') for t in tool_schemas}
|
|
614
|
+
return {lang for lang, tname in _lang2tool.items() if tname in names}
|
|
615
|
+
|
|
616
|
+
# %% ../nbs/07_chat.ipynb #72274cdc
|
|
617
|
+
async def run_fence_tool(lang, code, ns):
|
|
618
|
+
"Run the mapped tool for `lang` with the code, return result fence"
|
|
619
|
+
tname = _lang2tool[lang]
|
|
620
|
+
arg = dict(code=code) if lang == 'py' else dict(command=code)
|
|
621
|
+
res = _mk_tool_result(await call_func_async(tname, arg, ns=ns, raise_on_err=False))
|
|
622
|
+
return _mk_result_fence(_trunc_str(str(res)))
|
|
623
|
+
|
|
624
|
+
# %% ../nbs/07_chat.ipynb #740ee3a4
|
|
625
|
+
class FenceToolCallback(ChatCallback):
|
|
626
|
+
order = 20
|
|
627
|
+
|
|
628
|
+
async def after_msgs(self):
|
|
629
|
+
self.chat.turn_msgs = _split_fence_msgs(self.turn_msgs)
|
|
630
|
+
if False: yield
|
|
631
|
+
|
|
632
|
+
async def before_acomplete(self):
|
|
633
|
+
if langs := _active_fence_langs(self.tool_schemas):
|
|
634
|
+
if not any(isinstance(s, FenceToolStop) for s in self.turn_kwargs.get('stop_callables', [])):
|
|
635
|
+
self.chat.turn_kwargs['stop_callables'] = self.turn_kwargs.get('stop_callables', []) + [FenceToolStop(langs)]
|
|
636
|
+
if False: yield
|
|
637
|
+
|
|
638
|
+
async def before_tool_calls(self):
|
|
639
|
+
if not _active_fence_langs(self.tool_schemas): return
|
|
640
|
+
if m := last(self.hist, lambda o: o.role == 'assistant'):
|
|
641
|
+
if fence := extract_fence_call(m.text):
|
|
642
|
+
lang, code = fence
|
|
643
|
+
out = await run_fence_tool(lang, code, self.ns)
|
|
644
|
+
for p in reversed(m.content):
|
|
645
|
+
if p.type == PartType.text: p.text += out; break
|
|
646
|
+
self.chat.toolloop = True
|
|
647
|
+
if self.stream: yield {'text': out}
|
|
648
|
+
|
|
649
|
+
# %% ../nbs/07_chat.ipynb #1897aea2
|
|
650
|
+
def _inject_tool_reminder(msgs, reminder):
|
|
651
|
+
i = len(msgs)
|
|
652
|
+
while i>0 and msgs[i-1].role=='tool': i-=1
|
|
653
|
+
if i>=len(msgs): return msgs
|
|
654
|
+
msgs,m = list(msgs),msgs[i]
|
|
655
|
+
m.content.append(Part(type=PartType.text, text=reminder))
|
|
656
|
+
msgs[i] = m
|
|
657
|
+
return msgs
|
|
658
|
+
|
|
659
|
+
# %% ../nbs/07_chat.ipynb #1b404e0f
|
|
660
|
+
_tool_reminder = '\n<system-reminder>After *EVERY* tool call result, no matter how small, briefly summarise in prose what you found, before continuing or calling another tool.</system-reminder>'
|
|
661
|
+
|
|
662
|
+
# %% ../nbs/07_chat.ipynb #fab308b7
|
|
663
|
+
class ToolReminderCallback(ChatCallback):
|
|
664
|
+
order = 30
|
|
665
|
+
def __init__(self, tool_reminder=_tool_reminder): store_attr()
|
|
666
|
+
async def after_msgs(self):
|
|
667
|
+
self.chat.turn_msgs = _inject_tool_reminder(self.turn_msgs, self.tool_reminder)
|
|
668
|
+
if False: yield
|
|
669
|
+
|
|
670
|
+
# %% ../nbs/07_chat.ipynb #423caa31
|
|
671
|
+
def stop_sequences(seqs):
|
|
672
|
+
"Stop when any sequence appears in the accumulated completion text."
|
|
673
|
+
seqs = L(seqs)
|
|
674
|
+
def _stop(text):
|
|
675
|
+
for s in seqs:
|
|
676
|
+
if s in text: return text[:text.find(s)+len(s)]
|
|
677
|
+
return _stop
|
|
678
|
+
|
|
679
|
+
# %% ../nbs/07_chat.ipynb #663eee29
|
|
680
|
+
class StopSequencesCallback(ChatCallback):
|
|
681
|
+
order = 30
|
|
682
|
+
def __init__(self, seqs): self.seqs = L(seqs)
|
|
683
|
+
async def before_acomplete(self):
|
|
684
|
+
self.chat.turn_kwargs['stop_callables'] = self.turn_kwargs.get('stop_callables', []) + [stop_sequences(self.seqs)]
|
|
685
|
+
if False: yield
|
|
686
|
+
|
|
687
|
+
# %% ../nbs/07_chat.ipynb #318ee856
|
|
688
|
+
defaults.chat_callbacks = [DeepseekPrefillCallback, FenceToolCallback, ToolReminderCallback, StopReasonCallback]
|
|
689
|
+
|
|
614
690
|
# %% ../nbs/07_chat.ipynb #115fd94f
|
|
615
691
|
def _trunc_param(v, mx=40):
|
|
616
692
|
"Truncate and escape param value for display"
|
fastllm/streaming.py
CHANGED
|
@@ -116,7 +116,7 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
|
|
|
116
116
|
stop, stop_yielded = False, False
|
|
117
117
|
async for d in it:
|
|
118
118
|
# Check stop condition and yield stop delta
|
|
119
|
-
stop = stop_and_trim(part_accum, d, stop_callables)
|
|
119
|
+
if not stop: stop = stop_and_trim(part_accum, d, stop_callables)
|
|
120
120
|
if stop and not stop_yielded:
|
|
121
121
|
for r in _yield_parts(d): yield r
|
|
122
122
|
stop_yielded = True
|
fastllm/types.py
CHANGED
|
@@ -40,7 +40,7 @@ def _repr_markdown_(self: Part):
|
|
|
40
40
|
|
|
41
41
|
{body}
|
|
42
42
|
|
|
43
|
-
<details>
|
|
43
|
+
<details markdown='1'>
|
|
44
44
|
|
|
45
45
|
- data: `{data}`
|
|
46
46
|
|
|
@@ -80,7 +80,7 @@ def _repr_markdown_(self: ToolCall):
|
|
|
80
80
|
extra = _trunc_strs(self.extra)
|
|
81
81
|
return f"""🔧 **{self.name}**(`{self.arguments}`)
|
|
82
82
|
|
|
83
|
-
<details>
|
|
83
|
+
<details markdown='1'>
|
|
84
84
|
|
|
85
85
|
- id: `{self.id}`
|
|
86
86
|
- server: `{self.server}`
|
|
@@ -135,7 +135,7 @@ def _repr_markdown_(self: Completion):
|
|
|
135
135
|
det_str = '\n- '.join(details)
|
|
136
136
|
return f"""{content}
|
|
137
137
|
|
|
138
|
-
<details>
|
|
138
|
+
<details markdown='1'>
|
|
139
139
|
|
|
140
140
|
- {det_str}
|
|
141
141
|
|
|
@@ -1,21 +1,21 @@
|
|
|
1
|
-
fastllm/__init__.py,sha256=
|
|
2
|
-
fastllm/_modidx.py,sha256=
|
|
3
|
-
fastllm/acomplete.py,sha256=
|
|
1
|
+
fastllm/__init__.py,sha256=R9xOYoYrWKcfO5zvTeGC3m_eDNOvxMd8CocQs2tLufo,22
|
|
2
|
+
fastllm/_modidx.py,sha256=BpUAd8BeXNaOlvK5Zxt26Ws0pMbAWX1RHus-fQyt31k,33014
|
|
3
|
+
fastllm/acomplete.py,sha256=p6g_LASZz5u4vhFjh-vJKw1ImhBLW090_Y-TJYmVyDo,6649
|
|
4
4
|
fastllm/anthropic.py,sha256=fG20kOv3d3wGKQe8rD5pFWgZHKe-vT-9QJ3nPXh2twY,14615
|
|
5
|
-
fastllm/chat.py,sha256
|
|
5
|
+
fastllm/chat.py,sha256=-FZ4YSyVnKEs7eYU8mU4LLCsY1yd-BRtadLaWYhZvpk,35695
|
|
6
6
|
fastllm/gemini.py,sha256=E1EYMfV8IMpC_-WzlDrkhz_CJQmzmxvaVUucNgPOqSA,14947
|
|
7
7
|
fastllm/openai_chat.py,sha256=wZ0HI0m9ipy9XVhqmYBXf-BmkVAOipUVwqu9NGB_rJU,10941
|
|
8
8
|
fastllm/openai_responses.py,sha256=Nk5bfTCF2-a17nwvIsf-u39j539v9KIduVfScECItKk,13052
|
|
9
|
-
fastllm/streaming.py,sha256=
|
|
10
|
-
fastllm/types.py,sha256=
|
|
9
|
+
fastllm/streaming.py,sha256=1NvHN4yZKXfIKj5qqZcJRUDkHab4rbhQeRFqjSSgHQc,6665
|
|
10
|
+
fastllm/types.py,sha256=FyiFHLfZ2Nzf0M5BS2mFqNUiQn8IHUAVU0vfKKnxnfw,12343
|
|
11
11
|
fastllm/specs/anthropic.json,sha256=VCgTjM2_HoDpCkeu3q_TCOEZLMHriJZLAG3LnDBAgGM,541035
|
|
12
12
|
fastllm/specs/anthropic.yml,sha256=3S3NAKdXB1Nwp-Sn9Gmh4tBnwhGGhMO3DXkGqPXPUYs,724122
|
|
13
13
|
fastllm/specs/gemini.json,sha256=zJGOdvZ2BvCiTENZt0-BDEvNBMl8h6EBmEskle_WBto,309331
|
|
14
14
|
fastllm/specs/openai.with-code-samples.json,sha256=Kto19AW1u8MfxVDJ4cFVBIdZQOIyy8NWylswo57eABU,1995929
|
|
15
15
|
fastllm/specs/openai.with-code-samples.yml,sha256=DlcWGdaeP4k7smVjt6UbyehJ-2XGU3rn3nCIBMDRfYU,2553630
|
|
16
16
|
fastllm/specs/spec_manifest.json,sha256=9tVFwojXFnNqsAxQzCRTP1lgSIM0fXixnrXdv4Cmb0c,653
|
|
17
|
-
python_fastllm-0.0.
|
|
18
|
-
python_fastllm-0.0.
|
|
19
|
-
python_fastllm-0.0.
|
|
20
|
-
python_fastllm-0.0.
|
|
21
|
-
python_fastllm-0.0.
|
|
17
|
+
python_fastllm-0.0.7.dist-info/METADATA,sha256=qGlquhrnY9uW1GqHmuwoWYq3sRmvzvj2DAEH3PKRVOg,19546
|
|
18
|
+
python_fastllm-0.0.7.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
19
|
+
python_fastllm-0.0.7.dist-info/entry_points.txt,sha256=dq0chsiRjJYStCOXweFW9L6LpyMTjWu2AabKCbTSbuI,36
|
|
20
|
+
python_fastllm-0.0.7.dist-info/top_level.txt,sha256=F8qodL7nEGUHGmzzqfhNKCTIr1i0D6cvudOnm3z7o0Y,8
|
|
21
|
+
python_fastllm-0.0.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|