PyPI - python-fastllm - Versions diffs - 0.0.6__tar.gz → 0.0.8__tar.gz - Mend

python-fastllm 0.0.6 (tar.gz) → 0.0.8 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)

{python_fastllm-0.0.6 → python_fastllm-0.0.8}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: python-fastllm
-Version: 0.0.6
+Version: 0.0.8
 Author-email: Kerem Turgutlu <keremturgutlu@gmail.com>
 License: Apache-2.0
 Project-URL: Repository, https://github.com/AnswerDotAI/fastllm

python_fastllm-0.0.8/fastllm/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ __version__ = "0.0.8"

{python_fastllm-0.0.6 → python_fastllm-0.0.8}/fastllm/_modidx.py RENAMED Viewed

@@ -10,6 +10,7 @@ d = { 'settings': { 'branch': 'main',
                                    'fastllm.acomplete._classify_error': ('acomplete.html#_classify_error', 'fastllm/acomplete.py'),
                                    'fastllm.acomplete._classify_error_stream': ( 'acomplete.html#_classify_error_stream',
                                                                                  'fastllm/acomplete.py'),
+                                   'fastllm.acomplete._debug_print': ('acomplete.html#_debug_print', 'fastllm/acomplete.py'),
                                    'fastllm.acomplete._is_ctx_exceeded': ('acomplete.html#_is_ctx_exceeded', 'fastllm/acomplete.py'),
                                    'fastllm.acomplete.acomplete': ('acomplete.html#acomplete', 'fastllm/acomplete.py'),
                                    'fastllm.acomplete.mk_client': ('acomplete.html#mk_client', 'fastllm/acomplete.py')},
@@ -31,6 +32,7 @@ d = { 'settings': { 'branch': 'main',
                                    'fastllm.anthropic.denorm_tool_use': ('anthropic.html#denorm_tool_use', 'fastllm/anthropic.py'),
                                    'fastllm.anthropic.denorm_user': ('anthropic.html#denorm_user', 'fastllm/anthropic.py'),
                                    'fastllm.anthropic.denorm_web_search': ('anthropic.html#denorm_web_search', 'fastllm/anthropic.py'),
+                                   'fastllm.anthropic.finalize_usage': ('anthropic.html#finalize_usage', 'fastllm/anthropic.py'),
                                    'fastllm.anthropic.get_hdrs': ('anthropic.html#get_hdrs', 'fastllm/anthropic.py'),
                                    'fastllm.anthropic.mk_payload': ('anthropic.html#mk_payload', 'fastllm/anthropic.py'),
                                    'fastllm.anthropic.norm_finish': ('anthropic.html#norm_finish', 'fastllm/anthropic.py'),
@@ -43,24 +45,53 @@ d = { 'settings': { 'branch': 'main',
                               'fastllm.chat.AsyncChat.__call__': ('chat.html#asyncchat.__call__', 'fastllm/chat.py'),
                               'fastllm.chat.AsyncChat.__init__': ('chat.html#asyncchat.__init__', 'fastllm/chat.py'),
                               'fastllm.chat.AsyncChat._call': ('chat.html#asyncchat._call', 'fastllm/chat.py'),
+                              'fastllm.chat.AsyncChat._call_cbs': ('chat.html#asyncchat._call_cbs', 'fastllm/chat.py'),
                               'fastllm.chat.AsyncChat._prep_call': ('chat.html#asyncchat._prep_call', 'fastllm/chat.py'),
                               'fastllm.chat.AsyncChat._prep_msg': ('chat.html#asyncchat._prep_msg', 'fastllm/chat.py'),
                               'fastllm.chat.AsyncChat._track': ('chat.html#asyncchat._track', 'fastllm/chat.py'),
+                              'fastllm.chat.AsyncChat.add_cb': ('chat.html#asyncchat.add_cb', 'fastllm/chat.py'),
+                              'fastllm.chat.AsyncChat.add_cbs': ('chat.html#asyncchat.add_cbs', 'fastllm/chat.py'),
                               'fastllm.chat.AsyncChat.print_hist': ('chat.html#asyncchat.print_hist', 'fastllm/chat.py'),
                               'fastllm.chat.AsyncChat.tcdict': ('chat.html#asyncchat.tcdict', 'fastllm/chat.py'),
                               'fastllm.chat.AsyncStreamFormatter': ('chat.html#asyncstreamformatter', 'fastllm/chat.py'),
                               'fastllm.chat.AsyncStreamFormatter.format_stream': ( 'chat.html#asyncstreamformatter.format_stream',
                                                                                    'fastllm/chat.py'),
+                              'fastllm.chat.ChatCallback': ('chat.html#chatcallback', 'fastllm/chat.py'),
+                              'fastllm.chat.ChatCallback.__repr__': ('chat.html#chatcallback.__repr__', 'fastllm/chat.py'),
+                              'fastllm.chat.DeepseekMsgsCallback': ('chat.html#deepseekmsgscallback', 'fastllm/chat.py'),
+                              'fastllm.chat.DeepseekMsgsCallback.after_msgs': ( 'chat.html#deepseekmsgscallback.after_msgs',
+                                                                                'fastllm/chat.py'),
+                              'fastllm.chat.DeepseekPrefillCallback': ('chat.html#deepseekprefillcallback', 'fastllm/chat.py'),
+                              'fastllm.chat.DeepseekPrefillCallback.before_acomplete': ( 'chat.html#deepseekprefillcallback.before_acomplete',
+                                                                                         'fastllm/chat.py'),
+                              'fastllm.chat.FenceToolCallback': ('chat.html#fencetoolcallback', 'fastllm/chat.py'),
+                              'fastllm.chat.FenceToolCallback.after_msgs': ('chat.html#fencetoolcallback.after_msgs', 'fastllm/chat.py'),
+                              'fastllm.chat.FenceToolCallback.before_acomplete': ( 'chat.html#fencetoolcallback.before_acomplete',
+                                                                                   'fastllm/chat.py'),
+                              'fastllm.chat.FenceToolCallback.before_tool_calls': ( 'chat.html#fencetoolcallback.before_tool_calls',
+                                                                                    'fastllm/chat.py'),
                               'fastllm.chat.FenceToolStop': ('chat.html#fencetoolstop', 'fastllm/chat.py'),
                               'fastllm.chat.FenceToolStop.__call__': ('chat.html#fencetoolstop.__call__', 'fastllm/chat.py'),
                               'fastllm.chat.FenceToolStop.__init__': ('chat.html#fencetoolstop.__init__', 'fastllm/chat.py'),
                               'fastllm.chat.FullResponse': ('chat.html#fullresponse', 'fastllm/chat.py'),
                               'fastllm.chat.Msg.text': ('chat.html#msg.text', 'fastllm/chat.py'),
+                              'fastllm.chat.StopReasonCallback': ('chat.html#stopreasoncallback', 'fastllm/chat.py'),
+                              'fastllm.chat.StopReasonCallback.after_acomplete': ( 'chat.html#stopreasoncallback.after_acomplete',
+                                                                                   'fastllm/chat.py'),
                               'fastllm.chat.StopResponse': ('chat.html#stopresponse', 'fastllm/chat.py'),
+                              'fastllm.chat.StopSequencesCallback': ('chat.html#stopsequencescallback', 'fastllm/chat.py'),
+                              'fastllm.chat.StopSequencesCallback.__init__': ( 'chat.html#stopsequencescallback.__init__',
+                                                                               'fastllm/chat.py'),
+                              'fastllm.chat.StopSequencesCallback.before_acomplete': ( 'chat.html#stopsequencescallback.before_acomplete',
+                                                                                       'fastllm/chat.py'),
                               'fastllm.chat.StreamFormatter': ('chat.html#streamformatter', 'fastllm/chat.py'),
                               'fastllm.chat.StreamFormatter.__init__': ('chat.html#streamformatter.__init__', 'fastllm/chat.py'),
                               'fastllm.chat.StreamFormatter.format_item': ('chat.html#streamformatter.format_item', 'fastllm/chat.py'),
                               'fastllm.chat.StreamFormatter.format_stream': ('chat.html#streamformatter.format_stream', 'fastllm/chat.py'),
+                              'fastllm.chat.ToolReminderCallback': ('chat.html#toolremindercallback', 'fastllm/chat.py'),
+                              'fastllm.chat.ToolReminderCallback.__init__': ('chat.html#toolremindercallback.__init__', 'fastllm/chat.py'),
+                              'fastllm.chat.ToolReminderCallback.after_msgs': ( 'chat.html#toolremindercallback.after_msgs',
+                                                                                'fastllm/chat.py'),
                               'fastllm.chat.ToolResponse': ('chat.html#toolresponse', 'fastllm/chat.py'),
                               'fastllm.chat.UsageStats': ('chat.html#usagestats', 'fastllm/chat.py'),
                               'fastllm.chat.UsageStats.__add__': ('chat.html#usagestats.__add__', 'fastllm/chat.py'),
@@ -116,6 +147,7 @@ d = { 'settings': { 'branch': 'main',
                               'fastllm.chat.stop_reason': ('chat.html#stop_reason', 'fastllm/chat.py'),
                               'fastllm.chat.stop_sequences': ('chat.html#stop_sequences', 'fastllm/chat.py'),
                               'fastllm.chat.structured': ('chat.html#structured', 'fastllm/chat.py')},
+            'fastllm.codex': {},
             'fastllm.gemini': { 'fastllm.gemini._gem_filter_sch': ('gemini.html#_gem_filter_sch', 'fastllm/gemini.py'),
                                 'fastllm.gemini._gem_part_type': ('gemini.html#_gem_part_type', 'fastllm/gemini.py'),
                                 'fastllm.gemini.acollect_stream': ('gemini.html#acollect_stream', 'fastllm/gemini.py'),

{python_fastllm-0.0.6 → python_fastllm-0.0.8}/fastllm/acomplete.py RENAMED Viewed

@@ -3,7 +3,7 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/06_acomplete.ipynb.
 # %% auto #0
-__all__ = ['specs_path', 'ant_spec', 'oai_spec', 'gem_spec', 'vendor_mapping', 'api2spec', 'mk_client',
+__all__ = ['specs_path', 'ant_spec', 'oai_spec', 'gem_spec', 'vendor_mapping', 'api2spec', 'defaults', 'mk_client',
            'ContextWindowExceededError', 'acomplete']
 # %% ../nbs/06_acomplete.ipynb #f2f57253
@@ -98,6 +98,21 @@ async def _classify_error_stream(gen):
         async for x in gen: yield x
     except APIError as e: raise _classify_error(e) from e
+# %% ../nbs/06_acomplete.ipynb #f626a4e1
+defaults = SimpleNamespace(debug_mode=False)
+def _debug_print(model, api_name, vendor_name, payload, func):
+    "Pretty-print acomplete inputs when defaults.debug_mode is set"
+    from pprint import pformat
+    p = dict(payload)
+    if defaults.debug_mode == 'brief' and 'tools' in p:
+        p['tools'] = '; '.join(o.get('name', o.get('type', o)) for o in p['tools'])
+    print('━'*60)
+    print(f"\033[1;36mfastllm debug\033[0m  model={model} vendor={vendor_name} api={api_name} base_url={func.base_url} path={func.path}")
+    print('─'*60)
+    print(f"\033[1;33mpayload:\033[0m\n{pformat(p, width=120, sort_dicts=False)}")
+    print('━'*60)
 # %% ../nbs/06_acomplete.ipynb #2379ec94
 @delegates(payload_kwargs)
 async def acomplete(msgs, model, api_name=None, vendor_name=None, api_key=None, base_url=None, xtra_body=None, xtra_hdrs=None,
@@ -114,6 +129,7 @@ async def acomplete(msgs, model, api_name=None, vendor_name=None, api_key=None,
         if vendor_name == 'deepseek' and 'v4' in model:   payload['messages'][-1]['prefix'] = True
         if vendor_name == 'moonshot' and 'kimi' in model: payload['messages'][-1]['partial'] = True
     func = attrgetter(api.op_path[stream])(cli)
+    if defaults.debug_mode: _debug_print(model, api_name, vendor_name, payload, func)
     try: resp = await func(**payload)
     except APIError as e: raise _classify_error(e) from e
     if stream: return _classify_error_stream(api.acollect_stream(resp, model=model, vendor_name=vendor_name, stop_callables=stop_callables))

{python_fastllm-0.0.6 → python_fastllm-0.0.8}/fastllm/anthropic.py RENAMED Viewed

@@ -1,7 +1,7 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/04_anthropic.ipynb.
 # %% auto #0
-__all__ = ['ant_tc_types', 'api_ns', 'norm_tool_call', 'norm_tool_calls', 'norm_usage', 'norm_finish', 'norm_parts',
+__all__ = ['ant_tc_types', 'norm_tool_call', 'norm_tool_calls', 'norm_usage', 'finalize_usage', 'norm_finish', 'norm_parts',
            'norm_sse_event', 'delta_index_fn', 'acollect_stream', 'denorm_tool_use', 'denorm_assistant', 'denorm_tool',
            'denorm_msgs', 'denorm_tool_schs', 'denorm_tool_choice', 'denorm_reasoning', 'denorm_web_search',
            'denorm_system', 'denorm_user', 'denorm_image', 'denorm_file', 'denorm_tool_result', 'mk_payload',
@@ -42,7 +42,18 @@ def norm_usage(resp):
     pt = int(usg.get("input_tokens", 0) or 0) + cached + cache_creation
     ct = int(usg.get("output_tokens", 0) or 0)
     return Usage(prompt_tokens=pt, completion_tokens=ct, total_tokens=pt + ct,
-                 cached_tokens=cached, cache_creation_tokens=cache_creation, raw=usg)
+                 cached_tokens=cached, cache_creation_tokens=cache_creation, reasoning_tokens=0, raw=usg)
+def finalize_usage(usg, parts):
+    "Adjust usage using finalized Anthropic content parts."
+    if not usg: return usg
+    rc = '\n'.join(p.text or '' for p in parts if p.type == PartType.thinking)
+    ct = int(usg.raw.get('output_tokens', usg.completion_tokens) or 0)
+    rt = min(int(len(rc.split())*1.5), ct) if rc else 0
+    res = Usage(prompt_tokens=usg.prompt_tokens, completion_tokens=ct-rt, total_tokens=usg.prompt_tokens+ct,
+                 cached_tokens=usg.cached_tokens, cache_creation_tokens=usg.cache_creation_tokens, reasoning_tokens=rt, raw=usg.raw)
+    print(res)
+    return res
 # %% ../nbs/04_anthropic.ipynb #7a8b1f8f
 def norm_finish(resp, tcs=None):
@@ -197,7 +208,7 @@ def denorm_reasoning(v):
 def denorm_web_search(v):
     "Map canonical web_search_options to Anthropic hosted web_search tool."
     _max_uses = {"low": 1, "medium": 5, "high": 10}
-    t = {"type": "web_search_20260209", "name": "web_search"}
+    t = {"type": "web_search_20250305", "name": "web_search"}
     if (typ := (v or {}).get("type")): t["type"] = typ
     if (s := (v or {}).get("search_context_size")):
         t["max_uses"] = _max_uses.get(s, 5)
@@ -286,13 +297,6 @@ def cost(usage, m):
     return cost
 # %% ../nbs/04_anthropic.ipynb #f7c0b989
-api_ns = dict(norm_tool_calls=norm_tool_calls,
-                norm_parts=norm_parts,
-                norm_finish=norm_finish,
-                norm_usage=norm_usage,
-                acollect_stream=acollect_stream,
-                mk_payload=mk_payload,
-                cost=cost,
-                get_hdrs=get_hdrs,
-                op_path=('messages.messages_post','messages.messages_post'))
-api_registry.register('anthropic', **api_ns)
+api_registry.register('anthropic', norm_tool_calls=norm_tool_calls, norm_parts=norm_parts, norm_finish=norm_finish, norm_usage=norm_usage,
+    finalize_usage=finalize_usage, acollect_stream=acollect_stream, mk_payload=mk_payload, cost=cost, get_hdrs=get_hdrs,
+    op_path=('messages.messages_post','messages.messages_post'))

{python_fastllm-0.0.6 → python_fastllm-0.0.8}/fastllm/chat.py RENAMED Viewed

@@ -4,10 +4,12 @@
 # %% auto #0
 __all__ = ['tool_dtls_tag', 're_tools', 'token_dtls_tag', 're_token', 'effort', 'remove_cache_ckpts', 'contents', 'stop_reason',
-           'mk_msg', 'FenceToolStop', 'extract_fence_call', 'stop_sequences', 'split_tools', 'fmt2hist', 'mk_msgs',
-           'cite_footnote', 'postproc', 'lite_mk_func', 'ToolResponse', 'structured', 'StopResponse', 'FullResponse',
-           'search_count', 'UsageStats', 'AsyncChat', 'add_warning', 'astream_with_complete', 'run_fence_tool',
-           'mk_tr_details', 'mk_srv_tc_details', 'StreamFormatter', 'AsyncStreamFormatter', 'adisplay_stream']
+           'mk_msg', 'FenceToolStop', 'extract_fence_call', 'split_tools', 'fmt2hist', 'mk_msgs', 'cite_footnote',
+           'postproc', 'lite_mk_func', 'ToolResponse', 'structured', 'StopResponse', 'FullResponse', 'search_count',
+           'UsageStats', 'AsyncChat', 'astream_with_complete', 'ChatCallback', 'DeepseekMsgsCallback',
+           'DeepseekPrefillCallback', 'add_warning', 'StopReasonCallback', 'run_fence_tool', 'FenceToolCallback',
+           'ToolReminderCallback', 'stop_sequences', 'StopSequencesCallback', 'mk_tr_details', 'mk_srv_tc_details',
+           'StreamFormatter', 'AsyncStreamFormatter', 'adisplay_stream']
 # %% ../nbs/07_chat.ipynb #d5a3bc1f
 import asyncio, base64, json, mimetypes, random, string, ast, warnings
@@ -55,7 +57,7 @@ def remove_cache_ckpts(msg):
     return msg
 def _mk_content(o):
-    if isinstance(o, str): return Part(type=PartType.text, text=o.strip())
+    if isinstance(o, str): return Part(type=PartType.text, text=o)
     elif isinstance(o,bytes): return _bytes2content(o)
     return o
@@ -87,16 +89,16 @@ def mk_msg(
     return _add_cache_control(msg, ttl=ttl) if cache else msg
 # %% ../nbs/07_chat.ipynb #db466e1c
-tool_dtls_tag = "<details class='tool-usage-details'>"
+tool_dtls_tag = "<details class='tool-usage-details' markdown='1'>"
 re_tools = re.compile(fr"^({tool_dtls_tag}\n*(?:<summary>(?P<summary>.*?)</summary>\n*)?\n*```json\n+(.*?)\n+```\n+</details>)",
                       flags=re.DOTALL|re.MULTILINE)
-token_dtls_tag = "<details class='token-usage-details'>"
-re_token = re.compile(fr"^{re.escape(token_dtls_tag)}<summary>.*?</summary>\n*\n*`.*?`\n*\n*</details>\n?",
+token_dtls_tag = "<details class='token-usage-details' markdown='1'>"
+re_token = re.compile(fr"^{re.escape(token_dtls_tag)}\n*<summary>.*?</summary>\n*\n*`.*?`\n*\n*</details>\n?",
                       flags=re.DOTALL|re.MULTILINE)
 # %% ../nbs/07_chat.ipynb #be998131
 _fence_back = '`````'
-_fence_re = re.compile(f'^{_fence_back}(py|bash)\n(.*?)\n{_fence_back}', re.DOTALL | re.MULTILINE)
+_fence_re = re.compile(f'^{_fence_back}(py|bash)\n(.*?)\n{_fence_back}$', re.DOTALL | re.MULTILINE)
 _result_re = re.compile(f'\n{_fence_back}result\n(.*?)\n{_fence_back}\n', re.DOTALL)
 _lang2tool = dict(py='python', bash='bash')
@@ -144,15 +146,6 @@ def _split_fence_msgs(msgs):
     for m in msgs: res.extend(_split_msg_on_fences(m))
     return res
-# %% ../nbs/07_chat.ipynb #b161ca9e
-def stop_sequences(seqs):
-    "Stop when any sequence appears in the accumulated completion text."
-    seqs = L(seqs)
-    def _stop(text):
-        for s in seqs:
-            if s in text: return text[:text.find(s)+len(s)]
-    return _stop
 # %% ../nbs/07_chat.ipynb #45ada210
 def _extract_tool_parts(text:str):
     "Extract (tool_use_part, tool_result_part) from <details> json block"
@@ -216,7 +209,9 @@ def mk_msgs(
     "Create a list of fastllm canonical Msgs."
     if not msgs: return []
     if not isinstance(msgs, list): msgs = [msgs]
-    msgs = L(msgs).map(lambda m: fmt2hist(m) if isinstance(m,str) and tool_dtls_tag in m else [m]).concat()
+    msgs = L(msgs).map(lambda m:
+        fmt2hist(m) if isinstance(m,str) and (tool_dtls_tag in m or token_dtls_tag in m) else [m]
+    ).concat()
     res, role = [], 'user'
     for m in msgs:
         res.append(msg := remove_cache_ckpts(mk_msg(m, role=role)))
@@ -304,9 +299,11 @@ def _has_stop(tres_parts): return any(isinstance(p.text, StopResponse) for p in
 def _trunc_str(s, mx=2000, skip=10, replace="TRUNCATED"):
     "Truncate `s` to `mx` chars max, adding `replace` if truncated"
     if not isinstance(s, str): s = str(s)
-    if len(s)>2 and s[0]=='𝍁' and s[-1]=='𝍁': return s[1:-1]
+    s = s.rstrip()
+    if len(s)>2 and s[0]=='𝍁' and s[-1]=='𝍁':
+        s = s[1:-1]
+        if replace: return s
     if isinstance_str(s, ('FullResponse','Safe')): return s
-    s = str(s).strip()
     if len(s)<=mx: return s
     s = s[skip:mx-skip]
     ss = s.split(' ')
@@ -362,24 +359,7 @@ class UsageStats:
         summ = f"${self.cost:.4f}" if self.cost else f"{self.total_tokens:,} tokens"
         return f"\n\n{token_dtls_tag}<summary>{summ}</summary>\n\n`{self!r}`\n\n</details>\n"
-# %% ../nbs/07_chat.ipynb #67fd51cb
-def _inject_tool_reminder(msgs, reminder):
-    i = len(msgs)
-    while i>0 and msgs[i-1].role=='tool': i-=1
-    if i>=len(msgs): return msgs
-    msgs,m = list(msgs),msgs[i]
-    m.content.append(Part(type=PartType.text, text=reminder))
-    msgs[i] = m
-    return msgs
-# %% ../nbs/07_chat.ipynb #e7eb2032
-def _active_fence_langs(tool_schemas):
-    "Return set of active fence langs whose mapped tool is registered"
-    if not tool_schemas: return set()
-    names = {nested_idx(t, 'function', 'name') for t in tool_schemas}
-    return {lang for lang, tname in _lang2tool.items() if tname in names}
-# %% ../nbs/07_chat.ipynb #e9a14051
+# %% ../nbs/07_chat.ipynb #cb3d7e77
 class AsyncChat:
     def __init__(
         self,
@@ -399,7 +379,8 @@ class AsyncChat:
         base_url=None,            # API base url when model can't be resolved or vendor_name is not known
         extra_headers=None,       # Extra HTTP headers for custom providers
         markup=0,                 # Cost markup multiplier (e.g. 0.5 for 50%)
-        tool_reminder=None,       # Prepended as a block to the first trailing tool result (transient)
+        cbs:list=None,            # Chat callbacks
+        default_cbs=True          # Whether to include default callbacks
     ):
         "LiteLLM chat client."
         self.model = model
@@ -408,7 +389,10 @@ class AsyncChat:
         elif ns is None: ns = globals()
         self.tool_schemas = [lite_mk_func(t) for t in tools] if tools else None
         self.use = UsageStats()
-        store_attr()
+        store_attr(but='cbs')
+        self.cbs = L()
+        if default_cbs: self.add_cbs(defaults.chat_callbacks)
+        self.add_cbs(cbs)
     def _prep_msg(self, msg=None, prefill=None):
         "Prepare the system prompt and messages list for the API call"
@@ -422,14 +406,6 @@ class AsyncChat:
         self.hist = mk_msgs(self.hist, self.cache and 'claude' in self.model, cache_idxs, self.ttl)
         msgs = self.hist
         if prefill: msgs = self.hist + [Msg(role='assistant', content=[Part(PartType.text, prefill)])]
-        msgs = _split_fence_msgs(msgs)
-        if self.tool_reminder: msgs = _inject_tool_reminder(msgs, self.tool_reminder)
-        if 'deepseek' in self.model:
-            # The `reasoning_content` in the thinking mode must be passed back to the API.
-            for m in msgs:
-                if m.role=='assistant':
-                    if not any(p.type==PartType.thinking for p in m.content):
-                        m.content.append(Part(PartType.thinking, ''))
         return sp, msgs
     @property
@@ -439,39 +415,35 @@ class AsyncChat:
         u.cost *= (1 + self.markup)
         self.use += u
+    def add_cb(self, cb):
+        if isinstance(cb, type): cb = cb()
+        cb.chat = self
+        self.cbs.append(cb)
+        return self
+    def add_cbs(self, cbs):
+        if cbs is None: return self
+        L(cbs).map(self.add_cb)
+        return self
 # %% ../nbs/07_chat.ipynb #2e469ea1
 def _srvtools(tcs): return L(tcs).filter(lambda o: o.server) if tcs else None
 def _usrtools(tcs): return L(tcs).filter(lambda o: not o.server) if tcs else None
-# %% ../nbs/07_chat.ipynb #a2e70fbb
-def add_warning(r, msg):
-    wrn = Part(PartType.text, f"<warning>{msg}</warning>")
-    if r.message.content: r.message.content.append(wrn)
-    else: r.message.content = [wrn]
-# %% ../nbs/07_chat.ipynb #e16195f9
-def _handle_stop_reason(res):
-    "Returns (action, warning_msg) - action is 'warning', 'pause', or None"
-    sr = stop_reason(res)
-    if sr == 'length': return 'warning', 'Response was cut off at token limit.'
-    if sr == 'refusal': return 'warning', 'AI server provider content filter was applied to this request'
-    if sr == 'content_filter': return 'warning', 'AI server provider content filter was applied to this request.'
-    # if sr == 'pause_turn': return 'retry', None # TODO: Not a canonical finish reason
-    return None, None
 # %% ../nbs/07_chat.ipynb #19b87f53
 def _think_kw(model, think, vendor_name):
     if not think: return {}
     if 'opus-4-7' in model:
         e = 'xhigh' if think=='h' else effort.get(think)
-        return dict(thinking={"type":"adaptive", "display":"summarized"}, output_config={"effort":e})
+        eff = dict(thinking={"type":"adaptive", "display":"summarized"}, output_config={"effort":e})
+        return dict(reasoning_effort=eff)
     try: xhigh = get_model_info(model, vendor_name).get('supports_xhigh_reasoning_effort')
     except: xhigh = False
     eff = effort.get(think) if think!='x' else 'xhigh' if xhigh else 'high'
     if vendor_name == 'codex': return dict(reasoning_effort={'effort':eff, 'summary':'auto'})
     return dict(reasoning_effort=eff)
-# %% ../nbs/07_chat.ipynb #b3f28523
+# %% ../nbs/07_chat.ipynb #06e898fd
 @patch
 def _prep_call(self:AsyncChat, prefill, search, max_tokens, kwargs, stream=False, think=None):
     "Prepare model info, prefill, search, and provider kwargs for a completion call"
@@ -483,19 +455,14 @@ def _prep_call(self:AsyncChat, prefill, search, max_tokens, kwargs, stream=False
         kwargs['web_search_options']['search_context_size'] = effort[s]
         if self.vendor_name == 'codex': kwargs['web_search_options']['type'] = 'web_search'
     else: kwargs.pop('web_search_options', None)
-    # kwargs['additional_drop_params'] = ['temperature'] # TODO: What is this for?
     if self.api_name:      kwargs['api_name'] = self.api_name
     if self.vendor_name:   kwargs['vendor_name'] = self.vendor_name
     if self.api_key:       kwargs['api_key'] = self.api_key
     if self.base_url:      kwargs['base_url'] = self.base_url
     if self.extra_headers: kwargs['xtra_headers'] = self.extra_headers
     kwargs.update(_think_kw(self.model, think, self.vendor_name))
-    if (langs := _active_fence_langs(self.tool_schemas)):
-        if not any(isinstance(s, FenceToolStop) for s in kwargs.get('stop_callables', [])):
-            kwargs['stop_callables'] = kwargs.get('stop_callables', []) + [FenceToolStop(langs)]
     return prefill, max_tokens
 # %% ../nbs/07_chat.ipynb #07951b77
 @patch
 def print_hist(self:AsyncChat):
@@ -515,50 +482,35 @@ async def astream_with_complete(self, agen, postproc=noop):
         if not isinstance(chunk, Completion): yield postproc(chunk)
     self.value = chunk
-# %% ../nbs/07_chat.ipynb #baf28c01
+# %% ../nbs/07_chat.ipynb #a049cf52
 @patch
 @delegates(acomplete)
 async def _call(self:AsyncChat, msg=None, prefill=None, temp=None, think=None, search=None, stream=False, max_steps=2, step=1,
         final_prompt=None, tool_choice=None, max_tokens=None, n_workers=8, pause=0.001, tc_timeout=7200, **kwargs):
     if step>max_steps+1: return
-    prefill, max_tokens = self._prep_call(prefill, search, max_tokens, kwargs, stream=stream, think=think)
-    sp,msgs = self._prep_msg(msg,prefill)
-    if prefill and self.vendor_name == 'deepseek' and self.model in ("deepseek-v4-flash", "deepseek-v4-pro"):
-        kwargs['base_url'] = 'https://api.deepseek.com/beta'
-    # TODO: num_retries=2 is this needed? If so add.
-    # caching removed, cache checkpoints are added for Anthropic and other providers do implicit caching
-    res = await acomplete(msgs, self.model, system=sp, stream=stream,
+    self.prefill, max_tokens = self._prep_call(prefill, search, max_tokens, kwargs, stream=stream, think=think)
+    self.turn_sysp, self.turn_msgs = self._prep_msg(msg, prefill)
+    async for o in self._call_cbs('after_msgs'): yield o
+    self.turn_kwargs, self.stream = kwargs, stream
+    async for o in self._call_cbs('before_acomplete'): yield o
+    res = await acomplete(self.turn_msgs, self.model, system=self.turn_sysp, stream=stream,
         tools=self.tool_schemas, tool_choice=tool_choice, max_tokens=int(max_tokens),
-        temperature=None if think else ifnone(temp,self.temp), **kwargs)
+        temperature=None if think else ifnone(temp,self.temp), **self.turn_kwargs)
     if stream:
-        if prefill: yield _mk_prefill(prefill)
+        if self.prefill: yield _mk_prefill(self.prefill)
         res = astream_with_complete(res, postproc=postproc)
         async for chunk in res: yield chunk
         res = res.value
-    m=contents(res)
-    if prefill: m.content[0].text = prefill + m.content[0].text
-    self.hist.append(m)
-    action, msg = _handle_stop_reason(res)
-    if action == 'warning': add_warning(res, msg)
-    elif action == 'retry':
-        async for result in self._call(
-            None, prefill, temp, think, search, stream, max_steps, step,
-            final_prompt, tool_choice, **kwargs): yield result
-        self.hist.pop(-2) # rm incomplete srvtoolu_
-        return
-    self._track(res)
+    self.turn_res, self.turn_msg = res, contents(res)
+    if self.prefill: self.turn_msg.content[0].text = self.prefill + self.turn_msg.content[0].text
+    self.hist.append(self.turn_msg)
+    async for o in self._call_cbs('after_acomplete'): yield o
+    self._track(self.turn_res)
     yield res
-    toolloop, prompt = False, None
-    if (langs := _active_fence_langs(self.tool_schemas)):
-        if m := last(self.hist, lambda o: o.role == 'assistant'):
-            if fence := extract_fence_call(m.text):
-                lang, code = fence
-                out = await run_fence_tool(lang, code, self.ns)
-                for p in reversed(m.content):
-                    if p.type == PartType.text: p.text += out; break
-                if stream: yield {'text': out}
-                toolloop = True
+    self.toolloop, self.prompt, tmsg = False, None, None
+    async for o in self._call_cbs('before_tool_calls'): yield o
     if stcs:= _srvtools(res.tool_calls):
         for tc in stcs: yield tc
     if tcs := _usrtools(res.tool_calls):
@@ -566,29 +518,23 @@ async def _call(self:AsyncChat, msg=None, prefill=None, temp=None, think=None, s
         tmsg = mk_tool_res_msg(tcs, tres)
         for r in tmsg.content: yield r
         self.hist.append(tmsg)
-        if step>=max_steps-1 or _has_stop(tmsg.content): prompt,tool_choice,search = mk_msg(final_prompt),'none',False
-        toolloop = True
+        if step>=max_steps-1 or _has_stop(tmsg.content): self.prompt,tool_choice,search = mk_msg(final_prompt),'none',False
+        self.toolloop = True
-    if toolloop and step <= max_steps:
+    async for o in self._call_cbs('after_tool_calls'): yield o
+    if self.toolloop and step <= max_steps:
         try:
             async for result in self._call(
-                prompt, prefill, temp, think, search, stream, max_steps, step+1,
+                self.prompt, None, temp, think, search, stream, max_steps, step+1,
                 final_prompt, tool_choice=tool_choice, **kwargs): yield result
         except ContextWindowExceededError:
-            for p in tmsg.content:
-                if len(p.text)>1000: p.text = _cwe_msg + _trunc_str(p.text, mx=1000)
+            if tmsg is not None:
+                for p in tmsg.content:
+                    if len(p.text)>1000: p.text = _cwe_msg + _trunc_str(p.text, mx=1000)
             async for result in self._call(
-                prompt, prefill, temp, think, search, stream, max_steps, step+1,
+                self.prompt, None, temp, think, search, stream, max_steps, step+1,
                 final_prompt, tool_choice='none', **kwargs): yield result
-# %% ../nbs/07_chat.ipynb #4dc002da
-async def run_fence_tool(lang, code, ns):
-    "Run the mapped tool for `lang` with the code, return result fence"
-    tname = _lang2tool[lang]
-    arg = dict(code=code) if lang == 'py' else dict(command=code)
-    res = _mk_tool_result(await call_func_async(tname, arg, ns=ns, raise_on_err=False))
-    return _mk_result_fence(_trunc_str(str(res)))
 # %% ../nbs/07_chat.ipynb #1361515a
 @patch
 @delegates(AsyncChat._call)
@@ -611,11 +557,146 @@ async def __call__(
     async for res in result_gen: pass
     return res # normal chat behavior only return last msg
+# %% ../nbs/07_chat.ipynb #a4bbd2ce
+class ChatCallback(GetAttr):
+    "Base class for chat callbacks; with `_default='chat'`, fastcore's `GetAttr` forwards attribute lookups that miss on the callback to `self.chat`"
+    order = 0          # sort key read by `_call_cbs` (`self.cbs.sorted('order')`)
+    _default = 'chat'  # name of the attribute `GetAttr` delegates to
+    chat = None        # presumably assigned by the owning chat object -- the assignment is not visible here
+    run = True         # `_call_cbs` skips callbacks whose `run` is falsy
+    def __repr__(self):
+        return self.__class__.__name__
+# %% ../nbs/07_chat.ipynb #2f02135c
+@patch
+async def _call_cbs(self:AsyncChat, event):
+    "Call the `event` handler of every enabled callback, in `order` order, re-yielding whatever each handler yields"
+    for cb in self.cbs.sorted('order'):
+        # Disabled callbacks and callbacks without a handler for this event are skipped
+        if not (cb.run and hasattr(cb, event)): continue
+        handler = getattr(cb, event)
+        async for item in handler(): yield item
+# %% ../nbs/07_chat.ipynb #cf3f064c
+class DeepseekMsgsCallback(ChatCallback):
+    "When the model name contains 'deepseek', give every assistant message in `turn_msgs` that has no thinking part an empty one"
+    order = 10
+    async def after_msgs(self):
+        if 'deepseek' in self.model:
+            for msg in self.turn_msgs:
+                if msg.role != 'assistant': continue
+                has_thinking = any(part.type == PartType.thinking for part in msg.content)
+                if not has_thinking: msg.content.append(Part(PartType.thinking, ''))
+        # Unreachable `yield`: it only makes this method an async generator, which `_call_cbs` consumes with `async for`
+        if False: yield
+# %% ../nbs/07_chat.ipynb #14baac3e
+class DeepseekPrefillCallback(ChatCallback):
+    "Set `turn_kwargs['base_url']` to the DeepSeek beta URL when a prefill is used with a `deepseek-` model of vendor 'deepseek'"
+    order = 10
+    async def before_acomplete(self):
+        use_beta = self.prefill and self.vendor_name == 'deepseek' and self.model.startswith("deepseek-")
+        if use_beta: self.chat.turn_kwargs['base_url'] = 'https://api.deepseek.com/beta'
+        # Unreachable `yield`: it only makes this method an async generator, which `_call_cbs` consumes with `async for`
+        if False: yield
+# %% ../nbs/07_chat.ipynb #ce47dc4a
+def add_warning(r, msg):
+    "Add a text part containing `msg` wrapped in `<warning>` tags to `r.message.content`, replacing falsy content with a one-item list"
+    part = Part(PartType.text, f"<warning>{msg}</warning>")
+    if not r.message.content:
+        r.message.content = [part]
+        return
+    r.message.content.append(part)
+# %% ../nbs/07_chat.ipynb #b6ea161d
+def _handle_stop_reason(res):
+    "Return `(action, warning_msg)` for the stop reason of `res`: `('warning', msg)` for length/refusal/content_filter, otherwise `(None, None)`"
+    warnings = (
+        ('length', 'Response was cut off at token limit.'),
+        ('refusal', 'AI server provider content filter was applied to this request'),
+        ('content_filter', 'AI server provider content filter was applied to this request.'))
+    sr = stop_reason(res)
+    for reason, msg in warnings:
+        if sr == reason: return 'warning', msg
+    # if sr == 'pause_turn': return 'retry', None # TODO: Not a canonical finish reason
+    return None, None
+# %% ../nbs/07_chat.ipynb #daf876f4
+class StopReasonCallback(ChatCallback):
+    "After a completion, add a warning part to `turn_res` when `_handle_stop_reason` reports a 'warning' action"
+    order = 40
+    async def after_acomplete(self):
+        kind, text = _handle_stop_reason(self.turn_res)
+        if kind == 'warning':
+            add_warning(self.chat.turn_res, text)
+        # Unreachable `yield`: it only makes this method an async generator, which `_call_cbs` consumes with `async for`
+        if False: yield
+# %% ../nbs/07_chat.ipynb #aa7630b2
+def _active_fence_langs(tool_schemas):
+    "Return the set of fence langs in `_lang2tool` whose mapped tool name appears among `tool_schemas`"
+    if not tool_schemas: return set()
+    registered = set()
+    for schema in tool_schemas:
+        registered.add(nested_idx(schema, 'function', 'name'))
+    return {lang for lang, tool in _lang2tool.items() if tool in registered}
+# %% ../nbs/07_chat.ipynb #72274cdc
+async def run_fence_tool(lang, code, ns):
+    "Call the tool mapped to `lang` with `code` and return its truncated result wrapped as a result fence"
+    tool = _lang2tool[lang]
+    # The 'py' tool receives the source as `code`; every other lang receives it as `command`
+    key = 'code' if lang == 'py' else 'command'
+    raw = await call_func_async(tool, {key: code}, ns=ns, raise_on_err=False)
+    res = _mk_tool_result(raw)
+    return _mk_result_fence(_trunc_str(str(res)))
+# %% ../nbs/07_chat.ipynb #740ee3a4
+class FenceToolCallback(ChatCallback):
+    "Callback handling fence-style tool calls: splits fence messages, registers a `FenceToolStop`, and runs the fenced code before regular tool calls"
+    order = 20
+    async def after_msgs(self):
+        "Replace `turn_msgs` with the output of `_split_fence_msgs`"
+        self.chat.turn_msgs = _split_fence_msgs(self.turn_msgs)
+        if False: yield  # unreachable; makes this an async generator for `_call_cbs`
+    async def before_acomplete(self):
+        "Append a `FenceToolStop` to `turn_kwargs['stop_callables']` when fence langs are active and none is present yet"
+        langs = _active_fence_langs(self.tool_schemas)
+        if langs:
+            stops = self.turn_kwargs.get('stop_callables', [])
+            already = any(isinstance(s, FenceToolStop) for s in stops)
+            if not already: self.chat.turn_kwargs['stop_callables'] = stops + [FenceToolStop(langs)]
+        if False: yield  # unreachable; makes this an async generator for `_call_cbs`
+    async def before_tool_calls(self):
+        "Run the fence call found in the last assistant message, append its output to that message and request another tool-loop step"
+        if not _active_fence_langs(self.tool_schemas): return
+        msg = last(self.hist, lambda o: o.role == 'assistant')
+        if not msg: return
+        fence = extract_fence_call(msg.text)
+        if not fence: return
+        lang, code = fence
+        out = await run_fence_tool(lang, code, self.ns)
+        # The output is appended to the last text part of the message, if there is one
+        text_part = next((p for p in reversed(msg.content) if p.type == PartType.text), None)
+        if text_part is not None: text_part.text += out
+        self.chat.toolloop = True
+        if self.stream: yield {'text': out}
+# %% ../nbs/07_chat.ipynb #1897aea2
+def _inject_tool_reminder(msgs, reminder):
+    "Append `reminder` as a text part to the first message of the trailing run of 'tool' messages; return `msgs` as-is when it does not end with a tool message"
+    start = len(msgs)
+    while start > 0 and msgs[start-1].role == 'tool': start -= 1
+    if start >= len(msgs): return msgs
+    # NOTE: the message object is modified in place; only the returned list is a new (shallow) copy
+    msgs[start].content.append(Part(type=PartType.text, text=reminder))
+    return list(msgs)
+# %% ../nbs/07_chat.ipynb #1b404e0f
+# Default value of the `tool_reminder` argument of `ToolReminderCallback`
+_tool_reminder = '\n<system-reminder>After *EVERY* tool call result, no matter how small, briefly summarise in prose what you found, before continuing or calling another tool.</system-reminder>'
+# %% ../nbs/07_chat.ipynb #fab308b7
+class ToolReminderCallback(ChatCallback):
+    "Pass `turn_msgs` through `_inject_tool_reminder` with the configured `tool_reminder` text"
+    order = 30
+    def __init__(self, tool_reminder=_tool_reminder):
+        store_attr()
+    async def after_msgs(self):
+        updated = _inject_tool_reminder(self.turn_msgs, self.tool_reminder)
+        self.chat.turn_msgs = updated
+        # Unreachable `yield`: it only makes this method an async generator, which `_call_cbs` consumes with `async for`
+        if False: yield
+# %% ../nbs/07_chat.ipynb #423caa31
+def stop_sequences(seqs):
+    "Stop when any sequence appears in the accumulated completion text; the returned callable gives the text trimmed to the end of the earliest-completing match, or None when nothing matches"
+    seqs = L(seqs)
+    def _stop(text):
+        # Trim at the match that completes first in `text`, not at whichever sequence happens to come first in `seqs`
+        ends = [i+len(s) for s in seqs if (i := text.find(s)) >= 0]
+        if ends: return text[:min(ends)]
+    return _stop
+# %% ../nbs/07_chat.ipynb #663eee29
+class StopSequencesCallback(ChatCallback):
+    "Before each completion, append a `stop_sequences` callable for `seqs` to `turn_kwargs['stop_callables']`"
+    order = 30
+    def __init__(self, seqs):
+        self.seqs = L(seqs)
+    async def before_acomplete(self):
+        existing = self.turn_kwargs.get('stop_callables', [])
+        stopper = stop_sequences(self.seqs)
+        self.chat.turn_kwargs['stop_callables'] = existing + [stopper]
+        # Unreachable `yield`: it only makes this method an async generator, which `_call_cbs` consumes with `async for`
+        if False: yield
+# %% ../nbs/07_chat.ipynb #318ee856
+# Default callback classes (classes, not instances); `DeepseekMsgsCallback` and `StopSequencesCallback` are not part of the defaults
+defaults.chat_callbacks = [DeepseekPrefillCallback, FenceToolCallback, ToolReminderCallback, StopReasonCallback]
 # %% ../nbs/07_chat.ipynb #115fd94f
 def _trunc_param(v, mx=40):
     "Truncate and escape param value for display"
     tp = _trunc_str(str(v).replace('`', r'\`'), mx=mx, replace=None, skip=0)
-    try: return ast.literal_eval(tp)
+    try: return dumps(tp, ensure_ascii=False)
     except Exception: return repr(tp).replace('\\\\', '\\')
 # %% ../nbs/07_chat.ipynb #80c0abdb
@@ -645,7 +726,7 @@ def mk_tr_details(tr, mx=2000):
            'call':{'function': tr.data['name'], 'arguments': args},
            'result':_trunc_content(tr.text, mx=mx),}
     summ = f"<summary>{_tc_summary(tr)}</summary>"
-    return f"\n\n{tool_dtls_tag}\n{summ}\n\n```json\n{dumps(res, indent=2)}\n```\n\n</details>\n\n"
+    return f"\n\n{tool_dtls_tag}\n{summ}\n\n```json\n{dumps(res, indent=2, ensure_ascii=False)}\n```\n\n</details>\n\n"
 # %% ../nbs/07_chat.ipynb #3049001c
 def mk_srv_tc_details(tc, mx=2000):
@@ -653,7 +734,7 @@ def mk_srv_tc_details(tc, mx=2000):
     args = {k:_trunc_str(v, mx=mx*5) for k,v in tc.arguments.items()}
     res = {'id':tc.id, 'server':True, 'call':{'function': tc.name, 'arguments': args}, 'result':"Server tool call executed."}
     summ = f"<summary>{_srv_tc_summary(tc)}</summary>"
-    return f"\n\n{tool_dtls_tag}\n{summ}\n\n```json\n{dumps(res, indent=2)}\n```\n\n</details>\n\n"
+    return f"\n\n{tool_dtls_tag}\n{summ}\n\n```json\n{dumps(res, indent=2, ensure_ascii=False)}\n```\n\n</details>\n\n"
 # %% ../nbs/07_chat.ipynb #f0d984ec
 # status_re = re.compile(r'^- ⏳ <code>(.*)</code> ⏳$|^🧠+$', re.MULTILINE) # TODO: Need to yield tool calls as they are done collated in fastllm `_acollect_stream`

python_fastllm-0.0.8/fastllm/codex.py ADDED Viewed

@@ -0,0 +1,7 @@
+# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/05_codex.ipynb.
+# %% auto #0
+__all__ = []
+# %% ../nbs/05_codex.ipynb #a1d088d2
+from fastcore.utils import *

{python_fastllm-0.0.6 → python_fastllm-0.0.8}/fastllm/streaming.py RENAMED Viewed

@@ -116,7 +116,7 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
     stop, stop_yielded = False, False
     async for d in it:
         # Check stop condition and yield stop delta
-        stop = stop_and_trim(part_accum, d, stop_callables)
+        if not stop: stop = stop_and_trim(part_accum, d, stop_callables)
         if stop and not stop_yielded:
             for r in _yield_parts(d): yield r
             stop_yielded = True
@@ -138,6 +138,7 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
         deltas.append(d)
     part_accum.finalize()
     tcs = part_accum.tool_calls
+    if api_name: usg = api_registry.apis[api_name].finalize_usage(usg, part_accum.parts)
     if stop: fin = FinishReason.stop
     fin = FinishReason.tool_calls if fin==FinishReason.stop and any(~L(tcs).attrgot('server')) else fin # recheck tool calls post collation
     # tool calls and non-anthropic citations are yielded at the end
@@ -145,3 +146,4 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
             message=Msg(role="assistant", content=part_accum.parts),
             finish_reason=fin, usage=usg, tool_calls=tcs, api_name=api_name, vendor_name=vendor_name,
             raw={'deltas':deltas})

{python_fastllm-0.0.6 → python_fastllm-0.0.8}/fastllm/types.py RENAMED Viewed

@@ -40,7 +40,7 @@ def _repr_markdown_(self: Part):
 {body}
-<details>
+<details markdown='1'>
 - data: `{data}`
@@ -80,7 +80,7 @@ def _repr_markdown_(self: ToolCall):
     extra = _trunc_strs(self.extra)
     return f"""🔧 **{self.name}**(`{self.arguments}`)
-<details>
+<details markdown='1'>
 - id: `{self.id}`
 - server: `{self.server}`
@@ -135,7 +135,7 @@ def _repr_markdown_(self: Completion):
     det_str = '\n- '.join(details)
     return f"""{content}
-<details>
+<details markdown='1'>
 - {det_str}
@@ -147,25 +147,29 @@ FinishReason = str_enum('finish_reason', 'stop', 'tool_calls', 'length', 'conten
 # %% ../nbs/00_types.ipynb #fc681c52
 class APIRegistry:
     def __init__(self): self.apis = {}
-    def register(self, name, **kwargs): self.apis[name] = SimpleNamespace(**kwargs)
+    def register(self, name, finalize_usage=noop, **kwargs): self.apis[name] = SimpleNamespace(finalize_usage=finalize_usage, **kwargs)
 api_registry = APIRegistry()
 # %% ../nbs/00_types.ipynb #d58a5f96
 def mk_completion(resp, model, api_name, vendor_name):
     "Normalize an api response into Completion."
     api = api_registry.apis[api_name]
     tcs = api.norm_tool_calls(resp)
+    parts = api.norm_parts(resp)
+    usg = api.finalize_usage(api.norm_usage(resp), parts)
     return Completion(
         model=resp.get("model") or model,
-        message=Msg(role="assistant", content=api.norm_parts(resp)),
+        message=Msg(role="assistant", content=parts),
         finish_reason=api.norm_finish(resp, tcs),
-        usage=api.norm_usage(resp),
+        usage=usg,
         tool_calls=tcs,
         api_name=api_name,
         vendor_name=vendor_name,
         raw=resp)
 # %% ../nbs/00_types.ipynb #d5322db5
 def mk_tool_res_msg(tool_calls:list[ToolCall], results:list[str|list]):
     'A util to prepare parallel tool call with str or media list results'

{python_fastllm-0.0.6 → python_fastllm-0.0.8}/python_fastllm.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: python-fastllm
-Version: 0.0.6
+Version: 0.0.8
 Author-email: Kerem Turgutlu <keremturgutlu@gmail.com>
 License: Apache-2.0
 Project-URL: Repository, https://github.com/AnswerDotAI/fastllm

{python_fastllm-0.0.6 → python_fastllm-0.0.8}/python_fastllm.egg-info/SOURCES.txt RENAMED Viewed

@@ -5,6 +5,7 @@ fastllm/_modidx.py
 fastllm/acomplete.py
 fastllm/anthropic.py
 fastllm/chat.py
+fastllm/codex.py
 fastllm/gemini.py
 fastllm/openai_chat.py
 fastllm/openai_responses.py