PyPI - python-fastllm - Versions diffs - 0.0.2__py3-none-any.whl → 0.0.4__py3-none-any.whl - Mend

python-fastllm 0.0.2 (py3-none-any.whl) → 0.0.4 (py3-none-any.whl)

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (11)

fastllm/__init__.py +1 -1
fastllm/_modidx.py +14 -4
fastllm/acomplete.py +1 -3
fastllm/chat.py +112 -30
fastllm/streaming.py +52 -67
fastllm/types.py +30 -14
{python_fastllm-0.0.2.dist-info → python_fastllm-0.0.4.dist-info}/METADATA +1 -1
{python_fastllm-0.0.2.dist-info → python_fastllm-0.0.4.dist-info}/RECORD +11 -11
{python_fastllm-0.0.2.dist-info → python_fastllm-0.0.4.dist-info}/WHEEL +0 -0
{python_fastllm-0.0.2.dist-info → python_fastllm-0.0.4.dist-info}/entry_points.txt +0 -0
{python_fastllm-0.0.2.dist-info → python_fastllm-0.0.4.dist-info}/top_level.txt +0 -0

fastllm/__init__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "0.0.2"
1	+ __version__ = "0.0.4"

fastllm/_modidx.py CHANGED Viewed

@@ -51,7 +51,11 @@ d = { 'settings': { 'branch': 'main',
                               'fastllm.chat.AsyncStreamFormatter': ('chat.html#asyncstreamformatter', 'fastllm/chat.py'),
                               'fastllm.chat.AsyncStreamFormatter.format_stream': ( 'chat.html#asyncstreamformatter.format_stream',
                                                                                    'fastllm/chat.py'),
+                              'fastllm.chat.FenceToolStop': ('chat.html#fencetoolstop', 'fastllm/chat.py'),
+                              'fastllm.chat.FenceToolStop.__call__': ('chat.html#fencetoolstop.__call__', 'fastllm/chat.py'),
+                              'fastllm.chat.FenceToolStop.__init__': ('chat.html#fencetoolstop.__init__', 'fastllm/chat.py'),
                               'fastllm.chat.FullResponse': ('chat.html#fullresponse', 'fastllm/chat.py'),
+                              'fastllm.chat.Msg.text': ('chat.html#msg.text', 'fastllm/chat.py'),
                               'fastllm.chat.StopResponse': ('chat.html#stopresponse', 'fastllm/chat.py'),
                               'fastllm.chat.StreamFormatter': ('chat.html#streamformatter', 'fastllm/chat.py'),
                               'fastllm.chat.StreamFormatter.__init__': ('chat.html#streamformatter.__init__', 'fastllm/chat.py'),
@@ -65,6 +69,7 @@ d = { 'settings': { 'branch': 'main',
                               'fastllm.chat.UsageStats.__repr__': ('chat.html#usagestats.__repr__', 'fastllm/chat.py'),
                               'fastllm.chat.UsageStats.fmt': ('chat.html#usagestats.fmt', 'fastllm/chat.py'),
                               'fastllm.chat.UsageStats.from_response': ('chat.html#usagestats.from_response', 'fastllm/chat.py'),
+                              'fastllm.chat._active_fence_langs': ('chat.html#_active_fence_langs', 'fastllm/chat.py'),
                               'fastllm.chat._add_cache_control': ('chat.html#_add_cache_control', 'fastllm/chat.py'),
                               'fastllm.chat._alite_call_func': ('chat.html#_alite_call_func', 'fastllm/chat.py'),
                               'fastllm.chat._apply_cache_idxs': ('chat.html#_apply_cache_idxs', 'fastllm/chat.py'),
@@ -79,7 +84,10 @@ d = { 'settings': { 'branch': 'main',
                               'fastllm.chat._lite_call_func': ('chat.html#_lite_call_func', 'fastllm/chat.py'),
                               'fastllm.chat._mk_content': ('chat.html#_mk_content', 'fastllm/chat.py'),
                               'fastllm.chat._mk_prefill': ('chat.html#_mk_prefill', 'fastllm/chat.py'),
+                              'fastllm.chat._mk_result_fence': ('chat.html#_mk_result_fence', 'fastllm/chat.py'),
                               'fastllm.chat._mk_tool_result': ('chat.html#_mk_tool_result', 'fastllm/chat.py'),
+                              'fastllm.chat._split_fence_msgs': ('chat.html#_split_fence_msgs', 'fastllm/chat.py'),
+                              'fastllm.chat._split_msg_on_fences': ('chat.html#_split_msg_on_fences', 'fastllm/chat.py'),
                               'fastllm.chat._srv_tc_summary': ('chat.html#_srv_tc_summary', 'fastllm/chat.py'),
                               'fastllm.chat._srvtools': ('chat.html#_srvtools', 'fastllm/chat.py'),
                               'fastllm.chat._tc_summary': ('chat.html#_tc_summary', 'fastllm/chat.py'),
@@ -93,6 +101,7 @@ d = { 'settings': { 'branch': 'main',
                               'fastllm.chat.astream_with_complete': ('chat.html#astream_with_complete', 'fastllm/chat.py'),
                               'fastllm.chat.cite_footnote': ('chat.html#cite_footnote', 'fastllm/chat.py'),
                               'fastllm.chat.contents': ('chat.html#contents', 'fastllm/chat.py'),
+                              'fastllm.chat.extract_fence_call': ('chat.html#extract_fence_call', 'fastllm/chat.py'),
                               'fastllm.chat.fmt2hist': ('chat.html#fmt2hist', 'fastllm/chat.py'),
                               'fastllm.chat.lite_mk_func': ('chat.html#lite_mk_func', 'fastllm/chat.py'),
                               'fastllm.chat.mk_msg': ('chat.html#mk_msg', 'fastllm/chat.py'),
@@ -101,9 +110,11 @@ d = { 'settings': { 'branch': 'main',
                               'fastllm.chat.mk_tr_details': ('chat.html#mk_tr_details', 'fastllm/chat.py'),
                               'fastllm.chat.postproc': ('chat.html#postproc', 'fastllm/chat.py'),
                               'fastllm.chat.remove_cache_ckpts': ('chat.html#remove_cache_ckpts', 'fastllm/chat.py'),
+                              'fastllm.chat.run_fence_tool': ('chat.html#run_fence_tool', 'fastllm/chat.py'),
                               'fastllm.chat.search_count': ('chat.html#search_count', 'fastllm/chat.py'),
                               'fastllm.chat.split_tools': ('chat.html#split_tools', 'fastllm/chat.py'),
                               'fastllm.chat.stop_reason': ('chat.html#stop_reason', 'fastllm/chat.py'),
+                              'fastllm.chat.stop_sequences': ('chat.html#stop_sequences', 'fastllm/chat.py'),
                               'fastllm.chat.structured': ('chat.html#structured', 'fastllm/chat.py')},
             'fastllm.gemini': { 'fastllm.gemini._gem_filter_sch': ('gemini.html#_gem_filter_sch', 'fastllm/gemini.py'),
                                 'fastllm.gemini._gem_part_type': ('gemini.html#_gem_part_type', 'fastllm/gemini.py'),
@@ -208,13 +219,12 @@ d = { 'settings': { 'branch': 'main',
                                    'fastllm.streaming.PartAccum': ('streaming.html#partaccum', 'fastllm/streaming.py'),
                                    'fastllm.streaming.PartAccum.append': ('streaming.html#partaccum.append', 'fastllm/streaming.py'),
                                    'fastllm.streaming.PartAccum.finalize': ('streaming.html#partaccum.finalize', 'fastllm/streaming.py'),
+                                   'fastllm.streaming.PartAccum.get_merged': ( 'streaming.html#partaccum.get_merged',
+                                                                               'fastllm/streaming.py'),
                                    'fastllm.streaming._trim_delta': ('streaming.html#_trim_delta', 'fastllm/streaming.py'),
-                                   'fastllm.streaming.accum_completion': ('streaming.html#accum_completion', 'fastllm/streaming.py'),
-                                   'fastllm.streaming.completion_text': ('streaming.html#completion_text', 'fastllm/streaming.py'),
-                                   'fastllm.streaming.fake_stream': ('streaming.html#fake_stream', 'fastllm/streaming.py'),
                                    'fastllm.streaming.mk_acollect_stream': ('streaming.html#mk_acollect_stream', 'fastllm/streaming.py'),
                                    'fastllm.streaming.norm_and_yield': ('streaming.html#norm_and_yield', 'fastllm/streaming.py'),
-                                   'fastllm.streaming.stop_sequences': ('streaming.html#stop_sequences', 'fastllm/streaming.py')},
+                                   'fastllm.streaming.stop_and_trim': ('streaming.html#stop_and_trim', 'fastllm/streaming.py')},
             'fastllm.types': { 'fastllm.types.APIRegistry': ('types.html#apiregistry', 'fastllm/types.py'),
                                'fastllm.types.APIRegistry.__init__': ('types.html#apiregistry.__init__', 'fastllm/types.py'),
                                'fastllm.types.APIRegistry.register': ('types.html#apiregistry.register', 'fastllm/types.py'),

fastllm/acomplete.py CHANGED Viewed

@@ -18,7 +18,6 @@ from fastspec.errors import APIError
 from .types import *
 from .streaming import *
 from .openai_responses import *
-from .streaming import stop_sequences as _stop_sequences
 from .openai_chat import *
 from .anthropic import *
 from .gemini import *
@@ -30,7 +29,7 @@ oai_spec  = SpecParser.from_openapi(dict2obj(json.loads((specs_path/'openai.with
 gem_spec  = SpecParser.from_discovery(dict2obj(json.loads((specs_path/'gemini.json').read_text())))
 # %% ../nbs/06_acomplete.ipynb #32ee2546
-_codex_json = '~/.codex/auth.json', 'tokens','access_token'
+_codex_json = '~/.codex/auth.json', ('tokens','access_token')
 vendor_mapping = {
     "openai":       ('openai', 'https://api.openai.com/v1', 'OPENAI_API_KEY'),
     "anthropic":    ('anthropic', 'https://api.anthropic.com', 'ANTHROPIC_API_KEY'),
@@ -106,7 +105,6 @@ async def acomplete(msgs, model, api_name=None, vendor_name=None, api_key=None,
     "Unified completion across different APIs."
     cli, api_name, vendor_name = mk_client(model, vendor_name, api_name, api_key, base_url, xtra_hdrs)
     api = api_registry.apis[api_name]
-    if stop_sequences: stop_callables = L(stop_callables) + [_stop_sequences(stop_sequences)]
     payload = api.mk_payload(msgs, model, stream=stream, stop_callables=stop_callables, **kwargs)
     payload = merge(payload, ifnone(xtra_body, {}))
     if vendor_name == 'codex':

fastllm/chat.py CHANGED Viewed

@@ -3,12 +3,11 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/07_chat.ipynb.
 # %% auto #0
-__all__ = ['haik45', 'sonn45', 'sonn', 'sonn46', 'opus46', 'opus', 'gpt54', 'gpt54m', 'codex54', 'codex55', 'tool_dtls_tag',
-           're_tools', 'token_dtls_tag', 're_token', 'effort', 'remove_cache_ckpts', 'contents', 'stop_reason',
-           'mk_msg', 'split_tools', 'fmt2hist', 'mk_msgs', 'cite_footnote', 'postproc', 'lite_mk_func', 'ToolResponse',
-           'structured', 'StopResponse', 'FullResponse', 'search_count', 'UsageStats', 'AsyncChat', 'add_warning',
-           'astream_with_complete', 'mk_tr_details', 'mk_srv_tc_details', 'StreamFormatter', 'AsyncStreamFormatter',
-           'adisplay_stream']
+__all__ = ['tool_dtls_tag', 're_tools', 'token_dtls_tag', 're_token', 'effort', 'remove_cache_ckpts', 'contents', 'stop_reason',
+           'mk_msg', 'FenceToolStop', 'extract_fence_call', 'stop_sequences', 'split_tools', 'fmt2hist', 'mk_msgs',
+           'cite_footnote', 'postproc', 'lite_mk_func', 'ToolResponse', 'structured', 'StopResponse', 'FullResponse',
+           'search_count', 'UsageStats', 'AsyncChat', 'add_warning', 'astream_with_complete', 'run_fence_tool',
+           'mk_tr_details', 'mk_srv_tc_details', 'StreamFormatter', 'AsyncStreamFormatter', 'adisplay_stream']
 # %% ../nbs/07_chat.ipynb #d5a3bc1f
 import asyncio, base64, json, mimetypes, random, string, ast, warnings
@@ -21,19 +20,8 @@ from fastcore import imghdr
 from fastcore.xml import Safe
 from dataclasses import dataclass
+from .types import *
 from .acomplete import *
-from .acomplete import Msg, Part, PartType, ToolCall, Completion, mk_tool_res_msg, get_model_info
-# %% ../nbs/07_chat.ipynb #c4b8f12b
-haik45 = "claude-haiku-4-5"
-sonn45 = "claude-sonnet-4-5"
-sonn = sonn46 = "claude-sonnet-4-6"
-opus46 = "claude-opus-4-6"
-opus = "claude-opus-4-7"
-gpt54 = "gpt-5.4"
-gpt54m = "gpt-5.4-mini"
-codex54 = "gpt-5.4"
-codex55 = "gpt-5.5"
 # %% ../nbs/07_chat.ipynb #90f55ad4
 def _bytes2content(data):
@@ -106,6 +94,65 @@ token_dtls_tag = "<details class='token-usage-details'>"
 re_token = re.compile(fr"^{re.escape(token_dtls_tag)}<summary>.*?</summary>\n*\n*`.*?`\n*\n*</details>\n?",
                       flags=re.DOTALL|re.MULTILINE)
+# %% ../nbs/07_chat.ipynb #be998131
+_fence_back = '`````'
+_fence_re = re.compile(f'^{_fence_back}(py|bash)\n(.*?)\n{_fence_back}', re.DOTALL | re.MULTILINE)
+_result_re = re.compile(f'\n{_fence_back}result\n(.*?)\n{_fence_back}\n', re.DOTALL)
+_lang2tool = dict(py='python', bash='bash')
+class FenceToolStop:
+    def __init__(self, langs): self.langs = langs
+    def __call__(self, text):
+        "Return trim result if complete fence detected in active lang"
+        m = _fence_re.search(text)
+        if m and m.group(1) in self.langs: return m.group(0)
+# %% ../nbs/07_chat.ipynb #e6360e96
+def extract_fence_call(text):
+    "Return (lang, code) if text ends with terminated py/bash fence, else None"
+    ms = list(_fence_re.finditer(text))
+    if not ms: return None
+    m = ms[-1]
+    if not text[m.end():].strip(): return m.group(1), m.group(2)
+# %% ../nbs/07_chat.ipynb #215183bf
+@patch(as_prop=True)
+def text(self:Msg): return ''.join(p.text or '' for p in self.content if p.type == PartType.text)
+# %% ../nbs/07_chat.ipynb #1de7e4d2
+def _mk_result_fence(output): return f"\n{_fence_back}result\n{output}\n{_fence_back}\n"
+def _split_msg_on_fences(msg):
+    "Split an assistant Msg on result fences, return list of Msgs"
+    if msg.role != 'assistant': return [msg]
+    if not _result_re.search(msg.text): return [msg]
+    res, asst_parts, tool_parts = [], [], []
+    for msg_part in msg.content:
+        if msg_part.type == PartType.thinking: asst_parts.append(msg_part)
+        elif msg_part.type == PartType.tool_use: tool_parts.append(msg_part)
+        elif parts := _result_re.split(msg_part.text or ''):
+            for i,p in enumerate(parts):
+                if not p: continue
+                if i % 2 == 0: res.append(Msg(role='assistant', content=asst_parts+[Part(type=PartType.text, text=p.strip())]))
+                else:          res.append(Msg(role='user', content=[Part(type=PartType.text, text=_mk_result_fence(p))]))
+    if tool_parts: res.append(Msg(role='assistant', content=tool_parts))
+    return res
+def _split_fence_msgs(msgs):
+    "Split all assistant msgs on result fences for wire protocol"
+    res = []
+    for m in msgs: res.extend(_split_msg_on_fences(m))
+    return res
+# %% ../nbs/07_chat.ipynb #b161ca9e
+def stop_sequences(seqs):
+    "Stop when any sequence appears in the accumulated completion text."
+    seqs = L(seqs)
+    def _stop(text):
+        for s in seqs:
+            if s in text: return text[:text.find(s)+len(s)]
+    return _stop
 # %% ../nbs/07_chat.ipynb #45ada210
 def _extract_tool_parts(text:str):
     "Extract (tool_use_part, tool_result_part) from <details> json block"
@@ -122,10 +169,13 @@ def split_tools(s):
     "Split formatted output into (text, summary, tooljson) chunks"
     return [(txt,summ,tj) for txt,_,summ,tj in chunked(re_tools.split(s.strip()), 4, pad=True)]
+# %% ../nbs/07_chat.ipynb #44060a78
 def fmt2hist(outp:str)->list[Msg]:
     "Transform a formatted output string into fastllm canonical Msgs"
     if token_dtls_tag in outp: outp = re_token.sub('', outp)
-    if tool_dtls_tag not in outp: return [Msg(role='assistant', content=[Part(type=PartType.text, text=outp.strip())])]
+    if tool_dtls_tag not in outp:
+        msg = Msg(role='assistant', content=[Part(type=PartType.text, text=outp.strip())])
+        return _split_msg_on_fences(msg)
     hist, asst_parts, tool_parts = [], [], []
     def flush():
         if tool_parts:
@@ -134,17 +184,18 @@ def fmt2hist(outp:str)->list[Msg]:
             asst_parts.clear(); tool_parts.clear()
     for txt,_,tj in split_tools(outp):
         if txt and txt.strip():
-            if tool_parts: flush()   # text after tool results => new assistant turn
+            if tool_parts: flush()
             asst_parts.append(Part(type=PartType.text, text=txt.strip()))
         if tj and (tp := _extract_tool_parts(tj)):
             asst_parts.append(tp[0])
             tool_parts.append(tp[1])
     flush()
     if asst_parts: hist.append(Msg(role='assistant', content=asst_parts))
-    # TODO: Is this needed?
-    # if hist and hist[-1].role == 'tool':
-    #     hist.append(Msg(role='assistant', content=[Part(type=PartType.text, text='.')]))
-    return hist
+    result = []
+    for msg in hist:
+        if msg.role == 'assistant': result.extend(_split_msg_on_fences(msg))
+        else: result.append(msg)
+    return result
 # %% ../nbs/07_chat.ipynb #8de5ce8d
 def _apply_cache_idxs(msgs, cache_idxs=[-1], ttl=None):
@@ -321,6 +372,13 @@ def _inject_tool_reminder(msgs, reminder):
     msgs[i] = m
     return msgs
+# %% ../nbs/07_chat.ipynb #e7eb2032
+def _active_fence_langs(tool_schemas):
+    "Return set of active fence langs whose mapped tool is registered"
+    if not tool_schemas: return set()
+    names = {nested_idx(t, 'function', 'name') for t in tool_schemas}
+    return {lang for lang, tname in _lang2tool.items() if tname in names}
 # %% ../nbs/07_chat.ipynb #e9a14051
 class AsyncChat:
     def __init__(
@@ -364,6 +422,7 @@ class AsyncChat:
         self.hist = mk_msgs(self.hist, self.cache and 'claude' in self.model, cache_idxs, self.ttl)
         msgs = self.hist
         if prefill: msgs = self.hist + [Msg(role='assistant', content=[Part(PartType.text, prefill)])]
+        msgs = _split_fence_msgs(msgs)
         if self.tool_reminder: msgs = _inject_tool_reminder(msgs, self.tool_reminder)
         if 'deepseek' in self.model:
             # The `reasoning_content` in the thinking mode must be passed back to the API.
@@ -431,8 +490,12 @@ def _prep_call(self:AsyncChat, prefill, search, max_tokens, kwargs, stream=False
     if self.base_url:      kwargs['base_url'] = self.base_url
     if self.extra_headers: kwargs['xtra_headers'] = self.extra_headers
     kwargs.update(_think_kw(self.model, think, self.vendor_name))
+    if (langs := _active_fence_langs(self.tool_schemas)):
+        if not any(isinstance(s, FenceToolStop) for s in kwargs.get('stop_callables', [])):
+            kwargs['stop_callables'] = kwargs.get('stop_callables', []) + [FenceToolStop(langs)]
     return prefill, max_tokens
 # %% ../nbs/07_chat.ipynb #07951b77
 @patch
 def print_hist(self:AsyncChat):
@@ -486,16 +549,27 @@ async def _call(self:AsyncChat, msg=None, prefill=None, temp=None, think=None, s
     self._track(res)
     yield res
+    toolloop, prompt = False, None
+    if (langs := _active_fence_langs(self.tool_schemas)):
+        if m := last(self.hist, lambda o: o.role == 'assistant'):
+            if fence := extract_fence_call(m.text):
+                lang, code = fence
+                out = await run_fence_tool(lang, code, self.ns)
+                for p in reversed(m.content):
+                    if p.type == PartType.text: p.text += out; break
+                if stream: yield {'text': out}
+                toolloop = True
     if stcs:= _srvtools(res.tool_calls):
         for tc in stcs: yield tc
     if tcs := _usrtools(res.tool_calls):
         tres = await parallel_async(_alite_call_func, tcs, timeout=tc_timeout, n_workers=n_workers, pause=pause, **self.tcdict)
         tmsg = mk_tool_res_msg(tcs, tres)
-        # TODO: We yield tool calls at the end with their results, fastllm doesn't yield streaming tool calls during streaming as once the collation is done for simplicity, but it can
         for r in tmsg.content: yield r
         self.hist.append(tmsg)
         if step>=max_steps-1 or _has_stop(tmsg.content): prompt,tool_choice,search = mk_msg(final_prompt),'none',False
-        else: prompt = None
+        toolloop = True
+    if toolloop and step <= max_steps:
         try:
             async for result in self._call(
                 prompt, prefill, temp, think, search, stream, max_steps, step+1,
@@ -507,6 +581,14 @@ async def _call(self:AsyncChat, msg=None, prefill=None, temp=None, think=None, s
                 prompt, prefill, temp, think, search, stream, max_steps, step+1,
                 final_prompt, tool_choice='none', **kwargs): yield result
+# %% ../nbs/07_chat.ipynb #4dc002da
+async def run_fence_tool(lang, code, ns):
+    "Run the mapped tool for `lang` with the code, return result fence"
+    tname = _lang2tool[lang]
+    arg = dict(code=code) if lang == 'py' else dict(command=code)
+    res = _mk_tool_result(await call_func_async(tname, arg, ns=ns, raise_on_err=False))
+    return _mk_result_fence(_trunc_str(str(res)))
 # %% ../nbs/07_chat.ipynb #1361515a
 @patch
 @delegates(AsyncChat._call)
@@ -608,15 +690,15 @@ class AsyncStreamFormatter(StreamFormatter):
         "Format the response stream for markdown display."
         async for o in rs: yield self.format_item(o)
-# %% ../nbs/07_chat.ipynb #f4345023
+# %% ../nbs/07_chat.ipynb #944bcd25
 @delegates(AsyncStreamFormatter)
 async def adisplay_stream(rs, **kwargs):
     "Use IPython.display to markdown display the response stream."
     try: from IPython.display import display, Markdown
     except ModuleNotFoundError: raise ModuleNotFoundError("This function requires ipython. Please run `pip install ipython` to use.")
     fmt = AsyncStreamFormatter(**kwargs)
-    md = ''
+    md,h = '',display(Markdown(' '), display_id=True)
     async for o in fmt.format_stream(rs):
-        md+=o
-        display(Markdown(md),clear=True)
+        md += o
+        if md: h.update(Markdown(md))
     return fmt

fastllm/streaming.py CHANGED Viewed

@@ -3,11 +3,10 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/01_streaming.ipynb.
 # %% auto #0
-__all__ = ['Delta', 'norm_and_yield', 'PartAccum', 'accum_completion', 'completion_text', 'stop_sequences', 'mk_acollect_stream',
-           'fake_stream']
+__all__ = ['Delta', 'norm_and_yield', 'PartAccum', 'stop_and_trim', 'mk_acollect_stream']
 # %% ../nbs/01_streaming.ipynb #0df5c926
-import json
+import json,copy
 from dataclasses import dataclass, field, fields
 from fastcore.utils import *
 from fastcore.meta import delegates
@@ -56,54 +55,44 @@ class PartAccum:
                 # anthropic citations have matching idx
                 self.parts[index].data['citations'].extend(citations or [])
-    def finalize(self):
-        for idx,tc in self.parts.items():
-            if isinstance(tc, ToolCall):
-                if isinstance(tc.arguments, str): tc.arguments = json.loads(tc.arguments) if tc.arguments else {}
-                self.tool_calls.append(tc)
-                data = {**tc.extra, 'id':tc.id, 'name':tc.name, 'arguments':tc.arguments, 'server':tc.server}
-                self.parts[idx] = Part(type=PartType.tool_use, data=data)
+    def get_merged(self, with_tools=True):
+        tmp_parts = copy.deepcopy(self.parts)
+        tool_calls = []
+        if with_tools:
+            for idx,tc in tmp_parts.items():
+                if isinstance(tc, ToolCall):
+                    if isinstance(tc.arguments, str): tc.arguments = json.loads(tc.arguments) if tc.arguments else {}
+                    tool_calls.append(tc)
+                    data = {**tc.extra, 'id':tc.id, 'name':tc.name, 'arguments':tc.arguments, 'server':tc.server}
+                    tmp_parts[idx] = Part(type=PartType.tool_use, data=data)
         merged = []
-        for p in self.parts.values():
+        for p in tmp_parts.values():
+            if isinstance(p, ToolCall) and not with_tools: continue
             if merged and merged[-1].type == p.type and p.type in (PartType.text, PartType.thinking): merged[-1].text += p.text
-            else: merged.append(p)
-        self.parts = merged
-# %% ../nbs/01_streaming.ipynb #0e8ca58e
-def accum_completion(pa, raw, fin, usg, deltas, model=None, api_name=None, vendor_name=None, delta=None):
-    "Build a Completion snapshot from in-progress PartAccum state"
-    parts = [p for p in pa.parts.values() if isinstance(p, Part)]
-    if delta and delta.text:
-        parts = parts.copy()
-        if parts and parts[-1].type==PartType.text:
-            p = parts[-1]
-            parts[-1] = Part(type=p.type, text=(p.text or '') + delta.text, data=p.data)
-        else: parts.append(Part(type=PartType.text, text=delta.text))
-    return Completion(raw.get('model', model), Msg(role="assistant", content=parts),
-                      fin, usg, api_name=api_name, vendor_name=vendor_name, raw={'deltas':deltas})
-# %% ../nbs/01_streaming.ipynb #c28f706f
-def completion_text(c):
-    "Combined text from a Completion."
-    return ''.join(p.text or '' for p in c.message.content if p.type==PartType.text)
-# %% ../nbs/01_streaming.ipynb #b2b9f7ca
-def stop_sequences(seqs):
-    "Stop when any sequence appears in the accumulated completion text."
-    seqs = L(seqs)
-    def _stop(c):
-        txt = completion_text(c)
-        for s in seqs:
-            if s in txt: return s
-    return _stop
+            else: merged.append(p)
+        return merged, tool_calls
+    def finalize(self):
+        self.parts, self.tool_calls = self.get_merged()
-# %% ../nbs/01_streaming.ipynb #931f686b
-def _trim_delta(d, cur, s):
+# %% ../nbs/01_streaming.ipynb #f11ea80a
+def _trim_delta(d, txt, s):
     "Trim `d.text` so accumulated text in `cur` stops just before stop sequence `s`."
-    txt,dt = completion_text(cur), d.text or ''
-    i = txt.find(s)
-    if i>=0: d.text = dt[:max(0, i-(len(txt)-len(dt)))]
+    idx = len(txt) - (txt.find(s) + len(s))
+    if idx>0: d.text = d.text[:-idx]
+# %% ../nbs/01_streaming.ipynb #efbf96d7
+def stop_and_trim(part_accum, d, stop_callables):
+    'Stop based on the accumulated text so far, and trim current delta'
+    parts,_ = part_accum.get_merged(with_tools=False)
+    prev = parts[-1].text if parts and parts[-1].type == PartType.text else ''
+    txt = prev + (d.text or '')
+    for f in stop_callables:
+        if res:=f(txt):
+            if isinstance(res, str): _trim_delta(d, txt, res)
+            return True
+    return False
 # %% ../nbs/01_streaming.ipynb #fc71790b
 async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_name=None, stop_callables=None):
@@ -120,18 +109,22 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
         idx = _fidx(d, name, pt)
         part_accum.append(typ, idx, **(ret or {kw: val}))
         return ret or {name: val}
+    def _yield_parts(d):
+        for args in [('text',), ('thinking',), ('citations', 'text', 'citations')]:
+            if (r := _proc(d, args[0], pt=args[1] if len(args)>1 else None, kw=args[2] if len(args)>2 else 'txt')):
+                yield r
+    stop, stop_yielded = False, False
     async for d in it:
-        stop = False
-        if stop_callables:
-            cur = accum_completion(part_accum, d.raw, fin, usg, deltas+[d], model, api_name=api_name, vendor_name=vendor_name, delta=d)
-            for f in stop_callables:
-                if res:=f(cur):
-                    if isinstance(res, str): _trim_delta(d, cur, res)
-                    stop = True
-                    break
-        if (r:=_proc(d, 'text')): yield r
-        if (r:=_proc(d, 'thinking')): yield r
-        if (r:=_proc(d, 'citations', pt='text', kw='citations')): yield r
+        # Check stop condition and yield stop delta
+        stop = stop_and_trim(part_accum, d, stop_callables)
+        if stop and not stop_yielded:
+            for r in _yield_parts(d): yield r
+            stop_yielded = True
+        # If stop the remaining deltas are yielded as processing
+        if stop: yield {'thinking':'processing'}
+        else:
+            for r in _yield_parts(d): yield r
+        # Rest incl. tools, finish reason, usage is processed independently
         for tc in d.tool_calls:
             args = tc.arguments.get('_delta', tc.arguments)
             _proc(d, 'tool_use', ret=dict(id=tc.id, name=tc.name, arguments=args, server=tc.server, extra=tc.extra))
@@ -143,20 +136,12 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
         if d.usage: usg = d.usage
         last_typ = typ
         deltas.append(d)
-        if stop:
-            fin = fin or FinishReason.stop
-            await it.aclose()
-            break
     part_accum.finalize()
-    # need to recheck for tool calls post collation for streaming
     tcs = part_accum.tool_calls
-    fin = FinishReason.tool_calls if fin==FinishReason.stop and any(~L(tcs).attrgot('server')) else fin
+    if stop: fin = FinishReason.stop
+    fin = FinishReason.tool_calls if fin==FinishReason.stop and any(~L(tcs).attrgot('server')) else fin # recheck tool calls post collation
     # tool calls and non-anthropic citations are yielded at the end
     yield Completion(d.raw.get('model', model),
             message=Msg(role="assistant", content=part_accum.parts),
             finish_reason=fin, usage=usg, tool_calls=tcs, api_name=api_name, vendor_name=vendor_name,
             raw={'deltas':deltas})
-# %% ../nbs/01_streaming.ipynb #f79d3b99
-async def fake_stream(*ss):
-    for s in ss: yield Delta(text=s, raw={'model':'fake'})

fastllm/types.py CHANGED Viewed

@@ -3,7 +3,8 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/00_types.ipynb.
 # %% auto #0
-__all__ = ['PartType', 'FinishReason', 'api_registry', 'model_prices_url', 'codex_pricing', 'Part', 'Msg', 'ToolCall',
+__all__ = ['PartType', 'FinishReason', 'api_registry', 'model_prices_url', 'haik45', 'sonn45', 'sonn', 'sonn46', 'opus46', 'opus',
+           'gpt54', 'gpt54m', 'codex54', 'codex55', 'codex53spark', 'codex_pricing', 'Part', 'Msg', 'ToolCall',
            'display_list', 'Usage', 'Completion', 'APIRegistry', 'mk_completion', 'mk_tool_res_msg', 'fn_schema',
            'sys_text', 'part_txt', 'data_url', 'url_mime', 'payload_kwargs', 'get_api_key', 'model_prices_meta',
            'infer_api_name', 'get_model_meta', 'get_model_info']
@@ -151,7 +152,6 @@ class APIRegistry:
 api_registry = APIRegistry()
 # %% ../nbs/00_types.ipynb #d58a5f96
-#COMMON
 def mk_completion(resp, model, api_name, vendor_name):
     "Normalize an api response into Completion."
     api = api_registry.apis[api_name]
@@ -167,7 +167,6 @@ def mk_completion(resp, model, api_name, vendor_name):
         raw=resp)
 # %% ../nbs/00_types.ipynb #d5322db5
-#COMMON
 def mk_tool_res_msg(tool_calls:list[ToolCall], results:list[str|list]):
     'A util to prepare parallel tool call with str or media list results'
     parts = []
@@ -177,7 +176,6 @@ def mk_tool_res_msg(tool_calls:list[ToolCall], results:list[str|list]):
     return Msg(role="tool", content=parts)
 # %% ../nbs/00_types.ipynb #8a8e468b
-#COMMON
 def fn_schema(t):
     "Extract (name, description, parameters) from any tool format."
     if not isinstance(t, dict): return None
@@ -188,7 +186,6 @@ def fn_schema(t):
     return None
 # %% ../nbs/00_types.ipynb #d1d48d91
-#COMMON
 def sys_text(system):
     "Extract text from system (str or Part)."
     if system is None: return None
@@ -197,7 +194,6 @@ def sys_text(system):
 def part_txt(p): return p.text if isinstance(p,Part) else p
 # %% ../nbs/00_types.ipynb #dc2b75a0
-#COMMON
 _ext_mime = {
     '.jpg':'image/jpeg', '.jpeg':'image/jpeg', '.png':'image/png', '.gif':'image/gif', '.webp':'image/webp',
     '.pdf':'application/pdf',
@@ -250,7 +246,19 @@ def get_model_meta(model, vendor_name=None, tfm=noop):
     elif vendor_name:                                           key = f"{vendor_name}/{model}"
     return dict2obj(tfm(mp.get(key), model, vendor_name))
-# %% ../nbs/00_types.ipynb #fbfdeb0a
+# %% ../nbs/00_types.ipynb #60607e23
+haik45 = "claude-haiku-4-5"
+sonn45 = "claude-sonnet-4-5"
+sonn = sonn46 = "claude-sonnet-4-6"
+opus46 = "claude-opus-4-6"
+opus = "claude-opus-4-7"
+gpt54 = "gpt-5.4"
+gpt54m = "gpt-5.4-mini"
+codex54 = "gpt-5.4"
+codex55 = "gpt-5.5"
+codex53spark = "gpt-5.3-codex-spark"
+# %% ../nbs/00_types.ipynb #d6d5b98c
 codex_pricing = {
     "input_cost_per_token": 0.10 / 1_000_000,
     "cache_creation_input_token_cost": 0.10 / 1_000_000,
@@ -258,23 +266,31 @@ codex_pricing = {
     "output_cost_per_token": 0.50 / 1_000_000,
 }
+_codex_overrides = {
+    codex53spark: dict(
+        supports_vision=False, supports_image_input=False, supports_web_search=True, supports_reasoning=True,
+        max_tokens=128000, max_input_tokens=128000, max_output_tokens=128000)
+}
+# %% ../nbs/00_types.ipynb #fbfdeb0a
 def get_model_info(mn, vendor_name=None):
-    info = get_model_meta(mn, vendor_name)
+    info = get_model_meta(mn, 'chatgpt' if vendor_name=='codex' else vendor_name)
     # anthropic web search
-    if 'search_context_cost_per_query' in info:
-        info['supports_web_search'] = True
-    # add reasoning to kimi
+    if 'search_context_cost_per_query' in info: info['supports_web_search'] = True
+    # kimi
     if 'kimi' in mn:
         if 'k2p6' in mn: info = get_model_meta(mn.replace('k2p6', 'k2p5'), vendor_name)
         info['supports_reasoning'] = True
         info['supports_vision'] = True
         if vendor_name == 'moonshot': info['supports_assistant_prefill'] = True
-    # add web search to gpt
+    # gpt web search
     if mn in ("gpt-5.4", "gpt-5.4-mini"):
         info['supports_web_search'] = True
         info.pop('mode', None)
-    # codex pricing
-    if vendor_name == 'codex': info = merge(info, codex_pricing)
+    # codex updates
+    if vendor_name == 'codex':
+        info = merge(info, codex_pricing)
+        info |= _codex_overrides.get(mn, {})
     # deepseek v4
     if vendor_name == 'deepseek' and mn in ("deepseek-v4-flash", "deepseek-v4-pro"):
         info = dict(get_model_meta("deepseek/deepseek-v3.2"))

{python_fastllm-0.0.2.dist-info → python_fastllm-0.0.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: python-fastllm
-Version: 0.0.2
+Version: 0.0.4
 Author-email: Kerem Turgutlu <keremturgutlu@gmail.com>
 License: Apache-2.0
 Project-URL: Repository, https://github.com/AnswerDotAI/fastllm

{python_fastllm-0.0.2.dist-info → python_fastllm-0.0.4.dist-info}/RECORD RENAMED Viewed

@@ -1,21 +1,21 @@
-fastllm/__init__.py,sha256=QvlVh4JTl3JL7jQAja76yKtT-IvF4631ASjWY1wS6AQ,22
-fastllm/_modidx.py,sha256=cOOFPFjpzDYzGBO-4V2gHyxfP9PJKGhxRBzmZ3k4yqs,28453
-fastllm/acomplete.py,sha256=Wl_MdtNe2CuTrmVGPDj5f5pOEnA-ToZqRpPKOHN0J2Q,6764
+fastllm/__init__.py,sha256=1mptEzQihbdyqqzMgdns_j5ZGK9gz7hR2bsgA_TnjO4,22
+fastllm/_modidx.py,sha256=oWXxw9eciYsnR2YAhla6c7HCPdXSFb9jV7WQ71fZXPQ,29551
+fastllm/acomplete.py,sha256=DTKUDx4Ed6_NdFlUj2XdQCY47p5tO0yv9ALB_22XVks,6615
 fastllm/anthropic.py,sha256=fG20kOv3d3wGKQe8rD5pFWgZHKe-vT-9QJ3nPXh2twY,14615
-fastllm/chat.py,sha256=cibUSgm_8FtFLU8ilCsD8f_2JqvA-KrwJOvHWiDiMrw,29390
+fastllm/chat.py,sha256=iVPHMbHFlUL8bIiUlYp4GqPhsCGn0pcl9M9LnKwj6lg,32921
 fastllm/gemini.py,sha256=E1EYMfV8IMpC_-WzlDrkhz_CJQmzmxvaVUucNgPOqSA,14947
 fastllm/openai_chat.py,sha256=wZ0HI0m9ipy9XVhqmYBXf-BmkVAOipUVwqu9NGB_rJU,10941
 fastllm/openai_responses.py,sha256=Nk5bfTCF2-a17nwvIsf-u39j539v9KIduVfScECItKk,13052
-fastllm/streaming.py,sha256=Ey0ufSYgJREvagMuHuTKBsxMxyS0S_StGuef8taY5PY,7235
-fastllm/types.py,sha256=ZL57Rhgfs_N-tj99B19-BUR8--MyWeyc2vnPjTcjpso,11473
+fastllm/streaming.py,sha256=FYG4-rt7mfGKrUPxCMp_Z3kkxGLKKAVPev8ifD3YFlQ,6652
+fastllm/types.py,sha256=2Tuf4AicfCSsf4AbkYWthPqyarGPru-LAkcPEWxQeLc,12192
 fastllm/specs/anthropic.json,sha256=VCgTjM2_HoDpCkeu3q_TCOEZLMHriJZLAG3LnDBAgGM,541035
 fastllm/specs/anthropic.yml,sha256=3S3NAKdXB1Nwp-Sn9Gmh4tBnwhGGhMO3DXkGqPXPUYs,724122
 fastllm/specs/gemini.json,sha256=zJGOdvZ2BvCiTENZt0-BDEvNBMl8h6EBmEskle_WBto,309331
 fastllm/specs/openai.with-code-samples.json,sha256=Kto19AW1u8MfxVDJ4cFVBIdZQOIyy8NWylswo57eABU,1995929
 fastllm/specs/openai.with-code-samples.yml,sha256=DlcWGdaeP4k7smVjt6UbyehJ-2XGU3rn3nCIBMDRfYU,2553630
 fastllm/specs/spec_manifest.json,sha256=9tVFwojXFnNqsAxQzCRTP1lgSIM0fXixnrXdv4Cmb0c,653
-python_fastllm-0.0.2.dist-info/METADATA,sha256=G1AwFAzZ0wS0-qgEkEi_mA3cbHCboq-bXVoHzWkhK7Y,19546
-python_fastllm-0.0.2.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
-python_fastllm-0.0.2.dist-info/entry_points.txt,sha256=dq0chsiRjJYStCOXweFW9L6LpyMTjWu2AabKCbTSbuI,36
-python_fastllm-0.0.2.dist-info/top_level.txt,sha256=F8qodL7nEGUHGmzzqfhNKCTIr1i0D6cvudOnm3z7o0Y,8
-python_fastllm-0.0.2.dist-info/RECORD,,
+python_fastllm-0.0.4.dist-info/METADATA,sha256=akEn90MvhkmJUpjbL8k28h9bDeOIbxOIUDmbDAIFqws,19546
+python_fastllm-0.0.4.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
+python_fastllm-0.0.4.dist-info/entry_points.txt,sha256=dq0chsiRjJYStCOXweFW9L6LpyMTjWu2AabKCbTSbuI,36
+python_fastllm-0.0.4.dist-info/top_level.txt,sha256=F8qodL7nEGUHGmzzqfhNKCTIr1i0D6cvudOnm3z7o0Y,8
+python_fastllm-0.0.4.dist-info/RECORD,,

{python_fastllm-0.0.2.dist-info → python_fastllm-0.0.4.dist-info}/WHEEL RENAMED Viewed

File without changes

{python_fastllm-0.0.2.dist-info → python_fastllm-0.0.4.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{python_fastllm-0.0.2.dist-info → python_fastllm-0.0.4.dist-info}/top_level.txt RENAMED Viewed

File without changes

python-fastllm 0.0.2__py3-none-any.whl → 0.0.4__py3-none-any.whl

python-fastllm 0.0.2__py3-none-any.whl → 0.0.4__py3-none-any.whl