PyPI - python-fastllm - Versions diffs - 0.0.3__tar.gz → 0.0.5__tar.gz - Mend

python-fastllm 0.0.3 (tar.gz) → 0.0.5 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

{python_fastllm-0.0.3 → python_fastllm-0.0.5}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: python-fastllm
-Version: 0.0.3
+Version: 0.0.5
 Author-email: Kerem Turgutlu <keremturgutlu@gmail.com>
 License: Apache-2.0
 Project-URL: Repository, https://github.com/AnswerDotAI/fastllm

python_fastllm-0.0.5/fastllm/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ __version__ = "0.0.5"

{python_fastllm-0.0.3 → python_fastllm-0.0.5}/fastllm/_modidx.py RENAMED Viewed

@@ -51,7 +51,11 @@ d = { 'settings': { 'branch': 'main',
                               'fastllm.chat.AsyncStreamFormatter': ('chat.html#asyncstreamformatter', 'fastllm/chat.py'),
                               'fastllm.chat.AsyncStreamFormatter.format_stream': ( 'chat.html#asyncstreamformatter.format_stream',
                                                                                    'fastllm/chat.py'),
+                              'fastllm.chat.FenceToolStop': ('chat.html#fencetoolstop', 'fastllm/chat.py'),
+                              'fastllm.chat.FenceToolStop.__call__': ('chat.html#fencetoolstop.__call__', 'fastllm/chat.py'),
+                              'fastllm.chat.FenceToolStop.__init__': ('chat.html#fencetoolstop.__init__', 'fastllm/chat.py'),
                               'fastllm.chat.FullResponse': ('chat.html#fullresponse', 'fastllm/chat.py'),
+                              'fastllm.chat.Msg.text': ('chat.html#msg.text', 'fastllm/chat.py'),
                               'fastllm.chat.StopResponse': ('chat.html#stopresponse', 'fastllm/chat.py'),
                               'fastllm.chat.StreamFormatter': ('chat.html#streamformatter', 'fastllm/chat.py'),
                               'fastllm.chat.StreamFormatter.__init__': ('chat.html#streamformatter.__init__', 'fastllm/chat.py'),
@@ -65,6 +69,7 @@ d = { 'settings': { 'branch': 'main',
                               'fastllm.chat.UsageStats.__repr__': ('chat.html#usagestats.__repr__', 'fastllm/chat.py'),
                               'fastllm.chat.UsageStats.fmt': ('chat.html#usagestats.fmt', 'fastllm/chat.py'),
                               'fastllm.chat.UsageStats.from_response': ('chat.html#usagestats.from_response', 'fastllm/chat.py'),
+                              'fastllm.chat._active_fence_langs': ('chat.html#_active_fence_langs', 'fastllm/chat.py'),
                               'fastllm.chat._add_cache_control': ('chat.html#_add_cache_control', 'fastllm/chat.py'),
                               'fastllm.chat._alite_call_func': ('chat.html#_alite_call_func', 'fastllm/chat.py'),
                               'fastllm.chat._apply_cache_idxs': ('chat.html#_apply_cache_idxs', 'fastllm/chat.py'),
@@ -79,7 +84,10 @@ d = { 'settings': { 'branch': 'main',
                               'fastllm.chat._lite_call_func': ('chat.html#_lite_call_func', 'fastllm/chat.py'),
                               'fastllm.chat._mk_content': ('chat.html#_mk_content', 'fastllm/chat.py'),
                               'fastllm.chat._mk_prefill': ('chat.html#_mk_prefill', 'fastllm/chat.py'),
+                              'fastllm.chat._mk_result_fence': ('chat.html#_mk_result_fence', 'fastllm/chat.py'),
                               'fastllm.chat._mk_tool_result': ('chat.html#_mk_tool_result', 'fastllm/chat.py'),
+                              'fastllm.chat._split_fence_msgs': ('chat.html#_split_fence_msgs', 'fastllm/chat.py'),
+                              'fastllm.chat._split_msg_on_fences': ('chat.html#_split_msg_on_fences', 'fastllm/chat.py'),
                               'fastllm.chat._srv_tc_summary': ('chat.html#_srv_tc_summary', 'fastllm/chat.py'),
                               'fastllm.chat._srvtools': ('chat.html#_srvtools', 'fastllm/chat.py'),
                               'fastllm.chat._tc_summary': ('chat.html#_tc_summary', 'fastllm/chat.py'),
@@ -93,6 +101,7 @@ d = { 'settings': { 'branch': 'main',
                               'fastllm.chat.astream_with_complete': ('chat.html#astream_with_complete', 'fastllm/chat.py'),
                               'fastllm.chat.cite_footnote': ('chat.html#cite_footnote', 'fastllm/chat.py'),
                               'fastllm.chat.contents': ('chat.html#contents', 'fastllm/chat.py'),
+                              'fastllm.chat.extract_fence_call': ('chat.html#extract_fence_call', 'fastllm/chat.py'),
                               'fastllm.chat.fmt2hist': ('chat.html#fmt2hist', 'fastllm/chat.py'),
                               'fastllm.chat.lite_mk_func': ('chat.html#lite_mk_func', 'fastllm/chat.py'),
                               'fastllm.chat.mk_msg': ('chat.html#mk_msg', 'fastllm/chat.py'),
@@ -101,9 +110,11 @@ d = { 'settings': { 'branch': 'main',
                               'fastllm.chat.mk_tr_details': ('chat.html#mk_tr_details', 'fastllm/chat.py'),
                               'fastllm.chat.postproc': ('chat.html#postproc', 'fastllm/chat.py'),
                               'fastllm.chat.remove_cache_ckpts': ('chat.html#remove_cache_ckpts', 'fastllm/chat.py'),
+                              'fastllm.chat.run_fence_tool': ('chat.html#run_fence_tool', 'fastllm/chat.py'),
                               'fastllm.chat.search_count': ('chat.html#search_count', 'fastllm/chat.py'),
                               'fastllm.chat.split_tools': ('chat.html#split_tools', 'fastllm/chat.py'),
                               'fastllm.chat.stop_reason': ('chat.html#stop_reason', 'fastllm/chat.py'),
+                              'fastllm.chat.stop_sequences': ('chat.html#stop_sequences', 'fastllm/chat.py'),
                               'fastllm.chat.structured': ('chat.html#structured', 'fastllm/chat.py')},
             'fastllm.gemini': { 'fastllm.gemini._gem_filter_sch': ('gemini.html#_gem_filter_sch', 'fastllm/gemini.py'),
                                 'fastllm.gemini._gem_part_type': ('gemini.html#_gem_part_type', 'fastllm/gemini.py'),
@@ -208,13 +219,12 @@ d = { 'settings': { 'branch': 'main',
                                    'fastllm.streaming.PartAccum': ('streaming.html#partaccum', 'fastllm/streaming.py'),
                                    'fastllm.streaming.PartAccum.append': ('streaming.html#partaccum.append', 'fastllm/streaming.py'),
                                    'fastllm.streaming.PartAccum.finalize': ('streaming.html#partaccum.finalize', 'fastllm/streaming.py'),
+                                   'fastllm.streaming.PartAccum.get_merged': ( 'streaming.html#partaccum.get_merged',
+                                                                               'fastllm/streaming.py'),
                                    'fastllm.streaming._trim_delta': ('streaming.html#_trim_delta', 'fastllm/streaming.py'),
-                                   'fastllm.streaming.accum_completion': ('streaming.html#accum_completion', 'fastllm/streaming.py'),
-                                   'fastllm.streaming.completion_text': ('streaming.html#completion_text', 'fastllm/streaming.py'),
-                                   'fastllm.streaming.fake_stream': ('streaming.html#fake_stream', 'fastllm/streaming.py'),
                                    'fastllm.streaming.mk_acollect_stream': ('streaming.html#mk_acollect_stream', 'fastllm/streaming.py'),
                                    'fastllm.streaming.norm_and_yield': ('streaming.html#norm_and_yield', 'fastllm/streaming.py'),
-                                   'fastllm.streaming.stop_sequences': ('streaming.html#stop_sequences', 'fastllm/streaming.py')},
+                                   'fastllm.streaming.stop_and_trim': ('streaming.html#stop_and_trim', 'fastllm/streaming.py')},
             'fastllm.types': { 'fastllm.types.APIRegistry': ('types.html#apiregistry', 'fastllm/types.py'),
                                'fastllm.types.APIRegistry.__init__': ('types.html#apiregistry.__init__', 'fastllm/types.py'),
                                'fastllm.types.APIRegistry.register': ('types.html#apiregistry.register', 'fastllm/types.py'),

{python_fastllm-0.0.3 → python_fastllm-0.0.5}/fastllm/acomplete.py RENAMED Viewed

@@ -18,7 +18,6 @@ from fastspec.errors import APIError
 from .types import *
 from .streaming import *
 from .openai_responses import *
-from .streaming import stop_sequences as _stop_sequences
 from .openai_chat import *
 from .anthropic import *
 from .gemini import *
@@ -30,7 +29,7 @@ oai_spec  = SpecParser.from_openapi(dict2obj(json.loads((specs_path/'openai.with
 gem_spec  = SpecParser.from_discovery(dict2obj(json.loads((specs_path/'gemini.json').read_text())))
 # %% ../nbs/06_acomplete.ipynb #32ee2546
-_codex_json = '~/.codex/auth.json', 'tokens','access_token'
+_codex_json = '~/.codex/auth.json', ('tokens','access_token')
 vendor_mapping = {
     "openai":       ('openai', 'https://api.openai.com/v1', 'OPENAI_API_KEY'),
     "anthropic":    ('anthropic', 'https://api.anthropic.com', 'ANTHROPIC_API_KEY'),
@@ -63,7 +62,7 @@ def mk_client(model, vendor_name=None, api_name=None, api_key=None, base_url=Non
                 if auth_fn.exists(): api_key = nested_idx(json.loads(auth_fn.read_text()), *keys)
             api_key = get_api_key(api_key, env_api_nm)
         except KeyError: raise ValueError(f"Unknown vendor '{vendor_name}', {err_msg}")
-    elif api_name and base_url and api_key:  vendor_name = ifnone(vendor_name, 'custom')
+    elif base_url and api_key: vendor_name, api_name = ifnone(vendor_name, 'custom'), ifnone(api_name, 'openai_chat')
     elif (api_name:=infer_api_name(model)):  base_url, vendor_name = None, api_name
     else: raise ValueError(f"Model {model} can't be auto resolved, {err_msg}")
     api = api_registry.apis[api_name]
@@ -106,7 +105,6 @@ async def acomplete(msgs, model, api_name=None, vendor_name=None, api_key=None,
     "Unified completion across different APIs."
     cli, api_name, vendor_name = mk_client(model, vendor_name, api_name, api_key, base_url, xtra_hdrs)
     api = api_registry.apis[api_name]
-    if stop_sequences: stop_callables = L(stop_callables) + [_stop_sequences(stop_sequences)]
     payload = api.mk_payload(msgs, model, stream=stream, stop_callables=stop_callables, **kwargs)
     payload = merge(payload, ifnone(xtra_body, {}))
     if vendor_name == 'codex':

{python_fastllm-0.0.3 → python_fastllm-0.0.5}/fastllm/chat.py RENAMED Viewed

@@ -4,10 +4,10 @@
 # %% auto #0
 __all__ = ['tool_dtls_tag', 're_tools', 'token_dtls_tag', 're_token', 'effort', 'remove_cache_ckpts', 'contents', 'stop_reason',
-           'mk_msg', 'split_tools', 'fmt2hist', 'mk_msgs', 'cite_footnote', 'postproc', 'lite_mk_func', 'ToolResponse',
-           'structured', 'StopResponse', 'FullResponse', 'search_count', 'UsageStats', 'AsyncChat', 'add_warning',
-           'astream_with_complete', 'mk_tr_details', 'mk_srv_tc_details', 'StreamFormatter', 'AsyncStreamFormatter',
-           'adisplay_stream']
+           'mk_msg', 'FenceToolStop', 'extract_fence_call', 'stop_sequences', 'split_tools', 'fmt2hist', 'mk_msgs',
+           'cite_footnote', 'postproc', 'lite_mk_func', 'ToolResponse', 'structured', 'StopResponse', 'FullResponse',
+           'search_count', 'UsageStats', 'AsyncChat', 'add_warning', 'astream_with_complete', 'run_fence_tool',
+           'mk_tr_details', 'mk_srv_tc_details', 'StreamFormatter', 'AsyncStreamFormatter', 'adisplay_stream']
 # %% ../nbs/07_chat.ipynb #d5a3bc1f
 import asyncio, base64, json, mimetypes, random, string, ast, warnings
@@ -94,6 +94,65 @@ token_dtls_tag = "<details class='token-usage-details'>"
 re_token = re.compile(fr"^{re.escape(token_dtls_tag)}<summary>.*?</summary>\n*\n*`.*?`\n*\n*</details>\n?",
                       flags=re.DOTALL|re.MULTILINE)
+# %% ../nbs/07_chat.ipynb #be998131
+_fence_back = '`````'
+_fence_re = re.compile(f'^{_fence_back}(py|bash)\n(.*?)\n{_fence_back}', re.DOTALL | re.MULTILINE)
+_result_re = re.compile(f'\n{_fence_back}result\n(.*?)\n{_fence_back}\n', re.DOTALL)
+_lang2tool = dict(py='python', bash='bash')
+class FenceToolStop:
+    def __init__(self, langs): self.langs = langs
+    def __call__(self, text):
+        "Return trim result if complete fence detected in active lang"
+        m = _fence_re.search(text)
+        if m and m.group(1) in self.langs: return m.group(0)
+# %% ../nbs/07_chat.ipynb #e6360e96
+def extract_fence_call(text):
+    "Return (lang, code) if text ends with terminated py/bash fence, else None"
+    ms = list(_fence_re.finditer(text))
+    if not ms: return None
+    m = ms[-1]
+    if not text[m.end():].strip(): return m.group(1), m.group(2)
+# %% ../nbs/07_chat.ipynb #215183bf
+@patch(as_prop=True)
+def text(self:Msg): return ''.join(p.text or '' for p in self.content if p.type == PartType.text)
+# %% ../nbs/07_chat.ipynb #1de7e4d2
+def _mk_result_fence(output): return f"\n{_fence_back}result\n{output}\n{_fence_back}\n"
+def _split_msg_on_fences(msg):
+    "Split an assistant Msg on result fences, return list of Msgs"
+    if msg.role != 'assistant': return [msg]
+    if not _result_re.search(msg.text): return [msg]
+    res, asst_parts, tool_parts = [], [], []
+    for msg_part in msg.content:
+        if msg_part.type == PartType.thinking: asst_parts.append(msg_part)
+        elif msg_part.type == PartType.tool_use: tool_parts.append(msg_part)
+        elif parts := _result_re.split(msg_part.text or ''):
+            for i,p in enumerate(parts):
+                if not p: continue
+                if i % 2 == 0: res.append(Msg(role='assistant', content=asst_parts+[Part(type=PartType.text, text=p.strip())]))
+                else:          res.append(Msg(role='user', content=[Part(type=PartType.text, text=_mk_result_fence(p))]))
+    if tool_parts: res.append(Msg(role='assistant', content=tool_parts))
+    return res
+def _split_fence_msgs(msgs):
+    "Split all assistant msgs on result fences for wire protocol"
+    res = []
+    for m in msgs: res.extend(_split_msg_on_fences(m))
+    return res
+# %% ../nbs/07_chat.ipynb #b161ca9e
+def stop_sequences(seqs):
+    "Stop when any sequence appears in the accumulated completion text."
+    seqs = L(seqs)
+    def _stop(text):
+        for s in seqs:
+            if s in text: return text[:text.find(s)+len(s)]
+    return _stop
 # %% ../nbs/07_chat.ipynb #45ada210
 def _extract_tool_parts(text:str):
     "Extract (tool_use_part, tool_result_part) from <details> json block"
@@ -110,10 +169,13 @@ def split_tools(s):
     "Split formatted output into (text, summary, tooljson) chunks"
     return [(txt,summ,tj) for txt,_,summ,tj in chunked(re_tools.split(s.strip()), 4, pad=True)]
+# %% ../nbs/07_chat.ipynb #44060a78
 def fmt2hist(outp:str)->list[Msg]:
     "Transform a formatted output string into fastllm canonical Msgs"
     if token_dtls_tag in outp: outp = re_token.sub('', outp)
-    if tool_dtls_tag not in outp: return [Msg(role='assistant', content=[Part(type=PartType.text, text=outp.strip())])]
+    if tool_dtls_tag not in outp:
+        msg = Msg(role='assistant', content=[Part(type=PartType.text, text=outp.strip())])
+        return _split_msg_on_fences(msg)
     hist, asst_parts, tool_parts = [], [], []
     def flush():
         if tool_parts:
@@ -122,17 +184,18 @@ def fmt2hist(outp:str)->list[Msg]:
             asst_parts.clear(); tool_parts.clear()
     for txt,_,tj in split_tools(outp):
         if txt and txt.strip():
-            if tool_parts: flush()   # text after tool results => new assistant turn
+            if tool_parts: flush()
             asst_parts.append(Part(type=PartType.text, text=txt.strip()))
         if tj and (tp := _extract_tool_parts(tj)):
             asst_parts.append(tp[0])
             tool_parts.append(tp[1])
     flush()
     if asst_parts: hist.append(Msg(role='assistant', content=asst_parts))
-    # TODO: Is this needed?
-    # if hist and hist[-1].role == 'tool':
-    #     hist.append(Msg(role='assistant', content=[Part(type=PartType.text, text='.')]))
-    return hist
+    result = []
+    for msg in hist:
+        if msg.role == 'assistant': result.extend(_split_msg_on_fences(msg))
+        else: result.append(msg)
+    return result
 # %% ../nbs/07_chat.ipynb #8de5ce8d
 def _apply_cache_idxs(msgs, cache_idxs=[-1], ttl=None):
@@ -309,6 +372,13 @@ def _inject_tool_reminder(msgs, reminder):
     msgs[i] = m
     return msgs
+# %% ../nbs/07_chat.ipynb #e7eb2032
+def _active_fence_langs(tool_schemas):
+    "Return set of active fence langs whose mapped tool is registered"
+    if not tool_schemas: return set()
+    names = {nested_idx(t, 'function', 'name') for t in tool_schemas}
+    return {lang for lang, tname in _lang2tool.items() if tname in names}
 # %% ../nbs/07_chat.ipynb #e9a14051
 class AsyncChat:
     def __init__(
@@ -352,6 +422,7 @@ class AsyncChat:
         self.hist = mk_msgs(self.hist, self.cache and 'claude' in self.model, cache_idxs, self.ttl)
         msgs = self.hist
         if prefill: msgs = self.hist + [Msg(role='assistant', content=[Part(PartType.text, prefill)])]
+        msgs = _split_fence_msgs(msgs)
         if self.tool_reminder: msgs = _inject_tool_reminder(msgs, self.tool_reminder)
         if 'deepseek' in self.model:
             # The `reasoning_content` in the thinking mode must be passed back to the API.
@@ -405,7 +476,7 @@ def _think_kw(model, think, vendor_name):
 def _prep_call(self:AsyncChat, prefill, search, max_tokens, kwargs, stream=False, think=None):
     "Prepare model info, prefill, search, and provider kwargs for a completion call"
     model_info = get_model_info(self.model, self.vendor_name)
-    if max_tokens is None: max_tokens = model_info.get('max_output_tokens')
+    if max_tokens is None: max_tokens = ifnone(model_info.get('max_output_tokens'), 32_000)
     if not model_info.get("supports_assistant_prefill"): prefill = None
     if _has_search(model_info) and (s:=ifnone(search,self.search)):
         if 'web_search_options' not in kwargs: kwargs['web_search_options'] = {}
@@ -419,8 +490,12 @@ def _prep_call(self:AsyncChat, prefill, search, max_tokens, kwargs, stream=False
     if self.base_url:      kwargs['base_url'] = self.base_url
     if self.extra_headers: kwargs['xtra_headers'] = self.extra_headers
     kwargs.update(_think_kw(self.model, think, self.vendor_name))
+    if (langs := _active_fence_langs(self.tool_schemas)):
+        if not any(isinstance(s, FenceToolStop) for s in kwargs.get('stop_callables', [])):
+            kwargs['stop_callables'] = kwargs.get('stop_callables', []) + [FenceToolStop(langs)]
     return prefill, max_tokens
 # %% ../nbs/07_chat.ipynb #07951b77
 @patch
 def print_hist(self:AsyncChat):
@@ -474,16 +549,27 @@ async def _call(self:AsyncChat, msg=None, prefill=None, temp=None, think=None, s
     self._track(res)
     yield res
+    toolloop, prompt = False, None
+    if (langs := _active_fence_langs(self.tool_schemas)):
+        if m := last(self.hist, lambda o: o.role == 'assistant'):
+            if fence := extract_fence_call(m.text):
+                lang, code = fence
+                out = await run_fence_tool(lang, code, self.ns)
+                for p in reversed(m.content):
+                    if p.type == PartType.text: p.text += out; break
+                if stream: yield {'text': out}
+                toolloop = True
     if stcs:= _srvtools(res.tool_calls):
         for tc in stcs: yield tc
     if tcs := _usrtools(res.tool_calls):
         tres = await parallel_async(_alite_call_func, tcs, timeout=tc_timeout, n_workers=n_workers, pause=pause, **self.tcdict)
         tmsg = mk_tool_res_msg(tcs, tres)
-        # TODO: We yield tool calls at the end with their results, fastllm doesn't yield streaming tool calls during streaming as once the collation is done for simplicity, but it can
         for r in tmsg.content: yield r
         self.hist.append(tmsg)
         if step>=max_steps-1 or _has_stop(tmsg.content): prompt,tool_choice,search = mk_msg(final_prompt),'none',False
-        else: prompt = None
+        toolloop = True
+    if toolloop and step <= max_steps:
         try:
             async for result in self._call(
                 prompt, prefill, temp, think, search, stream, max_steps, step+1,
@@ -495,6 +581,14 @@ async def _call(self:AsyncChat, msg=None, prefill=None, temp=None, think=None, s
                 prompt, prefill, temp, think, search, stream, max_steps, step+1,
                 final_prompt, tool_choice='none', **kwargs): yield result
+# %% ../nbs/07_chat.ipynb #4dc002da
+async def run_fence_tool(lang, code, ns):
+    "Run the mapped tool for `lang` with the code, return result fence"
+    tname = _lang2tool[lang]
+    arg = dict(code=code) if lang == 'py' else dict(command=code)
+    res = _mk_tool_result(await call_func_async(tname, arg, ns=ns, raise_on_err=False))
+    return _mk_result_fence(_trunc_str(str(res)))
 # %% ../nbs/07_chat.ipynb #1361515a
 @patch
 @delegates(AsyncChat._call)
@@ -596,15 +690,15 @@ class AsyncStreamFormatter(StreamFormatter):
         "Format the response stream for markdown display."
         async for o in rs: yield self.format_item(o)
-# %% ../nbs/07_chat.ipynb #f4345023
+# %% ../nbs/07_chat.ipynb #944bcd25
 @delegates(AsyncStreamFormatter)
 async def adisplay_stream(rs, **kwargs):
     "Use IPython.display to markdown display the response stream."
     try: from IPython.display import display, Markdown
     except ModuleNotFoundError: raise ModuleNotFoundError("This function requires ipython. Please run `pip install ipython` to use.")
     fmt = AsyncStreamFormatter(**kwargs)
-    md = ''
+    md,h = '',display(Markdown(' '), display_id=True)
     async for o in fmt.format_stream(rs):
-        md+=o
-        display(Markdown(md),clear=True)
+        md += o
+        if md: h.update(Markdown(md))
     return fmt

{python_fastllm-0.0.3 → python_fastllm-0.0.5}/fastllm/streaming.py RENAMED Viewed

@@ -3,11 +3,10 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/01_streaming.ipynb.
 # %% auto #0
-__all__ = ['Delta', 'norm_and_yield', 'PartAccum', 'accum_completion', 'completion_text', 'stop_sequences', 'mk_acollect_stream',
-           'fake_stream']
+__all__ = ['Delta', 'norm_and_yield', 'PartAccum', 'stop_and_trim', 'mk_acollect_stream']
 # %% ../nbs/01_streaming.ipynb #0df5c926
-import json
+import json,copy
 from dataclasses import dataclass, field, fields
 from fastcore.utils import *
 from fastcore.meta import delegates
@@ -56,54 +55,44 @@ class PartAccum:
                 # anthropic citations have matching idx
                 self.parts[index].data['citations'].extend(citations or [])
-    def finalize(self):
-        for idx,tc in self.parts.items():
-            if isinstance(tc, ToolCall):
-                if isinstance(tc.arguments, str): tc.arguments = json.loads(tc.arguments) if tc.arguments else {}
-                self.tool_calls.append(tc)
-                data = {**tc.extra, 'id':tc.id, 'name':tc.name, 'arguments':tc.arguments, 'server':tc.server}
-                self.parts[idx] = Part(type=PartType.tool_use, data=data)
+    def get_merged(self, with_tools=True):
+        tmp_parts = copy.deepcopy(self.parts)
+        tool_calls = []
+        if with_tools:
+            for idx,tc in tmp_parts.items():
+                if isinstance(tc, ToolCall):
+                    if isinstance(tc.arguments, str): tc.arguments = json.loads(tc.arguments) if tc.arguments else {}
+                    tool_calls.append(tc)
+                    data = {**tc.extra, 'id':tc.id, 'name':tc.name, 'arguments':tc.arguments, 'server':tc.server}
+                    tmp_parts[idx] = Part(type=PartType.tool_use, data=data)
         merged = []
-        for p in self.parts.values():
+        for p in tmp_parts.values():
+            if isinstance(p, ToolCall) and not with_tools: continue
             if merged and merged[-1].type == p.type and p.type in (PartType.text, PartType.thinking): merged[-1].text += p.text
-            else: merged.append(p)
-        self.parts = merged
-# %% ../nbs/01_streaming.ipynb #0e8ca58e
-def accum_completion(pa, raw, fin, usg, deltas, model=None, api_name=None, vendor_name=None, delta=None):
-    "Build a Completion snapshot from in-progress PartAccum state"
-    parts = [p for p in pa.parts.values() if isinstance(p, Part)]
-    if delta and delta.text:
-        parts = parts.copy()
-        if parts and parts[-1].type==PartType.text:
-            p = parts[-1]
-            parts[-1] = Part(type=p.type, text=(p.text or '') + delta.text, data=p.data)
-        else: parts.append(Part(type=PartType.text, text=delta.text))
-    return Completion(raw.get('model', model), Msg(role="assistant", content=parts),
-                      fin, usg, api_name=api_name, vendor_name=vendor_name, raw={'deltas':deltas})
-# %% ../nbs/01_streaming.ipynb #c28f706f
-def completion_text(c):
-    "Combined text from a Completion."
-    return ''.join(p.text or '' for p in c.message.content if p.type==PartType.text)
-# %% ../nbs/01_streaming.ipynb #b2b9f7ca
-def stop_sequences(seqs):
-    "Stop when any sequence appears in the accumulated completion text."
-    seqs = L(seqs)
-    def _stop(c):
-        txt = completion_text(c)
-        for s in seqs:
-            if s in txt: return s
-    return _stop
+            else: merged.append(p)
+        return merged, tool_calls
+    def finalize(self):
+        self.parts, self.tool_calls = self.get_merged()
-# %% ../nbs/01_streaming.ipynb #931f686b
-def _trim_delta(d, cur, s):
+# %% ../nbs/01_streaming.ipynb #f11ea80a
+def _trim_delta(d, txt, s):
     "Trim `d.text` so accumulated text in `cur` stops just before stop sequence `s`."
-    txt,dt = completion_text(cur), d.text or ''
-    i = txt.find(s)
-    if i>=0: d.text = dt[:max(0, i-(len(txt)-len(dt)))]
+    idx = len(txt) - (txt.find(s) + len(s))
+    if idx>0: d.text = d.text[:-idx]
+# %% ../nbs/01_streaming.ipynb #efbf96d7
+def stop_and_trim(part_accum, d, stop_callables):
+    'Stop based on the accumulated text so far, and trim current delta'
+    parts,_ = part_accum.get_merged(with_tools=False)
+    prev = parts[-1].text if parts and parts[-1].type == PartType.text else ''
+    txt = prev + (d.text or '')
+    for f in stop_callables:
+        if res:=f(txt):
+            if isinstance(res, str): _trim_delta(d, txt, res)
+            return True
+    return False
 # %% ../nbs/01_streaming.ipynb #fc71790b
 async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_name=None, stop_callables=None):
@@ -120,18 +109,22 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
         idx = _fidx(d, name, pt)
         part_accum.append(typ, idx, **(ret or {kw: val}))
         return ret or {name: val}
+    def _yield_parts(d):
+        for args in [('text',), ('thinking',), ('citations', 'text', 'citations')]:
+            if (r := _proc(d, args[0], pt=args[1] if len(args)>1 else None, kw=args[2] if len(args)>2 else 'txt')):
+                yield r
+    stop, stop_yielded = False, False
     async for d in it:
-        stop = False
-        if stop_callables:
-            cur = accum_completion(part_accum, d.raw, fin, usg, deltas+[d], model, api_name=api_name, vendor_name=vendor_name, delta=d)
-            for f in stop_callables:
-                if res:=f(cur):
-                    if isinstance(res, str): _trim_delta(d, cur, res)
-                    stop = True
-                    break
-        if (r:=_proc(d, 'text')): yield r
-        if (r:=_proc(d, 'thinking')): yield r
-        if (r:=_proc(d, 'citations', pt='text', kw='citations')): yield r
+        # Check stop condition and yield stop delta
+        stop = stop_and_trim(part_accum, d, stop_callables)
+        if stop and not stop_yielded:
+            for r in _yield_parts(d): yield r
+            stop_yielded = True
+        # If stop the remaining deltas are yielded as processing
+        if stop: yield {'thinking':'processing'}
+        else:
+            for r in _yield_parts(d): yield r
+        # Rest incl. tools, finish reason, usage is processed independently
         for tc in d.tool_calls:
             args = tc.arguments.get('_delta', tc.arguments)
             _proc(d, 'tool_use', ret=dict(id=tc.id, name=tc.name, arguments=args, server=tc.server, extra=tc.extra))
@@ -143,20 +136,12 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
         if d.usage: usg = d.usage
         last_typ = typ
         deltas.append(d)
-        if stop:
-            fin = fin or FinishReason.stop
-            await it.aclose()
-            break
     part_accum.finalize()
-    # need to recheck for tool calls post collation for streaming
     tcs = part_accum.tool_calls
-    fin = FinishReason.tool_calls if fin==FinishReason.stop and any(~L(tcs).attrgot('server')) else fin
+    if stop: fin = FinishReason.stop
+    fin = FinishReason.tool_calls if fin==FinishReason.stop and any(~L(tcs).attrgot('server')) else fin # recheck tool calls post collation
     # tool calls and non-anthropic citations are yielded at the end
     yield Completion(d.raw.get('model', model),
             message=Msg(role="assistant", content=part_accum.parts),
             finish_reason=fin, usage=usg, tool_calls=tcs, api_name=api_name, vendor_name=vendor_name,
             raw={'deltas':deltas})
-# %% ../nbs/01_streaming.ipynb #f79d3b99
-async def fake_stream(*ss):
-    for s in ss: yield Delta(text=s, raw={'model':'fake'})

{python_fastllm-0.0.3 → python_fastllm-0.0.5}/fastllm/types.py RENAMED Viewed

@@ -240,7 +240,7 @@ def infer_api_name(model):
 def get_model_meta(model, vendor_name=None, tfm=noop):
     "Look up cost metadata for `model` from litellm price map, using `vendor_name` prefix if needed."
     vendor_name = ifnone(vendor_name, infer_api_name(model))
-    mp = model_prices_meta()
+    mp, key = model_prices_meta(), ''
     if model in mp: key = model
     elif vendor_name=='gemini' and model.startswith('models/'): key = f"gemini/{model.removeprefix('models/')}"
     elif vendor_name:                                           key = f"{vendor_name}/{model}"
@@ -268,13 +268,13 @@ codex_pricing = {
 _codex_overrides = {
     codex53spark: dict(
-        supports_vision=False, supports_image_input=False, supports_web_search=True,
+        supports_vision=False, supports_image_input=False, supports_web_search=True, supports_reasoning=True,
         max_tokens=128000, max_input_tokens=128000, max_output_tokens=128000)
 }
 # %% ../nbs/00_types.ipynb #fbfdeb0a
-def get_model_info(mn, vendor_name=None):
-    info = get_model_meta(mn, vendor_name)
+def get_model_info(mn, vendor_name=None, strict=False):
+    info = get_model_meta(mn, 'chatgpt' if vendor_name=='codex' else vendor_name)
     # anthropic web search
     if 'search_context_cost_per_query' in info: info['supports_web_search'] = True
     # kimi
@@ -288,7 +288,7 @@ def get_model_info(mn, vendor_name=None):
         info['supports_web_search'] = True
         info.pop('mode', None)
     # codex updates
-    if vendor_name == 'codex':
+    if vendor_name == 'codex':
         info = merge(info, codex_pricing)
         info |= _codex_overrides.get(mn, {})
     # deepseek v4
@@ -306,6 +306,9 @@ def get_model_info(mn, vendor_name=None):
                     supports_prompt_caching=True, supports_native_streaming=True, supports_native_structured_output=True,
                     max_tokens=1000000, max_input_tokens=1000000, max_output_tokens=65536,
                     input_cost_per_token=0.5e-6, cache_read_input_token_cost=0.1e-6, output_cost_per_token=3.0e-6)
+    # unresolved models
+    if not info and not strict: info = info | codex_pricing
     return dict2obj(info)
 # %% ../nbs/00_types.ipynb #8bfca02d

{python_fastllm-0.0.3 → python_fastllm-0.0.5}/python_fastllm.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: python-fastllm
-Version: 0.0.3
+Version: 0.0.5
 Author-email: Kerem Turgutlu <keremturgutlu@gmail.com>
 License: Apache-2.0
 Project-URL: Repository, https://github.com/AnswerDotAI/fastllm