python-fastllm 0.0.2__py3-none-any.whl → 0.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fastllm/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.0.2"
1
+ __version__ = "0.0.4"
fastllm/_modidx.py CHANGED
@@ -51,7 +51,11 @@ d = { 'settings': { 'branch': 'main',
51
51
  'fastllm.chat.AsyncStreamFormatter': ('chat.html#asyncstreamformatter', 'fastllm/chat.py'),
52
52
  'fastllm.chat.AsyncStreamFormatter.format_stream': ( 'chat.html#asyncstreamformatter.format_stream',
53
53
  'fastllm/chat.py'),
54
+ 'fastllm.chat.FenceToolStop': ('chat.html#fencetoolstop', 'fastllm/chat.py'),
55
+ 'fastllm.chat.FenceToolStop.__call__': ('chat.html#fencetoolstop.__call__', 'fastllm/chat.py'),
56
+ 'fastllm.chat.FenceToolStop.__init__': ('chat.html#fencetoolstop.__init__', 'fastllm/chat.py'),
54
57
  'fastllm.chat.FullResponse': ('chat.html#fullresponse', 'fastllm/chat.py'),
58
+ 'fastllm.chat.Msg.text': ('chat.html#msg.text', 'fastllm/chat.py'),
55
59
  'fastllm.chat.StopResponse': ('chat.html#stopresponse', 'fastllm/chat.py'),
56
60
  'fastllm.chat.StreamFormatter': ('chat.html#streamformatter', 'fastllm/chat.py'),
57
61
  'fastllm.chat.StreamFormatter.__init__': ('chat.html#streamformatter.__init__', 'fastllm/chat.py'),
@@ -65,6 +69,7 @@ d = { 'settings': { 'branch': 'main',
65
69
  'fastllm.chat.UsageStats.__repr__': ('chat.html#usagestats.__repr__', 'fastllm/chat.py'),
66
70
  'fastllm.chat.UsageStats.fmt': ('chat.html#usagestats.fmt', 'fastllm/chat.py'),
67
71
  'fastllm.chat.UsageStats.from_response': ('chat.html#usagestats.from_response', 'fastllm/chat.py'),
72
+ 'fastllm.chat._active_fence_langs': ('chat.html#_active_fence_langs', 'fastllm/chat.py'),
68
73
  'fastllm.chat._add_cache_control': ('chat.html#_add_cache_control', 'fastllm/chat.py'),
69
74
  'fastllm.chat._alite_call_func': ('chat.html#_alite_call_func', 'fastllm/chat.py'),
70
75
  'fastllm.chat._apply_cache_idxs': ('chat.html#_apply_cache_idxs', 'fastllm/chat.py'),
@@ -79,7 +84,10 @@ d = { 'settings': { 'branch': 'main',
79
84
  'fastllm.chat._lite_call_func': ('chat.html#_lite_call_func', 'fastllm/chat.py'),
80
85
  'fastllm.chat._mk_content': ('chat.html#_mk_content', 'fastllm/chat.py'),
81
86
  'fastllm.chat._mk_prefill': ('chat.html#_mk_prefill', 'fastllm/chat.py'),
87
+ 'fastllm.chat._mk_result_fence': ('chat.html#_mk_result_fence', 'fastllm/chat.py'),
82
88
  'fastllm.chat._mk_tool_result': ('chat.html#_mk_tool_result', 'fastllm/chat.py'),
89
+ 'fastllm.chat._split_fence_msgs': ('chat.html#_split_fence_msgs', 'fastllm/chat.py'),
90
+ 'fastllm.chat._split_msg_on_fences': ('chat.html#_split_msg_on_fences', 'fastllm/chat.py'),
83
91
  'fastllm.chat._srv_tc_summary': ('chat.html#_srv_tc_summary', 'fastllm/chat.py'),
84
92
  'fastllm.chat._srvtools': ('chat.html#_srvtools', 'fastllm/chat.py'),
85
93
  'fastllm.chat._tc_summary': ('chat.html#_tc_summary', 'fastllm/chat.py'),
@@ -93,6 +101,7 @@ d = { 'settings': { 'branch': 'main',
93
101
  'fastllm.chat.astream_with_complete': ('chat.html#astream_with_complete', 'fastllm/chat.py'),
94
102
  'fastllm.chat.cite_footnote': ('chat.html#cite_footnote', 'fastllm/chat.py'),
95
103
  'fastllm.chat.contents': ('chat.html#contents', 'fastllm/chat.py'),
104
+ 'fastllm.chat.extract_fence_call': ('chat.html#extract_fence_call', 'fastllm/chat.py'),
96
105
  'fastllm.chat.fmt2hist': ('chat.html#fmt2hist', 'fastllm/chat.py'),
97
106
  'fastllm.chat.lite_mk_func': ('chat.html#lite_mk_func', 'fastllm/chat.py'),
98
107
  'fastllm.chat.mk_msg': ('chat.html#mk_msg', 'fastllm/chat.py'),
@@ -101,9 +110,11 @@ d = { 'settings': { 'branch': 'main',
101
110
  'fastllm.chat.mk_tr_details': ('chat.html#mk_tr_details', 'fastllm/chat.py'),
102
111
  'fastllm.chat.postproc': ('chat.html#postproc', 'fastllm/chat.py'),
103
112
  'fastllm.chat.remove_cache_ckpts': ('chat.html#remove_cache_ckpts', 'fastllm/chat.py'),
113
+ 'fastllm.chat.run_fence_tool': ('chat.html#run_fence_tool', 'fastllm/chat.py'),
104
114
  'fastllm.chat.search_count': ('chat.html#search_count', 'fastllm/chat.py'),
105
115
  'fastllm.chat.split_tools': ('chat.html#split_tools', 'fastllm/chat.py'),
106
116
  'fastllm.chat.stop_reason': ('chat.html#stop_reason', 'fastllm/chat.py'),
117
+ 'fastllm.chat.stop_sequences': ('chat.html#stop_sequences', 'fastllm/chat.py'),
107
118
  'fastllm.chat.structured': ('chat.html#structured', 'fastllm/chat.py')},
108
119
  'fastllm.gemini': { 'fastllm.gemini._gem_filter_sch': ('gemini.html#_gem_filter_sch', 'fastllm/gemini.py'),
109
120
  'fastllm.gemini._gem_part_type': ('gemini.html#_gem_part_type', 'fastllm/gemini.py'),
@@ -208,13 +219,12 @@ d = { 'settings': { 'branch': 'main',
208
219
  'fastllm.streaming.PartAccum': ('streaming.html#partaccum', 'fastllm/streaming.py'),
209
220
  'fastllm.streaming.PartAccum.append': ('streaming.html#partaccum.append', 'fastllm/streaming.py'),
210
221
  'fastllm.streaming.PartAccum.finalize': ('streaming.html#partaccum.finalize', 'fastllm/streaming.py'),
222
+ 'fastllm.streaming.PartAccum.get_merged': ( 'streaming.html#partaccum.get_merged',
223
+ 'fastllm/streaming.py'),
211
224
  'fastllm.streaming._trim_delta': ('streaming.html#_trim_delta', 'fastllm/streaming.py'),
212
- 'fastllm.streaming.accum_completion': ('streaming.html#accum_completion', 'fastllm/streaming.py'),
213
- 'fastllm.streaming.completion_text': ('streaming.html#completion_text', 'fastllm/streaming.py'),
214
- 'fastllm.streaming.fake_stream': ('streaming.html#fake_stream', 'fastllm/streaming.py'),
215
225
  'fastllm.streaming.mk_acollect_stream': ('streaming.html#mk_acollect_stream', 'fastllm/streaming.py'),
216
226
  'fastllm.streaming.norm_and_yield': ('streaming.html#norm_and_yield', 'fastllm/streaming.py'),
217
- 'fastllm.streaming.stop_sequences': ('streaming.html#stop_sequences', 'fastllm/streaming.py')},
227
+ 'fastllm.streaming.stop_and_trim': ('streaming.html#stop_and_trim', 'fastllm/streaming.py')},
218
228
  'fastllm.types': { 'fastllm.types.APIRegistry': ('types.html#apiregistry', 'fastllm/types.py'),
219
229
  'fastllm.types.APIRegistry.__init__': ('types.html#apiregistry.__init__', 'fastllm/types.py'),
220
230
  'fastllm.types.APIRegistry.register': ('types.html#apiregistry.register', 'fastllm/types.py'),
fastllm/acomplete.py CHANGED
@@ -18,7 +18,6 @@ from fastspec.errors import APIError
18
18
  from .types import *
19
19
  from .streaming import *
20
20
  from .openai_responses import *
21
- from .streaming import stop_sequences as _stop_sequences
22
21
  from .openai_chat import *
23
22
  from .anthropic import *
24
23
  from .gemini import *
@@ -30,7 +29,7 @@ oai_spec = SpecParser.from_openapi(dict2obj(json.loads((specs_path/'openai.with
30
29
  gem_spec = SpecParser.from_discovery(dict2obj(json.loads((specs_path/'gemini.json').read_text())))
31
30
 
32
31
  # %% ../nbs/06_acomplete.ipynb #32ee2546
33
- _codex_json = '~/.codex/auth.json', 'tokens','access_token'
32
+ _codex_json = '~/.codex/auth.json', ('tokens','access_token')
34
33
  vendor_mapping = {
35
34
  "openai": ('openai', 'https://api.openai.com/v1', 'OPENAI_API_KEY'),
36
35
  "anthropic": ('anthropic', 'https://api.anthropic.com', 'ANTHROPIC_API_KEY'),
@@ -106,7 +105,6 @@ async def acomplete(msgs, model, api_name=None, vendor_name=None, api_key=None,
106
105
  "Unified completion across different APIs."
107
106
  cli, api_name, vendor_name = mk_client(model, vendor_name, api_name, api_key, base_url, xtra_hdrs)
108
107
  api = api_registry.apis[api_name]
109
- if stop_sequences: stop_callables = L(stop_callables) + [_stop_sequences(stop_sequences)]
110
108
  payload = api.mk_payload(msgs, model, stream=stream, stop_callables=stop_callables, **kwargs)
111
109
  payload = merge(payload, ifnone(xtra_body, {}))
112
110
  if vendor_name == 'codex':
fastllm/chat.py CHANGED
@@ -3,12 +3,11 @@
3
3
  # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/07_chat.ipynb.
4
4
 
5
5
  # %% auto #0
6
- __all__ = ['haik45', 'sonn45', 'sonn', 'sonn46', 'opus46', 'opus', 'gpt54', 'gpt54m', 'codex54', 'codex55', 'tool_dtls_tag',
7
- 're_tools', 'token_dtls_tag', 're_token', 'effort', 'remove_cache_ckpts', 'contents', 'stop_reason',
8
- 'mk_msg', 'split_tools', 'fmt2hist', 'mk_msgs', 'cite_footnote', 'postproc', 'lite_mk_func', 'ToolResponse',
9
- 'structured', 'StopResponse', 'FullResponse', 'search_count', 'UsageStats', 'AsyncChat', 'add_warning',
10
- 'astream_with_complete', 'mk_tr_details', 'mk_srv_tc_details', 'StreamFormatter', 'AsyncStreamFormatter',
11
- 'adisplay_stream']
6
+ __all__ = ['tool_dtls_tag', 're_tools', 'token_dtls_tag', 're_token', 'effort', 'remove_cache_ckpts', 'contents', 'stop_reason',
7
+ 'mk_msg', 'FenceToolStop', 'extract_fence_call', 'stop_sequences', 'split_tools', 'fmt2hist', 'mk_msgs',
8
+ 'cite_footnote', 'postproc', 'lite_mk_func', 'ToolResponse', 'structured', 'StopResponse', 'FullResponse',
9
+ 'search_count', 'UsageStats', 'AsyncChat', 'add_warning', 'astream_with_complete', 'run_fence_tool',
10
+ 'mk_tr_details', 'mk_srv_tc_details', 'StreamFormatter', 'AsyncStreamFormatter', 'adisplay_stream']
12
11
 
13
12
  # %% ../nbs/07_chat.ipynb #d5a3bc1f
14
13
  import asyncio, base64, json, mimetypes, random, string, ast, warnings
@@ -21,19 +20,8 @@ from fastcore import imghdr
21
20
  from fastcore.xml import Safe
22
21
  from dataclasses import dataclass
23
22
 
23
+ from .types import *
24
24
  from .acomplete import *
25
- from .acomplete import Msg, Part, PartType, ToolCall, Completion, mk_tool_res_msg, get_model_info
26
-
27
- # %% ../nbs/07_chat.ipynb #c4b8f12b
28
- haik45 = "claude-haiku-4-5"
29
- sonn45 = "claude-sonnet-4-5"
30
- sonn = sonn46 = "claude-sonnet-4-6"
31
- opus46 = "claude-opus-4-6"
32
- opus = "claude-opus-4-7"
33
- gpt54 = "gpt-5.4"
34
- gpt54m = "gpt-5.4-mini"
35
- codex54 = "gpt-5.4"
36
- codex55 = "gpt-5.5"
37
25
 
38
26
  # %% ../nbs/07_chat.ipynb #90f55ad4
39
27
  def _bytes2content(data):
@@ -106,6 +94,65 @@ token_dtls_tag = "<details class='token-usage-details'>"
106
94
  re_token = re.compile(fr"^{re.escape(token_dtls_tag)}<summary>.*?</summary>\n*\n*`.*?`\n*\n*</details>\n?",
107
95
  flags=re.DOTALL|re.MULTILINE)
108
96
 
97
+ # %% ../nbs/07_chat.ipynb #be998131
98
+ _fence_back = '`````'
99
+ _fence_re = re.compile(f'^{_fence_back}(py|bash)\n(.*?)\n{_fence_back}', re.DOTALL | re.MULTILINE)
100
+ _result_re = re.compile(f'\n{_fence_back}result\n(.*?)\n{_fence_back}\n', re.DOTALL)
101
+ _lang2tool = dict(py='python', bash='bash')
102
+
103
+ class FenceToolStop:
104
+ def __init__(self, langs): self.langs = langs
105
+ def __call__(self, text):
106
+ "Return trim result if complete fence detected in active lang"
107
+ m = _fence_re.search(text)
108
+ if m and m.group(1) in self.langs: return m.group(0)
109
+
110
+ # %% ../nbs/07_chat.ipynb #e6360e96
111
+ def extract_fence_call(text):
112
+ "Return (lang, code) if text ends with terminated py/bash fence, else None"
113
+ ms = list(_fence_re.finditer(text))
114
+ if not ms: return None
115
+ m = ms[-1]
116
+ if not text[m.end():].strip(): return m.group(1), m.group(2)
117
+
118
+ # %% ../nbs/07_chat.ipynb #215183bf
119
+ @patch(as_prop=True)
120
+ def text(self:Msg): return ''.join(p.text or '' for p in self.content if p.type == PartType.text)
121
+
122
+ # %% ../nbs/07_chat.ipynb #1de7e4d2
123
+ def _mk_result_fence(output): return f"\n{_fence_back}result\n{output}\n{_fence_back}\n"
124
+
125
+ def _split_msg_on_fences(msg):
126
+ "Split an assistant Msg on result fences, return list of Msgs"
127
+ if msg.role != 'assistant': return [msg]
128
+ if not _result_re.search(msg.text): return [msg]
129
+ res, asst_parts, tool_parts = [], [], []
130
+ for msg_part in msg.content:
131
+ if msg_part.type == PartType.thinking: asst_parts.append(msg_part)
132
+ elif msg_part.type == PartType.tool_use: tool_parts.append(msg_part)
133
+ elif parts := _result_re.split(msg_part.text or ''):
134
+ for i,p in enumerate(parts):
135
+ if not p: continue
136
+ if i % 2 == 0: res.append(Msg(role='assistant', content=asst_parts+[Part(type=PartType.text, text=p.strip())]))
137
+ else: res.append(Msg(role='user', content=[Part(type=PartType.text, text=_mk_result_fence(p))]))
138
+ if tool_parts: res.append(Msg(role='assistant', content=tool_parts))
139
+ return res
140
+
141
+ def _split_fence_msgs(msgs):
142
+ "Split all assistant msgs on result fences for wire protocol"
143
+ res = []
144
+ for m in msgs: res.extend(_split_msg_on_fences(m))
145
+ return res
146
+
147
+ # %% ../nbs/07_chat.ipynb #b161ca9e
148
+ def stop_sequences(seqs):
149
+ "Stop when any sequence appears in the accumulated completion text."
150
+ seqs = L(seqs)
151
+ def _stop(text):
152
+ for s in seqs:
153
+ if s in text: return text[:text.find(s)+len(s)]
154
+ return _stop
155
+
109
156
  # %% ../nbs/07_chat.ipynb #45ada210
110
157
  def _extract_tool_parts(text:str):
111
158
  "Extract (tool_use_part, tool_result_part) from <details> json block"
@@ -122,10 +169,13 @@ def split_tools(s):
122
169
  "Split formatted output into (text, summary, tooljson) chunks"
123
170
  return [(txt,summ,tj) for txt,_,summ,tj in chunked(re_tools.split(s.strip()), 4, pad=True)]
124
171
 
172
+ # %% ../nbs/07_chat.ipynb #44060a78
125
173
  def fmt2hist(outp:str)->list[Msg]:
126
174
  "Transform a formatted output string into fastllm canonical Msgs"
127
175
  if token_dtls_tag in outp: outp = re_token.sub('', outp)
128
- if tool_dtls_tag not in outp: return [Msg(role='assistant', content=[Part(type=PartType.text, text=outp.strip())])]
176
+ if tool_dtls_tag not in outp:
177
+ msg = Msg(role='assistant', content=[Part(type=PartType.text, text=outp.strip())])
178
+ return _split_msg_on_fences(msg)
129
179
  hist, asst_parts, tool_parts = [], [], []
130
180
  def flush():
131
181
  if tool_parts:
@@ -134,17 +184,18 @@ def fmt2hist(outp:str)->list[Msg]:
134
184
  asst_parts.clear(); tool_parts.clear()
135
185
  for txt,_,tj in split_tools(outp):
136
186
  if txt and txt.strip():
137
- if tool_parts: flush() # text after tool results => new assistant turn
187
+ if tool_parts: flush()
138
188
  asst_parts.append(Part(type=PartType.text, text=txt.strip()))
139
189
  if tj and (tp := _extract_tool_parts(tj)):
140
190
  asst_parts.append(tp[0])
141
191
  tool_parts.append(tp[1])
142
192
  flush()
143
193
  if asst_parts: hist.append(Msg(role='assistant', content=asst_parts))
144
- # TODO: Is this needed?
145
- # if hist and hist[-1].role == 'tool':
146
- # hist.append(Msg(role='assistant', content=[Part(type=PartType.text, text='.')]))
147
- return hist
194
+ result = []
195
+ for msg in hist:
196
+ if msg.role == 'assistant': result.extend(_split_msg_on_fences(msg))
197
+ else: result.append(msg)
198
+ return result
148
199
 
149
200
  # %% ../nbs/07_chat.ipynb #8de5ce8d
150
201
  def _apply_cache_idxs(msgs, cache_idxs=[-1], ttl=None):
@@ -321,6 +372,13 @@ def _inject_tool_reminder(msgs, reminder):
321
372
  msgs[i] = m
322
373
  return msgs
323
374
 
375
+ # %% ../nbs/07_chat.ipynb #e7eb2032
376
+ def _active_fence_langs(tool_schemas):
377
+ "Return set of active fence langs whose mapped tool is registered"
378
+ if not tool_schemas: return set()
379
+ names = {nested_idx(t, 'function', 'name') for t in tool_schemas}
380
+ return {lang for lang, tname in _lang2tool.items() if tname in names}
381
+
324
382
  # %% ../nbs/07_chat.ipynb #e9a14051
325
383
  class AsyncChat:
326
384
  def __init__(
@@ -364,6 +422,7 @@ class AsyncChat:
364
422
  self.hist = mk_msgs(self.hist, self.cache and 'claude' in self.model, cache_idxs, self.ttl)
365
423
  msgs = self.hist
366
424
  if prefill: msgs = self.hist + [Msg(role='assistant', content=[Part(PartType.text, prefill)])]
425
+ msgs = _split_fence_msgs(msgs)
367
426
  if self.tool_reminder: msgs = _inject_tool_reminder(msgs, self.tool_reminder)
368
427
  if 'deepseek' in self.model:
369
428
  # The `reasoning_content` in the thinking mode must be passed back to the API.
@@ -431,8 +490,12 @@ def _prep_call(self:AsyncChat, prefill, search, max_tokens, kwargs, stream=False
431
490
  if self.base_url: kwargs['base_url'] = self.base_url
432
491
  if self.extra_headers: kwargs['xtra_headers'] = self.extra_headers
433
492
  kwargs.update(_think_kw(self.model, think, self.vendor_name))
493
+ if (langs := _active_fence_langs(self.tool_schemas)):
494
+ if not any(isinstance(s, FenceToolStop) for s in kwargs.get('stop_callables', [])):
495
+ kwargs['stop_callables'] = kwargs.get('stop_callables', []) + [FenceToolStop(langs)]
434
496
  return prefill, max_tokens
435
497
 
498
+
436
499
  # %% ../nbs/07_chat.ipynb #07951b77
437
500
  @patch
438
501
  def print_hist(self:AsyncChat):
@@ -486,16 +549,27 @@ async def _call(self:AsyncChat, msg=None, prefill=None, temp=None, think=None, s
486
549
  self._track(res)
487
550
  yield res
488
551
 
552
+ toolloop, prompt = False, None
553
+ if (langs := _active_fence_langs(self.tool_schemas)):
554
+ if m := last(self.hist, lambda o: o.role == 'assistant'):
555
+ if fence := extract_fence_call(m.text):
556
+ lang, code = fence
557
+ out = await run_fence_tool(lang, code, self.ns)
558
+ for p in reversed(m.content):
559
+ if p.type == PartType.text: p.text += out; break
560
+ if stream: yield {'text': out}
561
+ toolloop = True
489
562
  if stcs:= _srvtools(res.tool_calls):
490
563
  for tc in stcs: yield tc
491
564
  if tcs := _usrtools(res.tool_calls):
492
565
  tres = await parallel_async(_alite_call_func, tcs, timeout=tc_timeout, n_workers=n_workers, pause=pause, **self.tcdict)
493
566
  tmsg = mk_tool_res_msg(tcs, tres)
494
- # TODO: We yield tool calls at the end with their results, fastllm doesn't yield streaming tool calls during streaming as once the collation is done for simplicity, but it can
495
567
  for r in tmsg.content: yield r
496
568
  self.hist.append(tmsg)
497
569
  if step>=max_steps-1 or _has_stop(tmsg.content): prompt,tool_choice,search = mk_msg(final_prompt),'none',False
498
- else: prompt = None
570
+ toolloop = True
571
+
572
+ if toolloop and step <= max_steps:
499
573
  try:
500
574
  async for result in self._call(
501
575
  prompt, prefill, temp, think, search, stream, max_steps, step+1,
@@ -507,6 +581,14 @@ async def _call(self:AsyncChat, msg=None, prefill=None, temp=None, think=None, s
507
581
  prompt, prefill, temp, think, search, stream, max_steps, step+1,
508
582
  final_prompt, tool_choice='none', **kwargs): yield result
509
583
 
584
+ # %% ../nbs/07_chat.ipynb #4dc002da
585
+ async def run_fence_tool(lang, code, ns):
586
+ "Run the mapped tool for `lang` with the code, return result fence"
587
+ tname = _lang2tool[lang]
588
+ arg = dict(code=code) if lang == 'py' else dict(command=code)
589
+ res = _mk_tool_result(await call_func_async(tname, arg, ns=ns, raise_on_err=False))
590
+ return _mk_result_fence(_trunc_str(str(res)))
591
+
510
592
  # %% ../nbs/07_chat.ipynb #1361515a
511
593
  @patch
512
594
  @delegates(AsyncChat._call)
@@ -608,15 +690,15 @@ class AsyncStreamFormatter(StreamFormatter):
608
690
  "Format the response stream for markdown display."
609
691
  async for o in rs: yield self.format_item(o)
610
692
 
611
- # %% ../nbs/07_chat.ipynb #f4345023
693
+ # %% ../nbs/07_chat.ipynb #944bcd25
612
694
  @delegates(AsyncStreamFormatter)
613
695
  async def adisplay_stream(rs, **kwargs):
614
696
  "Use IPython.display to markdown display the response stream."
615
697
  try: from IPython.display import display, Markdown
616
698
  except ModuleNotFoundError: raise ModuleNotFoundError("This function requires ipython. Please run `pip install ipython` to use.")
617
699
  fmt = AsyncStreamFormatter(**kwargs)
618
- md = ''
700
+ md,h = '',display(Markdown(' '), display_id=True)
619
701
  async for o in fmt.format_stream(rs):
620
- md+=o
621
- display(Markdown(md),clear=True)
702
+ md += o
703
+ if md: h.update(Markdown(md))
622
704
  return fmt
fastllm/streaming.py CHANGED
@@ -3,11 +3,10 @@
3
3
  # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/01_streaming.ipynb.
4
4
 
5
5
  # %% auto #0
6
- __all__ = ['Delta', 'norm_and_yield', 'PartAccum', 'accum_completion', 'completion_text', 'stop_sequences', 'mk_acollect_stream',
7
- 'fake_stream']
6
+ __all__ = ['Delta', 'norm_and_yield', 'PartAccum', 'stop_and_trim', 'mk_acollect_stream']
8
7
 
9
8
  # %% ../nbs/01_streaming.ipynb #0df5c926
10
- import json
9
+ import json,copy
11
10
  from dataclasses import dataclass, field, fields
12
11
  from fastcore.utils import *
13
12
  from fastcore.meta import delegates
@@ -56,54 +55,44 @@ class PartAccum:
56
55
  # anthropic citations have matching idx
57
56
  self.parts[index].data['citations'].extend(citations or [])
58
57
 
59
- def finalize(self):
60
- for idx,tc in self.parts.items():
61
- if isinstance(tc, ToolCall):
62
- if isinstance(tc.arguments, str): tc.arguments = json.loads(tc.arguments) if tc.arguments else {}
63
- self.tool_calls.append(tc)
64
- data = {**tc.extra, 'id':tc.id, 'name':tc.name, 'arguments':tc.arguments, 'server':tc.server}
65
- self.parts[idx] = Part(type=PartType.tool_use, data=data)
58
+ def get_merged(self, with_tools=True):
59
+ tmp_parts = copy.deepcopy(self.parts)
60
+ tool_calls = []
61
+ if with_tools:
62
+ for idx,tc in tmp_parts.items():
63
+ if isinstance(tc, ToolCall):
64
+ if isinstance(tc.arguments, str): tc.arguments = json.loads(tc.arguments) if tc.arguments else {}
65
+ tool_calls.append(tc)
66
+ data = {**tc.extra, 'id':tc.id, 'name':tc.name, 'arguments':tc.arguments, 'server':tc.server}
67
+ tmp_parts[idx] = Part(type=PartType.tool_use, data=data)
66
68
 
67
69
  merged = []
68
- for p in self.parts.values():
70
+ for p in tmp_parts.values():
71
+ if isinstance(p, ToolCall) and not with_tools: continue
69
72
  if merged and merged[-1].type == p.type and p.type in (PartType.text, PartType.thinking): merged[-1].text += p.text
70
- else: merged.append(p)
71
- self.parts = merged
72
-
73
- # %% ../nbs/01_streaming.ipynb #0e8ca58e
74
- def accum_completion(pa, raw, fin, usg, deltas, model=None, api_name=None, vendor_name=None, delta=None):
75
- "Build a Completion snapshot from in-progress PartAccum state"
76
- parts = [p for p in pa.parts.values() if isinstance(p, Part)]
77
- if delta and delta.text:
78
- parts = parts.copy()
79
- if parts and parts[-1].type==PartType.text:
80
- p = parts[-1]
81
- parts[-1] = Part(type=p.type, text=(p.text or '') + delta.text, data=p.data)
82
- else: parts.append(Part(type=PartType.text, text=delta.text))
83
- return Completion(raw.get('model', model), Msg(role="assistant", content=parts),
84
- fin, usg, api_name=api_name, vendor_name=vendor_name, raw={'deltas':deltas})
85
-
86
- # %% ../nbs/01_streaming.ipynb #c28f706f
87
- def completion_text(c):
88
- "Combined text from a Completion."
89
- return ''.join(p.text or '' for p in c.message.content if p.type==PartType.text)
90
-
91
- # %% ../nbs/01_streaming.ipynb #b2b9f7ca
92
- def stop_sequences(seqs):
93
- "Stop when any sequence appears in the accumulated completion text."
94
- seqs = L(seqs)
95
- def _stop(c):
96
- txt = completion_text(c)
97
- for s in seqs:
98
- if s in txt: return s
99
- return _stop
73
+ else: merged.append(p)
74
+ return merged, tool_calls
75
+
76
+ def finalize(self):
77
+ self.parts, self.tool_calls = self.get_merged()
100
78
 
101
- # %% ../nbs/01_streaming.ipynb #931f686b
102
- def _trim_delta(d, cur, s):
79
+ # %% ../nbs/01_streaming.ipynb #f11ea80a
80
+ def _trim_delta(d, txt, s):
103
81
  "Trim `d.text` so accumulated text in `cur` stops just before stop sequence `s`."
104
- txt,dt = completion_text(cur), d.text or ''
105
- i = txt.find(s)
106
- if i>=0: d.text = dt[:max(0, i-(len(txt)-len(dt)))]
82
+ idx = len(txt) - (txt.find(s) + len(s))
83
+ if idx>0: d.text = d.text[:-idx]
84
+
85
+ # %% ../nbs/01_streaming.ipynb #efbf96d7
86
+ def stop_and_trim(part_accum, d, stop_callables):
87
+ 'Stop based on the accumulated text so far, and trim current delta'
88
+ parts,_ = part_accum.get_merged(with_tools=False)
89
+ prev = parts[-1].text if parts and parts[-1].type == PartType.text else ''
90
+ txt = prev + (d.text or '')
91
+ for f in stop_callables:
92
+ if res:=f(txt):
93
+ if isinstance(res, str): _trim_delta(d, txt, res)
94
+ return True
95
+ return False
107
96
 
108
97
  # %% ../nbs/01_streaming.ipynb #fc71790b
109
98
  async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_name=None, stop_callables=None):
@@ -120,18 +109,22 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
120
109
  idx = _fidx(d, name, pt)
121
110
  part_accum.append(typ, idx, **(ret or {kw: val}))
122
111
  return ret or {name: val}
112
+ def _yield_parts(d):
113
+ for args in [('text',), ('thinking',), ('citations', 'text', 'citations')]:
114
+ if (r := _proc(d, args[0], pt=args[1] if len(args)>1 else None, kw=args[2] if len(args)>2 else 'txt')):
115
+ yield r
116
+ stop, stop_yielded = False, False
123
117
  async for d in it:
124
- stop = False
125
- if stop_callables:
126
- cur = accum_completion(part_accum, d.raw, fin, usg, deltas+[d], model, api_name=api_name, vendor_name=vendor_name, delta=d)
127
- for f in stop_callables:
128
- if res:=f(cur):
129
- if isinstance(res, str): _trim_delta(d, cur, res)
130
- stop = True
131
- break
132
- if (r:=_proc(d, 'text')): yield r
133
- if (r:=_proc(d, 'thinking')): yield r
134
- if (r:=_proc(d, 'citations', pt='text', kw='citations')): yield r
118
+ # Check stop condition and yield stop delta
119
+ stop = stop_and_trim(part_accum, d, stop_callables)
120
+ if stop and not stop_yielded:
121
+ for r in _yield_parts(d): yield r
122
+ stop_yielded = True
123
+ # If stop the remaining deltas are yielded as processing
124
+ if stop: yield {'thinking':'processing'}
125
+ else:
126
+ for r in _yield_parts(d): yield r
127
+ # Rest incl. tools, finish reason, usage is processed independently
135
128
  for tc in d.tool_calls:
136
129
  args = tc.arguments.get('_delta', tc.arguments)
137
130
  _proc(d, 'tool_use', ret=dict(id=tc.id, name=tc.name, arguments=args, server=tc.server, extra=tc.extra))
@@ -143,20 +136,12 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
143
136
  if d.usage: usg = d.usage
144
137
  last_typ = typ
145
138
  deltas.append(d)
146
- if stop:
147
- fin = fin or FinishReason.stop
148
- await it.aclose()
149
- break
150
139
  part_accum.finalize()
151
- # need to recheck for tool calls post collation for streaming
152
140
  tcs = part_accum.tool_calls
153
- fin = FinishReason.tool_calls if fin==FinishReason.stop and any(~L(tcs).attrgot('server')) else fin
141
+ if stop: fin = FinishReason.stop
142
+ fin = FinishReason.tool_calls if fin==FinishReason.stop and any(~L(tcs).attrgot('server')) else fin # recheck tool calls post collation
154
143
  # tool calls and non-anthropic citations are yielded at the end
155
144
  yield Completion(d.raw.get('model', model),
156
145
  message=Msg(role="assistant", content=part_accum.parts),
157
146
  finish_reason=fin, usage=usg, tool_calls=tcs, api_name=api_name, vendor_name=vendor_name,
158
147
  raw={'deltas':deltas})
159
-
160
- # %% ../nbs/01_streaming.ipynb #f79d3b99
161
- async def fake_stream(*ss):
162
- for s in ss: yield Delta(text=s, raw={'model':'fake'})
fastllm/types.py CHANGED
@@ -3,7 +3,8 @@
3
3
  # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/00_types.ipynb.
4
4
 
5
5
  # %% auto #0
6
- __all__ = ['PartType', 'FinishReason', 'api_registry', 'model_prices_url', 'codex_pricing', 'Part', 'Msg', 'ToolCall',
6
+ __all__ = ['PartType', 'FinishReason', 'api_registry', 'model_prices_url', 'haik45', 'sonn45', 'sonn', 'sonn46', 'opus46', 'opus',
7
+ 'gpt54', 'gpt54m', 'codex54', 'codex55', 'codex53spark', 'codex_pricing', 'Part', 'Msg', 'ToolCall',
7
8
  'display_list', 'Usage', 'Completion', 'APIRegistry', 'mk_completion', 'mk_tool_res_msg', 'fn_schema',
8
9
  'sys_text', 'part_txt', 'data_url', 'url_mime', 'payload_kwargs', 'get_api_key', 'model_prices_meta',
9
10
  'infer_api_name', 'get_model_meta', 'get_model_info']
@@ -151,7 +152,6 @@ class APIRegistry:
151
152
  api_registry = APIRegistry()
152
153
 
153
154
  # %% ../nbs/00_types.ipynb #d58a5f96
154
- #COMMON
155
155
  def mk_completion(resp, model, api_name, vendor_name):
156
156
  "Normalize an api response into Completion."
157
157
  api = api_registry.apis[api_name]
@@ -167,7 +167,6 @@ def mk_completion(resp, model, api_name, vendor_name):
167
167
  raw=resp)
168
168
 
169
169
  # %% ../nbs/00_types.ipynb #d5322db5
170
- #COMMON
171
170
  def mk_tool_res_msg(tool_calls:list[ToolCall], results:list[str|list]):
172
171
  'A util to prepare parallel tool call with str or media list results'
173
172
  parts = []
@@ -177,7 +176,6 @@ def mk_tool_res_msg(tool_calls:list[ToolCall], results:list[str|list]):
177
176
  return Msg(role="tool", content=parts)
178
177
 
179
178
  # %% ../nbs/00_types.ipynb #8a8e468b
180
- #COMMON
181
179
  def fn_schema(t):
182
180
  "Extract (name, description, parameters) from any tool format."
183
181
  if not isinstance(t, dict): return None
@@ -188,7 +186,6 @@ def fn_schema(t):
188
186
  return None
189
187
 
190
188
  # %% ../nbs/00_types.ipynb #d1d48d91
191
- #COMMON
192
189
  def sys_text(system):
193
190
  "Extract text from system (str or Part)."
194
191
  if system is None: return None
@@ -197,7 +194,6 @@ def sys_text(system):
197
194
  def part_txt(p): return p.text if isinstance(p,Part) else p
198
195
 
199
196
  # %% ../nbs/00_types.ipynb #dc2b75a0
200
- #COMMON
201
197
  _ext_mime = {
202
198
  '.jpg':'image/jpeg', '.jpeg':'image/jpeg', '.png':'image/png', '.gif':'image/gif', '.webp':'image/webp',
203
199
  '.pdf':'application/pdf',
@@ -250,7 +246,19 @@ def get_model_meta(model, vendor_name=None, tfm=noop):
250
246
  elif vendor_name: key = f"{vendor_name}/{model}"
251
247
  return dict2obj(tfm(mp.get(key), model, vendor_name))
252
248
 
253
- # %% ../nbs/00_types.ipynb #fbfdeb0a
249
+ # %% ../nbs/00_types.ipynb #60607e23
250
+ haik45 = "claude-haiku-4-5"
251
+ sonn45 = "claude-sonnet-4-5"
252
+ sonn = sonn46 = "claude-sonnet-4-6"
253
+ opus46 = "claude-opus-4-6"
254
+ opus = "claude-opus-4-7"
255
+ gpt54 = "gpt-5.4"
256
+ gpt54m = "gpt-5.4-mini"
257
+ codex54 = "gpt-5.4"
258
+ codex55 = "gpt-5.5"
259
+ codex53spark = "gpt-5.3-codex-spark"
260
+
261
+ # %% ../nbs/00_types.ipynb #d6d5b98c
254
262
  codex_pricing = {
255
263
  "input_cost_per_token": 0.10 / 1_000_000,
256
264
  "cache_creation_input_token_cost": 0.10 / 1_000_000,
@@ -258,23 +266,31 @@ codex_pricing = {
258
266
  "output_cost_per_token": 0.50 / 1_000_000,
259
267
  }
260
268
 
269
+ _codex_overrides = {
270
+ codex53spark: dict(
271
+ supports_vision=False, supports_image_input=False, supports_web_search=True, supports_reasoning=True,
272
+ max_tokens=128000, max_input_tokens=128000, max_output_tokens=128000)
273
+ }
274
+
275
+ # %% ../nbs/00_types.ipynb #fbfdeb0a
261
276
  def get_model_info(mn, vendor_name=None):
262
- info = get_model_meta(mn, vendor_name)
277
+ info = get_model_meta(mn, 'chatgpt' if vendor_name=='codex' else vendor_name)
263
278
  # anthropic web search
264
- if 'search_context_cost_per_query' in info:
265
- info['supports_web_search'] = True
266
- # add reasoning to kimi
279
+ if 'search_context_cost_per_query' in info: info['supports_web_search'] = True
280
+ # kimi
267
281
  if 'kimi' in mn:
268
282
  if 'k2p6' in mn: info = get_model_meta(mn.replace('k2p6', 'k2p5'), vendor_name)
269
283
  info['supports_reasoning'] = True
270
284
  info['supports_vision'] = True
271
285
  if vendor_name == 'moonshot': info['supports_assistant_prefill'] = True
272
- # add web search to gpt
286
+ # gpt web search
273
287
  if mn in ("gpt-5.4", "gpt-5.4-mini"):
274
288
  info['supports_web_search'] = True
275
289
  info.pop('mode', None)
276
- # codex pricing
277
- if vendor_name == 'codex': info = merge(info, codex_pricing)
290
+ # codex updates
291
+ if vendor_name == 'codex':
292
+ info = merge(info, codex_pricing)
293
+ info |= _codex_overrides.get(mn, {})
278
294
  # deepseek v4
279
295
  if vendor_name == 'deepseek' and mn in ("deepseek-v4-flash", "deepseek-v4-pro"):
280
296
  info = dict(get_model_meta("deepseek/deepseek-v3.2"))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-fastllm
3
- Version: 0.0.2
3
+ Version: 0.0.4
4
4
  Author-email: Kerem Turgutlu <keremturgutlu@gmail.com>
5
5
  License: Apache-2.0
6
6
  Project-URL: Repository, https://github.com/AnswerDotAI/fastllm
@@ -1,21 +1,21 @@
1
- fastllm/__init__.py,sha256=QvlVh4JTl3JL7jQAja76yKtT-IvF4631ASjWY1wS6AQ,22
2
- fastllm/_modidx.py,sha256=cOOFPFjpzDYzGBO-4V2gHyxfP9PJKGhxRBzmZ3k4yqs,28453
3
- fastllm/acomplete.py,sha256=Wl_MdtNe2CuTrmVGPDj5f5pOEnA-ToZqRpPKOHN0J2Q,6764
1
+ fastllm/__init__.py,sha256=1mptEzQihbdyqqzMgdns_j5ZGK9gz7hR2bsgA_TnjO4,22
2
+ fastllm/_modidx.py,sha256=oWXxw9eciYsnR2YAhla6c7HCPdXSFb9jV7WQ71fZXPQ,29551
3
+ fastllm/acomplete.py,sha256=DTKUDx4Ed6_NdFlUj2XdQCY47p5tO0yv9ALB_22XVks,6615
4
4
  fastllm/anthropic.py,sha256=fG20kOv3d3wGKQe8rD5pFWgZHKe-vT-9QJ3nPXh2twY,14615
5
- fastllm/chat.py,sha256=cibUSgm_8FtFLU8ilCsD8f_2JqvA-KrwJOvHWiDiMrw,29390
5
+ fastllm/chat.py,sha256=iVPHMbHFlUL8bIiUlYp4GqPhsCGn0pcl9M9LnKwj6lg,32921
6
6
  fastllm/gemini.py,sha256=E1EYMfV8IMpC_-WzlDrkhz_CJQmzmxvaVUucNgPOqSA,14947
7
7
  fastllm/openai_chat.py,sha256=wZ0HI0m9ipy9XVhqmYBXf-BmkVAOipUVwqu9NGB_rJU,10941
8
8
  fastllm/openai_responses.py,sha256=Nk5bfTCF2-a17nwvIsf-u39j539v9KIduVfScECItKk,13052
9
- fastllm/streaming.py,sha256=Ey0ufSYgJREvagMuHuTKBsxMxyS0S_StGuef8taY5PY,7235
10
- fastllm/types.py,sha256=ZL57Rhgfs_N-tj99B19-BUR8--MyWeyc2vnPjTcjpso,11473
9
+ fastllm/streaming.py,sha256=FYG4-rt7mfGKrUPxCMp_Z3kkxGLKKAVPev8ifD3YFlQ,6652
10
+ fastllm/types.py,sha256=2Tuf4AicfCSsf4AbkYWthPqyarGPru-LAkcPEWxQeLc,12192
11
11
  fastllm/specs/anthropic.json,sha256=VCgTjM2_HoDpCkeu3q_TCOEZLMHriJZLAG3LnDBAgGM,541035
12
12
  fastllm/specs/anthropic.yml,sha256=3S3NAKdXB1Nwp-Sn9Gmh4tBnwhGGhMO3DXkGqPXPUYs,724122
13
13
  fastllm/specs/gemini.json,sha256=zJGOdvZ2BvCiTENZt0-BDEvNBMl8h6EBmEskle_WBto,309331
14
14
  fastllm/specs/openai.with-code-samples.json,sha256=Kto19AW1u8MfxVDJ4cFVBIdZQOIyy8NWylswo57eABU,1995929
15
15
  fastllm/specs/openai.with-code-samples.yml,sha256=DlcWGdaeP4k7smVjt6UbyehJ-2XGU3rn3nCIBMDRfYU,2553630
16
16
  fastllm/specs/spec_manifest.json,sha256=9tVFwojXFnNqsAxQzCRTP1lgSIM0fXixnrXdv4Cmb0c,653
17
- python_fastllm-0.0.2.dist-info/METADATA,sha256=G1AwFAzZ0wS0-qgEkEi_mA3cbHCboq-bXVoHzWkhK7Y,19546
18
- python_fastllm-0.0.2.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
19
- python_fastllm-0.0.2.dist-info/entry_points.txt,sha256=dq0chsiRjJYStCOXweFW9L6LpyMTjWu2AabKCbTSbuI,36
20
- python_fastllm-0.0.2.dist-info/top_level.txt,sha256=F8qodL7nEGUHGmzzqfhNKCTIr1i0D6cvudOnm3z7o0Y,8
21
- python_fastllm-0.0.2.dist-info/RECORD,,
17
+ python_fastllm-0.0.4.dist-info/METADATA,sha256=akEn90MvhkmJUpjbL8k28h9bDeOIbxOIUDmbDAIFqws,19546
18
+ python_fastllm-0.0.4.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
19
+ python_fastllm-0.0.4.dist-info/entry_points.txt,sha256=dq0chsiRjJYStCOXweFW9L6LpyMTjWu2AabKCbTSbuI,36
20
+ python_fastllm-0.0.4.dist-info/top_level.txt,sha256=F8qodL7nEGUHGmzzqfhNKCTIr1i0D6cvudOnm3z7o0Y,8
21
+ python_fastllm-0.0.4.dist-info/RECORD,,