python-fastllm 0.0.3__tar.gz → 0.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. {python_fastllm-0.0.3 → python_fastllm-0.0.5}/PKG-INFO +1 -1
  2. python_fastllm-0.0.5/fastllm/__init__.py +1 -0
  3. {python_fastllm-0.0.3 → python_fastllm-0.0.5}/fastllm/_modidx.py +14 -4
  4. {python_fastllm-0.0.3 → python_fastllm-0.0.5}/fastllm/acomplete.py +2 -4
  5. {python_fastllm-0.0.3 → python_fastllm-0.0.5}/fastllm/chat.py +111 -17
  6. {python_fastllm-0.0.3 → python_fastllm-0.0.5}/fastllm/streaming.py +52 -67
  7. {python_fastllm-0.0.3 → python_fastllm-0.0.5}/fastllm/types.py +8 -5
  8. {python_fastllm-0.0.3 → python_fastllm-0.0.5}/python_fastllm.egg-info/PKG-INFO +1 -1
  9. python_fastllm-0.0.3/fastllm/__init__.py +0 -1
  10. {python_fastllm-0.0.3 → python_fastllm-0.0.5}/README.md +0 -0
  11. {python_fastllm-0.0.3 → python_fastllm-0.0.5}/fastllm/anthropic.py +0 -0
  12. {python_fastllm-0.0.3 → python_fastllm-0.0.5}/fastllm/gemini.py +0 -0
  13. {python_fastllm-0.0.3 → python_fastllm-0.0.5}/fastllm/openai_chat.py +0 -0
  14. {python_fastllm-0.0.3 → python_fastllm-0.0.5}/fastllm/openai_responses.py +0 -0
  15. {python_fastllm-0.0.3 → python_fastllm-0.0.5}/fastllm/specs/anthropic.json +0 -0
  16. {python_fastllm-0.0.3 → python_fastllm-0.0.5}/fastllm/specs/anthropic.yml +0 -0
  17. {python_fastllm-0.0.3 → python_fastllm-0.0.5}/fastllm/specs/gemini.json +0 -0
  18. {python_fastllm-0.0.3 → python_fastllm-0.0.5}/fastllm/specs/openai.with-code-samples.json +0 -0
  19. {python_fastllm-0.0.3 → python_fastllm-0.0.5}/fastllm/specs/openai.with-code-samples.yml +0 -0
  20. {python_fastllm-0.0.3 → python_fastllm-0.0.5}/fastllm/specs/spec_manifest.json +0 -0
  21. {python_fastllm-0.0.3 → python_fastllm-0.0.5}/pyproject.toml +0 -0
  22. {python_fastllm-0.0.3 → python_fastllm-0.0.5}/python_fastllm.egg-info/SOURCES.txt +0 -0
  23. {python_fastllm-0.0.3 → python_fastllm-0.0.5}/python_fastllm.egg-info/dependency_links.txt +0 -0
  24. {python_fastllm-0.0.3 → python_fastllm-0.0.5}/python_fastllm.egg-info/entry_points.txt +0 -0
  25. {python_fastllm-0.0.3 → python_fastllm-0.0.5}/python_fastllm.egg-info/requires.txt +0 -0
  26. {python_fastllm-0.0.3 → python_fastllm-0.0.5}/python_fastllm.egg-info/top_level.txt +0 -0
  27. {python_fastllm-0.0.3 → python_fastllm-0.0.5}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-fastllm
3
- Version: 0.0.3
3
+ Version: 0.0.5
4
4
  Author-email: Kerem Turgutlu <keremturgutlu@gmail.com>
5
5
  License: Apache-2.0
6
6
  Project-URL: Repository, https://github.com/AnswerDotAI/fastllm
@@ -0,0 +1 @@
1
+ __version__ = "0.0.5"
@@ -51,7 +51,11 @@ d = { 'settings': { 'branch': 'main',
51
51
  'fastllm.chat.AsyncStreamFormatter': ('chat.html#asyncstreamformatter', 'fastllm/chat.py'),
52
52
  'fastllm.chat.AsyncStreamFormatter.format_stream': ( 'chat.html#asyncstreamformatter.format_stream',
53
53
  'fastllm/chat.py'),
54
+ 'fastllm.chat.FenceToolStop': ('chat.html#fencetoolstop', 'fastllm/chat.py'),
55
+ 'fastllm.chat.FenceToolStop.__call__': ('chat.html#fencetoolstop.__call__', 'fastllm/chat.py'),
56
+ 'fastllm.chat.FenceToolStop.__init__': ('chat.html#fencetoolstop.__init__', 'fastllm/chat.py'),
54
57
  'fastllm.chat.FullResponse': ('chat.html#fullresponse', 'fastllm/chat.py'),
58
+ 'fastllm.chat.Msg.text': ('chat.html#msg.text', 'fastllm/chat.py'),
55
59
  'fastllm.chat.StopResponse': ('chat.html#stopresponse', 'fastllm/chat.py'),
56
60
  'fastllm.chat.StreamFormatter': ('chat.html#streamformatter', 'fastllm/chat.py'),
57
61
  'fastllm.chat.StreamFormatter.__init__': ('chat.html#streamformatter.__init__', 'fastllm/chat.py'),
@@ -65,6 +69,7 @@ d = { 'settings': { 'branch': 'main',
65
69
  'fastllm.chat.UsageStats.__repr__': ('chat.html#usagestats.__repr__', 'fastllm/chat.py'),
66
70
  'fastllm.chat.UsageStats.fmt': ('chat.html#usagestats.fmt', 'fastllm/chat.py'),
67
71
  'fastllm.chat.UsageStats.from_response': ('chat.html#usagestats.from_response', 'fastllm/chat.py'),
72
+ 'fastllm.chat._active_fence_langs': ('chat.html#_active_fence_langs', 'fastllm/chat.py'),
68
73
  'fastllm.chat._add_cache_control': ('chat.html#_add_cache_control', 'fastllm/chat.py'),
69
74
  'fastllm.chat._alite_call_func': ('chat.html#_alite_call_func', 'fastllm/chat.py'),
70
75
  'fastllm.chat._apply_cache_idxs': ('chat.html#_apply_cache_idxs', 'fastllm/chat.py'),
@@ -79,7 +84,10 @@ d = { 'settings': { 'branch': 'main',
79
84
  'fastllm.chat._lite_call_func': ('chat.html#_lite_call_func', 'fastllm/chat.py'),
80
85
  'fastllm.chat._mk_content': ('chat.html#_mk_content', 'fastllm/chat.py'),
81
86
  'fastllm.chat._mk_prefill': ('chat.html#_mk_prefill', 'fastllm/chat.py'),
87
+ 'fastllm.chat._mk_result_fence': ('chat.html#_mk_result_fence', 'fastllm/chat.py'),
82
88
  'fastllm.chat._mk_tool_result': ('chat.html#_mk_tool_result', 'fastllm/chat.py'),
89
+ 'fastllm.chat._split_fence_msgs': ('chat.html#_split_fence_msgs', 'fastllm/chat.py'),
90
+ 'fastllm.chat._split_msg_on_fences': ('chat.html#_split_msg_on_fences', 'fastllm/chat.py'),
83
91
  'fastllm.chat._srv_tc_summary': ('chat.html#_srv_tc_summary', 'fastllm/chat.py'),
84
92
  'fastllm.chat._srvtools': ('chat.html#_srvtools', 'fastllm/chat.py'),
85
93
  'fastllm.chat._tc_summary': ('chat.html#_tc_summary', 'fastllm/chat.py'),
@@ -93,6 +101,7 @@ d = { 'settings': { 'branch': 'main',
93
101
  'fastllm.chat.astream_with_complete': ('chat.html#astream_with_complete', 'fastllm/chat.py'),
94
102
  'fastllm.chat.cite_footnote': ('chat.html#cite_footnote', 'fastllm/chat.py'),
95
103
  'fastllm.chat.contents': ('chat.html#contents', 'fastllm/chat.py'),
104
+ 'fastllm.chat.extract_fence_call': ('chat.html#extract_fence_call', 'fastllm/chat.py'),
96
105
  'fastllm.chat.fmt2hist': ('chat.html#fmt2hist', 'fastllm/chat.py'),
97
106
  'fastllm.chat.lite_mk_func': ('chat.html#lite_mk_func', 'fastllm/chat.py'),
98
107
  'fastllm.chat.mk_msg': ('chat.html#mk_msg', 'fastllm/chat.py'),
@@ -101,9 +110,11 @@ d = { 'settings': { 'branch': 'main',
101
110
  'fastllm.chat.mk_tr_details': ('chat.html#mk_tr_details', 'fastllm/chat.py'),
102
111
  'fastllm.chat.postproc': ('chat.html#postproc', 'fastllm/chat.py'),
103
112
  'fastllm.chat.remove_cache_ckpts': ('chat.html#remove_cache_ckpts', 'fastllm/chat.py'),
113
+ 'fastllm.chat.run_fence_tool': ('chat.html#run_fence_tool', 'fastllm/chat.py'),
104
114
  'fastllm.chat.search_count': ('chat.html#search_count', 'fastllm/chat.py'),
105
115
  'fastllm.chat.split_tools': ('chat.html#split_tools', 'fastllm/chat.py'),
106
116
  'fastllm.chat.stop_reason': ('chat.html#stop_reason', 'fastllm/chat.py'),
117
+ 'fastllm.chat.stop_sequences': ('chat.html#stop_sequences', 'fastllm/chat.py'),
107
118
  'fastllm.chat.structured': ('chat.html#structured', 'fastllm/chat.py')},
108
119
  'fastllm.gemini': { 'fastllm.gemini._gem_filter_sch': ('gemini.html#_gem_filter_sch', 'fastllm/gemini.py'),
109
120
  'fastllm.gemini._gem_part_type': ('gemini.html#_gem_part_type', 'fastllm/gemini.py'),
@@ -208,13 +219,12 @@ d = { 'settings': { 'branch': 'main',
208
219
  'fastllm.streaming.PartAccum': ('streaming.html#partaccum', 'fastllm/streaming.py'),
209
220
  'fastllm.streaming.PartAccum.append': ('streaming.html#partaccum.append', 'fastllm/streaming.py'),
210
221
  'fastllm.streaming.PartAccum.finalize': ('streaming.html#partaccum.finalize', 'fastllm/streaming.py'),
222
+ 'fastllm.streaming.PartAccum.get_merged': ( 'streaming.html#partaccum.get_merged',
223
+ 'fastllm/streaming.py'),
211
224
  'fastllm.streaming._trim_delta': ('streaming.html#_trim_delta', 'fastllm/streaming.py'),
212
- 'fastllm.streaming.accum_completion': ('streaming.html#accum_completion', 'fastllm/streaming.py'),
213
- 'fastllm.streaming.completion_text': ('streaming.html#completion_text', 'fastllm/streaming.py'),
214
- 'fastllm.streaming.fake_stream': ('streaming.html#fake_stream', 'fastllm/streaming.py'),
215
225
  'fastllm.streaming.mk_acollect_stream': ('streaming.html#mk_acollect_stream', 'fastllm/streaming.py'),
216
226
  'fastllm.streaming.norm_and_yield': ('streaming.html#norm_and_yield', 'fastllm/streaming.py'),
217
- 'fastllm.streaming.stop_sequences': ('streaming.html#stop_sequences', 'fastllm/streaming.py')},
227
+ 'fastllm.streaming.stop_and_trim': ('streaming.html#stop_and_trim', 'fastllm/streaming.py')},
218
228
  'fastllm.types': { 'fastllm.types.APIRegistry': ('types.html#apiregistry', 'fastllm/types.py'),
219
229
  'fastllm.types.APIRegistry.__init__': ('types.html#apiregistry.__init__', 'fastllm/types.py'),
220
230
  'fastllm.types.APIRegistry.register': ('types.html#apiregistry.register', 'fastllm/types.py'),
@@ -18,7 +18,6 @@ from fastspec.errors import APIError
18
18
  from .types import *
19
19
  from .streaming import *
20
20
  from .openai_responses import *
21
- from .streaming import stop_sequences as _stop_sequences
22
21
  from .openai_chat import *
23
22
  from .anthropic import *
24
23
  from .gemini import *
@@ -30,7 +29,7 @@ oai_spec = SpecParser.from_openapi(dict2obj(json.loads((specs_path/'openai.with
30
29
  gem_spec = SpecParser.from_discovery(dict2obj(json.loads((specs_path/'gemini.json').read_text())))
31
30
 
32
31
  # %% ../nbs/06_acomplete.ipynb #32ee2546
33
- _codex_json = '~/.codex/auth.json', 'tokens','access_token'
32
+ _codex_json = '~/.codex/auth.json', ('tokens','access_token')
34
33
  vendor_mapping = {
35
34
  "openai": ('openai', 'https://api.openai.com/v1', 'OPENAI_API_KEY'),
36
35
  "anthropic": ('anthropic', 'https://api.anthropic.com', 'ANTHROPIC_API_KEY'),
@@ -63,7 +62,7 @@ def mk_client(model, vendor_name=None, api_name=None, api_key=None, base_url=Non
63
62
  if auth_fn.exists(): api_key = nested_idx(json.loads(auth_fn.read_text()), *keys)
64
63
  api_key = get_api_key(api_key, env_api_nm)
65
64
  except KeyError: raise ValueError(f"Unknown vendor '{vendor_name}', {err_msg}")
66
- elif api_name and base_url and api_key: vendor_name = ifnone(vendor_name, 'custom')
65
+ elif base_url and api_key: vendor_name, api_name = ifnone(vendor_name, 'custom'), ifnone(api_name, 'openai_chat')
67
66
  elif (api_name:=infer_api_name(model)): base_url, vendor_name = None, api_name
68
67
  else: raise ValueError(f"Model {model} can't be auto resolved, {err_msg}")
69
68
  api = api_registry.apis[api_name]
@@ -106,7 +105,6 @@ async def acomplete(msgs, model, api_name=None, vendor_name=None, api_key=None,
106
105
  "Unified completion across different APIs."
107
106
  cli, api_name, vendor_name = mk_client(model, vendor_name, api_name, api_key, base_url, xtra_hdrs)
108
107
  api = api_registry.apis[api_name]
109
- if stop_sequences: stop_callables = L(stop_callables) + [_stop_sequences(stop_sequences)]
110
108
  payload = api.mk_payload(msgs, model, stream=stream, stop_callables=stop_callables, **kwargs)
111
109
  payload = merge(payload, ifnone(xtra_body, {}))
112
110
  if vendor_name == 'codex':
@@ -4,10 +4,10 @@
4
4
 
5
5
  # %% auto #0
6
6
  __all__ = ['tool_dtls_tag', 're_tools', 'token_dtls_tag', 're_token', 'effort', 'remove_cache_ckpts', 'contents', 'stop_reason',
7
- 'mk_msg', 'split_tools', 'fmt2hist', 'mk_msgs', 'cite_footnote', 'postproc', 'lite_mk_func', 'ToolResponse',
8
- 'structured', 'StopResponse', 'FullResponse', 'search_count', 'UsageStats', 'AsyncChat', 'add_warning',
9
- 'astream_with_complete', 'mk_tr_details', 'mk_srv_tc_details', 'StreamFormatter', 'AsyncStreamFormatter',
10
- 'adisplay_stream']
7
+ 'mk_msg', 'FenceToolStop', 'extract_fence_call', 'stop_sequences', 'split_tools', 'fmt2hist', 'mk_msgs',
8
+ 'cite_footnote', 'postproc', 'lite_mk_func', 'ToolResponse', 'structured', 'StopResponse', 'FullResponse',
9
+ 'search_count', 'UsageStats', 'AsyncChat', 'add_warning', 'astream_with_complete', 'run_fence_tool',
10
+ 'mk_tr_details', 'mk_srv_tc_details', 'StreamFormatter', 'AsyncStreamFormatter', 'adisplay_stream']
11
11
 
12
12
  # %% ../nbs/07_chat.ipynb #d5a3bc1f
13
13
  import asyncio, base64, json, mimetypes, random, string, ast, warnings
@@ -94,6 +94,65 @@ token_dtls_tag = "<details class='token-usage-details'>"
94
94
  re_token = re.compile(fr"^{re.escape(token_dtls_tag)}<summary>.*?</summary>\n*\n*`.*?`\n*\n*</details>\n?",
95
95
  flags=re.DOTALL|re.MULTILINE)
96
96
 
97
+ # %% ../nbs/07_chat.ipynb #be998131
98
+ _fence_back = '`````'
99
+ _fence_re = re.compile(f'^{_fence_back}(py|bash)\n(.*?)\n{_fence_back}', re.DOTALL | re.MULTILINE)
100
+ _result_re = re.compile(f'\n{_fence_back}result\n(.*?)\n{_fence_back}\n', re.DOTALL)
101
+ _lang2tool = dict(py='python', bash='bash')
102
+
103
+ class FenceToolStop:
104
+ def __init__(self, langs): self.langs = langs
105
+ def __call__(self, text):
106
+ "Return trim result if complete fence detected in active lang"
107
+ m = _fence_re.search(text)
108
+ if m and m.group(1) in self.langs: return m.group(0)
109
+
110
+ # %% ../nbs/07_chat.ipynb #e6360e96
111
+ def extract_fence_call(text):
112
+ "Return (lang, code) if text ends with terminated py/bash fence, else None"
113
+ ms = list(_fence_re.finditer(text))
114
+ if not ms: return None
115
+ m = ms[-1]
116
+ if not text[m.end():].strip(): return m.group(1), m.group(2)
117
+
118
+ # %% ../nbs/07_chat.ipynb #215183bf
119
+ @patch(as_prop=True)
120
+ def text(self:Msg): return ''.join(p.text or '' for p in self.content if p.type == PartType.text)
121
+
122
+ # %% ../nbs/07_chat.ipynb #1de7e4d2
123
+ def _mk_result_fence(output): return f"\n{_fence_back}result\n{output}\n{_fence_back}\n"
124
+
125
+ def _split_msg_on_fences(msg):
126
+ "Split an assistant Msg on result fences, return list of Msgs"
127
+ if msg.role != 'assistant': return [msg]
128
+ if not _result_re.search(msg.text): return [msg]
129
+ res, asst_parts, tool_parts = [], [], []
130
+ for msg_part in msg.content:
131
+ if msg_part.type == PartType.thinking: asst_parts.append(msg_part)
132
+ elif msg_part.type == PartType.tool_use: tool_parts.append(msg_part)
133
+ elif parts := _result_re.split(msg_part.text or ''):
134
+ for i,p in enumerate(parts):
135
+ if not p: continue
136
+ if i % 2 == 0: res.append(Msg(role='assistant', content=asst_parts+[Part(type=PartType.text, text=p.strip())]))
137
+ else: res.append(Msg(role='user', content=[Part(type=PartType.text, text=_mk_result_fence(p))]))
138
+ if tool_parts: res.append(Msg(role='assistant', content=tool_parts))
139
+ return res
140
+
141
+ def _split_fence_msgs(msgs):
142
+ "Split all assistant msgs on result fences for wire protocol"
143
+ res = []
144
+ for m in msgs: res.extend(_split_msg_on_fences(m))
145
+ return res
146
+
147
+ # %% ../nbs/07_chat.ipynb #b161ca9e
148
+ def stop_sequences(seqs):
149
+ "Stop when any sequence appears in the accumulated completion text."
150
+ seqs = L(seqs)
151
+ def _stop(text):
152
+ for s in seqs:
153
+ if s in text: return text[:text.find(s)+len(s)]
154
+ return _stop
155
+
97
156
  # %% ../nbs/07_chat.ipynb #45ada210
98
157
  def _extract_tool_parts(text:str):
99
158
  "Extract (tool_use_part, tool_result_part) from <details> json block"
@@ -110,10 +169,13 @@ def split_tools(s):
110
169
  "Split formatted output into (text, summary, tooljson) chunks"
111
170
  return [(txt,summ,tj) for txt,_,summ,tj in chunked(re_tools.split(s.strip()), 4, pad=True)]
112
171
 
172
+ # %% ../nbs/07_chat.ipynb #44060a78
113
173
  def fmt2hist(outp:str)->list[Msg]:
114
174
  "Transform a formatted output string into fastllm canonical Msgs"
115
175
  if token_dtls_tag in outp: outp = re_token.sub('', outp)
116
- if tool_dtls_tag not in outp: return [Msg(role='assistant', content=[Part(type=PartType.text, text=outp.strip())])]
176
+ if tool_dtls_tag not in outp:
177
+ msg = Msg(role='assistant', content=[Part(type=PartType.text, text=outp.strip())])
178
+ return _split_msg_on_fences(msg)
117
179
  hist, asst_parts, tool_parts = [], [], []
118
180
  def flush():
119
181
  if tool_parts:
@@ -122,17 +184,18 @@ def fmt2hist(outp:str)->list[Msg]:
122
184
  asst_parts.clear(); tool_parts.clear()
123
185
  for txt,_,tj in split_tools(outp):
124
186
  if txt and txt.strip():
125
- if tool_parts: flush() # text after tool results => new assistant turn
187
+ if tool_parts: flush()
126
188
  asst_parts.append(Part(type=PartType.text, text=txt.strip()))
127
189
  if tj and (tp := _extract_tool_parts(tj)):
128
190
  asst_parts.append(tp[0])
129
191
  tool_parts.append(tp[1])
130
192
  flush()
131
193
  if asst_parts: hist.append(Msg(role='assistant', content=asst_parts))
132
- # TODO: Is this needed?
133
- # if hist and hist[-1].role == 'tool':
134
- # hist.append(Msg(role='assistant', content=[Part(type=PartType.text, text='.')]))
135
- return hist
194
+ result = []
195
+ for msg in hist:
196
+ if msg.role == 'assistant': result.extend(_split_msg_on_fences(msg))
197
+ else: result.append(msg)
198
+ return result
136
199
 
137
200
  # %% ../nbs/07_chat.ipynb #8de5ce8d
138
201
  def _apply_cache_idxs(msgs, cache_idxs=[-1], ttl=None):
@@ -309,6 +372,13 @@ def _inject_tool_reminder(msgs, reminder):
309
372
  msgs[i] = m
310
373
  return msgs
311
374
 
375
+ # %% ../nbs/07_chat.ipynb #e7eb2032
376
+ def _active_fence_langs(tool_schemas):
377
+ "Return set of active fence langs whose mapped tool is registered"
378
+ if not tool_schemas: return set()
379
+ names = {nested_idx(t, 'function', 'name') for t in tool_schemas}
380
+ return {lang for lang, tname in _lang2tool.items() if tname in names}
381
+
312
382
  # %% ../nbs/07_chat.ipynb #e9a14051
313
383
  class AsyncChat:
314
384
  def __init__(
@@ -352,6 +422,7 @@ class AsyncChat:
352
422
  self.hist = mk_msgs(self.hist, self.cache and 'claude' in self.model, cache_idxs, self.ttl)
353
423
  msgs = self.hist
354
424
  if prefill: msgs = self.hist + [Msg(role='assistant', content=[Part(PartType.text, prefill)])]
425
+ msgs = _split_fence_msgs(msgs)
355
426
  if self.tool_reminder: msgs = _inject_tool_reminder(msgs, self.tool_reminder)
356
427
  if 'deepseek' in self.model:
357
428
  # The `reasoning_content` in the thinking mode must be passed back to the API.
@@ -405,7 +476,7 @@ def _think_kw(model, think, vendor_name):
405
476
  def _prep_call(self:AsyncChat, prefill, search, max_tokens, kwargs, stream=False, think=None):
406
477
  "Prepare model info, prefill, search, and provider kwargs for a completion call"
407
478
  model_info = get_model_info(self.model, self.vendor_name)
408
- if max_tokens is None: max_tokens = model_info.get('max_output_tokens')
479
+ if max_tokens is None: max_tokens = ifnone(model_info.get('max_output_tokens'), 32_000)
409
480
  if not model_info.get("supports_assistant_prefill"): prefill = None
410
481
  if _has_search(model_info) and (s:=ifnone(search,self.search)):
411
482
  if 'web_search_options' not in kwargs: kwargs['web_search_options'] = {}
@@ -419,8 +490,12 @@ def _prep_call(self:AsyncChat, prefill, search, max_tokens, kwargs, stream=False
419
490
  if self.base_url: kwargs['base_url'] = self.base_url
420
491
  if self.extra_headers: kwargs['xtra_headers'] = self.extra_headers
421
492
  kwargs.update(_think_kw(self.model, think, self.vendor_name))
493
+ if (langs := _active_fence_langs(self.tool_schemas)):
494
+ if not any(isinstance(s, FenceToolStop) for s in kwargs.get('stop_callables', [])):
495
+ kwargs['stop_callables'] = kwargs.get('stop_callables', []) + [FenceToolStop(langs)]
422
496
  return prefill, max_tokens
423
497
 
498
+
424
499
  # %% ../nbs/07_chat.ipynb #07951b77
425
500
  @patch
426
501
  def print_hist(self:AsyncChat):
@@ -474,16 +549,27 @@ async def _call(self:AsyncChat, msg=None, prefill=None, temp=None, think=None, s
474
549
  self._track(res)
475
550
  yield res
476
551
 
552
+ toolloop, prompt = False, None
553
+ if (langs := _active_fence_langs(self.tool_schemas)):
554
+ if m := last(self.hist, lambda o: o.role == 'assistant'):
555
+ if fence := extract_fence_call(m.text):
556
+ lang, code = fence
557
+ out = await run_fence_tool(lang, code, self.ns)
558
+ for p in reversed(m.content):
559
+ if p.type == PartType.text: p.text += out; break
560
+ if stream: yield {'text': out}
561
+ toolloop = True
477
562
  if stcs:= _srvtools(res.tool_calls):
478
563
  for tc in stcs: yield tc
479
564
  if tcs := _usrtools(res.tool_calls):
480
565
  tres = await parallel_async(_alite_call_func, tcs, timeout=tc_timeout, n_workers=n_workers, pause=pause, **self.tcdict)
481
566
  tmsg = mk_tool_res_msg(tcs, tres)
482
- # TODO: We yield tool calls at the end with their results, fastllm doesn't yield streaming tool calls during streaming as once the collation is done for simplicity, but it can
483
567
  for r in tmsg.content: yield r
484
568
  self.hist.append(tmsg)
485
569
  if step>=max_steps-1 or _has_stop(tmsg.content): prompt,tool_choice,search = mk_msg(final_prompt),'none',False
486
- else: prompt = None
570
+ toolloop = True
571
+
572
+ if toolloop and step <= max_steps:
487
573
  try:
488
574
  async for result in self._call(
489
575
  prompt, prefill, temp, think, search, stream, max_steps, step+1,
@@ -495,6 +581,14 @@ async def _call(self:AsyncChat, msg=None, prefill=None, temp=None, think=None, s
495
581
  prompt, prefill, temp, think, search, stream, max_steps, step+1,
496
582
  final_prompt, tool_choice='none', **kwargs): yield result
497
583
 
584
+ # %% ../nbs/07_chat.ipynb #4dc002da
585
+ async def run_fence_tool(lang, code, ns):
586
+ "Run the mapped tool for `lang` with the code, return result fence"
587
+ tname = _lang2tool[lang]
588
+ arg = dict(code=code) if lang == 'py' else dict(command=code)
589
+ res = _mk_tool_result(await call_func_async(tname, arg, ns=ns, raise_on_err=False))
590
+ return _mk_result_fence(_trunc_str(str(res)))
591
+
498
592
  # %% ../nbs/07_chat.ipynb #1361515a
499
593
  @patch
500
594
  @delegates(AsyncChat._call)
@@ -596,15 +690,15 @@ class AsyncStreamFormatter(StreamFormatter):
596
690
  "Format the response stream for markdown display."
597
691
  async for o in rs: yield self.format_item(o)
598
692
 
599
- # %% ../nbs/07_chat.ipynb #f4345023
693
+ # %% ../nbs/07_chat.ipynb #944bcd25
600
694
  @delegates(AsyncStreamFormatter)
601
695
  async def adisplay_stream(rs, **kwargs):
602
696
  "Use IPython.display to markdown display the response stream."
603
697
  try: from IPython.display import display, Markdown
604
698
  except ModuleNotFoundError: raise ModuleNotFoundError("This function requires ipython. Please run `pip install ipython` to use.")
605
699
  fmt = AsyncStreamFormatter(**kwargs)
606
- md = ''
700
+ md,h = '',display(Markdown(' '), display_id=True)
607
701
  async for o in fmt.format_stream(rs):
608
- md+=o
609
- display(Markdown(md),clear=True)
702
+ md += o
703
+ if md: h.update(Markdown(md))
610
704
  return fmt
@@ -3,11 +3,10 @@
3
3
  # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/01_streaming.ipynb.
4
4
 
5
5
  # %% auto #0
6
- __all__ = ['Delta', 'norm_and_yield', 'PartAccum', 'accum_completion', 'completion_text', 'stop_sequences', 'mk_acollect_stream',
7
- 'fake_stream']
6
+ __all__ = ['Delta', 'norm_and_yield', 'PartAccum', 'stop_and_trim', 'mk_acollect_stream']
8
7
 
9
8
  # %% ../nbs/01_streaming.ipynb #0df5c926
10
- import json
9
+ import json,copy
11
10
  from dataclasses import dataclass, field, fields
12
11
  from fastcore.utils import *
13
12
  from fastcore.meta import delegates
@@ -56,54 +55,44 @@ class PartAccum:
56
55
  # anthropic citations have matching idx
57
56
  self.parts[index].data['citations'].extend(citations or [])
58
57
 
59
- def finalize(self):
60
- for idx,tc in self.parts.items():
61
- if isinstance(tc, ToolCall):
62
- if isinstance(tc.arguments, str): tc.arguments = json.loads(tc.arguments) if tc.arguments else {}
63
- self.tool_calls.append(tc)
64
- data = {**tc.extra, 'id':tc.id, 'name':tc.name, 'arguments':tc.arguments, 'server':tc.server}
65
- self.parts[idx] = Part(type=PartType.tool_use, data=data)
58
+ def get_merged(self, with_tools=True):
59
+ tmp_parts = copy.deepcopy(self.parts)
60
+ tool_calls = []
61
+ if with_tools:
62
+ for idx,tc in tmp_parts.items():
63
+ if isinstance(tc, ToolCall):
64
+ if isinstance(tc.arguments, str): tc.arguments = json.loads(tc.arguments) if tc.arguments else {}
65
+ tool_calls.append(tc)
66
+ data = {**tc.extra, 'id':tc.id, 'name':tc.name, 'arguments':tc.arguments, 'server':tc.server}
67
+ tmp_parts[idx] = Part(type=PartType.tool_use, data=data)
66
68
 
67
69
  merged = []
68
- for p in self.parts.values():
70
+ for p in tmp_parts.values():
71
+ if isinstance(p, ToolCall) and not with_tools: continue
69
72
  if merged and merged[-1].type == p.type and p.type in (PartType.text, PartType.thinking): merged[-1].text += p.text
70
- else: merged.append(p)
71
- self.parts = merged
72
-
73
- # %% ../nbs/01_streaming.ipynb #0e8ca58e
74
- def accum_completion(pa, raw, fin, usg, deltas, model=None, api_name=None, vendor_name=None, delta=None):
75
- "Build a Completion snapshot from in-progress PartAccum state"
76
- parts = [p for p in pa.parts.values() if isinstance(p, Part)]
77
- if delta and delta.text:
78
- parts = parts.copy()
79
- if parts and parts[-1].type==PartType.text:
80
- p = parts[-1]
81
- parts[-1] = Part(type=p.type, text=(p.text or '') + delta.text, data=p.data)
82
- else: parts.append(Part(type=PartType.text, text=delta.text))
83
- return Completion(raw.get('model', model), Msg(role="assistant", content=parts),
84
- fin, usg, api_name=api_name, vendor_name=vendor_name, raw={'deltas':deltas})
85
-
86
- # %% ../nbs/01_streaming.ipynb #c28f706f
87
- def completion_text(c):
88
- "Combined text from a Completion."
89
- return ''.join(p.text or '' for p in c.message.content if p.type==PartType.text)
90
-
91
- # %% ../nbs/01_streaming.ipynb #b2b9f7ca
92
- def stop_sequences(seqs):
93
- "Stop when any sequence appears in the accumulated completion text."
94
- seqs = L(seqs)
95
- def _stop(c):
96
- txt = completion_text(c)
97
- for s in seqs:
98
- if s in txt: return s
99
- return _stop
73
+ else: merged.append(p)
74
+ return merged, tool_calls
75
+
76
+ def finalize(self):
77
+ self.parts, self.tool_calls = self.get_merged()
100
78
 
101
- # %% ../nbs/01_streaming.ipynb #931f686b
102
- def _trim_delta(d, cur, s):
79
+ # %% ../nbs/01_streaming.ipynb #f11ea80a
80
+ def _trim_delta(d, txt, s):
103
81
  "Trim `d.text` so accumulated text in `cur` stops just before stop sequence `s`."
104
- txt,dt = completion_text(cur), d.text or ''
105
- i = txt.find(s)
106
- if i>=0: d.text = dt[:max(0, i-(len(txt)-len(dt)))]
82
+ idx = len(txt) - (txt.find(s) + len(s))
83
+ if idx>0: d.text = d.text[:-idx]
84
+
85
+ # %% ../nbs/01_streaming.ipynb #efbf96d7
86
+ def stop_and_trim(part_accum, d, stop_callables):
87
+ 'Stop based on the accumulated text so far, and trim current delta'
88
+ parts,_ = part_accum.get_merged(with_tools=False)
89
+ prev = parts[-1].text if parts and parts[-1].type == PartType.text else ''
90
+ txt = prev + (d.text or '')
91
+ for f in stop_callables:
92
+ if res:=f(txt):
93
+ if isinstance(res, str): _trim_delta(d, txt, res)
94
+ return True
95
+ return False
107
96
 
108
97
  # %% ../nbs/01_streaming.ipynb #fc71790b
109
98
  async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_name=None, stop_callables=None):
@@ -120,18 +109,22 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
120
109
  idx = _fidx(d, name, pt)
121
110
  part_accum.append(typ, idx, **(ret or {kw: val}))
122
111
  return ret or {name: val}
112
+ def _yield_parts(d):
113
+ for args in [('text',), ('thinking',), ('citations', 'text', 'citations')]:
114
+ if (r := _proc(d, args[0], pt=args[1] if len(args)>1 else None, kw=args[2] if len(args)>2 else 'txt')):
115
+ yield r
116
+ stop, stop_yielded = False, False
123
117
  async for d in it:
124
- stop = False
125
- if stop_callables:
126
- cur = accum_completion(part_accum, d.raw, fin, usg, deltas+[d], model, api_name=api_name, vendor_name=vendor_name, delta=d)
127
- for f in stop_callables:
128
- if res:=f(cur):
129
- if isinstance(res, str): _trim_delta(d, cur, res)
130
- stop = True
131
- break
132
- if (r:=_proc(d, 'text')): yield r
133
- if (r:=_proc(d, 'thinking')): yield r
134
- if (r:=_proc(d, 'citations', pt='text', kw='citations')): yield r
118
+ # Check stop condition and yield stop delta
119
+ stop = stop_and_trim(part_accum, d, stop_callables)
120
+ if stop and not stop_yielded:
121
+ for r in _yield_parts(d): yield r
122
+ stop_yielded = True
123
+ # If stop the remaining deltas are yielded as processing
124
+ if stop: yield {'thinking':'processing'}
125
+ else:
126
+ for r in _yield_parts(d): yield r
127
+ # Rest incl. tools, finish reason, usage is processed independently
135
128
  for tc in d.tool_calls:
136
129
  args = tc.arguments.get('_delta', tc.arguments)
137
130
  _proc(d, 'tool_use', ret=dict(id=tc.id, name=tc.name, arguments=args, server=tc.server, extra=tc.extra))
@@ -143,20 +136,12 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
143
136
  if d.usage: usg = d.usage
144
137
  last_typ = typ
145
138
  deltas.append(d)
146
- if stop:
147
- fin = fin or FinishReason.stop
148
- await it.aclose()
149
- break
150
139
  part_accum.finalize()
151
- # need to recheck for tool calls post collation for streaming
152
140
  tcs = part_accum.tool_calls
153
- fin = FinishReason.tool_calls if fin==FinishReason.stop and any(~L(tcs).attrgot('server')) else fin
141
+ if stop: fin = FinishReason.stop
142
+ fin = FinishReason.tool_calls if fin==FinishReason.stop and any(~L(tcs).attrgot('server')) else fin # recheck tool calls post collation
154
143
  # tool calls and non-anthropic citations are yielded at the end
155
144
  yield Completion(d.raw.get('model', model),
156
145
  message=Msg(role="assistant", content=part_accum.parts),
157
146
  finish_reason=fin, usage=usg, tool_calls=tcs, api_name=api_name, vendor_name=vendor_name,
158
147
  raw={'deltas':deltas})
159
-
160
- # %% ../nbs/01_streaming.ipynb #f79d3b99
161
- async def fake_stream(*ss):
162
- for s in ss: yield Delta(text=s, raw={'model':'fake'})
@@ -240,7 +240,7 @@ def infer_api_name(model):
240
240
  def get_model_meta(model, vendor_name=None, tfm=noop):
241
241
  "Look up cost metadata for `model` from litellm price map, using `vendor_name` prefix if needed."
242
242
  vendor_name = ifnone(vendor_name, infer_api_name(model))
243
- mp = model_prices_meta()
243
+ mp, key = model_prices_meta(), ''
244
244
  if model in mp: key = model
245
245
  elif vendor_name=='gemini' and model.startswith('models/'): key = f"gemini/{model.removeprefix('models/')}"
246
246
  elif vendor_name: key = f"{vendor_name}/{model}"
@@ -268,13 +268,13 @@ codex_pricing = {
268
268
 
269
269
  _codex_overrides = {
270
270
  codex53spark: dict(
271
- supports_vision=False, supports_image_input=False, supports_web_search=True,
271
+ supports_vision=False, supports_image_input=False, supports_web_search=True, supports_reasoning=True,
272
272
  max_tokens=128000, max_input_tokens=128000, max_output_tokens=128000)
273
273
  }
274
274
 
275
275
  # %% ../nbs/00_types.ipynb #fbfdeb0a
276
- def get_model_info(mn, vendor_name=None):
277
- info = get_model_meta(mn, vendor_name)
276
+ def get_model_info(mn, vendor_name=None, strict=False):
277
+ info = get_model_meta(mn, 'chatgpt' if vendor_name=='codex' else vendor_name)
278
278
  # anthropic web search
279
279
  if 'search_context_cost_per_query' in info: info['supports_web_search'] = True
280
280
  # kimi
@@ -288,7 +288,7 @@ def get_model_info(mn, vendor_name=None):
288
288
  info['supports_web_search'] = True
289
289
  info.pop('mode', None)
290
290
  # codex updates
291
- if vendor_name == 'codex':
291
+ if vendor_name == 'codex':
292
292
  info = merge(info, codex_pricing)
293
293
  info |= _codex_overrides.get(mn, {})
294
294
  # deepseek v4
@@ -306,6 +306,9 @@ def get_model_info(mn, vendor_name=None):
306
306
  supports_prompt_caching=True, supports_native_streaming=True, supports_native_structured_output=True,
307
307
  max_tokens=1000000, max_input_tokens=1000000, max_output_tokens=65536,
308
308
  input_cost_per_token=0.5e-6, cache_read_input_token_cost=0.1e-6, output_cost_per_token=3.0e-6)
309
+
310
+ # unresolved models
311
+ if not info and not strict: info = info | codex_pricing
309
312
  return dict2obj(info)
310
313
 
311
314
  # %% ../nbs/00_types.ipynb #8bfca02d
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-fastllm
3
- Version: 0.0.3
3
+ Version: 0.0.5
4
4
  Author-email: Kerem Turgutlu <keremturgutlu@gmail.com>
5
5
  License: Apache-2.0
6
6
  Project-URL: Repository, https://github.com/AnswerDotAI/fastllm
@@ -1 +0,0 @@
1
- __version__ = "0.0.3"
File without changes
File without changes