python-fastllm 0.0.6__tar.gz → 0.0.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {python_fastllm-0.0.6 → python_fastllm-0.0.8}/PKG-INFO +1 -1
  2. python_fastllm-0.0.8/fastllm/__init__.py +1 -0
  3. {python_fastllm-0.0.6 → python_fastllm-0.0.8}/fastllm/_modidx.py +32 -0
  4. {python_fastllm-0.0.6 → python_fastllm-0.0.8}/fastllm/acomplete.py +17 -1
  5. {python_fastllm-0.0.6 → python_fastllm-0.0.8}/fastllm/anthropic.py +17 -13
  6. {python_fastllm-0.0.6 → python_fastllm-0.0.8}/fastllm/chat.py +203 -122
  7. python_fastllm-0.0.8/fastllm/codex.py +7 -0
  8. {python_fastllm-0.0.6 → python_fastllm-0.0.8}/fastllm/streaming.py +3 -1
  9. {python_fastllm-0.0.6 → python_fastllm-0.0.8}/fastllm/types.py +10 -6
  10. {python_fastllm-0.0.6 → python_fastllm-0.0.8}/python_fastllm.egg-info/PKG-INFO +1 -1
  11. {python_fastllm-0.0.6 → python_fastllm-0.0.8}/python_fastllm.egg-info/SOURCES.txt +1 -0
  12. python_fastllm-0.0.6/fastllm/__init__.py +0 -1
  13. {python_fastllm-0.0.6 → python_fastllm-0.0.8}/README.md +0 -0
  14. {python_fastllm-0.0.6 → python_fastllm-0.0.8}/fastllm/gemini.py +0 -0
  15. {python_fastllm-0.0.6 → python_fastllm-0.0.8}/fastllm/openai_chat.py +0 -0
  16. {python_fastllm-0.0.6 → python_fastllm-0.0.8}/fastllm/openai_responses.py +0 -0
  17. {python_fastllm-0.0.6 → python_fastllm-0.0.8}/fastllm/specs/anthropic.json +0 -0
  18. {python_fastllm-0.0.6 → python_fastllm-0.0.8}/fastllm/specs/anthropic.yml +0 -0
  19. {python_fastllm-0.0.6 → python_fastllm-0.0.8}/fastllm/specs/gemini.json +0 -0
  20. {python_fastllm-0.0.6 → python_fastllm-0.0.8}/fastllm/specs/openai.with-code-samples.json +0 -0
  21. {python_fastllm-0.0.6 → python_fastllm-0.0.8}/fastllm/specs/openai.with-code-samples.yml +0 -0
  22. {python_fastllm-0.0.6 → python_fastllm-0.0.8}/fastllm/specs/spec_manifest.json +0 -0
  23. {python_fastllm-0.0.6 → python_fastllm-0.0.8}/pyproject.toml +0 -0
  24. {python_fastllm-0.0.6 → python_fastllm-0.0.8}/python_fastllm.egg-info/dependency_links.txt +0 -0
  25. {python_fastllm-0.0.6 → python_fastllm-0.0.8}/python_fastllm.egg-info/entry_points.txt +0 -0
  26. {python_fastllm-0.0.6 → python_fastllm-0.0.8}/python_fastllm.egg-info/requires.txt +0 -0
  27. {python_fastllm-0.0.6 → python_fastllm-0.0.8}/python_fastllm.egg-info/top_level.txt +0 -0
  28. {python_fastllm-0.0.6 → python_fastllm-0.0.8}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-fastllm
3
- Version: 0.0.6
3
+ Version: 0.0.8
4
4
  Author-email: Kerem Turgutlu <keremturgutlu@gmail.com>
5
5
  License: Apache-2.0
6
6
  Project-URL: Repository, https://github.com/AnswerDotAI/fastllm
@@ -0,0 +1 @@
1
+ __version__ = "0.0.8"
@@ -10,6 +10,7 @@ d = { 'settings': { 'branch': 'main',
10
10
  'fastllm.acomplete._classify_error': ('acomplete.html#_classify_error', 'fastllm/acomplete.py'),
11
11
  'fastllm.acomplete._classify_error_stream': ( 'acomplete.html#_classify_error_stream',
12
12
  'fastllm/acomplete.py'),
13
+ 'fastllm.acomplete._debug_print': ('acomplete.html#_debug_print', 'fastllm/acomplete.py'),
13
14
  'fastllm.acomplete._is_ctx_exceeded': ('acomplete.html#_is_ctx_exceeded', 'fastllm/acomplete.py'),
14
15
  'fastllm.acomplete.acomplete': ('acomplete.html#acomplete', 'fastllm/acomplete.py'),
15
16
  'fastllm.acomplete.mk_client': ('acomplete.html#mk_client', 'fastllm/acomplete.py')},
@@ -31,6 +32,7 @@ d = { 'settings': { 'branch': 'main',
31
32
  'fastllm.anthropic.denorm_tool_use': ('anthropic.html#denorm_tool_use', 'fastllm/anthropic.py'),
32
33
  'fastllm.anthropic.denorm_user': ('anthropic.html#denorm_user', 'fastllm/anthropic.py'),
33
34
  'fastllm.anthropic.denorm_web_search': ('anthropic.html#denorm_web_search', 'fastllm/anthropic.py'),
35
+ 'fastllm.anthropic.finalize_usage': ('anthropic.html#finalize_usage', 'fastllm/anthropic.py'),
34
36
  'fastllm.anthropic.get_hdrs': ('anthropic.html#get_hdrs', 'fastllm/anthropic.py'),
35
37
  'fastllm.anthropic.mk_payload': ('anthropic.html#mk_payload', 'fastllm/anthropic.py'),
36
38
  'fastllm.anthropic.norm_finish': ('anthropic.html#norm_finish', 'fastllm/anthropic.py'),
@@ -43,24 +45,53 @@ d = { 'settings': { 'branch': 'main',
43
45
  'fastllm.chat.AsyncChat.__call__': ('chat.html#asyncchat.__call__', 'fastllm/chat.py'),
44
46
  'fastllm.chat.AsyncChat.__init__': ('chat.html#asyncchat.__init__', 'fastllm/chat.py'),
45
47
  'fastllm.chat.AsyncChat._call': ('chat.html#asyncchat._call', 'fastllm/chat.py'),
48
+ 'fastllm.chat.AsyncChat._call_cbs': ('chat.html#asyncchat._call_cbs', 'fastllm/chat.py'),
46
49
  'fastllm.chat.AsyncChat._prep_call': ('chat.html#asyncchat._prep_call', 'fastllm/chat.py'),
47
50
  'fastllm.chat.AsyncChat._prep_msg': ('chat.html#asyncchat._prep_msg', 'fastllm/chat.py'),
48
51
  'fastllm.chat.AsyncChat._track': ('chat.html#asyncchat._track', 'fastllm/chat.py'),
52
+ 'fastllm.chat.AsyncChat.add_cb': ('chat.html#asyncchat.add_cb', 'fastllm/chat.py'),
53
+ 'fastllm.chat.AsyncChat.add_cbs': ('chat.html#asyncchat.add_cbs', 'fastllm/chat.py'),
49
54
  'fastllm.chat.AsyncChat.print_hist': ('chat.html#asyncchat.print_hist', 'fastllm/chat.py'),
50
55
  'fastllm.chat.AsyncChat.tcdict': ('chat.html#asyncchat.tcdict', 'fastllm/chat.py'),
51
56
  'fastllm.chat.AsyncStreamFormatter': ('chat.html#asyncstreamformatter', 'fastllm/chat.py'),
52
57
  'fastllm.chat.AsyncStreamFormatter.format_stream': ( 'chat.html#asyncstreamformatter.format_stream',
53
58
  'fastllm/chat.py'),
59
+ 'fastllm.chat.ChatCallback': ('chat.html#chatcallback', 'fastllm/chat.py'),
60
+ 'fastllm.chat.ChatCallback.__repr__': ('chat.html#chatcallback.__repr__', 'fastllm/chat.py'),
61
+ 'fastllm.chat.DeepseekMsgsCallback': ('chat.html#deepseekmsgscallback', 'fastllm/chat.py'),
62
+ 'fastllm.chat.DeepseekMsgsCallback.after_msgs': ( 'chat.html#deepseekmsgscallback.after_msgs',
63
+ 'fastllm/chat.py'),
64
+ 'fastllm.chat.DeepseekPrefillCallback': ('chat.html#deepseekprefillcallback', 'fastllm/chat.py'),
65
+ 'fastllm.chat.DeepseekPrefillCallback.before_acomplete': ( 'chat.html#deepseekprefillcallback.before_acomplete',
66
+ 'fastllm/chat.py'),
67
+ 'fastllm.chat.FenceToolCallback': ('chat.html#fencetoolcallback', 'fastllm/chat.py'),
68
+ 'fastllm.chat.FenceToolCallback.after_msgs': ('chat.html#fencetoolcallback.after_msgs', 'fastllm/chat.py'),
69
+ 'fastllm.chat.FenceToolCallback.before_acomplete': ( 'chat.html#fencetoolcallback.before_acomplete',
70
+ 'fastllm/chat.py'),
71
+ 'fastllm.chat.FenceToolCallback.before_tool_calls': ( 'chat.html#fencetoolcallback.before_tool_calls',
72
+ 'fastllm/chat.py'),
54
73
  'fastllm.chat.FenceToolStop': ('chat.html#fencetoolstop', 'fastllm/chat.py'),
55
74
  'fastllm.chat.FenceToolStop.__call__': ('chat.html#fencetoolstop.__call__', 'fastllm/chat.py'),
56
75
  'fastllm.chat.FenceToolStop.__init__': ('chat.html#fencetoolstop.__init__', 'fastllm/chat.py'),
57
76
  'fastllm.chat.FullResponse': ('chat.html#fullresponse', 'fastllm/chat.py'),
58
77
  'fastllm.chat.Msg.text': ('chat.html#msg.text', 'fastllm/chat.py'),
78
+ 'fastllm.chat.StopReasonCallback': ('chat.html#stopreasoncallback', 'fastllm/chat.py'),
79
+ 'fastllm.chat.StopReasonCallback.after_acomplete': ( 'chat.html#stopreasoncallback.after_acomplete',
80
+ 'fastllm/chat.py'),
59
81
  'fastllm.chat.StopResponse': ('chat.html#stopresponse', 'fastllm/chat.py'),
82
+ 'fastllm.chat.StopSequencesCallback': ('chat.html#stopsequencescallback', 'fastllm/chat.py'),
83
+ 'fastllm.chat.StopSequencesCallback.__init__': ( 'chat.html#stopsequencescallback.__init__',
84
+ 'fastllm/chat.py'),
85
+ 'fastllm.chat.StopSequencesCallback.before_acomplete': ( 'chat.html#stopsequencescallback.before_acomplete',
86
+ 'fastllm/chat.py'),
60
87
  'fastllm.chat.StreamFormatter': ('chat.html#streamformatter', 'fastllm/chat.py'),
61
88
  'fastllm.chat.StreamFormatter.__init__': ('chat.html#streamformatter.__init__', 'fastllm/chat.py'),
62
89
  'fastllm.chat.StreamFormatter.format_item': ('chat.html#streamformatter.format_item', 'fastllm/chat.py'),
63
90
  'fastllm.chat.StreamFormatter.format_stream': ('chat.html#streamformatter.format_stream', 'fastllm/chat.py'),
91
+ 'fastllm.chat.ToolReminderCallback': ('chat.html#toolremindercallback', 'fastllm/chat.py'),
92
+ 'fastllm.chat.ToolReminderCallback.__init__': ('chat.html#toolremindercallback.__init__', 'fastllm/chat.py'),
93
+ 'fastllm.chat.ToolReminderCallback.after_msgs': ( 'chat.html#toolremindercallback.after_msgs',
94
+ 'fastllm/chat.py'),
64
95
  'fastllm.chat.ToolResponse': ('chat.html#toolresponse', 'fastllm/chat.py'),
65
96
  'fastllm.chat.UsageStats': ('chat.html#usagestats', 'fastllm/chat.py'),
66
97
  'fastllm.chat.UsageStats.__add__': ('chat.html#usagestats.__add__', 'fastllm/chat.py'),
@@ -116,6 +147,7 @@ d = { 'settings': { 'branch': 'main',
116
147
  'fastllm.chat.stop_reason': ('chat.html#stop_reason', 'fastllm/chat.py'),
117
148
  'fastllm.chat.stop_sequences': ('chat.html#stop_sequences', 'fastllm/chat.py'),
118
149
  'fastllm.chat.structured': ('chat.html#structured', 'fastllm/chat.py')},
150
+ 'fastllm.codex': {},
119
151
  'fastllm.gemini': { 'fastllm.gemini._gem_filter_sch': ('gemini.html#_gem_filter_sch', 'fastllm/gemini.py'),
120
152
  'fastllm.gemini._gem_part_type': ('gemini.html#_gem_part_type', 'fastllm/gemini.py'),
121
153
  'fastllm.gemini.acollect_stream': ('gemini.html#acollect_stream', 'fastllm/gemini.py'),
@@ -3,7 +3,7 @@
3
3
  # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/06_acomplete.ipynb.
4
4
 
5
5
  # %% auto #0
6
- __all__ = ['specs_path', 'ant_spec', 'oai_spec', 'gem_spec', 'vendor_mapping', 'api2spec', 'mk_client',
6
+ __all__ = ['specs_path', 'ant_spec', 'oai_spec', 'gem_spec', 'vendor_mapping', 'api2spec', 'defaults', 'mk_client',
7
7
  'ContextWindowExceededError', 'acomplete']
8
8
 
9
9
  # %% ../nbs/06_acomplete.ipynb #f2f57253
@@ -98,6 +98,21 @@ async def _classify_error_stream(gen):
98
98
  async for x in gen: yield x
99
99
  except APIError as e: raise _classify_error(e) from e
100
100
 
101
+ # %% ../nbs/06_acomplete.ipynb #f626a4e1
102
+ defaults = SimpleNamespace(debug_mode=False)
103
+
104
+ def _debug_print(model, api_name, vendor_name, payload, func):
105
+ "Pretty-print acomplete inputs when defaults.debug_mode is set"
106
+ from pprint import pformat
107
+ p = dict(payload)
108
+ if defaults.debug_mode == 'brief' and 'tools' in p:
109
+ p['tools'] = '; '.join(o.get('name', o.get('type', o)) for o in p['tools'])
110
+ print('━'*60)
111
+ print(f"\033[1;36mfastllm debug\033[0m model={model} vendor={vendor_name} api={api_name} base_url={func.base_url} path={func.path}")
112
+ print('─'*60)
113
+ print(f"\033[1;33mpayload:\033[0m\n{pformat(p, width=120, sort_dicts=False)}")
114
+ print('━'*60)
115
+
101
116
  # %% ../nbs/06_acomplete.ipynb #2379ec94
102
117
  @delegates(payload_kwargs)
103
118
  async def acomplete(msgs, model, api_name=None, vendor_name=None, api_key=None, base_url=None, xtra_body=None, xtra_hdrs=None,
@@ -114,6 +129,7 @@ async def acomplete(msgs, model, api_name=None, vendor_name=None, api_key=None,
114
129
  if vendor_name == 'deepseek' and 'v4' in model: payload['messages'][-1]['prefix'] = True
115
130
  if vendor_name == 'moonshot' and 'kimi' in model: payload['messages'][-1]['partial'] = True
116
131
  func = attrgetter(api.op_path[stream])(cli)
132
+ if defaults.debug_mode: _debug_print(model, api_name, vendor_name, payload, func)
117
133
  try: resp = await func(**payload)
118
134
  except APIError as e: raise _classify_error(e) from e
119
135
  if stream: return _classify_error_stream(api.acollect_stream(resp, model=model, vendor_name=vendor_name, stop_callables=stop_callables))
@@ -1,7 +1,7 @@
1
1
  # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/04_anthropic.ipynb.
2
2
 
3
3
  # %% auto #0
4
- __all__ = ['ant_tc_types', 'api_ns', 'norm_tool_call', 'norm_tool_calls', 'norm_usage', 'norm_finish', 'norm_parts',
4
+ __all__ = ['ant_tc_types', 'norm_tool_call', 'norm_tool_calls', 'norm_usage', 'finalize_usage', 'norm_finish', 'norm_parts',
5
5
  'norm_sse_event', 'delta_index_fn', 'acollect_stream', 'denorm_tool_use', 'denorm_assistant', 'denorm_tool',
6
6
  'denorm_msgs', 'denorm_tool_schs', 'denorm_tool_choice', 'denorm_reasoning', 'denorm_web_search',
7
7
  'denorm_system', 'denorm_user', 'denorm_image', 'denorm_file', 'denorm_tool_result', 'mk_payload',
@@ -42,7 +42,18 @@ def norm_usage(resp):
42
42
  pt = int(usg.get("input_tokens", 0) or 0) + cached + cache_creation
43
43
  ct = int(usg.get("output_tokens", 0) or 0)
44
44
  return Usage(prompt_tokens=pt, completion_tokens=ct, total_tokens=pt + ct,
45
- cached_tokens=cached, cache_creation_tokens=cache_creation, raw=usg)
45
+ cached_tokens=cached, cache_creation_tokens=cache_creation, reasoning_tokens=0, raw=usg)
46
+
47
+ def finalize_usage(usg, parts):
48
+ "Adjust usage using finalized Anthropic content parts."
49
+ if not usg: return usg
50
+ rc = '\n'.join(p.text or '' for p in parts if p.type == PartType.thinking)
51
+ ct = int(usg.raw.get('output_tokens', usg.completion_tokens) or 0)
52
+ rt = min(int(len(rc.split())*1.5), ct) if rc else 0
53
+ res = Usage(prompt_tokens=usg.prompt_tokens, completion_tokens=ct-rt, total_tokens=usg.prompt_tokens+ct,
54
+ cached_tokens=usg.cached_tokens, cache_creation_tokens=usg.cache_creation_tokens, reasoning_tokens=rt, raw=usg.raw)
55
+ print(res)
56
+ return res
46
57
 
47
58
  # %% ../nbs/04_anthropic.ipynb #7a8b1f8f
48
59
  def norm_finish(resp, tcs=None):
@@ -197,7 +208,7 @@ def denorm_reasoning(v):
197
208
  def denorm_web_search(v):
198
209
  "Map canonical web_search_options to Anthropic hosted web_search tool."
199
210
  _max_uses = {"low": 1, "medium": 5, "high": 10}
200
- t = {"type": "web_search_20260209", "name": "web_search"}
211
+ t = {"type": "web_search_20250305", "name": "web_search"}
201
212
  if (typ := (v or {}).get("type")): t["type"] = typ
202
213
  if (s := (v or {}).get("search_context_size")):
203
214
  t["max_uses"] = _max_uses.get(s, 5)
@@ -286,13 +297,6 @@ def cost(usage, m):
286
297
  return cost
287
298
 
288
299
  # %% ../nbs/04_anthropic.ipynb #f7c0b989
289
- api_ns = dict(norm_tool_calls=norm_tool_calls,
290
- norm_parts=norm_parts,
291
- norm_finish=norm_finish,
292
- norm_usage=norm_usage,
293
- acollect_stream=acollect_stream,
294
- mk_payload=mk_payload,
295
- cost=cost,
296
- get_hdrs=get_hdrs,
297
- op_path=('messages.messages_post','messages.messages_post'))
298
- api_registry.register('anthropic', **api_ns)
300
+ api_registry.register('anthropic', norm_tool_calls=norm_tool_calls, norm_parts=norm_parts, norm_finish=norm_finish, norm_usage=norm_usage,
301
+ finalize_usage=finalize_usage, acollect_stream=acollect_stream, mk_payload=mk_payload, cost=cost, get_hdrs=get_hdrs,
302
+ op_path=('messages.messages_post','messages.messages_post'))
@@ -4,10 +4,12 @@
4
4
 
5
5
  # %% auto #0
6
6
  __all__ = ['tool_dtls_tag', 're_tools', 'token_dtls_tag', 're_token', 'effort', 'remove_cache_ckpts', 'contents', 'stop_reason',
7
- 'mk_msg', 'FenceToolStop', 'extract_fence_call', 'stop_sequences', 'split_tools', 'fmt2hist', 'mk_msgs',
8
- 'cite_footnote', 'postproc', 'lite_mk_func', 'ToolResponse', 'structured', 'StopResponse', 'FullResponse',
9
- 'search_count', 'UsageStats', 'AsyncChat', 'add_warning', 'astream_with_complete', 'run_fence_tool',
10
- 'mk_tr_details', 'mk_srv_tc_details', 'StreamFormatter', 'AsyncStreamFormatter', 'adisplay_stream']
7
+ 'mk_msg', 'FenceToolStop', 'extract_fence_call', 'split_tools', 'fmt2hist', 'mk_msgs', 'cite_footnote',
8
+ 'postproc', 'lite_mk_func', 'ToolResponse', 'structured', 'StopResponse', 'FullResponse', 'search_count',
9
+ 'UsageStats', 'AsyncChat', 'astream_with_complete', 'ChatCallback', 'DeepseekMsgsCallback',
10
+ 'DeepseekPrefillCallback', 'add_warning', 'StopReasonCallback', 'run_fence_tool', 'FenceToolCallback',
11
+ 'ToolReminderCallback', 'stop_sequences', 'StopSequencesCallback', 'mk_tr_details', 'mk_srv_tc_details',
12
+ 'StreamFormatter', 'AsyncStreamFormatter', 'adisplay_stream']
11
13
 
12
14
  # %% ../nbs/07_chat.ipynb #d5a3bc1f
13
15
  import asyncio, base64, json, mimetypes, random, string, ast, warnings
@@ -55,7 +57,7 @@ def remove_cache_ckpts(msg):
55
57
  return msg
56
58
 
57
59
  def _mk_content(o):
58
- if isinstance(o, str): return Part(type=PartType.text, text=o.strip())
60
+ if isinstance(o, str): return Part(type=PartType.text, text=o)
59
61
  elif isinstance(o,bytes): return _bytes2content(o)
60
62
  return o
61
63
 
@@ -87,16 +89,16 @@ def mk_msg(
87
89
  return _add_cache_control(msg, ttl=ttl) if cache else msg
88
90
 
89
91
  # %% ../nbs/07_chat.ipynb #db466e1c
90
- tool_dtls_tag = "<details class='tool-usage-details'>"
92
+ tool_dtls_tag = "<details class='tool-usage-details' markdown='1'>"
91
93
  re_tools = re.compile(fr"^({tool_dtls_tag}\n*(?:<summary>(?P<summary>.*?)</summary>\n*)?\n*```json\n+(.*?)\n+```\n+</details>)",
92
94
  flags=re.DOTALL|re.MULTILINE)
93
- token_dtls_tag = "<details class='token-usage-details'>"
94
- re_token = re.compile(fr"^{re.escape(token_dtls_tag)}<summary>.*?</summary>\n*\n*`.*?`\n*\n*</details>\n?",
95
+ token_dtls_tag = "<details class='token-usage-details' markdown='1'>"
96
+ re_token = re.compile(fr"^{re.escape(token_dtls_tag)}\n*<summary>.*?</summary>\n*\n*`.*?`\n*\n*</details>\n?",
95
97
  flags=re.DOTALL|re.MULTILINE)
96
98
 
97
99
  # %% ../nbs/07_chat.ipynb #be998131
98
100
  _fence_back = '`````'
99
- _fence_re = re.compile(f'^{_fence_back}(py|bash)\n(.*?)\n{_fence_back}', re.DOTALL | re.MULTILINE)
101
+ _fence_re = re.compile(f'^{_fence_back}(py|bash)\n(.*?)\n{_fence_back}$', re.DOTALL | re.MULTILINE)
100
102
  _result_re = re.compile(f'\n{_fence_back}result\n(.*?)\n{_fence_back}\n', re.DOTALL)
101
103
  _lang2tool = dict(py='python', bash='bash')
102
104
 
@@ -144,15 +146,6 @@ def _split_fence_msgs(msgs):
144
146
  for m in msgs: res.extend(_split_msg_on_fences(m))
145
147
  return res
146
148
 
147
- # %% ../nbs/07_chat.ipynb #b161ca9e
148
- def stop_sequences(seqs):
149
- "Stop when any sequence appears in the accumulated completion text."
150
- seqs = L(seqs)
151
- def _stop(text):
152
- for s in seqs:
153
- if s in text: return text[:text.find(s)+len(s)]
154
- return _stop
155
-
156
149
  # %% ../nbs/07_chat.ipynb #45ada210
157
150
  def _extract_tool_parts(text:str):
158
151
  "Extract (tool_use_part, tool_result_part) from <details> json block"
@@ -216,7 +209,9 @@ def mk_msgs(
216
209
  "Create a list of fastllm canonical Msgs."
217
210
  if not msgs: return []
218
211
  if not isinstance(msgs, list): msgs = [msgs]
219
- msgs = L(msgs).map(lambda m: fmt2hist(m) if isinstance(m,str) and tool_dtls_tag in m else [m]).concat()
212
+ msgs = L(msgs).map(lambda m:
213
+ fmt2hist(m) if isinstance(m,str) and (tool_dtls_tag in m or token_dtls_tag in m) else [m]
214
+ ).concat()
220
215
  res, role = [], 'user'
221
216
  for m in msgs:
222
217
  res.append(msg := remove_cache_ckpts(mk_msg(m, role=role)))
@@ -304,9 +299,11 @@ def _has_stop(tres_parts): return any(isinstance(p.text, StopResponse) for p in
304
299
  def _trunc_str(s, mx=2000, skip=10, replace="TRUNCATED"):
305
300
  "Truncate `s` to `mx` chars max, adding `replace` if truncated"
306
301
  if not isinstance(s, str): s = str(s)
307
- if len(s)>2 and s[0]=='𝍁' and s[-1]=='𝍁': return s[1:-1]
302
+ s = s.rstrip()
303
+ if len(s)>2 and s[0]=='𝍁' and s[-1]=='𝍁':
304
+ s = s[1:-1]
305
+ if replace: return s
308
306
  if isinstance_str(s, ('FullResponse','Safe')): return s
309
- s = str(s).strip()
310
307
  if len(s)<=mx: return s
311
308
  s = s[skip:mx-skip]
312
309
  ss = s.split(' ')
@@ -362,24 +359,7 @@ class UsageStats:
362
359
  summ = f"${self.cost:.4f}" if self.cost else f"{self.total_tokens:,} tokens"
363
360
  return f"\n\n{token_dtls_tag}<summary>{summ}</summary>\n\n`{self!r}`\n\n</details>\n"
364
361
 
365
- # %% ../nbs/07_chat.ipynb #67fd51cb
366
- def _inject_tool_reminder(msgs, reminder):
367
- i = len(msgs)
368
- while i>0 and msgs[i-1].role=='tool': i-=1
369
- if i>=len(msgs): return msgs
370
- msgs,m = list(msgs),msgs[i]
371
- m.content.append(Part(type=PartType.text, text=reminder))
372
- msgs[i] = m
373
- return msgs
374
-
375
- # %% ../nbs/07_chat.ipynb #e7eb2032
376
- def _active_fence_langs(tool_schemas):
377
- "Return set of active fence langs whose mapped tool is registered"
378
- if not tool_schemas: return set()
379
- names = {nested_idx(t, 'function', 'name') for t in tool_schemas}
380
- return {lang for lang, tname in _lang2tool.items() if tname in names}
381
-
382
- # %% ../nbs/07_chat.ipynb #e9a14051
362
+ # %% ../nbs/07_chat.ipynb #cb3d7e77
383
363
  class AsyncChat:
384
364
  def __init__(
385
365
  self,
@@ -399,7 +379,8 @@ class AsyncChat:
399
379
  base_url=None, # API base url when model can't be resolved or vendor_name is not known
400
380
  extra_headers=None, # Extra HTTP headers for custom providers
401
381
  markup=0, # Cost markup multiplier (e.g. 0.5 for 50%)
402
- tool_reminder=None, # Prepended as a block to the first trailing tool result (transient)
382
+ cbs:list=None, # Chat callbacks
383
+ default_cbs=True # Whether to include default callbacks
403
384
  ):
404
385
  "LiteLLM chat client."
405
386
  self.model = model
@@ -408,7 +389,10 @@ class AsyncChat:
408
389
  elif ns is None: ns = globals()
409
390
  self.tool_schemas = [lite_mk_func(t) for t in tools] if tools else None
410
391
  self.use = UsageStats()
411
- store_attr()
392
+ store_attr(but='cbs')
393
+ self.cbs = L()
394
+ if default_cbs: self.add_cbs(defaults.chat_callbacks)
395
+ self.add_cbs(cbs)
412
396
 
413
397
  def _prep_msg(self, msg=None, prefill=None):
414
398
  "Prepare the system prompt and messages list for the API call"
@@ -422,14 +406,6 @@ class AsyncChat:
422
406
  self.hist = mk_msgs(self.hist, self.cache and 'claude' in self.model, cache_idxs, self.ttl)
423
407
  msgs = self.hist
424
408
  if prefill: msgs = self.hist + [Msg(role='assistant', content=[Part(PartType.text, prefill)])]
425
- msgs = _split_fence_msgs(msgs)
426
- if self.tool_reminder: msgs = _inject_tool_reminder(msgs, self.tool_reminder)
427
- if 'deepseek' in self.model:
428
- # The `reasoning_content` in the thinking mode must be passed back to the API.
429
- for m in msgs:
430
- if m.role=='assistant':
431
- if not any(p.type==PartType.thinking for p in m.content):
432
- m.content.append(Part(PartType.thinking, ''))
433
409
  return sp, msgs
434
410
 
435
411
  @property
@@ -439,39 +415,35 @@ class AsyncChat:
439
415
  u.cost *= (1 + self.markup)
440
416
  self.use += u
441
417
 
418
+ def add_cb(self, cb):
419
+ if isinstance(cb, type): cb = cb()
420
+ cb.chat = self
421
+ self.cbs.append(cb)
422
+ return self
423
+
424
+ def add_cbs(self, cbs):
425
+ if cbs is None: return self
426
+ L(cbs).map(self.add_cb)
427
+ return self
428
+
442
429
  # %% ../nbs/07_chat.ipynb #2e469ea1
443
430
  def _srvtools(tcs): return L(tcs).filter(lambda o: o.server) if tcs else None
444
431
  def _usrtools(tcs): return L(tcs).filter(lambda o: not o.server) if tcs else None
445
432
 
446
- # %% ../nbs/07_chat.ipynb #a2e70fbb
447
- def add_warning(r, msg):
448
- wrn = Part(PartType.text, f"<warning>{msg}</warning>")
449
- if r.message.content: r.message.content.append(wrn)
450
- else: r.message.content = [wrn]
451
-
452
- # %% ../nbs/07_chat.ipynb #e16195f9
453
- def _handle_stop_reason(res):
454
- "Returns (action, warning_msg) - action is 'warning', 'pause', or None"
455
- sr = stop_reason(res)
456
- if sr == 'length': return 'warning', 'Response was cut off at token limit.'
457
- if sr == 'refusal': return 'warning', 'AI server provider content filter was applied to this request'
458
- if sr == 'content_filter': return 'warning', 'AI server provider content filter was applied to this request.'
459
- # if sr == 'pause_turn': return 'retry', None # TODO: Not a canonical finish reason
460
- return None, None
461
-
462
433
  # %% ../nbs/07_chat.ipynb #19b87f53
463
434
  def _think_kw(model, think, vendor_name):
464
435
  if not think: return {}
465
436
  if 'opus-4-7' in model:
466
437
  e = 'xhigh' if think=='h' else effort.get(think)
467
- return dict(thinking={"type":"adaptive", "display":"summarized"}, output_config={"effort":e})
438
+ eff = dict(thinking={"type":"adaptive", "display":"summarized"}, output_config={"effort":e})
439
+ return dict(reasoning_effort=eff)
468
440
  try: xhigh = get_model_info(model, vendor_name).get('supports_xhigh_reasoning_effort')
469
441
  except: xhigh = False
470
442
  eff = effort.get(think) if think!='x' else 'xhigh' if xhigh else 'high'
471
443
  if vendor_name == 'codex': return dict(reasoning_effort={'effort':eff, 'summary':'auto'})
472
444
  return dict(reasoning_effort=eff)
473
445
 
474
- # %% ../nbs/07_chat.ipynb #b3f28523
446
+ # %% ../nbs/07_chat.ipynb #06e898fd
475
447
  @patch
476
448
  def _prep_call(self:AsyncChat, prefill, search, max_tokens, kwargs, stream=False, think=None):
477
449
  "Prepare model info, prefill, search, and provider kwargs for a completion call"
@@ -483,19 +455,14 @@ def _prep_call(self:AsyncChat, prefill, search, max_tokens, kwargs, stream=False
483
455
  kwargs['web_search_options']['search_context_size'] = effort[s]
484
456
  if self.vendor_name == 'codex': kwargs['web_search_options']['type'] = 'web_search'
485
457
  else: kwargs.pop('web_search_options', None)
486
- # kwargs['additional_drop_params'] = ['temperature'] # TODO: What is this for?
487
458
  if self.api_name: kwargs['api_name'] = self.api_name
488
459
  if self.vendor_name: kwargs['vendor_name'] = self.vendor_name
489
460
  if self.api_key: kwargs['api_key'] = self.api_key
490
461
  if self.base_url: kwargs['base_url'] = self.base_url
491
462
  if self.extra_headers: kwargs['xtra_headers'] = self.extra_headers
492
463
  kwargs.update(_think_kw(self.model, think, self.vendor_name))
493
- if (langs := _active_fence_langs(self.tool_schemas)):
494
- if not any(isinstance(s, FenceToolStop) for s in kwargs.get('stop_callables', [])):
495
- kwargs['stop_callables'] = kwargs.get('stop_callables', []) + [FenceToolStop(langs)]
496
464
  return prefill, max_tokens
497
465
 
498
-
499
466
  # %% ../nbs/07_chat.ipynb #07951b77
500
467
  @patch
501
468
  def print_hist(self:AsyncChat):
@@ -515,50 +482,35 @@ async def astream_with_complete(self, agen, postproc=noop):
515
482
  if not isinstance(chunk, Completion): yield postproc(chunk)
516
483
  self.value = chunk
517
484
 
518
- # %% ../nbs/07_chat.ipynb #baf28c01
485
+ # %% ../nbs/07_chat.ipynb #a049cf52
519
486
  @patch
520
487
  @delegates(acomplete)
521
488
  async def _call(self:AsyncChat, msg=None, prefill=None, temp=None, think=None, search=None, stream=False, max_steps=2, step=1,
522
489
  final_prompt=None, tool_choice=None, max_tokens=None, n_workers=8, pause=0.001, tc_timeout=7200, **kwargs):
523
490
  if step>max_steps+1: return
524
- prefill, max_tokens = self._prep_call(prefill, search, max_tokens, kwargs, stream=stream, think=think)
525
- sp,msgs = self._prep_msg(msg,prefill)
526
- if prefill and self.vendor_name == 'deepseek' and self.model in ("deepseek-v4-flash", "deepseek-v4-pro"):
527
- kwargs['base_url'] = 'https://api.deepseek.com/beta'
528
- # TODO: num_retries=2 is this needed? If so add.
529
- # caching removed, cache checkpoints are added for Anthropic and other providers do implicit caching
530
- res = await acomplete(msgs, self.model, system=sp, stream=stream,
491
+ self.prefill, max_tokens = self._prep_call(prefill, search, max_tokens, kwargs, stream=stream, think=think)
492
+ self.turn_sysp, self.turn_msgs = self._prep_msg(msg, prefill)
493
+ async for o in self._call_cbs('after_msgs'): yield o
494
+
495
+ self.turn_kwargs, self.stream = kwargs, stream
496
+ async for o in self._call_cbs('before_acomplete'): yield o
497
+ res = await acomplete(self.turn_msgs, self.model, system=self.turn_sysp, stream=stream,
531
498
  tools=self.tool_schemas, tool_choice=tool_choice, max_tokens=int(max_tokens),
532
- temperature=None if think else ifnone(temp,self.temp), **kwargs)
499
+ temperature=None if think else ifnone(temp,self.temp), **self.turn_kwargs)
533
500
  if stream:
534
- if prefill: yield _mk_prefill(prefill)
501
+ if self.prefill: yield _mk_prefill(self.prefill)
535
502
  res = astream_with_complete(res, postproc=postproc)
536
503
  async for chunk in res: yield chunk
537
504
  res = res.value
538
- m=contents(res)
539
- if prefill: m.content[0].text = prefill + m.content[0].text
540
- self.hist.append(m)
541
- action, msg = _handle_stop_reason(res)
542
- if action == 'warning': add_warning(res, msg)
543
- elif action == 'retry':
544
- async for result in self._call(
545
- None, prefill, temp, think, search, stream, max_steps, step,
546
- final_prompt, tool_choice, **kwargs): yield result
547
- self.hist.pop(-2) # rm incomplete srvtoolu_
548
- return
549
- self._track(res)
505
+ self.turn_res, self.turn_msg = res, contents(res)
506
+ if self.prefill: self.turn_msg.content[0].text = self.prefill + self.turn_msg.content[0].text
507
+ self.hist.append(self.turn_msg)
508
+ async for o in self._call_cbs('after_acomplete'): yield o
509
+ self._track(self.turn_res)
550
510
  yield res
551
511
 
552
- toolloop, prompt = False, None
553
- if (langs := _active_fence_langs(self.tool_schemas)):
554
- if m := last(self.hist, lambda o: o.role == 'assistant'):
555
- if fence := extract_fence_call(m.text):
556
- lang, code = fence
557
- out = await run_fence_tool(lang, code, self.ns)
558
- for p in reversed(m.content):
559
- if p.type == PartType.text: p.text += out; break
560
- if stream: yield {'text': out}
561
- toolloop = True
512
+ self.toolloop, self.prompt, tmsg = False, None, None
513
+ async for o in self._call_cbs('before_tool_calls'): yield o
562
514
  if stcs:= _srvtools(res.tool_calls):
563
515
  for tc in stcs: yield tc
564
516
  if tcs := _usrtools(res.tool_calls):
@@ -566,29 +518,23 @@ async def _call(self:AsyncChat, msg=None, prefill=None, temp=None, think=None, s
566
518
  tmsg = mk_tool_res_msg(tcs, tres)
567
519
  for r in tmsg.content: yield r
568
520
  self.hist.append(tmsg)
569
- if step>=max_steps-1 or _has_stop(tmsg.content): prompt,tool_choice,search = mk_msg(final_prompt),'none',False
570
- toolloop = True
521
+ if step>=max_steps-1 or _has_stop(tmsg.content): self.prompt,tool_choice,search = mk_msg(final_prompt),'none',False
522
+ self.toolloop = True
571
523
 
572
- if toolloop and step <= max_steps:
524
+ async for o in self._call_cbs('after_tool_calls'): yield o
525
+ if self.toolloop and step <= max_steps:
573
526
  try:
574
527
  async for result in self._call(
575
- prompt, prefill, temp, think, search, stream, max_steps, step+1,
528
+ self.prompt, None, temp, think, search, stream, max_steps, step+1,
576
529
  final_prompt, tool_choice=tool_choice, **kwargs): yield result
577
530
  except ContextWindowExceededError:
578
- for p in tmsg.content:
579
- if len(p.text)>1000: p.text = _cwe_msg + _trunc_str(p.text, mx=1000)
531
+ if tmsg is not None:
532
+ for p in tmsg.content:
533
+ if len(p.text)>1000: p.text = _cwe_msg + _trunc_str(p.text, mx=1000)
580
534
  async for result in self._call(
581
- prompt, prefill, temp, think, search, stream, max_steps, step+1,
535
+ self.prompt, None, temp, think, search, stream, max_steps, step+1,
582
536
  final_prompt, tool_choice='none', **kwargs): yield result
583
537
 
584
- # %% ../nbs/07_chat.ipynb #4dc002da
585
- async def run_fence_tool(lang, code, ns):
586
- "Run the mapped tool for `lang` with the code, return result fence"
587
- tname = _lang2tool[lang]
588
- arg = dict(code=code) if lang == 'py' else dict(command=code)
589
- res = _mk_tool_result(await call_func_async(tname, arg, ns=ns, raise_on_err=False))
590
- return _mk_result_fence(_trunc_str(str(res)))
591
-
592
538
  # %% ../nbs/07_chat.ipynb #1361515a
593
539
  @patch
594
540
  @delegates(AsyncChat._call)
@@ -611,11 +557,146 @@ async def __call__(
611
557
  async for res in result_gen: pass
612
558
  return res # normal chat behavior only return last msg
613
559
 
560
+ # %% ../nbs/07_chat.ipynb #a4bbd2ce
561
class ChatCallback(GetAttr):
    "Base class for chat callbacks. `_default='chat'` names the attribute `GetAttr` delegates to (callbacks below read chat state via `self.<name>`)."
    # Sort key used when callbacks are run; lower values run first.
    order = 0
    # Name of the attribute that `GetAttr` forwards unknown lookups to.
    _default = 'chat'
    # The owning chat; left unset here and populated elsewhere.
    chat = None
    # Callbacks with a falsy `run` are skipped by `_call_cbs`.
    run = True

    def __repr__(self):
        return self.__class__.__name__
564
+
565
+ # %% ../nbs/07_chat.ipynb #2f02135c
566
@patch
async def _call_cbs(self:AsyncChat, event):
    "Run the `event` handler of every enabled callback, in `order`, re-yielding whatever each handler yields."
    for callback in self.cbs.sorted('order'):
        # Skip disabled callbacks and those that do not implement this event.
        if not (callback.run and hasattr(callback, event)): continue
        handler = getattr(callback, event)
        async for item in handler():
            yield item
571
+
572
+ # %% ../nbs/07_chat.ipynb #cf3f064c
573
class DeepseekMsgsCallback(ChatCallback):
    "For models whose name contains 'deepseek', give every assistant message in the turn a thinking part (empty if none exists)."
    order = 10

    async def after_msgs(self):
        if 'deepseek' in self.model:
            for msg in self.turn_msgs:
                if msg.role != 'assistant': continue
                has_thinking = any(part.type == PartType.thinking for part in msg.content)
                if not has_thinking:
                    msg.content.append(Part(PartType.thinking, ''))
        # Never executed: the `yield` only makes this an async generator, as `_call_cbs` iterates handlers.
        if False: yield
581
+
582
+ # %% ../nbs/07_chat.ipynb #14baac3e
583
class DeepseekPrefillCallback(ChatCallback):
    "Point prefilled DeepSeek requests at the DeepSeek beta base URL."
    order = 10

    async def before_acomplete(self):
        # All three conditions must hold: a prefill is set, the vendor is deepseek, and the model is a `deepseek-*` one.
        if self.prefill:
            if self.vendor_name == 'deepseek':
                if self.model.startswith("deepseek-"):
                    self.chat.turn_kwargs['base_url'] = 'https://api.deepseek.com/beta'
        # Never executed: the `yield` only makes this an async generator.
        if False: yield
589
+
590
+ # %% ../nbs/07_chat.ipynb #ce47dc4a
591
def add_warning(r, msg):
    "Add a `<warning>`-wrapped text part carrying `msg` to `r.message.content`, creating the list when it is empty or unset."
    warning = Part(PartType.text, f"<warning>{msg}</warning>")
    if not r.message.content:
        r.message.content = [warning]
        return
    r.message.content.append(warning)
595
+
596
+ # %% ../nbs/07_chat.ipynb #b6ea161d
597
def _handle_stop_reason(res):
    "Return `(action, warning_msg)`: `('warning', text)` for a recognised stop reason, otherwise `(None, None)`."
    # (stop reason, warning text) pairs, checked in this order.
    warnings_by_reason = (
        ('length', 'Response was cut off at token limit.'),
        ('refusal', 'AI server provider content filter was applied to this request'),
        ('content_filter', 'AI server provider content filter was applied to this request.'),
    )
    reason = stop_reason(res)
    for name, text in warnings_by_reason:
        if reason == name: return 'warning', text
    # if sr == 'pause_turn': return 'retry', None # TODO: Not a canonical finish reason
    return None, None
605
+
606
+ # %% ../nbs/07_chat.ipynb #daf876f4
607
class StopReasonCallback(ChatCallback):
    "After a completion, attach a warning part to the turn result when its stop reason calls for one."
    order = 40

    async def after_acomplete(self):
        outcome = _handle_stop_reason(self.turn_res)
        kind, text = outcome
        if kind == 'warning':
            add_warning(self.chat.turn_res, text)
        # Never executed: the `yield` only makes this an async generator.
        if False: yield
613
+
614
+ # %% ../nbs/07_chat.ipynb #aa7630b2
615
def _active_fence_langs(tool_schemas):
    "Return the set of fence languages in `_lang2tool` whose mapped tool name appears in `tool_schemas`."
    if not tool_schemas: return set()
    # Collect the function name of every schema.
    registered = set()
    for schema in tool_schemas:
        registered.add(nested_idx(schema, 'function', 'name'))
    active = set()
    for lang, tool_name in _lang2tool.items():
        if tool_name in registered: active.add(lang)
    return active
620
+
621
+ # %% ../nbs/07_chat.ipynb #72274cdc
622
async def run_fence_tool(lang, code, ns):
    "Call the tool mapped to `lang` with `code`, and return its truncated result wrapped as a result fence."
    tool_name = _lang2tool[lang]
    # 'py' fences pass the source as `code`; every other language passes it as `command`.
    arg_key = 'code' if lang == 'py' else 'command'
    raw = await call_func_async(tool_name, {arg_key: code}, ns=ns, raise_on_err=False)
    result_text = str(_mk_tool_result(raw))
    return _mk_result_fence(_trunc_str(result_text))
628
+
629
+ # %% ../nbs/07_chat.ipynb #740ee3a4
630
class FenceToolCallback(ChatCallback):
    "Wire fenced-code tool calls into a turn: split fence messages, install a `FenceToolStop`, and run the fence found in the last assistant message."
    order = 20

    async def after_msgs(self):
        "Replace the turn's messages with the output of `_split_fence_msgs`."
        self.chat.turn_msgs = _split_fence_msgs(self.turn_msgs)
        if False: yield

    async def before_acomplete(self):
        "Add a `FenceToolStop` for the active fence languages unless one is already among the stop callables."
        langs = _active_fence_langs(self.tool_schemas)
        if langs:
            current = self.turn_kwargs.get('stop_callables', [])
            already_present = any(isinstance(s, FenceToolStop) for s in current)
            if not already_present:
                self.chat.turn_kwargs['stop_callables'] = current + [FenceToolStop(langs)]
        if False: yield

    async def before_tool_calls(self):
        "Run the fence call in the last assistant message, append its output to that message, and request another loop step."
        if not _active_fence_langs(self.tool_schemas): return
        assistant_msg = last(self.hist, lambda o: o.role == 'assistant')
        if not assistant_msg: return
        fence = extract_fence_call(assistant_msg.text)
        if not fence: return
        lang, code = fence
        out = await run_fence_tool(lang, code, self.ns)
        # Attach the result to the last text part of the assistant message.
        for part in reversed(assistant_msg.content):
            if part.type == PartType.text:
                part.text += out
                break
        self.chat.toolloop = True
        if self.stream: yield {'text': out}
653
+
654
+ # %% ../nbs/07_chat.ipynb #1897aea2
655
def _inject_tool_reminder(msgs, reminder):
    """Return `msgs` with `reminder` added as a text part to the first message of the trailing run of tool messages.

    When `msgs` does not end with a tool message it is returned unchanged.
    Otherwise a new list is returned in which that one message is replaced by
    a shallow copy carrying the extra part; the caller's list and message
    objects are left untouched, so calling this again on the same messages
    does not stack reminders.
    """
    from copy import copy
    i = len(msgs)
    # Walk back over the trailing tool messages; `i` ends at the first of them.
    while i>0 and msgs[i-1].role=='tool': i-=1
    if i>=len(msgs): return msgs
    msgs = list(msgs)
    # Copy the message and its content list so the original is not mutated.
    m = copy(msgs[i])
    m.content = list(m.content) + [Part(type=PartType.text, text=reminder)]
    msgs[i] = m
    return msgs
663
+
664
+ # %% ../nbs/07_chat.ipynb #1b404e0f
665
# Default reminder text for `ToolReminderCallback` (the default of its `tool_reminder` argument).
+ _tool_reminder = '\n<system-reminder>After *EVERY* tool call result, no matter how small, briefly summarise in prose what you found, before continuing or calling another tool.</system-reminder>'
666
+
667
+ # %% ../nbs/07_chat.ipynb #fab308b7
668
class ToolReminderCallback(ChatCallback):
    "Inject `tool_reminder` into the turn's messages via `_inject_tool_reminder`."
    order = 30

    def __init__(self, tool_reminder=_tool_reminder):
        # `store_attr()` saves the constructor argument as `self.tool_reminder`.
        store_attr()

    async def after_msgs(self):
        updated = _inject_tool_reminder(self.turn_msgs, self.tool_reminder)
        self.chat.turn_msgs = updated
        # Never executed: the `yield` only makes this an async generator.
        if False: yield
674
+
675
+ # %% ../nbs/07_chat.ipynb #423caa31
676
def stop_sequences(seqs):
    """Stop when any sequence appears in the accumulated completion text.

    `seqs` may be a single string, an iterable of strings, or None. Returns a
    callable `_stop(text)` that yields `text` trimmed to the end of the
    earliest-occurring sequence, or None when no sequence is present. Empty
    sequences are ignored, since they would match at position 0 of any text.
    """
    if seqs is None: seqs = []
    elif isinstance(seqs, str): seqs = [seqs]
    seqs = [s for s in seqs if s]
    def _stop(text):
        # Find every sequence present, then cut at whichever starts first in the text.
        hits = [(i, s) for s in seqs if (i := text.find(s)) >= 0]
        if not hits: return None
        start, seq = min(hits, key=lambda hit: hit[0])
        return text[:start+len(seq)]
    return _stop
683
+
684
+ # %% ../nbs/07_chat.ipynb #663eee29
685
class StopSequencesCallback(ChatCallback):
    "Install a `stop_sequences` stop callable for `seqs` before each completion, adding it at most once per stop-callable list."
    order = 30

    def __init__(self, seqs):
        self.seqs = L(seqs)
        # Built on first use and then reused, so an earlier installation can be recognised by identity.
        self._stop_fn = None

    async def before_acomplete(self):
        if self._stop_fn is None: self._stop_fn = stop_sequences(self.seqs)
        stops = self.turn_kwargs.get('stop_callables', [])
        # Same duplicate guard as `FenceToolCallback.before_acomplete`.
        if not any(s is self._stop_fn for s in stops):
            self.chat.turn_kwargs['stop_callables'] = stops + [self._stop_fn]
        # Never executed: the `yield` only makes this an async generator.
        if False: yield
691
+
692
+ # %% ../nbs/07_chat.ipynb #318ee856
693
# Callback classes stored as the default chat callbacks. `DeepseekMsgsCallback` and `StopSequencesCallback` are defined above but not listed here.
+ defaults.chat_callbacks = [DeepseekPrefillCallback, FenceToolCallback, ToolReminderCallback, StopReasonCallback]
694
+
614
695
  # %% ../nbs/07_chat.ipynb #115fd94f
615
696
  def _trunc_param(v, mx=40):
616
697
  "Truncate and escape param value for display"
617
698
  tp = _trunc_str(str(v).replace('`', r'\`'), mx=mx, replace=None, skip=0)
618
- try: return ast.literal_eval(tp)
699
+ try: return dumps(tp, ensure_ascii=False)
619
700
  except Exception: return repr(tp).replace('\\\\', '\\')
620
701
 
621
702
  # %% ../nbs/07_chat.ipynb #80c0abdb
@@ -645,7 +726,7 @@ def mk_tr_details(tr, mx=2000):
645
726
  'call':{'function': tr.data['name'], 'arguments': args},
646
727
  'result':_trunc_content(tr.text, mx=mx),}
647
728
  summ = f"<summary>{_tc_summary(tr)}</summary>"
648
- return f"\n\n{tool_dtls_tag}\n{summ}\n\n```json\n{dumps(res, indent=2)}\n```\n\n</details>\n\n"
729
+ return f"\n\n{tool_dtls_tag}\n{summ}\n\n```json\n{dumps(res, indent=2, ensure_ascii=False)}\n```\n\n</details>\n\n"
649
730
 
650
731
  # %% ../nbs/07_chat.ipynb #3049001c
651
732
  def mk_srv_tc_details(tc, mx=2000):
@@ -653,7 +734,7 @@ def mk_srv_tc_details(tc, mx=2000):
653
734
  args = {k:_trunc_str(v, mx=mx*5) for k,v in tc.arguments.items()}
654
735
  res = {'id':tc.id, 'server':True, 'call':{'function': tc.name, 'arguments': args}, 'result':"Server tool call executed."}
655
736
  summ = f"<summary>{_srv_tc_summary(tc)}</summary>"
656
- return f"\n\n{tool_dtls_tag}\n{summ}\n\n```json\n{dumps(res, indent=2)}\n```\n\n</details>\n\n"
737
+ return f"\n\n{tool_dtls_tag}\n{summ}\n\n```json\n{dumps(res, indent=2, ensure_ascii=False)}\n```\n\n</details>\n\n"
657
738
 
658
739
  # %% ../nbs/07_chat.ipynb #f0d984ec
659
740
  # status_re = re.compile(r'^- ⏳ <code>(.*)</code> ⏳$|^🧠+$', re.MULTILINE) # TODO: Need to yield tool calls as they are done collated in fastllm `_acollect_stream`
@@ -0,0 +1,7 @@
1
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/05_codex.ipynb.
2
+
3
+ # %% auto #0
4
+ __all__ = []
5
+
6
+ # %% ../nbs/05_codex.ipynb #a1d088d2
7
+ from fastcore.utils import *
@@ -116,7 +116,7 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
116
116
  stop, stop_yielded = False, False
117
117
  async for d in it:
118
118
  # Check stop condition and yield stop delta
119
- stop = stop_and_trim(part_accum, d, stop_callables)
119
+ if not stop: stop = stop_and_trim(part_accum, d, stop_callables)
120
120
  if stop and not stop_yielded:
121
121
  for r in _yield_parts(d): yield r
122
122
  stop_yielded = True
@@ -138,6 +138,7 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
138
138
  deltas.append(d)
139
139
  part_accum.finalize()
140
140
  tcs = part_accum.tool_calls
141
+ if api_name: usg = api_registry.apis[api_name].finalize_usage(usg, part_accum.parts)
141
142
  if stop: fin = FinishReason.stop
142
143
  fin = FinishReason.tool_calls if fin==FinishReason.stop and any(~L(tcs).attrgot('server')) else fin # recheck tool calls post collation
143
144
  # tool calls and non-anthropic citations are yielded at the end
@@ -145,3 +146,4 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
145
146
  message=Msg(role="assistant", content=part_accum.parts),
146
147
  finish_reason=fin, usage=usg, tool_calls=tcs, api_name=api_name, vendor_name=vendor_name,
147
148
  raw={'deltas':deltas})
149
+
@@ -40,7 +40,7 @@ def _repr_markdown_(self: Part):
40
40
 
41
41
  {body}
42
42
 
43
- <details>
43
+ <details markdown='1'>
44
44
 
45
45
  - data: `{data}`
46
46
 
@@ -80,7 +80,7 @@ def _repr_markdown_(self: ToolCall):
80
80
  extra = _trunc_strs(self.extra)
81
81
  return f"""🔧 **{self.name}**(`{self.arguments}`)
82
82
 
83
- <details>
83
+ <details markdown='1'>
84
84
 
85
85
  - id: `{self.id}`
86
86
  - server: `{self.server}`
@@ -135,7 +135,7 @@ def _repr_markdown_(self: Completion):
135
135
  det_str = '\n- '.join(details)
136
136
  return f"""{content}
137
137
 
138
- <details>
138
+ <details markdown='1'>
139
139
 
140
140
  - {det_str}
141
141
 
@@ -147,25 +147,29 @@ FinishReason = str_enum('finish_reason', 'stop', 'tool_calls', 'length', 'conten
147
147
  # %% ../nbs/00_types.ipynb #fc681c52
148
148
  class APIRegistry:
149
149
  def __init__(self): self.apis = {}
150
- def register(self, name, **kwargs): self.apis[name] = SimpleNamespace(**kwargs)
150
+ def register(self, name, finalize_usage=noop, **kwargs): self.apis[name] = SimpleNamespace(finalize_usage=finalize_usage, **kwargs)
151
151
 
152
152
  api_registry = APIRegistry()
153
153
 
154
+
154
155
  # %% ../nbs/00_types.ipynb #d58a5f96
155
156
  def mk_completion(resp, model, api_name, vendor_name):
156
157
  "Normalize an api response into Completion."
157
158
  api = api_registry.apis[api_name]
158
159
  tcs = api.norm_tool_calls(resp)
160
+ parts = api.norm_parts(resp)
161
+ usg = api.finalize_usage(api.norm_usage(resp), parts)
159
162
  return Completion(
160
163
  model=resp.get("model") or model,
161
- message=Msg(role="assistant", content=api.norm_parts(resp)),
164
+ message=Msg(role="assistant", content=parts),
162
165
  finish_reason=api.norm_finish(resp, tcs),
163
- usage=api.norm_usage(resp),
166
+ usage=usg,
164
167
  tool_calls=tcs,
165
168
  api_name=api_name,
166
169
  vendor_name=vendor_name,
167
170
  raw=resp)
168
171
 
172
+
169
173
  # %% ../nbs/00_types.ipynb #d5322db5
170
174
  def mk_tool_res_msg(tool_calls:list[ToolCall], results:list[str|list]):
171
175
  'A util to prepare parallel tool call with str or media list results'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-fastllm
3
- Version: 0.0.6
3
+ Version: 0.0.8
4
4
  Author-email: Kerem Turgutlu <keremturgutlu@gmail.com>
5
5
  License: Apache-2.0
6
6
  Project-URL: Repository, https://github.com/AnswerDotAI/fastllm
@@ -5,6 +5,7 @@ fastllm/_modidx.py
5
5
  fastllm/acomplete.py
6
6
  fastllm/anthropic.py
7
7
  fastllm/chat.py
8
+ fastllm/codex.py
8
9
  fastllm/gemini.py
9
10
  fastllm/openai_chat.py
10
11
  fastllm/openai_responses.py
@@ -1 +0,0 @@
1
- __version__ = "0.0.6"
File without changes
File without changes