python-fastllm 0.0.9__tar.gz → 0.0.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {python_fastllm-0.0.9 → python_fastllm-0.0.10}/PKG-INFO +1 -1
  2. python_fastllm-0.0.10/fastllm/__init__.py +1 -0
  3. {python_fastllm-0.0.9 → python_fastllm-0.0.10}/fastllm/_modidx.py +8 -3
  4. {python_fastllm-0.0.9 → python_fastllm-0.0.10}/fastllm/acomplete.py +39 -10
  5. {python_fastllm-0.0.9 → python_fastllm-0.0.10}/fastllm/anthropic.py +3 -1
  6. {python_fastllm-0.0.9 → python_fastllm-0.0.10}/fastllm/chat.py +33 -36
  7. {python_fastllm-0.0.9 → python_fastllm-0.0.10}/fastllm/streaming.py +21 -7
  8. {python_fastllm-0.0.9 → python_fastllm-0.0.10}/fastllm/types.py +25 -11
  9. {python_fastllm-0.0.9 → python_fastllm-0.0.10}/python_fastllm.egg-info/PKG-INFO +1 -1
  10. python_fastllm-0.0.9/fastllm/__init__.py +0 -1
  11. {python_fastllm-0.0.9 → python_fastllm-0.0.10}/README.md +0 -0
  12. {python_fastllm-0.0.9 → python_fastllm-0.0.10}/fastllm/codex.py +0 -0
  13. {python_fastllm-0.0.9 → python_fastllm-0.0.10}/fastllm/gemini.py +0 -0
  14. {python_fastllm-0.0.9 → python_fastllm-0.0.10}/fastllm/openai_chat.py +0 -0
  15. {python_fastllm-0.0.9 → python_fastllm-0.0.10}/fastllm/openai_responses.py +0 -0
  16. {python_fastllm-0.0.9 → python_fastllm-0.0.10}/fastllm/specs/anthropic.json +0 -0
  17. {python_fastllm-0.0.9 → python_fastllm-0.0.10}/fastllm/specs/anthropic.yml +0 -0
  18. {python_fastllm-0.0.9 → python_fastllm-0.0.10}/fastllm/specs/gemini.json +0 -0
  19. {python_fastllm-0.0.9 → python_fastllm-0.0.10}/fastllm/specs/openai.with-code-samples.json +0 -0
  20. {python_fastllm-0.0.9 → python_fastllm-0.0.10}/fastllm/specs/openai.with-code-samples.yml +0 -0
  21. {python_fastllm-0.0.9 → python_fastllm-0.0.10}/fastllm/specs/spec_manifest.json +0 -0
  22. {python_fastllm-0.0.9 → python_fastllm-0.0.10}/pyproject.toml +0 -0
  23. {python_fastllm-0.0.9 → python_fastllm-0.0.10}/python_fastllm.egg-info/SOURCES.txt +0 -0
  24. {python_fastllm-0.0.9 → python_fastllm-0.0.10}/python_fastllm.egg-info/dependency_links.txt +0 -0
  25. {python_fastllm-0.0.9 → python_fastllm-0.0.10}/python_fastllm.egg-info/entry_points.txt +0 -0
  26. {python_fastllm-0.0.9 → python_fastllm-0.0.10}/python_fastllm.egg-info/requires.txt +0 -0
  27. {python_fastllm-0.0.9 → python_fastllm-0.0.10}/python_fastllm.egg-info/top_level.txt +0 -0
  28. {python_fastllm-0.0.9 → python_fastllm-0.0.10}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-fastllm
3
- Version: 0.0.9
3
+ Version: 0.0.10
4
4
  Author-email: Kerem Turgutlu <keremturgutlu@gmail.com>
5
5
  License: Apache-2.0
6
6
  Project-URL: Repository, https://github.com/AnswerDotAI/fastllm
@@ -0,0 +1 @@
1
+ __version__ = "0.0.10"
@@ -12,6 +12,9 @@ d = { 'settings': { 'branch': 'main',
12
12
  'fastllm/acomplete.py'),
13
13
  'fastllm.acomplete._debug_print': ('acomplete.html#_debug_print', 'fastllm/acomplete.py'),
14
14
  'fastllm.acomplete._is_ctx_exceeded': ('acomplete.html#_is_ctx_exceeded', 'fastllm/acomplete.py'),
15
+ 'fastllm.acomplete._raise_if_done': ('acomplete.html#_raise_if_done', 'fastllm/acomplete.py'),
16
+ 'fastllm.acomplete._retry_call': ('acomplete.html#_retry_call', 'fastllm/acomplete.py'),
17
+ 'fastllm.acomplete._retry_stream': ('acomplete.html#_retry_stream', 'fastllm/acomplete.py'),
15
18
  'fastllm.acomplete.acomplete': ('acomplete.html#acomplete', 'fastllm/acomplete.py'),
16
19
  'fastllm.acomplete.mk_client': ('acomplete.html#mk_client', 'fastllm/acomplete.py')},
17
20
  'fastllm.anthropic': { 'fastllm.anthropic._ant_cc': ('anthropic.html#_ant_cc', 'fastllm/anthropic.py'),
@@ -74,6 +77,8 @@ d = { 'settings': { 'branch': 'main',
74
77
  'fastllm.chat.FenceToolStop.__call__': ('chat.html#fencetoolstop.__call__', 'fastllm/chat.py'),
75
78
  'fastllm.chat.FenceToolStop.__init__': ('chat.html#fencetoolstop.__init__', 'fastllm/chat.py'),
76
79
  'fastllm.chat.FullResponse': ('chat.html#fullresponse', 'fastllm/chat.py'),
80
+ 'fastllm.chat.MediaUrl': ('chat.html#mediaurl', 'fastllm/chat.py'),
81
+ 'fastllm.chat.MediaUrl.__init__': ('chat.html#mediaurl.__init__', 'fastllm/chat.py'),
77
82
  'fastllm.chat.Msg.text': ('chat.html#msg.text', 'fastllm/chat.py'),
78
83
  'fastllm.chat.StopReasonCallback': ('chat.html#stopreasoncallback', 'fastllm/chat.py'),
79
84
  'fastllm.chat.StopReasonCallback.after_acomplete': ( 'chat.html#stopreasoncallback.after_acomplete',
@@ -113,19 +118,19 @@ d = { 'settings': { 'branch': 'main',
113
118
  'fastllm.chat._has_stop': ('chat.html#_has_stop', 'fastllm/chat.py'),
114
119
  'fastllm.chat._inject_tool_reminder': ('chat.html#_inject_tool_reminder', 'fastllm/chat.py'),
115
120
  'fastllm.chat._lite_call_func': ('chat.html#_lite_call_func', 'fastllm/chat.py'),
121
+ 'fastllm.chat._mime2part_type': ('chat.html#_mime2part_type', 'fastllm/chat.py'),
116
122
  'fastllm.chat._mk_content': ('chat.html#_mk_content', 'fastllm/chat.py'),
117
123
  'fastllm.chat._mk_prefill': ('chat.html#_mk_prefill', 'fastllm/chat.py'),
118
124
  'fastllm.chat._mk_result_fence': ('chat.html#_mk_result_fence', 'fastllm/chat.py'),
119
125
  'fastllm.chat._mk_tool_result': ('chat.html#_mk_tool_result', 'fastllm/chat.py'),
120
126
  'fastllm.chat._split_fence_msgs': ('chat.html#_split_fence_msgs', 'fastllm/chat.py'),
121
127
  'fastllm.chat._split_msg_on_fences': ('chat.html#_split_msg_on_fences', 'fastllm/chat.py'),
122
- 'fastllm.chat._srv_tc_summary': ('chat.html#_srv_tc_summary', 'fastllm/chat.py'),
123
- 'fastllm.chat._srvtools': ('chat.html#_srvtools', 'fastllm/chat.py'),
124
128
  'fastllm.chat._tc_summary': ('chat.html#_tc_summary', 'fastllm/chat.py'),
125
129
  'fastllm.chat._think_kw': ('chat.html#_think_kw', 'fastllm/chat.py'),
126
130
  'fastllm.chat._trunc_content': ('chat.html#_trunc_content', 'fastllm/chat.py'),
127
131
  'fastllm.chat._trunc_param': ('chat.html#_trunc_param', 'fastllm/chat.py'),
128
132
  'fastllm.chat._trunc_str': ('chat.html#_trunc_str', 'fastllm/chat.py'),
133
+ 'fastllm.chat._url2content': ('chat.html#_url2content', 'fastllm/chat.py'),
129
134
  'fastllm.chat._usrtools': ('chat.html#_usrtools', 'fastllm/chat.py'),
130
135
  'fastllm.chat.add_warning': ('chat.html#add_warning', 'fastllm/chat.py'),
131
136
  'fastllm.chat.adisplay_stream': ('chat.html#adisplay_stream', 'fastllm/chat.py'),
@@ -137,7 +142,6 @@ d = { 'settings': { 'branch': 'main',
137
142
  'fastllm.chat.lite_mk_func': ('chat.html#lite_mk_func', 'fastllm/chat.py'),
138
143
  'fastllm.chat.mk_msg': ('chat.html#mk_msg', 'fastllm/chat.py'),
139
144
  'fastllm.chat.mk_msgs': ('chat.html#mk_msgs', 'fastllm/chat.py'),
140
- 'fastllm.chat.mk_srv_tc_details': ('chat.html#mk_srv_tc_details', 'fastllm/chat.py'),
141
145
  'fastllm.chat.mk_tr_details': ('chat.html#mk_tr_details', 'fastllm/chat.py'),
142
146
  'fastllm.chat.postproc': ('chat.html#postproc', 'fastllm/chat.py'),
143
147
  'fastllm.chat.remove_cache_ckpts': ('chat.html#remove_cache_ckpts', 'fastllm/chat.py'),
@@ -270,6 +274,7 @@ d = { 'settings': { 'branch': 'main',
270
274
  'fastllm.types.ToolCall': ('types.html#toolcall', 'fastllm/types.py'),
271
275
  'fastllm.types.ToolCall._repr_markdown_': ('types.html#toolcall._repr_markdown_', 'fastllm/types.py'),
272
276
  'fastllm.types.Usage': ('types.html#usage', 'fastllm/types.py'),
277
+ 'fastllm.types._fetch_url_partial': ('types.html#_fetch_url_partial', 'fastllm/types.py'),
273
278
  'fastllm.types._trunc_strs': ('types.html#_trunc_strs', 'fastllm/types.py'),
274
279
  'fastllm.types.approx_pricing': ('types.html#approx_pricing', 'fastllm/types.py'),
275
280
  'fastllm.types.data_url': ('types.html#data_url', 'fastllm/types.py'),
@@ -7,7 +7,7 @@ __all__ = ['specs_path', 'ant_spec', 'oai_spec', 'gem_spec', 'vendor_mapping', '
7
7
  'ContextWindowExceededError', 'acomplete']
8
8
 
9
9
  # %% ../nbs/06_acomplete.ipynb #f2f57253
10
- import json
10
+ import asyncio,json,httpx
11
11
  from importlib.resources import files
12
12
  from fastcore.utils import *
13
13
  from fastcore.meta import *
@@ -49,7 +49,8 @@ api2spec = {'openai':oai_spec, 'openai_chat':oai_spec, 'anthropic':ant_spec, 'ge
49
49
 
50
50
  # %% ../nbs/06_acomplete.ipynb #79075d95
51
51
  @flexicache()
52
- def mk_client(model=None, vendor_name=None, api_name=None, api_key=None, base_url=None, xtra_hdrs=None):
52
+ def mk_client(model=None, vendor_name=None, api_name=None, api_key=None, base_url=None, xtra_hdrs=None,
53
+ timeout=httpx.Timeout(connect=30, read=300, write=30, pool=10)):
53
54
  err_msg = f"please pass a valid one vendor: {', '.join(list(vendor_mapping))} or pass `api_name`,`base_url` and `api_key`"
54
55
  if vendor_name:
55
56
  override_base_url = base_url
@@ -67,7 +68,7 @@ def mk_client(model=None, vendor_name=None, api_name=None, api_key=None, base_ur
67
68
  else: raise ValueError(f"Model {model} can't be auto resolved, {err_msg}")
68
69
  api = api_registry.apis[api_name]
69
70
  spec, hdrs = api2spec[api_name], api.get_hdrs(api_key)
70
- cli = OpenAPIClient(spec, headers=merge(hdrs, ifnone(xtra_hdrs, {})))
71
+ cli = OpenAPIClient(spec, headers=merge(hdrs, ifnone(xtra_hdrs, {})), timeout=timeout)
71
72
  if base_url is not None:
72
73
  for op in cli.ops: op.base_url = base_url # pyright: ignore[reportAttributeAccessIssue]
73
74
  return cli, api_name, vendor_name
@@ -113,14 +114,36 @@ def _debug_print(model, api_name, vendor_name, payload, func):
113
114
  print(f"\033[1;33mpayload:\033[0m\n{pformat(p, width=120, sort_dicts=False)}")
114
115
  print('━'*60)
115
116
 
117
+ # %% ../nbs/06_acomplete.ipynb #497c8565
118
+ async def _raise_if_done(e, n, retries, retry_delay, yielded=False):
119
+ e = _classify_error(e)
120
+ if yielded or not e.retryable or n == retries: raise e
121
+ await asyncio.sleep(retry_delay*2**n)
122
+
123
+ async def _retry_call(f, retries=2, retry_delay=0.5):
124
+ for n in range(retries+1):
125
+ try: return await f()
126
+ except APIError as e: await _raise_if_done(e, n, retries, retry_delay)
127
+
128
+ async def _retry_stream(mk_gen, retries=2, retry_delay=0.5):
129
+ for n in range(retries+1):
130
+ yielded = False
131
+ try:
132
+ async for o in mk_gen():
133
+ yielded = True
134
+ yield o
135
+ return
136
+ except APIError as e: await _raise_if_done(e, n, retries, retry_delay, yielded=yielded)
137
+
116
138
  # %% ../nbs/06_acomplete.ipynb #2379ec94
117
139
  @delegates(payload_kwargs)
118
- async def acomplete(msgs, model, api_name=None, vendor_name=None, api_key=None, base_url=None, xtra_body=None, xtra_hdrs=None,
119
- stream=False, stop_callables=None, stop_sequences=None, **kwargs):
140
+ async def acomplete(msgs, model, api_name=None, vendor_name=None, api_key=None,
141
+ base_url=None, xtra_body=None, xtra_hdrs=None, stream=False,
142
+ stop_callables=None, retries=2, retry_delay=0.5, **kwargs):
120
143
  "Unified completion across different APIs."
121
144
  cli, api_name, vendor_name = mk_client(model, vendor_name, api_name, api_key, base_url, xtra_hdrs)
122
145
  api = api_registry.apis[api_name]
123
- payload = api.mk_payload(msgs, model, stream=stream, stop_callables=stop_callables, **kwargs)
146
+ payload = api.mk_payload(msgs, model, stream=stream, **kwargs)
124
147
  payload = merge(payload, ifnone(xtra_body, {}))
125
148
  if vendor_name == 'codex':
126
149
  for k in 'temperature max_tokens max_output_tokens max_completion_tokens metadata'.split(): payload.pop(k, None)
@@ -130,7 +153,13 @@ async def acomplete(msgs, model, api_name=None, vendor_name=None, api_key=None,
130
153
  if vendor_name == 'moonshot' and 'kimi' in model: payload['messages'][-1]['partial'] = True
131
154
  func = attrgetter(api.op_path[stream])(cli)
132
155
  if defaults.debug_mode: _debug_print(model, api_name, vendor_name, payload, func)
133
- try: resp = await func(**payload)
134
- except APIError as e: raise _classify_error(e) from e
135
- if stream: return _classify_error_stream(api.acollect_stream(resp, model=model, vendor_name=vendor_name, stop_callables=stop_callables))
136
- return mk_completion(resp, model=model, api_name=api_name, vendor_name=vendor_name)
156
+ async def _call(): return await func(**payload)
157
+ if not stream:
158
+ resp = await _retry_call(_call, retries, retry_delay)
159
+ return mk_completion(resp, model=model, api_name=api_name, vendor_name=vendor_name)
160
+
161
+ async def _mk_gen():
162
+ resp = await _call()
163
+ async for o in api.acollect_stream(resp, model=model, vendor_name=vendor_name, stop_callables=stop_callables): yield o
164
+
165
+ return _retry_stream(_mk_gen, retries, retry_delay)
@@ -90,7 +90,9 @@ def norm_sse_event(ev, **kwargs):
90
90
  if typ == "content_block_start":
91
91
  cb = ev.get("content_block", {})
92
92
  if cb.get("type", "").endswith("_tool_result"): return Delta(server_tool_result=cb, raw=ev, **kwargs)
93
- if tc := norm_tool_call(cb): tcs = [tc]
93
+ if tc := norm_tool_call(cb):
94
+ if not tc.arguments: tc.arguments = {'_delta': ''}
95
+ tcs = [tc]
94
96
  elif typ == "content_block_delta":
95
97
  d = ev.get("delta", {})
96
98
  dtyp = d.get("type")
@@ -3,13 +3,13 @@
3
3
  # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/07_chat.ipynb.
4
4
 
5
5
  # %% auto #0
6
- __all__ = ['tool_dtls_tag', 're_tools', 'token_dtls_tag', 're_token', 'effort', 'remove_cache_ckpts', 'contents', 'stop_reason',
7
- 'mk_msg', 'FenceToolStop', 'extract_fence_call', 'split_tools', 'fmt2hist', 'mk_msgs', 'cite_footnote',
8
- 'postproc', 'lite_mk_func', 'ToolResponse', 'structured', 'StopResponse', 'FullResponse', 'search_count',
9
- 'UsageStats', 'AsyncChat', 'astream_with_complete', 'ChatCallback', 'DeepseekMsgsCallback',
6
+ __all__ = ['tool_dtls_tag', 're_tools', 'token_dtls_tag', 're_token', 'effort', 'MediaUrl', 'remove_cache_ckpts', 'contents',
7
+ 'stop_reason', 'mk_msg', 'FenceToolStop', 'extract_fence_call', 'split_tools', 'fmt2hist', 'mk_msgs',
8
+ 'cite_footnote', 'postproc', 'lite_mk_func', 'ToolResponse', 'structured', 'StopResponse', 'FullResponse',
9
+ 'search_count', 'UsageStats', 'AsyncChat', 'astream_with_complete', 'ChatCallback', 'DeepseekMsgsCallback',
10
10
  'DeepseekPrefillCallback', 'add_warning', 'StopReasonCallback', 'run_fence_tool', 'FenceToolCallback',
11
- 'ToolReminderCallback', 'stop_sequences', 'StopSequencesCallback', 'mk_tr_details', 'mk_srv_tc_details',
12
- 'StreamFormatter', 'AsyncStreamFormatter', 'adisplay_stream']
11
+ 'ToolReminderCallback', 'stop_sequences', 'StopSequencesCallback', 'mk_tr_details', 'StreamFormatter',
12
+ 'AsyncStreamFormatter', 'adisplay_stream']
13
13
 
14
14
  # %% ../nbs/07_chat.ipynb #d5a3bc1f
15
15
  import asyncio, base64, json, mimetypes, random, string, ast, warnings
@@ -25,15 +25,30 @@ from dataclasses import dataclass
25
25
  from .types import *
26
26
  from .acomplete import *
27
27
 
28
- # %% ../nbs/07_chat.ipynb #90f55ad4
28
+ # %% ../nbs/07_chat.ipynb #1b75c262
29
+ class MediaUrl(BasicRepr):
30
+ "Direct URL media reference"
31
+ def __init__(self, url, mime=None): self.url, self.mime = url, ifnone(mime, url_mime(url))
32
+
33
+ # %% ../nbs/07_chat.ipynb #eb557831
34
+ def _mime2part_type(mime):
35
+ "Map MIME string to canonical PartType"
36
+ if mime.startswith('image/'): return PartType.input_image
37
+ if mime.startswith('audio/'): return PartType.input_audio
38
+ if mime.startswith('video/'): return PartType.input_video
39
+ return PartType.input_file
40
+
29
41
  def _bytes2content(data):
30
- "Convert bytes to litellm content dict (image, pdf, audio, video)"
42
+ "Convert bytes to fastllm canonical content"
31
43
  mtype = detect_mime(data)
32
44
  if not mtype: raise ValueError(f'Data must be a supported file type, got {data[:10]}')
33
- encoded = base64.b64encode(data).decode("utf-8")
34
- if mtype.startswith('image/'): return Part(type=PartType.input_image, text=f'data:{mtype};base64,{encoded}')
35
- return Part(type=PartType.input_file, text=f'data:{mtype};base64,{encoded}')
45
+ encoded = base64.b64encode(data).decode("utf-8")
46
+ return Part(type=_mime2part_type(mtype), text=f'data:{mtype};base64,{encoded}')
36
47
 
48
+ def _url2content(o):
49
+ "Convert MediaUrl to fastllm canonical content"
50
+ mime = o.mime or url_mime(o.url)
51
+ return Part(type=_mime2part_type(mime), text=o.url, data=dict(mime=mime))
37
52
 
38
53
  # %% ../nbs/07_chat.ipynb #48c78e48
39
54
  def _add_cache_control(msg, # LiteLLM formatted msg
@@ -57,8 +72,9 @@ def remove_cache_ckpts(msg):
57
72
  return msg
58
73
 
59
74
  def _mk_content(o):
60
- if isinstance(o, str): return Part(type=PartType.text, text=o)
61
- elif isinstance(o,bytes): return _bytes2content(o)
75
+ if isinstance(o, str): return Part(type=PartType.text, text=o)
76
+ elif isinstance(o, bytes): return _bytes2content(o)
77
+ elif isinstance(o, MediaUrl): return _url2content(o)
62
78
  return o
63
79
 
64
80
  def contents(c):
@@ -427,7 +443,6 @@ class AsyncChat:
427
443
  return self
428
444
 
429
445
  # %% ../nbs/07_chat.ipynb #2e469ea1
430
- def _srvtools(tcs): return L(tcs).filter(lambda o: o.server) if tcs else None
431
446
  def _usrtools(tcs): return L(tcs).filter(lambda o: not o.server) if tcs else None
432
447
 
433
448
  # %% ../nbs/07_chat.ipynb #19b87f53
@@ -511,8 +526,6 @@ async def _call(self:AsyncChat, msg=None, prefill=None, temp=None, think=None, s
511
526
 
512
527
  self.toolloop, self.prompt, tmsg = False, None, None
513
528
  async for o in self._call_cbs('before_tool_calls'): yield o
514
- if stcs:= _srvtools(res.tool_calls):
515
- for tc in stcs: yield tc
516
529
  if tcs := _usrtools(res.tool_calls):
517
530
  tres = await parallel_async(_alite_call_func, tcs, timeout=tc_timeout, n_workers=n_workers, pause=pause, **self.tcdict)
518
531
  tmsg = mk_tool_res_msg(tcs, tres)
@@ -703,15 +716,9 @@ def _trunc_param(v, mx=40):
703
716
  def _tc_summary(tr):
704
717
  "Format tool call as func(params) → result string"
705
718
  params = ', '.join(f"{k}={_trunc_param(v)}" for k,v in tr.data['arguments'].items())
706
- res = f"→{_trunc_param(tr.text)}"
719
+ res = f"→{_trunc_param(tr.text)}" if tr.text else ''
707
720
  return '<code>'+escape(f"{tr.data['name']}({params}){res}")+'</code>'
708
721
 
709
- # %% ../nbs/07_chat.ipynb #91beb26c
710
- def _srv_tc_summary(tc):
711
- "Format tool call as func(params) → result string"
712
- params = ', '.join(f"{k}={_trunc_param(v)}" for k,v in tc.arguments.items())
713
- return '<code>'+escape(f"{tc.name}({params})")+'</code>'
714
-
715
722
  # %% ../nbs/07_chat.ipynb #80f344cc
716
723
  def _trunc_content(content, mx):
717
724
  "Truncate tool result content, respecting '_full' flag"
@@ -722,23 +729,13 @@ def _trunc_content(content, mx):
722
729
  def mk_tr_details(tr, mx=2000):
723
730
  "Create <details> block for tool call as JSON"
724
731
  args = {k:_trunc_str(v, mx=mx*5) for k,v in tr.data['arguments'].items()}
725
- res = {'id':tr.data['id'], 'server':False,
732
+ res = {'id':tr.data['id'], 'server':tr.data.get('server', False),
726
733
  'call':{'function': tr.data['name'], 'arguments': args},
727
734
  'result':_trunc_content(tr.text, mx=mx),}
728
735
  summ = f"<summary>{_tc_summary(tr)}</summary>"
729
736
  return f"\n\n{tool_dtls_tag}\n{summ}\n\n```json\n{dumps(res, indent=2, ensure_ascii=False)}\n```\n\n</details>\n\n"
730
737
 
731
- # %% ../nbs/07_chat.ipynb #3049001c
732
- def mk_srv_tc_details(tc, mx=2000):
733
- "Create <details> block for tool call as JSON"
734
- args = {k:_trunc_str(v, mx=mx*5) for k,v in tc.arguments.items()}
735
- res = {'id':tc.id, 'server':True, 'call':{'function': tc.name, 'arguments': args}, 'result':"Server tool call executed."}
736
- summ = f"<summary>{_srv_tc_summary(tc)}</summary>"
737
- return f"\n\n{tool_dtls_tag}\n{summ}\n\n```json\n{dumps(res, indent=2, ensure_ascii=False)}\n```\n\n</details>\n\n"
738
-
739
738
  # %% ../nbs/07_chat.ipynb #f0d984ec
740
- # status_re = re.compile(r'^- ⏳ <code>(.*)</code> ⏳$|^🧠+$', re.MULTILINE) # TODO: Need to yield tool calls as they are done collated in fastllm `_acollect_stream`
741
-
742
739
  class StreamFormatter:
743
740
  def __init__(self, mx=2000, debug=False, showthink=False):
744
741
  self.outp,self.tcs = '',{}
@@ -754,8 +751,8 @@ class StreamFormatter:
754
751
  res+= '🧠' if not self.outp or self.outp[-1]=='🧠' else '\n\n🧠'
755
752
  elif self.outp and self.outp[-1] == '🧠': res+= '\n\n'
756
753
  if txt:=o.get('text'): res+=f"\n\n{txt}" if res and res[-1] == '🧠' else txt
757
- if isinstance(o, ToolCall):
758
- res += mk_srv_tc_details(o)
754
+ if isinstance(o, Part) and o.type==PartType.tool_use:
755
+ res += f"\n- ⏳ {_tc_summary(o)} ⏳\n"
759
756
  if isinstance(o, Part) and o.type == PartType.tool_result:
760
757
  res += mk_tr_details(o,mx=self.mx)
761
758
  self.outp+=res
@@ -105,14 +105,14 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
105
105
  idx,last_idx = index_fn(d, typ, last_typ, last_idx)
106
106
  return idx
107
107
  def _proc(d, name, pt=None, kw='txt', ret=None):
108
- if not ret and not (val := getattr(d, name)): return
108
+ if not ret and not (val := getattr(d, name)): return None, None
109
109
  idx = _fidx(d, name, pt)
110
110
  part_accum.append(typ, idx, **(ret or {kw: val}))
111
- return ret or {name: val}
111
+ return ret or {name: val}, idx
112
112
  def _yield_parts(d):
113
113
  for args in [('text',), ('thinking',), ('citations', 'text', 'citations')]:
114
- if (r := _proc(d, args[0], pt=args[1] if len(args)>1 else None, kw=args[2] if len(args)>2 else 'txt')):
115
- yield r
114
+ r = _proc(d, args[0], pt=args[1] if len(args)>1 else None, kw=args[2] if len(args)>2 else 'txt')
115
+ if r[0]: yield r[0]
116
116
  stop, stop_yielded = False, False
117
117
  async for d in it:
118
118
  # Check stop condition and yield stop delta
@@ -127,11 +127,26 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
127
127
  # Rest incl. tools, finish reason, usage is processed independently
128
128
  for tc in d.tool_calls:
129
129
  args = tc.arguments.get('_delta', tc.arguments)
130
- _proc(d, 'tool_use', ret=dict(id=tc.id, name=tc.name, arguments=args, server=tc.server, extra=tc.extra))
130
+ _, idx = _proc(d, 'tool_use', ret=dict(id=tc.id, name=tc.name, arguments=args, server=tc.server, extra=tc.extra))
131
+ if (isinstance(args, str) and args.endswith('}')) or (isinstance(args, dict) and '_delta' not in tc.arguments): # tool call ready
132
+ if isinstance(args, str):
133
+ try: args = json.loads(part_accum.parts[idx].arguments) if args else {}
134
+ except json.JSONDecodeError: continue
135
+ acc = part_accum.parts[idx]
136
+ acc.arguments = args
137
+ data = {**acc.extra, 'id':acc.id, 'name':acc.name, 'arguments':args, 'server':acc.server}
138
+ yield Part(type=PartType.tool_use, data=data)
139
+ # Server tool results for anthropic are yielded in d.server_tool_result by checking injected dummy `_delta`
140
+ if acc.server and '_delta' not in tc.arguments: yield Part(type=PartType.tool_result, text="Server tool call executed.", data=data)
131
141
  if d.server_tool_result:
132
142
  idx = _fidx(d, 'server_tool_result')
133
143
  part_accum.parts[idx] = Part(type=typ, data=d.server_tool_result)
134
- if (r:=_proc(d, 'refusal')): yield r
144
+ srv_tc = next((p for p in reversed(list(part_accum.parts.values())) if isinstance(p, ToolCall) and p.server), None)
145
+ if srv_tc:
146
+ data = {**srv_tc.extra, 'id':srv_tc.id, 'name':srv_tc.name, 'arguments':srv_tc.arguments, 'server':True}
147
+ yield Part(type=PartType.tool_result, text="Server tool call executed.", data=data)
148
+ r = _proc(d, 'refusal')
149
+ if r[0]: yield r[0]
135
150
  if d.finish_reason: fin = d.finish_reason
136
151
  if d.usage: usg = d.usage
137
152
  last_typ = typ
@@ -146,4 +161,3 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
146
161
  message=Msg(role="assistant", content=part_accum.parts),
147
162
  finish_reason=fin, usage=usg, tool_calls=tcs, api_name=api_name, vendor_name=vendor_name,
148
163
  raw={'deltas':deltas})
149
-
@@ -11,6 +11,7 @@ __all__ = ['PartType', 'FinishReason', 'api_registry', 'model_prices_url', 'haik
11
11
  'register_model_info', 'get_model_info', 'get_model_pricing', 'approx_pricing']
12
12
 
13
13
  # %% ../nbs/00_types.ipynb #b4d047fd
14
+ import httpx
14
15
  from dataclasses import dataclass, field
15
16
  from fastcore.net import urljson
16
17
  from fastcore.utils import *
@@ -28,14 +29,16 @@ PartType = str_enum('PartType', 'text', 'thinking', 'refusal', 'tool_use', 'serv
28
29
  'input_image', 'input_audio', 'input_video', 'input_file')
29
30
 
30
31
  # %% ../nbs/00_types.ipynb #2eeff103
31
- def _trunc_strs(d, n=200):
32
- "Return copy of dict `d` with str values >n chars truncated to first 10 chars + '...'"
33
- if not d: return d
34
- return {k: (v[:10]+'...' if isinstance(v,str) and len(v)>n else v) for k,v in d.items()}
32
+ def _trunc_strs(o, n=200):
33
+ "Truncate str or dict"
34
+ if not o: return o
35
+ if isinstance(o,str) and len(o)>n: return o[:100]+'...'
36
+ if isinstance(o,dict): return {k: (v[:100]+'...' if isinstance(v,str) and len(v)>n else v) for k,v in o.items()}
37
+ return o
35
38
 
36
39
  @patch
37
40
  def _repr_markdown_(self: Part):
38
- body = self.text if self.text else ''
41
+ body = _trunc_strs(self.text) if self.text else ''
39
42
  data = _trunc_strs(self.data)
40
43
  return f"""**Part** (`{self.type}`)
41
44
 
@@ -198,7 +201,17 @@ def sys_text(system):
198
201
 
199
202
  def part_txt(p): return p.text if isinstance(p,Part) else p
200
203
 
201
- # %% ../nbs/00_types.ipynb #dc2b75a0
204
+ # %% ../nbs/00_types.ipynb #f3deb055
205
+ @flexicache(time_policy(24*3600))
206
+ def _fetch_url_partial(url, nbytes=512):
207
+ "Fetch remote media bytes, optionally only first `nbytes`."
208
+ try:
209
+ with httpx.stream('GET', url, headers={'Range': f'bytes=0-{nbytes-1}'}, follow_redirects=True) as r:
210
+ if r.status_code not in (200, 206): return
211
+ return r.read()
212
+ except (httpx.HTTPError, httpx.InvalidURL): return
213
+
214
+ # %% ../nbs/00_types.ipynb #70a9a0c3
202
215
  _ext_mime = {
203
216
  '.jpg':'image/jpeg', '.jpeg':'image/jpeg', '.png':'image/png', '.gif':'image/gif', '.webp':'image/webp',
204
217
  '.pdf':'application/pdf',
@@ -214,18 +227,19 @@ def data_url(url):
214
227
  return header[5:].split(';',1)[0].strip() or 'application/octet-stream', body
215
228
 
216
229
  def url_mime(url, default='application/octet-stream'):
217
- "Guess mime from URL extension."
230
+ "Guess mime from URL extension, and optional bytes fallback."
231
+ if "youtube.com" in url or "youtu.be" in url: return "video/mp4"
218
232
  ext = '.' + url.rsplit('.', 1)[-1].split('?')[0].lower() if '.' in url.split('?')[0].split('/')[-1] else ''
219
- return _ext_mime.get(ext, default)
233
+ if (mime:=_ext_mime.get(ext)) is None: return detect_mime(_fetch_url_partial(url))
234
+ return ifnone(mime, default)
220
235
 
221
236
  # %% ../nbs/00_types.ipynb #28c698fe
222
- def payload_kwargs(msgs, model, stream=False, system=None, max_tokens=None, temperature=None, tools=None, tool_choice=None, reasoning_effort=None, web_search_options=None, stop_callables=None, stop_sequences=None): pass
237
+ def payload_kwargs(msgs, model, stream=False, system=None, max_tokens=None, temperature=None, tools=None, tool_choice=None, reasoning_effort=None, web_search_options=None, stop_callables=None): pass
223
238
 
224
239
  # %% ../nbs/00_types.ipynb #c2a2cb49
225
240
  def get_api_key(api_key, default):
226
- err = ValueError(f"Missing API key: make sure to have the expected env var name or pass `api_key`")
227
241
  key = api_key or os.getenv(default)
228
- if not key: raise err
242
+ if not key: raise ValueError(f"Missing API key: set environment variable '{default}' or pass `api_key` parameter")
229
243
  return key
230
244
 
231
245
  # %% ../nbs/00_types.ipynb #852adecd
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-fastllm
3
- Version: 0.0.9
3
+ Version: 0.0.10
4
4
  Author-email: Kerem Turgutlu <keremturgutlu@gmail.com>
5
5
  License: Apache-2.0
6
6
  Project-URL: Repository, https://github.com/AnswerDotAI/fastllm
@@ -1 +0,0 @@
1
- __version__ = "0.0.9"