python-fastllm 0.0.8__tar.gz → 0.0.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_fastllm-0.0.8 → python_fastllm-0.0.10}/PKG-INFO +1 -1
- python_fastllm-0.0.10/fastllm/__init__.py +1 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.10}/fastllm/_modidx.py +11 -3
- {python_fastllm-0.0.8 → python_fastllm-0.0.10}/fastllm/acomplete.py +39 -10
- {python_fastllm-0.0.8 → python_fastllm-0.0.10}/fastllm/anthropic.py +5 -5
- {python_fastllm-0.0.8 → python_fastllm-0.0.10}/fastllm/chat.py +33 -36
- {python_fastllm-0.0.8 → python_fastllm-0.0.10}/fastllm/streaming.py +21 -7
- {python_fastllm-0.0.8 → python_fastllm-0.0.10}/fastllm/types.py +110 -63
- {python_fastllm-0.0.8 → python_fastllm-0.0.10}/python_fastllm.egg-info/PKG-INFO +1 -1
- python_fastllm-0.0.8/fastllm/__init__.py +0 -1
- {python_fastllm-0.0.8 → python_fastllm-0.0.10}/README.md +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.10}/fastllm/codex.py +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.10}/fastllm/gemini.py +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.10}/fastllm/openai_chat.py +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.10}/fastllm/openai_responses.py +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.10}/fastllm/specs/anthropic.json +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.10}/fastllm/specs/anthropic.yml +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.10}/fastllm/specs/gemini.json +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.10}/fastllm/specs/openai.with-code-samples.json +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.10}/fastllm/specs/openai.with-code-samples.yml +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.10}/fastllm/specs/spec_manifest.json +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.10}/pyproject.toml +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.10}/python_fastllm.egg-info/SOURCES.txt +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.10}/python_fastllm.egg-info/dependency_links.txt +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.10}/python_fastllm.egg-info/entry_points.txt +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.10}/python_fastllm.egg-info/requires.txt +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.10}/python_fastllm.egg-info/top_level.txt +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.10}/setup.cfg +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.0.10"
|
|
@@ -12,6 +12,9 @@ d = { 'settings': { 'branch': 'main',
|
|
|
12
12
|
'fastllm/acomplete.py'),
|
|
13
13
|
'fastllm.acomplete._debug_print': ('acomplete.html#_debug_print', 'fastllm/acomplete.py'),
|
|
14
14
|
'fastllm.acomplete._is_ctx_exceeded': ('acomplete.html#_is_ctx_exceeded', 'fastllm/acomplete.py'),
|
|
15
|
+
'fastllm.acomplete._raise_if_done': ('acomplete.html#_raise_if_done', 'fastllm/acomplete.py'),
|
|
16
|
+
'fastllm.acomplete._retry_call': ('acomplete.html#_retry_call', 'fastllm/acomplete.py'),
|
|
17
|
+
'fastllm.acomplete._retry_stream': ('acomplete.html#_retry_stream', 'fastllm/acomplete.py'),
|
|
15
18
|
'fastllm.acomplete.acomplete': ('acomplete.html#acomplete', 'fastllm/acomplete.py'),
|
|
16
19
|
'fastllm.acomplete.mk_client': ('acomplete.html#mk_client', 'fastllm/acomplete.py')},
|
|
17
20
|
'fastllm.anthropic': { 'fastllm.anthropic._ant_cc': ('anthropic.html#_ant_cc', 'fastllm/anthropic.py'),
|
|
@@ -74,6 +77,8 @@ d = { 'settings': { 'branch': 'main',
|
|
|
74
77
|
'fastllm.chat.FenceToolStop.__call__': ('chat.html#fencetoolstop.__call__', 'fastllm/chat.py'),
|
|
75
78
|
'fastllm.chat.FenceToolStop.__init__': ('chat.html#fencetoolstop.__init__', 'fastllm/chat.py'),
|
|
76
79
|
'fastllm.chat.FullResponse': ('chat.html#fullresponse', 'fastllm/chat.py'),
|
|
80
|
+
'fastllm.chat.MediaUrl': ('chat.html#mediaurl', 'fastllm/chat.py'),
|
|
81
|
+
'fastllm.chat.MediaUrl.__init__': ('chat.html#mediaurl.__init__', 'fastllm/chat.py'),
|
|
77
82
|
'fastllm.chat.Msg.text': ('chat.html#msg.text', 'fastllm/chat.py'),
|
|
78
83
|
'fastllm.chat.StopReasonCallback': ('chat.html#stopreasoncallback', 'fastllm/chat.py'),
|
|
79
84
|
'fastllm.chat.StopReasonCallback.after_acomplete': ( 'chat.html#stopreasoncallback.after_acomplete',
|
|
@@ -113,19 +118,19 @@ d = { 'settings': { 'branch': 'main',
|
|
|
113
118
|
'fastllm.chat._has_stop': ('chat.html#_has_stop', 'fastllm/chat.py'),
|
|
114
119
|
'fastllm.chat._inject_tool_reminder': ('chat.html#_inject_tool_reminder', 'fastllm/chat.py'),
|
|
115
120
|
'fastllm.chat._lite_call_func': ('chat.html#_lite_call_func', 'fastllm/chat.py'),
|
|
121
|
+
'fastllm.chat._mime2part_type': ('chat.html#_mime2part_type', 'fastllm/chat.py'),
|
|
116
122
|
'fastllm.chat._mk_content': ('chat.html#_mk_content', 'fastllm/chat.py'),
|
|
117
123
|
'fastllm.chat._mk_prefill': ('chat.html#_mk_prefill', 'fastllm/chat.py'),
|
|
118
124
|
'fastllm.chat._mk_result_fence': ('chat.html#_mk_result_fence', 'fastllm/chat.py'),
|
|
119
125
|
'fastllm.chat._mk_tool_result': ('chat.html#_mk_tool_result', 'fastllm/chat.py'),
|
|
120
126
|
'fastllm.chat._split_fence_msgs': ('chat.html#_split_fence_msgs', 'fastllm/chat.py'),
|
|
121
127
|
'fastllm.chat._split_msg_on_fences': ('chat.html#_split_msg_on_fences', 'fastllm/chat.py'),
|
|
122
|
-
'fastllm.chat._srv_tc_summary': ('chat.html#_srv_tc_summary', 'fastllm/chat.py'),
|
|
123
|
-
'fastllm.chat._srvtools': ('chat.html#_srvtools', 'fastllm/chat.py'),
|
|
124
128
|
'fastllm.chat._tc_summary': ('chat.html#_tc_summary', 'fastllm/chat.py'),
|
|
125
129
|
'fastllm.chat._think_kw': ('chat.html#_think_kw', 'fastllm/chat.py'),
|
|
126
130
|
'fastllm.chat._trunc_content': ('chat.html#_trunc_content', 'fastllm/chat.py'),
|
|
127
131
|
'fastllm.chat._trunc_param': ('chat.html#_trunc_param', 'fastllm/chat.py'),
|
|
128
132
|
'fastllm.chat._trunc_str': ('chat.html#_trunc_str', 'fastllm/chat.py'),
|
|
133
|
+
'fastllm.chat._url2content': ('chat.html#_url2content', 'fastllm/chat.py'),
|
|
129
134
|
'fastllm.chat._usrtools': ('chat.html#_usrtools', 'fastllm/chat.py'),
|
|
130
135
|
'fastllm.chat.add_warning': ('chat.html#add_warning', 'fastllm/chat.py'),
|
|
131
136
|
'fastllm.chat.adisplay_stream': ('chat.html#adisplay_stream', 'fastllm/chat.py'),
|
|
@@ -137,7 +142,6 @@ d = { 'settings': { 'branch': 'main',
|
|
|
137
142
|
'fastllm.chat.lite_mk_func': ('chat.html#lite_mk_func', 'fastllm/chat.py'),
|
|
138
143
|
'fastllm.chat.mk_msg': ('chat.html#mk_msg', 'fastllm/chat.py'),
|
|
139
144
|
'fastllm.chat.mk_msgs': ('chat.html#mk_msgs', 'fastllm/chat.py'),
|
|
140
|
-
'fastllm.chat.mk_srv_tc_details': ('chat.html#mk_srv_tc_details', 'fastllm/chat.py'),
|
|
141
145
|
'fastllm.chat.mk_tr_details': ('chat.html#mk_tr_details', 'fastllm/chat.py'),
|
|
142
146
|
'fastllm.chat.postproc': ('chat.html#postproc', 'fastllm/chat.py'),
|
|
143
147
|
'fastllm.chat.remove_cache_ckpts': ('chat.html#remove_cache_ckpts', 'fastllm/chat.py'),
|
|
@@ -270,18 +274,22 @@ d = { 'settings': { 'branch': 'main',
|
|
|
270
274
|
'fastllm.types.ToolCall': ('types.html#toolcall', 'fastllm/types.py'),
|
|
271
275
|
'fastllm.types.ToolCall._repr_markdown_': ('types.html#toolcall._repr_markdown_', 'fastllm/types.py'),
|
|
272
276
|
'fastllm.types.Usage': ('types.html#usage', 'fastllm/types.py'),
|
|
277
|
+
'fastllm.types._fetch_url_partial': ('types.html#_fetch_url_partial', 'fastllm/types.py'),
|
|
273
278
|
'fastllm.types._trunc_strs': ('types.html#_trunc_strs', 'fastllm/types.py'),
|
|
279
|
+
'fastllm.types.approx_pricing': ('types.html#approx_pricing', 'fastllm/types.py'),
|
|
274
280
|
'fastllm.types.data_url': ('types.html#data_url', 'fastllm/types.py'),
|
|
275
281
|
'fastllm.types.display_list': ('types.html#display_list', 'fastllm/types.py'),
|
|
276
282
|
'fastllm.types.fn_schema': ('types.html#fn_schema', 'fastllm/types.py'),
|
|
277
283
|
'fastllm.types.get_api_key': ('types.html#get_api_key', 'fastllm/types.py'),
|
|
278
284
|
'fastllm.types.get_model_info': ('types.html#get_model_info', 'fastllm/types.py'),
|
|
279
285
|
'fastllm.types.get_model_meta': ('types.html#get_model_meta', 'fastllm/types.py'),
|
|
286
|
+
'fastllm.types.get_model_pricing': ('types.html#get_model_pricing', 'fastllm/types.py'),
|
|
280
287
|
'fastllm.types.infer_api_name': ('types.html#infer_api_name', 'fastllm/types.py'),
|
|
281
288
|
'fastllm.types.mk_completion': ('types.html#mk_completion', 'fastllm/types.py'),
|
|
282
289
|
'fastllm.types.mk_tool_res_msg': ('types.html#mk_tool_res_msg', 'fastllm/types.py'),
|
|
283
290
|
'fastllm.types.model_prices_meta': ('types.html#model_prices_meta', 'fastllm/types.py'),
|
|
284
291
|
'fastllm.types.part_txt': ('types.html#part_txt', 'fastllm/types.py'),
|
|
285
292
|
'fastllm.types.payload_kwargs': ('types.html#payload_kwargs', 'fastllm/types.py'),
|
|
293
|
+
'fastllm.types.register_model_info': ('types.html#register_model_info', 'fastllm/types.py'),
|
|
286
294
|
'fastllm.types.sys_text': ('types.html#sys_text', 'fastllm/types.py'),
|
|
287
295
|
'fastllm.types.url_mime': ('types.html#url_mime', 'fastllm/types.py')}}}
|
|
@@ -7,7 +7,7 @@ __all__ = ['specs_path', 'ant_spec', 'oai_spec', 'gem_spec', 'vendor_mapping', '
|
|
|
7
7
|
'ContextWindowExceededError', 'acomplete']
|
|
8
8
|
|
|
9
9
|
# %% ../nbs/06_acomplete.ipynb #f2f57253
|
|
10
|
-
import json
|
|
10
|
+
import asyncio,json,httpx
|
|
11
11
|
from importlib.resources import files
|
|
12
12
|
from fastcore.utils import *
|
|
13
13
|
from fastcore.meta import *
|
|
@@ -49,7 +49,8 @@ api2spec = {'openai':oai_spec, 'openai_chat':oai_spec, 'anthropic':ant_spec, 'ge
|
|
|
49
49
|
|
|
50
50
|
# %% ../nbs/06_acomplete.ipynb #79075d95
|
|
51
51
|
@flexicache()
|
|
52
|
-
def mk_client(model=None, vendor_name=None, api_name=None, api_key=None, base_url=None, xtra_hdrs=None
|
|
52
|
+
def mk_client(model=None, vendor_name=None, api_name=None, api_key=None, base_url=None, xtra_hdrs=None,
|
|
53
|
+
timeout=httpx.Timeout(connect=30, read=300, write=30, pool=10)):
|
|
53
54
|
err_msg = f"please pass a valid one vendor: {', '.join(list(vendor_mapping))} or pass `api_name`,`base_url` and `api_key`"
|
|
54
55
|
if vendor_name:
|
|
55
56
|
override_base_url = base_url
|
|
@@ -67,7 +68,7 @@ def mk_client(model=None, vendor_name=None, api_name=None, api_key=None, base_ur
|
|
|
67
68
|
else: raise ValueError(f"Model {model} can't be auto resolved, {err_msg}")
|
|
68
69
|
api = api_registry.apis[api_name]
|
|
69
70
|
spec, hdrs = api2spec[api_name], api.get_hdrs(api_key)
|
|
70
|
-
cli = OpenAPIClient(spec, headers=merge(hdrs, ifnone(xtra_hdrs, {})))
|
|
71
|
+
cli = OpenAPIClient(spec, headers=merge(hdrs, ifnone(xtra_hdrs, {})), timeout=timeout)
|
|
71
72
|
if base_url is not None:
|
|
72
73
|
for op in cli.ops: op.base_url = base_url # pyright: ignore[reportAttributeAccessIssue]
|
|
73
74
|
return cli, api_name, vendor_name
|
|
@@ -113,14 +114,36 @@ def _debug_print(model, api_name, vendor_name, payload, func):
|
|
|
113
114
|
print(f"\033[1;33mpayload:\033[0m\n{pformat(p, width=120, sort_dicts=False)}")
|
|
114
115
|
print('━'*60)
|
|
115
116
|
|
|
117
|
+
# %% ../nbs/06_acomplete.ipynb #497c8565
|
|
118
|
+
async def _raise_if_done(e, n, retries, retry_delay, yielded=False):
|
|
119
|
+
e = _classify_error(e)
|
|
120
|
+
if yielded or not e.retryable or n == retries: raise e
|
|
121
|
+
await asyncio.sleep(retry_delay*2**n)
|
|
122
|
+
|
|
123
|
+
async def _retry_call(f, retries=2, retry_delay=0.5):
|
|
124
|
+
for n in range(retries+1):
|
|
125
|
+
try: return await f()
|
|
126
|
+
except APIError as e: await _raise_if_done(e, n, retries, retry_delay)
|
|
127
|
+
|
|
128
|
+
async def _retry_stream(mk_gen, retries=2, retry_delay=0.5):
|
|
129
|
+
for n in range(retries+1):
|
|
130
|
+
yielded = False
|
|
131
|
+
try:
|
|
132
|
+
async for o in mk_gen():
|
|
133
|
+
yielded = True
|
|
134
|
+
yield o
|
|
135
|
+
return
|
|
136
|
+
except APIError as e: await _raise_if_done(e, n, retries, retry_delay, yielded=yielded)
|
|
137
|
+
|
|
116
138
|
# %% ../nbs/06_acomplete.ipynb #2379ec94
|
|
117
139
|
@delegates(payload_kwargs)
|
|
118
|
-
async def acomplete(msgs, model, api_name=None, vendor_name=None, api_key=None,
|
|
119
|
-
|
|
140
|
+
async def acomplete(msgs, model, api_name=None, vendor_name=None, api_key=None,
|
|
141
|
+
base_url=None, xtra_body=None, xtra_hdrs=None, stream=False,
|
|
142
|
+
stop_callables=None, retries=2, retry_delay=0.5, **kwargs):
|
|
120
143
|
"Unified completion across different APIs."
|
|
121
144
|
cli, api_name, vendor_name = mk_client(model, vendor_name, api_name, api_key, base_url, xtra_hdrs)
|
|
122
145
|
api = api_registry.apis[api_name]
|
|
123
|
-
payload = api.mk_payload(msgs, model, stream=stream,
|
|
146
|
+
payload = api.mk_payload(msgs, model, stream=stream, **kwargs)
|
|
124
147
|
payload = merge(payload, ifnone(xtra_body, {}))
|
|
125
148
|
if vendor_name == 'codex':
|
|
126
149
|
for k in 'temperature max_tokens max_output_tokens max_completion_tokens metadata'.split(): payload.pop(k, None)
|
|
@@ -130,7 +153,13 @@ async def acomplete(msgs, model, api_name=None, vendor_name=None, api_key=None,
|
|
|
130
153
|
if vendor_name == 'moonshot' and 'kimi' in model: payload['messages'][-1]['partial'] = True
|
|
131
154
|
func = attrgetter(api.op_path[stream])(cli)
|
|
132
155
|
if defaults.debug_mode: _debug_print(model, api_name, vendor_name, payload, func)
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
156
|
+
async def _call(): return await func(**payload)
|
|
157
|
+
if not stream:
|
|
158
|
+
resp = await _retry_call(_call, retries, retry_delay)
|
|
159
|
+
return mk_completion(resp, model=model, api_name=api_name, vendor_name=vendor_name)
|
|
160
|
+
|
|
161
|
+
async def _mk_gen():
|
|
162
|
+
resp = await _call()
|
|
163
|
+
async for o in api.acollect_stream(resp, model=model, vendor_name=vendor_name, stop_callables=stop_callables): yield o
|
|
164
|
+
|
|
165
|
+
return _retry_stream(_mk_gen, retries, retry_delay)
|
|
@@ -50,10 +50,8 @@ def finalize_usage(usg, parts):
|
|
|
50
50
|
rc = '\n'.join(p.text or '' for p in parts if p.type == PartType.thinking)
|
|
51
51
|
ct = int(usg.raw.get('output_tokens', usg.completion_tokens) or 0)
|
|
52
52
|
rt = min(int(len(rc.split())*1.5), ct) if rc else 0
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
print(res)
|
|
56
|
-
return res
|
|
53
|
+
return Usage(prompt_tokens=usg.prompt_tokens, completion_tokens=ct-rt, total_tokens=usg.prompt_tokens+ct,
|
|
54
|
+
cached_tokens=usg.cached_tokens, cache_creation_tokens=usg.cache_creation_tokens, reasoning_tokens=rt, raw=usg.raw)
|
|
57
55
|
|
|
58
56
|
# %% ../nbs/04_anthropic.ipynb #7a8b1f8f
|
|
59
57
|
def norm_finish(resp, tcs=None):
|
|
@@ -92,7 +90,9 @@ def norm_sse_event(ev, **kwargs):
|
|
|
92
90
|
if typ == "content_block_start":
|
|
93
91
|
cb = ev.get("content_block", {})
|
|
94
92
|
if cb.get("type", "").endswith("_tool_result"): return Delta(server_tool_result=cb, raw=ev, **kwargs)
|
|
95
|
-
if tc := norm_tool_call(cb):
|
|
93
|
+
if tc := norm_tool_call(cb):
|
|
94
|
+
if not tc.arguments: tc.arguments = {'_delta': ''}
|
|
95
|
+
tcs = [tc]
|
|
96
96
|
elif typ == "content_block_delta":
|
|
97
97
|
d = ev.get("delta", {})
|
|
98
98
|
dtyp = d.get("type")
|
|
@@ -3,13 +3,13 @@
|
|
|
3
3
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/07_chat.ipynb.
|
|
4
4
|
|
|
5
5
|
# %% auto #0
|
|
6
|
-
__all__ = ['tool_dtls_tag', 're_tools', 'token_dtls_tag', 're_token', 'effort', '
|
|
7
|
-
'mk_msg', 'FenceToolStop', 'extract_fence_call', 'split_tools', 'fmt2hist', 'mk_msgs',
|
|
8
|
-
'postproc', 'lite_mk_func', 'ToolResponse', 'structured', 'StopResponse', 'FullResponse',
|
|
9
|
-
'UsageStats', 'AsyncChat', 'astream_with_complete', 'ChatCallback', 'DeepseekMsgsCallback',
|
|
6
|
+
__all__ = ['tool_dtls_tag', 're_tools', 'token_dtls_tag', 're_token', 'effort', 'MediaUrl', 'remove_cache_ckpts', 'contents',
|
|
7
|
+
'stop_reason', 'mk_msg', 'FenceToolStop', 'extract_fence_call', 'split_tools', 'fmt2hist', 'mk_msgs',
|
|
8
|
+
'cite_footnote', 'postproc', 'lite_mk_func', 'ToolResponse', 'structured', 'StopResponse', 'FullResponse',
|
|
9
|
+
'search_count', 'UsageStats', 'AsyncChat', 'astream_with_complete', 'ChatCallback', 'DeepseekMsgsCallback',
|
|
10
10
|
'DeepseekPrefillCallback', 'add_warning', 'StopReasonCallback', 'run_fence_tool', 'FenceToolCallback',
|
|
11
|
-
'ToolReminderCallback', 'stop_sequences', 'StopSequencesCallback', 'mk_tr_details', '
|
|
12
|
-
'
|
|
11
|
+
'ToolReminderCallback', 'stop_sequences', 'StopSequencesCallback', 'mk_tr_details', 'StreamFormatter',
|
|
12
|
+
'AsyncStreamFormatter', 'adisplay_stream']
|
|
13
13
|
|
|
14
14
|
# %% ../nbs/07_chat.ipynb #d5a3bc1f
|
|
15
15
|
import asyncio, base64, json, mimetypes, random, string, ast, warnings
|
|
@@ -25,15 +25,30 @@ from dataclasses import dataclass
|
|
|
25
25
|
from .types import *
|
|
26
26
|
from .acomplete import *
|
|
27
27
|
|
|
28
|
-
# %% ../nbs/07_chat.ipynb #
|
|
28
|
+
# %% ../nbs/07_chat.ipynb #1b75c262
|
|
29
|
+
class MediaUrl(BasicRepr):
|
|
30
|
+
"Direct URL media reference"
|
|
31
|
+
def __init__(self, url, mime=None): self.url, self.mime = url, ifnone(mime, url_mime(url))
|
|
32
|
+
|
|
33
|
+
# %% ../nbs/07_chat.ipynb #eb557831
|
|
34
|
+
def _mime2part_type(mime):
|
|
35
|
+
"Map MIME string to canonical PartType"
|
|
36
|
+
if mime.startswith('image/'): return PartType.input_image
|
|
37
|
+
if mime.startswith('audio/'): return PartType.input_audio
|
|
38
|
+
if mime.startswith('video/'): return PartType.input_video
|
|
39
|
+
return PartType.input_file
|
|
40
|
+
|
|
29
41
|
def _bytes2content(data):
|
|
30
|
-
"Convert bytes to
|
|
42
|
+
"Convert bytes to fastllm canonical content"
|
|
31
43
|
mtype = detect_mime(data)
|
|
32
44
|
if not mtype: raise ValueError(f'Data must be a supported file type, got {data[:10]}')
|
|
33
|
-
encoded = base64.b64encode(data).decode("utf-8")
|
|
34
|
-
|
|
35
|
-
return Part(type=PartType.input_file, text=f'data:{mtype};base64,{encoded}')
|
|
45
|
+
encoded = base64.b64encode(data).decode("utf-8")
|
|
46
|
+
return Part(type=_mime2part_type(mtype), text=f'data:{mtype};base64,{encoded}')
|
|
36
47
|
|
|
48
|
+
def _url2content(o):
|
|
49
|
+
"Convert MediaUrl to fastllm canonical content"
|
|
50
|
+
mime = o.mime or url_mime(o.url)
|
|
51
|
+
return Part(type=_mime2part_type(mime), text=o.url, data=dict(mime=mime))
|
|
37
52
|
|
|
38
53
|
# %% ../nbs/07_chat.ipynb #48c78e48
|
|
39
54
|
def _add_cache_control(msg, # LiteLLM formatted msg
|
|
@@ -57,8 +72,9 @@ def remove_cache_ckpts(msg):
|
|
|
57
72
|
return msg
|
|
58
73
|
|
|
59
74
|
def _mk_content(o):
|
|
60
|
-
if isinstance(o, str):
|
|
61
|
-
elif isinstance(o,bytes):
|
|
75
|
+
if isinstance(o, str): return Part(type=PartType.text, text=o)
|
|
76
|
+
elif isinstance(o, bytes): return _bytes2content(o)
|
|
77
|
+
elif isinstance(o, MediaUrl): return _url2content(o)
|
|
62
78
|
return o
|
|
63
79
|
|
|
64
80
|
def contents(c):
|
|
@@ -427,7 +443,6 @@ class AsyncChat:
|
|
|
427
443
|
return self
|
|
428
444
|
|
|
429
445
|
# %% ../nbs/07_chat.ipynb #2e469ea1
|
|
430
|
-
def _srvtools(tcs): return L(tcs).filter(lambda o: o.server) if tcs else None
|
|
431
446
|
def _usrtools(tcs): return L(tcs).filter(lambda o: not o.server) if tcs else None
|
|
432
447
|
|
|
433
448
|
# %% ../nbs/07_chat.ipynb #19b87f53
|
|
@@ -511,8 +526,6 @@ async def _call(self:AsyncChat, msg=None, prefill=None, temp=None, think=None, s
|
|
|
511
526
|
|
|
512
527
|
self.toolloop, self.prompt, tmsg = False, None, None
|
|
513
528
|
async for o in self._call_cbs('before_tool_calls'): yield o
|
|
514
|
-
if stcs:= _srvtools(res.tool_calls):
|
|
515
|
-
for tc in stcs: yield tc
|
|
516
529
|
if tcs := _usrtools(res.tool_calls):
|
|
517
530
|
tres = await parallel_async(_alite_call_func, tcs, timeout=tc_timeout, n_workers=n_workers, pause=pause, **self.tcdict)
|
|
518
531
|
tmsg = mk_tool_res_msg(tcs, tres)
|
|
@@ -703,15 +716,9 @@ def _trunc_param(v, mx=40):
|
|
|
703
716
|
def _tc_summary(tr):
|
|
704
717
|
"Format tool call as func(params) → result string"
|
|
705
718
|
params = ', '.join(f"{k}={_trunc_param(v)}" for k,v in tr.data['arguments'].items())
|
|
706
|
-
res = f"→{_trunc_param(tr.text)}"
|
|
719
|
+
res = f"→{_trunc_param(tr.text)}" if tr.text else ''
|
|
707
720
|
return '<code>'+escape(f"{tr.data['name']}({params}){res}")+'</code>'
|
|
708
721
|
|
|
709
|
-
# %% ../nbs/07_chat.ipynb #91beb26c
|
|
710
|
-
def _srv_tc_summary(tc):
|
|
711
|
-
"Format tool call as func(params) → result string"
|
|
712
|
-
params = ', '.join(f"{k}={_trunc_param(v)}" for k,v in tc.arguments.items())
|
|
713
|
-
return '<code>'+escape(f"{tc.name}({params})")+'</code>'
|
|
714
|
-
|
|
715
722
|
# %% ../nbs/07_chat.ipynb #80f344cc
|
|
716
723
|
def _trunc_content(content, mx):
|
|
717
724
|
"Truncate tool result content, respecting '_full' flag"
|
|
@@ -722,23 +729,13 @@ def _trunc_content(content, mx):
|
|
|
722
729
|
def mk_tr_details(tr, mx=2000):
|
|
723
730
|
"Create <details> block for tool call as JSON"
|
|
724
731
|
args = {k:_trunc_str(v, mx=mx*5) for k,v in tr.data['arguments'].items()}
|
|
725
|
-
res = {'id':tr.data['id'], 'server':False,
|
|
732
|
+
res = {'id':tr.data['id'], 'server':tr.data.get('server', False),
|
|
726
733
|
'call':{'function': tr.data['name'], 'arguments': args},
|
|
727
734
|
'result':_trunc_content(tr.text, mx=mx),}
|
|
728
735
|
summ = f"<summary>{_tc_summary(tr)}</summary>"
|
|
729
736
|
return f"\n\n{tool_dtls_tag}\n{summ}\n\n```json\n{dumps(res, indent=2, ensure_ascii=False)}\n```\n\n</details>\n\n"
|
|
730
737
|
|
|
731
|
-
# %% ../nbs/07_chat.ipynb #3049001c
|
|
732
|
-
def mk_srv_tc_details(tc, mx=2000):
|
|
733
|
-
"Create <details> block for tool call as JSON"
|
|
734
|
-
args = {k:_trunc_str(v, mx=mx*5) for k,v in tc.arguments.items()}
|
|
735
|
-
res = {'id':tc.id, 'server':True, 'call':{'function': tc.name, 'arguments': args}, 'result':"Server tool call executed."}
|
|
736
|
-
summ = f"<summary>{_srv_tc_summary(tc)}</summary>"
|
|
737
|
-
return f"\n\n{tool_dtls_tag}\n{summ}\n\n```json\n{dumps(res, indent=2, ensure_ascii=False)}\n```\n\n</details>\n\n"
|
|
738
|
-
|
|
739
738
|
# %% ../nbs/07_chat.ipynb #f0d984ec
|
|
740
|
-
# status_re = re.compile(r'^- ⏳ <code>(.*)</code> ⏳$|^🧠+$', re.MULTILINE) # TODO: Need to yield tool calls as they are done collated in fastllm `_acollect_stream`
|
|
741
|
-
|
|
742
739
|
class StreamFormatter:
|
|
743
740
|
def __init__(self, mx=2000, debug=False, showthink=False):
|
|
744
741
|
self.outp,self.tcs = '',{}
|
|
@@ -754,8 +751,8 @@ class StreamFormatter:
|
|
|
754
751
|
res+= '🧠' if not self.outp or self.outp[-1]=='🧠' else '\n\n🧠'
|
|
755
752
|
elif self.outp and self.outp[-1] == '🧠': res+= '\n\n'
|
|
756
753
|
if txt:=o.get('text'): res+=f"\n\n{txt}" if res and res[-1] == '🧠' else txt
|
|
757
|
-
if isinstance(o,
|
|
758
|
-
res +=
|
|
754
|
+
if isinstance(o, Part) and o.type==PartType.tool_use:
|
|
755
|
+
res += f"\n- ⏳ {_tc_summary(o)} ⏳\n"
|
|
759
756
|
if isinstance(o, Part) and o.type == PartType.tool_result:
|
|
760
757
|
res += mk_tr_details(o,mx=self.mx)
|
|
761
758
|
self.outp+=res
|
|
@@ -105,14 +105,14 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
|
|
|
105
105
|
idx,last_idx = index_fn(d, typ, last_typ, last_idx)
|
|
106
106
|
return idx
|
|
107
107
|
def _proc(d, name, pt=None, kw='txt', ret=None):
|
|
108
|
-
if not ret and not (val := getattr(d, name)): return
|
|
108
|
+
if not ret and not (val := getattr(d, name)): return None, None
|
|
109
109
|
idx = _fidx(d, name, pt)
|
|
110
110
|
part_accum.append(typ, idx, **(ret or {kw: val}))
|
|
111
|
-
return ret or {name: val}
|
|
111
|
+
return ret or {name: val}, idx
|
|
112
112
|
def _yield_parts(d):
|
|
113
113
|
for args in [('text',), ('thinking',), ('citations', 'text', 'citations')]:
|
|
114
|
-
|
|
115
|
-
|
|
114
|
+
r = _proc(d, args[0], pt=args[1] if len(args)>1 else None, kw=args[2] if len(args)>2 else 'txt')
|
|
115
|
+
if r[0]: yield r[0]
|
|
116
116
|
stop, stop_yielded = False, False
|
|
117
117
|
async for d in it:
|
|
118
118
|
# Check stop condition and yield stop delta
|
|
@@ -127,11 +127,26 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
|
|
|
127
127
|
# Rest incl. tools, finish reason, usage is processed independently
|
|
128
128
|
for tc in d.tool_calls:
|
|
129
129
|
args = tc.arguments.get('_delta', tc.arguments)
|
|
130
|
-
_proc(d, 'tool_use', ret=dict(id=tc.id, name=tc.name, arguments=args, server=tc.server, extra=tc.extra))
|
|
130
|
+
_, idx = _proc(d, 'tool_use', ret=dict(id=tc.id, name=tc.name, arguments=args, server=tc.server, extra=tc.extra))
|
|
131
|
+
if (isinstance(args, str) and args.endswith('}')) or (isinstance(args, dict) and '_delta' not in tc.arguments): # tool call ready
|
|
132
|
+
if isinstance(args, str):
|
|
133
|
+
try: args = json.loads(part_accum.parts[idx].arguments) if args else {}
|
|
134
|
+
except json.JSONDecodeError: continue
|
|
135
|
+
acc = part_accum.parts[idx]
|
|
136
|
+
acc.arguments = args
|
|
137
|
+
data = {**acc.extra, 'id':acc.id, 'name':acc.name, 'arguments':args, 'server':acc.server}
|
|
138
|
+
yield Part(type=PartType.tool_use, data=data)
|
|
139
|
+
# Server tool results for anthropic are yielded in d.server_tool_result by checking injected dummy `_delta`
|
|
140
|
+
if acc.server and '_delta' not in tc.arguments: yield Part(type=PartType.tool_result, text="Server tool call executed.", data=data)
|
|
131
141
|
if d.server_tool_result:
|
|
132
142
|
idx = _fidx(d, 'server_tool_result')
|
|
133
143
|
part_accum.parts[idx] = Part(type=typ, data=d.server_tool_result)
|
|
134
|
-
|
|
144
|
+
srv_tc = next((p for p in reversed(list(part_accum.parts.values())) if isinstance(p, ToolCall) and p.server), None)
|
|
145
|
+
if srv_tc:
|
|
146
|
+
data = {**srv_tc.extra, 'id':srv_tc.id, 'name':srv_tc.name, 'arguments':srv_tc.arguments, 'server':True}
|
|
147
|
+
yield Part(type=PartType.tool_result, text="Server tool call executed.", data=data)
|
|
148
|
+
r = _proc(d, 'refusal')
|
|
149
|
+
if r[0]: yield r[0]
|
|
135
150
|
if d.finish_reason: fin = d.finish_reason
|
|
136
151
|
if d.usage: usg = d.usage
|
|
137
152
|
last_typ = typ
|
|
@@ -146,4 +161,3 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
|
|
|
146
161
|
message=Msg(role="assistant", content=part_accum.parts),
|
|
147
162
|
finish_reason=fin, usage=usg, tool_calls=tcs, api_name=api_name, vendor_name=vendor_name,
|
|
148
163
|
raw={'deltas':deltas})
|
|
149
|
-
|
|
@@ -4,12 +4,14 @@
|
|
|
4
4
|
|
|
5
5
|
# %% auto #0
|
|
6
6
|
__all__ = ['PartType', 'FinishReason', 'api_registry', 'model_prices_url', 'haik45', 'sonn45', 'sonn', 'sonn46', 'opus46', 'opus',
|
|
7
|
-
'gpt54', 'gpt54m', '
|
|
8
|
-
'
|
|
9
|
-
'
|
|
10
|
-
'
|
|
7
|
+
'gpt54', 'gpt54m', 'gpt55', 'codex54', 'codex54m', 'codex55', 'codex53spark', 'model_info_registry',
|
|
8
|
+
'deepseek_v4_common', 'codex_pricing', 'Part', 'Msg', 'ToolCall', 'display_list', 'Usage', 'Completion',
|
|
9
|
+
'APIRegistry', 'mk_completion', 'mk_tool_res_msg', 'fn_schema', 'sys_text', 'part_txt', 'data_url',
|
|
10
|
+
'url_mime', 'payload_kwargs', 'get_api_key', 'model_prices_meta', 'infer_api_name', 'get_model_meta',
|
|
11
|
+
'register_model_info', 'get_model_info', 'get_model_pricing', 'approx_pricing']
|
|
11
12
|
|
|
12
13
|
# %% ../nbs/00_types.ipynb #b4d047fd
|
|
14
|
+
import httpx
|
|
13
15
|
from dataclasses import dataclass, field
|
|
14
16
|
from fastcore.net import urljson
|
|
15
17
|
from fastcore.utils import *
|
|
@@ -27,14 +29,16 @@ PartType = str_enum('PartType', 'text', 'thinking', 'refusal', 'tool_use', 'serv
|
|
|
27
29
|
'input_image', 'input_audio', 'input_video', 'input_file')
|
|
28
30
|
|
|
29
31
|
# %% ../nbs/00_types.ipynb #2eeff103
|
|
30
|
-
def _trunc_strs(
|
|
31
|
-
"
|
|
32
|
-
if not
|
|
33
|
-
|
|
32
|
+
def _trunc_strs(o, n=200):
|
|
33
|
+
"Truncate str or dict"
|
|
34
|
+
if not o: return o
|
|
35
|
+
if isinstance(o,str) and len(o)>n: return o[:100]+'...'
|
|
36
|
+
if isinstance(o,dict): return {k: (v[:100]+'...' if isinstance(v,str) and len(v)>n else v) for k,v in o.items()}
|
|
37
|
+
return o
|
|
34
38
|
|
|
35
39
|
@patch
|
|
36
40
|
def _repr_markdown_(self: Part):
|
|
37
|
-
body = self.text if self.text else ''
|
|
41
|
+
body = _trunc_strs(self.text) if self.text else ''
|
|
38
42
|
data = _trunc_strs(self.data)
|
|
39
43
|
return f"""**Part** (`{self.type}`)
|
|
40
44
|
|
|
@@ -197,7 +201,17 @@ def sys_text(system):
|
|
|
197
201
|
|
|
198
202
|
def part_txt(p): return p.text if isinstance(p,Part) else p
|
|
199
203
|
|
|
200
|
-
# %% ../nbs/00_types.ipynb #
|
|
204
|
+
# %% ../nbs/00_types.ipynb #f3deb055
|
|
205
|
+
@flexicache(time_policy(24*3600))
|
|
206
|
+
def _fetch_url_partial(url, nbytes=512):
|
|
207
|
+
"Fetch remote media bytes, optionally only first `nbytes`."
|
|
208
|
+
try:
|
|
209
|
+
with httpx.stream('GET', url, headers={'Range': f'bytes=0-{nbytes-1}'}, follow_redirects=True) as r:
|
|
210
|
+
if r.status_code not in (200, 206): return
|
|
211
|
+
return r.read()
|
|
212
|
+
except (httpx.HTTPError, httpx.InvalidURL): return
|
|
213
|
+
|
|
214
|
+
# %% ../nbs/00_types.ipynb #70a9a0c3
|
|
201
215
|
_ext_mime = {
|
|
202
216
|
'.jpg':'image/jpeg', '.jpeg':'image/jpeg', '.png':'image/png', '.gif':'image/gif', '.webp':'image/webp',
|
|
203
217
|
'.pdf':'application/pdf',
|
|
@@ -213,22 +227,24 @@ def data_url(url):
|
|
|
213
227
|
return header[5:].split(';',1)[0].strip() or 'application/octet-stream', body
|
|
214
228
|
|
|
215
229
|
def url_mime(url, default='application/octet-stream'):
|
|
216
|
-
"Guess mime from URL extension."
|
|
230
|
+
"Guess mime from URL extension, and optional bytes fallback."
|
|
231
|
+
if "youtube.com" in url or "youtu.be" in url: return "video/mp4"
|
|
217
232
|
ext = '.' + url.rsplit('.', 1)[-1].split('?')[0].lower() if '.' in url.split('?')[0].split('/')[-1] else ''
|
|
218
|
-
|
|
233
|
+
if (mime:=_ext_mime.get(ext)) is None: return detect_mime(_fetch_url_partial(url))
|
|
234
|
+
return ifnone(mime, default)
|
|
219
235
|
|
|
220
236
|
# %% ../nbs/00_types.ipynb #28c698fe
|
|
221
|
-
def payload_kwargs(msgs, model, stream=False, system=None, max_tokens=None, temperature=None, tools=None, tool_choice=None, reasoning_effort=None, web_search_options=None, stop_callables=None
|
|
237
|
+
def payload_kwargs(msgs, model, stream=False, system=None, max_tokens=None, temperature=None, tools=None, tool_choice=None, reasoning_effort=None, web_search_options=None, stop_callables=None): pass
|
|
222
238
|
|
|
223
239
|
# %% ../nbs/00_types.ipynb #c2a2cb49
|
|
224
240
|
def get_api_key(api_key, default):
|
|
225
|
-
err = ValueError(f"Missing API key: make sure to have the expected env var name or pass `api_key`")
|
|
226
241
|
key = api_key or os.getenv(default)
|
|
227
|
-
if not key: raise
|
|
242
|
+
if not key: raise ValueError(f"Missing API key: set environment variable '{default}' or pass `api_key` parameter")
|
|
228
243
|
return key
|
|
229
244
|
|
|
230
245
|
# %% ../nbs/00_types.ipynb #852adecd
|
|
231
246
|
model_prices_url = 'https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json'
|
|
247
|
+
|
|
232
248
|
@flexicache(time_policy(24*60*60))
|
|
233
249
|
def model_prices_meta(): return urljson(model_prices_url)
|
|
234
250
|
|
|
@@ -258,63 +274,94 @@ opus46 = "claude-opus-4-6"
|
|
|
258
274
|
opus = "claude-opus-4-7"
|
|
259
275
|
gpt54 = "gpt-5.4"
|
|
260
276
|
gpt54m = "gpt-5.4-mini"
|
|
277
|
+
gpt55 = "gpt-5.5"
|
|
261
278
|
codex54 = "gpt-5.4"
|
|
279
|
+
codex54m = "gpt-5.4-mini"
|
|
262
280
|
codex55 = "gpt-5.5"
|
|
263
281
|
codex53spark = "gpt-5.3-codex-spark"
|
|
264
282
|
|
|
265
|
-
# %% ../nbs/00_types.ipynb #
|
|
266
|
-
|
|
267
|
-
"input_cost_per_token": 0.10 / 1_000_000,
|
|
268
|
-
"cache_creation_input_token_cost": 0.10 / 1_000_000,
|
|
269
|
-
"cache_read_input_token_cost": 0.10 / 1_000_000,
|
|
270
|
-
"output_cost_per_token": 0.50 / 1_000_000,
|
|
271
|
-
}
|
|
283
|
+
# %% ../nbs/00_types.ipynb #583e017b
|
|
284
|
+
model_info_registry = {}
|
|
272
285
|
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
286
|
+
def register_model_info(model, vendor_name=None, base=None, base_vendor_name=None, **overrides):
    "Store metadata under `(vendor_name, model)`: a copy of `base`'s info (when `base` is given) updated with `overrides`."
    entry = {}
    if base:
        # A falsy `base_vendor_name` (including an explicit `None`) falls back to `vendor_name`
        lookup_vendor = base_vendor_name or vendor_name
        entry = dict(get_model_info(base, lookup_vendor))
    entry.update(overrides)
    # An existing entry for the same key is replaced
    model_info_registry[(vendor_name, model)] = entry
|
|
278
291
|
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
info = get_model_meta(mn, 'chatgpt' if vendor_name=='codex' else vendor_name)
|
|
282
|
-
# anthropic web search
|
|
292
|
+
def get_model_info(mn, vendor_name=None):
|
|
293
|
+
info = model_info_registry.get((vendor_name, mn)) or get_model_meta(mn, vendor_name)
|
|
283
294
|
if 'search_context_cost_per_query' in info: info['supports_web_search'] = True
|
|
284
|
-
# kimi
|
|
285
|
-
if 'kimi' in mn:
|
|
286
|
-
if 'k2p6' in mn: info = get_model_meta(mn.replace('k2p6', 'k2p5'), vendor_name)
|
|
287
|
-
info['supports_reasoning'] = True
|
|
288
|
-
info['supports_vision'] = True
|
|
289
|
-
if vendor_name == 'moonshot': info['supports_assistant_prefill'] = True
|
|
290
|
-
# gpt web search
|
|
291
|
-
if mn in ("gpt-5.4", "gpt-5.4-mini"):
|
|
292
|
-
info['supports_web_search'] = True
|
|
293
|
-
info.pop('mode', None)
|
|
294
|
-
# codex updates
|
|
295
|
-
if vendor_name == 'codex':
|
|
296
|
-
info = merge(info, codex_pricing)
|
|
297
|
-
info |= _codex_overrides.get(mn, {})
|
|
298
|
-
# deepseek v4
|
|
299
|
-
if vendor_name == 'deepseek' and mn in ("deepseek-v4-flash", "deepseek-v4-pro"):
|
|
300
|
-
info = dict(get_model_meta("deepseek/deepseek-v3.2"))
|
|
301
|
-
info |= dict(supports_assistant_prefill=True, supports_function_calling=True, supports_prompt_caching=True,
|
|
302
|
-
supports_reasoning=True, supports_tool_choice=True)
|
|
303
|
-
info.update(input_cost_per_token=1.4e-07, input_cost_per_token_cache_hit=2.8e-09, output_cost_per_token=2.8e-07,
|
|
304
|
-
max_input_tokens=1048576, max_output_tokens=393216, max_tokens=393216)
|
|
305
|
-
if 'pro' in mn: info = {**info, 'input_cost_per_token': 4.35e-07, 'input_cost_per_token_cache_hit': 3.625e-09, 'output_cost_per_token': 8.7e-07}
|
|
306
|
-
# qwen 3p6
|
|
307
|
-
if vendor_name == 'fireworks_ai' and mn == 'accounts/fireworks/models/qwen3p6-plus':
|
|
308
|
-
info = dict(supports_vision=True, supports_reasoning=True, supports_function_calling=True, supports_tool_choice=True,
|
|
309
|
-
supports_system_messages=True, supports_response_schema=True, supports_parallel_function_calling=True,
|
|
310
|
-
supports_prompt_caching=True, supports_native_streaming=True, supports_native_structured_output=True,
|
|
311
|
-
max_tokens=1000000, max_input_tokens=1000000, max_output_tokens=65536,
|
|
312
|
-
input_cost_per_token=0.5e-6, cache_read_input_token_cost=0.1e-6, output_cost_per_token=3.0e-6)
|
|
313
|
-
|
|
314
|
-
# unresolved models
|
|
315
|
-
if not info and not strict: info = info | codex_pricing
|
|
316
295
|
return dict2obj(info)
|
|
317
296
|
|
|
297
|
+
# %% ../nbs/00_types.ipynb #8261dcd0
|
|
298
|
+
register_model_info('accounts/fireworks/models/qwen3p6-plus', vendor_name='fireworks_ai',
|
|
299
|
+
supports_vision=True, supports_reasoning=True, supports_function_calling=True, supports_tool_choice=True,
|
|
300
|
+
supports_system_messages=True, supports_response_schema=True, supports_parallel_function_calling=True,
|
|
301
|
+
supports_prompt_caching=True, supports_native_streaming=True, supports_native_structured_output=True,
|
|
302
|
+
max_tokens=1000000, max_input_tokens=1000000, max_output_tokens=65536,
|
|
303
|
+
input_cost_per_token=0.5e-6, cache_read_input_token_cost=0.1e-6, output_cost_per_token=3.0e-6)
|
|
304
|
+
|
|
305
|
+
register_model_info('gemini-3.5-flash', vendor_name='gemini', base='gemini-3-flash-preview',
|
|
306
|
+
input_cost_per_token=1.5e-6, output_cost_per_token=9e-6,
|
|
307
|
+
output_cost_per_reasoning_token=9e-6, cache_read_input_token_cost=1.5e-7)
|
|
308
|
+
|
|
309
|
+
for model in ('gpt-5.4', 'gpt-5.4-mini'):
|
|
310
|
+
register_model_info(model, vendor_name='openai', base=model, supports_web_search=True, mode=None)
|
|
311
|
+
|
|
312
|
+
for model in ('kimi-k2.5', 'kimi-k2.6'):
|
|
313
|
+
register_model_info(model, vendor_name='moonshot', base=f'moonshot/{model}', base_vendor_name=None,
|
|
314
|
+
supports_reasoning=True, supports_vision=True, supports_assistant_prefill=True)
|
|
315
|
+
|
|
316
|
+
register_model_info('gemini-3.1-flash-lite', vendor_name='gemini', base='gemini-3.1-flash-lite-preview')
|
|
317
|
+
register_model_info('models/gemini-3.1-flash-lite', vendor_name='gemini', base='gemini-3.1-flash-lite-preview')
|
|
318
|
+
|
|
319
|
+
for model in ('accounts/fireworks/models/kimi-k2p5', 'accounts/fireworks/models/kimi-k2p6'):
|
|
320
|
+
register_model_info(model, vendor_name='fireworks_ai', base=model.replace('k2p6', 'k2p5'),
|
|
321
|
+
supports_reasoning=True, supports_vision=True,
|
|
322
|
+
input_cost_per_token=0.95e-6, cache_read_input_token_cost=0.16e-6, output_cost_per_token=4.0e-6)
|
|
323
|
+
|
|
324
|
+
# %% ../nbs/00_types.ipynb #948d55d0
|
|
325
|
+
deepseek_v4_common = dict(
|
|
326
|
+
supports_assistant_prefill=True, supports_function_calling=True, supports_prompt_caching=True,
|
|
327
|
+
supports_reasoning=True, supports_tool_choice=True,
|
|
328
|
+
max_input_tokens=1048576, max_output_tokens=393216, max_tokens=393216)
|
|
329
|
+
|
|
330
|
+
register_model_info('deepseek-v4-flash', vendor_name='deepseek', base='deepseek/deepseek-v3.2', **deepseek_v4_common,
|
|
331
|
+
input_cost_per_token=1.4e-07, input_cost_per_token_cache_hit=2.8e-09,
|
|
332
|
+
output_cost_per_token=2.8e-07, cache_read_input_token_cost=1.4e-07/10)
|
|
333
|
+
register_model_info('deepseek-v4-pro', vendor_name='deepseek', base='deepseek/deepseek-v3.2', **deepseek_v4_common,
|
|
334
|
+
input_cost_per_token=4.35e-07, input_cost_per_token_cache_hit=3.625e-09,
|
|
335
|
+
output_cost_per_token=8.7e-07, cache_read_input_token_cost=4.35e-07/10)
|
|
336
|
+
|
|
337
|
+
# %% ../nbs/00_types.ipynb #2c23d11e
|
|
338
|
+
codex_pricing = dict(
|
|
339
|
+
input_cost_per_token = 0.10/1_000_000, output_cost_per_token = 0.50/1_000_000,
|
|
340
|
+
cache_creation_input_token_cost = 0.10/1_000_000, cache_read_input_token_cost = 0.10/1_000_000)
|
|
341
|
+
|
|
342
|
+
for model in (codex54, codex54m, codex55):
|
|
343
|
+
register_model_info(model, 'codex', base=model, base_vendor_name='chatgpt', supports_web_search=True, **codex_pricing)
|
|
344
|
+
|
|
345
|
+
register_model_info(codex53spark, 'codex', **codex_pricing,
|
|
346
|
+
supports_vision=False, supports_image_input=False, supports_web_search=True, supports_reasoning=True,
|
|
347
|
+
max_tokens=128000, max_input_tokens=128000, max_output_tokens=128000)
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
# %% ../nbs/00_types.ipynb #24cc47ec
|
|
351
|
+
def get_model_pricing(mn, vendor_name, million=True):
    "Float-valued `*cost*` entries of a model's info (keys containing `priority` excluded), per million tokens unless `million=False`."
    # Per-token prices are on the order of 1e-7..1e-9, so rounding them to 6 places
    # would collapse them to 0.0. 12 places per token is the same precision as
    # 6 places per million tokens, which keeps both modes consistent.
    scale, ndigits = (1e6, 6) if million else (1, 12)
    info = get_model_info(mn, vendor_name)
    return {k: round(v*scale, ndigits)
            for k,v in info.items()
            if 'cost' in k and isinstance(v, float) and 'priority' not in k}
|
|
355
|
+
|
|
356
|
+
# %% ../nbs/00_types.ipynb #79304cd9
|
|
357
|
+
def approx_pricing(nm, vendor_name, out=10, cache=80, inp=10, markup=0):
    "Weighted average of output, cache-read and input prices per million tokens, using the `out`/`cache`/`inp` weights, scaled by `1+markup`."
    prices = get_model_pricing(nm, vendor_name)
    # The plain input price is looked up unconditionally; it is the fallback when
    # no `cache_creation_input_token_cost` entry is present.
    plain_input = prices['input_cost_per_token']
    input_price = prices.get('cache_creation_input_token_cost', plain_input)
    weighted = prices['output_cost_per_token']*out + prices['cache_read_input_token_cost']*cache + input_price*inp
    blended = weighted / (out+cache+inp)
    # Hard-coded 1.5x multiplier applied to this one model name
    if nm == 'claude-opus-4-7': blended = blended*1.5
    return blended*(1+markup)
|
|
364
|
+
|
|
318
365
|
# %% ../nbs/00_types.ipynb #8bfca02d
|
|
319
366
|
@patch(as_prop=True)
|
|
320
367
|
def cost(self:Completion):
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.0.8"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|