python-fastllm 0.0.9__tar.gz → 0.0.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_fastllm-0.0.9 → python_fastllm-0.0.11}/PKG-INFO +1 -1
- python_fastllm-0.0.11/fastllm/__init__.py +1 -0
- {python_fastllm-0.0.9 → python_fastllm-0.0.11}/fastllm/_modidx.py +8 -3
- {python_fastllm-0.0.9 → python_fastllm-0.0.11}/fastllm/acomplete.py +41 -11
- {python_fastllm-0.0.9 → python_fastllm-0.0.11}/fastllm/anthropic.py +4 -2
- {python_fastllm-0.0.9 → python_fastllm-0.0.11}/fastllm/chat.py +36 -38
- {python_fastllm-0.0.9 → python_fastllm-0.0.11}/fastllm/openai_chat.py +1 -1
- {python_fastllm-0.0.9 → python_fastllm-0.0.11}/fastllm/streaming.py +22 -8
- {python_fastllm-0.0.9 → python_fastllm-0.0.11}/fastllm/types.py +56 -29
- {python_fastllm-0.0.9 → python_fastllm-0.0.11}/python_fastllm.egg-info/PKG-INFO +1 -1
- python_fastllm-0.0.9/fastllm/__init__.py +0 -1
- {python_fastllm-0.0.9 → python_fastllm-0.0.11}/README.md +0 -0
- {python_fastllm-0.0.9 → python_fastllm-0.0.11}/fastllm/codex.py +0 -0
- {python_fastllm-0.0.9 → python_fastllm-0.0.11}/fastllm/gemini.py +0 -0
- {python_fastllm-0.0.9 → python_fastllm-0.0.11}/fastllm/openai_responses.py +0 -0
- {python_fastllm-0.0.9 → python_fastllm-0.0.11}/fastllm/specs/anthropic.json +0 -0
- {python_fastllm-0.0.9 → python_fastllm-0.0.11}/fastllm/specs/anthropic.yml +0 -0
- {python_fastllm-0.0.9 → python_fastllm-0.0.11}/fastllm/specs/gemini.json +0 -0
- {python_fastllm-0.0.9 → python_fastllm-0.0.11}/fastllm/specs/openai.with-code-samples.json +0 -0
- {python_fastllm-0.0.9 → python_fastllm-0.0.11}/fastllm/specs/openai.with-code-samples.yml +0 -0
- {python_fastllm-0.0.9 → python_fastllm-0.0.11}/fastllm/specs/spec_manifest.json +0 -0
- {python_fastllm-0.0.9 → python_fastllm-0.0.11}/pyproject.toml +0 -0
- {python_fastllm-0.0.9 → python_fastllm-0.0.11}/python_fastllm.egg-info/SOURCES.txt +0 -0
- {python_fastllm-0.0.9 → python_fastllm-0.0.11}/python_fastllm.egg-info/dependency_links.txt +0 -0
- {python_fastllm-0.0.9 → python_fastllm-0.0.11}/python_fastllm.egg-info/entry_points.txt +0 -0
- {python_fastllm-0.0.9 → python_fastllm-0.0.11}/python_fastllm.egg-info/requires.txt +0 -0
- {python_fastllm-0.0.9 → python_fastllm-0.0.11}/python_fastllm.egg-info/top_level.txt +0 -0
- {python_fastllm-0.0.9 → python_fastllm-0.0.11}/setup.cfg +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.0.11"
|
|
@@ -12,6 +12,9 @@ d = { 'settings': { 'branch': 'main',
|
|
|
12
12
|
'fastllm/acomplete.py'),
|
|
13
13
|
'fastllm.acomplete._debug_print': ('acomplete.html#_debug_print', 'fastllm/acomplete.py'),
|
|
14
14
|
'fastllm.acomplete._is_ctx_exceeded': ('acomplete.html#_is_ctx_exceeded', 'fastllm/acomplete.py'),
|
|
15
|
+
'fastllm.acomplete._raise_if_done': ('acomplete.html#_raise_if_done', 'fastllm/acomplete.py'),
|
|
16
|
+
'fastllm.acomplete._retry_call': ('acomplete.html#_retry_call', 'fastllm/acomplete.py'),
|
|
17
|
+
'fastllm.acomplete._retry_stream': ('acomplete.html#_retry_stream', 'fastllm/acomplete.py'),
|
|
15
18
|
'fastllm.acomplete.acomplete': ('acomplete.html#acomplete', 'fastllm/acomplete.py'),
|
|
16
19
|
'fastllm.acomplete.mk_client': ('acomplete.html#mk_client', 'fastllm/acomplete.py')},
|
|
17
20
|
'fastllm.anthropic': { 'fastllm.anthropic._ant_cc': ('anthropic.html#_ant_cc', 'fastllm/anthropic.py'),
|
|
@@ -74,6 +77,8 @@ d = { 'settings': { 'branch': 'main',
|
|
|
74
77
|
'fastllm.chat.FenceToolStop.__call__': ('chat.html#fencetoolstop.__call__', 'fastllm/chat.py'),
|
|
75
78
|
'fastllm.chat.FenceToolStop.__init__': ('chat.html#fencetoolstop.__init__', 'fastllm/chat.py'),
|
|
76
79
|
'fastllm.chat.FullResponse': ('chat.html#fullresponse', 'fastllm/chat.py'),
|
|
80
|
+
'fastllm.chat.MediaUrl': ('chat.html#mediaurl', 'fastllm/chat.py'),
|
|
81
|
+
'fastllm.chat.MediaUrl.__init__': ('chat.html#mediaurl.__init__', 'fastllm/chat.py'),
|
|
77
82
|
'fastllm.chat.Msg.text': ('chat.html#msg.text', 'fastllm/chat.py'),
|
|
78
83
|
'fastllm.chat.StopReasonCallback': ('chat.html#stopreasoncallback', 'fastllm/chat.py'),
|
|
79
84
|
'fastllm.chat.StopReasonCallback.after_acomplete': ( 'chat.html#stopreasoncallback.after_acomplete',
|
|
@@ -113,19 +118,19 @@ d = { 'settings': { 'branch': 'main',
|
|
|
113
118
|
'fastllm.chat._has_stop': ('chat.html#_has_stop', 'fastllm/chat.py'),
|
|
114
119
|
'fastllm.chat._inject_tool_reminder': ('chat.html#_inject_tool_reminder', 'fastllm/chat.py'),
|
|
115
120
|
'fastllm.chat._lite_call_func': ('chat.html#_lite_call_func', 'fastllm/chat.py'),
|
|
121
|
+
'fastllm.chat._mime2part_type': ('chat.html#_mime2part_type', 'fastllm/chat.py'),
|
|
116
122
|
'fastllm.chat._mk_content': ('chat.html#_mk_content', 'fastllm/chat.py'),
|
|
117
123
|
'fastllm.chat._mk_prefill': ('chat.html#_mk_prefill', 'fastllm/chat.py'),
|
|
118
124
|
'fastllm.chat._mk_result_fence': ('chat.html#_mk_result_fence', 'fastllm/chat.py'),
|
|
119
125
|
'fastllm.chat._mk_tool_result': ('chat.html#_mk_tool_result', 'fastllm/chat.py'),
|
|
120
126
|
'fastllm.chat._split_fence_msgs': ('chat.html#_split_fence_msgs', 'fastllm/chat.py'),
|
|
121
127
|
'fastllm.chat._split_msg_on_fences': ('chat.html#_split_msg_on_fences', 'fastllm/chat.py'),
|
|
122
|
-
'fastllm.chat._srv_tc_summary': ('chat.html#_srv_tc_summary', 'fastllm/chat.py'),
|
|
123
|
-
'fastllm.chat._srvtools': ('chat.html#_srvtools', 'fastllm/chat.py'),
|
|
124
128
|
'fastllm.chat._tc_summary': ('chat.html#_tc_summary', 'fastllm/chat.py'),
|
|
125
129
|
'fastllm.chat._think_kw': ('chat.html#_think_kw', 'fastllm/chat.py'),
|
|
126
130
|
'fastllm.chat._trunc_content': ('chat.html#_trunc_content', 'fastllm/chat.py'),
|
|
127
131
|
'fastllm.chat._trunc_param': ('chat.html#_trunc_param', 'fastllm/chat.py'),
|
|
128
132
|
'fastllm.chat._trunc_str': ('chat.html#_trunc_str', 'fastllm/chat.py'),
|
|
133
|
+
'fastllm.chat._url2content': ('chat.html#_url2content', 'fastllm/chat.py'),
|
|
129
134
|
'fastllm.chat._usrtools': ('chat.html#_usrtools', 'fastllm/chat.py'),
|
|
130
135
|
'fastllm.chat.add_warning': ('chat.html#add_warning', 'fastllm/chat.py'),
|
|
131
136
|
'fastllm.chat.adisplay_stream': ('chat.html#adisplay_stream', 'fastllm/chat.py'),
|
|
@@ -137,7 +142,6 @@ d = { 'settings': { 'branch': 'main',
|
|
|
137
142
|
'fastllm.chat.lite_mk_func': ('chat.html#lite_mk_func', 'fastllm/chat.py'),
|
|
138
143
|
'fastllm.chat.mk_msg': ('chat.html#mk_msg', 'fastllm/chat.py'),
|
|
139
144
|
'fastllm.chat.mk_msgs': ('chat.html#mk_msgs', 'fastllm/chat.py'),
|
|
140
|
-
'fastllm.chat.mk_srv_tc_details': ('chat.html#mk_srv_tc_details', 'fastllm/chat.py'),
|
|
141
145
|
'fastllm.chat.mk_tr_details': ('chat.html#mk_tr_details', 'fastllm/chat.py'),
|
|
142
146
|
'fastllm.chat.postproc': ('chat.html#postproc', 'fastllm/chat.py'),
|
|
143
147
|
'fastllm.chat.remove_cache_ckpts': ('chat.html#remove_cache_ckpts', 'fastllm/chat.py'),
|
|
@@ -270,6 +274,7 @@ d = { 'settings': { 'branch': 'main',
|
|
|
270
274
|
'fastllm.types.ToolCall': ('types.html#toolcall', 'fastllm/types.py'),
|
|
271
275
|
'fastllm.types.ToolCall._repr_markdown_': ('types.html#toolcall._repr_markdown_', 'fastllm/types.py'),
|
|
272
276
|
'fastllm.types.Usage': ('types.html#usage', 'fastllm/types.py'),
|
|
277
|
+
'fastllm.types._fetch_url_partial': ('types.html#_fetch_url_partial', 'fastllm/types.py'),
|
|
273
278
|
'fastllm.types._trunc_strs': ('types.html#_trunc_strs', 'fastllm/types.py'),
|
|
274
279
|
'fastllm.types.approx_pricing': ('types.html#approx_pricing', 'fastllm/types.py'),
|
|
275
280
|
'fastllm.types.data_url': ('types.html#data_url', 'fastllm/types.py'),
|
|
@@ -7,7 +7,7 @@ __all__ = ['specs_path', 'ant_spec', 'oai_spec', 'gem_spec', 'vendor_mapping', '
|
|
|
7
7
|
'ContextWindowExceededError', 'acomplete']
|
|
8
8
|
|
|
9
9
|
# %% ../nbs/06_acomplete.ipynb #f2f57253
|
|
10
|
-
import json
|
|
10
|
+
import asyncio,json,httpx
|
|
11
11
|
from importlib.resources import files
|
|
12
12
|
from fastcore.utils import *
|
|
13
13
|
from fastcore.meta import *
|
|
@@ -38,6 +38,7 @@ vendor_mapping = {
|
|
|
38
38
|
"codex": ('openai', 'https://chatgpt.com/backend-api/codex', 'CODEX_AUTH_TOKEN', _codex_json),
|
|
39
39
|
"moonshot": ('openai_chat', "https://api.moonshot.ai/v1", "MOONSHOT_API_KEY"),
|
|
40
40
|
"deepseek": ('openai_chat', "https://api.deepseek.com/v1", "DEEPSEEK_API_KEY"),
|
|
41
|
+
"mimo": ('openai_chat', "https://api.xiaomimimo.com/v1", "MIMO_API_KEY"),
|
|
41
42
|
"openrouter": ('openai_chat', "https://openrouter.ai/api/v1", "OPENROUTER_API_KEY"),
|
|
42
43
|
"together": ('openai_chat', "https://api.together.xyz/v1", "TOGETHER_API_KEY"),
|
|
43
44
|
"fireworks_ai": ('openai_chat', "https://api.fireworks.ai/inference/v1", "FIREWORKS_API_KEY"),
|
|
@@ -49,7 +50,8 @@ api2spec = {'openai':oai_spec, 'openai_chat':oai_spec, 'anthropic':ant_spec, 'ge
|
|
|
49
50
|
|
|
50
51
|
# %% ../nbs/06_acomplete.ipynb #79075d95
|
|
51
52
|
@flexicache()
|
|
52
|
-
def mk_client(model=None, vendor_name=None, api_name=None, api_key=None, base_url=None, xtra_hdrs=None
|
|
53
|
+
def mk_client(model=None, vendor_name=None, api_name=None, api_key=None, base_url=None, xtra_hdrs=None,
|
|
54
|
+
timeout=httpx.Timeout(connect=30, read=300, write=30, pool=10)):
|
|
53
55
|
err_msg = f"please pass a valid one vendor: {', '.join(list(vendor_mapping))} or pass `api_name`,`base_url` and `api_key`"
|
|
54
56
|
if vendor_name:
|
|
55
57
|
override_base_url = base_url
|
|
@@ -67,7 +69,7 @@ def mk_client(model=None, vendor_name=None, api_name=None, api_key=None, base_ur
|
|
|
67
69
|
else: raise ValueError(f"Model {model} can't be auto resolved, {err_msg}")
|
|
68
70
|
api = api_registry.apis[api_name]
|
|
69
71
|
spec, hdrs = api2spec[api_name], api.get_hdrs(api_key)
|
|
70
|
-
cli = OpenAPIClient(spec, headers=merge(hdrs, ifnone(xtra_hdrs, {})))
|
|
72
|
+
cli = OpenAPIClient(spec, headers=merge(hdrs, ifnone(xtra_hdrs, {})), timeout=timeout)
|
|
71
73
|
if base_url is not None:
|
|
72
74
|
for op in cli.ops: op.base_url = base_url # pyright: ignore[reportAttributeAccessIssue]
|
|
73
75
|
return cli, api_name, vendor_name
|
|
@@ -81,7 +83,7 @@ def _is_ctx_exceeded(code, msg):
|
|
|
81
83
|
if str(code or "").lower() == "context_length_exceeded": return True
|
|
82
84
|
return any(s in m for s in ("exceed context limit", "maximum context length", "maximum context limit",
|
|
83
85
|
"longer than the model's context length", "input tokens exceed the configured limit",
|
|
84
|
-
"exceeds the maximum number of tokens allowed", "prompt is too long"))
|
|
86
|
+
"exceeds the maximum number of tokens allowed", "prompt is too long", "exceeds the context window"))
|
|
85
87
|
|
|
86
88
|
def _classify_error(exc):
|
|
87
89
|
"Upgrade generic `APIError` to a specific subclass if applicable."
|
|
@@ -113,14 +115,36 @@ def _debug_print(model, api_name, vendor_name, payload, func):
|
|
|
113
115
|
print(f"\033[1;33mpayload:\033[0m\n{pformat(p, width=120, sort_dicts=False)}")
|
|
114
116
|
print('━'*60)
|
|
115
117
|
|
|
118
|
+
# %% ../nbs/06_acomplete.ipynb #497c8565
|
|
119
|
+
async def _raise_if_done(e, n, retries, retry_delay, yielded=False):
|
|
120
|
+
e = _classify_error(e)
|
|
121
|
+
if yielded or not e.retryable or n == retries: raise e
|
|
122
|
+
await asyncio.sleep(retry_delay*2**n)
|
|
123
|
+
|
|
124
|
+
async def _retry_call(f, retries=2, retry_delay=0.5):
|
|
125
|
+
for n in range(retries+1):
|
|
126
|
+
try: return await f()
|
|
127
|
+
except APIError as e: await _raise_if_done(e, n, retries, retry_delay)
|
|
128
|
+
|
|
129
|
+
async def _retry_stream(mk_gen, retries=2, retry_delay=0.5):
|
|
130
|
+
for n in range(retries+1):
|
|
131
|
+
yielded = False
|
|
132
|
+
try:
|
|
133
|
+
async for o in mk_gen():
|
|
134
|
+
yielded = True
|
|
135
|
+
yield o
|
|
136
|
+
return
|
|
137
|
+
except APIError as e: await _raise_if_done(e, n, retries, retry_delay, yielded=yielded)
|
|
138
|
+
|
|
116
139
|
# %% ../nbs/06_acomplete.ipynb #2379ec94
|
|
117
140
|
@delegates(payload_kwargs)
|
|
118
|
-
async def acomplete(msgs, model, api_name=None, vendor_name=None, api_key=None,
|
|
119
|
-
|
|
141
|
+
async def acomplete(msgs, model, api_name=None, vendor_name=None, api_key=None,
|
|
142
|
+
base_url=None, xtra_body=None, xtra_hdrs=None, stream=False,
|
|
143
|
+
stop_callables=None, retries=2, retry_delay=0.5, **kwargs):
|
|
120
144
|
"Unified completion across different APIs."
|
|
121
145
|
cli, api_name, vendor_name = mk_client(model, vendor_name, api_name, api_key, base_url, xtra_hdrs)
|
|
122
146
|
api = api_registry.apis[api_name]
|
|
123
|
-
payload = api.mk_payload(msgs, model, stream=stream,
|
|
147
|
+
payload = api.mk_payload(msgs, model, stream=stream, **kwargs)
|
|
124
148
|
payload = merge(payload, ifnone(xtra_body, {}))
|
|
125
149
|
if vendor_name == 'codex':
|
|
126
150
|
for k in 'temperature max_tokens max_output_tokens max_completion_tokens metadata'.split(): payload.pop(k, None)
|
|
@@ -130,7 +154,13 @@ async def acomplete(msgs, model, api_name=None, vendor_name=None, api_key=None,
|
|
|
130
154
|
if vendor_name == 'moonshot' and 'kimi' in model: payload['messages'][-1]['partial'] = True
|
|
131
155
|
func = attrgetter(api.op_path[stream])(cli)
|
|
132
156
|
if defaults.debug_mode: _debug_print(model, api_name, vendor_name, payload, func)
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
157
|
+
async def _call(): return await func(**payload)
|
|
158
|
+
if not stream:
|
|
159
|
+
resp = await _retry_call(_call, retries, retry_delay)
|
|
160
|
+
return mk_completion(resp, model=model, api_name=api_name, vendor_name=vendor_name)
|
|
161
|
+
|
|
162
|
+
async def _mk_gen():
|
|
163
|
+
resp = await _call()
|
|
164
|
+
async for o in api.acollect_stream(resp, model=model, vendor_name=vendor_name, stop_callables=stop_callables): yield o
|
|
165
|
+
|
|
166
|
+
return _retry_stream(_mk_gen, retries, retry_delay)
|
|
@@ -90,7 +90,9 @@ def norm_sse_event(ev, **kwargs):
|
|
|
90
90
|
if typ == "content_block_start":
|
|
91
91
|
cb = ev.get("content_block", {})
|
|
92
92
|
if cb.get("type", "").endswith("_tool_result"): return Delta(server_tool_result=cb, raw=ev, **kwargs)
|
|
93
|
-
if tc := norm_tool_call(cb):
|
|
93
|
+
if tc := norm_tool_call(cb):
|
|
94
|
+
if not tc.arguments: tc.arguments = {'_delta': ''}
|
|
95
|
+
tcs = [tc]
|
|
94
96
|
elif typ == "content_block_delta":
|
|
95
97
|
d = ev.get("delta", {})
|
|
96
98
|
dtyp = d.get("type")
|
|
@@ -285,7 +287,7 @@ def cost(usage, m):
|
|
|
285
287
|
in_tok = raw['input_tokens']
|
|
286
288
|
cache_read = raw.get('cache_read_input_tokens', 0)
|
|
287
289
|
cc = raw.get('cache_creation', {}) or {}
|
|
288
|
-
cache_5m = cc.get('ephemeral_5m_input_tokens', 0)
|
|
290
|
+
cache_5m = cc.get('ephemeral_5m_input_tokens', raw.get('cache_creation_input_tokens', 0))
|
|
289
291
|
cache_1h = cc.get('ephemeral_1h_input_tokens', 0)
|
|
290
292
|
cost = in_tok * m.input_cost_per_token
|
|
291
293
|
cost += raw['output_tokens'] * m.output_cost_per_token
|
|
@@ -3,13 +3,13 @@
|
|
|
3
3
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/07_chat.ipynb.
|
|
4
4
|
|
|
5
5
|
# %% auto #0
|
|
6
|
-
__all__ = ['tool_dtls_tag', 're_tools', 'token_dtls_tag', 're_token', 'effort', '
|
|
7
|
-
'mk_msg', 'FenceToolStop', 'extract_fence_call', 'split_tools', 'fmt2hist', 'mk_msgs',
|
|
8
|
-
'postproc', 'lite_mk_func', 'ToolResponse', 'structured', 'StopResponse', 'FullResponse',
|
|
9
|
-
'UsageStats', 'AsyncChat', 'astream_with_complete', 'ChatCallback', 'DeepseekMsgsCallback',
|
|
6
|
+
__all__ = ['tool_dtls_tag', 're_tools', 'token_dtls_tag', 're_token', 'effort', 'MediaUrl', 'remove_cache_ckpts', 'contents',
|
|
7
|
+
'stop_reason', 'mk_msg', 'FenceToolStop', 'extract_fence_call', 'split_tools', 'fmt2hist', 'mk_msgs',
|
|
8
|
+
'cite_footnote', 'postproc', 'lite_mk_func', 'ToolResponse', 'structured', 'StopResponse', 'FullResponse',
|
|
9
|
+
'search_count', 'UsageStats', 'AsyncChat', 'astream_with_complete', 'ChatCallback', 'DeepseekMsgsCallback',
|
|
10
10
|
'DeepseekPrefillCallback', 'add_warning', 'StopReasonCallback', 'run_fence_tool', 'FenceToolCallback',
|
|
11
|
-
'ToolReminderCallback', 'stop_sequences', 'StopSequencesCallback', 'mk_tr_details', '
|
|
12
|
-
'
|
|
11
|
+
'ToolReminderCallback', 'stop_sequences', 'StopSequencesCallback', 'mk_tr_details', 'StreamFormatter',
|
|
12
|
+
'AsyncStreamFormatter', 'adisplay_stream']
|
|
13
13
|
|
|
14
14
|
# %% ../nbs/07_chat.ipynb #d5a3bc1f
|
|
15
15
|
import asyncio, base64, json, mimetypes, random, string, ast, warnings
|
|
@@ -25,15 +25,30 @@ from dataclasses import dataclass
|
|
|
25
25
|
from .types import *
|
|
26
26
|
from .acomplete import *
|
|
27
27
|
|
|
28
|
-
# %% ../nbs/07_chat.ipynb #
|
|
28
|
+
# %% ../nbs/07_chat.ipynb #1b75c262
|
|
29
|
+
class MediaUrl(BasicRepr):
|
|
30
|
+
"Direct URL media reference"
|
|
31
|
+
def __init__(self, url, mime=None): self.url, self.mime = url, ifnone(mime, url_mime(url))
|
|
32
|
+
|
|
33
|
+
# %% ../nbs/07_chat.ipynb #eb557831
|
|
34
|
+
def _mime2part_type(mime):
|
|
35
|
+
"Map MIME string to canonical PartType"
|
|
36
|
+
if mime.startswith('image/'): return PartType.input_image
|
|
37
|
+
if mime.startswith('audio/'): return PartType.input_audio
|
|
38
|
+
if mime.startswith('video/'): return PartType.input_video
|
|
39
|
+
return PartType.input_file
|
|
40
|
+
|
|
29
41
|
def _bytes2content(data):
|
|
30
|
-
"Convert bytes to
|
|
42
|
+
"Convert bytes to fastllm canonical content"
|
|
31
43
|
mtype = detect_mime(data)
|
|
32
44
|
if not mtype: raise ValueError(f'Data must be a supported file type, got {data[:10]}')
|
|
33
|
-
encoded = base64.b64encode(data).decode("utf-8")
|
|
34
|
-
|
|
35
|
-
return Part(type=PartType.input_file, text=f'data:{mtype};base64,{encoded}')
|
|
45
|
+
encoded = base64.b64encode(data).decode("utf-8")
|
|
46
|
+
return Part(type=_mime2part_type(mtype), text=f'data:{mtype};base64,{encoded}')
|
|
36
47
|
|
|
48
|
+
def _url2content(o):
|
|
49
|
+
"Convert MediaUrl to fastllm canonical content"
|
|
50
|
+
mime = o.mime or url_mime(o.url)
|
|
51
|
+
return Part(type=_mime2part_type(mime), text=o.url, data=dict(mime=mime))
|
|
37
52
|
|
|
38
53
|
# %% ../nbs/07_chat.ipynb #48c78e48
|
|
39
54
|
def _add_cache_control(msg, # LiteLLM formatted msg
|
|
@@ -57,8 +72,9 @@ def remove_cache_ckpts(msg):
|
|
|
57
72
|
return msg
|
|
58
73
|
|
|
59
74
|
def _mk_content(o):
|
|
60
|
-
if isinstance(o, str):
|
|
61
|
-
elif isinstance(o,bytes):
|
|
75
|
+
if isinstance(o, str): return Part(type=PartType.text, text=o)
|
|
76
|
+
elif isinstance(o, bytes): return _bytes2content(o)
|
|
77
|
+
elif isinstance(o, MediaUrl): return _url2content(o)
|
|
62
78
|
return o
|
|
63
79
|
|
|
64
80
|
def contents(c):
|
|
@@ -100,7 +116,7 @@ re_token = re.compile(fr"^{re.escape(token_dtls_tag)}\n*<summary>.*?</summary>\n
|
|
|
100
116
|
_fence_back = '`````'
|
|
101
117
|
_fence_re = re.compile(f'^{_fence_back}(py|bash)\n(.*?)\n{_fence_back}$', re.DOTALL | re.MULTILINE)
|
|
102
118
|
_result_re = re.compile(f'\n{_fence_back}result\n(.*?)\n{_fence_back}\n', re.DOTALL)
|
|
103
|
-
_lang2tool = dict(py='
|
|
119
|
+
_lang2tool = dict(py='pyrun', bash='bash')
|
|
104
120
|
|
|
105
121
|
class FenceToolStop:
|
|
106
122
|
def __init__(self, langs): self.langs = langs
|
|
@@ -188,6 +204,7 @@ def fmt2hist(outp:str)->list[Msg]:
|
|
|
188
204
|
for msg in hist:
|
|
189
205
|
if msg.role == 'assistant': result.extend(_split_msg_on_fences(msg))
|
|
190
206
|
else: result.append(msg)
|
|
207
|
+
if result[-1].role == 'tool': result.append(Msg(role='assistant', content=[Part(type=PartType.text, text='.')]))
|
|
191
208
|
return result
|
|
192
209
|
|
|
193
210
|
# %% ../nbs/07_chat.ipynb #8de5ce8d
|
|
@@ -427,7 +444,6 @@ class AsyncChat:
|
|
|
427
444
|
return self
|
|
428
445
|
|
|
429
446
|
# %% ../nbs/07_chat.ipynb #2e469ea1
|
|
430
|
-
def _srvtools(tcs): return L(tcs).filter(lambda o: o.server) if tcs else None
|
|
431
447
|
def _usrtools(tcs): return L(tcs).filter(lambda o: not o.server) if tcs else None
|
|
432
448
|
|
|
433
449
|
# %% ../nbs/07_chat.ipynb #19b87f53
|
|
@@ -511,8 +527,6 @@ async def _call(self:AsyncChat, msg=None, prefill=None, temp=None, think=None, s
|
|
|
511
527
|
|
|
512
528
|
self.toolloop, self.prompt, tmsg = False, None, None
|
|
513
529
|
async for o in self._call_cbs('before_tool_calls'): yield o
|
|
514
|
-
if stcs:= _srvtools(res.tool_calls):
|
|
515
|
-
for tc in stcs: yield tc
|
|
516
530
|
if tcs := _usrtools(res.tool_calls):
|
|
517
531
|
tres = await parallel_async(_alite_call_func, tcs, timeout=tc_timeout, n_workers=n_workers, pause=pause, **self.tcdict)
|
|
518
532
|
tmsg = mk_tool_res_msg(tcs, tres)
|
|
@@ -622,7 +636,7 @@ def _active_fence_langs(tool_schemas):
|
|
|
622
636
|
async def run_fence_tool(lang, code, ns):
|
|
623
637
|
"Run the mapped tool for `lang` with the code, return result fence"
|
|
624
638
|
tname = _lang2tool[lang]
|
|
625
|
-
arg = dict(code=code) if lang == 'py' else dict(
|
|
639
|
+
arg = dict(code=code) if lang == 'py' else dict(cmd=code)
|
|
626
640
|
res = _mk_tool_result(await call_func_async(tname, arg, ns=ns, raise_on_err=False))
|
|
627
641
|
return _mk_result_fence(_trunc_str(str(res)))
|
|
628
642
|
|
|
@@ -703,15 +717,9 @@ def _trunc_param(v, mx=40):
|
|
|
703
717
|
def _tc_summary(tr):
|
|
704
718
|
"Format tool call as func(params) → result string"
|
|
705
719
|
params = ', '.join(f"{k}={_trunc_param(v)}" for k,v in tr.data['arguments'].items())
|
|
706
|
-
res = f"→{_trunc_param(tr.text)}"
|
|
720
|
+
res = f"→{_trunc_param(tr.text)}" if tr.text else ''
|
|
707
721
|
return '<code>'+escape(f"{tr.data['name']}({params}){res}")+'</code>'
|
|
708
722
|
|
|
709
|
-
# %% ../nbs/07_chat.ipynb #91beb26c
|
|
710
|
-
def _srv_tc_summary(tc):
|
|
711
|
-
"Format tool call as func(params) → result string"
|
|
712
|
-
params = ', '.join(f"{k}={_trunc_param(v)}" for k,v in tc.arguments.items())
|
|
713
|
-
return '<code>'+escape(f"{tc.name}({params})")+'</code>'
|
|
714
|
-
|
|
715
723
|
# %% ../nbs/07_chat.ipynb #80f344cc
|
|
716
724
|
def _trunc_content(content, mx):
|
|
717
725
|
"Truncate tool result content, respecting '_full' flag"
|
|
@@ -722,23 +730,13 @@ def _trunc_content(content, mx):
|
|
|
722
730
|
def mk_tr_details(tr, mx=2000):
|
|
723
731
|
"Create <details> block for tool call as JSON"
|
|
724
732
|
args = {k:_trunc_str(v, mx=mx*5) for k,v in tr.data['arguments'].items()}
|
|
725
|
-
res = {'id':tr.data['id'], 'server':False,
|
|
733
|
+
res = {'id':tr.data['id'], 'server':tr.data.get('server', False),
|
|
726
734
|
'call':{'function': tr.data['name'], 'arguments': args},
|
|
727
735
|
'result':_trunc_content(tr.text, mx=mx),}
|
|
728
736
|
summ = f"<summary>{_tc_summary(tr)}</summary>"
|
|
729
737
|
return f"\n\n{tool_dtls_tag}\n{summ}\n\n```json\n{dumps(res, indent=2, ensure_ascii=False)}\n```\n\n</details>\n\n"
|
|
730
738
|
|
|
731
|
-
# %% ../nbs/07_chat.ipynb #3049001c
|
|
732
|
-
def mk_srv_tc_details(tc, mx=2000):
|
|
733
|
-
"Create <details> block for tool call as JSON"
|
|
734
|
-
args = {k:_trunc_str(v, mx=mx*5) for k,v in tc.arguments.items()}
|
|
735
|
-
res = {'id':tc.id, 'server':True, 'call':{'function': tc.name, 'arguments': args}, 'result':"Server tool call executed."}
|
|
736
|
-
summ = f"<summary>{_srv_tc_summary(tc)}</summary>"
|
|
737
|
-
return f"\n\n{tool_dtls_tag}\n{summ}\n\n```json\n{dumps(res, indent=2, ensure_ascii=False)}\n```\n\n</details>\n\n"
|
|
738
|
-
|
|
739
739
|
# %% ../nbs/07_chat.ipynb #f0d984ec
|
|
740
|
-
# status_re = re.compile(r'^- ⏳ <code>(.*)</code> ⏳$|^🧠+$', re.MULTILINE) # TODO: Need to yield tool calls as they are done collated in fastllm `_acollect_stream`
|
|
741
|
-
|
|
742
740
|
class StreamFormatter:
|
|
743
741
|
def __init__(self, mx=2000, debug=False, showthink=False):
|
|
744
742
|
self.outp,self.tcs = '',{}
|
|
@@ -754,8 +752,8 @@ class StreamFormatter:
|
|
|
754
752
|
res+= '🧠' if not self.outp or self.outp[-1]=='🧠' else '\n\n🧠'
|
|
755
753
|
elif self.outp and self.outp[-1] == '🧠': res+= '\n\n'
|
|
756
754
|
if txt:=o.get('text'): res+=f"\n\n{txt}" if res and res[-1] == '🧠' else txt
|
|
757
|
-
if isinstance(o,
|
|
758
|
-
res +=
|
|
755
|
+
if isinstance(o, Part) and o.type==PartType.tool_use:
|
|
756
|
+
res += f"\n- ⏳ {_tc_summary(o)} ⏳\n"
|
|
759
757
|
if isinstance(o, Part) and o.type == PartType.tool_result:
|
|
760
758
|
res += mk_tr_details(o,mx=self.mx)
|
|
761
759
|
self.outp+=res
|
|
@@ -195,7 +195,7 @@ def get_hdrs(api_key=None):
|
|
|
195
195
|
# %% ../nbs/03_oai_chat.ipynb #f89e2bf6
|
|
196
196
|
def cost(usage, m):
|
|
197
197
|
raw = usage.raw
|
|
198
|
-
pd,
|
|
198
|
+
pd,cd = raw.get('prompt_tokens_details') or {},raw.get('completion_tokens_details') or {}
|
|
199
199
|
cached = pd.get('cached_tokens', 0)
|
|
200
200
|
in_audio, out_audio = pd.get('audio_tokens', 0), cd.get('audio_tokens', 0)
|
|
201
201
|
in_txt = raw['prompt_tokens'] - cached - in_audio
|
|
@@ -105,14 +105,14 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
|
|
|
105
105
|
idx,last_idx = index_fn(d, typ, last_typ, last_idx)
|
|
106
106
|
return idx
|
|
107
107
|
def _proc(d, name, pt=None, kw='txt', ret=None):
|
|
108
|
-
if not ret and not (val := getattr(d, name)): return
|
|
108
|
+
if not ret and not (val := getattr(d, name)): return None, None
|
|
109
109
|
idx = _fidx(d, name, pt)
|
|
110
110
|
part_accum.append(typ, idx, **(ret or {kw: val}))
|
|
111
|
-
return ret or {name: val}
|
|
111
|
+
return ret or {name: val}, idx
|
|
112
112
|
def _yield_parts(d):
|
|
113
113
|
for args in [('text',), ('thinking',), ('citations', 'text', 'citations')]:
|
|
114
|
-
|
|
115
|
-
|
|
114
|
+
r = _proc(d, args[0], pt=args[1] if len(args)>1 else None, kw=args[2] if len(args)>2 else 'txt')
|
|
115
|
+
if r[0]: yield r[0]
|
|
116
116
|
stop, stop_yielded = False, False
|
|
117
117
|
async for d in it:
|
|
118
118
|
# Check stop condition and yield stop delta
|
|
@@ -127,11 +127,26 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
|
|
|
127
127
|
# Rest incl. tools, finish reason, usage is processed independently
|
|
128
128
|
for tc in d.tool_calls:
|
|
129
129
|
args = tc.arguments.get('_delta', tc.arguments)
|
|
130
|
-
_proc(d, 'tool_use', ret=dict(id=tc.id, name=tc.name, arguments=args, server=tc.server, extra=tc.extra))
|
|
130
|
+
_, idx = _proc(d, 'tool_use', ret=dict(id=tc.id, name=tc.name, arguments=args, server=tc.server, extra=tc.extra))
|
|
131
|
+
if (isinstance(args, str) and args.endswith('}')) or (isinstance(args, dict) and '_delta' not in tc.arguments): # tool call ready
|
|
132
|
+
if isinstance(args, str):
|
|
133
|
+
try: args = json.loads(part_accum.parts[idx].arguments) if args else {}
|
|
134
|
+
except json.JSONDecodeError: continue
|
|
135
|
+
acc = part_accum.parts[idx]
|
|
136
|
+
acc.arguments = args
|
|
137
|
+
data = {**acc.extra, 'id':acc.id, 'name':acc.name, 'arguments':args, 'server':acc.server}
|
|
138
|
+
yield Part(type=PartType.tool_use, data=data)
|
|
139
|
+
# Server tool results for anthropic are yielded in d.server_tool_result by checking injected dummy `_delta`
|
|
140
|
+
if acc.server and '_delta' not in tc.arguments: yield Part(type=PartType.tool_result, text="Server tool call executed.", data=data)
|
|
131
141
|
if d.server_tool_result:
|
|
132
142
|
idx = _fidx(d, 'server_tool_result')
|
|
133
143
|
part_accum.parts[idx] = Part(type=typ, data=d.server_tool_result)
|
|
134
|
-
|
|
144
|
+
srv_tc = next((p for p in reversed(list(part_accum.parts.values())) if isinstance(p, ToolCall) and p.server), None)
|
|
145
|
+
if srv_tc:
|
|
146
|
+
data = {**srv_tc.extra, 'id':srv_tc.id, 'name':srv_tc.name, 'arguments':srv_tc.arguments, 'server':True}
|
|
147
|
+
yield Part(type=PartType.tool_result, text="Server tool call executed.", data=data)
|
|
148
|
+
r = _proc(d, 'refusal')
|
|
149
|
+
if r[0]: yield r[0]
|
|
135
150
|
if d.finish_reason: fin = d.finish_reason
|
|
136
151
|
if d.usage: usg = d.usage
|
|
137
152
|
last_typ = typ
|
|
@@ -142,8 +157,7 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
|
|
|
142
157
|
if stop: fin = FinishReason.stop
|
|
143
158
|
fin = FinishReason.tool_calls if fin==FinishReason.stop and any(~L(tcs).attrgot('server')) else fin # recheck tool calls post collation
|
|
144
159
|
# tool calls and non-anthropic citations are yielded at the end
|
|
145
|
-
yield Completion(
|
|
160
|
+
yield Completion(model,
|
|
146
161
|
message=Msg(role="assistant", content=part_accum.parts),
|
|
147
162
|
finish_reason=fin, usage=usg, tool_calls=tcs, api_name=api_name, vendor_name=vendor_name,
|
|
148
163
|
raw={'deltas':deltas})
|
|
149
|
-
|
|
@@ -5,12 +5,14 @@
|
|
|
5
5
|
# %% auto #0
|
|
6
6
|
__all__ = ['PartType', 'FinishReason', 'api_registry', 'model_prices_url', 'haik45', 'sonn45', 'sonn', 'sonn46', 'opus46', 'opus',
|
|
7
7
|
'gpt54', 'gpt54m', 'gpt55', 'codex54', 'codex54m', 'codex55', 'codex53spark', 'model_info_registry',
|
|
8
|
-
'
|
|
9
|
-
'
|
|
10
|
-
'
|
|
11
|
-
'register_model_info', 'get_model_info', 'get_model_pricing',
|
|
8
|
+
'modern_llm', 'deepseek_v4_common', 'mimo_v25_common', 'codex_pricing', 'Part', 'Msg', 'ToolCall',
|
|
9
|
+
'display_list', 'Usage', 'Completion', 'APIRegistry', 'mk_completion', 'mk_tool_res_msg', 'fn_schema',
|
|
10
|
+
'sys_text', 'part_txt', 'data_url', 'url_mime', 'payload_kwargs', 'get_api_key', 'model_prices_meta',
|
|
11
|
+
'infer_api_name', 'get_model_meta', 'register_model_info', 'get_model_info', 'get_model_pricing',
|
|
12
|
+
'approx_pricing']
|
|
12
13
|
|
|
13
14
|
# %% ../nbs/00_types.ipynb #b4d047fd
|
|
15
|
+
import httpx
|
|
14
16
|
from dataclasses import dataclass, field
|
|
15
17
|
from fastcore.net import urljson
|
|
16
18
|
from fastcore.utils import *
|
|
@@ -28,14 +30,16 @@ PartType = str_enum('PartType', 'text', 'thinking', 'refusal', 'tool_use', 'serv
|
|
|
28
30
|
'input_image', 'input_audio', 'input_video', 'input_file')
|
|
29
31
|
|
|
30
32
|
# %% ../nbs/00_types.ipynb #2eeff103
|
|
31
|
-
def _trunc_strs(
|
|
32
|
-
"
|
|
33
|
-
if not
|
|
34
|
-
|
|
33
|
+
def _trunc_strs(o, n=200):
|
|
34
|
+
"Truncate str or dict"
|
|
35
|
+
if not o: return o
|
|
36
|
+
if isinstance(o,str) and len(o)>n: return o[:100]+'...'
|
|
37
|
+
if isinstance(o,dict): return {k: (v[:100]+'...' if isinstance(v,str) and len(v)>n else v) for k,v in o.items()}
|
|
38
|
+
return o
|
|
35
39
|
|
|
36
40
|
@patch
|
|
37
41
|
def _repr_markdown_(self: Part):
|
|
38
|
-
body = self.text if self.text else ''
|
|
42
|
+
body = _trunc_strs(self.text) if self.text else ''
|
|
39
43
|
data = _trunc_strs(self.data)
|
|
40
44
|
return f"""**Part** (`{self.type}`)
|
|
41
45
|
|
|
@@ -161,7 +165,7 @@ def mk_completion(resp, model, api_name, vendor_name):
|
|
|
161
165
|
parts = api.norm_parts(resp)
|
|
162
166
|
usg = api.finalize_usage(api.norm_usage(resp), parts)
|
|
163
167
|
return Completion(
|
|
164
|
-
model=
|
|
168
|
+
model=model,
|
|
165
169
|
message=Msg(role="assistant", content=parts),
|
|
166
170
|
finish_reason=api.norm_finish(resp, tcs),
|
|
167
171
|
usage=usg,
|
|
@@ -170,7 +174,6 @@ def mk_completion(resp, model, api_name, vendor_name):
|
|
|
170
174
|
vendor_name=vendor_name,
|
|
171
175
|
raw=resp)
|
|
172
176
|
|
|
173
|
-
|
|
174
177
|
# %% ../nbs/00_types.ipynb #d5322db5
|
|
175
178
|
def mk_tool_res_msg(tool_calls:list[ToolCall], results:list[str|list]):
|
|
176
179
|
'A util to prepare parallel tool call with str or media list results'
|
|
@@ -198,7 +201,17 @@ def sys_text(system):
|
|
|
198
201
|
|
|
199
202
|
def part_txt(p): return p.text if isinstance(p,Part) else p
|
|
200
203
|
|
|
201
|
-
# %% ../nbs/00_types.ipynb #
|
|
204
|
+
# %% ../nbs/00_types.ipynb #f3deb055
|
|
205
|
+
@flexicache(time_policy(24*3600))
|
|
206
|
+
def _fetch_url_partial(url, nbytes=512):
|
|
207
|
+
"Fetch remote media bytes, optionally only first `nbytes`."
|
|
208
|
+
try:
|
|
209
|
+
with httpx.stream('GET', url, headers={'Range': f'bytes=0-{nbytes-1}'}, follow_redirects=True) as r:
|
|
210
|
+
if r.status_code not in (200, 206): return
|
|
211
|
+
return r.read()
|
|
212
|
+
except (httpx.HTTPError, httpx.InvalidURL): return
|
|
213
|
+
|
|
214
|
+
# %% ../nbs/00_types.ipynb #70a9a0c3
|
|
202
215
|
_ext_mime = {
|
|
203
216
|
'.jpg':'image/jpeg', '.jpeg':'image/jpeg', '.png':'image/png', '.gif':'image/gif', '.webp':'image/webp',
|
|
204
217
|
'.pdf':'application/pdf',
|
|
@@ -214,18 +227,19 @@ def data_url(url):
|
|
|
214
227
|
return header[5:].split(';',1)[0].strip() or 'application/octet-stream', body
|
|
215
228
|
|
|
216
229
|
def url_mime(url, default='application/octet-stream'):
|
|
217
|
-
"Guess mime from URL extension."
|
|
230
|
+
"Guess mime from URL extension, and optional bytes fallback."
|
|
231
|
+
if "youtube.com" in url or "youtu.be" in url: return "video/mp4"
|
|
218
232
|
ext = '.' + url.rsplit('.', 1)[-1].split('?')[0].lower() if '.' in url.split('?')[0].split('/')[-1] else ''
|
|
219
|
-
|
|
233
|
+
if (mime:=_ext_mime.get(ext)) is None: return detect_mime(_fetch_url_partial(url))
|
|
234
|
+
return ifnone(mime, default)
|
|
220
235
|
|
|
221
236
|
# %% ../nbs/00_types.ipynb #28c698fe
|
|
222
|
-
def payload_kwargs(msgs, model, stream=False, system=None, max_tokens=None, temperature=None, tools=None, tool_choice=None, reasoning_effort=None, web_search_options=None, stop_callables=None
|
|
237
|
+
def payload_kwargs(msgs, model, stream=False, system=None, max_tokens=None, temperature=None, tools=None, tool_choice=None, reasoning_effort=None, web_search_options=None, stop_callables=None): pass
|
|
223
238
|
|
|
224
239
|
# %% ../nbs/00_types.ipynb #c2a2cb49
|
|
225
240
|
def get_api_key(api_key, default):
|
|
226
|
-
err = ValueError(f"Missing API key: make sure to have the expected env var name or pass `api_key`")
|
|
227
241
|
key = api_key or os.getenv(default)
|
|
228
|
-
if not key: raise
|
|
242
|
+
if not key: raise ValueError(f"Missing API key: set environment variable '{default}' or pass `api_key` parameter")
|
|
229
243
|
return key
|
|
230
244
|
|
|
231
245
|
# %% ../nbs/00_types.ipynb #852adecd
|
|
@@ -257,7 +271,7 @@ haik45 = "claude-haiku-4-5"
|
|
|
257
271
|
sonn45 = "claude-sonnet-4-5"
|
|
258
272
|
sonn = sonn46 = "claude-sonnet-4-6"
|
|
259
273
|
opus46 = "claude-opus-4-6"
|
|
260
|
-
opus = "claude-opus-4-
|
|
274
|
+
opus = "claude-opus-4-8"
|
|
261
275
|
gpt54 = "gpt-5.4"
|
|
262
276
|
gpt54m = "gpt-5.4-mini"
|
|
263
277
|
gpt55 = "gpt-5.5"
|
|
@@ -273,6 +287,8 @@ def register_model_info(model, vendor_name=None, base=None, base_vendor_name=Non
|
|
|
273
287
|
"Register model metadata, optionally starting from `base`."
|
|
274
288
|
info = dict(get_model_info(base, base_vendor_name or vendor_name)) if base else {}
|
|
275
289
|
info.update(overrides)
|
|
290
|
+
if isinstance(c := info.get('search_context_cost_per_query'), (int,float)):
|
|
291
|
+
info['search_context_cost_per_query'] = {f'search_context_size_{s}':c for s in ('low','medium','high')}
|
|
276
292
|
model_info_registry[vendor_name, model] = info
|
|
277
293
|
|
|
278
294
|
def get_model_info(mn, vendor_name=None):
|
|
@@ -280,11 +296,17 @@ def get_model_info(mn, vendor_name=None):
|
|
|
280
296
|
if 'search_context_cost_per_query' in info: info['supports_web_search'] = True
|
|
281
297
|
return dict2obj(info)
|
|
282
298
|
|
|
299
|
+
# %% ../nbs/00_types.ipynb #331c5d0a
|
|
300
|
+
register_model_info("claude-opus-4-8", vendor_name='anthropic', base="claude-opus-4-6")
|
|
301
|
+
|
|
302
|
+
# %% ../nbs/00_types.ipynb #b36178d4
|
|
303
|
+
modern_llm = dict(supports_function_calling=True, supports_tool_choice=True, supports_prompt_caching=True,
|
|
304
|
+
supports_parallel_function_calling=True, supports_native_streaming=True, supports_native_structured_output=True,
|
|
305
|
+
supports_reasoning=True, supports_response_schema=True, supports_system_messages=True)
|
|
306
|
+
|
|
283
307
|
# %% ../nbs/00_types.ipynb #8261dcd0
|
|
284
|
-
register_model_info('accounts/fireworks/models/qwen3p6-plus', vendor_name='fireworks_ai',
|
|
285
|
-
supports_vision=True,
|
|
286
|
-
supports_system_messages=True, supports_response_schema=True, supports_parallel_function_calling=True,
|
|
287
|
-
supports_prompt_caching=True, supports_native_streaming=True, supports_native_structured_output=True,
|
|
308
|
+
register_model_info('accounts/fireworks/models/qwen3p6-plus', vendor_name='fireworks_ai', **modern_llm,
|
|
309
|
+
supports_vision=True,
|
|
288
310
|
max_tokens=1000000, max_input_tokens=1000000, max_output_tokens=65536,
|
|
289
311
|
input_cost_per_token=0.5e-6, cache_read_input_token_cost=0.1e-6, output_cost_per_token=3.0e-6)
|
|
290
312
|
|
|
@@ -308,9 +330,7 @@ for model in ('accounts/fireworks/models/kimi-k2p5', 'accounts/fireworks/models/
|
|
|
308
330
|
input_cost_per_token=0.95e-6, cache_read_input_token_cost=0.16e-6, output_cost_per_token=4.0e-6)
|
|
309
331
|
|
|
310
332
|
# %% ../nbs/00_types.ipynb #948d55d0
|
|
311
|
-
deepseek_v4_common = dict(
|
|
312
|
-
supports_assistant_prefill=True, supports_function_calling=True, supports_prompt_caching=True,
|
|
313
|
-
supports_reasoning=True, supports_tool_choice=True,
|
|
333
|
+
deepseek_v4_common = dict(**modern_llm, supports_assistant_prefill=True,
|
|
314
334
|
max_input_tokens=1048576, max_output_tokens=393216, max_tokens=393216)
|
|
315
335
|
|
|
316
336
|
register_model_info('deepseek-v4-flash', vendor_name='deepseek', base='deepseek/deepseek-v3.2', **deepseek_v4_common,
|
|
@@ -320,19 +340,26 @@ register_model_info('deepseek-v4-pro', vendor_name='deepseek', base='deepseek/de
|
|
|
320
340
|
input_cost_per_token=4.35e-07, input_cost_per_token_cache_hit=3.625e-09,
|
|
321
341
|
output_cost_per_token=8.7e-07, cache_read_input_token_cost=4.35e-07/10)
|
|
322
342
|
|
|
343
|
+
mimo_v25_common = dict(**modern_llm, supports_web_search=True, max_input_tokens=1048576, max_output_tokens=131072, max_tokens=131072)
|
|
344
|
+
|
|
345
|
+
register_model_info('mimo-v2.5-pro', vendor_name='mimo', **mimo_v25_common, base='deepseek/deepseek-v4-pro',
|
|
346
|
+
input_cost_per_token=0.435e-6, output_cost_per_token=0.87e-6, cache_read_input_token_cost=0.0036e-6, search_context_cost_per_query=0.005)
|
|
347
|
+
register_model_info('mimo-v2.5', vendor_name='mimo', **mimo_v25_common, base='deepseek/deepseek-v4',
|
|
348
|
+
input_cost_per_token=0.14e-6, output_cost_per_token=0.28e-6, cache_read_input_token_cost=0.0028e-6, search_context_cost_per_query=0.005,
|
|
349
|
+
supports_vision=True, supports_image_input=True)
|
|
350
|
+
|
|
323
351
|
# %% ../nbs/00_types.ipynb #2c23d11e
|
|
324
352
|
codex_pricing = dict(
|
|
325
353
|
input_cost_per_token = 0.10/1_000_000, output_cost_per_token = 0.50/1_000_000,
|
|
326
354
|
cache_creation_input_token_cost = 0.10/1_000_000, cache_read_input_token_cost = 0.10/1_000_000)
|
|
327
355
|
|
|
328
356
|
for model in (codex54, codex54m, codex55):
|
|
329
|
-
register_model_info(model, 'codex', base=model, base_vendor_name='chatgpt', supports_web_search=True, **codex_pricing)
|
|
357
|
+
register_model_info(model, 'codex', base=model, base_vendor_name='chatgpt', supports_web_search=True, max_input_tokens=256000, **codex_pricing)
|
|
330
358
|
|
|
331
359
|
register_model_info(codex53spark, 'codex', **codex_pricing,
|
|
332
|
-
supports_vision=False, supports_image_input=False, supports_web_search=True, supports_reasoning=True,
|
|
360
|
+
supports_vision=False, supports_image_input=False, supports_web_search=True, supports_reasoning=True, supports_function_calling=True,
|
|
333
361
|
max_tokens=128000, max_input_tokens=128000, max_output_tokens=128000)
|
|
334
362
|
|
|
335
|
-
|
|
336
363
|
# %% ../nbs/00_types.ipynb #24cc47ec
|
|
337
364
|
def get_model_pricing(mn, vendor_name, million=True):
|
|
338
365
|
return {k:round(v * (1e6 if million else 1), 6)
|
|
@@ -345,7 +372,7 @@ def approx_pricing(nm, vendor_name, out=10, cache=80, inp=10, markup=0):
|
|
|
345
372
|
p = get_model_pricing(nm, vendor_name)
|
|
346
373
|
ic = p.get('cache_creation_input_token_cost', p['input_cost_per_token'])
|
|
347
374
|
res = (p['output_cost_per_token']*out + p['cache_read_input_token_cost']*cache + ic*inp) / (out+cache+inp)
|
|
348
|
-
if nm
|
|
375
|
+
if nm in ('claude-opus-4-7','claude-opus-4-8'): res *= 1.5
|
|
349
376
|
return res*(1+markup)
|
|
350
377
|
|
|
351
378
|
# %% ../nbs/00_types.ipynb #8bfca02d
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.0.9"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|