python-fastllm 0.0.7__tar.gz → 0.0.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_fastllm-0.0.7 → python_fastllm-0.0.9}/PKG-INFO +1 -1
- python_fastllm-0.0.9/fastllm/__init__.py +1 -0
- {python_fastllm-0.0.7 → python_fastllm-0.0.9}/fastllm/_modidx.py +6 -0
- {python_fastllm-0.0.7 → python_fastllm-0.0.9}/fastllm/acomplete.py +17 -1
- {python_fastllm-0.0.7 → python_fastllm-0.0.9}/fastllm/anthropic.py +15 -13
- {python_fastllm-0.0.7 → python_fastllm-0.0.9}/fastllm/chat.py +13 -8
- python_fastllm-0.0.9/fastllm/codex.py +7 -0
- {python_fastllm-0.0.7 → python_fastllm-0.0.9}/fastllm/streaming.py +2 -0
- {python_fastllm-0.0.7 → python_fastllm-0.0.9}/fastllm/types.py +92 -55
- {python_fastllm-0.0.7 → python_fastllm-0.0.9}/python_fastllm.egg-info/PKG-INFO +1 -1
- {python_fastllm-0.0.7 → python_fastllm-0.0.9}/python_fastllm.egg-info/SOURCES.txt +1 -0
- python_fastllm-0.0.7/fastllm/__init__.py +0 -1
- {python_fastllm-0.0.7 → python_fastllm-0.0.9}/README.md +0 -0
- {python_fastllm-0.0.7 → python_fastllm-0.0.9}/fastllm/gemini.py +0 -0
- {python_fastllm-0.0.7 → python_fastllm-0.0.9}/fastllm/openai_chat.py +0 -0
- {python_fastllm-0.0.7 → python_fastllm-0.0.9}/fastllm/openai_responses.py +0 -0
- {python_fastllm-0.0.7 → python_fastllm-0.0.9}/fastllm/specs/anthropic.json +0 -0
- {python_fastllm-0.0.7 → python_fastllm-0.0.9}/fastllm/specs/anthropic.yml +0 -0
- {python_fastllm-0.0.7 → python_fastllm-0.0.9}/fastllm/specs/gemini.json +0 -0
- {python_fastllm-0.0.7 → python_fastllm-0.0.9}/fastllm/specs/openai.with-code-samples.json +0 -0
- {python_fastllm-0.0.7 → python_fastllm-0.0.9}/fastllm/specs/openai.with-code-samples.yml +0 -0
- {python_fastllm-0.0.7 → python_fastllm-0.0.9}/fastllm/specs/spec_manifest.json +0 -0
- {python_fastllm-0.0.7 → python_fastllm-0.0.9}/pyproject.toml +0 -0
- {python_fastllm-0.0.7 → python_fastllm-0.0.9}/python_fastllm.egg-info/dependency_links.txt +0 -0
- {python_fastllm-0.0.7 → python_fastllm-0.0.9}/python_fastllm.egg-info/entry_points.txt +0 -0
- {python_fastllm-0.0.7 → python_fastllm-0.0.9}/python_fastllm.egg-info/requires.txt +0 -0
- {python_fastllm-0.0.7 → python_fastllm-0.0.9}/python_fastllm.egg-info/top_level.txt +0 -0
- {python_fastllm-0.0.7 → python_fastllm-0.0.9}/setup.cfg +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.0.9"
|
|
@@ -10,6 +10,7 @@ d = { 'settings': { 'branch': 'main',
|
|
|
10
10
|
'fastllm.acomplete._classify_error': ('acomplete.html#_classify_error', 'fastllm/acomplete.py'),
|
|
11
11
|
'fastllm.acomplete._classify_error_stream': ( 'acomplete.html#_classify_error_stream',
|
|
12
12
|
'fastllm/acomplete.py'),
|
|
13
|
+
'fastllm.acomplete._debug_print': ('acomplete.html#_debug_print', 'fastllm/acomplete.py'),
|
|
13
14
|
'fastllm.acomplete._is_ctx_exceeded': ('acomplete.html#_is_ctx_exceeded', 'fastllm/acomplete.py'),
|
|
14
15
|
'fastllm.acomplete.acomplete': ('acomplete.html#acomplete', 'fastllm/acomplete.py'),
|
|
15
16
|
'fastllm.acomplete.mk_client': ('acomplete.html#mk_client', 'fastllm/acomplete.py')},
|
|
@@ -31,6 +32,7 @@ d = { 'settings': { 'branch': 'main',
|
|
|
31
32
|
'fastllm.anthropic.denorm_tool_use': ('anthropic.html#denorm_tool_use', 'fastllm/anthropic.py'),
|
|
32
33
|
'fastllm.anthropic.denorm_user': ('anthropic.html#denorm_user', 'fastllm/anthropic.py'),
|
|
33
34
|
'fastllm.anthropic.denorm_web_search': ('anthropic.html#denorm_web_search', 'fastllm/anthropic.py'),
|
|
35
|
+
'fastllm.anthropic.finalize_usage': ('anthropic.html#finalize_usage', 'fastllm/anthropic.py'),
|
|
34
36
|
'fastllm.anthropic.get_hdrs': ('anthropic.html#get_hdrs', 'fastllm/anthropic.py'),
|
|
35
37
|
'fastllm.anthropic.mk_payload': ('anthropic.html#mk_payload', 'fastllm/anthropic.py'),
|
|
36
38
|
'fastllm.anthropic.norm_finish': ('anthropic.html#norm_finish', 'fastllm/anthropic.py'),
|
|
@@ -145,6 +147,7 @@ d = { 'settings': { 'branch': 'main',
|
|
|
145
147
|
'fastllm.chat.stop_reason': ('chat.html#stop_reason', 'fastllm/chat.py'),
|
|
146
148
|
'fastllm.chat.stop_sequences': ('chat.html#stop_sequences', 'fastllm/chat.py'),
|
|
147
149
|
'fastllm.chat.structured': ('chat.html#structured', 'fastllm/chat.py')},
|
|
150
|
+
'fastllm.codex': {},
|
|
148
151
|
'fastllm.gemini': { 'fastllm.gemini._gem_filter_sch': ('gemini.html#_gem_filter_sch', 'fastllm/gemini.py'),
|
|
149
152
|
'fastllm.gemini._gem_part_type': ('gemini.html#_gem_part_type', 'fastllm/gemini.py'),
|
|
150
153
|
'fastllm.gemini.acollect_stream': ('gemini.html#acollect_stream', 'fastllm/gemini.py'),
|
|
@@ -268,17 +271,20 @@ d = { 'settings': { 'branch': 'main',
|
|
|
268
271
|
'fastllm.types.ToolCall._repr_markdown_': ('types.html#toolcall._repr_markdown_', 'fastllm/types.py'),
|
|
269
272
|
'fastllm.types.Usage': ('types.html#usage', 'fastllm/types.py'),
|
|
270
273
|
'fastllm.types._trunc_strs': ('types.html#_trunc_strs', 'fastllm/types.py'),
|
|
274
|
+
'fastllm.types.approx_pricing': ('types.html#approx_pricing', 'fastllm/types.py'),
|
|
271
275
|
'fastllm.types.data_url': ('types.html#data_url', 'fastllm/types.py'),
|
|
272
276
|
'fastllm.types.display_list': ('types.html#display_list', 'fastllm/types.py'),
|
|
273
277
|
'fastllm.types.fn_schema': ('types.html#fn_schema', 'fastllm/types.py'),
|
|
274
278
|
'fastllm.types.get_api_key': ('types.html#get_api_key', 'fastllm/types.py'),
|
|
275
279
|
'fastllm.types.get_model_info': ('types.html#get_model_info', 'fastllm/types.py'),
|
|
276
280
|
'fastllm.types.get_model_meta': ('types.html#get_model_meta', 'fastllm/types.py'),
|
|
281
|
+
'fastllm.types.get_model_pricing': ('types.html#get_model_pricing', 'fastllm/types.py'),
|
|
277
282
|
'fastllm.types.infer_api_name': ('types.html#infer_api_name', 'fastllm/types.py'),
|
|
278
283
|
'fastllm.types.mk_completion': ('types.html#mk_completion', 'fastllm/types.py'),
|
|
279
284
|
'fastllm.types.mk_tool_res_msg': ('types.html#mk_tool_res_msg', 'fastllm/types.py'),
|
|
280
285
|
'fastllm.types.model_prices_meta': ('types.html#model_prices_meta', 'fastllm/types.py'),
|
|
281
286
|
'fastllm.types.part_txt': ('types.html#part_txt', 'fastllm/types.py'),
|
|
282
287
|
'fastllm.types.payload_kwargs': ('types.html#payload_kwargs', 'fastllm/types.py'),
|
|
288
|
+
'fastllm.types.register_model_info': ('types.html#register_model_info', 'fastllm/types.py'),
|
|
283
289
|
'fastllm.types.sys_text': ('types.html#sys_text', 'fastllm/types.py'),
|
|
284
290
|
'fastllm.types.url_mime': ('types.html#url_mime', 'fastllm/types.py')}}}
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/06_acomplete.ipynb.
|
|
4
4
|
|
|
5
5
|
# %% auto #0
|
|
6
|
-
__all__ = ['specs_path', 'ant_spec', 'oai_spec', 'gem_spec', 'vendor_mapping', 'api2spec', 'mk_client',
|
|
6
|
+
__all__ = ['specs_path', 'ant_spec', 'oai_spec', 'gem_spec', 'vendor_mapping', 'api2spec', 'defaults', 'mk_client',
|
|
7
7
|
'ContextWindowExceededError', 'acomplete']
|
|
8
8
|
|
|
9
9
|
# %% ../nbs/06_acomplete.ipynb #f2f57253
|
|
@@ -98,6 +98,21 @@ async def _classify_error_stream(gen):
|
|
|
98
98
|
async for x in gen: yield x
|
|
99
99
|
except APIError as e: raise _classify_error(e) from e
|
|
100
100
|
|
|
101
|
+
# %% ../nbs/06_acomplete.ipynb #f626a4e1
|
|
102
|
+
defaults = SimpleNamespace(debug_mode=False)

def _tool_label(tool):
    "Short display label for one entry of a payload's `tools` list; always a `str`"
    if not isinstance(tool, dict): return str(tool)
    if 'name' in tool: return str(tool['name'])
    # Chat-completions style schemas nest the name under a `function` key
    fn = tool.get('function')
    if isinstance(fn, dict) and 'name' in fn: return str(fn['name'])
    # Last resort is the entry itself; `str` keeps `'; '.join` from raising on a dict
    return str(tool.get('type', tool))

def _debug_print(model, api_name, vendor_name, payload, func):
    """Pretty-print acomplete inputs when defaults.debug_mode is set

    `payload` is shallow-copied, so the caller's dict is never modified.
    When `defaults.debug_mode == 'brief'` the `tools` entry is collapsed to a
    single '; '-separated line of tool labels. `func` must expose `base_url` and `path`."""
    from pprint import pformat
    p = dict(payload)
    if defaults.debug_mode == 'brief' and 'tools' in p:
        p['tools'] = '; '.join(_tool_label(o) for o in p['tools'])
    print('━'*60)
    print(f"\033[1;36mfastllm debug\033[0m model={model} vendor={vendor_name} api={api_name} base_url={func.base_url} path={func.path}")
    print('─'*60)
    print(f"\033[1;33mpayload:\033[0m\n{pformat(p, width=120, sort_dicts=False)}")
    print('━'*60)
|
|
115
|
+
|
|
101
116
|
# %% ../nbs/06_acomplete.ipynb #2379ec94
|
|
102
117
|
@delegates(payload_kwargs)
|
|
103
118
|
async def acomplete(msgs, model, api_name=None, vendor_name=None, api_key=None, base_url=None, xtra_body=None, xtra_hdrs=None,
|
|
@@ -114,6 +129,7 @@ async def acomplete(msgs, model, api_name=None, vendor_name=None, api_key=None,
|
|
|
114
129
|
if vendor_name == 'deepseek' and 'v4' in model: payload['messages'][-1]['prefix'] = True
|
|
115
130
|
if vendor_name == 'moonshot' and 'kimi' in model: payload['messages'][-1]['partial'] = True
|
|
116
131
|
func = attrgetter(api.op_path[stream])(cli)
|
|
132
|
+
if defaults.debug_mode: _debug_print(model, api_name, vendor_name, payload, func)
|
|
117
133
|
try: resp = await func(**payload)
|
|
118
134
|
except APIError as e: raise _classify_error(e) from e
|
|
119
135
|
if stream: return _classify_error_stream(api.acollect_stream(resp, model=model, vendor_name=vendor_name, stop_callables=stop_callables))
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/04_anthropic.ipynb.
|
|
2
2
|
|
|
3
3
|
# %% auto #0
|
|
4
|
-
__all__ = ['ant_tc_types', '
|
|
4
|
+
__all__ = ['ant_tc_types', 'norm_tool_call', 'norm_tool_calls', 'norm_usage', 'finalize_usage', 'norm_finish', 'norm_parts',
|
|
5
5
|
'norm_sse_event', 'delta_index_fn', 'acollect_stream', 'denorm_tool_use', 'denorm_assistant', 'denorm_tool',
|
|
6
6
|
'denorm_msgs', 'denorm_tool_schs', 'denorm_tool_choice', 'denorm_reasoning', 'denorm_web_search',
|
|
7
7
|
'denorm_system', 'denorm_user', 'denorm_image', 'denorm_file', 'denorm_tool_result', 'mk_payload',
|
|
@@ -42,7 +42,16 @@ def norm_usage(resp):
|
|
|
42
42
|
pt = int(usg.get("input_tokens", 0) or 0) + cached + cache_creation
|
|
43
43
|
ct = int(usg.get("output_tokens", 0) or 0)
|
|
44
44
|
return Usage(prompt_tokens=pt, completion_tokens=ct, total_tokens=pt + ct,
|
|
45
|
-
cached_tokens=cached, cache_creation_tokens=cache_creation, raw=usg)
|
|
45
|
+
cached_tokens=cached, cache_creation_tokens=cache_creation, reasoning_tokens=0, raw=usg)
|
|
46
|
+
|
|
47
|
+
def finalize_usage(usg, parts):
    """Split Anthropic output tokens into completion and estimated reasoning tokens.

    Returns `usg` unchanged when it is falsy. Otherwise builds a new `Usage` where
    `reasoning_tokens` is estimated from the text of the thinking parts and
    `completion_tokens` is the remaining output tokens; `total_tokens` still counts all output."""
    if not usg: return usg
    thinking = [p.text or '' for p in parts if p.type == PartType.thinking]
    rc = '\n'.join(thinking)
    # Output count is read from the raw usage dict, falling back to the normalized value
    ct = int(usg.raw.get('output_tokens', usg.completion_tokens) or 0)
    # Estimate: 1.5 tokens per whitespace-separated word, never more than the output count
    rt = 0
    if rc: rt = min(int(len(rc.split())*1.5), ct)
    return Usage(
        prompt_tokens=usg.prompt_tokens, completion_tokens=ct-rt, total_tokens=usg.prompt_tokens+ct,
        cached_tokens=usg.cached_tokens, cache_creation_tokens=usg.cache_creation_tokens,
        reasoning_tokens=rt, raw=usg.raw)
|
|
46
55
|
|
|
47
56
|
# %% ../nbs/04_anthropic.ipynb #7a8b1f8f
|
|
48
57
|
def norm_finish(resp, tcs=None):
|
|
@@ -197,7 +206,7 @@ def denorm_reasoning(v):
|
|
|
197
206
|
def denorm_web_search(v):
|
|
198
207
|
"Map canonical web_search_options to Anthropic hosted web_search tool."
|
|
199
208
|
_max_uses = {"low": 1, "medium": 5, "high": 10}
|
|
200
|
-
t = {"type": "
|
|
209
|
+
t = {"type": "web_search_20250305", "name": "web_search"}
|
|
201
210
|
if (typ := (v or {}).get("type")): t["type"] = typ
|
|
202
211
|
if (s := (v or {}).get("search_context_size")):
|
|
203
212
|
t["max_uses"] = _max_uses.get(s, 5)
|
|
@@ -286,13 +295,6 @@ def cost(usage, m):
|
|
|
286
295
|
return cost
|
|
287
296
|
|
|
288
297
|
# %% ../nbs/04_anthropic.ipynb #f7c0b989
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
norm_usage=norm_usage,
|
|
293
|
-
acollect_stream=acollect_stream,
|
|
294
|
-
mk_payload=mk_payload,
|
|
295
|
-
cost=cost,
|
|
296
|
-
get_hdrs=get_hdrs,
|
|
297
|
-
op_path=('messages.messages_post','messages.messages_post'))
|
|
298
|
-
api_registry.register('anthropic', **api_ns)
|
|
298
|
+
api_registry.register('anthropic', norm_tool_calls=norm_tool_calls, norm_parts=norm_parts, norm_finish=norm_finish, norm_usage=norm_usage,
|
|
299
|
+
finalize_usage=finalize_usage, acollect_stream=acollect_stream, mk_payload=mk_payload, cost=cost, get_hdrs=get_hdrs,
|
|
300
|
+
op_path=('messages.messages_post','messages.messages_post'))
|
|
@@ -93,7 +93,7 @@ tool_dtls_tag = "<details class='tool-usage-details' markdown='1'>"
|
|
|
93
93
|
re_tools = re.compile(fr"^({tool_dtls_tag}\n*(?:<summary>(?P<summary>.*?)</summary>\n*)?\n*```json\n+(.*?)\n+```\n+</details>)",
|
|
94
94
|
flags=re.DOTALL|re.MULTILINE)
|
|
95
95
|
token_dtls_tag = "<details class='token-usage-details' markdown='1'>"
|
|
96
|
-
re_token = re.compile(fr"^{re.escape(token_dtls_tag)}
|
|
96
|
+
re_token = re.compile(fr"^{re.escape(token_dtls_tag)}\n*<summary>.*?</summary>\n*\n*`.*?`\n*\n*</details>\n?",
|
|
97
97
|
flags=re.DOTALL|re.MULTILINE)
|
|
98
98
|
|
|
99
99
|
# %% ../nbs/07_chat.ipynb #be998131
|
|
@@ -209,7 +209,9 @@ def mk_msgs(
|
|
|
209
209
|
"Create a list of fastllm canonical Msgs."
|
|
210
210
|
if not msgs: return []
|
|
211
211
|
if not isinstance(msgs, list): msgs = [msgs]
|
|
212
|
-
msgs = L(msgs).map(lambda m:
|
|
212
|
+
msgs = L(msgs).map(lambda m:
|
|
213
|
+
fmt2hist(m) if isinstance(m,str) and (tool_dtls_tag in m or token_dtls_tag in m) else [m]
|
|
214
|
+
).concat()
|
|
213
215
|
res, role = [], 'user'
|
|
214
216
|
for m in msgs:
|
|
215
217
|
res.append(msg := remove_cache_ckpts(mk_msg(m, role=role)))
|
|
@@ -297,9 +299,11 @@ def _has_stop(tres_parts): return any(isinstance(p.text, StopResponse) for p in
|
|
|
297
299
|
def _trunc_str(s, mx=2000, skip=10, replace="TRUNCATED"):
|
|
298
300
|
"Truncate `s` to `mx` chars max, adding `replace` if truncated"
|
|
299
301
|
if not isinstance(s, str): s = str(s)
|
|
300
|
-
|
|
302
|
+
s = s.rstrip()
|
|
303
|
+
if len(s)>2 and s[0]=='𝍁' and s[-1]=='𝍁':
|
|
304
|
+
s = s[1:-1]
|
|
305
|
+
if replace: return s
|
|
301
306
|
if isinstance_str(s, ('FullResponse','Safe')): return s
|
|
302
|
-
s = str(s).strip()
|
|
303
307
|
if len(s)<=mx: return s
|
|
304
308
|
s = s[skip:mx-skip]
|
|
305
309
|
ss = s.split(' ')
|
|
@@ -431,7 +435,8 @@ def _think_kw(model, think, vendor_name):
|
|
|
431
435
|
if not think: return {}
|
|
432
436
|
if 'opus-4-7' in model:
|
|
433
437
|
e = 'xhigh' if think=='h' else effort.get(think)
|
|
434
|
-
|
|
438
|
+
eff = dict(thinking={"type":"adaptive", "display":"summarized"}, output_config={"effort":e})
|
|
439
|
+
return dict(reasoning_effort=eff)
|
|
435
440
|
try: xhigh = get_model_info(model, vendor_name).get('supports_xhigh_reasoning_effort')
|
|
436
441
|
except: xhigh = False
|
|
437
442
|
eff = effort.get(think) if think!='x' else 'xhigh' if xhigh else 'high'
|
|
@@ -691,7 +696,7 @@ defaults.chat_callbacks = [DeepseekPrefillCallback, FenceToolCallback, ToolRemin
|
|
|
691
696
|
def _trunc_param(v, mx=40):
|
|
692
697
|
"Truncate and escape param value for display"
|
|
693
698
|
tp = _trunc_str(str(v).replace('`', r'\`'), mx=mx, replace=None, skip=0)
|
|
694
|
-
try: return
|
|
699
|
+
try: return dumps(tp, ensure_ascii=False)
|
|
695
700
|
except Exception: return repr(tp).replace('\\\\', '\\')
|
|
696
701
|
|
|
697
702
|
# %% ../nbs/07_chat.ipynb #80c0abdb
|
|
@@ -721,7 +726,7 @@ def mk_tr_details(tr, mx=2000):
|
|
|
721
726
|
'call':{'function': tr.data['name'], 'arguments': args},
|
|
722
727
|
'result':_trunc_content(tr.text, mx=mx),}
|
|
723
728
|
summ = f"<summary>{_tc_summary(tr)}</summary>"
|
|
724
|
-
return f"\n\n{tool_dtls_tag}\n{summ}\n\n```json\n{dumps(res, indent=2)}\n```\n\n</details>\n\n"
|
|
729
|
+
return f"\n\n{tool_dtls_tag}\n{summ}\n\n```json\n{dumps(res, indent=2, ensure_ascii=False)}\n```\n\n</details>\n\n"
|
|
725
730
|
|
|
726
731
|
# %% ../nbs/07_chat.ipynb #3049001c
|
|
727
732
|
def mk_srv_tc_details(tc, mx=2000):
|
|
@@ -729,7 +734,7 @@ def mk_srv_tc_details(tc, mx=2000):
|
|
|
729
734
|
args = {k:_trunc_str(v, mx=mx*5) for k,v in tc.arguments.items()}
|
|
730
735
|
res = {'id':tc.id, 'server':True, 'call':{'function': tc.name, 'arguments': args}, 'result':"Server tool call executed."}
|
|
731
736
|
summ = f"<summary>{_srv_tc_summary(tc)}</summary>"
|
|
732
|
-
return f"\n\n{tool_dtls_tag}\n{summ}\n\n```json\n{dumps(res, indent=2)}\n```\n\n</details>\n\n"
|
|
737
|
+
return f"\n\n{tool_dtls_tag}\n{summ}\n\n```json\n{dumps(res, indent=2, ensure_ascii=False)}\n```\n\n</details>\n\n"
|
|
733
738
|
|
|
734
739
|
# %% ../nbs/07_chat.ipynb #f0d984ec
|
|
735
740
|
# status_re = re.compile(r'^- ⏳ <code>(.*)</code> ⏳$|^🧠+$', re.MULTILINE) # TODO: Need to yield tool calls as they are done collated in fastllm `_acollect_stream`
|
|
@@ -138,6 +138,7 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
|
|
|
138
138
|
deltas.append(d)
|
|
139
139
|
part_accum.finalize()
|
|
140
140
|
tcs = part_accum.tool_calls
|
|
141
|
+
if api_name: usg = api_registry.apis[api_name].finalize_usage(usg, part_accum.parts)
|
|
141
142
|
if stop: fin = FinishReason.stop
|
|
142
143
|
fin = FinishReason.tool_calls if fin==FinishReason.stop and any(~L(tcs).attrgot('server')) else fin # recheck tool calls post collation
|
|
143
144
|
# tool calls and non-anthropic citations are yielded at the end
|
|
@@ -145,3 +146,4 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
|
|
|
145
146
|
message=Msg(role="assistant", content=part_accum.parts),
|
|
146
147
|
finish_reason=fin, usage=usg, tool_calls=tcs, api_name=api_name, vendor_name=vendor_name,
|
|
147
148
|
raw={'deltas':deltas})
|
|
149
|
+
|
|
@@ -4,10 +4,11 @@
|
|
|
4
4
|
|
|
5
5
|
# %% auto #0
|
|
6
6
|
__all__ = ['PartType', 'FinishReason', 'api_registry', 'model_prices_url', 'haik45', 'sonn45', 'sonn', 'sonn46', 'opus46', 'opus',
|
|
7
|
-
'gpt54', 'gpt54m', '
|
|
8
|
-
'
|
|
9
|
-
'
|
|
10
|
-
'
|
|
7
|
+
'gpt54', 'gpt54m', 'gpt55', 'codex54', 'codex54m', 'codex55', 'codex53spark', 'model_info_registry',
|
|
8
|
+
'deepseek_v4_common', 'codex_pricing', 'Part', 'Msg', 'ToolCall', 'display_list', 'Usage', 'Completion',
|
|
9
|
+
'APIRegistry', 'mk_completion', 'mk_tool_res_msg', 'fn_schema', 'sys_text', 'part_txt', 'data_url',
|
|
10
|
+
'url_mime', 'payload_kwargs', 'get_api_key', 'model_prices_meta', 'infer_api_name', 'get_model_meta',
|
|
11
|
+
'register_model_info', 'get_model_info', 'get_model_pricing', 'approx_pricing']
|
|
11
12
|
|
|
12
13
|
# %% ../nbs/00_types.ipynb #b4d047fd
|
|
13
14
|
from dataclasses import dataclass, field
|
|
@@ -147,25 +148,29 @@ FinishReason = str_enum('finish_reason', 'stop', 'tool_calls', 'length', 'conten
|
|
|
147
148
|
# %% ../nbs/00_types.ipynb #fc681c52
|
|
148
149
|
class APIRegistry:
|
|
149
150
|
def __init__(self): self.apis = {}
|
|
150
|
-
def register(self, name, **kwargs): self.apis[name] = SimpleNamespace(**kwargs)
|
|
151
|
+
def register(self, name, finalize_usage=noop, **kwargs): self.apis[name] = SimpleNamespace(finalize_usage=finalize_usage, **kwargs)
|
|
151
152
|
|
|
152
153
|
api_registry = APIRegistry()
|
|
153
154
|
|
|
155
|
+
|
|
154
156
|
# %% ../nbs/00_types.ipynb #d58a5f96
|
|
155
157
|
def mk_completion(resp, model, api_name, vendor_name):
|
|
156
158
|
"Normalize an api response into Completion."
|
|
157
159
|
api = api_registry.apis[api_name]
|
|
158
160
|
tcs = api.norm_tool_calls(resp)
|
|
161
|
+
parts = api.norm_parts(resp)
|
|
162
|
+
usg = api.finalize_usage(api.norm_usage(resp), parts)
|
|
159
163
|
return Completion(
|
|
160
164
|
model=resp.get("model") or model,
|
|
161
|
-
message=Msg(role="assistant", content=
|
|
165
|
+
message=Msg(role="assistant", content=parts),
|
|
162
166
|
finish_reason=api.norm_finish(resp, tcs),
|
|
163
|
-
usage=
|
|
167
|
+
usage=usg,
|
|
164
168
|
tool_calls=tcs,
|
|
165
169
|
api_name=api_name,
|
|
166
170
|
vendor_name=vendor_name,
|
|
167
171
|
raw=resp)
|
|
168
172
|
|
|
173
|
+
|
|
169
174
|
# %% ../nbs/00_types.ipynb #d5322db5
|
|
170
175
|
def mk_tool_res_msg(tool_calls:list[ToolCall], results:list[str|list]):
|
|
171
176
|
'A util to prepare parallel tool call with str or media list results'
|
|
@@ -225,6 +230,7 @@ def get_api_key(api_key, default):
|
|
|
225
230
|
|
|
226
231
|
# %% ../nbs/00_types.ipynb #852adecd
|
|
227
232
|
model_prices_url = 'https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json'
|
|
233
|
+
|
|
228
234
|
@flexicache(time_policy(24*60*60))
|
|
229
235
|
def model_prices_meta(): return urljson(model_prices_url)
|
|
230
236
|
|
|
@@ -254,63 +260,94 @@ opus46 = "claude-opus-4-6"
|
|
|
254
260
|
opus = "claude-opus-4-7"
|
|
255
261
|
gpt54 = "gpt-5.4"
|
|
256
262
|
gpt54m = "gpt-5.4-mini"
|
|
263
|
+
gpt55 = "gpt-5.5"
|
|
257
264
|
codex54 = "gpt-5.4"
|
|
265
|
+
codex54m = "gpt-5.4-mini"
|
|
258
266
|
codex55 = "gpt-5.5"
|
|
259
267
|
codex53spark = "gpt-5.3-codex-spark"
|
|
260
268
|
|
|
261
|
-
# %% ../nbs/00_types.ipynb #
|
|
262
|
-
|
|
263
|
-
"input_cost_per_token": 0.10 / 1_000_000,
|
|
264
|
-
"cache_creation_input_token_cost": 0.10 / 1_000_000,
|
|
265
|
-
"cache_read_input_token_cost": 0.10 / 1_000_000,
|
|
266
|
-
"output_cost_per_token": 0.50 / 1_000_000,
|
|
267
|
-
}
|
|
269
|
+
# %% ../nbs/00_types.ipynb #583e017b
|
|
270
|
+
model_info_registry = {}
|
|
268
271
|
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
272
|
+
def register_model_info(model, vendor_name=None, base=None, base_vendor_name=None, **overrides):
    """Store metadata for `model` in `model_info_registry` under the key `(vendor_name, model)`.

    When `base` is given, the entry starts as a copy of `get_model_info(base, ...)`, looked up
    with `base_vendor_name` if set, else `vendor_name`; `overrides` are then applied on top."""
    if base: info = {**get_model_info(base, base_vendor_name or vendor_name), **overrides}
    else: info = dict(overrides)
    model_info_registry[vendor_name, model] = info
|
|
274
277
|
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
info = get_model_meta(mn, 'chatgpt' if vendor_name=='codex' else vendor_name)
|
|
278
|
-
# anthropic web search
|
|
278
|
+
def get_model_info(mn, vendor_name=None):
|
|
279
|
+
info = model_info_registry.get((vendor_name, mn)) or get_model_meta(mn, vendor_name)
|
|
279
280
|
if 'search_context_cost_per_query' in info: info['supports_web_search'] = True
|
|
280
|
-
# kimi
|
|
281
|
-
if 'kimi' in mn:
|
|
282
|
-
if 'k2p6' in mn: info = get_model_meta(mn.replace('k2p6', 'k2p5'), vendor_name)
|
|
283
|
-
info['supports_reasoning'] = True
|
|
284
|
-
info['supports_vision'] = True
|
|
285
|
-
if vendor_name == 'moonshot': info['supports_assistant_prefill'] = True
|
|
286
|
-
# gpt web search
|
|
287
|
-
if mn in ("gpt-5.4", "gpt-5.4-mini"):
|
|
288
|
-
info['supports_web_search'] = True
|
|
289
|
-
info.pop('mode', None)
|
|
290
|
-
# codex updates
|
|
291
|
-
if vendor_name == 'codex':
|
|
292
|
-
info = merge(info, codex_pricing)
|
|
293
|
-
info |= _codex_overrides.get(mn, {})
|
|
294
|
-
# deepseek v4
|
|
295
|
-
if vendor_name == 'deepseek' and mn in ("deepseek-v4-flash", "deepseek-v4-pro"):
|
|
296
|
-
info = dict(get_model_meta("deepseek/deepseek-v3.2"))
|
|
297
|
-
info |= dict(supports_assistant_prefill=True, supports_function_calling=True, supports_prompt_caching=True,
|
|
298
|
-
supports_reasoning=True, supports_tool_choice=True)
|
|
299
|
-
info.update(input_cost_per_token=1.4e-07, input_cost_per_token_cache_hit=2.8e-09, output_cost_per_token=2.8e-07,
|
|
300
|
-
max_input_tokens=1048576, max_output_tokens=393216, max_tokens=393216)
|
|
301
|
-
if 'pro' in mn: info = {**info, 'input_cost_per_token': 4.35e-07, 'input_cost_per_token_cache_hit': 3.625e-09, 'output_cost_per_token': 8.7e-07}
|
|
302
|
-
# qwen 3p6
|
|
303
|
-
if vendor_name == 'fireworks_ai' and mn == 'accounts/fireworks/models/qwen3p6-plus':
|
|
304
|
-
info = dict(supports_vision=True, supports_reasoning=True, supports_function_calling=True, supports_tool_choice=True,
|
|
305
|
-
supports_system_messages=True, supports_response_schema=True, supports_parallel_function_calling=True,
|
|
306
|
-
supports_prompt_caching=True, supports_native_streaming=True, supports_native_structured_output=True,
|
|
307
|
-
max_tokens=1000000, max_input_tokens=1000000, max_output_tokens=65536,
|
|
308
|
-
input_cost_per_token=0.5e-6, cache_read_input_token_cost=0.1e-6, output_cost_per_token=3.0e-6)
|
|
309
|
-
|
|
310
|
-
# unresolved models
|
|
311
|
-
if not info and not strict: info = info | codex_pricing
|
|
312
281
|
return dict2obj(info)
|
|
313
282
|
|
|
283
|
+
# %% ../nbs/00_types.ipynb #8261dcd0
|
|
284
|
+
register_model_info('accounts/fireworks/models/qwen3p6-plus', vendor_name='fireworks_ai',
|
|
285
|
+
supports_vision=True, supports_reasoning=True, supports_function_calling=True, supports_tool_choice=True,
|
|
286
|
+
supports_system_messages=True, supports_response_schema=True, supports_parallel_function_calling=True,
|
|
287
|
+
supports_prompt_caching=True, supports_native_streaming=True, supports_native_structured_output=True,
|
|
288
|
+
max_tokens=1000000, max_input_tokens=1000000, max_output_tokens=65536,
|
|
289
|
+
input_cost_per_token=0.5e-6, cache_read_input_token_cost=0.1e-6, output_cost_per_token=3.0e-6)
|
|
290
|
+
|
|
291
|
+
register_model_info('gemini-3.5-flash', vendor_name='gemini', base='gemini-3-flash-preview',
|
|
292
|
+
input_cost_per_token=1.5e-6, output_cost_per_token=9e-6,
|
|
293
|
+
output_cost_per_reasoning_token=9e-6, cache_read_input_token_cost=1.5e-7)
|
|
294
|
+
|
|
295
|
+
for model in ('gpt-5.4', 'gpt-5.4-mini'):
|
|
296
|
+
register_model_info(model, vendor_name='openai', base=model, supports_web_search=True, mode=None)
|
|
297
|
+
|
|
298
|
+
for model in ('kimi-k2.5', 'kimi-k2.6'):
|
|
299
|
+
register_model_info(model, vendor_name='moonshot', base=f'moonshot/{model}', base_vendor_name=None,
|
|
300
|
+
supports_reasoning=True, supports_vision=True, supports_assistant_prefill=True)
|
|
301
|
+
|
|
302
|
+
register_model_info('gemini-3.1-flash-lite', vendor_name='gemini', base='gemini-3.1-flash-lite-preview')
|
|
303
|
+
register_model_info('models/gemini-3.1-flash-lite', vendor_name='gemini', base='gemini-3.1-flash-lite-preview')
|
|
304
|
+
|
|
305
|
+
for model in ('accounts/fireworks/models/kimi-k2p5', 'accounts/fireworks/models/kimi-k2p6'):
|
|
306
|
+
register_model_info(model, vendor_name='fireworks_ai', base=model.replace('k2p6', 'k2p5'),
|
|
307
|
+
supports_reasoning=True, supports_vision=True,
|
|
308
|
+
input_cost_per_token=0.95e-6, cache_read_input_token_cost=0.16e-6, output_cost_per_token=4.0e-6)
|
|
309
|
+
|
|
310
|
+
# %% ../nbs/00_types.ipynb #948d55d0
|
|
311
|
+
deepseek_v4_common = dict(
|
|
312
|
+
supports_assistant_prefill=True, supports_function_calling=True, supports_prompt_caching=True,
|
|
313
|
+
supports_reasoning=True, supports_tool_choice=True,
|
|
314
|
+
max_input_tokens=1048576, max_output_tokens=393216, max_tokens=393216)
|
|
315
|
+
|
|
316
|
+
register_model_info('deepseek-v4-flash', vendor_name='deepseek', base='deepseek/deepseek-v3.2', **deepseek_v4_common,
|
|
317
|
+
input_cost_per_token=1.4e-07, input_cost_per_token_cache_hit=2.8e-09,
|
|
318
|
+
output_cost_per_token=2.8e-07, cache_read_input_token_cost=1.4e-07/10)
|
|
319
|
+
register_model_info('deepseek-v4-pro', vendor_name='deepseek', base='deepseek/deepseek-v3.2', **deepseek_v4_common,
|
|
320
|
+
input_cost_per_token=4.35e-07, input_cost_per_token_cache_hit=3.625e-09,
|
|
321
|
+
output_cost_per_token=8.7e-07, cache_read_input_token_cost=4.35e-07/10)
|
|
322
|
+
|
|
323
|
+
# %% ../nbs/00_types.ipynb #2c23d11e
|
|
324
|
+
codex_pricing = dict(
|
|
325
|
+
input_cost_per_token = 0.10/1_000_000, output_cost_per_token = 0.50/1_000_000,
|
|
326
|
+
cache_creation_input_token_cost = 0.10/1_000_000, cache_read_input_token_cost = 0.10/1_000_000)
|
|
327
|
+
|
|
328
|
+
for model in (codex54, codex54m, codex55):
|
|
329
|
+
register_model_info(model, 'codex', base=model, base_vendor_name='chatgpt', supports_web_search=True, **codex_pricing)
|
|
330
|
+
|
|
331
|
+
register_model_info(codex53spark, 'codex', **codex_pricing,
|
|
332
|
+
supports_vision=False, supports_image_input=False, supports_web_search=True, supports_reasoning=True,
|
|
333
|
+
max_tokens=128000, max_input_tokens=128000, max_output_tokens=128000)
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
# %% ../nbs/00_types.ipynb #24cc47ec
|
|
337
|
+
def get_model_pricing(mn, vendor_name, million=True):
    """Numeric cost fields of a model's info, keyed by field name.

    Keeps every entry of `get_model_info(mn, vendor_name)` whose key contains 'cost' but not
    'priority' and whose value is a real number. Values are scaled by 1e6 when `million`
    is true, then rounded to 6 decimal places."""
    scale = 1e6 if million else 1
    # Integer costs (e.g. a JSON `0`) are real prices too; bools are ints in Python, so exclude them
    def _is_price(k, v): return 'cost' in k and 'priority' not in k and isinstance(v, (int, float)) and not isinstance(v, bool)
    return {k:round(v*scale, 6) for k,v in get_model_info(mn, vendor_name).items() if _is_price(k, v)}
|
|
341
|
+
|
|
342
|
+
# %% ../nbs/00_types.ipynb #79304cd9
|
|
343
|
+
def approx_pricing(nm, vendor_name, out=10, cache=80, inp=10, markup=0):
    """Approx cost per million tokens with given output/cache/input proportions

    `out`, `cache` and `inp` are relative weights for output, cache-read and input tokens;
    the result is the weighted mean of the corresponding prices, scaled by `1+markup`.
    Raises `KeyError` if the model has no input or output price."""
    p = get_model_pricing(nm, vendor_name)
    base_in = p['input_cost_per_token']
    ic = p.get('cache_creation_input_token_cost', base_in)
    # No cache-read price listed: fall back to the plain input price instead of raising KeyError
    cr = p.get('cache_read_input_token_cost', base_in)
    res = (p['output_cost_per_token']*out + cr*cache + ic*inp) / (out+cache+inp)
    # NOTE(review): hard-coded 1.5x multiplier for this one model; rationale not visible here
    if nm=='claude-opus-4-7': res *= 1.5
    return res*(1+markup)
|
|
350
|
+
|
|
314
351
|
# %% ../nbs/00_types.ipynb #8bfca02d
|
|
315
352
|
@patch(as_prop=True)
|
|
316
353
|
def cost(self:Completion):
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.0.7"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|