python-fastllm 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fastllm/__init__.py +1 -0
- fastllm/_modidx.py +245 -0
- fastllm/acomplete.py +122 -0
- fastllm/anthropic.py +298 -0
- fastllm/chat.py +622 -0
- fastllm/gemini.py +304 -0
- fastllm/openai_chat.py +219 -0
- fastllm/openai_responses.py +260 -0
- fastllm/specs/anthropic.json +1 -0
- fastllm/specs/anthropic.yml +15684 -0
- fastllm/specs/gemini.json +6951 -0
- fastllm/specs/openai.with-code-samples.json +1 -0
- fastllm/specs/openai.with-code-samples.yml +73650 -0
- fastllm/specs/spec_manifest.json +17 -0
- fastllm/streaming.py +162 -0
- fastllm/types.py +301 -0
- python_fastllm-0.0.1.dist-info/METADATA +395 -0
- python_fastllm-0.0.1.dist-info/RECORD +21 -0
- python_fastllm-0.0.1.dist-info/WHEEL +5 -0
- python_fastllm-0.0.1.dist-info/entry_points.txt +2 -0
- python_fastllm-0.0.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{
|
|
2
|
+
"generated_at": "2026-03-30T14:27:11+00:00",
|
|
3
|
+
"specs_dir": "/Users/keremturgutlu/aai-ws/fastllm/specs",
|
|
4
|
+
"items": [
|
|
5
|
+
{
|
|
6
|
+
"provider": "anthropic",
|
|
7
|
+
"source_url": "https://storage.googleapis.com/stainless-sdk-openapi-specs/anthropic%2Fanthropic-dd2dcd00a757075370a7e4a7f469a1e2d067c2118684c3b70d7906a8f5cf518b.yml",
|
|
8
|
+
"target": "/Users/keremturgutlu/aai-ws/fastllm/specs/anthropic.yml",
|
|
9
|
+
"bytes": 724122,
|
|
10
|
+
"sha256": "dd2dcd00a757075370a7e4a7f469a1e2d067c2118684c3b70d7906a8f5cf518b",
|
|
11
|
+
"content_type": "text/yaml",
|
|
12
|
+
"fetched_at": "2026-03-30T14:27:11+00:00",
|
|
13
|
+
"changed": true,
|
|
14
|
+
"wrote": true
|
|
15
|
+
}
|
|
16
|
+
]
|
|
17
|
+
}
|
fastllm/streaming.py
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
"""Streaming helpers for lossless event collation."""
|
|
2
|
+
|
|
3
|
+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/01_streaming.ipynb.
|
|
4
|
+
|
|
5
|
+
# %% auto #0
|
|
6
|
+
__all__ = ['Delta', 'norm_and_yield', 'PartAccum', 'accum_completion', 'completion_text', 'stop_sequences', 'mk_acollect_stream',
|
|
7
|
+
'fake_stream']
|
|
8
|
+
|
|
9
|
+
# %% ../nbs/01_streaming.ipynb #0df5c926
|
|
10
|
+
import json
|
|
11
|
+
from dataclasses import dataclass, field, fields
|
|
12
|
+
from fastcore.utils import *
|
|
13
|
+
from fastcore.meta import delegates
|
|
14
|
+
from fastspec.errors import *
|
|
15
|
+
from .types import *
|
|
16
|
+
|
|
17
|
+
# %% ../nbs/01_streaming.ipynb #400d628a
|
|
18
|
+
@dataclass
class Delta:
    "Normalized streaming delta event."
    text: str = ""                                  # incremental assistant text
    thinking: str = ""                              # incremental reasoning text
    refusal: str = ""                               # incremental refusal text
    tool_calls: list[ToolCall] = field(default_factory=list)  # tool-call fragments in this event
    citations: list = field(default_factory=list)   # citation objects attached to this event
    server_tool_result: dict|None = None            # raw server-side tool result payload, if any
    finish_reason: str|None = None                  # set on the terminating event
    usage: Usage|None = None                        # set when the provider reports usage
    raw: dict = field(default_factory=dict)         # original provider event
|
|
30
|
+
|
|
31
|
+
# %% ../nbs/01_streaming.ipynb #f59f837b
|
|
32
|
+
async def norm_and_yield(resp, norm_func):
    "Apply `norm_func` to each event from async iterator `resp`, yielding only non-None results."
    async for event in resp:
        normed = norm_func(event)
        if normed is not None:
            yield normed
|
|
35
|
+
|
|
36
|
+
# %% ../nbs/01_streaming.ipynb #a7f1738a
|
|
37
|
+
@dataclass
class PartAccum:
    "Accumulates streamed deltas into in-progress Parts/ToolCalls keyed by stream index."
    # index -> in-progress Part or ToolCall; keys come from the caller's index_fn
    # (assumed int-like — TODO confirm). `finalize` replaces this dict with a list.
    parts: dict[int, Part|ToolCall] = field(default_factory=dict)
    # Completed tool calls, populated by `finalize`
    tool_calls: list[ToolCall] = field(default_factory=list)

    def append(self, typ, index, txt='', citations=None, **tc_kwargs):
        'Create and accumulate same type sequential parts'
        if index not in self.parts:
            # First delta for this slot: create a fresh ToolCall or Part
            if typ==PartType.tool_use: self.parts[index] = ToolCall(**tc_kwargs)
            else: self.parts[index] = Part(type=typ, text=txt, data=dict(citations=citations or []))
        else:
            if typ==PartType.tool_use:
                new_args = tc_kwargs.get('arguments', '')
                cur_args = self.parts[index].arguments
                # str+str: argument JSON arrives as text fragments, so concatenate;
                # any other combination just takes the newest value
                if isinstance(new_args, str) and isinstance(cur_args, str): self.parts[index].arguments += new_args
                elif isinstance(new_args, str) and isinstance(cur_args, dict): self.parts[index].arguments = new_args
                else: self.parts[index].arguments = new_args
            else:
                self.parts[index].text += txt
                # anthropic citations have matching idx
                self.parts[index].data['citations'].extend(citations or [])

    def finalize(self):
        "Parse tool-call argument JSON, convert ToolCalls to Parts, merge adjacent text/thinking parts."
        # Assigning to existing keys while iterating items() is safe: no keys added/removed
        for idx,tc in self.parts.items():
            if isinstance(tc, ToolCall):
                # Accumulated fragments are JSON text; empty string means no arguments
                if isinstance(tc.arguments, str): tc.arguments = json.loads(tc.arguments) if tc.arguments else {}
                self.tool_calls.append(tc)
                data = {**tc.extra, 'id':tc.id, 'name':tc.name, 'arguments':tc.arguments, 'server':tc.server}
                self.parts[idx] = Part(type=PartType.tool_use, data=data)

        # Collapse consecutive parts of the same text/thinking type into one
        merged = []
        for p in self.parts.values():
            if merged and merged[-1].type == p.type and p.type in (PartType.text, PartType.thinking): merged[-1].text += p.text
            else: merged.append(p)
        # NOTE: after finalize, `parts` is a list, no longer a dict
        self.parts = merged
|
|
72
|
+
|
|
73
|
+
# %% ../nbs/01_streaming.ipynb #0e8ca58e
|
|
74
|
+
def accum_completion(pa, raw, fin, usg, deltas, model=None, api_name=None, vendor_name=None, delta=None):
    "Build a Completion snapshot from in-progress PartAccum state"
    # Only finished Part objects; in-progress ToolCalls are excluded from the snapshot
    snapshot = [p for p in pa.parts.values() if isinstance(p, Part)]
    pending = delta.text if delta else None
    if pending:
        # Fold the not-yet-accumulated delta text onto the trailing text part
        if snapshot and snapshot[-1].type == PartType.text:
            last = snapshot[-1]
            snapshot[-1] = Part(type=last.type, text=(last.text or '') + pending, data=last.data)
        else:
            snapshot.append(Part(type=PartType.text, text=pending))
    msg = Msg(role="assistant", content=snapshot)
    return Completion(raw.get('model', model), msg, fin, usg,
                      api_name=api_name, vendor_name=vendor_name, raw={'deltas': deltas})
|
|
85
|
+
|
|
86
|
+
# %% ../nbs/01_streaming.ipynb #c28f706f
|
|
87
|
+
def completion_text(c):
    "Combined text from a Completion."
    chunks = [p.text or '' for p in c.message.content if p.type == PartType.text]
    return ''.join(chunks)
|
|
90
|
+
|
|
91
|
+
# %% ../nbs/01_streaming.ipynb #b2b9f7ca
|
|
92
|
+
def stop_sequences(seqs):
    "Stop when any sequence appears in the accumulated completion text."
    seqs = L(seqs)
    def _stop(c):
        "Return the first stop sequence found in `c`'s text, else None."
        accumulated = completion_text(c)
        return next((s for s in seqs if s in accumulated), None)
    return _stop
|
|
100
|
+
|
|
101
|
+
# %% ../nbs/01_streaming.ipynb #931f686b
|
|
102
|
+
def _trim_delta(d, cur, s):
    "Trim `d.text` so accumulated text in `cur` stops just before stop sequence `s`."
    # `cur` already includes this delta, so `dt` is the trailing suffix of `txt`
    txt,dt = completion_text(cur), d.text or ''
    i = txt.find(s)
    # len(txt)-len(dt) = chars accumulated before this delta; subtracting gives the
    # stop sequence's offset inside the delta (clamped to 0 if it began earlier)
    if i>=0: d.text = dt[:max(0, i-(len(txt)-len(dt)))]
|
|
107
|
+
|
|
108
|
+
# %% ../nbs/01_streaming.ipynb #fc71790b
|
|
109
|
+
async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_name=None, stop_callables=None):
    "Collect a Delta stream, yielding incremental chunks and a final Completion."
    part_accum,deltas,stop_callables = PartAccum(), [], L(stop_callables)
    fin, usg, typ, last_typ, last_idx = [None]*5
    def _fidx(d, name, pt=None):
        # Resolve the PartType for this field and ask `index_fn` (provider-specific)
        # which accumulator slot it belongs to
        nonlocal typ, last_idx
        typ = getattr(PartType, pt or name)
        idx,last_idx = index_fn(d, typ, last_typ, last_idx)
        return idx
    def _proc(d, name, pt=None, kw='txt', ret=None):
        # Accumulate field `name` of delta `d`; returns the chunk dict to yield,
        # or None when the field is empty (unless `ret` forces processing)
        if not ret and not (val := getattr(d, name)): return
        idx = _fidx(d, name, pt)
        part_accum.append(typ, idx, **(ret or {kw: val}))
        return ret or {name: val}
    async for d in it:
        stop = False
        if stop_callables:
            # Snapshot the would-be completion (including this delta) so stop
            # functions can inspect the full accumulated text
            cur = accum_completion(part_accum, d.raw, fin, usg, deltas+[d], model, api_name=api_name, vendor_name=vendor_name, delta=d)
            for f in stop_callables:
                if res:=f(cur):
                    # A str result is the matched stop sequence: trim the delta so
                    # the output ends just before it
                    if isinstance(res, str): _trim_delta(d, cur, res)
                    stop = True
                    break
        if (r:=_proc(d, 'text')): yield r
        if (r:=_proc(d, 'thinking')): yield r
        if (r:=_proc(d, 'citations', pt='text', kw='citations')): yield r
        for tc in d.tool_calls:
            # NOTE(review): assumes `tc.arguments` is a dict here — a raw str
            # fragment would break `.get`; confirm upstream normalizers
            args = tc.arguments.get('_delta', tc.arguments)
            _proc(d, 'tool_use', ret=dict(id=tc.id, name=tc.name, arguments=args, server=tc.server, extra=tc.extra))
        if d.server_tool_result:
            idx = _fidx(d, 'server_tool_result')
            part_accum.parts[idx] = Part(type=typ, data=d.server_tool_result)
        if (r:=_proc(d, 'refusal')): yield r
        if d.finish_reason: fin = d.finish_reason
        if d.usage: usg = d.usage
        last_typ = typ
        deltas.append(d)
        if stop:
            fin = fin or FinishReason.stop
            await it.aclose()
            break
    part_accum.finalize()
    # need to recheck for tool calls post collation for streaming
    tcs = part_accum.tool_calls
    # Any client-side (non-server) tool call flips a plain stop into tool_calls
    fin = FinishReason.tool_calls if fin==FinishReason.stop and any(~L(tcs).attrgot('server')) else fin
    # tool calls and non-anthropic citations are yielded at the end
    # NOTE(review): `d` is unbound if the stream produced no deltas — an empty
    # stream would raise NameError here; confirm callers never pass one
    yield Completion(d.raw.get('model', model),
                     message=Msg(role="assistant", content=part_accum.parts),
                     finish_reason=fin, usage=usg, tool_calls=tcs, api_name=api_name, vendor_name=vendor_name,
                     raw={'deltas':deltas})
|
|
159
|
+
|
|
160
|
+
# %% ../nbs/01_streaming.ipynb #f79d3b99
|
|
161
|
+
async def fake_stream(*ss):
    "Yield each string in `ss` as a text Delta tagged with a fake model name."
    for chunk in ss:
        yield Delta(text=chunk, raw={'model': 'fake'})
|
fastllm/types.py
ADDED
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
"""Core internal types."""
|
|
2
|
+
|
|
3
|
+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/00_types.ipynb.
|
|
4
|
+
|
|
5
|
+
# %% auto #0
|
|
6
|
+
__all__ = ['PartType', 'FinishReason', 'api_registry', 'model_prices_url', 'codex_pricing', 'Part', 'Msg', 'ToolCall',
|
|
7
|
+
'display_list', 'Usage', 'Completion', 'APIRegistry', 'mk_completion', 'mk_tool_res_msg', 'fn_schema',
|
|
8
|
+
'sys_text', 'part_txt', 'data_url', 'url_mime', 'payload_kwargs', 'get_api_key', 'model_prices_meta',
|
|
9
|
+
'infer_api_name', 'get_model_meta', 'get_model_info']
|
|
10
|
+
|
|
11
|
+
# %% ../nbs/00_types.ipynb #b4d047fd
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from fastcore.net import urljson
|
|
14
|
+
from fastcore.utils import *
|
|
15
|
+
|
|
16
|
+
# %% ../nbs/00_types.ipynb #e568bade
|
|
17
|
+
@dataclass
class Part:
    "A normalized content part."
    type: str              # a PartType value (text, thinking, tool_use, ...)
    text: str|None = None  # textual payload, when the part carries text
    data: dict|None = None # structured payload (e.g. citations, tool-call fields)
|
|
23
|
+
|
|
24
|
+
# %% ../nbs/00_types.ipynb #aa3cfc23
|
|
25
|
+
# Closed set of normalized part kinds shared across providers (see `Part.type`)
PartType = str_enum('PartType', 'text', 'thinking', 'refusal', 'tool_use', 'server_tool_result', 'tool_result',
                    'input_image', 'input_audio', 'input_video', 'input_file')
|
|
27
|
+
|
|
28
|
+
# %% ../nbs/00_types.ipynb #2eeff103
|
|
29
|
+
def _trunc_strs(d, n=200):
|
|
30
|
+
"Return copy of dict `d` with str values >n chars truncated to first 10 chars + '...'"
|
|
31
|
+
if not d: return d
|
|
32
|
+
return {k: (v[:10]+'...' if isinstance(v,str) and len(v)>n else v) for k,v in d.items()}
|
|
33
|
+
|
|
34
|
+
@patch
def _repr_markdown_(self: Part):
    "Notebook markdown repr: type header, text body, collapsed (truncated) `data`."
    body = self.text if self.text else ''
    data = _trunc_strs(self.data)
    return f"""**Part** (`{self.type}`)

{body}

<details>

- data: `{data}`

</details>"""
|
|
47
|
+
|
|
48
|
+
# %% ../nbs/00_types.ipynb #afeb9eef
|
|
49
|
+
@dataclass
class Msg:
    "A normalized message."
    role: str           # e.g. "assistant", "tool"
    content: List[Part] # ordered content parts

    def _repr_markdown_(self):
        "Notebook markdown repr: role plus each part's markdown."
        # NOTE: '\n' inside an f-string replacement field requires Python 3.12+
        return f"""**Msg**

- role: `{self.role}`

<contents>

{'\n\n'.join(p._repr_markdown_() for p in self.content)}

</contents>"""
|
|
65
|
+
|
|
66
|
+
# %% ../nbs/00_types.ipynb #f6d04920
|
|
67
|
+
@dataclass
class ToolCall:
    "Normalized tool call."
    id: str                                        # provider-assigned call id
    name: str                                      # tool/function name
    arguments: dict = field(default_factory=dict)  # parsed call arguments
    server: bool = False                           # True for server-side (provider-executed) tools
    extra: dict = field(default_factory=dict)      # provider-specific extras
|
|
75
|
+
|
|
76
|
+
# %% ../nbs/00_types.ipynb #509cfc99
|
|
77
|
+
@patch
def _repr_markdown_(self: ToolCall):
    "Notebook markdown repr: name(args) header with collapsed id/server/extra details."
    extra = _trunc_strs(self.extra)
    return f"""🔧 **{self.name}**(`{self.arguments}`)

<details>

- id: `{self.id}`
- server: `{self.server}`
- extra: `{extra}`

</details>"""
|
|
89
|
+
|
|
90
|
+
def display_list(l):
    "Render objects with `_repr_markdown_` as one combined Markdown display (IPython only)."
    # Imported lazily so the module works outside notebook environments
    from IPython.display import Markdown, display
    display(Markdown('\n\n'.join(o._repr_markdown_() for o in l)))
|
|
93
|
+
|
|
94
|
+
# %% ../nbs/00_types.ipynb #802ad832
|
|
95
|
+
@dataclass(frozen=True)
class Usage:
    "Normalized usage."
    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0
    cached_tokens: int = 0           # tokens read from prompt cache
    cache_creation_tokens: int = 0   # tokens written to prompt cache
    reasoning_tokens: int = 0
    raw: dict = field(default_factory=dict)  # provider's original usage payload
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
# %% ../nbs/00_types.ipynb #297a1672
|
|
108
|
+
@dataclass(frozen=True)
class Completion:
    "Normalized completion response."
    model: str                   # model id reported by the provider (or requested)
    message: Msg                 # assistant message with normalized parts
    finish_reason: str|None = None   # a FinishReason value
    usage: Usage|None = None
    tool_calls: List[ToolCall] = field(default_factory=list)
    api_name: str|None = None    # key into `api_registry`
    vendor_name: str|None = None # vendor prefix used for price lookups
    raw: dict = field(default_factory=dict)  # original provider response (or deltas)
|
|
119
|
+
|
|
120
|
+
# %% ../nbs/00_types.ipynb #1b0dbab0
|
|
121
|
+
@patch
def _repr_markdown_(self: Completion):
    "Notebook markdown repr: thinking in a collapsed block, text inline, tool calls listed, metadata in details."
    message = self.message
    content = ''
    for p in message.content:
        if p.type == PartType.thinking:
            if p.text: content += f"<details><summary>Thinking</summary>\n\n{p.text}\n\n</details>\n\n"
        elif txt := p.text: content += txt
    if self.tool_calls:
        tool_calls = [f"\n\n🔧 {tc.name}({tc.arguments})\n" for tc in self.tool_calls]
        content += "\n".join(tool_calls)
    # TODO: render inline images from the message
    details = [f"model: `{self.model}`", f"finish_reason: `{self.finish_reason}`", f"usage: `{self.usage}`"]
    det_str = '\n- '.join(details)
    return f"""{content}

<details>

- {det_str}

</details>"""
|
|
142
|
+
|
|
143
|
+
# %% ../nbs/00_types.ipynb #ce59e431
|
|
144
|
+
# Normalized terminal states for a completion.
# NOTE(review): enum name is lowercase 'finish_reason' while PartType uses its class
# name; renaming would change member reprs, so left as-is.
FinishReason = str_enum('finish_reason', 'stop', 'tool_calls', 'length', 'content_filter')
|
|
145
|
+
|
|
146
|
+
# %% ../nbs/00_types.ipynb #fc681c52
|
|
147
|
+
class APIRegistry:
    "Registry mapping api names to namespaces of provider-specific normalizer functions."
    def __init__(self):
        self.apis = {}

    def register(self, name, **kwargs):
        "Store `kwargs` under `name` as a `SimpleNamespace`."
        self.apis[name] = SimpleNamespace(**kwargs)

# Module-level singleton, consumed by `mk_completion` and `Completion.cost`
api_registry = APIRegistry()
|
|
152
|
+
|
|
153
|
+
# %% ../nbs/00_types.ipynb #d58a5f96
|
|
154
|
+
#COMMON
|
|
155
|
+
def mk_completion(resp, model, api_name, vendor_name):
    "Normalize an api response into Completion."
    # Each registered api supplies norm_* functions for its wire format
    api = api_registry.apis[api_name]
    tool_calls = api.norm_tool_calls(resp)
    msg = Msg(role="assistant", content=api.norm_parts(resp))
    return Completion(
        model=resp.get("model") or model,
        message=msg,
        finish_reason=api.norm_finish(resp, tool_calls),
        usage=api.norm_usage(resp),
        tool_calls=tool_calls,
        api_name=api_name,
        vendor_name=vendor_name,
        raw=resp)
|
|
168
|
+
|
|
169
|
+
# %% ../nbs/00_types.ipynb #d5322db5
|
|
170
|
+
#COMMON
|
|
171
|
+
def mk_tool_res_msg(tool_calls:list[ToolCall], results:list[str|list]):
    "Build a `tool` role Msg pairing each ToolCall with its str-or-media-list result."
    # NOTE: zip truncates silently if lengths differ — callers supply matched lists
    parts = [Part(type=PartType.tool_result, text=res,
                  data=dict(id=tc.id, name=tc.name, arguments=tc.arguments, server=tc.server))
             for tc, res in zip(tool_calls, results)]
    return Msg(role="tool", content=parts)
|
|
178
|
+
|
|
179
|
+
# %% ../nbs/00_types.ipynb #8a8e468b
|
|
180
|
+
#COMMON
|
|
181
|
+
def fn_schema(t):
    "Extract (name, description, parameters) from any tool format."
    if not isinstance(t, dict): return None
    fn = t.get('function')
    if isinstance(fn, dict):
        # OpenAI style: {"type": "function", "function": {...}}
        return fn.get('name',''), fn.get('description',''), fn.get('parameters',{})
    has_schema = 'parameters' in t or 'input_schema' in t
    if 'name' in t and has_schema:
        # Flat style; Anthropic names the schema `input_schema`
        return t.get('name',''), t.get('description',''), t.get('parameters', t.get('input_schema',{}))
    return None
|
|
189
|
+
|
|
190
|
+
# %% ../nbs/00_types.ipynb #d1d48d91
|
|
191
|
+
#COMMON
|
|
192
|
+
def sys_text(system):
    "Extract text from system (str or Part)."
    if system is None: return None
    if isinstance(system, str): return system
    return system.text

def part_txt(p):
    "Text of `p` when it is a Part, otherwise `p` unchanged."
    return p.text if isinstance(p, Part) else p
|
|
198
|
+
|
|
199
|
+
# %% ../nbs/00_types.ipynb #dc2b75a0
|
|
200
|
+
#COMMON
|
|
201
|
+
# Extension -> mime type lookup for media inputs
_ext_mime = {
    '.jpg':'image/jpeg', '.jpeg':'image/jpeg', '.png':'image/png', '.gif':'image/gif', '.webp':'image/webp',
    '.pdf':'application/pdf',
    '.mp3':'audio/mpeg', '.wav':'audio/wav', '.ogg':'audio/ogg', '.flac':'audio/flac', '.m4a':'audio/mp4',
    '.mp4':'video/mp4', '.mov':'video/quicktime', '.webm':'video/webm',
}

def data_url(url):
    "Parse data:mime;base64,data URL into (mime, b64_data), or None."
    if not isinstance(url, str): return None
    if not url.startswith('data:') or ',' not in url: return None
    header, payload = url.split(',', 1)
    if ';base64' not in header or not payload: return None
    mime = header[5:].split(';', 1)[0].strip()
    return (mime or 'application/octet-stream', payload)

def url_mime(url, default='application/octet-stream'):
    "Guess the mime type of `url` from its file extension, else `default`."
    fname = url.split('?')[0].split('/')[-1]
    if '.' not in fname:
        return default
    ext = '.' + url.rsplit('.', 1)[-1].split('?')[0].lower()
    return _ext_mime.get(ext, default)
|
|
219
|
+
|
|
220
|
+
# %% ../nbs/00_types.ipynb #28c698fe
|
|
221
|
+
# Stub documenting the shared kwargs signature for building provider payloads;
# presumably each provider module implements this contract — the body here is
# intentionally empty and this placeholder is never called. TODO confirm.
def payload_kwargs(msgs, model, stream=False, system=None, max_tokens=None, temperature=None, tools=None, tool_choice=None, reasoning_effort=None, web_search_options=None, stop_callables=None, stop_sequences=None): pass
|
|
222
|
+
|
|
223
|
+
# %% ../nbs/00_types.ipynb #c2a2cb49
|
|
224
|
+
def get_api_key(api_key, default):
    "Return `api_key` if truthy, else the value of env var `default`; raise ValueError if neither is set."
    key = api_key or os.getenv(default)
    if key:
        return key
    raise ValueError(f"Missing API key: make sure to have the expected env var name or pass `api_key`")
|
|
229
|
+
|
|
230
|
+
# %% ../nbs/00_types.ipynb #852adecd
|
|
231
|
+
# litellm's community-maintained model price / context-window map
model_prices_url = 'https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json'
# Network fetch, cached for 24h so repeated cost lookups don't re-download
@flexicache(time_policy(24*60*60))
def model_prices_meta(): return urljson(model_prices_url)
|
|
234
|
+
|
|
235
|
+
# %% ../nbs/00_types.ipynb #68e488d8
|
|
236
|
+
def infer_api_name(model):
    "Guess the provider api name from a model id string; None when unrecognized."
    if 'claude' in model: return 'anthropic'
    if 'gemini' in model: return 'gemini'
    for marker in ('gpt', 'o3-', 'o4-'):
        if marker in model: return 'openai'
    return None
|
|
241
|
+
|
|
242
|
+
# %% ../nbs/00_types.ipynb #2f0720c2
|
|
243
|
+
@flexicache(time_policy(24*60*60))
def get_model_meta(model, vendor_name=None, tfm=noop):
    "Look up cost metadata for `model` from litellm price map, using `vendor_name` prefix if needed."
    vendor_name = ifnone(vendor_name, infer_api_name(model))
    mp = model_prices_meta()
    # Default to the bare model id. Previously `key` was left unbound when the model
    # was not in the map and no vendor name was given or inferable, so the lookup
    # raised NameError instead of returning an (empty) miss.
    key = model
    if model not in mp:
        # Gemini models may be addressed as 'models/<name>' but priced as 'gemini/<name>'
        if vendor_name=='gemini' and model.startswith('models/'): key = f"gemini/{model.removeprefix('models/')}"
        elif vendor_name: key = f"{vendor_name}/{model}"
    # `tfm` lets callers post-process the raw metadata (or the None on a miss)
    return dict2obj(tfm(mp.get(key), model, vendor_name))
|
|
252
|
+
|
|
253
|
+
# %% ../nbs/00_types.ipynb #fbfdeb0a
|
|
254
|
+
# Flat per-token pricing (USD/token) applied for the 'codex' vendor; merged over
# litellm metadata in `get_model_info`
codex_pricing = {
    "input_cost_per_token": 0.10 / 1_000_000,
    "cache_creation_input_token_cost": 0.10 / 1_000_000,
    "cache_read_input_token_cost": 0.10 / 1_000_000,
    "output_cost_per_token": 0.50 / 1_000_000,
}
|
|
260
|
+
|
|
261
|
+
def get_model_info(mn, vendor_name=None):
    "Model metadata for `mn` with hand-maintained patches for models litellm lags on."
    info = get_model_meta(mn, vendor_name)
    # NOTE(review): `get_model_meta` is flexicache-cached and several branches below
    # mutate `info` in place — this may write into the cached object; confirm intended.
    # anthropic web search
    if 'search_context_cost_per_query' in info:
        info['supports_web_search'] = True
    # add reasoning to kimi
    if 'kimi' in mn:
        # k2p6 not in the price map yet: reuse k2p5's entry
        if 'k2p6' in mn: info = get_model_meta(mn.replace('k2p6', 'k2p5'), vendor_name)
        info['supports_reasoning'] = True
        info['supports_vision'] = True
        if vendor_name == 'moonshot': info['supports_assistant_prefill'] = True
    # add web search to gpt
    if mn in ("gpt-5.4", "gpt-5.4-mini"):
        info['supports_web_search'] = True
        info.pop('mode', None)
    # codex pricing
    if vendor_name == 'codex': info = merge(info, codex_pricing)
    # deepseek v4
    if vendor_name == 'deepseek' and mn in ("deepseek-v4-flash", "deepseek-v4-pro"):
        # Start from v3.2's entry, then overlay capabilities and v4 pricing/limits
        info = dict(get_model_meta("deepseek/deepseek-v3.2"))
        info |= dict(supports_assistant_prefill=True, supports_function_calling=True, supports_prompt_caching=True,
                     supports_reasoning=True, supports_tool_choice=True)
        info.update(input_cost_per_token=1.4e-07, input_cost_per_token_cache_hit=2.8e-09, output_cost_per_token=2.8e-07,
                    max_input_tokens=1048576, max_output_tokens=393216, max_tokens=393216)
        if 'pro' in mn: info = {**info, 'input_cost_per_token': 4.35e-07, 'input_cost_per_token_cache_hit': 3.625e-09, 'output_cost_per_token': 8.7e-07}
    # qwen 3p6
    if vendor_name == 'fireworks_ai' and mn == 'accounts/fireworks/models/qwen3p6-plus':
        # Not in the price map at all: fully hand-specified entry
        info = dict(supports_vision=True, supports_reasoning=True, supports_function_calling=True, supports_tool_choice=True,
                    supports_system_messages=True, supports_response_schema=True, supports_parallel_function_calling=True,
                    supports_prompt_caching=True, supports_native_streaming=True, supports_native_structured_output=True,
                    max_tokens=1000000, max_input_tokens=1000000, max_output_tokens=65536,
                    input_cost_per_token=0.5e-6, cache_read_input_token_cost=0.1e-6, output_cost_per_token=3.0e-6)
    return dict2obj(info)
|
|
294
|
+
|
|
295
|
+
# %% ../nbs/00_types.ipynb #8bfca02d
|
|
296
|
+
@patch(as_prop=True)
def cost(self:Completion):
    "Cost of this completion, computed by the registered api's `cost` fn from usage and model metadata."
    meta = dict2obj(get_model_info(self.model, self.vendor_name))
    api = api_registry.apis[self.api_name]
    if not hasattr(api, 'cost'): raise NotImplementedError(f"API: {self.api_name} doesn't have a registered `cost` function in ns")
    return api.cost(self.usage, meta)
|