python-fastllm 0.0.8__py3-none-any.whl → 0.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fastllm/__init__.py +1 -1
- fastllm/_modidx.py +3 -0
- fastllm/anthropic.py +2 -4
- fastllm/types.py +85 -52
- {python_fastllm-0.0.8.dist-info → python_fastllm-0.0.9.dist-info}/METADATA +1 -1
- {python_fastllm-0.0.8.dist-info → python_fastllm-0.0.9.dist-info}/RECORD +9 -9
- {python_fastllm-0.0.8.dist-info → python_fastllm-0.0.9.dist-info}/WHEEL +0 -0
- {python_fastllm-0.0.8.dist-info → python_fastllm-0.0.9.dist-info}/entry_points.txt +0 -0
- {python_fastllm-0.0.8.dist-info → python_fastllm-0.0.9.dist-info}/top_level.txt +0 -0
fastllm/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.0.8"
|
|
1
|
+
__version__ = "0.0.9"
|
fastllm/_modidx.py
CHANGED
|
@@ -271,17 +271,20 @@ d = { 'settings': { 'branch': 'main',
|
|
|
271
271
|
'fastllm.types.ToolCall._repr_markdown_': ('types.html#toolcall._repr_markdown_', 'fastllm/types.py'),
|
|
272
272
|
'fastllm.types.Usage': ('types.html#usage', 'fastllm/types.py'),
|
|
273
273
|
'fastllm.types._trunc_strs': ('types.html#_trunc_strs', 'fastllm/types.py'),
|
|
274
|
+
'fastllm.types.approx_pricing': ('types.html#approx_pricing', 'fastllm/types.py'),
|
|
274
275
|
'fastllm.types.data_url': ('types.html#data_url', 'fastllm/types.py'),
|
|
275
276
|
'fastllm.types.display_list': ('types.html#display_list', 'fastllm/types.py'),
|
|
276
277
|
'fastllm.types.fn_schema': ('types.html#fn_schema', 'fastllm/types.py'),
|
|
277
278
|
'fastllm.types.get_api_key': ('types.html#get_api_key', 'fastllm/types.py'),
|
|
278
279
|
'fastllm.types.get_model_info': ('types.html#get_model_info', 'fastllm/types.py'),
|
|
279
280
|
'fastllm.types.get_model_meta': ('types.html#get_model_meta', 'fastllm/types.py'),
|
|
281
|
+
'fastllm.types.get_model_pricing': ('types.html#get_model_pricing', 'fastllm/types.py'),
|
|
280
282
|
'fastllm.types.infer_api_name': ('types.html#infer_api_name', 'fastllm/types.py'),
|
|
281
283
|
'fastllm.types.mk_completion': ('types.html#mk_completion', 'fastllm/types.py'),
|
|
282
284
|
'fastllm.types.mk_tool_res_msg': ('types.html#mk_tool_res_msg', 'fastllm/types.py'),
|
|
283
285
|
'fastllm.types.model_prices_meta': ('types.html#model_prices_meta', 'fastllm/types.py'),
|
|
284
286
|
'fastllm.types.part_txt': ('types.html#part_txt', 'fastllm/types.py'),
|
|
285
287
|
'fastllm.types.payload_kwargs': ('types.html#payload_kwargs', 'fastllm/types.py'),
|
|
288
|
+
'fastllm.types.register_model_info': ('types.html#register_model_info', 'fastllm/types.py'),
|
|
286
289
|
'fastllm.types.sys_text': ('types.html#sys_text', 'fastllm/types.py'),
|
|
287
290
|
'fastllm.types.url_mime': ('types.html#url_mime', 'fastllm/types.py')}}}
|
fastllm/anthropic.py
CHANGED
|
@@ -50,10 +50,8 @@ def finalize_usage(usg, parts):
|
|
|
50
50
|
rc = '\n'.join(p.text or '' for p in parts if p.type == PartType.thinking)
|
|
51
51
|
ct = int(usg.raw.get('output_tokens', usg.completion_tokens) or 0)
|
|
52
52
|
rt = min(int(len(rc.split())*1.5), ct) if rc else 0
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
print(res)
|
|
56
|
-
return res
|
|
53
|
+
return Usage(prompt_tokens=usg.prompt_tokens, completion_tokens=ct-rt, total_tokens=usg.prompt_tokens+ct,
|
|
54
|
+
cached_tokens=usg.cached_tokens, cache_creation_tokens=usg.cache_creation_tokens, reasoning_tokens=rt, raw=usg.raw)
|
|
57
55
|
|
|
58
56
|
# %% ../nbs/04_anthropic.ipynb #7a8b1f8f
|
|
59
57
|
def norm_finish(resp, tcs=None):
|
fastllm/types.py
CHANGED
|
@@ -4,10 +4,11 @@
|
|
|
4
4
|
|
|
5
5
|
# %% auto #0
|
|
6
6
|
__all__ = ['PartType', 'FinishReason', 'api_registry', 'model_prices_url', 'haik45', 'sonn45', 'sonn', 'sonn46', 'opus46', 'opus',
|
|
7
|
-
'gpt54', 'gpt54m', '
|
|
8
|
-
'
|
|
9
|
-
'
|
|
10
|
-
'
|
|
7
|
+
'gpt54', 'gpt54m', 'gpt55', 'codex54', 'codex54m', 'codex55', 'codex53spark', 'model_info_registry',
|
|
8
|
+
'deepseek_v4_common', 'codex_pricing', 'Part', 'Msg', 'ToolCall', 'display_list', 'Usage', 'Completion',
|
|
9
|
+
'APIRegistry', 'mk_completion', 'mk_tool_res_msg', 'fn_schema', 'sys_text', 'part_txt', 'data_url',
|
|
10
|
+
'url_mime', 'payload_kwargs', 'get_api_key', 'model_prices_meta', 'infer_api_name', 'get_model_meta',
|
|
11
|
+
'register_model_info', 'get_model_info', 'get_model_pricing', 'approx_pricing']
|
|
11
12
|
|
|
12
13
|
# %% ../nbs/00_types.ipynb #b4d047fd
|
|
13
14
|
from dataclasses import dataclass, field
|
|
@@ -229,6 +230,7 @@ def get_api_key(api_key, default):
|
|
|
229
230
|
|
|
230
231
|
# %% ../nbs/00_types.ipynb #852adecd
|
|
231
232
|
model_prices_url = 'https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json'
|
|
233
|
+
|
|
232
234
|
@flexicache(time_policy(24*60*60))
|
|
233
235
|
def model_prices_meta(): return urljson(model_prices_url)
|
|
234
236
|
|
|
@@ -258,63 +260,94 @@ opus46 = "claude-opus-4-6"
|
|
|
258
260
|
opus = "claude-opus-4-7"
|
|
259
261
|
gpt54 = "gpt-5.4"
|
|
260
262
|
gpt54m = "gpt-5.4-mini"
|
|
263
|
+
gpt55 = "gpt-5.5"
|
|
261
264
|
codex54 = "gpt-5.4"
|
|
265
|
+
codex54m = "gpt-5.4-mini"
|
|
262
266
|
codex55 = "gpt-5.5"
|
|
263
267
|
codex53spark = "gpt-5.3-codex-spark"
|
|
264
268
|
|
|
265
|
-
# %% ../nbs/00_types.ipynb #
|
|
266
|
-
|
|
267
|
-
"input_cost_per_token": 0.10 / 1_000_000,
|
|
268
|
-
"cache_creation_input_token_cost": 0.10 / 1_000_000,
|
|
269
|
-
"cache_read_input_token_cost": 0.10 / 1_000_000,
|
|
270
|
-
"output_cost_per_token": 0.50 / 1_000_000,
|
|
271
|
-
}
|
|
269
|
+
# %% ../nbs/00_types.ipynb #583e017b
|
|
270
|
+
model_info_registry = {}
|
|
272
271
|
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
272
|
+
def register_model_info(model, vendor_name=None, base=None, base_vendor_name=None, **overrides):
|
|
273
|
+
"Register model metadata, optionally starting from `base`."
|
|
274
|
+
info = dict(get_model_info(base, base_vendor_name or vendor_name)) if base else {}
|
|
275
|
+
info.update(overrides)
|
|
276
|
+
model_info_registry[vendor_name, model] = info
|
|
278
277
|
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
info = get_model_meta(mn, 'chatgpt' if vendor_name=='codex' else vendor_name)
|
|
282
|
-
# anthropic web search
|
|
278
|
+
def get_model_info(mn, vendor_name=None):
|
|
279
|
+
info = model_info_registry.get((vendor_name, mn)) or get_model_meta(mn, vendor_name)
|
|
283
280
|
if 'search_context_cost_per_query' in info: info['supports_web_search'] = True
|
|
284
|
-
# kimi
|
|
285
|
-
if 'kimi' in mn:
|
|
286
|
-
if 'k2p6' in mn: info = get_model_meta(mn.replace('k2p6', 'k2p5'), vendor_name)
|
|
287
|
-
info['supports_reasoning'] = True
|
|
288
|
-
info['supports_vision'] = True
|
|
289
|
-
if vendor_name == 'moonshot': info['supports_assistant_prefill'] = True
|
|
290
|
-
# gpt web search
|
|
291
|
-
if mn in ("gpt-5.4", "gpt-5.4-mini"):
|
|
292
|
-
info['supports_web_search'] = True
|
|
293
|
-
info.pop('mode', None)
|
|
294
|
-
# codex updates
|
|
295
|
-
if vendor_name == 'codex':
|
|
296
|
-
info = merge(info, codex_pricing)
|
|
297
|
-
info |= _codex_overrides.get(mn, {})
|
|
298
|
-
# deepseek v4
|
|
299
|
-
if vendor_name == 'deepseek' and mn in ("deepseek-v4-flash", "deepseek-v4-pro"):
|
|
300
|
-
info = dict(get_model_meta("deepseek/deepseek-v3.2"))
|
|
301
|
-
info |= dict(supports_assistant_prefill=True, supports_function_calling=True, supports_prompt_caching=True,
|
|
302
|
-
supports_reasoning=True, supports_tool_choice=True)
|
|
303
|
-
info.update(input_cost_per_token=1.4e-07, input_cost_per_token_cache_hit=2.8e-09, output_cost_per_token=2.8e-07,
|
|
304
|
-
max_input_tokens=1048576, max_output_tokens=393216, max_tokens=393216)
|
|
305
|
-
if 'pro' in mn: info = {**info, 'input_cost_per_token': 4.35e-07, 'input_cost_per_token_cache_hit': 3.625e-09, 'output_cost_per_token': 8.7e-07}
|
|
306
|
-
# qwen 3p6
|
|
307
|
-
if vendor_name == 'fireworks_ai' and mn == 'accounts/fireworks/models/qwen3p6-plus':
|
|
308
|
-
info = dict(supports_vision=True, supports_reasoning=True, supports_function_calling=True, supports_tool_choice=True,
|
|
309
|
-
supports_system_messages=True, supports_response_schema=True, supports_parallel_function_calling=True,
|
|
310
|
-
supports_prompt_caching=True, supports_native_streaming=True, supports_native_structured_output=True,
|
|
311
|
-
max_tokens=1000000, max_input_tokens=1000000, max_output_tokens=65536,
|
|
312
|
-
input_cost_per_token=0.5e-6, cache_read_input_token_cost=0.1e-6, output_cost_per_token=3.0e-6)
|
|
313
|
-
|
|
314
|
-
# unresolved models
|
|
315
|
-
if not info and not strict: info = info | codex_pricing
|
|
316
281
|
return dict2obj(info)
|
|
317
282
|
|
|
283
|
+
# %% ../nbs/00_types.ipynb #8261dcd0
|
|
284
|
+
register_model_info('accounts/fireworks/models/qwen3p6-plus', vendor_name='fireworks_ai',
|
|
285
|
+
supports_vision=True, supports_reasoning=True, supports_function_calling=True, supports_tool_choice=True,
|
|
286
|
+
supports_system_messages=True, supports_response_schema=True, supports_parallel_function_calling=True,
|
|
287
|
+
supports_prompt_caching=True, supports_native_streaming=True, supports_native_structured_output=True,
|
|
288
|
+
max_tokens=1000000, max_input_tokens=1000000, max_output_tokens=65536,
|
|
289
|
+
input_cost_per_token=0.5e-6, cache_read_input_token_cost=0.1e-6, output_cost_per_token=3.0e-6)
|
|
290
|
+
|
|
291
|
+
register_model_info('gemini-3.5-flash', vendor_name='gemini', base='gemini-3-flash-preview',
|
|
292
|
+
input_cost_per_token=1.5e-6, output_cost_per_token=9e-6,
|
|
293
|
+
output_cost_per_reasoning_token=9e-6, cache_read_input_token_cost=1.5e-7)
|
|
294
|
+
|
|
295
|
+
for model in ('gpt-5.4', 'gpt-5.4-mini'):
|
|
296
|
+
register_model_info(model, vendor_name='openai', base=model, supports_web_search=True, mode=None)
|
|
297
|
+
|
|
298
|
+
for model in ('kimi-k2.5', 'kimi-k2.6'):
|
|
299
|
+
register_model_info(model, vendor_name='moonshot', base=f'moonshot/{model}', base_vendor_name=None,
|
|
300
|
+
supports_reasoning=True, supports_vision=True, supports_assistant_prefill=True)
|
|
301
|
+
|
|
302
|
+
register_model_info('gemini-3.1-flash-lite', vendor_name='gemini', base='gemini-3.1-flash-lite-preview')
|
|
303
|
+
register_model_info('models/gemini-3.1-flash-lite', vendor_name='gemini', base='gemini-3.1-flash-lite-preview')
|
|
304
|
+
|
|
305
|
+
for model in ('accounts/fireworks/models/kimi-k2p5', 'accounts/fireworks/models/kimi-k2p6'):
|
|
306
|
+
register_model_info(model, vendor_name='fireworks_ai', base=model.replace('k2p6', 'k2p5'),
|
|
307
|
+
supports_reasoning=True, supports_vision=True,
|
|
308
|
+
input_cost_per_token=0.95e-6, cache_read_input_token_cost=0.16e-6, output_cost_per_token=4.0e-6)
|
|
309
|
+
|
|
310
|
+
# %% ../nbs/00_types.ipynb #948d55d0
|
|
311
|
+
deepseek_v4_common = dict(
|
|
312
|
+
supports_assistant_prefill=True, supports_function_calling=True, supports_prompt_caching=True,
|
|
313
|
+
supports_reasoning=True, supports_tool_choice=True,
|
|
314
|
+
max_input_tokens=1048576, max_output_tokens=393216, max_tokens=393216)
|
|
315
|
+
|
|
316
|
+
register_model_info('deepseek-v4-flash', vendor_name='deepseek', base='deepseek/deepseek-v3.2', **deepseek_v4_common,
|
|
317
|
+
input_cost_per_token=1.4e-07, input_cost_per_token_cache_hit=2.8e-09,
|
|
318
|
+
output_cost_per_token=2.8e-07, cache_read_input_token_cost=1.4e-07/10)
|
|
319
|
+
register_model_info('deepseek-v4-pro', vendor_name='deepseek', base='deepseek/deepseek-v3.2', **deepseek_v4_common,
|
|
320
|
+
input_cost_per_token=4.35e-07, input_cost_per_token_cache_hit=3.625e-09,
|
|
321
|
+
output_cost_per_token=8.7e-07, cache_read_input_token_cost=4.35e-07/10)
|
|
322
|
+
|
|
323
|
+
# %% ../nbs/00_types.ipynb #2c23d11e
|
|
324
|
+
codex_pricing = dict(
|
|
325
|
+
input_cost_per_token = 0.10/1_000_000, output_cost_per_token = 0.50/1_000_000,
|
|
326
|
+
cache_creation_input_token_cost = 0.10/1_000_000, cache_read_input_token_cost = 0.10/1_000_000)
|
|
327
|
+
|
|
328
|
+
for model in (codex54, codex54m, codex55):
|
|
329
|
+
register_model_info(model, 'codex', base=model, base_vendor_name='chatgpt', supports_web_search=True, **codex_pricing)
|
|
330
|
+
|
|
331
|
+
register_model_info(codex53spark, 'codex', **codex_pricing,
|
|
332
|
+
supports_vision=False, supports_image_input=False, supports_web_search=True, supports_reasoning=True,
|
|
333
|
+
max_tokens=128000, max_input_tokens=128000, max_output_tokens=128000)
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
# %% ../nbs/00_types.ipynb #24cc47ec
|
|
337
|
+
def get_model_pricing(mn, vendor_name, million=True):
|
|
338
|
+
return {k:round(v * (1e6 if million else 1), 6)
|
|
339
|
+
for k,v in get_model_info(mn, vendor_name).items()
|
|
340
|
+
if 'cost' in k and isinstance(v,float) and 'priority' not in k}
|
|
341
|
+
|
|
342
|
+
# %% ../nbs/00_types.ipynb #79304cd9
|
|
343
|
+
def approx_pricing(nm, vendor_name, out=10, cache=80, inp=10, markup=0):
|
|
344
|
+
"Approx cost per million tokens with given output/cache/input proportions"
|
|
345
|
+
p = get_model_pricing(nm, vendor_name)
|
|
346
|
+
ic = p.get('cache_creation_input_token_cost', p['input_cost_per_token'])
|
|
347
|
+
res = (p['output_cost_per_token']*out + p['cache_read_input_token_cost']*cache + ic*inp) / (out+cache+inp)
|
|
348
|
+
if nm=='claude-opus-4-7': res *= 1.5
|
|
349
|
+
return res*(1+markup)
|
|
350
|
+
|
|
318
351
|
# %% ../nbs/00_types.ipynb #8bfca02d
|
|
319
352
|
@patch(as_prop=True)
|
|
320
353
|
def cost(self:Completion):
|
|
@@ -1,22 +1,22 @@
|
|
|
1
|
-
fastllm/__init__.py,sha256=
|
|
2
|
-
fastllm/_modidx.py,sha256=
|
|
1
|
+
fastllm/__init__.py,sha256=46Yjk3fz9o8aTN8E95McnzpJcjGzVJmHmQqUZ5mXzfc,22
|
|
2
|
+
fastllm/_modidx.py,sha256=xvXs-Irzd8lL5GeOdmOpuhv51PnfJOIE2xmyOar62tM,33661
|
|
3
3
|
fastllm/acomplete.py,sha256=jSQDotocHp5dK9c18_MxqZkT626XoLNCftlTkKZrqhY,7440
|
|
4
|
-
fastllm/anthropic.py,sha256=
|
|
4
|
+
fastllm/anthropic.py,sha256=sgN_ejJbxhfiTgddNfAn_nrH85BTsQKsdEW0n76tPjw,15090
|
|
5
5
|
fastllm/chat.py,sha256=LKqOczYFRM-s82seTbxBWFW5G16oqnG7nQO5oi3E_T4,35856
|
|
6
6
|
fastllm/codex.py,sha256=HZchfrGUgdf8ayhtOFbIRmh9YmIqfQBwqviAEeir4Uo,161
|
|
7
7
|
fastllm/gemini.py,sha256=E1EYMfV8IMpC_-WzlDrkhz_CJQmzmxvaVUucNgPOqSA,14947
|
|
8
8
|
fastllm/openai_chat.py,sha256=wZ0HI0m9ipy9XVhqmYBXf-BmkVAOipUVwqu9NGB_rJU,10941
|
|
9
9
|
fastllm/openai_responses.py,sha256=Nk5bfTCF2-a17nwvIsf-u39j539v9KIduVfScECItKk,13052
|
|
10
10
|
fastllm/streaming.py,sha256=aBEa1cFbK2XbFuEP_EG8FW0hp4T-FzZ2ZlwdPqGLpw8,6755
|
|
11
|
-
fastllm/types.py,sha256=
|
|
11
|
+
fastllm/types.py,sha256=AMaeki3Gu0urC57M7sJ-9OAqP__QH8aFL7X81IaW-24,14483
|
|
12
12
|
fastllm/specs/anthropic.json,sha256=VCgTjM2_HoDpCkeu3q_TCOEZLMHriJZLAG3LnDBAgGM,541035
|
|
13
13
|
fastllm/specs/anthropic.yml,sha256=3S3NAKdXB1Nwp-Sn9Gmh4tBnwhGGhMO3DXkGqPXPUYs,724122
|
|
14
14
|
fastllm/specs/gemini.json,sha256=zJGOdvZ2BvCiTENZt0-BDEvNBMl8h6EBmEskle_WBto,309331
|
|
15
15
|
fastllm/specs/openai.with-code-samples.json,sha256=Kto19AW1u8MfxVDJ4cFVBIdZQOIyy8NWylswo57eABU,1995929
|
|
16
16
|
fastllm/specs/openai.with-code-samples.yml,sha256=DlcWGdaeP4k7smVjt6UbyehJ-2XGU3rn3nCIBMDRfYU,2553630
|
|
17
17
|
fastllm/specs/spec_manifest.json,sha256=9tVFwojXFnNqsAxQzCRTP1lgSIM0fXixnrXdv4Cmb0c,653
|
|
18
|
-
python_fastllm-0.0.
|
|
19
|
-
python_fastllm-0.0.
|
|
20
|
-
python_fastllm-0.0.
|
|
21
|
-
python_fastllm-0.0.
|
|
22
|
-
python_fastllm-0.0.
|
|
18
|
+
python_fastllm-0.0.9.dist-info/METADATA,sha256=iEjvHoq-bK6aq5rn8Xt4M9-7OnRGIkUVUyWkk6VgpG8,19546
|
|
19
|
+
python_fastllm-0.0.9.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
20
|
+
python_fastllm-0.0.9.dist-info/entry_points.txt,sha256=dq0chsiRjJYStCOXweFW9L6LpyMTjWu2AabKCbTSbuI,36
|
|
21
|
+
python_fastllm-0.0.9.dist-info/top_level.txt,sha256=F8qodL7nEGUHGmzzqfhNKCTIr1i0D6cvudOnm3z7o0Y,8
|
|
22
|
+
python_fastllm-0.0.9.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|