python-fastllm 0.0.8__tar.gz → 0.0.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_fastllm-0.0.8 → python_fastllm-0.0.9}/PKG-INFO +1 -1
- python_fastllm-0.0.9/fastllm/__init__.py +1 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/_modidx.py +3 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/anthropic.py +2 -4
- {python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/types.py +85 -52
- {python_fastllm-0.0.8 → python_fastllm-0.0.9}/python_fastllm.egg-info/PKG-INFO +1 -1
- python_fastllm-0.0.8/fastllm/__init__.py +0 -1
- {python_fastllm-0.0.8 → python_fastllm-0.0.9}/README.md +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/acomplete.py +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/chat.py +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/codex.py +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/gemini.py +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/openai_chat.py +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/openai_responses.py +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/specs/anthropic.json +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/specs/anthropic.yml +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/specs/gemini.json +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/specs/openai.with-code-samples.json +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/specs/openai.with-code-samples.yml +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/specs/spec_manifest.json +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/streaming.py +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.9}/pyproject.toml +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.9}/python_fastllm.egg-info/SOURCES.txt +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.9}/python_fastllm.egg-info/dependency_links.txt +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.9}/python_fastllm.egg-info/entry_points.txt +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.9}/python_fastllm.egg-info/requires.txt +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.9}/python_fastllm.egg-info/top_level.txt +0 -0
- {python_fastllm-0.0.8 → python_fastllm-0.0.9}/setup.cfg +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.0.9"
|
|
@@ -271,17 +271,20 @@ d = { 'settings': { 'branch': 'main',
|
|
|
271
271
|
'fastllm.types.ToolCall._repr_markdown_': ('types.html#toolcall._repr_markdown_', 'fastllm/types.py'),
|
|
272
272
|
'fastllm.types.Usage': ('types.html#usage', 'fastllm/types.py'),
|
|
273
273
|
'fastllm.types._trunc_strs': ('types.html#_trunc_strs', 'fastllm/types.py'),
|
|
274
|
+
'fastllm.types.approx_pricing': ('types.html#approx_pricing', 'fastllm/types.py'),
|
|
274
275
|
'fastllm.types.data_url': ('types.html#data_url', 'fastllm/types.py'),
|
|
275
276
|
'fastllm.types.display_list': ('types.html#display_list', 'fastllm/types.py'),
|
|
276
277
|
'fastllm.types.fn_schema': ('types.html#fn_schema', 'fastllm/types.py'),
|
|
277
278
|
'fastllm.types.get_api_key': ('types.html#get_api_key', 'fastllm/types.py'),
|
|
278
279
|
'fastllm.types.get_model_info': ('types.html#get_model_info', 'fastllm/types.py'),
|
|
279
280
|
'fastllm.types.get_model_meta': ('types.html#get_model_meta', 'fastllm/types.py'),
|
|
281
|
+
'fastllm.types.get_model_pricing': ('types.html#get_model_pricing', 'fastllm/types.py'),
|
|
280
282
|
'fastllm.types.infer_api_name': ('types.html#infer_api_name', 'fastllm/types.py'),
|
|
281
283
|
'fastllm.types.mk_completion': ('types.html#mk_completion', 'fastllm/types.py'),
|
|
282
284
|
'fastllm.types.mk_tool_res_msg': ('types.html#mk_tool_res_msg', 'fastllm/types.py'),
|
|
283
285
|
'fastllm.types.model_prices_meta': ('types.html#model_prices_meta', 'fastllm/types.py'),
|
|
284
286
|
'fastllm.types.part_txt': ('types.html#part_txt', 'fastllm/types.py'),
|
|
285
287
|
'fastllm.types.payload_kwargs': ('types.html#payload_kwargs', 'fastllm/types.py'),
|
|
288
|
+
'fastllm.types.register_model_info': ('types.html#register_model_info', 'fastllm/types.py'),
|
|
286
289
|
'fastllm.types.sys_text': ('types.html#sys_text', 'fastllm/types.py'),
|
|
287
290
|
'fastllm.types.url_mime': ('types.html#url_mime', 'fastllm/types.py')}}}
|
|
@@ -50,10 +50,8 @@ def finalize_usage(usg, parts):
|
|
|
50
50
|
rc = '\n'.join(p.text or '' for p in parts if p.type == PartType.thinking)
|
|
51
51
|
ct = int(usg.raw.get('output_tokens', usg.completion_tokens) or 0)
|
|
52
52
|
rt = min(int(len(rc.split())*1.5), ct) if rc else 0
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
print(res)
|
|
56
|
-
return res
|
|
53
|
+
return Usage(prompt_tokens=usg.prompt_tokens, completion_tokens=ct-rt, total_tokens=usg.prompt_tokens+ct,
|
|
54
|
+
cached_tokens=usg.cached_tokens, cache_creation_tokens=usg.cache_creation_tokens, reasoning_tokens=rt, raw=usg.raw)
|
|
57
55
|
|
|
58
56
|
# %% ../nbs/04_anthropic.ipynb #7a8b1f8f
|
|
59
57
|
def norm_finish(resp, tcs=None):
|
|
@@ -4,10 +4,11 @@
|
|
|
4
4
|
|
|
5
5
|
# %% auto #0
|
|
6
6
|
__all__ = ['PartType', 'FinishReason', 'api_registry', 'model_prices_url', 'haik45', 'sonn45', 'sonn', 'sonn46', 'opus46', 'opus',
|
|
7
|
-
'gpt54', 'gpt54m', '
|
|
8
|
-
'
|
|
9
|
-
'
|
|
10
|
-
'
|
|
7
|
+
'gpt54', 'gpt54m', 'gpt55', 'codex54', 'codex54m', 'codex55', 'codex53spark', 'model_info_registry',
|
|
8
|
+
'deepseek_v4_common', 'codex_pricing', 'Part', 'Msg', 'ToolCall', 'display_list', 'Usage', 'Completion',
|
|
9
|
+
'APIRegistry', 'mk_completion', 'mk_tool_res_msg', 'fn_schema', 'sys_text', 'part_txt', 'data_url',
|
|
10
|
+
'url_mime', 'payload_kwargs', 'get_api_key', 'model_prices_meta', 'infer_api_name', 'get_model_meta',
|
|
11
|
+
'register_model_info', 'get_model_info', 'get_model_pricing', 'approx_pricing']
|
|
11
12
|
|
|
12
13
|
# %% ../nbs/00_types.ipynb #b4d047fd
|
|
13
14
|
from dataclasses import dataclass, field
|
|
@@ -229,6 +230,7 @@ def get_api_key(api_key, default):
|
|
|
229
230
|
|
|
230
231
|
# %% ../nbs/00_types.ipynb #852adecd
|
|
231
232
|
model_prices_url = 'https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json'
|
|
233
|
+
|
|
232
234
|
@flexicache(time_policy(60 * 60 * 24))
def model_prices_meta():
    """Return the JSON document fetched from `model_prices_url`.

    The call is wrapped in `flexicache` with a `time_policy` of 86400
    (presumably seconds, i.e. one day -- confirm against `time_policy`).
    """
    prices = urljson(model_prices_url)
    return prices
|
|
234
236
|
|
|
@@ -258,63 +260,94 @@ opus46 = "claude-opus-4-6"
|
|
|
258
260
|
opus = "claude-opus-4-7"
|
|
259
261
|
gpt54 = "gpt-5.4"
|
|
260
262
|
gpt54m = "gpt-5.4-mini"
|
|
263
|
+
gpt55 = "gpt-5.5"
|
|
261
264
|
codex54 = "gpt-5.4"
|
|
265
|
+
codex54m = "gpt-5.4-mini"
|
|
262
266
|
codex55 = "gpt-5.5"
|
|
263
267
|
codex53spark = "gpt-5.3-codex-spark"
|
|
264
268
|
|
|
265
|
-
# %% ../nbs/00_types.ipynb #
|
|
266
|
-
|
|
267
|
-
"input_cost_per_token": 0.10 / 1_000_000,
|
|
268
|
-
"cache_creation_input_token_cost": 0.10 / 1_000_000,
|
|
269
|
-
"cache_read_input_token_cost": 0.10 / 1_000_000,
|
|
270
|
-
"output_cost_per_token": 0.50 / 1_000_000,
|
|
271
|
-
}
|
|
269
|
+
# %% ../nbs/00_types.ipynb #583e017b
|
|
270
|
+
# Manually registered model metadata, keyed by `(vendor_name, model)`.
model_info_registry = {}


def register_model_info(model, vendor_name=None, base=None, base_vendor_name=None, **overrides):
    """Store metadata for `model` in `model_info_registry` under `(vendor_name, model)`.

    When `base` is truthy the entry starts as a shallow copy of
    `get_model_info(base, ...)`. That lookup uses `base_vendor_name`, or
    `vendor_name` whenever `base_vendor_name` is falsy -- so an explicit
    `base_vendor_name=None` behaves exactly like omitting it.
    `overrides` are applied on top of the copied entry. Without `base`
    the entry consists of `overrides` alone.

    Re-registering the same `(vendor_name, model)` replaces the old entry.
    Returns `None`.
    """
    if base:
        lookup_vendor = base_vendor_name or vendor_name
        entry = dict(get_model_info(base, lookup_vendor))
    else:
        entry = {}
    entry.update(overrides)
    model_info_registry[(vendor_name, model)] = entry
|
|
278
277
|
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
info = get_model_meta(mn, 'chatgpt' if vendor_name=='codex' else vendor_name)
|
|
282
|
-
# anthropic web search
|
|
278
|
+
def get_model_info(mn, vendor_name=None):
|
|
279
|
+
info = model_info_registry.get((vendor_name, mn)) or get_model_meta(mn, vendor_name)
|
|
283
280
|
if 'search_context_cost_per_query' in info: info['supports_web_search'] = True
|
|
284
|
-
# kimi
|
|
285
|
-
if 'kimi' in mn:
|
|
286
|
-
if 'k2p6' in mn: info = get_model_meta(mn.replace('k2p6', 'k2p5'), vendor_name)
|
|
287
|
-
info['supports_reasoning'] = True
|
|
288
|
-
info['supports_vision'] = True
|
|
289
|
-
if vendor_name == 'moonshot': info['supports_assistant_prefill'] = True
|
|
290
|
-
# gpt web search
|
|
291
|
-
if mn in ("gpt-5.4", "gpt-5.4-mini"):
|
|
292
|
-
info['supports_web_search'] = True
|
|
293
|
-
info.pop('mode', None)
|
|
294
|
-
# codex updates
|
|
295
|
-
if vendor_name == 'codex':
|
|
296
|
-
info = merge(info, codex_pricing)
|
|
297
|
-
info |= _codex_overrides.get(mn, {})
|
|
298
|
-
# deepseek v4
|
|
299
|
-
if vendor_name == 'deepseek' and mn in ("deepseek-v4-flash", "deepseek-v4-pro"):
|
|
300
|
-
info = dict(get_model_meta("deepseek/deepseek-v3.2"))
|
|
301
|
-
info |= dict(supports_assistant_prefill=True, supports_function_calling=True, supports_prompt_caching=True,
|
|
302
|
-
supports_reasoning=True, supports_tool_choice=True)
|
|
303
|
-
info.update(input_cost_per_token=1.4e-07, input_cost_per_token_cache_hit=2.8e-09, output_cost_per_token=2.8e-07,
|
|
304
|
-
max_input_tokens=1048576, max_output_tokens=393216, max_tokens=393216)
|
|
305
|
-
if 'pro' in mn: info = {**info, 'input_cost_per_token': 4.35e-07, 'input_cost_per_token_cache_hit': 3.625e-09, 'output_cost_per_token': 8.7e-07}
|
|
306
|
-
# qwen 3p6
|
|
307
|
-
if vendor_name == 'fireworks_ai' and mn == 'accounts/fireworks/models/qwen3p6-plus':
|
|
308
|
-
info = dict(supports_vision=True, supports_reasoning=True, supports_function_calling=True, supports_tool_choice=True,
|
|
309
|
-
supports_system_messages=True, supports_response_schema=True, supports_parallel_function_calling=True,
|
|
310
|
-
supports_prompt_caching=True, supports_native_streaming=True, supports_native_structured_output=True,
|
|
311
|
-
max_tokens=1000000, max_input_tokens=1000000, max_output_tokens=65536,
|
|
312
|
-
input_cost_per_token=0.5e-6, cache_read_input_token_cost=0.1e-6, output_cost_per_token=3.0e-6)
|
|
313
|
-
|
|
314
|
-
# unresolved models
|
|
315
|
-
if not info and not strict: info = info | codex_pricing
|
|
316
281
|
return dict2obj(info)
|
|
317
282
|
|
|
283
|
+
# %% ../nbs/00_types.ipynb #8261dcd0
|
|
284
|
+
# Registered without a `base`, so every field is listed explicitly.
register_model_info(
    'accounts/fireworks/models/qwen3p6-plus',
    vendor_name='fireworks_ai',
    supports_vision=True,
    supports_reasoning=True,
    supports_function_calling=True,
    supports_tool_choice=True,
    supports_system_messages=True,
    supports_response_schema=True,
    supports_parallel_function_calling=True,
    supports_prompt_caching=True,
    supports_native_streaming=True,
    supports_native_structured_output=True,
    max_tokens=1000000,
    max_input_tokens=1000000,
    max_output_tokens=65536,
    input_cost_per_token=0.5e-6,
    cache_read_input_token_cost=0.1e-6,
    output_cost_per_token=3.0e-6,
)

# Starts from the `gemini-3-flash-preview` entry and overrides its prices.
register_model_info(
    'gemini-3.5-flash',
    vendor_name='gemini',
    base='gemini-3-flash-preview',
    input_cost_per_token=1.5e-6,
    output_cost_per_token=9e-6,
    output_cost_per_reasoning_token=9e-6,
    cache_read_input_token_cost=1.5e-7,
)

# Each model is its own base: flag web search and set `mode` to None.
for model in ['gpt-5.4', 'gpt-5.4-mini']:
    register_model_info(
        model,
        vendor_name='openai',
        base=model,
        supports_web_search=True,
        mode=None,
    )

# Base entries are looked up under the `moonshot/`-prefixed name.
for model in ['kimi-k2.5', 'kimi-k2.6']:
    register_model_info(
        model,
        vendor_name='moonshot',
        base='moonshot/' + model,
        base_vendor_name=None,
        supports_reasoning=True,
        supports_vision=True,
        supports_assistant_prefill=True,
    )

# Both spellings reuse the `gemini-3.1-flash-lite-preview` entry unchanged.
for model in ['gemini-3.1-flash-lite', 'models/gemini-3.1-flash-lite']:
    register_model_info(model, vendor_name='gemini', base='gemini-3.1-flash-lite-preview')

# k2p6 is based on the k2p5 entry; k2p5 is based on itself.
for model in ['accounts/fireworks/models/kimi-k2p5', 'accounts/fireworks/models/kimi-k2p6']:
    register_model_info(
        model,
        vendor_name='fireworks_ai',
        base=model.replace('k2p6', 'k2p5'),
        supports_reasoning=True,
        supports_vision=True,
        input_cost_per_token=0.95e-6,
        cache_read_input_token_cost=0.16e-6,
        output_cost_per_token=4.0e-6,
    )
|
|
309
|
+
|
|
310
|
+
# %% ../nbs/00_types.ipynb #948d55d0
|
|
311
|
+
# Overrides shared by both DeepSeek V4 registrations below.
deepseek_v4_common = {
    'supports_assistant_prefill': True,
    'supports_function_calling': True,
    'supports_prompt_caching': True,
    'supports_reasoning': True,
    'supports_tool_choice': True,
    'max_input_tokens': 1048576,
    'max_output_tokens': 393216,
    'max_tokens': 393216,
}

# Both variants start from the `deepseek/deepseek-v3.2` entry; only prices differ.
# `cache_read_input_token_cost` is set to one tenth of the input price.
register_model_info(
    'deepseek-v4-flash',
    vendor_name='deepseek',
    base='deepseek/deepseek-v3.2',
    **deepseek_v4_common,
    input_cost_per_token=1.4e-07,
    input_cost_per_token_cache_hit=2.8e-09,
    output_cost_per_token=2.8e-07,
    cache_read_input_token_cost=1.4e-07/10,
)
register_model_info(
    'deepseek-v4-pro',
    vendor_name='deepseek',
    base='deepseek/deepseek-v3.2',
    **deepseek_v4_common,
    input_cost_per_token=4.35e-07,
    input_cost_per_token_cache_hit=3.625e-09,
    output_cost_per_token=8.7e-07,
    cache_read_input_token_cost=4.35e-07/10,
)
|
|
322
|
+
|
|
323
|
+
# %% ../nbs/00_types.ipynb #2c23d11e
|
|
324
|
+
# Flat per-token prices applied to every codex registration below.
codex_pricing = {
    'input_cost_per_token': 0.10/1_000_000,
    'output_cost_per_token': 0.50/1_000_000,
    'cache_creation_input_token_cost': 0.10/1_000_000,
    'cache_read_input_token_cost': 0.10/1_000_000,
}

# Codex models start from the same-named entry looked up under vendor `chatgpt`.
for model in [codex54, codex54m, codex55]:
    register_model_info(
        model,
        'codex',
        base=model,
        base_vendor_name='chatgpt',
        supports_web_search=True,
        **codex_pricing,
    )

# The spark model has no `base`, so capabilities and limits are listed explicitly.
register_model_info(
    codex53spark,
    'codex',
    **codex_pricing,
    supports_vision=False,
    supports_image_input=False,
    supports_web_search=True,
    supports_reasoning=True,
    max_tokens=128000,
    max_input_tokens=128000,
    max_output_tokens=128000,
)
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
# %% ../nbs/00_types.ipynb #24cc47ec
|
|
337
|
+
def get_model_pricing(mn, vendor_name, million=True):
    """Return the numeric cost fields of a model's metadata as a plain dict.

    Looks the model up with `get_model_info(mn, vendor_name)` and keeps every
    entry whose key contains `'cost'` but not `'priority'` and whose value is
    a real number.

    With `million=True` (the default) each value is multiplied by 1e6 and
    rounded to 6 decimals. With `million=False` the stored values are
    returned untouched.
    """
    prices = {}
    for key, val in get_model_info(mn, vendor_name).items():
        if 'cost' not in key or 'priority' in key: continue
        # Accept int as well as float so an integer-typed cost (e.g. a literal
        # `0`) is not silently dropped; `bool` is an `int` subclass, so skip it.
        if isinstance(val, bool) or not isinstance(val, (int, float)): continue
        # Only round the scaled figure: rounding raw per-token prices to 6
        # decimals would collapse them (round(1.4e-07, 6) == 0.0).
        prices[key] = round(val * 1e6, 6) if million else val
    return prices
|
|
341
|
+
|
|
342
|
+
# %% ../nbs/00_types.ipynb #79304cd9
|
|
343
|
+
def approx_pricing(nm, vendor_name, out=10, cache=80, inp=10, markup=0):
    """Approx cost per million tokens with given output/cache/input proportions.

    `out`, `cache` and `inp` are relative weights for output tokens,
    cache-read tokens and fresh input tokens. The result is the weighted
    mean of the matching prices from `get_model_pricing(nm, vendor_name)`,
    multiplied by `1 + markup`.

    Fresh input is priced at `cache_creation_input_token_cost` when present,
    else at `input_cost_per_token`. Cache reads are priced at
    `cache_read_input_token_cost` when present, else at `input_cost_per_token`.

    Raises `KeyError` if `input_cost_per_token` or `output_cost_per_token`
    is missing, and `ZeroDivisionError` if the three weights sum to zero.
    """
    p = get_model_pricing(nm, vendor_name)
    input_cost = p['input_cost_per_token']
    fresh_cost = p.get('cache_creation_input_token_cost', input_cost)
    # Models without a cache-read price fall back to the input price instead of
    # raising KeyError, mirroring the cache-creation fallback above.
    cached_cost = p.get('cache_read_input_token_cost', input_cost)
    weighted = p['output_cost_per_token']*out + cached_cost*cache + fresh_cost*inp
    res = weighted / (out+cache+inp)
    # NOTE(review): hard-coded 1.5x uplift for this one model name; the reason
    # is not visible here -- confirm before relying on it.
    if nm == 'claude-opus-4-7': res *= 1.5
    return res*(1+markup)
|
|
350
|
+
|
|
318
351
|
# %% ../nbs/00_types.ipynb #8bfca02d
|
|
319
352
|
@patch(as_prop=True)
|
|
320
353
|
def cost(self:Completion):
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.0.8"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|