PyPI - python-fastllm - Versions diffs - 0.0.8__tar.gz → 0.0.9__tar.gz - Mend

python-fastllm 0.0.8 (tar.gz) → 0.0.9 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)

{python_fastllm-0.0.8 → python_fastllm-0.0.9}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: python-fastllm
-Version: 0.0.8
+Version: 0.0.9
 Author-email: Kerem Turgutlu <keremturgutlu@gmail.com>
 License: Apache-2.0
 Project-URL: Repository, https://github.com/AnswerDotAI/fastllm

python_fastllm-0.0.9/fastllm/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ __version__ = "0.0.9"

{python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/_modidx.py RENAMED Viewed

@@ -271,17 +271,20 @@ d = { 'settings': { 'branch': 'main',
                                'fastllm.types.ToolCall._repr_markdown_': ('types.html#toolcall._repr_markdown_', 'fastllm/types.py'),
                                'fastllm.types.Usage': ('types.html#usage', 'fastllm/types.py'),
                                'fastllm.types._trunc_strs': ('types.html#_trunc_strs', 'fastllm/types.py'),
+                               'fastllm.types.approx_pricing': ('types.html#approx_pricing', 'fastllm/types.py'),
                                'fastllm.types.data_url': ('types.html#data_url', 'fastllm/types.py'),
                                'fastllm.types.display_list': ('types.html#display_list', 'fastllm/types.py'),
                                'fastllm.types.fn_schema': ('types.html#fn_schema', 'fastllm/types.py'),
                                'fastllm.types.get_api_key': ('types.html#get_api_key', 'fastllm/types.py'),
                                'fastllm.types.get_model_info': ('types.html#get_model_info', 'fastllm/types.py'),
                                'fastllm.types.get_model_meta': ('types.html#get_model_meta', 'fastllm/types.py'),
+                               'fastllm.types.get_model_pricing': ('types.html#get_model_pricing', 'fastllm/types.py'),
                                'fastllm.types.infer_api_name': ('types.html#infer_api_name', 'fastllm/types.py'),
                                'fastllm.types.mk_completion': ('types.html#mk_completion', 'fastllm/types.py'),
                                'fastllm.types.mk_tool_res_msg': ('types.html#mk_tool_res_msg', 'fastllm/types.py'),
                                'fastllm.types.model_prices_meta': ('types.html#model_prices_meta', 'fastllm/types.py'),
                                'fastllm.types.part_txt': ('types.html#part_txt', 'fastllm/types.py'),
                                'fastllm.types.payload_kwargs': ('types.html#payload_kwargs', 'fastllm/types.py'),
+                               'fastllm.types.register_model_info': ('types.html#register_model_info', 'fastllm/types.py'),
                                'fastllm.types.sys_text': ('types.html#sys_text', 'fastllm/types.py'),
                                'fastllm.types.url_mime': ('types.html#url_mime', 'fastllm/types.py')}}}

{python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/anthropic.py RENAMED Viewed

@@ -50,10 +50,8 @@ def finalize_usage(usg, parts):
     rc = '\n'.join(p.text or '' for p in parts if p.type == PartType.thinking)
     ct = int(usg.raw.get('output_tokens', usg.completion_tokens) or 0)
     rt = min(int(len(rc.split())*1.5), ct) if rc else 0
-    res = Usage(prompt_tokens=usg.prompt_tokens, completion_tokens=ct-rt, total_tokens=usg.prompt_tokens+ct,
-                 cached_tokens=usg.cached_tokens, cache_creation_tokens=usg.cache_creation_tokens, reasoning_tokens=rt, raw=usg.raw)
-    print(res)
-    return res
+    return Usage(prompt_tokens=usg.prompt_tokens, completion_tokens=ct-rt, total_tokens=usg.prompt_tokens+ct,
+        cached_tokens=usg.cached_tokens, cache_creation_tokens=usg.cache_creation_tokens, reasoning_tokens=rt, raw=usg.raw)
 # %% ../nbs/04_anthropic.ipynb #7a8b1f8f
 def norm_finish(resp, tcs=None):

{python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/types.py RENAMED Viewed

@@ -4,10 +4,11 @@
 # %% auto #0
 __all__ = ['PartType', 'FinishReason', 'api_registry', 'model_prices_url', 'haik45', 'sonn45', 'sonn', 'sonn46', 'opus46', 'opus',
-           'gpt54', 'gpt54m', 'codex54', 'codex55', 'codex53spark', 'codex_pricing', 'Part', 'Msg', 'ToolCall',
-           'display_list', 'Usage', 'Completion', 'APIRegistry', 'mk_completion', 'mk_tool_res_msg', 'fn_schema',
-           'sys_text', 'part_txt', 'data_url', 'url_mime', 'payload_kwargs', 'get_api_key', 'model_prices_meta',
-           'infer_api_name', 'get_model_meta', 'get_model_info']
+           'gpt54', 'gpt54m', 'gpt55', 'codex54', 'codex54m', 'codex55', 'codex53spark', 'model_info_registry',
+           'deepseek_v4_common', 'codex_pricing', 'Part', 'Msg', 'ToolCall', 'display_list', 'Usage', 'Completion',
+           'APIRegistry', 'mk_completion', 'mk_tool_res_msg', 'fn_schema', 'sys_text', 'part_txt', 'data_url',
+           'url_mime', 'payload_kwargs', 'get_api_key', 'model_prices_meta', 'infer_api_name', 'get_model_meta',
+           'register_model_info', 'get_model_info', 'get_model_pricing', 'approx_pricing']
 # %% ../nbs/00_types.ipynb #b4d047fd
 from dataclasses import dataclass, field
@@ -229,6 +230,7 @@ def get_api_key(api_key, default):
 # %% ../nbs/00_types.ipynb #852adecd
 model_prices_url = 'https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json'
 @flexicache(time_policy(24*60*60))
 def model_prices_meta(): return urljson(model_prices_url)
@@ -258,63 +260,94 @@ opus46 = "claude-opus-4-6"
 opus = "claude-opus-4-7"
 gpt54 = "gpt-5.4"
 gpt54m = "gpt-5.4-mini"
+gpt55 = "gpt-5.5"
 codex54 = "gpt-5.4"
+codex54m = "gpt-5.4-mini"
 codex55 = "gpt-5.5"
 codex53spark = "gpt-5.3-codex-spark"
-# %% ../nbs/00_types.ipynb #d6d5b98c
-codex_pricing = {
-    "input_cost_per_token": 0.10 / 1_000_000,
-    "cache_creation_input_token_cost": 0.10 / 1_000_000,
-    "cache_read_input_token_cost": 0.10 / 1_000_000,
-    "output_cost_per_token": 0.50 / 1_000_000,
-}
+# %% ../nbs/00_types.ipynb #583e017b
+model_info_registry = {}
-_codex_overrides = {
-    codex53spark: dict(
-        supports_vision=False, supports_image_input=False, supports_web_search=True, supports_reasoning=True,
-        max_tokens=128000, max_input_tokens=128000, max_output_tokens=128000)
-}
+def register_model_info(model, vendor_name=None, base=None, base_vendor_name=None, **overrides):
+    "Register model metadata, optionally starting from `base`."
+    info = dict(get_model_info(base, base_vendor_name or vendor_name)) if base else {}
+    info.update(overrides)
+    model_info_registry[vendor_name, model] = info
-# %% ../nbs/00_types.ipynb #fbfdeb0a
-def get_model_info(mn, vendor_name=None, strict=False):
-    info = get_model_meta(mn, 'chatgpt' if vendor_name=='codex' else vendor_name)
-    # anthropic web search
+def get_model_info(mn, vendor_name=None):
+    info = model_info_registry.get((vendor_name, mn)) or get_model_meta(mn, vendor_name)
     if 'search_context_cost_per_query' in info: info['supports_web_search'] = True
-    # kimi
-    if 'kimi' in mn:
-        if 'k2p6' in mn: info = get_model_meta(mn.replace('k2p6', 'k2p5'), vendor_name)
-        info['supports_reasoning'] = True
-        info['supports_vision'] = True
-        if vendor_name == 'moonshot': info['supports_assistant_prefill'] = True
-    # gpt web search
-    if mn in ("gpt-5.4", "gpt-5.4-mini"):
-        info['supports_web_search'] = True
-        info.pop('mode', None)
-    # codex updates
-    if vendor_name == 'codex':
-        info = merge(info, codex_pricing)
-        info |= _codex_overrides.get(mn, {})
-    # deepseek v4
-    if vendor_name == 'deepseek' and mn in ("deepseek-v4-flash", "deepseek-v4-pro"):
-        info = dict(get_model_meta("deepseek/deepseek-v3.2"))
-        info |= dict(supports_assistant_prefill=True, supports_function_calling=True, supports_prompt_caching=True,
-            supports_reasoning=True, supports_tool_choice=True)
-        info.update(input_cost_per_token=1.4e-07, input_cost_per_token_cache_hit=2.8e-09, output_cost_per_token=2.8e-07,
-            max_input_tokens=1048576, max_output_tokens=393216, max_tokens=393216)
-        if 'pro' in mn: info = {**info, 'input_cost_per_token': 4.35e-07, 'input_cost_per_token_cache_hit': 3.625e-09, 'output_cost_per_token': 8.7e-07}
-    # qwen 3p6
-    if vendor_name == 'fireworks_ai' and mn == 'accounts/fireworks/models/qwen3p6-plus':
-        info = dict(supports_vision=True, supports_reasoning=True, supports_function_calling=True, supports_tool_choice=True,
-                    supports_system_messages=True, supports_response_schema=True, supports_parallel_function_calling=True,
-                    supports_prompt_caching=True, supports_native_streaming=True, supports_native_structured_output=True,
-                    max_tokens=1000000, max_input_tokens=1000000, max_output_tokens=65536,
-                    input_cost_per_token=0.5e-6, cache_read_input_token_cost=0.1e-6, output_cost_per_token=3.0e-6)
-    # unresolved models
-    if not info and not strict: info = info | codex_pricing
     return dict2obj(info)
+# %% ../nbs/00_types.ipynb #8261dcd0
+register_model_info('accounts/fireworks/models/qwen3p6-plus', vendor_name='fireworks_ai',
+    supports_vision=True, supports_reasoning=True, supports_function_calling=True, supports_tool_choice=True,
+    supports_system_messages=True, supports_response_schema=True, supports_parallel_function_calling=True,
+    supports_prompt_caching=True, supports_native_streaming=True, supports_native_structured_output=True,
+    max_tokens=1000000, max_input_tokens=1000000, max_output_tokens=65536,
+    input_cost_per_token=0.5e-6, cache_read_input_token_cost=0.1e-6, output_cost_per_token=3.0e-6)
+register_model_info('gemini-3.5-flash', vendor_name='gemini', base='gemini-3-flash-preview',
+    input_cost_per_token=1.5e-6, output_cost_per_token=9e-6,
+    output_cost_per_reasoning_token=9e-6, cache_read_input_token_cost=1.5e-7)
+for model in ('gpt-5.4', 'gpt-5.4-mini'):
+    register_model_info(model, vendor_name='openai', base=model, supports_web_search=True, mode=None)
+for model in ('kimi-k2.5', 'kimi-k2.6'):
+    register_model_info(model, vendor_name='moonshot', base=f'moonshot/{model}', base_vendor_name=None,
+        supports_reasoning=True, supports_vision=True, supports_assistant_prefill=True)
+register_model_info('gemini-3.1-flash-lite', vendor_name='gemini', base='gemini-3.1-flash-lite-preview')
+register_model_info('models/gemini-3.1-flash-lite', vendor_name='gemini', base='gemini-3.1-flash-lite-preview')
+for model in ('accounts/fireworks/models/kimi-k2p5', 'accounts/fireworks/models/kimi-k2p6'):
+    register_model_info(model, vendor_name='fireworks_ai', base=model.replace('k2p6', 'k2p5'),
+        supports_reasoning=True, supports_vision=True,
+        input_cost_per_token=0.95e-6, cache_read_input_token_cost=0.16e-6, output_cost_per_token=4.0e-6)
+# %% ../nbs/00_types.ipynb #948d55d0
+deepseek_v4_common = dict(
+    supports_assistant_prefill=True, supports_function_calling=True, supports_prompt_caching=True,
+    supports_reasoning=True, supports_tool_choice=True,
+    max_input_tokens=1048576, max_output_tokens=393216, max_tokens=393216)
+register_model_info('deepseek-v4-flash', vendor_name='deepseek', base='deepseek/deepseek-v3.2', **deepseek_v4_common,
+    input_cost_per_token=1.4e-07, input_cost_per_token_cache_hit=2.8e-09,
+    output_cost_per_token=2.8e-07, cache_read_input_token_cost=1.4e-07/10)
+register_model_info('deepseek-v4-pro', vendor_name='deepseek', base='deepseek/deepseek-v3.2', **deepseek_v4_common,
+    input_cost_per_token=4.35e-07, input_cost_per_token_cache_hit=3.625e-09,
+    output_cost_per_token=8.7e-07, cache_read_input_token_cost=4.35e-07/10)
+# %% ../nbs/00_types.ipynb #2c23d11e
+codex_pricing = dict(
+    input_cost_per_token = 0.10/1_000_000, output_cost_per_token = 0.50/1_000_000,
+    cache_creation_input_token_cost = 0.10/1_000_000, cache_read_input_token_cost = 0.10/1_000_000)
+for model in (codex54, codex54m, codex55):
+    register_model_info(model, 'codex', base=model, base_vendor_name='chatgpt', supports_web_search=True, **codex_pricing)
+register_model_info(codex53spark, 'codex', **codex_pricing,
+    supports_vision=False, supports_image_input=False, supports_web_search=True, supports_reasoning=True,
+    max_tokens=128000, max_input_tokens=128000, max_output_tokens=128000)
+# %% ../nbs/00_types.ipynb #24cc47ec
+def get_model_pricing(mn, vendor_name, million=True):
+    return {k:round(v * (1e6 if million else 1), 6)
+        for k,v in get_model_info(mn, vendor_name).items()
+        if 'cost' in k and isinstance(v,float) and 'priority' not in k}
+# %% ../nbs/00_types.ipynb #79304cd9
+def approx_pricing(nm, vendor_name, out=10, cache=80, inp=10, markup=0):
+    "Approx cost per million tokens with given output/cache/input proportions"
+    p = get_model_pricing(nm, vendor_name)
+    ic = p.get('cache_creation_input_token_cost', p['input_cost_per_token'])
+    res = (p['output_cost_per_token']*out + p['cache_read_input_token_cost']*cache + ic*inp) / (out+cache+inp)
+    if nm=='claude-opus-4-7': res *= 1.5
+    return res*(1+markup)
 # %% ../nbs/00_types.ipynb #8bfca02d
 @patch(as_prop=True)
 def cost(self:Completion):

{python_fastllm-0.0.8 → python_fastllm-0.0.9}/python_fastllm.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: python-fastllm
-Version: 0.0.8
+Version: 0.0.9
 Author-email: Kerem Turgutlu <keremturgutlu@gmail.com>
 License: Apache-2.0
 Project-URL: Repository, https://github.com/AnswerDotAI/fastllm