python-fastllm 0.0.8__tar.gz → 0.0.9__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (28)
  1. {python_fastllm-0.0.8 → python_fastllm-0.0.9}/PKG-INFO +1 -1
  2. python_fastllm-0.0.9/fastllm/__init__.py +1 -0
  3. {python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/_modidx.py +3 -0
  4. {python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/anthropic.py +2 -4
  5. {python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/types.py +85 -52
  6. {python_fastllm-0.0.8 → python_fastllm-0.0.9}/python_fastllm.egg-info/PKG-INFO +1 -1
  7. python_fastllm-0.0.8/fastllm/__init__.py +0 -1
  8. {python_fastllm-0.0.8 → python_fastllm-0.0.9}/README.md +0 -0
  9. {python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/acomplete.py +0 -0
  10. {python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/chat.py +0 -0
  11. {python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/codex.py +0 -0
  12. {python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/gemini.py +0 -0
  13. {python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/openai_chat.py +0 -0
  14. {python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/openai_responses.py +0 -0
  15. {python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/specs/anthropic.json +0 -0
  16. {python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/specs/anthropic.yml +0 -0
  17. {python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/specs/gemini.json +0 -0
  18. {python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/specs/openai.with-code-samples.json +0 -0
  19. {python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/specs/openai.with-code-samples.yml +0 -0
  20. {python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/specs/spec_manifest.json +0 -0
  21. {python_fastllm-0.0.8 → python_fastllm-0.0.9}/fastllm/streaming.py +0 -0
  22. {python_fastllm-0.0.8 → python_fastllm-0.0.9}/pyproject.toml +0 -0
  23. {python_fastllm-0.0.8 → python_fastllm-0.0.9}/python_fastllm.egg-info/SOURCES.txt +0 -0
  24. {python_fastllm-0.0.8 → python_fastllm-0.0.9}/python_fastllm.egg-info/dependency_links.txt +0 -0
  25. {python_fastllm-0.0.8 → python_fastllm-0.0.9}/python_fastllm.egg-info/entry_points.txt +0 -0
  26. {python_fastllm-0.0.8 → python_fastllm-0.0.9}/python_fastllm.egg-info/requires.txt +0 -0
  27. {python_fastllm-0.0.8 → python_fastllm-0.0.9}/python_fastllm.egg-info/top_level.txt +0 -0
  28. {python_fastllm-0.0.8 → python_fastllm-0.0.9}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-fastllm
3
- Version: 0.0.8
3
+ Version: 0.0.9
4
4
  Author-email: Kerem Turgutlu <keremturgutlu@gmail.com>
5
5
  License: Apache-2.0
6
6
  Project-URL: Repository, https://github.com/AnswerDotAI/fastllm
@@ -0,0 +1 @@
1
+ __version__ = "0.0.9"
@@ -271,17 +271,20 @@ d = { 'settings': { 'branch': 'main',
271
271
  'fastllm.types.ToolCall._repr_markdown_': ('types.html#toolcall._repr_markdown_', 'fastllm/types.py'),
272
272
  'fastllm.types.Usage': ('types.html#usage', 'fastllm/types.py'),
273
273
  'fastllm.types._trunc_strs': ('types.html#_trunc_strs', 'fastllm/types.py'),
274
+ 'fastllm.types.approx_pricing': ('types.html#approx_pricing', 'fastllm/types.py'),
274
275
  'fastllm.types.data_url': ('types.html#data_url', 'fastllm/types.py'),
275
276
  'fastllm.types.display_list': ('types.html#display_list', 'fastllm/types.py'),
276
277
  'fastllm.types.fn_schema': ('types.html#fn_schema', 'fastllm/types.py'),
277
278
  'fastllm.types.get_api_key': ('types.html#get_api_key', 'fastllm/types.py'),
278
279
  'fastllm.types.get_model_info': ('types.html#get_model_info', 'fastllm/types.py'),
279
280
  'fastllm.types.get_model_meta': ('types.html#get_model_meta', 'fastllm/types.py'),
281
+ 'fastllm.types.get_model_pricing': ('types.html#get_model_pricing', 'fastllm/types.py'),
280
282
  'fastllm.types.infer_api_name': ('types.html#infer_api_name', 'fastllm/types.py'),
281
283
  'fastllm.types.mk_completion': ('types.html#mk_completion', 'fastllm/types.py'),
282
284
  'fastllm.types.mk_tool_res_msg': ('types.html#mk_tool_res_msg', 'fastllm/types.py'),
283
285
  'fastllm.types.model_prices_meta': ('types.html#model_prices_meta', 'fastllm/types.py'),
284
286
  'fastllm.types.part_txt': ('types.html#part_txt', 'fastllm/types.py'),
285
287
  'fastllm.types.payload_kwargs': ('types.html#payload_kwargs', 'fastllm/types.py'),
288
+ 'fastllm.types.register_model_info': ('types.html#register_model_info', 'fastllm/types.py'),
286
289
  'fastllm.types.sys_text': ('types.html#sys_text', 'fastllm/types.py'),
287
290
  'fastllm.types.url_mime': ('types.html#url_mime', 'fastllm/types.py')}}}
@@ -50,10 +50,8 @@ def finalize_usage(usg, parts):
50
50
  rc = '\n'.join(p.text or '' for p in parts if p.type == PartType.thinking)
51
51
  ct = int(usg.raw.get('output_tokens', usg.completion_tokens) or 0)
52
52
  rt = min(int(len(rc.split())*1.5), ct) if rc else 0
53
- res = Usage(prompt_tokens=usg.prompt_tokens, completion_tokens=ct-rt, total_tokens=usg.prompt_tokens+ct,
54
- cached_tokens=usg.cached_tokens, cache_creation_tokens=usg.cache_creation_tokens, reasoning_tokens=rt, raw=usg.raw)
55
- print(res)
56
- return res
53
+ return Usage(prompt_tokens=usg.prompt_tokens, completion_tokens=ct-rt, total_tokens=usg.prompt_tokens+ct,
54
+ cached_tokens=usg.cached_tokens, cache_creation_tokens=usg.cache_creation_tokens, reasoning_tokens=rt, raw=usg.raw)
57
55
 
58
56
  # %% ../nbs/04_anthropic.ipynb #7a8b1f8f
59
57
  def norm_finish(resp, tcs=None):
@@ -4,10 +4,11 @@
4
4
 
5
5
  # %% auto #0
6
6
  __all__ = ['PartType', 'FinishReason', 'api_registry', 'model_prices_url', 'haik45', 'sonn45', 'sonn', 'sonn46', 'opus46', 'opus',
7
- 'gpt54', 'gpt54m', 'codex54', 'codex55', 'codex53spark', 'codex_pricing', 'Part', 'Msg', 'ToolCall',
8
- 'display_list', 'Usage', 'Completion', 'APIRegistry', 'mk_completion', 'mk_tool_res_msg', 'fn_schema',
9
- 'sys_text', 'part_txt', 'data_url', 'url_mime', 'payload_kwargs', 'get_api_key', 'model_prices_meta',
10
- 'infer_api_name', 'get_model_meta', 'get_model_info']
7
+ 'gpt54', 'gpt54m', 'gpt55', 'codex54', 'codex54m', 'codex55', 'codex53spark', 'model_info_registry',
8
+ 'deepseek_v4_common', 'codex_pricing', 'Part', 'Msg', 'ToolCall', 'display_list', 'Usage', 'Completion',
9
+ 'APIRegistry', 'mk_completion', 'mk_tool_res_msg', 'fn_schema', 'sys_text', 'part_txt', 'data_url',
10
+ 'url_mime', 'payload_kwargs', 'get_api_key', 'model_prices_meta', 'infer_api_name', 'get_model_meta',
11
+ 'register_model_info', 'get_model_info', 'get_model_pricing', 'approx_pricing']
11
12
 
12
13
  # %% ../nbs/00_types.ipynb #b4d047fd
13
14
  from dataclasses import dataclass, field
@@ -229,6 +230,7 @@ def get_api_key(api_key, default):
229
230
 
230
231
  # %% ../nbs/00_types.ipynb #852adecd
231
232
  model_prices_url = 'https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json'
233
+
232
234
  @flexicache(time_policy(24*60*60))
233
235
  def model_prices_meta(): return urljson(model_prices_url)
234
236
 
@@ -258,63 +260,94 @@ opus46 = "claude-opus-4-6"
258
260
  opus = "claude-opus-4-7"
259
261
  gpt54 = "gpt-5.4"
260
262
  gpt54m = "gpt-5.4-mini"
263
+ gpt55 = "gpt-5.5"
261
264
  codex54 = "gpt-5.4"
265
+ codex54m = "gpt-5.4-mini"
262
266
  codex55 = "gpt-5.5"
263
267
  codex53spark = "gpt-5.3-codex-spark"
264
268
 
265
- # %% ../nbs/00_types.ipynb #d6d5b98c
266
- codex_pricing = {
267
- "input_cost_per_token": 0.10 / 1_000_000,
268
- "cache_creation_input_token_cost": 0.10 / 1_000_000,
269
- "cache_read_input_token_cost": 0.10 / 1_000_000,
270
- "output_cost_per_token": 0.50 / 1_000_000,
271
- }
269
+ # %% ../nbs/00_types.ipynb #583e017b
270
+ model_info_registry = {}
272
271
 
273
- _codex_overrides = {
274
- codex53spark: dict(
275
- supports_vision=False, supports_image_input=False, supports_web_search=True, supports_reasoning=True,
276
- max_tokens=128000, max_input_tokens=128000, max_output_tokens=128000)
277
- }
272
+ def register_model_info(model, vendor_name=None, base=None, base_vendor_name=None, **overrides):
273
+ "Register model metadata, optionally starting from `base`."
274
+ info = dict(get_model_info(base, base_vendor_name or vendor_name)) if base else {}
275
+ info.update(overrides)
276
+ model_info_registry[vendor_name, model] = info
278
277
 
279
- # %% ../nbs/00_types.ipynb #fbfdeb0a
280
- def get_model_info(mn, vendor_name=None, strict=False):
281
- info = get_model_meta(mn, 'chatgpt' if vendor_name=='codex' else vendor_name)
282
- # anthropic web search
278
+ def get_model_info(mn, vendor_name=None):
279
+ info = model_info_registry.get((vendor_name, mn)) or get_model_meta(mn, vendor_name)
283
280
  if 'search_context_cost_per_query' in info: info['supports_web_search'] = True
284
- # kimi
285
- if 'kimi' in mn:
286
- if 'k2p6' in mn: info = get_model_meta(mn.replace('k2p6', 'k2p5'), vendor_name)
287
- info['supports_reasoning'] = True
288
- info['supports_vision'] = True
289
- if vendor_name == 'moonshot': info['supports_assistant_prefill'] = True
290
- # gpt web search
291
- if mn in ("gpt-5.4", "gpt-5.4-mini"):
292
- info['supports_web_search'] = True
293
- info.pop('mode', None)
294
- # codex updates
295
- if vendor_name == 'codex':
296
- info = merge(info, codex_pricing)
297
- info |= _codex_overrides.get(mn, {})
298
- # deepseek v4
299
- if vendor_name == 'deepseek' and mn in ("deepseek-v4-flash", "deepseek-v4-pro"):
300
- info = dict(get_model_meta("deepseek/deepseek-v3.2"))
301
- info |= dict(supports_assistant_prefill=True, supports_function_calling=True, supports_prompt_caching=True,
302
- supports_reasoning=True, supports_tool_choice=True)
303
- info.update(input_cost_per_token=1.4e-07, input_cost_per_token_cache_hit=2.8e-09, output_cost_per_token=2.8e-07,
304
- max_input_tokens=1048576, max_output_tokens=393216, max_tokens=393216)
305
- if 'pro' in mn: info = {**info, 'input_cost_per_token': 4.35e-07, 'input_cost_per_token_cache_hit': 3.625e-09, 'output_cost_per_token': 8.7e-07}
306
- # qwen 3p6
307
- if vendor_name == 'fireworks_ai' and mn == 'accounts/fireworks/models/qwen3p6-plus':
308
- info = dict(supports_vision=True, supports_reasoning=True, supports_function_calling=True, supports_tool_choice=True,
309
- supports_system_messages=True, supports_response_schema=True, supports_parallel_function_calling=True,
310
- supports_prompt_caching=True, supports_native_streaming=True, supports_native_structured_output=True,
311
- max_tokens=1000000, max_input_tokens=1000000, max_output_tokens=65536,
312
- input_cost_per_token=0.5e-6, cache_read_input_token_cost=0.1e-6, output_cost_per_token=3.0e-6)
313
-
314
- # unresolved models
315
- if not info and not strict: info = info | codex_pricing
316
281
  return dict2obj(info)
317
282
 
283
+ # %% ../nbs/00_types.ipynb #8261dcd0
284
+ register_model_info('accounts/fireworks/models/qwen3p6-plus', vendor_name='fireworks_ai',
285
+ supports_vision=True, supports_reasoning=True, supports_function_calling=True, supports_tool_choice=True,
286
+ supports_system_messages=True, supports_response_schema=True, supports_parallel_function_calling=True,
287
+ supports_prompt_caching=True, supports_native_streaming=True, supports_native_structured_output=True,
288
+ max_tokens=1000000, max_input_tokens=1000000, max_output_tokens=65536,
289
+ input_cost_per_token=0.5e-6, cache_read_input_token_cost=0.1e-6, output_cost_per_token=3.0e-6)
290
+
291
+ register_model_info('gemini-3.5-flash', vendor_name='gemini', base='gemini-3-flash-preview',
292
+ input_cost_per_token=1.5e-6, output_cost_per_token=9e-6,
293
+ output_cost_per_reasoning_token=9e-6, cache_read_input_token_cost=1.5e-7)
294
+
295
+ for model in ('gpt-5.4', 'gpt-5.4-mini'):
296
+ register_model_info(model, vendor_name='openai', base=model, supports_web_search=True, mode=None)
297
+
298
+ for model in ('kimi-k2.5', 'kimi-k2.6'):
299
+ register_model_info(model, vendor_name='moonshot', base=f'moonshot/{model}', base_vendor_name=None,
300
+ supports_reasoning=True, supports_vision=True, supports_assistant_prefill=True)
301
+
302
+ register_model_info('gemini-3.1-flash-lite', vendor_name='gemini', base='gemini-3.1-flash-lite-preview')
303
+ register_model_info('models/gemini-3.1-flash-lite', vendor_name='gemini', base='gemini-3.1-flash-lite-preview')
304
+
305
+ for model in ('accounts/fireworks/models/kimi-k2p5', 'accounts/fireworks/models/kimi-k2p6'):
306
+ register_model_info(model, vendor_name='fireworks_ai', base=model.replace('k2p6', 'k2p5'),
307
+ supports_reasoning=True, supports_vision=True,
308
+ input_cost_per_token=0.95e-6, cache_read_input_token_cost=0.16e-6, output_cost_per_token=4.0e-6)
309
+
310
+ # %% ../nbs/00_types.ipynb #948d55d0
311
+ deepseek_v4_common = dict(
312
+ supports_assistant_prefill=True, supports_function_calling=True, supports_prompt_caching=True,
313
+ supports_reasoning=True, supports_tool_choice=True,
314
+ max_input_tokens=1048576, max_output_tokens=393216, max_tokens=393216)
315
+
316
+ register_model_info('deepseek-v4-flash', vendor_name='deepseek', base='deepseek/deepseek-v3.2', **deepseek_v4_common,
317
+ input_cost_per_token=1.4e-07, input_cost_per_token_cache_hit=2.8e-09,
318
+ output_cost_per_token=2.8e-07, cache_read_input_token_cost=1.4e-07/10)
319
+ register_model_info('deepseek-v4-pro', vendor_name='deepseek', base='deepseek/deepseek-v3.2', **deepseek_v4_common,
320
+ input_cost_per_token=4.35e-07, input_cost_per_token_cache_hit=3.625e-09,
321
+ output_cost_per_token=8.7e-07, cache_read_input_token_cost=4.35e-07/10)
322
+
323
+ # %% ../nbs/00_types.ipynb #2c23d11e
324
+ codex_pricing = dict(
325
+ input_cost_per_token = 0.10/1_000_000, output_cost_per_token = 0.50/1_000_000,
326
+ cache_creation_input_token_cost = 0.10/1_000_000, cache_read_input_token_cost = 0.10/1_000_000)
327
+
328
+ for model in (codex54, codex54m, codex55):
329
+ register_model_info(model, 'codex', base=model, base_vendor_name='chatgpt', supports_web_search=True, **codex_pricing)
330
+
331
+ register_model_info(codex53spark, 'codex', **codex_pricing,
332
+ supports_vision=False, supports_image_input=False, supports_web_search=True, supports_reasoning=True,
333
+ max_tokens=128000, max_input_tokens=128000, max_output_tokens=128000)
334
+
335
+
336
+ # %% ../nbs/00_types.ipynb #24cc47ec
337
+ def get_model_pricing(mn, vendor_name, million=True):
338
+ return {k:round(v * (1e6 if million else 1), 6)
339
+ for k,v in get_model_info(mn, vendor_name).items()
340
+ if 'cost' in k and isinstance(v,float) and 'priority' not in k}
341
+
342
+ # %% ../nbs/00_types.ipynb #79304cd9
343
+ def approx_pricing(nm, vendor_name, out=10, cache=80, inp=10, markup=0):
344
+ "Approx cost per million tokens with given output/cache/input proportions"
345
+ p = get_model_pricing(nm, vendor_name)
346
+ ic = p.get('cache_creation_input_token_cost', p['input_cost_per_token'])
347
+ res = (p['output_cost_per_token']*out + p['cache_read_input_token_cost']*cache + ic*inp) / (out+cache+inp)
348
+ if nm=='claude-opus-4-7': res *= 1.5
349
+ return res*(1+markup)
350
+
318
351
  # %% ../nbs/00_types.ipynb #8bfca02d
319
352
  @patch(as_prop=True)
320
353
  def cost(self:Completion):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-fastllm
3
- Version: 0.0.8
3
+ Version: 0.0.9
4
4
  Author-email: Kerem Turgutlu <keremturgutlu@gmail.com>
5
5
  License: Apache-2.0
6
6
  Project-URL: Repository, https://github.com/AnswerDotAI/fastllm
@@ -1 +0,0 @@
1
- __version__ = "0.0.8"
File without changes
File without changes