python-fastllm 0.0.7__tar.gz → 0.0.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {python_fastllm-0.0.7 → python_fastllm-0.0.9}/PKG-INFO +1 -1
  2. python_fastllm-0.0.9/fastllm/__init__.py +1 -0
  3. {python_fastllm-0.0.7 → python_fastllm-0.0.9}/fastllm/_modidx.py +6 -0
  4. {python_fastllm-0.0.7 → python_fastllm-0.0.9}/fastllm/acomplete.py +17 -1
  5. {python_fastllm-0.0.7 → python_fastllm-0.0.9}/fastllm/anthropic.py +15 -13
  6. {python_fastllm-0.0.7 → python_fastllm-0.0.9}/fastllm/chat.py +13 -8
  7. python_fastllm-0.0.9/fastllm/codex.py +7 -0
  8. {python_fastllm-0.0.7 → python_fastllm-0.0.9}/fastllm/streaming.py +2 -0
  9. {python_fastllm-0.0.7 → python_fastllm-0.0.9}/fastllm/types.py +92 -55
  10. {python_fastllm-0.0.7 → python_fastllm-0.0.9}/python_fastllm.egg-info/PKG-INFO +1 -1
  11. {python_fastllm-0.0.7 → python_fastllm-0.0.9}/python_fastllm.egg-info/SOURCES.txt +1 -0
  12. python_fastllm-0.0.7/fastllm/__init__.py +0 -1
  13. {python_fastllm-0.0.7 → python_fastllm-0.0.9}/README.md +0 -0
  14. {python_fastllm-0.0.7 → python_fastllm-0.0.9}/fastllm/gemini.py +0 -0
  15. {python_fastllm-0.0.7 → python_fastllm-0.0.9}/fastllm/openai_chat.py +0 -0
  16. {python_fastllm-0.0.7 → python_fastllm-0.0.9}/fastllm/openai_responses.py +0 -0
  17. {python_fastllm-0.0.7 → python_fastllm-0.0.9}/fastllm/specs/anthropic.json +0 -0
  18. {python_fastllm-0.0.7 → python_fastllm-0.0.9}/fastllm/specs/anthropic.yml +0 -0
  19. {python_fastllm-0.0.7 → python_fastllm-0.0.9}/fastllm/specs/gemini.json +0 -0
  20. {python_fastllm-0.0.7 → python_fastllm-0.0.9}/fastllm/specs/openai.with-code-samples.json +0 -0
  21. {python_fastllm-0.0.7 → python_fastllm-0.0.9}/fastllm/specs/openai.with-code-samples.yml +0 -0
  22. {python_fastllm-0.0.7 → python_fastllm-0.0.9}/fastllm/specs/spec_manifest.json +0 -0
  23. {python_fastllm-0.0.7 → python_fastllm-0.0.9}/pyproject.toml +0 -0
  24. {python_fastllm-0.0.7 → python_fastllm-0.0.9}/python_fastllm.egg-info/dependency_links.txt +0 -0
  25. {python_fastllm-0.0.7 → python_fastllm-0.0.9}/python_fastllm.egg-info/entry_points.txt +0 -0
  26. {python_fastllm-0.0.7 → python_fastllm-0.0.9}/python_fastllm.egg-info/requires.txt +0 -0
  27. {python_fastllm-0.0.7 → python_fastllm-0.0.9}/python_fastllm.egg-info/top_level.txt +0 -0
  28. {python_fastllm-0.0.7 → python_fastllm-0.0.9}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-fastllm
3
- Version: 0.0.7
3
+ Version: 0.0.9
4
4
  Author-email: Kerem Turgutlu <keremturgutlu@gmail.com>
5
5
  License: Apache-2.0
6
6
  Project-URL: Repository, https://github.com/AnswerDotAI/fastllm
@@ -0,0 +1 @@
1
+ __version__ = "0.0.9"
@@ -10,6 +10,7 @@ d = { 'settings': { 'branch': 'main',
10
10
  'fastllm.acomplete._classify_error': ('acomplete.html#_classify_error', 'fastllm/acomplete.py'),
11
11
  'fastllm.acomplete._classify_error_stream': ( 'acomplete.html#_classify_error_stream',
12
12
  'fastllm/acomplete.py'),
13
+ 'fastllm.acomplete._debug_print': ('acomplete.html#_debug_print', 'fastllm/acomplete.py'),
13
14
  'fastllm.acomplete._is_ctx_exceeded': ('acomplete.html#_is_ctx_exceeded', 'fastllm/acomplete.py'),
14
15
  'fastllm.acomplete.acomplete': ('acomplete.html#acomplete', 'fastllm/acomplete.py'),
15
16
  'fastllm.acomplete.mk_client': ('acomplete.html#mk_client', 'fastllm/acomplete.py')},
@@ -31,6 +32,7 @@ d = { 'settings': { 'branch': 'main',
31
32
  'fastllm.anthropic.denorm_tool_use': ('anthropic.html#denorm_tool_use', 'fastllm/anthropic.py'),
32
33
  'fastllm.anthropic.denorm_user': ('anthropic.html#denorm_user', 'fastllm/anthropic.py'),
33
34
  'fastllm.anthropic.denorm_web_search': ('anthropic.html#denorm_web_search', 'fastllm/anthropic.py'),
35
+ 'fastllm.anthropic.finalize_usage': ('anthropic.html#finalize_usage', 'fastllm/anthropic.py'),
34
36
  'fastllm.anthropic.get_hdrs': ('anthropic.html#get_hdrs', 'fastllm/anthropic.py'),
35
37
  'fastllm.anthropic.mk_payload': ('anthropic.html#mk_payload', 'fastllm/anthropic.py'),
36
38
  'fastllm.anthropic.norm_finish': ('anthropic.html#norm_finish', 'fastllm/anthropic.py'),
@@ -145,6 +147,7 @@ d = { 'settings': { 'branch': 'main',
145
147
  'fastllm.chat.stop_reason': ('chat.html#stop_reason', 'fastllm/chat.py'),
146
148
  'fastllm.chat.stop_sequences': ('chat.html#stop_sequences', 'fastllm/chat.py'),
147
149
  'fastllm.chat.structured': ('chat.html#structured', 'fastllm/chat.py')},
150
+ 'fastllm.codex': {},
148
151
  'fastllm.gemini': { 'fastllm.gemini._gem_filter_sch': ('gemini.html#_gem_filter_sch', 'fastllm/gemini.py'),
149
152
  'fastllm.gemini._gem_part_type': ('gemini.html#_gem_part_type', 'fastllm/gemini.py'),
150
153
  'fastllm.gemini.acollect_stream': ('gemini.html#acollect_stream', 'fastllm/gemini.py'),
@@ -268,17 +271,20 @@ d = { 'settings': { 'branch': 'main',
268
271
  'fastllm.types.ToolCall._repr_markdown_': ('types.html#toolcall._repr_markdown_', 'fastllm/types.py'),
269
272
  'fastllm.types.Usage': ('types.html#usage', 'fastllm/types.py'),
270
273
  'fastllm.types._trunc_strs': ('types.html#_trunc_strs', 'fastllm/types.py'),
274
+ 'fastllm.types.approx_pricing': ('types.html#approx_pricing', 'fastllm/types.py'),
271
275
  'fastllm.types.data_url': ('types.html#data_url', 'fastllm/types.py'),
272
276
  'fastllm.types.display_list': ('types.html#display_list', 'fastllm/types.py'),
273
277
  'fastllm.types.fn_schema': ('types.html#fn_schema', 'fastllm/types.py'),
274
278
  'fastllm.types.get_api_key': ('types.html#get_api_key', 'fastllm/types.py'),
275
279
  'fastllm.types.get_model_info': ('types.html#get_model_info', 'fastllm/types.py'),
276
280
  'fastllm.types.get_model_meta': ('types.html#get_model_meta', 'fastllm/types.py'),
281
+ 'fastllm.types.get_model_pricing': ('types.html#get_model_pricing', 'fastllm/types.py'),
277
282
  'fastllm.types.infer_api_name': ('types.html#infer_api_name', 'fastllm/types.py'),
278
283
  'fastllm.types.mk_completion': ('types.html#mk_completion', 'fastllm/types.py'),
279
284
  'fastllm.types.mk_tool_res_msg': ('types.html#mk_tool_res_msg', 'fastllm/types.py'),
280
285
  'fastllm.types.model_prices_meta': ('types.html#model_prices_meta', 'fastllm/types.py'),
281
286
  'fastllm.types.part_txt': ('types.html#part_txt', 'fastllm/types.py'),
282
287
  'fastllm.types.payload_kwargs': ('types.html#payload_kwargs', 'fastllm/types.py'),
288
+ 'fastllm.types.register_model_info': ('types.html#register_model_info', 'fastllm/types.py'),
283
289
  'fastllm.types.sys_text': ('types.html#sys_text', 'fastllm/types.py'),
284
290
  'fastllm.types.url_mime': ('types.html#url_mime', 'fastllm/types.py')}}}
@@ -3,7 +3,7 @@
3
3
  # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/06_acomplete.ipynb.
4
4
 
5
5
  # %% auto #0
6
- __all__ = ['specs_path', 'ant_spec', 'oai_spec', 'gem_spec', 'vendor_mapping', 'api2spec', 'mk_client',
6
+ __all__ = ['specs_path', 'ant_spec', 'oai_spec', 'gem_spec', 'vendor_mapping', 'api2spec', 'defaults', 'mk_client',
7
7
  'ContextWindowExceededError', 'acomplete']
8
8
 
9
9
  # %% ../nbs/06_acomplete.ipynb #f2f57253
@@ -98,6 +98,21 @@ async def _classify_error_stream(gen):
98
98
  async for x in gen: yield x
99
99
  except APIError as e: raise _classify_error(e) from e
100
100
 
101
+ # %% ../nbs/06_acomplete.ipynb #f626a4e1
102
+ defaults = SimpleNamespace(debug_mode=False)
103
+
104
+ def _debug_print(model, api_name, vendor_name, payload, func):
105
+ "Pretty-print acomplete inputs when defaults.debug_mode is set"
106
+ from pprint import pformat
107
+ p = dict(payload)
108
+ if defaults.debug_mode == 'brief' and 'tools' in p:
109
+ p['tools'] = '; '.join(o.get('name', o.get('type', o)) for o in p['tools'])
110
+ print('━'*60)
111
+ print(f"\033[1;36mfastllm debug\033[0m model={model} vendor={vendor_name} api={api_name} base_url={func.base_url} path={func.path}")
112
+ print('─'*60)
113
+ print(f"\033[1;33mpayload:\033[0m\n{pformat(p, width=120, sort_dicts=False)}")
114
+ print('━'*60)
115
+
101
116
  # %% ../nbs/06_acomplete.ipynb #2379ec94
102
117
  @delegates(payload_kwargs)
103
118
  async def acomplete(msgs, model, api_name=None, vendor_name=None, api_key=None, base_url=None, xtra_body=None, xtra_hdrs=None,
@@ -114,6 +129,7 @@ async def acomplete(msgs, model, api_name=None, vendor_name=None, api_key=None,
114
129
  if vendor_name == 'deepseek' and 'v4' in model: payload['messages'][-1]['prefix'] = True
115
130
  if vendor_name == 'moonshot' and 'kimi' in model: payload['messages'][-1]['partial'] = True
116
131
  func = attrgetter(api.op_path[stream])(cli)
132
+ if defaults.debug_mode: _debug_print(model, api_name, vendor_name, payload, func)
117
133
  try: resp = await func(**payload)
118
134
  except APIError as e: raise _classify_error(e) from e
119
135
  if stream: return _classify_error_stream(api.acollect_stream(resp, model=model, vendor_name=vendor_name, stop_callables=stop_callables))
@@ -1,7 +1,7 @@
1
1
  # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/04_anthropic.ipynb.
2
2
 
3
3
  # %% auto #0
4
- __all__ = ['ant_tc_types', 'api_ns', 'norm_tool_call', 'norm_tool_calls', 'norm_usage', 'norm_finish', 'norm_parts',
4
+ __all__ = ['ant_tc_types', 'norm_tool_call', 'norm_tool_calls', 'norm_usage', 'finalize_usage', 'norm_finish', 'norm_parts',
5
5
  'norm_sse_event', 'delta_index_fn', 'acollect_stream', 'denorm_tool_use', 'denorm_assistant', 'denorm_tool',
6
6
  'denorm_msgs', 'denorm_tool_schs', 'denorm_tool_choice', 'denorm_reasoning', 'denorm_web_search',
7
7
  'denorm_system', 'denorm_user', 'denorm_image', 'denorm_file', 'denorm_tool_result', 'mk_payload',
@@ -42,7 +42,16 @@ def norm_usage(resp):
42
42
  pt = int(usg.get("input_tokens", 0) or 0) + cached + cache_creation
43
43
  ct = int(usg.get("output_tokens", 0) or 0)
44
44
  return Usage(prompt_tokens=pt, completion_tokens=ct, total_tokens=pt + ct,
45
- cached_tokens=cached, cache_creation_tokens=cache_creation, raw=usg)
45
+ cached_tokens=cached, cache_creation_tokens=cache_creation, reasoning_tokens=0, raw=usg)
46
+
47
+ def finalize_usage(usg, parts):
48
+ "Adjust usage using finalized Anthropic content parts."
49
+ if not usg: return usg
50
+ rc = '\n'.join(p.text or '' for p in parts if p.type == PartType.thinking)
51
+ ct = int(usg.raw.get('output_tokens', usg.completion_tokens) or 0)
52
+ rt = min(int(len(rc.split())*1.5), ct) if rc else 0
53
+ return Usage(prompt_tokens=usg.prompt_tokens, completion_tokens=ct-rt, total_tokens=usg.prompt_tokens+ct,
54
+ cached_tokens=usg.cached_tokens, cache_creation_tokens=usg.cache_creation_tokens, reasoning_tokens=rt, raw=usg.raw)
46
55
 
47
56
  # %% ../nbs/04_anthropic.ipynb #7a8b1f8f
48
57
  def norm_finish(resp, tcs=None):
@@ -197,7 +206,7 @@ def denorm_reasoning(v):
197
206
  def denorm_web_search(v):
198
207
  "Map canonical web_search_options to Anthropic hosted web_search tool."
199
208
  _max_uses = {"low": 1, "medium": 5, "high": 10}
200
- t = {"type": "web_search_20260209", "name": "web_search"}
209
+ t = {"type": "web_search_20250305", "name": "web_search"}
201
210
  if (typ := (v or {}).get("type")): t["type"] = typ
202
211
  if (s := (v or {}).get("search_context_size")):
203
212
  t["max_uses"] = _max_uses.get(s, 5)
@@ -286,13 +295,6 @@ def cost(usage, m):
286
295
  return cost
287
296
 
288
297
  # %% ../nbs/04_anthropic.ipynb #f7c0b989
289
- api_ns = dict(norm_tool_calls=norm_tool_calls,
290
- norm_parts=norm_parts,
291
- norm_finish=norm_finish,
292
- norm_usage=norm_usage,
293
- acollect_stream=acollect_stream,
294
- mk_payload=mk_payload,
295
- cost=cost,
296
- get_hdrs=get_hdrs,
297
- op_path=('messages.messages_post','messages.messages_post'))
298
- api_registry.register('anthropic', **api_ns)
298
+ api_registry.register('anthropic', norm_tool_calls=norm_tool_calls, norm_parts=norm_parts, norm_finish=norm_finish, norm_usage=norm_usage,
299
+ finalize_usage=finalize_usage, acollect_stream=acollect_stream, mk_payload=mk_payload, cost=cost, get_hdrs=get_hdrs,
300
+ op_path=('messages.messages_post','messages.messages_post'))
@@ -93,7 +93,7 @@ tool_dtls_tag = "<details class='tool-usage-details' markdown='1'>"
93
93
  re_tools = re.compile(fr"^({tool_dtls_tag}\n*(?:<summary>(?P<summary>.*?)</summary>\n*)?\n*```json\n+(.*?)\n+```\n+</details>)",
94
94
  flags=re.DOTALL|re.MULTILINE)
95
95
  token_dtls_tag = "<details class='token-usage-details' markdown='1'>"
96
- re_token = re.compile(fr"^{re.escape(token_dtls_tag)}<summary>.*?</summary>\n*\n*`.*?`\n*\n*</details>\n?",
96
+ re_token = re.compile(fr"^{re.escape(token_dtls_tag)}\n*<summary>.*?</summary>\n*\n*`.*?`\n*\n*</details>\n?",
97
97
  flags=re.DOTALL|re.MULTILINE)
98
98
 
99
99
  # %% ../nbs/07_chat.ipynb #be998131
@@ -209,7 +209,9 @@ def mk_msgs(
209
209
  "Create a list of fastllm canonical Msgs."
210
210
  if not msgs: return []
211
211
  if not isinstance(msgs, list): msgs = [msgs]
212
- msgs = L(msgs).map(lambda m: fmt2hist(m) if isinstance(m,str) and tool_dtls_tag in m else [m]).concat()
212
+ msgs = L(msgs).map(lambda m:
213
+ fmt2hist(m) if isinstance(m,str) and (tool_dtls_tag in m or token_dtls_tag in m) else [m]
214
+ ).concat()
213
215
  res, role = [], 'user'
214
216
  for m in msgs:
215
217
  res.append(msg := remove_cache_ckpts(mk_msg(m, role=role)))
@@ -297,9 +299,11 @@ def _has_stop(tres_parts): return any(isinstance(p.text, StopResponse) for p in
297
299
  def _trunc_str(s, mx=2000, skip=10, replace="TRUNCATED"):
298
300
  "Truncate `s` to `mx` chars max, adding `replace` if truncated"
299
301
  if not isinstance(s, str): s = str(s)
300
- if len(s)>2 and s[0]=='𝍁' and s[-1]=='𝍁': return s[1:-1]
302
+ s = s.rstrip()
303
+ if len(s)>2 and s[0]=='𝍁' and s[-1]=='𝍁':
304
+ s = s[1:-1]
305
+ if replace: return s
301
306
  if isinstance_str(s, ('FullResponse','Safe')): return s
302
- s = str(s).strip()
303
307
  if len(s)<=mx: return s
304
308
  s = s[skip:mx-skip]
305
309
  ss = s.split(' ')
@@ -431,7 +435,8 @@ def _think_kw(model, think, vendor_name):
431
435
  if not think: return {}
432
436
  if 'opus-4-7' in model:
433
437
  e = 'xhigh' if think=='h' else effort.get(think)
434
- return dict(thinking={"type":"adaptive", "display":"summarized"}, output_config={"effort":e})
438
+ eff = dict(thinking={"type":"adaptive", "display":"summarized"}, output_config={"effort":e})
439
+ return dict(reasoning_effort=eff)
435
440
  try: xhigh = get_model_info(model, vendor_name).get('supports_xhigh_reasoning_effort')
436
441
  except: xhigh = False
437
442
  eff = effort.get(think) if think!='x' else 'xhigh' if xhigh else 'high'
@@ -691,7 +696,7 @@ defaults.chat_callbacks = [DeepseekPrefillCallback, FenceToolCallback, ToolRemin
691
696
  def _trunc_param(v, mx=40):
692
697
  "Truncate and escape param value for display"
693
698
  tp = _trunc_str(str(v).replace('`', r'\`'), mx=mx, replace=None, skip=0)
694
- try: return ast.literal_eval(tp)
699
+ try: return dumps(tp, ensure_ascii=False)
695
700
  except Exception: return repr(tp).replace('\\\\', '\\')
696
701
 
697
702
  # %% ../nbs/07_chat.ipynb #80c0abdb
@@ -721,7 +726,7 @@ def mk_tr_details(tr, mx=2000):
721
726
  'call':{'function': tr.data['name'], 'arguments': args},
722
727
  'result':_trunc_content(tr.text, mx=mx),}
723
728
  summ = f"<summary>{_tc_summary(tr)}</summary>"
724
- return f"\n\n{tool_dtls_tag}\n{summ}\n\n```json\n{dumps(res, indent=2)}\n```\n\n</details>\n\n"
729
+ return f"\n\n{tool_dtls_tag}\n{summ}\n\n```json\n{dumps(res, indent=2, ensure_ascii=False)}\n```\n\n</details>\n\n"
725
730
 
726
731
  # %% ../nbs/07_chat.ipynb #3049001c
727
732
  def mk_srv_tc_details(tc, mx=2000):
@@ -729,7 +734,7 @@ def mk_srv_tc_details(tc, mx=2000):
729
734
  args = {k:_trunc_str(v, mx=mx*5) for k,v in tc.arguments.items()}
730
735
  res = {'id':tc.id, 'server':True, 'call':{'function': tc.name, 'arguments': args}, 'result':"Server tool call executed."}
731
736
  summ = f"<summary>{_srv_tc_summary(tc)}</summary>"
732
- return f"\n\n{tool_dtls_tag}\n{summ}\n\n```json\n{dumps(res, indent=2)}\n```\n\n</details>\n\n"
737
+ return f"\n\n{tool_dtls_tag}\n{summ}\n\n```json\n{dumps(res, indent=2, ensure_ascii=False)}\n```\n\n</details>\n\n"
733
738
 
734
739
  # %% ../nbs/07_chat.ipynb #f0d984ec
735
740
  # status_re = re.compile(r'^- ⏳ <code>(.*)</code> ⏳$|^🧠+$', re.MULTILINE) # TODO: Need to yield tool calls as they are done collated in fastllm `_acollect_stream`
@@ -0,0 +1,7 @@
1
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/05_codex.ipynb.
2
+
3
+ # %% auto #0
4
+ __all__ = []
5
+
6
+ # %% ../nbs/05_codex.ipynb #a1d088d2
7
+ from fastcore.utils import *
@@ -138,6 +138,7 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
138
138
  deltas.append(d)
139
139
  part_accum.finalize()
140
140
  tcs = part_accum.tool_calls
141
+ if api_name: usg = api_registry.apis[api_name].finalize_usage(usg, part_accum.parts)
141
142
  if stop: fin = FinishReason.stop
142
143
  fin = FinishReason.tool_calls if fin==FinishReason.stop and any(~L(tcs).attrgot('server')) else fin # recheck tool calls post collation
143
144
  # tool calls and non-anthropic citations are yielded at the end
@@ -145,3 +146,4 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
145
146
  message=Msg(role="assistant", content=part_accum.parts),
146
147
  finish_reason=fin, usage=usg, tool_calls=tcs, api_name=api_name, vendor_name=vendor_name,
147
148
  raw={'deltas':deltas})
149
+
@@ -4,10 +4,11 @@
4
4
 
5
5
  # %% auto #0
6
6
  __all__ = ['PartType', 'FinishReason', 'api_registry', 'model_prices_url', 'haik45', 'sonn45', 'sonn', 'sonn46', 'opus46', 'opus',
7
- 'gpt54', 'gpt54m', 'codex54', 'codex55', 'codex53spark', 'codex_pricing', 'Part', 'Msg', 'ToolCall',
8
- 'display_list', 'Usage', 'Completion', 'APIRegistry', 'mk_completion', 'mk_tool_res_msg', 'fn_schema',
9
- 'sys_text', 'part_txt', 'data_url', 'url_mime', 'payload_kwargs', 'get_api_key', 'model_prices_meta',
10
- 'infer_api_name', 'get_model_meta', 'get_model_info']
7
+ 'gpt54', 'gpt54m', 'gpt55', 'codex54', 'codex54m', 'codex55', 'codex53spark', 'model_info_registry',
8
+ 'deepseek_v4_common', 'codex_pricing', 'Part', 'Msg', 'ToolCall', 'display_list', 'Usage', 'Completion',
9
+ 'APIRegistry', 'mk_completion', 'mk_tool_res_msg', 'fn_schema', 'sys_text', 'part_txt', 'data_url',
10
+ 'url_mime', 'payload_kwargs', 'get_api_key', 'model_prices_meta', 'infer_api_name', 'get_model_meta',
11
+ 'register_model_info', 'get_model_info', 'get_model_pricing', 'approx_pricing']
11
12
 
12
13
  # %% ../nbs/00_types.ipynb #b4d047fd
13
14
  from dataclasses import dataclass, field
@@ -147,25 +148,29 @@ FinishReason = str_enum('finish_reason', 'stop', 'tool_calls', 'length', 'conten
147
148
  # %% ../nbs/00_types.ipynb #fc681c52
148
149
  class APIRegistry:
149
150
  def __init__(self): self.apis = {}
150
- def register(self, name, **kwargs): self.apis[name] = SimpleNamespace(**kwargs)
151
+ def register(self, name, finalize_usage=noop, **kwargs): self.apis[name] = SimpleNamespace(finalize_usage=finalize_usage, **kwargs)
151
152
 
152
153
  api_registry = APIRegistry()
153
154
 
155
+
154
156
  # %% ../nbs/00_types.ipynb #d58a5f96
155
157
  def mk_completion(resp, model, api_name, vendor_name):
156
158
  "Normalize an api response into Completion."
157
159
  api = api_registry.apis[api_name]
158
160
  tcs = api.norm_tool_calls(resp)
161
+ parts = api.norm_parts(resp)
162
+ usg = api.finalize_usage(api.norm_usage(resp), parts)
159
163
  return Completion(
160
164
  model=resp.get("model") or model,
161
- message=Msg(role="assistant", content=api.norm_parts(resp)),
165
+ message=Msg(role="assistant", content=parts),
162
166
  finish_reason=api.norm_finish(resp, tcs),
163
- usage=api.norm_usage(resp),
167
+ usage=usg,
164
168
  tool_calls=tcs,
165
169
  api_name=api_name,
166
170
  vendor_name=vendor_name,
167
171
  raw=resp)
168
172
 
173
+
169
174
  # %% ../nbs/00_types.ipynb #d5322db5
170
175
  def mk_tool_res_msg(tool_calls:list[ToolCall], results:list[str|list]):
171
176
  'A util to prepare parallel tool call with str or media list results'
@@ -225,6 +230,7 @@ def get_api_key(api_key, default):
225
230
 
226
231
  # %% ../nbs/00_types.ipynb #852adecd
227
232
  model_prices_url = 'https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json'
233
+
228
234
  @flexicache(time_policy(24*60*60))
229
235
  def model_prices_meta(): return urljson(model_prices_url)
230
236
 
@@ -254,63 +260,94 @@ opus46 = "claude-opus-4-6"
254
260
  opus = "claude-opus-4-7"
255
261
  gpt54 = "gpt-5.4"
256
262
  gpt54m = "gpt-5.4-mini"
263
+ gpt55 = "gpt-5.5"
257
264
  codex54 = "gpt-5.4"
265
+ codex54m = "gpt-5.4-mini"
258
266
  codex55 = "gpt-5.5"
259
267
  codex53spark = "gpt-5.3-codex-spark"
260
268
 
261
- # %% ../nbs/00_types.ipynb #d6d5b98c
262
- codex_pricing = {
263
- "input_cost_per_token": 0.10 / 1_000_000,
264
- "cache_creation_input_token_cost": 0.10 / 1_000_000,
265
- "cache_read_input_token_cost": 0.10 / 1_000_000,
266
- "output_cost_per_token": 0.50 / 1_000_000,
267
- }
269
+ # %% ../nbs/00_types.ipynb #583e017b
270
+ model_info_registry = {}
268
271
 
269
- _codex_overrides = {
270
- codex53spark: dict(
271
- supports_vision=False, supports_image_input=False, supports_web_search=True, supports_reasoning=True,
272
- max_tokens=128000, max_input_tokens=128000, max_output_tokens=128000)
273
- }
272
+ def register_model_info(model, vendor_name=None, base=None, base_vendor_name=None, **overrides):
273
+ "Register model metadata, optionally starting from `base`."
274
+ info = dict(get_model_info(base, base_vendor_name or vendor_name)) if base else {}
275
+ info.update(overrides)
276
+ model_info_registry[vendor_name, model] = info
274
277
 
275
- # %% ../nbs/00_types.ipynb #fbfdeb0a
276
- def get_model_info(mn, vendor_name=None, strict=False):
277
- info = get_model_meta(mn, 'chatgpt' if vendor_name=='codex' else vendor_name)
278
- # anthropic web search
278
+ def get_model_info(mn, vendor_name=None):
279
+ info = model_info_registry.get((vendor_name, mn)) or get_model_meta(mn, vendor_name)
279
280
  if 'search_context_cost_per_query' in info: info['supports_web_search'] = True
280
- # kimi
281
- if 'kimi' in mn:
282
- if 'k2p6' in mn: info = get_model_meta(mn.replace('k2p6', 'k2p5'), vendor_name)
283
- info['supports_reasoning'] = True
284
- info['supports_vision'] = True
285
- if vendor_name == 'moonshot': info['supports_assistant_prefill'] = True
286
- # gpt web search
287
- if mn in ("gpt-5.4", "gpt-5.4-mini"):
288
- info['supports_web_search'] = True
289
- info.pop('mode', None)
290
- # codex updates
291
- if vendor_name == 'codex':
292
- info = merge(info, codex_pricing)
293
- info |= _codex_overrides.get(mn, {})
294
- # deepseek v4
295
- if vendor_name == 'deepseek' and mn in ("deepseek-v4-flash", "deepseek-v4-pro"):
296
- info = dict(get_model_meta("deepseek/deepseek-v3.2"))
297
- info |= dict(supports_assistant_prefill=True, supports_function_calling=True, supports_prompt_caching=True,
298
- supports_reasoning=True, supports_tool_choice=True)
299
- info.update(input_cost_per_token=1.4e-07, input_cost_per_token_cache_hit=2.8e-09, output_cost_per_token=2.8e-07,
300
- max_input_tokens=1048576, max_output_tokens=393216, max_tokens=393216)
301
- if 'pro' in mn: info = {**info, 'input_cost_per_token': 4.35e-07, 'input_cost_per_token_cache_hit': 3.625e-09, 'output_cost_per_token': 8.7e-07}
302
- # qwen 3p6
303
- if vendor_name == 'fireworks_ai' and mn == 'accounts/fireworks/models/qwen3p6-plus':
304
- info = dict(supports_vision=True, supports_reasoning=True, supports_function_calling=True, supports_tool_choice=True,
305
- supports_system_messages=True, supports_response_schema=True, supports_parallel_function_calling=True,
306
- supports_prompt_caching=True, supports_native_streaming=True, supports_native_structured_output=True,
307
- max_tokens=1000000, max_input_tokens=1000000, max_output_tokens=65536,
308
- input_cost_per_token=0.5e-6, cache_read_input_token_cost=0.1e-6, output_cost_per_token=3.0e-6)
309
-
310
- # unresolved models
311
- if not info and not strict: info = info | codex_pricing
312
281
  return dict2obj(info)
313
282
 
283
+ # %% ../nbs/00_types.ipynb #8261dcd0
284
+ register_model_info('accounts/fireworks/models/qwen3p6-plus', vendor_name='fireworks_ai',
285
+ supports_vision=True, supports_reasoning=True, supports_function_calling=True, supports_tool_choice=True,
286
+ supports_system_messages=True, supports_response_schema=True, supports_parallel_function_calling=True,
287
+ supports_prompt_caching=True, supports_native_streaming=True, supports_native_structured_output=True,
288
+ max_tokens=1000000, max_input_tokens=1000000, max_output_tokens=65536,
289
+ input_cost_per_token=0.5e-6, cache_read_input_token_cost=0.1e-6, output_cost_per_token=3.0e-6)
290
+
291
+ register_model_info('gemini-3.5-flash', vendor_name='gemini', base='gemini-3-flash-preview',
292
+ input_cost_per_token=1.5e-6, output_cost_per_token=9e-6,
293
+ output_cost_per_reasoning_token=9e-6, cache_read_input_token_cost=1.5e-7)
294
+
295
+ for model in ('gpt-5.4', 'gpt-5.4-mini'):
296
+ register_model_info(model, vendor_name='openai', base=model, supports_web_search=True, mode=None)
297
+
298
+ for model in ('kimi-k2.5', 'kimi-k2.6'):
299
+ register_model_info(model, vendor_name='moonshot', base=f'moonshot/{model}', base_vendor_name=None,
300
+ supports_reasoning=True, supports_vision=True, supports_assistant_prefill=True)
301
+
302
+ register_model_info('gemini-3.1-flash-lite', vendor_name='gemini', base='gemini-3.1-flash-lite-preview')
303
+ register_model_info('models/gemini-3.1-flash-lite', vendor_name='gemini', base='gemini-3.1-flash-lite-preview')
304
+
305
+ for model in ('accounts/fireworks/models/kimi-k2p5', 'accounts/fireworks/models/kimi-k2p6'):
306
+ register_model_info(model, vendor_name='fireworks_ai', base=model.replace('k2p6', 'k2p5'),
307
+ supports_reasoning=True, supports_vision=True,
308
+ input_cost_per_token=0.95e-6, cache_read_input_token_cost=0.16e-6, output_cost_per_token=4.0e-6)
309
+
310
+ # %% ../nbs/00_types.ipynb #948d55d0
311
+ deepseek_v4_common = dict(
312
+ supports_assistant_prefill=True, supports_function_calling=True, supports_prompt_caching=True,
313
+ supports_reasoning=True, supports_tool_choice=True,
314
+ max_input_tokens=1048576, max_output_tokens=393216, max_tokens=393216)
315
+
316
+ register_model_info('deepseek-v4-flash', vendor_name='deepseek', base='deepseek/deepseek-v3.2', **deepseek_v4_common,
317
+ input_cost_per_token=1.4e-07, input_cost_per_token_cache_hit=2.8e-09,
318
+ output_cost_per_token=2.8e-07, cache_read_input_token_cost=1.4e-07/10)
319
+ register_model_info('deepseek-v4-pro', vendor_name='deepseek', base='deepseek/deepseek-v3.2', **deepseek_v4_common,
320
+ input_cost_per_token=4.35e-07, input_cost_per_token_cache_hit=3.625e-09,
321
+ output_cost_per_token=8.7e-07, cache_read_input_token_cost=4.35e-07/10)
322
+
323
+ # %% ../nbs/00_types.ipynb #2c23d11e
324
+ codex_pricing = dict(
325
+ input_cost_per_token = 0.10/1_000_000, output_cost_per_token = 0.50/1_000_000,
326
+ cache_creation_input_token_cost = 0.10/1_000_000, cache_read_input_token_cost = 0.10/1_000_000)
327
+
328
+ for model in (codex54, codex54m, codex55):
329
+ register_model_info(model, 'codex', base=model, base_vendor_name='chatgpt', supports_web_search=True, **codex_pricing)
330
+
331
+ register_model_info(codex53spark, 'codex', **codex_pricing,
332
+ supports_vision=False, supports_image_input=False, supports_web_search=True, supports_reasoning=True,
333
+ max_tokens=128000, max_input_tokens=128000, max_output_tokens=128000)
334
+
335
+
336
+ # %% ../nbs/00_types.ipynb #24cc47ec
337
+ def get_model_pricing(mn, vendor_name, million=True):
338
+ return {k:round(v * (1e6 if million else 1), 6)
339
+ for k,v in get_model_info(mn, vendor_name).items()
340
+ if 'cost' in k and isinstance(v,float) and 'priority' not in k}
341
+
342
+ # %% ../nbs/00_types.ipynb #79304cd9
343
+ def approx_pricing(nm, vendor_name, out=10, cache=80, inp=10, markup=0):
344
+ "Approx cost per million tokens with given output/cache/input proportions"
345
+ p = get_model_pricing(nm, vendor_name)
346
+ ic = p.get('cache_creation_input_token_cost', p['input_cost_per_token'])
347
+ res = (p['output_cost_per_token']*out + p['cache_read_input_token_cost']*cache + ic*inp) / (out+cache+inp)
348
+ if nm=='claude-opus-4-7': res *= 1.5
349
+ return res*(1+markup)
350
+
314
351
  # %% ../nbs/00_types.ipynb #8bfca02d
315
352
  @patch(as_prop=True)
316
353
  def cost(self:Completion):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-fastllm
3
- Version: 0.0.7
3
+ Version: 0.0.9
4
4
  Author-email: Kerem Turgutlu <keremturgutlu@gmail.com>
5
5
  License: Apache-2.0
6
6
  Project-URL: Repository, https://github.com/AnswerDotAI/fastllm
@@ -5,6 +5,7 @@ fastllm/_modidx.py
5
5
  fastllm/acomplete.py
6
6
  fastllm/anthropic.py
7
7
  fastllm/chat.py
8
+ fastllm/codex.py
8
9
  fastllm/gemini.py
9
10
  fastllm/openai_chat.py
10
11
  fastllm/openai_responses.py
@@ -1 +0,0 @@
1
- __version__ = "0.0.7"
File without changes
File without changes