python-fastllm 0.0.10__tar.gz → 0.0.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {python_fastllm-0.0.10 → python_fastllm-0.0.11}/PKG-INFO +1 -1
  2. python_fastllm-0.0.11/fastllm/__init__.py +1 -0
  3. {python_fastllm-0.0.10 → python_fastllm-0.0.11}/fastllm/acomplete.py +2 -1
  4. {python_fastllm-0.0.10 → python_fastllm-0.0.11}/fastllm/anthropic.py +1 -1
  5. {python_fastllm-0.0.10 → python_fastllm-0.0.11}/fastllm/chat.py +3 -2
  6. {python_fastllm-0.0.10 → python_fastllm-0.0.11}/fastllm/openai_chat.py +1 -1
  7. {python_fastllm-0.0.10 → python_fastllm-0.0.11}/fastllm/streaming.py +1 -1
  8. {python_fastllm-0.0.10 → python_fastllm-0.0.11}/fastllm/types.py +31 -18
  9. {python_fastllm-0.0.10 → python_fastllm-0.0.11}/python_fastllm.egg-info/PKG-INFO +1 -1
  10. python_fastllm-0.0.10/fastllm/__init__.py +0 -1
  11. {python_fastllm-0.0.10 → python_fastllm-0.0.11}/README.md +0 -0
  12. {python_fastllm-0.0.10 → python_fastllm-0.0.11}/fastllm/_modidx.py +0 -0
  13. {python_fastllm-0.0.10 → python_fastllm-0.0.11}/fastllm/codex.py +0 -0
  14. {python_fastllm-0.0.10 → python_fastllm-0.0.11}/fastllm/gemini.py +0 -0
  15. {python_fastllm-0.0.10 → python_fastllm-0.0.11}/fastllm/openai_responses.py +0 -0
  16. {python_fastllm-0.0.10 → python_fastllm-0.0.11}/fastllm/specs/anthropic.json +0 -0
  17. {python_fastllm-0.0.10 → python_fastllm-0.0.11}/fastllm/specs/anthropic.yml +0 -0
  18. {python_fastllm-0.0.10 → python_fastllm-0.0.11}/fastllm/specs/gemini.json +0 -0
  19. {python_fastllm-0.0.10 → python_fastllm-0.0.11}/fastllm/specs/openai.with-code-samples.json +0 -0
  20. {python_fastllm-0.0.10 → python_fastllm-0.0.11}/fastllm/specs/openai.with-code-samples.yml +0 -0
  21. {python_fastllm-0.0.10 → python_fastllm-0.0.11}/fastllm/specs/spec_manifest.json +0 -0
  22. {python_fastllm-0.0.10 → python_fastllm-0.0.11}/pyproject.toml +0 -0
  23. {python_fastllm-0.0.10 → python_fastllm-0.0.11}/python_fastllm.egg-info/SOURCES.txt +0 -0
  24. {python_fastllm-0.0.10 → python_fastllm-0.0.11}/python_fastllm.egg-info/dependency_links.txt +0 -0
  25. {python_fastllm-0.0.10 → python_fastllm-0.0.11}/python_fastllm.egg-info/entry_points.txt +0 -0
  26. {python_fastllm-0.0.10 → python_fastllm-0.0.11}/python_fastllm.egg-info/requires.txt +0 -0
  27. {python_fastllm-0.0.10 → python_fastllm-0.0.11}/python_fastllm.egg-info/top_level.txt +0 -0
  28. {python_fastllm-0.0.10 → python_fastllm-0.0.11}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-fastllm
3
- Version: 0.0.10
3
+ Version: 0.0.11
4
4
  Author-email: Kerem Turgutlu <keremturgutlu@gmail.com>
5
5
  License: Apache-2.0
6
6
  Project-URL: Repository, https://github.com/AnswerDotAI/fastllm
@@ -0,0 +1 @@
1
+ __version__ = "0.0.11"
@@ -38,6 +38,7 @@ vendor_mapping = {
38
38
  "codex": ('openai', 'https://chatgpt.com/backend-api/codex', 'CODEX_AUTH_TOKEN', _codex_json),
39
39
  "moonshot": ('openai_chat', "https://api.moonshot.ai/v1", "MOONSHOT_API_KEY"),
40
40
  "deepseek": ('openai_chat', "https://api.deepseek.com/v1", "DEEPSEEK_API_KEY"),
41
+ "mimo": ('openai_chat', "https://api.xiaomimimo.com/v1", "MIMO_API_KEY"),
41
42
  "openrouter": ('openai_chat', "https://openrouter.ai/api/v1", "OPENROUTER_API_KEY"),
42
43
  "together": ('openai_chat', "https://api.together.xyz/v1", "TOGETHER_API_KEY"),
43
44
  "fireworks_ai": ('openai_chat', "https://api.fireworks.ai/inference/v1", "FIREWORKS_API_KEY"),
@@ -82,7 +83,7 @@ def _is_ctx_exceeded(code, msg):
82
83
  if str(code or "").lower() == "context_length_exceeded": return True
83
84
  return any(s in m for s in ("exceed context limit", "maximum context length", "maximum context limit",
84
85
  "longer than the model's context length", "input tokens exceed the configured limit",
85
- "exceeds the maximum number of tokens allowed", "prompt is too long"))
86
+ "exceeds the maximum number of tokens allowed", "prompt is too long", "exceeds the context window"))
86
87
 
87
88
  def _classify_error(exc):
88
89
  "Upgrade generic `APIError` to a specific subclass if applicable."
@@ -287,7 +287,7 @@ def cost(usage, m):
287
287
  in_tok = raw['input_tokens']
288
288
  cache_read = raw.get('cache_read_input_tokens', 0)
289
289
  cc = raw.get('cache_creation', {}) or {}
290
- cache_5m = cc.get('ephemeral_5m_input_tokens', 0)
290
+ cache_5m = cc.get('ephemeral_5m_input_tokens', raw.get('cache_creation_input_tokens', 0))
291
291
  cache_1h = cc.get('ephemeral_1h_input_tokens', 0)
292
292
  cost = in_tok * m.input_cost_per_token
293
293
  cost += raw['output_tokens'] * m.output_cost_per_token
@@ -116,7 +116,7 @@ re_token = re.compile(fr"^{re.escape(token_dtls_tag)}\n*<summary>.*?</summary>\n
116
116
  _fence_back = '`````'
117
117
  _fence_re = re.compile(f'^{_fence_back}(py|bash)\n(.*?)\n{_fence_back}$', re.DOTALL | re.MULTILINE)
118
118
  _result_re = re.compile(f'\n{_fence_back}result\n(.*?)\n{_fence_back}\n', re.DOTALL)
119
- _lang2tool = dict(py='python', bash='bash')
119
+ _lang2tool = dict(py='pyrun', bash='bash')
120
120
 
121
121
  class FenceToolStop:
122
122
  def __init__(self, langs): self.langs = langs
@@ -204,6 +204,7 @@ def fmt2hist(outp:str)->list[Msg]:
204
204
  for msg in hist:
205
205
  if msg.role == 'assistant': result.extend(_split_msg_on_fences(msg))
206
206
  else: result.append(msg)
207
+ if result[-1].role == 'tool': result.append(Msg(role='assistant', content=[Part(type=PartType.text, text='.')]))
207
208
  return result
208
209
 
209
210
  # %% ../nbs/07_chat.ipynb #8de5ce8d
@@ -635,7 +636,7 @@ def _active_fence_langs(tool_schemas):
635
636
  async def run_fence_tool(lang, code, ns):
636
637
  "Run the mapped tool for `lang` with the code, return result fence"
637
638
  tname = _lang2tool[lang]
638
- arg = dict(code=code) if lang == 'py' else dict(command=code)
639
+ arg = dict(code=code) if lang == 'py' else dict(cmd=code)
639
640
  res = _mk_tool_result(await call_func_async(tname, arg, ns=ns, raise_on_err=False))
640
641
  return _mk_result_fence(_trunc_str(str(res)))
641
642
 
@@ -195,7 +195,7 @@ def get_hdrs(api_key=None):
195
195
  # %% ../nbs/03_oai_chat.ipynb #f89e2bf6
196
196
  def cost(usage, m):
197
197
  raw = usage.raw
198
- pd, cd = raw.get('prompt_tokens_details', {}), raw.get('completion_tokens_details', {})
198
+ pd,cd = raw.get('prompt_tokens_details') or {},raw.get('completion_tokens_details') or {}
199
199
  cached = pd.get('cached_tokens', 0)
200
200
  in_audio, out_audio = pd.get('audio_tokens', 0), cd.get('audio_tokens', 0)
201
201
  in_txt = raw['prompt_tokens'] - cached - in_audio
@@ -157,7 +157,7 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
157
157
  if stop: fin = FinishReason.stop
158
158
  fin = FinishReason.tool_calls if fin==FinishReason.stop and any(~L(tcs).attrgot('server')) else fin # recheck tool calls post collation
159
159
  # tool calls and non-anthropic citations are yielded at the end
160
- yield Completion(d.raw.get('model', model),
160
+ yield Completion(model,
161
161
  message=Msg(role="assistant", content=part_accum.parts),
162
162
  finish_reason=fin, usage=usg, tool_calls=tcs, api_name=api_name, vendor_name=vendor_name,
163
163
  raw={'deltas':deltas})
@@ -5,10 +5,11 @@
5
5
  # %% auto #0
6
6
  __all__ = ['PartType', 'FinishReason', 'api_registry', 'model_prices_url', 'haik45', 'sonn45', 'sonn', 'sonn46', 'opus46', 'opus',
7
7
  'gpt54', 'gpt54m', 'gpt55', 'codex54', 'codex54m', 'codex55', 'codex53spark', 'model_info_registry',
8
- 'deepseek_v4_common', 'codex_pricing', 'Part', 'Msg', 'ToolCall', 'display_list', 'Usage', 'Completion',
9
- 'APIRegistry', 'mk_completion', 'mk_tool_res_msg', 'fn_schema', 'sys_text', 'part_txt', 'data_url',
10
- 'url_mime', 'payload_kwargs', 'get_api_key', 'model_prices_meta', 'infer_api_name', 'get_model_meta',
11
- 'register_model_info', 'get_model_info', 'get_model_pricing', 'approx_pricing']
8
+ 'modern_llm', 'deepseek_v4_common', 'mimo_v25_common', 'codex_pricing', 'Part', 'Msg', 'ToolCall',
9
+ 'display_list', 'Usage', 'Completion', 'APIRegistry', 'mk_completion', 'mk_tool_res_msg', 'fn_schema',
10
+ 'sys_text', 'part_txt', 'data_url', 'url_mime', 'payload_kwargs', 'get_api_key', 'model_prices_meta',
11
+ 'infer_api_name', 'get_model_meta', 'register_model_info', 'get_model_info', 'get_model_pricing',
12
+ 'approx_pricing']
12
13
 
13
14
  # %% ../nbs/00_types.ipynb #b4d047fd
14
15
  import httpx
@@ -164,7 +165,7 @@ def mk_completion(resp, model, api_name, vendor_name):
164
165
  parts = api.norm_parts(resp)
165
166
  usg = api.finalize_usage(api.norm_usage(resp), parts)
166
167
  return Completion(
167
- model=resp.get("model") or model,
168
+ model=model,
168
169
  message=Msg(role="assistant", content=parts),
169
170
  finish_reason=api.norm_finish(resp, tcs),
170
171
  usage=usg,
@@ -173,7 +174,6 @@ def mk_completion(resp, model, api_name, vendor_name):
173
174
  vendor_name=vendor_name,
174
175
  raw=resp)
175
176
 
176
-
177
177
  # %% ../nbs/00_types.ipynb #d5322db5
178
178
  def mk_tool_res_msg(tool_calls:list[ToolCall], results:list[str|list]):
179
179
  'A util to prepare parallel tool call with str or media list results'
@@ -271,7 +271,7 @@ haik45 = "claude-haiku-4-5"
271
271
  sonn45 = "claude-sonnet-4-5"
272
272
  sonn = sonn46 = "claude-sonnet-4-6"
273
273
  opus46 = "claude-opus-4-6"
274
- opus = "claude-opus-4-7"
274
+ opus = "claude-opus-4-8"
275
275
  gpt54 = "gpt-5.4"
276
276
  gpt54m = "gpt-5.4-mini"
277
277
  gpt55 = "gpt-5.5"
@@ -287,6 +287,8 @@ def register_model_info(model, vendor_name=None, base=None, base_vendor_name=Non
287
287
  "Register model metadata, optionally starting from `base`."
288
288
  info = dict(get_model_info(base, base_vendor_name or vendor_name)) if base else {}
289
289
  info.update(overrides)
290
+ if isinstance(c := info.get('search_context_cost_per_query'), (int,float)):
291
+ info['search_context_cost_per_query'] = {f'search_context_size_{s}':c for s in ('low','medium','high')}
290
292
  model_info_registry[vendor_name, model] = info
291
293
 
292
294
  def get_model_info(mn, vendor_name=None):
@@ -294,11 +296,17 @@ def get_model_info(mn, vendor_name=None):
294
296
  if 'search_context_cost_per_query' in info: info['supports_web_search'] = True
295
297
  return dict2obj(info)
296
298
 
299
+ # %% ../nbs/00_types.ipynb #331c5d0a
300
+ register_model_info("claude-opus-4-8", vendor_name='anthropic', base="claude-opus-4-6")
301
+
302
+ # %% ../nbs/00_types.ipynb #b36178d4
303
+ modern_llm = dict(supports_function_calling=True, supports_tool_choice=True, supports_prompt_caching=True,
304
+ supports_parallel_function_calling=True, supports_native_streaming=True, supports_native_structured_output=True,
305
+ supports_reasoning=True, supports_response_schema=True, supports_system_messages=True)
306
+
297
307
  # %% ../nbs/00_types.ipynb #8261dcd0
298
- register_model_info('accounts/fireworks/models/qwen3p6-plus', vendor_name='fireworks_ai',
299
- supports_vision=True, supports_reasoning=True, supports_function_calling=True, supports_tool_choice=True,
300
- supports_system_messages=True, supports_response_schema=True, supports_parallel_function_calling=True,
301
- supports_prompt_caching=True, supports_native_streaming=True, supports_native_structured_output=True,
308
+ register_model_info('accounts/fireworks/models/qwen3p6-plus', vendor_name='fireworks_ai', **modern_llm,
309
+ supports_vision=True,
302
310
  max_tokens=1000000, max_input_tokens=1000000, max_output_tokens=65536,
303
311
  input_cost_per_token=0.5e-6, cache_read_input_token_cost=0.1e-6, output_cost_per_token=3.0e-6)
304
312
 
@@ -322,9 +330,7 @@ for model in ('accounts/fireworks/models/kimi-k2p5', 'accounts/fireworks/models/
322
330
  input_cost_per_token=0.95e-6, cache_read_input_token_cost=0.16e-6, output_cost_per_token=4.0e-6)
323
331
 
324
332
  # %% ../nbs/00_types.ipynb #948d55d0
325
- deepseek_v4_common = dict(
326
- supports_assistant_prefill=True, supports_function_calling=True, supports_prompt_caching=True,
327
- supports_reasoning=True, supports_tool_choice=True,
333
+ deepseek_v4_common = dict(**modern_llm, supports_assistant_prefill=True,
328
334
  max_input_tokens=1048576, max_output_tokens=393216, max_tokens=393216)
329
335
 
330
336
  register_model_info('deepseek-v4-flash', vendor_name='deepseek', base='deepseek/deepseek-v3.2', **deepseek_v4_common,
@@ -334,19 +340,26 @@ register_model_info('deepseek-v4-pro', vendor_name='deepseek', base='deepseek/de
334
340
  input_cost_per_token=4.35e-07, input_cost_per_token_cache_hit=3.625e-09,
335
341
  output_cost_per_token=8.7e-07, cache_read_input_token_cost=4.35e-07/10)
336
342
 
343
+ mimo_v25_common = dict(**modern_llm, supports_web_search=True, max_input_tokens=1048576, max_output_tokens=131072, max_tokens=131072)
344
+
345
+ register_model_info('mimo-v2.5-pro', vendor_name='mimo', **mimo_v25_common, base='deepseek/deepseek-v4-pro',
346
+ input_cost_per_token=0.435e-6, output_cost_per_token=0.87e-6, cache_read_input_token_cost=0.0036e-6, search_context_cost_per_query=0.005)
347
+ register_model_info('mimo-v2.5', vendor_name='mimo', **mimo_v25_common, base='deepseek/deepseek-v4',
348
+ input_cost_per_token=0.14e-6, output_cost_per_token=0.28e-6, cache_read_input_token_cost=0.0028e-6, search_context_cost_per_query=0.005,
349
+ supports_vision=True, supports_image_input=True)
350
+
337
351
  # %% ../nbs/00_types.ipynb #2c23d11e
338
352
  codex_pricing = dict(
339
353
  input_cost_per_token = 0.10/1_000_000, output_cost_per_token = 0.50/1_000_000,
340
354
  cache_creation_input_token_cost = 0.10/1_000_000, cache_read_input_token_cost = 0.10/1_000_000)
341
355
 
342
356
  for model in (codex54, codex54m, codex55):
343
- register_model_info(model, 'codex', base=model, base_vendor_name='chatgpt', supports_web_search=True, **codex_pricing)
357
+ register_model_info(model, 'codex', base=model, base_vendor_name='chatgpt', supports_web_search=True, max_input_tokens=256000, **codex_pricing)
344
358
 
345
359
  register_model_info(codex53spark, 'codex', **codex_pricing,
346
- supports_vision=False, supports_image_input=False, supports_web_search=True, supports_reasoning=True,
360
+ supports_vision=False, supports_image_input=False, supports_web_search=True, supports_reasoning=True, supports_function_calling=True,
347
361
  max_tokens=128000, max_input_tokens=128000, max_output_tokens=128000)
348
362
 
349
-
350
363
  # %% ../nbs/00_types.ipynb #24cc47ec
351
364
  def get_model_pricing(mn, vendor_name, million=True):
352
365
  return {k:round(v * (1e6 if million else 1), 6)
@@ -359,7 +372,7 @@ def approx_pricing(nm, vendor_name, out=10, cache=80, inp=10, markup=0):
359
372
  p = get_model_pricing(nm, vendor_name)
360
373
  ic = p.get('cache_creation_input_token_cost', p['input_cost_per_token'])
361
374
  res = (p['output_cost_per_token']*out + p['cache_read_input_token_cost']*cache + ic*inp) / (out+cache+inp)
362
- if nm=='claude-opus-4-7': res *= 1.5
375
+ if nm in ('claude-opus-4-7','claude-opus-4-8'): res *= 1.5
363
376
  return res*(1+markup)
364
377
 
365
378
  # %% ../nbs/00_types.ipynb #8bfca02d
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-fastllm
3
- Version: 0.0.10
3
+ Version: 0.0.11
4
4
  Author-email: Kerem Turgutlu <keremturgutlu@gmail.com>
5
5
  License: Apache-2.0
6
6
  Project-URL: Repository, https://github.com/AnswerDotAI/fastllm
@@ -1 +0,0 @@
1
- __version__ = "0.0.10"