python-fastllm 0.0.10__tar.gz → 0.0.11__tar.gz
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {python_fastllm-0.0.10 → python_fastllm-0.0.11}/PKG-INFO +1 -1
- python_fastllm-0.0.11/fastllm/__init__.py +1 -0
- {python_fastllm-0.0.10 → python_fastllm-0.0.11}/fastllm/acomplete.py +2 -1
- {python_fastllm-0.0.10 → python_fastllm-0.0.11}/fastllm/anthropic.py +1 -1
- {python_fastllm-0.0.10 → python_fastllm-0.0.11}/fastllm/chat.py +3 -2
- {python_fastllm-0.0.10 → python_fastllm-0.0.11}/fastllm/openai_chat.py +1 -1
- {python_fastllm-0.0.10 → python_fastllm-0.0.11}/fastllm/streaming.py +1 -1
- {python_fastllm-0.0.10 → python_fastllm-0.0.11}/fastllm/types.py +31 -18
- {python_fastllm-0.0.10 → python_fastllm-0.0.11}/python_fastllm.egg-info/PKG-INFO +1 -1
- python_fastllm-0.0.10/fastllm/__init__.py +0 -1
- {python_fastllm-0.0.10 → python_fastllm-0.0.11}/README.md +0 -0
- {python_fastllm-0.0.10 → python_fastllm-0.0.11}/fastllm/_modidx.py +0 -0
- {python_fastllm-0.0.10 → python_fastllm-0.0.11}/fastllm/codex.py +0 -0
- {python_fastllm-0.0.10 → python_fastllm-0.0.11}/fastllm/gemini.py +0 -0
- {python_fastllm-0.0.10 → python_fastllm-0.0.11}/fastllm/openai_responses.py +0 -0
- {python_fastllm-0.0.10 → python_fastllm-0.0.11}/fastllm/specs/anthropic.json +0 -0
- {python_fastllm-0.0.10 → python_fastllm-0.0.11}/fastllm/specs/anthropic.yml +0 -0
- {python_fastllm-0.0.10 → python_fastllm-0.0.11}/fastllm/specs/gemini.json +0 -0
- {python_fastllm-0.0.10 → python_fastllm-0.0.11}/fastllm/specs/openai.with-code-samples.json +0 -0
- {python_fastllm-0.0.10 → python_fastllm-0.0.11}/fastllm/specs/openai.with-code-samples.yml +0 -0
- {python_fastllm-0.0.10 → python_fastllm-0.0.11}/fastllm/specs/spec_manifest.json +0 -0
- {python_fastllm-0.0.10 → python_fastllm-0.0.11}/pyproject.toml +0 -0
- {python_fastllm-0.0.10 → python_fastllm-0.0.11}/python_fastllm.egg-info/SOURCES.txt +0 -0
- {python_fastllm-0.0.10 → python_fastllm-0.0.11}/python_fastllm.egg-info/dependency_links.txt +0 -0
- {python_fastllm-0.0.10 → python_fastllm-0.0.11}/python_fastllm.egg-info/entry_points.txt +0 -0
- {python_fastllm-0.0.10 → python_fastllm-0.0.11}/python_fastllm.egg-info/requires.txt +0 -0
- {python_fastllm-0.0.10 → python_fastllm-0.0.11}/python_fastllm.egg-info/top_level.txt +0 -0
- {python_fastllm-0.0.10 → python_fastllm-0.0.11}/setup.cfg +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.0.11"
|
|
@@ -38,6 +38,7 @@ vendor_mapping = {
|
|
|
38
38
|
"codex": ('openai', 'https://chatgpt.com/backend-api/codex', 'CODEX_AUTH_TOKEN', _codex_json),
|
|
39
39
|
"moonshot": ('openai_chat', "https://api.moonshot.ai/v1", "MOONSHOT_API_KEY"),
|
|
40
40
|
"deepseek": ('openai_chat', "https://api.deepseek.com/v1", "DEEPSEEK_API_KEY"),
|
|
41
|
+
"mimo": ('openai_chat', "https://api.xiaomimimo.com/v1", "MIMO_API_KEY"),
|
|
41
42
|
"openrouter": ('openai_chat', "https://openrouter.ai/api/v1", "OPENROUTER_API_KEY"),
|
|
42
43
|
"together": ('openai_chat', "https://api.together.xyz/v1", "TOGETHER_API_KEY"),
|
|
43
44
|
"fireworks_ai": ('openai_chat', "https://api.fireworks.ai/inference/v1", "FIREWORKS_API_KEY"),
|
|
@@ -82,7 +83,7 @@ def _is_ctx_exceeded(code, msg):
|
|
|
82
83
|
if str(code or "").lower() == "context_length_exceeded": return True
|
|
83
84
|
return any(s in m for s in ("exceed context limit", "maximum context length", "maximum context limit",
|
|
84
85
|
"longer than the model's context length", "input tokens exceed the configured limit",
|
|
85
|
-
"exceeds the maximum number of tokens allowed", "prompt is too long"))
|
|
86
|
+
"exceeds the maximum number of tokens allowed", "prompt is too long", "exceeds the context window"))
|
|
86
87
|
|
|
87
88
|
def _classify_error(exc):
|
|
88
89
|
"Upgrade generic `APIError` to a specific subclass if applicable."
|
|
@@ -287,7 +287,7 @@ def cost(usage, m):
|
|
|
287
287
|
in_tok = raw['input_tokens']
|
|
288
288
|
cache_read = raw.get('cache_read_input_tokens', 0)
|
|
289
289
|
cc = raw.get('cache_creation', {}) or {}
|
|
290
|
-
cache_5m = cc.get('ephemeral_5m_input_tokens', 0)
|
|
290
|
+
cache_5m = cc.get('ephemeral_5m_input_tokens', raw.get('cache_creation_input_tokens', 0))
|
|
291
291
|
cache_1h = cc.get('ephemeral_1h_input_tokens', 0)
|
|
292
292
|
cost = in_tok * m.input_cost_per_token
|
|
293
293
|
cost += raw['output_tokens'] * m.output_cost_per_token
|
|
@@ -116,7 +116,7 @@ re_token = re.compile(fr"^{re.escape(token_dtls_tag)}\n*<summary>.*?</summary>\n
|
|
|
116
116
|
_fence_back = '`````'
|
|
117
117
|
_fence_re = re.compile(f'^{_fence_back}(py|bash)\n(.*?)\n{_fence_back}$', re.DOTALL | re.MULTILINE)
|
|
118
118
|
_result_re = re.compile(f'\n{_fence_back}result\n(.*?)\n{_fence_back}\n', re.DOTALL)
|
|
119
|
-
_lang2tool = dict(py='
|
|
119
|
+
_lang2tool = dict(py='pyrun', bash='bash')
|
|
120
120
|
|
|
121
121
|
class FenceToolStop:
|
|
122
122
|
def __init__(self, langs): self.langs = langs
|
|
@@ -204,6 +204,7 @@ def fmt2hist(outp:str)->list[Msg]:
|
|
|
204
204
|
for msg in hist:
|
|
205
205
|
if msg.role == 'assistant': result.extend(_split_msg_on_fences(msg))
|
|
206
206
|
else: result.append(msg)
|
|
207
|
+
if result[-1].role == 'tool': result.append(Msg(role='assistant', content=[Part(type=PartType.text, text='.')]))
|
|
207
208
|
return result
|
|
208
209
|
|
|
209
210
|
# %% ../nbs/07_chat.ipynb #8de5ce8d
|
|
@@ -635,7 +636,7 @@ def _active_fence_langs(tool_schemas):
|
|
|
635
636
|
async def run_fence_tool(lang, code, ns):
|
|
636
637
|
"Run the mapped tool for `lang` with the code, return result fence"
|
|
637
638
|
tname = _lang2tool[lang]
|
|
638
|
-
arg = dict(code=code) if lang == 'py' else dict(
|
|
639
|
+
arg = dict(code=code) if lang == 'py' else dict(cmd=code)
|
|
639
640
|
res = _mk_tool_result(await call_func_async(tname, arg, ns=ns, raise_on_err=False))
|
|
640
641
|
return _mk_result_fence(_trunc_str(str(res)))
|
|
641
642
|
|
|
@@ -195,7 +195,7 @@ def get_hdrs(api_key=None):
|
|
|
195
195
|
# %% ../nbs/03_oai_chat.ipynb #f89e2bf6
|
|
196
196
|
def cost(usage, m):
|
|
197
197
|
raw = usage.raw
|
|
198
|
-
pd,
|
|
198
|
+
pd,cd = raw.get('prompt_tokens_details') or {},raw.get('completion_tokens_details') or {}
|
|
199
199
|
cached = pd.get('cached_tokens', 0)
|
|
200
200
|
in_audio, out_audio = pd.get('audio_tokens', 0), cd.get('audio_tokens', 0)
|
|
201
201
|
in_txt = raw['prompt_tokens'] - cached - in_audio
|
|
@@ -157,7 +157,7 @@ async def mk_acollect_stream(it, index_fn, model=None, api_name=None, vendor_nam
|
|
|
157
157
|
if stop: fin = FinishReason.stop
|
|
158
158
|
fin = FinishReason.tool_calls if fin==FinishReason.stop and any(~L(tcs).attrgot('server')) else fin # recheck tool calls post collation
|
|
159
159
|
# tool calls and non-anthropic citations are yielded at the end
|
|
160
|
-
yield Completion(
|
|
160
|
+
yield Completion(model,
|
|
161
161
|
message=Msg(role="assistant", content=part_accum.parts),
|
|
162
162
|
finish_reason=fin, usage=usg, tool_calls=tcs, api_name=api_name, vendor_name=vendor_name,
|
|
163
163
|
raw={'deltas':deltas})
|
|
@@ -5,10 +5,11 @@
|
|
|
5
5
|
# %% auto #0
|
|
6
6
|
__all__ = ['PartType', 'FinishReason', 'api_registry', 'model_prices_url', 'haik45', 'sonn45', 'sonn', 'sonn46', 'opus46', 'opus',
|
|
7
7
|
'gpt54', 'gpt54m', 'gpt55', 'codex54', 'codex54m', 'codex55', 'codex53spark', 'model_info_registry',
|
|
8
|
-
'
|
|
9
|
-
'
|
|
10
|
-
'
|
|
11
|
-
'register_model_info', 'get_model_info', 'get_model_pricing',
|
|
8
|
+
'modern_llm', 'deepseek_v4_common', 'mimo_v25_common', 'codex_pricing', 'Part', 'Msg', 'ToolCall',
|
|
9
|
+
'display_list', 'Usage', 'Completion', 'APIRegistry', 'mk_completion', 'mk_tool_res_msg', 'fn_schema',
|
|
10
|
+
'sys_text', 'part_txt', 'data_url', 'url_mime', 'payload_kwargs', 'get_api_key', 'model_prices_meta',
|
|
11
|
+
'infer_api_name', 'get_model_meta', 'register_model_info', 'get_model_info', 'get_model_pricing',
|
|
12
|
+
'approx_pricing']
|
|
12
13
|
|
|
13
14
|
# %% ../nbs/00_types.ipynb #b4d047fd
|
|
14
15
|
import httpx
|
|
@@ -164,7 +165,7 @@ def mk_completion(resp, model, api_name, vendor_name):
|
|
|
164
165
|
parts = api.norm_parts(resp)
|
|
165
166
|
usg = api.finalize_usage(api.norm_usage(resp), parts)
|
|
166
167
|
return Completion(
|
|
167
|
-
model=
|
|
168
|
+
model=model,
|
|
168
169
|
message=Msg(role="assistant", content=parts),
|
|
169
170
|
finish_reason=api.norm_finish(resp, tcs),
|
|
170
171
|
usage=usg,
|
|
@@ -173,7 +174,6 @@ def mk_completion(resp, model, api_name, vendor_name):
|
|
|
173
174
|
vendor_name=vendor_name,
|
|
174
175
|
raw=resp)
|
|
175
176
|
|
|
176
|
-
|
|
177
177
|
# %% ../nbs/00_types.ipynb #d5322db5
|
|
178
178
|
def mk_tool_res_msg(tool_calls:list[ToolCall], results:list[str|list]):
|
|
179
179
|
'A util to prepare parallel tool call with str or media list results'
|
|
@@ -271,7 +271,7 @@ haik45 = "claude-haiku-4-5"
|
|
|
271
271
|
sonn45 = "claude-sonnet-4-5"
|
|
272
272
|
sonn = sonn46 = "claude-sonnet-4-6"
|
|
273
273
|
opus46 = "claude-opus-4-6"
|
|
274
|
-
opus = "claude-opus-4-
|
|
274
|
+
opus = "claude-opus-4-8"
|
|
275
275
|
gpt54 = "gpt-5.4"
|
|
276
276
|
gpt54m = "gpt-5.4-mini"
|
|
277
277
|
gpt55 = "gpt-5.5"
|
|
@@ -287,6 +287,8 @@ def register_model_info(model, vendor_name=None, base=None, base_vendor_name=Non
|
|
|
287
287
|
"Register model metadata, optionally starting from `base`."
|
|
288
288
|
info = dict(get_model_info(base, base_vendor_name or vendor_name)) if base else {}
|
|
289
289
|
info.update(overrides)
|
|
290
|
+
if isinstance(c := info.get('search_context_cost_per_query'), (int,float)):
|
|
291
|
+
info['search_context_cost_per_query'] = {f'search_context_size_{s}':c for s in ('low','medium','high')}
|
|
290
292
|
model_info_registry[vendor_name, model] = info
|
|
291
293
|
|
|
292
294
|
def get_model_info(mn, vendor_name=None):
|
|
@@ -294,11 +296,17 @@ def get_model_info(mn, vendor_name=None):
|
|
|
294
296
|
if 'search_context_cost_per_query' in info: info['supports_web_search'] = True
|
|
295
297
|
return dict2obj(info)
|
|
296
298
|
|
|
299
|
+
# %% ../nbs/00_types.ipynb #331c5d0a
|
|
300
|
+
register_model_info("claude-opus-4-8", vendor_name='anthropic', base="claude-opus-4-6")
|
|
301
|
+
|
|
302
|
+
# %% ../nbs/00_types.ipynb #b36178d4
|
|
303
|
+
modern_llm = dict(supports_function_calling=True, supports_tool_choice=True, supports_prompt_caching=True,
|
|
304
|
+
supports_parallel_function_calling=True, supports_native_streaming=True, supports_native_structured_output=True,
|
|
305
|
+
supports_reasoning=True, supports_response_schema=True, supports_system_messages=True)
|
|
306
|
+
|
|
297
307
|
# %% ../nbs/00_types.ipynb #8261dcd0
|
|
298
|
-
register_model_info('accounts/fireworks/models/qwen3p6-plus', vendor_name='fireworks_ai',
|
|
299
|
-
supports_vision=True,
|
|
300
|
-
supports_system_messages=True, supports_response_schema=True, supports_parallel_function_calling=True,
|
|
301
|
-
supports_prompt_caching=True, supports_native_streaming=True, supports_native_structured_output=True,
|
|
308
|
+
register_model_info('accounts/fireworks/models/qwen3p6-plus', vendor_name='fireworks_ai', **modern_llm,
|
|
309
|
+
supports_vision=True,
|
|
302
310
|
max_tokens=1000000, max_input_tokens=1000000, max_output_tokens=65536,
|
|
303
311
|
input_cost_per_token=0.5e-6, cache_read_input_token_cost=0.1e-6, output_cost_per_token=3.0e-6)
|
|
304
312
|
|
|
@@ -322,9 +330,7 @@ for model in ('accounts/fireworks/models/kimi-k2p5', 'accounts/fireworks/models/
|
|
|
322
330
|
input_cost_per_token=0.95e-6, cache_read_input_token_cost=0.16e-6, output_cost_per_token=4.0e-6)
|
|
323
331
|
|
|
324
332
|
# %% ../nbs/00_types.ipynb #948d55d0
|
|
325
|
-
deepseek_v4_common = dict(
|
|
326
|
-
supports_assistant_prefill=True, supports_function_calling=True, supports_prompt_caching=True,
|
|
327
|
-
supports_reasoning=True, supports_tool_choice=True,
|
|
333
|
+
deepseek_v4_common = dict(**modern_llm, supports_assistant_prefill=True,
|
|
328
334
|
max_input_tokens=1048576, max_output_tokens=393216, max_tokens=393216)
|
|
329
335
|
|
|
330
336
|
register_model_info('deepseek-v4-flash', vendor_name='deepseek', base='deepseek/deepseek-v3.2', **deepseek_v4_common,
|
|
@@ -334,19 +340,26 @@ register_model_info('deepseek-v4-pro', vendor_name='deepseek', base='deepseek/de
|
|
|
334
340
|
input_cost_per_token=4.35e-07, input_cost_per_token_cache_hit=3.625e-09,
|
|
335
341
|
output_cost_per_token=8.7e-07, cache_read_input_token_cost=4.35e-07/10)
|
|
336
342
|
|
|
343
|
+
mimo_v25_common = dict(**modern_llm, supports_web_search=True, max_input_tokens=1048576, max_output_tokens=131072, max_tokens=131072)
|
|
344
|
+
|
|
345
|
+
register_model_info('mimo-v2.5-pro', vendor_name='mimo', **mimo_v25_common, base='deepseek/deepseek-v4-pro',
|
|
346
|
+
input_cost_per_token=0.435e-6, output_cost_per_token=0.87e-6, cache_read_input_token_cost=0.0036e-6, search_context_cost_per_query=0.005)
|
|
347
|
+
register_model_info('mimo-v2.5', vendor_name='mimo', **mimo_v25_common, base='deepseek/deepseek-v4',
|
|
348
|
+
input_cost_per_token=0.14e-6, output_cost_per_token=0.28e-6, cache_read_input_token_cost=0.0028e-6, search_context_cost_per_query=0.005,
|
|
349
|
+
supports_vision=True, supports_image_input=True)
|
|
350
|
+
|
|
337
351
|
# %% ../nbs/00_types.ipynb #2c23d11e
|
|
338
352
|
codex_pricing = dict(
|
|
339
353
|
input_cost_per_token = 0.10/1_000_000, output_cost_per_token = 0.50/1_000_000,
|
|
340
354
|
cache_creation_input_token_cost = 0.10/1_000_000, cache_read_input_token_cost = 0.10/1_000_000)
|
|
341
355
|
|
|
342
356
|
for model in (codex54, codex54m, codex55):
|
|
343
|
-
register_model_info(model, 'codex', base=model, base_vendor_name='chatgpt', supports_web_search=True, **codex_pricing)
|
|
357
|
+
register_model_info(model, 'codex', base=model, base_vendor_name='chatgpt', supports_web_search=True, max_input_tokens=256000, **codex_pricing)
|
|
344
358
|
|
|
345
359
|
register_model_info(codex53spark, 'codex', **codex_pricing,
|
|
346
|
-
supports_vision=False, supports_image_input=False, supports_web_search=True, supports_reasoning=True,
|
|
360
|
+
supports_vision=False, supports_image_input=False, supports_web_search=True, supports_reasoning=True, supports_function_calling=True,
|
|
347
361
|
max_tokens=128000, max_input_tokens=128000, max_output_tokens=128000)
|
|
348
362
|
|
|
349
|
-
|
|
350
363
|
# %% ../nbs/00_types.ipynb #24cc47ec
|
|
351
364
|
def get_model_pricing(mn, vendor_name, million=True):
|
|
352
365
|
return {k:round(v * (1e6 if million else 1), 6)
|
|
@@ -359,7 +372,7 @@ def approx_pricing(nm, vendor_name, out=10, cache=80, inp=10, markup=0):
|
|
|
359
372
|
p = get_model_pricing(nm, vendor_name)
|
|
360
373
|
ic = p.get('cache_creation_input_token_cost', p['input_cost_per_token'])
|
|
361
374
|
res = (p['output_cost_per_token']*out + p['cache_read_input_token_cost']*cache + ic*inp) / (out+cache+inp)
|
|
362
|
-
if nm
|
|
375
|
+
if nm in ('claude-opus-4-7','claude-opus-4-8'): res *= 1.5
|
|
363
376
|
return res*(1+markup)
|
|
364
377
|
|
|
365
378
|
# %% ../nbs/00_types.ipynb #8bfca02d
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.0.10"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{python_fastllm-0.0.10 → python_fastllm-0.0.11}/python_fastllm.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|