lm-deluge 0.0.54__tar.gz → 0.0.56__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lm_deluge-0.0.54/src/lm_deluge.egg-info → lm_deluge-0.0.56}/PKG-INFO +1 -1
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/pyproject.toml +1 -1
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/api_requests/base.py +6 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/api_requests/response.py +28 -1
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/client.py +16 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/models/__init__.py +4 -1
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/models/anthropic.py +20 -2
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/models/google.py +20 -12
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/models/openai.py +18 -8
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/tracker.py +78 -10
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/usage.py +30 -21
- {lm_deluge-0.0.54 → lm_deluge-0.0.56/src/lm_deluge.egg-info}/PKG-INFO +1 -1
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/LICENSE +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/README.md +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/setup.cfg +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/__init__.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/agent.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/api_requests/__init__.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/api_requests/anthropic.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/api_requests/bedrock.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/api_requests/common.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/api_requests/gemini.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/api_requests/mistral.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/api_requests/openai.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/batches.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/built_in_tools/base.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/built_in_tools/openai.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/cache.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/cli.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/config.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/embed.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/errors.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/file.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/gemini_limits.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/image.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/llm_tools/__init__.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/llm_tools/classify.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/llm_tools/extract.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/llm_tools/locate.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/llm_tools/ocr.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/llm_tools/score.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/llm_tools/translate.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/models/bedrock.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/models/cerebras.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/models/cohere.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/models/deepseek.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/models/fireworks.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/models/grok.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/models/groq.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/models/meta.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/models/mistral.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/models/openrouter.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/models/together.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/presets/cerebras.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/presets/meta.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/prompt.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/request_context.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/rerank.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/tool.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/util/harmony.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/util/json.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/util/logprobs.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/util/spatial.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/util/validation.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge/util/xml.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge.egg-info/SOURCES.txt +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge.egg-info/requires.txt +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/src/lm_deluge.egg-info/top_level.txt +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/tests/test_builtin_tools.py +0 -0
- {lm_deluge-0.0.54 → lm_deluge-0.0.56}/tests/test_native_mcp_server.py +0 -0

src/lm_deluge/api_requests/base.py CHANGED

@@ -52,6 +52,9 @@ class APIRequestBase(ABC):
         self, base_headers: dict[str, str], exclude_patterns: list[str] | None = None
     ) -> dict[str, str]:
         """Merge extra_headers with base headers, giving priority to extra_headers."""
+        # Filter out None values from base headers (e.g., missing API keys)
+        base_headers = {k: v for k, v in base_headers.items() if v is not None}
+
         if not self.context.extra_headers:
             return base_headers
 
@@ -69,6 +72,9 @@ class APIRequestBase(ABC):
         # Start with base headers, then overlay filtered extra headers (extra takes precedence)
         merged = dict(base_headers)
         merged.update(filtered_extra)
+
+        # Filter out None values from final merged headers
+        merged = {k: v for k, v in merged.items() if v is not None}
         return merged
 
     def handle_success(self, data):
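
The two hunks above only wrap the existing merge with None-filtering. A minimal standalone sketch of the resulting behavior (the real method lives on APIRequestBase and also honors exclude_patterns, which is omitted here):

```python
def merge_headers(
    base_headers: dict[str, str | None],
    extra_headers: dict[str, str] | None = None,
) -> dict[str, str]:
    # Filter out None values from base headers (e.g., missing API keys)
    base = {k: v for k, v in base_headers.items() if v is not None}
    if not extra_headers:
        return base
    # Extra headers take precedence over base headers
    merged = {**base, **extra_headers}
    # Filter out None values from the final merged headers
    return {k: v for k, v in merged.items() if v is not None}


# Example: a missing API key no longer produces an "Authorization: None" header
print(merge_headers({"Authorization": None, "Content-Type": "application/json"}))
# {'Content-Type': 'application/json'}
```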

src/lm_deluge/api_requests/response.py CHANGED

@@ -84,10 +84,37 @@ class APIResponse:
             and api_model.input_cost is not None
             and api_model.output_cost is not None
         ):
+            # Calculate input cost, accounting for cached vs non-cached tokens
+            # Different providers report tokens differently:
+            # - Anthropic/Bedrock: input_tokens is ONLY non-cached, cache_read_tokens is separate
+            # - OpenAI/Gemini: input_tokens INCLUDES cached, cache_read_tokens is a subset
+            cache_read_tokens = self.usage.cache_read_tokens or 0
+
+            if api_model.api_spec in ("anthropic", "bedrock"):
+                # For Anthropic: input_tokens already excludes cache, so use directly
+                non_cached_input_tokens = self.usage.input_tokens
+            else:
+                # For OpenAI/Gemini: input_tokens includes cache, so subtract it
+                non_cached_input_tokens = self.usage.input_tokens - cache_read_tokens
+
             self.cost = (
-                self.usage.input_tokens * api_model.input_cost / 1e6
+                non_cached_input_tokens * api_model.input_cost / 1e6
                 + self.usage.output_tokens * api_model.output_cost / 1e6
             )
+
+            # Add cost for cache read tokens (at reduced rate)
+            if cache_read_tokens > 0 and api_model.cached_input_cost is not None:
+                self.cost += cache_read_tokens * api_model.cached_input_cost / 1e6
+
+            # Add cost for cache write tokens (only for Anthropic)
+            if (
+                self.usage.cache_write_tokens
+                and self.usage.cache_write_tokens > 0
+                and api_model.cache_write_cost is not None
+            ):
+                self.cost += (
+                    self.usage.cache_write_tokens * api_model.cache_write_cost / 1e6
+                )
         elif self.content is not None and self.completion is not None:
             pass
             # print(
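
The pricing logic added here can be read as a standalone function. This sketch mirrors the branches in the diff, with the per-million prices passed in explicitly rather than read from an APIModel:

```python
def estimate_cost(
    api_spec: str,
    input_tokens: int,
    output_tokens: int,
    cache_read_tokens: int,
    cache_write_tokens: int,
    input_cost: float,                        # $ per million non-cached input tokens
    output_cost: float,                       # $ per million output tokens
    cached_input_cost: float | None = None,   # $ per million cached (read) input tokens
    cache_write_cost: float | None = None,    # $ per million cache write tokens
) -> float:
    # Anthropic/Bedrock report input_tokens excluding cached reads;
    # OpenAI/Gemini include cached tokens in input_tokens.
    if api_spec in ("anthropic", "bedrock"):
        non_cached = input_tokens
    else:
        non_cached = input_tokens - cache_read_tokens

    cost = non_cached * input_cost / 1e6 + output_tokens * output_cost / 1e6
    if cache_read_tokens and cached_input_cost is not None:
        cost += cache_read_tokens * cached_input_cost / 1e6
    if cache_write_tokens and cache_write_cost is not None:
        cost += cache_write_tokens * cache_write_cost / 1e6
    return cost
```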

src/lm_deluge/client.py CHANGED

@@ -30,6 +30,7 @@ class _LLMClient(BaseModel):
     """
 
     model_names: str | list[str] = ["gpt-4.1-mini"]
+    name: str | None = None
     max_requests_per_minute: int = 1_000
     max_tokens_per_minute: int = 100_000
     max_concurrent_requests: int = 225
@@ -69,6 +70,7 @@ class _LLMClient(BaseModel):
             max_requests_per_minute=self.max_requests_per_minute,
             max_tokens_per_minute=self.max_tokens_per_minute,
             max_concurrent_requests=self.max_concurrent_requests,
+            client_name=self.name or "LLMClient",
             progress_style=self.progress,
             use_progress_bar=show_progress,
         )
@@ -169,6 +171,13 @@ class _LLMClient(BaseModel):
         # normalize weights
         self.model_weights = [w / sum(self.model_weights) for w in self.model_weights]
 
+        # Auto-generate name if not provided
+        if self.name is None:
+            if len(self.model_names) == 1:
+                self.name = self.model_names[0]
+            else:
+                self.name = "LLMClient"
+
         # Validate logprobs settings across all sampling params
         if self.logprobs or any(sp.logprobs for sp in self.sampling_params):
             print("Logprobs enabled.")
@@ -286,6 +295,7 @@ class _LLMClient(BaseModel):
             # Handle successful response
             if not response.is_error:
                 context.status_tracker.task_succeeded(context.task_id)
+                context.status_tracker.track_usage(response)
                 # Cache successful responses immediately
                 if self.cache and response.completion:
                     # print(f"DEBUG: Caching successful response")
@@ -324,6 +334,8 @@ class _LLMClient(BaseModel):
 
             # No retries left or no retry queue - final failure
             context.status_tracker.task_failed(context.task_id)
+            # Track usage even for failed requests if they made an API call
+            context.status_tracker.track_usage(response)
             context.maybe_callback(response, context.status_tracker)
 
             # Print final error message
@@ -725,6 +737,7 @@ class _LLMClient(BaseModel):
 def LLMClient(
     model_names: str,
     *,
+    name: str | None = None,
     max_requests_per_minute: int = 1_000,
     max_tokens_per_minute: int = 100_000,
     max_concurrent_requests: int = 225,
@@ -751,6 +764,7 @@ def LLMClient(
 def LLMClient(
     model_names: list[str],
     *,
+    name: str | None = None,
     max_requests_per_minute: int = 1_000,
     max_tokens_per_minute: int = 100_000,
     max_concurrent_requests: int = 225,
@@ -776,6 +790,7 @@ def LLMClient(
 def LLMClient(
     model_names: str | list[str] = "gpt-4.1-mini",
     *,
+    name: str | None = None,
     max_requests_per_minute: int = 1_000,
     max_tokens_per_minute: int = 100_000,
     max_concurrent_requests: int = 225,
@@ -813,6 +828,7 @@ def LLMClient(
     # Simply pass everything to the Pydantic constructor
     return _LLMClient(
         model_names=model_names,
+        name=name,
        max_requests_per_minute=max_requests_per_minute,
        max_tokens_per_minute=max_tokens_per_minute,
        max_concurrent_requests=max_concurrent_requests,
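
Assuming the LLMClient factory signature shown above (the import path is an assumption, not shown in this diff), the new name parameter labels the progress display:

```python
from lm_deluge import LLMClient  # assumed import path

# `name` labels the progress display and status prints; if omitted it defaults
# to the single configured model name, or "LLMClient" when several models are set.
client = LLMClient(
    "gpt-4.1-mini",
    name="summarizer",
    max_requests_per_minute=500,
)
```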

src/lm_deluge/models/__init__.py CHANGED

@@ -29,7 +29,8 @@ class APIModel:
     api_base: str
     api_key_env_var: str
     api_spec: str
-    cached_input_cost: float | None = 0
+    cached_input_cost: float | None = 0  # $ per million cached/read input tokens
+    cache_write_cost: float | None = 0  # $ per million cache write tokens
     input_cost: float | None = 0  # $ per million input tokens
     output_cost: float | None = 0  # $ per million output tokens
     supports_json: bool = False
@@ -89,6 +90,7 @@ def register_model(
     api_spec: str = "openai",
     input_cost: float | None = 0,  # $ per million input tokens
     cached_input_cost: float | None = 0,
+    cache_write_cost: float | None = 0,  # $ per million cache write tokens
     output_cost: float | None = 0,  # $ per million output tokens
     supports_json: bool = False,
     supports_logprobs: bool = False,
@@ -106,6 +108,7 @@ def register_model(
         api_key_env_var=api_key_env_var,
         api_spec=api_spec,
         cached_input_cost=cached_input_cost,
+        cache_write_cost=cache_write_cost,
         input_cost=input_cost,
         output_cost=output_cost,
         supports_json=supports_json,
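
With both cache prices on a model entry, it is easy to estimate when prompt caching pays off. A rough sketch (the helper below is illustrative, not part of the library), using the Anthropic-style prices added later in this diff, where cache writes cost more than regular input and cache reads cost much less:

```python
def cache_break_even_reads(
    input_cost: float,         # $ per million regular input tokens
    cached_input_cost: float,  # $ per million cached (read) input tokens
    cache_write_cost: float,   # $ per million cache write tokens
) -> float:
    """Number of cache hits needed before caching a prefix beats resending it."""
    extra_write_cost = cache_write_cost - input_cost   # one-time premium for the write
    savings_per_read = input_cost - cached_input_cost  # saved on every cache hit
    return extra_write_cost / savings_per_read


# With the Claude Sonnet prices from this diff (3.00 / 0.30 / 3.75 per 1M tokens),
# a cached prefix pays for itself before its first full reuse:
print(cache_break_even_reads(3.0, 0.30, 3.75))  # ≈ 0.28
```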

src/lm_deluge/models/anthropic.py CHANGED

@@ -18,6 +18,8 @@ ANTHROPIC_MODELS = {
         "supports_json": False,
         "api_spec": "anthropic",
         "input_cost": 3.0,
+        "cached_input_cost": 0.30,
+        "cache_write_cost": 3.75,
         "output_cost": 15.0,
         "requests_per_minute": 4_000,
         "tokens_per_minute": 400_000,
@@ -30,6 +32,8 @@ ANTHROPIC_MODELS = {
         "supports_json": False,
         "api_spec": "anthropic",
         "input_cost": 15.0,
+        "cached_input_cost": 1.50,
+        "cache_write_cost": 18.75,
         "output_cost": 75.0,
         "requests_per_minute": 4_000,
         "tokens_per_minute": 400_000,
@@ -43,6 +47,8 @@ ANTHROPIC_MODELS = {
         "supports_json": False,
         "api_spec": "anthropic",
         "input_cost": 15.0,
+        "cached_input_cost": 1.50,
+        "cache_write_cost": 18.75,
         "output_cost": 75.0,
         "requests_per_minute": 4_000,
         "tokens_per_minute": 400_000,
@@ -56,6 +62,8 @@ ANTHROPIC_MODELS = {
         "supports_json": False,
         "api_spec": "anthropic",
         "input_cost": 3.0,
+        "cached_input_cost": 0.30,
+        "cache_write_cost": 3.75,
         "output_cost": 15.0,
         "requests_per_minute": 4_000,
         "tokens_per_minute": 400_000,
@@ -68,6 +76,8 @@ ANTHROPIC_MODELS = {
         "supports_json": False,
         "api_spec": "anthropic",
         "input_cost": 3.0,
+        "cached_input_cost": 0.30,
+        "cache_write_cost": 3.75,
         "output_cost": 15.0,
         "requests_per_minute": 4_000,
         "tokens_per_minute": 400_000,
@@ -81,6 +91,8 @@ ANTHROPIC_MODELS = {
         "supports_json": False,
         "api_spec": "anthropic",
         "input_cost": 3.0,
+        "cached_input_cost": 0.30,
+        "cache_write_cost": 3.75,
         "output_cost": 15.0,
         "requests_per_minute": 4_000,
         "tokens_per_minute": 400_000,
@@ -93,6 +105,8 @@ ANTHROPIC_MODELS = {
         "supports_json": False,
         "api_spec": "anthropic",
         "input_cost": 3.0,
+        "cached_input_cost": 0.30,
+        "cache_write_cost": 3.75,
         "output_cost": 15.0,
         "requests_per_minute": 4_000,
         "tokens_per_minute": 400_000,
@@ -116,8 +130,10 @@ ANTHROPIC_MODELS = {
         "api_key_env_var": "ANTHROPIC_API_KEY",
         "supports_json": False,
         "api_spec": "anthropic",
-        "input_cost":
-        "
+        "input_cost": 0.8,
+        "cached_input_cost": 0.08,
+        "cache_write_cost": 1.00,
+        "output_cost": 4.00,
         "requests_per_minute": 20_000,
         "tokens_per_minute": 4_000_000,  # supposed to be this but they fucked up
     },
@@ -129,6 +145,8 @@ ANTHROPIC_MODELS = {
         "supports_json": False,
         "api_spec": "anthropic",
         "input_cost": 0.25,
+        "cache_write_cost": 0.30,
+        "cached_input_cost": 0.03,
         "output_cost": 1.25,
         "requests_per_minute": 10_000,
         "tokens_per_minute": 4_000_000,  # supposed to be this but they fucked up
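
As a worked example with the Sonnet-tier prices above (3.00 input, 0.30 cached read, 3.75 cache write, 15.00 output, all per million tokens) and Anthropic's convention that input_tokens excludes cached reads; the token counts below are made up:

```python
# Hypothetical request against a Sonnet-tier entry from the table above.
input_tokens = 2_000        # non-cached input (Anthropic excludes cached reads here)
cache_read_tokens = 50_000  # tokens served from the prompt cache
cache_write_tokens = 0
output_tokens = 1_000

cost = (
    input_tokens * 3.00 / 1e6
    + cache_read_tokens * 0.30 / 1e6
    + cache_write_tokens * 3.75 / 1e6
    + output_tokens * 15.00 / 1e6
)
print(f"${cost:.4f}")  # $0.0360
```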

src/lm_deluge/models/google.py CHANGED

@@ -18,6 +18,7 @@ GOOGLE_MODELS = {
         "supports_logprobs": False,
         "api_spec": "openai",
         "input_cost": 0.1,
+        "cached_input_cost": 0.025,
         "output_cost": 0.4,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
@@ -31,8 +32,8 @@ GOOGLE_MODELS = {
         "supports_json": True,
         "supports_logprobs": False,
         "api_spec": "openai",
-        "input_cost": 0.
-        "output_cost": 0.
+        "input_cost": 0.075,
+        "output_cost": 0.3,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
         "reasoning_model": False,
@@ -45,8 +46,9 @@ GOOGLE_MODELS = {
         "supports_json": True,
         "supports_logprobs": False,
         "api_spec": "openai",
-        "input_cost":
-        "
+        "input_cost": 1.25,
+        "cached_input_cost": 0.31,
+        "output_cost": 10.0,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
         "reasoning_model": True,
@@ -59,8 +61,9 @@ GOOGLE_MODELS = {
         "supports_json": True,
         "supports_logprobs": False,
         "api_spec": "openai",
-        "input_cost": 0.
-        "
+        "input_cost": 0.3,
+        "cached_input_cost": 0.075,
+        "output_cost": 2.5,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
         "reasoning_model": True,
@@ -74,6 +77,7 @@ GOOGLE_MODELS = {
         "supports_logprobs": False,
         "api_spec": "openai",
         "input_cost": 0.1,
+        "cached_input_cost": 0.025,
         "output_cost": 0.4,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
@@ -89,6 +93,7 @@ GOOGLE_MODELS = {
         "supports_logprobs": False,
         "api_spec": "gemini",
         "input_cost": 0.1,
+        "cached_input_cost": 0.025,
         "output_cost": 0.4,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
@@ -102,8 +107,8 @@ GOOGLE_MODELS = {
         "supports_json": True,
         "supports_logprobs": False,
         "api_spec": "gemini",
-        "input_cost": 0.
-        "output_cost": 0.
+        "input_cost": 0.075,
+        "output_cost": 0.3,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
         "reasoning_model": False,
@@ -116,8 +121,9 @@ GOOGLE_MODELS = {
         "supports_json": True,
         "supports_logprobs": False,
         "api_spec": "gemini",
-        "input_cost":
-        "
+        "input_cost": 1.25,
+        "cached_input_cost": 0.31,
+        "output_cost": 10.0,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
         "reasoning_model": True,
@@ -130,8 +136,9 @@ GOOGLE_MODELS = {
         "supports_json": True,
         "supports_logprobs": False,
         "api_spec": "gemini",
-        "input_cost": 0.
-        "
+        "input_cost": 0.3,
+        "cached_input_cost": 0.075,
+        "output_cost": 2.5,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
         "reasoning_model": True,
@@ -145,6 +152,7 @@ GOOGLE_MODELS = {
         "supports_logprobs": False,
         "api_spec": "gemini",
         "input_cost": 0.1,
+        "cached_input_cost": 0.025,
         "output_cost": 0.4,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
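
Gemini (like OpenAI) reports cached tokens as a subset of the prompt tokens, so the client's cost code subtracts them before applying the regular rate. A sketch with the 0.10 / 0.025 / 0.40 Flash-tier prices from the table above; the token counts are invented:

```python
prompt_tokens = 20_000   # total prompt tokens reported by the API
cached_tokens = 12_000   # cachedContentTokenCount, a subset of prompt_tokens
output_tokens = 500

non_cached = prompt_tokens - cached_tokens
cost = (
    non_cached * 0.10 / 1e6
    + cached_tokens * 0.025 / 1e6
    + output_tokens * 0.40 / 1e6
)
print(f"${cost:.6f}")  # $0.001300
```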

src/lm_deluge/models/openai.py CHANGED

@@ -75,8 +75,8 @@ OPENAI_MODELS = {
         "supports_logprobs": False,
         "supports_responses": True,
         "api_spec": "openai",
-        "input_cost":
-        "output_cost":
+        "input_cost": 3.0,
+        "output_cost": 12.0,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
         "reasoning_model": False,
@@ -90,8 +90,9 @@ OPENAI_MODELS = {
         "supports_logprobs": True,
         "supports_responses": True,
         "api_spec": "openai",
-        "input_cost":
-        "
+        "input_cost": 2.0,
+        "cached_input_cost": 0.50,
+        "output_cost": 8.0,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
         "reasoning_model": True,
@@ -106,6 +107,7 @@ OPENAI_MODELS = {
         "supports_responses": True,
         "api_spec": "openai",
         "input_cost": 1.1,
+        "cached_input_cost": 0.275,
         "output_cost": 4.4,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
@@ -121,6 +123,7 @@ OPENAI_MODELS = {
         "supports_responses": True,
         "api_spec": "openai",
         "input_cost": 2.0,
+        "cached_input_cost": 0.50,
         "output_cost": 8.0,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
@@ -136,6 +139,7 @@ OPENAI_MODELS = {
         "supports_responses": True,
         "api_spec": "openai",
         "input_cost": 0.4,
+        "cached_input_cost": 0.10,
         "output_cost": 1.6,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
@@ -151,6 +155,7 @@ OPENAI_MODELS = {
         "supports_responses": True,
         "api_spec": "openai",
         "input_cost": 0.1,
+        "cached_input_cost": 0.025,
         "output_cost": 0.4,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
@@ -181,6 +186,7 @@ OPENAI_MODELS = {
         "supports_responses": True,
         "api_spec": "openai",
         "input_cost": 1.1,
+        "cached_input_cost": 0.55,
         "output_cost": 4.4,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
@@ -196,6 +202,7 @@ OPENAI_MODELS = {
         "supports_responses": True,
         "api_spec": "openai",
         "input_cost": 15.0,
+        "cached_input_cost": 7.50,
         "output_cost": 60.0,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
@@ -225,8 +232,9 @@ OPENAI_MODELS = {
         "supports_logprobs": True,
         "supports_responses": True,
         "api_spec": "openai",
-        "input_cost":
-        "
+        "input_cost": 1.1,
+        "cached_input_cost": 0.55,
+        "output_cost": 4.4,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
         "reasoning_model": True,
@@ -240,8 +248,9 @@ OPENAI_MODELS = {
         "supports_logprobs": True,
         "supports_responses": True,
         "api_spec": "openai",
-        "input_cost":
-        "
+        "input_cost": 2.50,
+        "cached_input_cost": 1.25,
+        "output_cost": 10.0,
         "requests_per_minute": 10_000,
         "tokens_per_minute": 30_000_000,
     },
@@ -255,6 +264,7 @@ OPENAI_MODELS = {
         "supports_responses": True,
         "api_spec": "openai",
         "input_cost": 0.15,
+        "cached_input_cost": 0.075,
         "output_cost": 0.6,
         "requests_per_minute": 60_000,
         "tokens_per_minute": 250_000_000,
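
For the OpenAI entries, the cached rate added above is typically half or a quarter of the regular input rate depending on the model, and caching kicks in automatically once a prompt is long enough. A sketch using the 2.50 / 1.25 / 10.00 entry; counts are invented:

```python
prompt_tokens = 10_000  # includes cached tokens (OpenAI reports them as a subset)
cached_tokens = 8_000   # prompt_tokens_details.cached_tokens
output_tokens = 300

cost = (
    (prompt_tokens - cached_tokens) * 2.50 / 1e6
    + cached_tokens * 1.25 / 1e6
    + output_tokens * 10.00 / 1e6
)
print(f"${cost:.4f}")  # $0.0180
```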

src/lm_deluge/tracker.py CHANGED

@@ -13,7 +13,6 @@ from rich.progress import (
     TaskID,
     TextColumn,
 )
-from rich.text import Text
 from tqdm.auto import tqdm
 
 SECONDS_TO_PAUSE_AFTER_RATE_LIMIT_ERROR = 5
@@ -24,6 +23,7 @@ class StatusTracker:
     max_requests_per_minute: int
     max_tokens_per_minute: int
     max_concurrent_requests: int
+    client_name: str = "LLMClient"
     num_tasks_started: int = 0
     num_tasks_in_progress: int = 0
     num_tasks_succeeded: int = 0
@@ -33,6 +33,13 @@ class StatusTracker:
     total_requests: int = 0
     retry_queue: asyncio.Queue = field(default_factory=asyncio.Queue)
 
+    # Cumulative usage tracking
+    total_cost: float = 0.0
+    total_input_tokens: int = 0  # non-cached input tokens
+    total_cache_read_tokens: int = 0
+    total_cache_write_tokens: int = 0
+    total_output_tokens: int = 0
+
     # Progress bar configuration
     use_progress_bar: bool = True
     progress_bar_total: int | None = None
@@ -131,6 +138,25 @@ class StatusTracker:
         self.num_tasks_in_progress -= 1
         self.num_tasks_failed += 1
 
+    def track_usage(self, response):
+        """Accumulate usage statistics from a completed request.
+
+        Args:
+            response: APIResponse object containing usage and cost information
+        """
+        if response.cost:
+            self.total_cost += response.cost
+
+        if response.usage:
+            self.total_output_tokens += response.usage.output_tokens
+            self.total_input_tokens += response.usage.input_tokens
+
+            if response.usage.cache_read_tokens:
+                self.total_cache_read_tokens += response.usage.cache_read_tokens
+
+            if response.usage.cache_write_tokens:
+                self.total_cache_write_tokens += response.usage.cache_write_tokens
+
     def log_final_status(self):
         # Close progress bar before printing final status
         self.close_progress_bar()
@@ -144,6 +170,22 @@ class StatusTracker:
                 f"{self.num_rate_limit_errors} rate limit errors received. Consider running at a lower rate."
             )
 
+        # Display cumulative usage stats if available
+        if self.total_cost > 0 or self.total_input_tokens > 0 or self.total_output_tokens > 0:
+            usage_parts = []
+            if self.total_cost > 0:
+                usage_parts.append(f"Cost: ${self.total_cost:.4f}")
+            if self.total_input_tokens > 0 or self.total_output_tokens > 0:
+                usage_parts.append(
+                    f"Tokens: {self.total_input_tokens:,} in / {self.total_output_tokens:,} out"
+                )
+            if self.total_cache_read_tokens > 0:
+                usage_parts.append(f"Cache: {self.total_cache_read_tokens:,} read")
+            if self.total_cache_write_tokens > 0:
+                usage_parts.append(f"{self.total_cache_write_tokens:,} write")
+
+            print(" | ".join(usage_parts))
+
     @property
     def pbar(self) -> tqdm | None:
         """Backward compatibility property to access progress bar."""
@@ -187,14 +229,16 @@ class StatusTracker:
 
     def _init_rich_display(self, total: int):
         """Initialize Rich display components."""
-        self._rich_console = Console()
+        self._rich_console = Console(highlight=False)
+        # Escape square brackets so Rich doesn't interpret them as markup
+        description = f"[bold blue]\\[{self.client_name}][/bold blue] Processing..."
         self._rich_progress = Progress(
             SpinnerColumn(),
-            TextColumn("
+            TextColumn("[progress.description]{task.description}"),
             BarColumn(),
             MofNCompleteColumn(),
         )
-        self._rich_task_id = self._rich_progress.add_task(
+        self._rich_task_id = self._rich_progress.add_task(description, total=total)
         self._rich_stop_event = asyncio.Event()
         self._rich_display_task = asyncio.create_task(self._rich_display_updater())
 
@@ -217,12 +261,36 @@ class StatusTracker:
                     total=self.progress_bar_total,
                 )
 
-                tokens_info = f"
-                reqs_info = f"
-                in_progress =
-
+                tokens_info = f"{self.available_token_capacity / 1000:.1f}k/{self.max_tokens_per_minute / 1000:.1f}k TPM"
+                reqs_info = f"{int(self.available_request_capacity)}/{self.max_requests_per_minute} RPM"
+                in_progress = (
+                    f" [gold3]In Progress:[/gold3] {int(self.num_tasks_in_progress)} "
+                    + ("requests" if self.num_tasks_in_progress != 1 else "request")
+                )
+                capacity_text = (
+                    f" [gold3]Capacity:[/gold3] {tokens_info} • {reqs_info}"
+                )
 
-
+                # Format usage stats
+                usage_parts = []
+                if self.total_cost > 0:
+                    usage_parts.append(f"${self.total_cost:.4f}")
+                if self.total_input_tokens > 0 or self.total_output_tokens > 0:
+                    input_k = self.total_input_tokens / 1000
+                    output_k = self.total_output_tokens / 1000
+                    usage_parts.append(f"{input_k:.1f}k in • {output_k:.1f}k out")
+                if self.total_cache_read_tokens > 0:
+                    cache_k = self.total_cache_read_tokens / 1000
+                    usage_parts.append(f"{cache_k:.1f}k cached")
+
+                usage_text = ""
+                if usage_parts:
+                    usage_text = f" [gold3]Usage:[/gold3] {' • '.join(usage_parts)}"
+
+                if usage_text:
+                    display = Group(self._rich_progress, in_progress, capacity_text, usage_text)
+                else:
+                    display = Group(self._rich_progress, in_progress, capacity_text)
                 live.update(display)
 
                 await asyncio.sleep(0.1)
@@ -252,7 +320,7 @@ class StatusTracker:
             return
         while not self._manual_stop_event.is_set():
             print(
-                f"Completed {self.num_tasks_succeeded}/{self.progress_bar_total} requests"
+                f"[{self.client_name}] Completed {self.num_tasks_succeeded}/{self.progress_bar_total} requests"
             )
             await asyncio.sleep(self.progress_print_interval)
 
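
The new track_usage method feeds the cumulative totals shown in the progress display and the final status line. A self-contained sketch of how those totals accumulate, using stand-in dataclasses instead of the real APIResponse/Usage objects:

```python
from dataclasses import dataclass


@dataclass
class FakeUsage:  # stand-in for lm_deluge's Usage
    input_tokens: int
    output_tokens: int
    cache_read_tokens: int = 0
    cache_write_tokens: int = 0


@dataclass
class FakeResponse:  # stand-in for lm_deluge's APIResponse
    cost: float
    usage: FakeUsage


totals = {"cost": 0.0, "input": 0, "output": 0, "cache_read": 0, "cache_write": 0}


def track_usage(response: FakeResponse) -> None:
    # Mirrors StatusTracker.track_usage from the diff: totals grow on every
    # request that actually hit the API, including final failures.
    if response.cost:
        totals["cost"] += response.cost
    if response.usage:
        totals["input"] += response.usage.input_tokens
        totals["output"] += response.usage.output_tokens
        totals["cache_read"] += response.usage.cache_read_tokens
        totals["cache_write"] += response.usage.cache_write_tokens


track_usage(FakeResponse(0.018, FakeUsage(10_000, 300, cache_read_tokens=8_000)))
track_usage(FakeResponse(0.036, FakeUsage(2_000, 1_000, cache_read_tokens=50_000)))
print(totals)  # cost ≈ 0.054, 12,000 in / 1,300 out, 58,000 cached reads
```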

src/lm_deluge/usage.py CHANGED

@@ -1,5 +1,4 @@
 from dataclasses import dataclass
-from typing import Optional
 
 
 @dataclass
@@ -13,8 +12,8 @@ class Usage:
 
     input_tokens: int = 0
     output_tokens: int = 0
-    cache_read_tokens:
-    cache_write_tokens:
+    cache_read_tokens: int = 0
+    cache_write_tokens: int = 0
 
     @property
     def total_input_tokens(self) -> int:
@@ -47,18 +46,29 @@ class Usage:
         return cls(
             input_tokens=usage_data.get("input_tokens", 0),
             output_tokens=usage_data.get("output_tokens", 0),
-            cache_read_tokens=usage_data.get("cache_read_input_tokens"),
-            cache_write_tokens=usage_data.get("cache_creation_input_tokens"),
+            cache_read_tokens=usage_data.get("cache_read_input_tokens", 0),
+            cache_write_tokens=usage_data.get("cache_creation_input_tokens", 0),
         )
 
     @classmethod
     def from_openai_usage(cls, usage_data: dict) -> "Usage":
-        """Create Usage from OpenAI API response usage data.
+        """Create Usage from OpenAI API response usage data.
+
+        OpenAI supports prompt caching - cached tokens appear in prompt_tokens_details.cached_tokens.
+        Caching is automatic for prompts over 1024 tokens.
+        """
+        prompt_tokens_details = usage_data.get("prompt_tokens_details", {})
+        cached_tokens = (
+            prompt_tokens_details.get("cached_tokens", 0)
+            if prompt_tokens_details
+            else 0
+        )
+
         return cls(
             input_tokens=usage_data.get("prompt_tokens", 0),
             output_tokens=usage_data.get("completion_tokens", 0),
-            cache_read_tokens=
-            cache_write_tokens=
+            cache_read_tokens=cached_tokens if cached_tokens > 0 else 0,
+            cache_write_tokens=0,  # OpenAI doesn't charge separately for cache writes
         )
 
     @classmethod
@@ -67,18 +77,23 @@ class Usage:
         return cls(
             input_tokens=usage_data.get("prompt_tokens", 0),
             output_tokens=usage_data.get("completion_tokens", 0),
-            cache_read_tokens=
-            cache_write_tokens=
+            cache_read_tokens=0,  # Mistral doesn't support caching
+            cache_write_tokens=0,
         )
 
     @classmethod
     def from_gemini_usage(cls, usage_data: dict) -> "Usage":
-        """Create Usage from Gemini API response usage data.
+        """Create Usage from Gemini API response usage data.
+
+        Gemini supports context caching - cached tokens appear in cachedContentTokenCount.
+        """
+        cached_tokens = usage_data.get("cachedContentTokenCount", 0)
+
         return cls(
             input_tokens=usage_data.get("promptTokenCount", 0),
             output_tokens=usage_data.get("candidatesTokenCount", 0),
-            cache_read_tokens=
-            cache_write_tokens=
+            cache_read_tokens=cached_tokens if cached_tokens > 0 else 0,
+            cache_write_tokens=0,  # Gemini doesn't charge separately for cache writes
         )
 
     def to_dict(self) -> dict:
@@ -100,8 +115,8 @@ class Usage:
         return cls(
             input_tokens=data.get("input_tokens", 0),
             output_tokens=data.get("output_tokens", 0),
-            cache_read_tokens=data.get("cache_read_tokens"),
-            cache_write_tokens=data.get("cache_write_tokens"),
+            cache_read_tokens=data.get("cache_read_tokens", 0),
+            cache_write_tokens=data.get("cache_write_tokens", 0),
        )
 
     def __add__(self, other: "Usage") -> "Usage":
@@ -111,14 +126,8 @@ class Usage:
             output_tokens=self.output_tokens + other.output_tokens,
             cache_read_tokens=(
                 (self.cache_read_tokens or 0) + (other.cache_read_tokens or 0)
-                if self.cache_read_tokens is not None
-                or other.cache_read_tokens is not None
-                else None
             ),
             cache_write_tokens=(
                 (self.cache_write_tokens or 0) + (other.cache_write_tokens or 0)
-                if self.cache_write_tokens is not None
-                or other.cache_write_tokens is not None
-                else None
             ),
         )
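
A quick sketch of the updated Usage parsing and addition; the import path is assumed from the file location, and the payloads are invented but follow the field names used in the diff:

```python
from lm_deluge.usage import Usage  # assumed import path (class lives in src/lm_deluge/usage.py)

openai_usage = {
    "prompt_tokens": 1_200,
    "completion_tokens": 150,
    "prompt_tokens_details": {"cached_tokens": 1_024},
}
gemini_usage = {
    "promptTokenCount": 400,
    "candidatesTokenCount": 90,
    "cachedContentTokenCount": 256,
}

u1 = Usage.from_openai_usage(openai_usage)  # cache_read_tokens == 1024
u2 = Usage.from_gemini_usage(gemini_usage)  # cache_read_tokens == 256

# Cache fields now default to 0 instead of None, so __add__ needs no None handling.
combined = u1 + u2
print(combined.input_tokens, combined.output_tokens, combined.cache_read_tokens)
# 1600 240 1280
```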