lm-deluge 0.0.54__tar.gz → 0.0.55__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this release of lm-deluge has been flagged as potentially problematic.

Files changed (80)
  1. {lm_deluge-0.0.54/src/lm_deluge.egg-info → lm_deluge-0.0.55}/PKG-INFO +1 -1
  2. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/pyproject.toml +1 -1
  3. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/api_requests/base.py +6 -0
  4. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/api_requests/response.py +28 -1
  5. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/client.py +13 -0
  6. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/models/__init__.py +4 -1
  7. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/models/anthropic.py +20 -2
  8. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/models/google.py +20 -12
  9. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/models/openai.py +18 -8
  10. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/tracker.py +17 -10
  11. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/usage.py +30 -21
  12. {lm_deluge-0.0.54 → lm_deluge-0.0.55/src/lm_deluge.egg-info}/PKG-INFO +1 -1
  13. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/LICENSE +0 -0
  14. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/README.md +0 -0
  15. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/setup.cfg +0 -0
  16. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/__init__.py +0 -0
  17. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/agent.py +0 -0
  18. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/api_requests/__init__.py +0 -0
  19. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/api_requests/anthropic.py +0 -0
  20. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/api_requests/bedrock.py +0 -0
  21. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/api_requests/common.py +0 -0
  22. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
  23. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
  24. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
  25. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
  26. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
  27. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/api_requests/gemini.py +0 -0
  28. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/api_requests/mistral.py +0 -0
  29. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/api_requests/openai.py +0 -0
  30. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/batches.py +0 -0
  31. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
  32. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
  33. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
  34. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
  35. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/built_in_tools/base.py +0 -0
  36. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/built_in_tools/openai.py +0 -0
  37. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/cache.py +0 -0
  38. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/cli.py +0 -0
  39. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/config.py +0 -0
  40. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/embed.py +0 -0
  41. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/errors.py +0 -0
  42. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/file.py +0 -0
  43. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/gemini_limits.py +0 -0
  44. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/image.py +0 -0
  45. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/llm_tools/__init__.py +0 -0
  46. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/llm_tools/classify.py +0 -0
  47. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/llm_tools/extract.py +0 -0
  48. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/llm_tools/locate.py +0 -0
  49. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/llm_tools/ocr.py +0 -0
  50. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/llm_tools/score.py +0 -0
  51. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/llm_tools/translate.py +0 -0
  52. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/models/bedrock.py +0 -0
  53. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/models/cerebras.py +0 -0
  54. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/models/cohere.py +0 -0
  55. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/models/deepseek.py +0 -0
  56. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/models/fireworks.py +0 -0
  57. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/models/grok.py +0 -0
  58. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/models/groq.py +0 -0
  59. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/models/meta.py +0 -0
  60. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/models/mistral.py +0 -0
  61. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/models/openrouter.py +0 -0
  62. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/models/together.py +0 -0
  63. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/presets/cerebras.py +0 -0
  64. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/presets/meta.py +0 -0
  65. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/prompt.py +0 -0
  66. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/request_context.py +0 -0
  67. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/rerank.py +0 -0
  68. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/tool.py +0 -0
  69. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/util/harmony.py +0 -0
  70. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/util/json.py +0 -0
  71. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/util/logprobs.py +0 -0
  72. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/util/spatial.py +0 -0
  73. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/util/validation.py +0 -0
  74. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge/util/xml.py +0 -0
  75. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge.egg-info/SOURCES.txt +0 -0
  76. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
  77. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge.egg-info/requires.txt +0 -0
  78. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/src/lm_deluge.egg-info/top_level.txt +0 -0
  79. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/tests/test_builtin_tools.py +0 -0
  80. {lm_deluge-0.0.54 → lm_deluge-0.0.55}/tests/test_native_mcp_server.py +0 -0

PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.54
+Version: 0.0.55
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10

pyproject.toml

@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]
 
 [project]
 name = "lm_deluge"
-version = "0.0.54"
+version = "0.0.55"
 authors = [{ name = "Benjamin Anderson", email = "ben@trytaylor.ai" }]
 description = "Python utility for using LLM API models."
 readme = "README.md"

src/lm_deluge/api_requests/base.py

@@ -52,6 +52,9 @@ class APIRequestBase(ABC):
         self, base_headers: dict[str, str], exclude_patterns: list[str] | None = None
     ) -> dict[str, str]:
         """Merge extra_headers with base headers, giving priority to extra_headers."""
+        # Filter out None values from base headers (e.g., missing API keys)
+        base_headers = {k: v for k, v in base_headers.items() if v is not None}
+
         if not self.context.extra_headers:
             return base_headers
 
@@ -69,6 +72,9 @@ class APIRequestBase(ABC):
         # Start with base headers, then overlay filtered extra headers (extra takes precedence)
         merged = dict(base_headers)
         merged.update(filtered_extra)
+
+        # Filter out None values from final merged headers
+        merged = {k: v for k, v in merged.items() if v is not None}
         return merged
 
     def handle_success(self, data):
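
The header-merging change drops entries whose value is None before the request goes out (for example, an API key header populated from an unset environment variable). A standalone sketch of the behavior; the build_headers helper below is illustrative, not part of the package:

import os


def build_headers(extra_headers: dict[str, str] | None = None) -> dict[str, str]:
    # os.environ.get returns None when the variable is unset; previously that
    # None could leak into the outgoing headers instead of being dropped.
    base = {
        "x-api-key": os.environ.get("ANTHROPIC_API_KEY"),
        "content-type": "application/json",
    }
    merged = {k: v for k, v in base.items() if v is not None}
    if extra_headers:
        merged.update({k: v for k, v in extra_headers.items() if v is not None})
    return merged


print(build_headers({"x-trace-id": "abc123"}))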

src/lm_deluge/api_requests/response.py

@@ -84,10 +84,37 @@ class APIResponse:
             and api_model.input_cost is not None
             and api_model.output_cost is not None
         ):
+            # Calculate input cost, accounting for cached vs non-cached tokens
+            # Different providers report tokens differently:
+            # - Anthropic/Bedrock: input_tokens is ONLY non-cached, cache_read_tokens is separate
+            # - OpenAI/Gemini: input_tokens INCLUDES cached, cache_read_tokens is a subset
+            cache_read_tokens = self.usage.cache_read_tokens or 0
+
+            if api_model.api_spec in ("anthropic", "bedrock"):
+                # For Anthropic: input_tokens already excludes cache, so use directly
+                non_cached_input_tokens = self.usage.input_tokens
+            else:
+                # For OpenAI/Gemini: input_tokens includes cache, so subtract it
+                non_cached_input_tokens = self.usage.input_tokens - cache_read_tokens
+
             self.cost = (
-                self.usage.input_tokens * api_model.input_cost / 1e6
+                non_cached_input_tokens * api_model.input_cost / 1e6
                 + self.usage.output_tokens * api_model.output_cost / 1e6
             )
+
+            # Add cost for cache read tokens (at reduced rate)
+            if cache_read_tokens > 0 and api_model.cached_input_cost is not None:
+                self.cost += cache_read_tokens * api_model.cached_input_cost / 1e6
+
+            # Add cost for cache write tokens (only for Anthropic)
+            if (
+                self.usage.cache_write_tokens
+                and self.usage.cache_write_tokens > 0
+                and api_model.cache_write_cost is not None
+            ):
+                self.cost += (
+                    self.usage.cache_write_tokens * api_model.cache_write_cost / 1e6
+                )
         elif self.content is not None and self.completion is not None:
             pass
             # print(
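
Illustrative arithmetic for the new accounting, using the Claude Sonnet prices added further down ($3.00 input, $0.30 cache read, $3.75 cache write, $15.00 output per million tokens) and made-up token counts; this mirrors the formula above rather than calling APIResponse directly:

# Anthropic-style usage: input_tokens already excludes cache reads.
non_cached_input_tokens = 1_000
cache_read_tokens = 9_000
cache_write_tokens = 0
output_tokens = 500

input_cost, cached_input_cost, cache_write_cost, output_cost = 3.0, 0.30, 3.75, 15.0

cost = (
    non_cached_input_tokens * input_cost / 1e6
    + output_tokens * output_cost / 1e6
    + cache_read_tokens * cached_input_cost / 1e6
    + cache_write_tokens * cache_write_cost / 1e6
)
print(f"${cost:.6f}")  # $0.013200, versus $0.037500 if all 10k input tokens were billed at the full rate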

src/lm_deluge/client.py

@@ -30,6 +30,7 @@ class _LLMClient(BaseModel):
     """
 
     model_names: str | list[str] = ["gpt-4.1-mini"]
+    name: str | None = None
     max_requests_per_minute: int = 1_000
     max_tokens_per_minute: int = 100_000
     max_concurrent_requests: int = 225
@@ -69,6 +70,7 @@ class _LLMClient(BaseModel):
             max_requests_per_minute=self.max_requests_per_minute,
             max_tokens_per_minute=self.max_tokens_per_minute,
             max_concurrent_requests=self.max_concurrent_requests,
+            client_name=self.name or "LLMClient",
             progress_style=self.progress,
             use_progress_bar=show_progress,
         )
@@ -169,6 +171,13 @@ class _LLMClient(BaseModel):
         # normalize weights
         self.model_weights = [w / sum(self.model_weights) for w in self.model_weights]
 
+        # Auto-generate name if not provided
+        if self.name is None:
+            if len(self.model_names) == 1:
+                self.name = self.model_names[0]
+            else:
+                self.name = "LLMClient"
+
         # Validate logprobs settings across all sampling params
         if self.logprobs or any(sp.logprobs for sp in self.sampling_params):
             print("Logprobs enabled.")
@@ -725,6 +734,7 @@ class _LLMClient(BaseModel):
 def LLMClient(
     model_names: str,
     *,
+    name: str | None = None,
     max_requests_per_minute: int = 1_000,
     max_tokens_per_minute: int = 100_000,
     max_concurrent_requests: int = 225,
@@ -751,6 +761,7 @@ def LLMClient(
 def LLMClient(
     model_names: list[str],
     *,
+    name: str | None = None,
     max_requests_per_minute: int = 1_000,
     max_tokens_per_minute: int = 100_000,
     max_concurrent_requests: int = 225,
@@ -776,6 +787,7 @@ def LLMClient(
 def LLMClient(
     model_names: str | list[str] = "gpt-4.1-mini",
     *,
+    name: str | None = None,
     max_requests_per_minute: int = 1_000,
     max_tokens_per_minute: int = 100_000,
     max_concurrent_requests: int = 225,
@@ -813,6 +825,7 @@ def LLMClient(
     # Simply pass everything to the Pydantic constructor
     return _LLMClient(
         model_names=model_names,
+        name=name,
         max_requests_per_minute=max_requests_per_minute,
         max_tokens_per_minute=max_tokens_per_minute,
         max_concurrent_requests=max_concurrent_requests,
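
A short usage sketch for the new name parameter (assuming LLMClient is re-exported from the package root; it is defined in src/lm_deluge/client.py). The name is forwarded to StatusTracker as client_name, and when omitted it defaults to the single model name or "LLMClient":

from lm_deluge import LLMClient  # assumed re-export; defined in lm_deluge.client

# Explicit name: progress output is prefixed with "[summarizer]".
client = LLMClient("gpt-4.1-mini", name="summarizer")

# No name with a single model: auto-named "gpt-4.1-mini".
default_client = LLMClient("gpt-4.1-mini")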

src/lm_deluge/models/__init__.py

@@ -29,7 +29,8 @@ class APIModel:
     api_base: str
     api_key_env_var: str
     api_spec: str
-    cached_input_cost: float | None = 0
+    cached_input_cost: float | None = 0  # $ per million cached/read input tokens
+    cache_write_cost: float | None = 0  # $ per million cache write tokens
     input_cost: float | None = 0  # $ per million input tokens
     output_cost: float | None = 0  # $ per million output tokens
     supports_json: bool = False
@@ -89,6 +90,7 @@ def register_model(
     api_spec: str = "openai",
     input_cost: float | None = 0,  # $ per million input tokens
     cached_input_cost: float | None = 0,
+    cache_write_cost: float | None = 0,  # $ per million cache write tokens
    output_cost: float | None = 0,  # $ per million output tokens
     supports_json: bool = False,
     supports_logprobs: bool = False,
@@ -106,6 +108,7 @@ def register_model(
         api_key_env_var=api_key_env_var,
         api_spec=api_spec,
         cached_input_cost=cached_input_cost,
+        cache_write_cost=cache_write_cost,
         input_cost=input_cost,
         output_cost=output_cost,
         supports_json=supports_json,
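
Registries built on register_model can now carry cache-write pricing as well. A hedged sketch; the id keyword and the endpoint value below are hypothetical, since only part of the signature appears in this diff:

from lm_deluge.models import register_model  # assumed import path

register_model(
    name="my-sonnet-variant",                 # hypothetical id parameter
    api_base="https://api.anthropic.com/v1",  # hypothetical endpoint value
    api_key_env_var="ANTHROPIC_API_KEY",
    api_spec="anthropic",
    input_cost=3.0,           # $ per million input tokens
    cached_input_cost=0.30,   # $ per million cache-read tokens
    cache_write_cost=3.75,    # $ per million cache-write tokens (new in 0.0.55)
    output_cost=15.0,
)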

src/lm_deluge/models/anthropic.py

@@ -18,6 +18,8 @@ ANTHROPIC_MODELS = {
         "supports_json": False,
         "api_spec": "anthropic",
         "input_cost": 3.0,
+        "cached_input_cost": 0.30,
+        "cache_write_cost": 3.75,
         "output_cost": 15.0,
         "requests_per_minute": 4_000,
         "tokens_per_minute": 400_000,
@@ -30,6 +32,8 @@ ANTHROPIC_MODELS = {
         "supports_json": False,
         "api_spec": "anthropic",
         "input_cost": 15.0,
+        "cached_input_cost": 1.50,
+        "cache_write_cost": 18.75,
         "output_cost": 75.0,
         "requests_per_minute": 4_000,
         "tokens_per_minute": 400_000,
@@ -43,6 +47,8 @@ ANTHROPIC_MODELS = {
         "supports_json": False,
         "api_spec": "anthropic",
         "input_cost": 15.0,
+        "cached_input_cost": 1.50,
+        "cache_write_cost": 18.75,
         "output_cost": 75.0,
         "requests_per_minute": 4_000,
         "tokens_per_minute": 400_000,
@@ -56,6 +62,8 @@ ANTHROPIC_MODELS = {
         "supports_json": False,
         "api_spec": "anthropic",
         "input_cost": 3.0,
+        "cached_input_cost": 0.30,
+        "cache_write_cost": 3.75,
         "output_cost": 15.0,
         "requests_per_minute": 4_000,
         "tokens_per_minute": 400_000,
@@ -68,6 +76,8 @@ ANTHROPIC_MODELS = {
         "supports_json": False,
         "api_spec": "anthropic",
         "input_cost": 3.0,
+        "cached_input_cost": 0.30,
+        "cache_write_cost": 3.75,
         "output_cost": 15.0,
         "requests_per_minute": 4_000,
         "tokens_per_minute": 400_000,
@@ -81,6 +91,8 @@ ANTHROPIC_MODELS = {
         "supports_json": False,
         "api_spec": "anthropic",
         "input_cost": 3.0,
+        "cached_input_cost": 0.30,
+        "cache_write_cost": 3.75,
         "output_cost": 15.0,
         "requests_per_minute": 4_000,
         "tokens_per_minute": 400_000,
@@ -93,6 +105,8 @@ ANTHROPIC_MODELS = {
         "supports_json": False,
         "api_spec": "anthropic",
         "input_cost": 3.0,
+        "cached_input_cost": 0.30,
+        "cache_write_cost": 3.75,
         "output_cost": 15.0,
         "requests_per_minute": 4_000,
         "tokens_per_minute": 400_000,
@@ -116,8 +130,10 @@ ANTHROPIC_MODELS = {
         "api_key_env_var": "ANTHROPIC_API_KEY",
         "supports_json": False,
         "api_spec": "anthropic",
-        "input_cost": 1.00,
-        "output_cost": 5.00,
+        "input_cost": 0.8,
+        "cached_input_cost": 0.08,
+        "cache_write_cost": 1.00,
+        "output_cost": 4.00,
         "requests_per_minute": 20_000,
         "tokens_per_minute": 4_000_000,  # supposed to be this but they fucked up
     },
@@ -129,6 +145,8 @@ ANTHROPIC_MODELS = {
         "supports_json": False,
         "api_spec": "anthropic",
         "input_cost": 0.25,
+        "cache_write_cost": 0.30,
+        "cached_input_cost": 0.03,
         "output_cost": 1.25,
         "requests_per_minute": 10_000,
         "tokens_per_minute": 4_000_000,  # supposed to be this but they fucked up

src/lm_deluge/models/google.py

@@ -18,6 +18,7 @@ GOOGLE_MODELS = {
         "supports_logprobs": False,
         "api_spec": "openai",
         "input_cost": 0.1,
+        "cached_input_cost": 0.025,
         "output_cost": 0.4,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
@@ -31,8 +32,8 @@ GOOGLE_MODELS = {
         "supports_json": True,
         "supports_logprobs": False,
         "api_spec": "openai",
-        "input_cost": 0.1,
-        "output_cost": 0.4,
+        "input_cost": 0.075,
+        "output_cost": 0.3,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
         "reasoning_model": False,
@@ -45,8 +46,9 @@ GOOGLE_MODELS = {
         "supports_json": True,
         "supports_logprobs": False,
         "api_spec": "openai",
-        "input_cost": 0.1,
-        "output_cost": 0.4,
+        "input_cost": 1.25,
+        "cached_input_cost": 0.31,
+        "output_cost": 10.0,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
         "reasoning_model": True,
@@ -59,8 +61,9 @@ GOOGLE_MODELS = {
         "supports_json": True,
         "supports_logprobs": False,
         "api_spec": "openai",
-        "input_cost": 0.1,
-        "output_cost": 0.4,
+        "input_cost": 0.3,
+        "cached_input_cost": 0.075,
+        "output_cost": 2.5,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
         "reasoning_model": True,
@@ -74,6 +77,7 @@ GOOGLE_MODELS = {
         "supports_logprobs": False,
         "api_spec": "openai",
         "input_cost": 0.1,
+        "cached_input_cost": 0.025,
         "output_cost": 0.4,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
@@ -89,6 +93,7 @@ GOOGLE_MODELS = {
         "supports_logprobs": False,
         "api_spec": "gemini",
         "input_cost": 0.1,
+        "cached_input_cost": 0.025,
         "output_cost": 0.4,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
@@ -102,8 +107,8 @@ GOOGLE_MODELS = {
         "supports_json": True,
         "supports_logprobs": False,
         "api_spec": "gemini",
-        "input_cost": 0.1,
-        "output_cost": 0.4,
+        "input_cost": 0.075,
+        "output_cost": 0.3,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
         "reasoning_model": False,
@@ -116,8 +121,9 @@ GOOGLE_MODELS = {
         "supports_json": True,
         "supports_logprobs": False,
         "api_spec": "gemini",
-        "input_cost": 0.1,
-        "output_cost": 0.4,
+        "input_cost": 1.25,
+        "cached_input_cost": 0.31,
+        "output_cost": 10.0,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
         "reasoning_model": True,
@@ -130,8 +136,9 @@ GOOGLE_MODELS = {
         "supports_json": True,
         "supports_logprobs": False,
         "api_spec": "gemini",
-        "input_cost": 0.1,
-        "output_cost": 0.4,
+        "input_cost": 0.3,
+        "cached_input_cost": 0.075,
+        "output_cost": 2.5,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
         "reasoning_model": True,
@@ -145,6 +152,7 @@ GOOGLE_MODELS = {
         "supports_logprobs": False,
         "api_spec": "gemini",
         "input_cost": 0.1,
+        "cached_input_cost": 0.025,
         "output_cost": 0.4,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,

src/lm_deluge/models/openai.py

@@ -75,8 +75,8 @@ OPENAI_MODELS = {
         "supports_logprobs": False,
         "supports_responses": True,
         "api_spec": "openai",
-        "input_cost": 2.0,
-        "output_cost": 8.0,
+        "input_cost": 3.0,
+        "output_cost": 12.0,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
         "reasoning_model": False,
@@ -90,8 +90,9 @@ OPENAI_MODELS = {
         "supports_logprobs": True,
         "supports_responses": True,
         "api_spec": "openai",
-        "input_cost": 10.0,
-        "output_cost": 40.0,
+        "input_cost": 2.0,
+        "cached_input_cost": 0.50,
+        "output_cost": 8.0,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
         "reasoning_model": True,
@@ -106,6 +107,7 @@ OPENAI_MODELS = {
         "supports_responses": True,
         "api_spec": "openai",
         "input_cost": 1.1,
+        "cached_input_cost": 0.275,
         "output_cost": 4.4,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
@@ -121,6 +123,7 @@ OPENAI_MODELS = {
         "supports_responses": True,
         "api_spec": "openai",
         "input_cost": 2.0,
+        "cached_input_cost": 0.50,
         "output_cost": 8.0,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
@@ -136,6 +139,7 @@ OPENAI_MODELS = {
         "supports_responses": True,
         "api_spec": "openai",
         "input_cost": 0.4,
+        "cached_input_cost": 0.10,
         "output_cost": 1.6,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
@@ -151,6 +155,7 @@ OPENAI_MODELS = {
         "supports_responses": True,
         "api_spec": "openai",
         "input_cost": 0.1,
+        "cached_input_cost": 0.025,
         "output_cost": 0.4,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
@@ -181,6 +186,7 @@ OPENAI_MODELS = {
         "supports_responses": True,
         "api_spec": "openai",
         "input_cost": 1.1,
+        "cached_input_cost": 0.55,
         "output_cost": 4.4,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
@@ -196,6 +202,7 @@ OPENAI_MODELS = {
         "supports_responses": True,
         "api_spec": "openai",
         "input_cost": 15.0,
+        "cached_input_cost": 7.50,
         "output_cost": 60.0,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
@@ -225,8 +232,9 @@ OPENAI_MODELS = {
         "supports_logprobs": True,
         "supports_responses": True,
         "api_spec": "openai",
-        "input_cost": 3.0,
-        "output_cost": 15.0,
+        "input_cost": 1.1,
+        "cached_input_cost": 0.55,
+        "output_cost": 4.4,
         "requests_per_minute": 20,
         "tokens_per_minute": 100_000,
         "reasoning_model": True,
@@ -240,8 +248,9 @@ OPENAI_MODELS = {
         "supports_logprobs": True,
         "supports_responses": True,
         "api_spec": "openai",
-        "input_cost": 5.0,
-        "output_cost": 15.0,
+        "input_cost": 2.50,
+        "cached_input_cost": 1.25,
+        "output_cost": 10.0,
         "requests_per_minute": 10_000,
         "tokens_per_minute": 30_000_000,
     },
@@ -255,6 +264,7 @@ OPENAI_MODELS = {
         "supports_responses": True,
         "api_spec": "openai",
         "input_cost": 0.15,
+        "cached_input_cost": 0.075,
         "output_cost": 0.6,
         "requests_per_minute": 60_000,
         "tokens_per_minute": 250_000_000,

src/lm_deluge/tracker.py

@@ -13,7 +13,6 @@ from rich.progress import (
     TaskID,
     TextColumn,
 )
-from rich.text import Text
 from tqdm.auto import tqdm
 
 SECONDS_TO_PAUSE_AFTER_RATE_LIMIT_ERROR = 5
@@ -24,6 +23,7 @@ class StatusTracker:
     max_requests_per_minute: int
     max_tokens_per_minute: int
     max_concurrent_requests: int
+    client_name: str = "LLMClient"
     num_tasks_started: int = 0
     num_tasks_in_progress: int = 0
     num_tasks_succeeded: int = 0
@@ -187,14 +187,16 @@ class StatusTracker:
 
     def _init_rich_display(self, total: int):
         """Initialize Rich display components."""
-        self._rich_console = Console()
+        self._rich_console = Console(highlight=False)
+        # Escape square brackets so Rich doesn't interpret them as markup
+        description = f"[bold blue]\\[{self.client_name}][/bold blue] Processing..."
        self._rich_progress = Progress(
             SpinnerColumn(),
-            TextColumn("Processing requests..."),
+            TextColumn("[progress.description]{task.description}"),
             BarColumn(),
             MofNCompleteColumn(),
         )
-        self._rich_task_id = self._rich_progress.add_task("requests", total=total)
+        self._rich_task_id = self._rich_progress.add_task(description, total=total)
         self._rich_stop_event = asyncio.Event()
         self._rich_display_task = asyncio.create_task(self._rich_display_updater())
 
@@ -217,12 +219,17 @@ class StatusTracker:
                 total=self.progress_bar_total,
             )
 
-            tokens_info = f"TPM Capacity: {self.available_token_capacity / 1000:.1f}k/{self.max_tokens_per_minute / 1000:.1f}k"
-            reqs_info = f"RPM Capacity: {int(self.available_request_capacity)}/{self.max_requests_per_minute}"
-            in_progress = f"In Progress: {int(self.num_tasks_in_progress)}"
-            capacity_text = Text(f"{in_progress} {tokens_info} • {reqs_info}")
+            tokens_info = f"{self.available_token_capacity / 1000:.1f}k/{self.max_tokens_per_minute / 1000:.1f}k TPM"
+            reqs_info = f"{int(self.available_request_capacity)}/{self.max_requests_per_minute} RPM"
+            in_progress = (
+                f" [gold3]In Progress:[/gold3] {int(self.num_tasks_in_progress)} "
+                + ("requests" if self.num_tasks_in_progress != 1 else "request")
+            )
+            capacity_text = (
+                f" [gold3]Capacity:[/gold3] {tokens_info} • {reqs_info}"
+            )
 
-            display = Group(self._rich_progress, capacity_text)
+            display = Group(self._rich_progress, in_progress, capacity_text)
             live.update(display)
 
             await asyncio.sleep(0.1)
@@ -252,7 +259,7 @@ class StatusTracker:
             return
         while not self._manual_stop_event.is_set():
             print(
-                f"Completed {self.num_tasks_succeeded}/{self.progress_bar_total} requests"
+                f"[{self.client_name}] Completed {self.num_tasks_succeeded}/{self.progress_bar_total} requests"
             )
             await asyncio.sleep(self.progress_print_interval)
 
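
The double backslash in the new description exists because Rich parses square brackets as markup tags, so a literal "[LLMClient]" prefix has to be escaped. A minimal sketch of the same trick outside StatusTracker:

from rich.console import Console

console = Console(highlight=False)
client_name = "LLMClient"

# "\\[" in the f-string becomes a literal backslash plus "[", which Rich renders
# as a plain "[" instead of trying to parse "[LLMClient]" as a markup tag.
console.print(f"[bold blue]\\[{client_name}][/bold blue] Processing...")
# -> [LLMClient] Processing...   (prefix styled bold blue)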

src/lm_deluge/usage.py

@@ -1,5 +1,4 @@
 from dataclasses import dataclass
-from typing import Optional
 
 
 @dataclass
@@ -13,8 +12,8 @@ class Usage:
 
     input_tokens: int = 0
     output_tokens: int = 0
-    cache_read_tokens: Optional[int] = None  # Tokens read from cache (Anthropic)
-    cache_write_tokens: Optional[int] = None  # Tokens written to cache (Anthropic)
+    cache_read_tokens: int = 0
+    cache_write_tokens: int = 0
 
     @property
     def total_input_tokens(self) -> int:
@@ -47,18 +46,29 @@ class Usage:
         return cls(
             input_tokens=usage_data.get("input_tokens", 0),
             output_tokens=usage_data.get("output_tokens", 0),
-            cache_read_tokens=usage_data.get("cache_read_input_tokens"),
-            cache_write_tokens=usage_data.get("cache_creation_input_tokens"),
+            cache_read_tokens=usage_data.get("cache_read_input_tokens", 0),
+            cache_write_tokens=usage_data.get("cache_creation_input_tokens", 0),
         )
 
     @classmethod
     def from_openai_usage(cls, usage_data: dict) -> "Usage":
-        """Create Usage from OpenAI API response usage data."""
+        """Create Usage from OpenAI API response usage data.
+
+        OpenAI supports prompt caching - cached tokens appear in prompt_tokens_details.cached_tokens.
+        Caching is automatic for prompts over 1024 tokens.
+        """
+        prompt_tokens_details = usage_data.get("prompt_tokens_details", {})
+        cached_tokens = (
+            prompt_tokens_details.get("cached_tokens", 0)
+            if prompt_tokens_details
+            else 0
+        )
+
         return cls(
             input_tokens=usage_data.get("prompt_tokens", 0),
             output_tokens=usage_data.get("completion_tokens", 0),
-            cache_read_tokens=None,  # OpenAI doesn't support caching yet
-            cache_write_tokens=None,
+            cache_read_tokens=cached_tokens if cached_tokens > 0 else 0,
+            cache_write_tokens=0,  # OpenAI doesn't charge separately for cache writes
         )
 
     @classmethod
@@ -67,18 +77,23 @@ class Usage:
         return cls(
             input_tokens=usage_data.get("prompt_tokens", 0),
             output_tokens=usage_data.get("completion_tokens", 0),
-            cache_read_tokens=None,  # Mistral doesn't support caching
-            cache_write_tokens=None,
+            cache_read_tokens=0,  # Mistral doesn't support caching
+            cache_write_tokens=0,
         )
 
     @classmethod
     def from_gemini_usage(cls, usage_data: dict) -> "Usage":
-        """Create Usage from Gemini API response usage data."""
+        """Create Usage from Gemini API response usage data.
+
+        Gemini supports context caching - cached tokens appear in cachedContentTokenCount.
+        """
+        cached_tokens = usage_data.get("cachedContentTokenCount", 0)
+
         return cls(
             input_tokens=usage_data.get("promptTokenCount", 0),
             output_tokens=usage_data.get("candidatesTokenCount", 0),
-            cache_read_tokens=None,  # Gemini doesn't support caching yet
-            cache_write_tokens=None,
+            cache_read_tokens=cached_tokens if cached_tokens > 0 else 0,
+            cache_write_tokens=0,  # Gemini doesn't charge separately for cache writes
         )
 
     def to_dict(self) -> dict:
@@ -100,8 +115,8 @@ class Usage:
         return cls(
             input_tokens=data.get("input_tokens", 0),
             output_tokens=data.get("output_tokens", 0),
-            cache_read_tokens=data.get("cache_read_tokens"),
-            cache_write_tokens=data.get("cache_write_tokens"),
+            cache_read_tokens=data.get("cache_read_tokens", 0),
+            cache_write_tokens=data.get("cache_write_tokens", 0),
        )
 
     def __add__(self, other: "Usage") -> "Usage":
@@ -111,14 +126,8 @@ class Usage:
             output_tokens=self.output_tokens + other.output_tokens,
             cache_read_tokens=(
                 (self.cache_read_tokens or 0) + (other.cache_read_tokens or 0)
-                if self.cache_read_tokens is not None
-                or other.cache_read_tokens is not None
-                else None
             ),
             cache_write_tokens=(
                 (self.cache_write_tokens or 0) + (other.cache_write_tokens or 0)
-                if self.cache_write_tokens is not None
-                or other.cache_write_tokens is not None
-                else None
             ),
         )
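
A sketch of how the reworked parsers behave on typical provider payloads; the field names are taken from the diff above, the token counts are made up:

from lm_deluge.usage import Usage

openai_usage = Usage.from_openai_usage({
    "prompt_tokens": 12_000,  # includes cached tokens
    "completion_tokens": 350,
    "prompt_tokens_details": {"cached_tokens": 10_240},
})
gemini_usage = Usage.from_gemini_usage({
    "promptTokenCount": 6_000,
    "candidatesTokenCount": 150,
    "cachedContentTokenCount": 4_096,
})

print(openai_usage.cache_read_tokens)   # 10240
combined = openai_usage + gemini_usage  # __add__ now always sums; no None handling
print(combined.cache_read_tokens)       # 14336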

src/lm_deluge.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.54
+Version: 0.0.55
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10