lm-deluge 0.0.34__tar.gz → 0.0.35__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (63)
  1. {lm_deluge-0.0.34/src/lm_deluge.egg-info → lm_deluge-0.0.35}/PKG-INFO +1 -1
  2. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/pyproject.toml +1 -1
  3. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/api_requests/anthropic.py +3 -3
  4. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/api_requests/gemini.py +2 -2
  5. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/api_requests/openai.py +15 -4
  6. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/client.py +21 -75
  7. lm_deluge-0.0.34/src/lm_deluge/models.py → lm_deluge-0.0.35/src/lm_deluge/models/__init__.py +87 -2
  8. lm_deluge-0.0.35/src/lm_deluge/util/harmony.py +45 -0
  9. {lm_deluge-0.0.34 → lm_deluge-0.0.35/src/lm_deluge.egg-info}/PKG-INFO +1 -1
  10. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge.egg-info/SOURCES.txt +2 -1
  11. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/LICENSE +0 -0
  12. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/README.md +0 -0
  13. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/setup.cfg +0 -0
  14. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/__init__.py +0 -0
  15. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/agent.py +0 -0
  16. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/api_requests/__init__.py +0 -0
  17. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/api_requests/base.py +0 -0
  18. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/api_requests/bedrock.py +0 -0
  19. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/api_requests/common.py +0 -0
  20. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
  21. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
  22. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
  23. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
  24. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
  25. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/api_requests/mistral.py +0 -0
  26. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/api_requests/response.py +0 -0
  27. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/batches.py +0 -0
  28. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
  29. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
  30. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
  31. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
  32. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/built_in_tools/base.py +0 -0
  33. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/built_in_tools/openai.py +0 -0
  34. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/cache.py +0 -0
  35. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/config.py +0 -0
  36. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/embed.py +0 -0
  37. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/errors.py +0 -0
  38. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/file.py +0 -0
  39. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/gemini_limits.py +0 -0
  40. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/image.py +0 -0
  41. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/llm_tools/__init__.py +0 -0
  42. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/llm_tools/classify.py +0 -0
  43. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/llm_tools/extract.py +0 -0
  44. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/llm_tools/locate.py +0 -0
  45. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/llm_tools/ocr.py +0 -0
  46. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/llm_tools/score.py +0 -0
  47. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/llm_tools/translate.py +0 -0
  48. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/prompt.py +0 -0
  49. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/request_context.py +0 -0
  50. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/rerank.py +0 -0
  51. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/tool.py +0 -0
  52. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/tracker.py +0 -0
  53. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/usage.py +0 -0
  54. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/util/json.py +0 -0
  55. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/util/logprobs.py +0 -0
  56. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/util/spatial.py +0 -0
  57. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/util/validation.py +0 -0
  58. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge/util/xml.py +0 -0
  59. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
  60. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge.egg-info/requires.txt +0 -0
  61. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/src/lm_deluge.egg-info/top_level.txt +0 -0
  62. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/tests/test_builtin_tools.py +0 -0
  63. {lm_deluge-0.0.34 → lm_deluge-0.0.35}/tests/test_native_mcp_server.py +0 -0
PKG-INFO:

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lm_deluge
- Version: 0.0.34
+ Version: 0.0.35
  Summary: Python utility for using LLM API models.
  Author-email: Benjamin Anderson <ben@trytaylor.ai>
  Requires-Python: >=3.10

pyproject.toml:

@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]

  [project]
  name = "lm_deluge"
- version = "0.0.34"
+ version = "0.0.35"
  authors = [{ name = "Benjamin Anderson", email = "ben@trytaylor.ai" }]
  description = "Python utility for using LLM API models."
  readme = "README.md"
src/lm_deluge/api_requests/anthropic.py:

@@ -57,9 +57,9 @@ def _build_anthropic_request(
  # handle thinking
  if model.reasoning_model and sampling_params.reasoning_effort:
  # translate reasoning effort of low, medium, high to budget tokens
- budget = {"low": 1024, "medium": 4096, "high": 16384}.get(
- sampling_params.reasoning_effort
- )
+ budget = {
+ "minimal": 256, "low": 1024, "medium": 4096, "high": 16384
+ }.get(sampling_params.reasoning_effort)
  request_json["thinking"] = {
  "type": "enabled",
  "budget_tokens": budget,
src/lm_deluge/api_requests/gemini.py:

@@ -45,8 +45,8 @@ async def _build_gemini_request(
  thinking_config = {"includeThoughts": False, "thinkingBudget": budget}
  else:
  thinking_config = {"includeThoughts": True}
- if effort in {"low", "medium", "high"} and "flash" in model.id:
- budget = {"low": 1024, "medium": 4096, "high": 16384}[effort]
+ if effort in {"minimal", "low", "medium", "high"} and "flash" in model.id:
+ budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}[effort]
  thinking_config["thinkingBudget"] = budget
  request_json["generationConfig"]["thinkingConfig"] = thinking_config
src/lm_deluge/api_requests/openai.py:

@@ -42,8 +42,13 @@ async def _build_oa_chat_request(
  # Disable reasoning for Gemini models when no effort requested
  if "gemini" in model.id:
  effort = "none"
+ elif "gpt-5" in model.id:
+ effort = "minimal"
  else:
  effort = "low"
+ if effort == "minimal" and "gpt-5" not in model.id:
+ print("WARNING: 'minimal' reasoning effort only allowed for gpt-5. setting to 'low'.")
+ effort = "low"
  request_json["reasoning_effort"] = effort
  else:
  if sampling_params.reasoning_effort:

@@ -122,15 +127,21 @@ class OpenAIRequest(APIRequestBase):
  message = data["choices"][0]["message"]
  finish_reason = data["choices"][0]["finish_reason"]

- # Add text content if present
- if message.get("content"):
- parts.append(Text(message["content"]))
-
  # Add thinking content if present (reasoning models)
  if "reasoning_content" in message:
  thinking = message["reasoning_content"]
  parts.append(Thinking(thinking))

+ # Together AI returns reasoning in a "reasoning"
+ # field which is not correct but whatever
+ if message.get("reasoning"):
+ thinking = message["reasoning"]
+ parts.append(Thinking(thinking))
+
+ # Add text content if present
+ if message.get("content"):
+ parts.append(Text(message["content"]))
+
  # Add tool calls if present
  if "tool_calls" in message:
  for tool_call in message["tool_calls"]:
src/lm_deluge/client.py:

@@ -1,6 +1,6 @@
  import asyncio
  import random
- from typing import Any, Literal, Self, Sequence, overload
+ from typing import Any, Literal, Self, Sequence, Callable, overload

  import numpy as np
  import yaml

@@ -22,8 +22,6 @@ from .models import APIModel, registry
  from .request_context import RequestContext
  from .tracker import StatusTracker

-
- # TODO: get completions as they finish, not all at once at the end.
  # TODO: add optional max_input_tokens to client so we can reject long prompts to prevent abuse
  class _LLMClient(BaseModel):
  """

@@ -55,6 +53,9 @@ class _LLMClient(BaseModel):
  # Progress configuration
  progress: Literal["rich", "tqdm", "manual"] = "rich"

+ # Postprocessing - run on every APIResponse
+ postprocess: Callable[[APIResponse], APIResponse] | None = None
+
  # Internal state for async task handling
  _next_task_id: int = PrivateAttr(default=0)
  _tasks: dict[int, asyncio.Task] = PrivateAttr(default_factory=dict)
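The new postprocess field is an ordinary callable applied to every APIResponse the client hands back, including cache hits and error responses that go through the retry queue (see the _maybe_postprocess wrapper below). A minimal sketch of a custom hook, assuming Text parts expose a text attribute as they do in the bundled Harmony parser:

    import copy

    from lm_deluge.api_requests.response import APIResponse
    from lm_deluge.prompt import Text

    def strip_whitespace(response: APIResponse) -> APIResponse:
        # Illustrative hook: trim surrounding whitespace on every Text part.
        if not response.content:
            return response
        new_response = copy.deepcopy(response)
        for part in new_response.content.parts:
            if isinstance(part, Text):
                part.text = part.text.strip()
        return new_response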
@@ -196,14 +197,6 @@ class _LLMClient(BaseModel):
  config_dict = yaml.safe_load(open(file_path))
  return cls.from_dict(config_dict)

- @classmethod
- def basic(cls, model: str | list[str], **kwargs):
- """
- Doesn't do anything differently now, kept for backwards compat.
- """
- kwargs["model_names"] = model
- return cls(**kwargs)
-
  def _select_model(self):
  assert isinstance(self.model_weights, list)
  model_idx = np.random.choice(range(len(self.models)), p=self.model_weights)

@@ -254,13 +247,18 @@ class _LLMClient(BaseModel):
  ) -> APIResponse:
  """Handle caching and single HTTP call for a request. Failed requests go to retry queue."""
  # Check cache first
+ def _maybe_postprocess(response: APIResponse):
+ if self.postprocess:
+ return self.postprocess(response)
+ return response
+
  if self.cache:
  cached = self.cache.get(context.prompt)
  if cached:
  cached.local_cache_hit = True
  if context.status_tracker:
  context.status_tracker.task_succeeded(context.task_id)
- return cached
+ return _maybe_postprocess(cached)

  # Execute single request
  assert context.status_tracker

@@ -275,7 +273,7 @@ class _LLMClient(BaseModel):
  self.cache.put(context.prompt, response)
  # Call callback if provided
  context.maybe_callback(response, context.status_tracker)
- return response
+ return _maybe_postprocess(response)

  # Handle error response - add to retry queue if available
  if retry_queue and context.attempts_left > 1:

@@ -303,7 +301,7 @@ class _LLMClient(BaseModel):

  # Add to retry queue for later processing
  await retry_queue.put(retry_context)
- return response # Return the error response for now
+ return _maybe_postprocess(response) # Return the error response for now

  # No retries left or no retry queue - final failure
  context.status_tracker.task_failed(context.task_id)

@@ -316,7 +314,7 @@ class _LLMClient(BaseModel):
  error_msg += f" Message: {response.error_message}. Giving up."
  print(error_msg)

- return response
+ return _maybe_postprocess(response)

  @overload
  async def process_prompts_async(

@@ -570,6 +568,8 @@ class _LLMClient(BaseModel):
  print(item, end="", flush=True)
  else:
  # final item
+ if self.postprocess:
+ return self.postprocess(item)
  return item

  async def run_agent_loop(
@@ -712,66 +712,8 @@ class _LLMClient(BaseModel):
  batch_ids, provider, poll_interval=30
  )

-
- # def api_prompts_dry_run(
- # ids: np.ndarray | list[int],
- # prompts: list[Conversation],
- # models: str | list[str],
- # model_weights: list[float],
- # sampling_params: list[SamplingParams],
- # max_tokens_per_minute: int = 500_000,
- # max_requests_per_minute: int = 1_000,
- # ):
- # """
- # Count tokens and estimate costs for a batch of prompts.
- # """
- # results = []
- # for i, prompt in zip(ids, prompts):
- # # choose a model
- # model_idx = np.random.choice(range(len(models)), p=model_weights)
- # model = models[model_idx]
-
- # # dry run
- # input_tokens, output_tokens, min_cost, max_cost = prompt.dry_run(
- # model, sampling_params[model_idx].max_new_tokens
- # )
- # results.append(
- # {
- # "id": i,
- # "input_tokens": input_tokens,
- # "output_tokens": output_tokens,
- # "min_cost": min_cost,
- # "max_cost": max_cost,
- # }
- # )
-
- # combined_results: dict[str, Any] = {
- # "total_input_tokens": sum([r["input_tokens"] for r in results]),
- # "total_output_tokens": sum([r["output_tokens"] for r in results]),
- # "total_min_cost": sum([r["min_cost"] for r in results]),
- # "total_max_cost": sum([r["max_cost"] for r in results]),
- # }
- # minimum_time_tpm = combined_results["total_input_tokens"] / max_tokens_per_minute
- # maximum_time_tpm = (
- # combined_results["total_input_tokens"] + combined_results["total_output_tokens"]
- # ) / max_tokens_per_minute
- # minimum_time_rpm = len(prompts) / max_requests_per_minute
-
- # combined_results["minimum_time"] = max(minimum_time_tpm, minimum_time_rpm)
- # combined_results["maximum_time"] = max(maximum_time_tpm, minimum_time_rpm)
- # limiting_factor = None
- # if minimum_time_rpm > maximum_time_tpm:
- # limiting_factor = "requests"
- # elif minimum_time_rpm < minimum_time_tpm:
- # limiting_factor = "tokens"
- # else:
- # limiting_factor = "depends"
- # combined_results["limiting_factor"] = limiting_factor
-
- # return combined_results
-
-
- # Clean factory function with perfect IDE support
+ # factory function -- allows positional model names,
+ # keeps pydantic validation, without sacrificing IDE support
  @overload
  def LLMClient(
  model_names: str,
@@ -794,6 +736,7 @@ def LLMClient(
  top_logprobs: int | None = None,
  force_local_mcp: bool = False,
  progress: Literal["rich", "tqdm", "manual"] = "rich",
+ postprocess: Callable[[APIResponse], APIResponse] | None = None
  ) -> _LLMClient: ...


@@ -819,6 +762,7 @@ def LLMClient(
  top_logprobs: int | None = None,
  force_local_mcp: bool = False,
  progress: Literal["rich", "tqdm", "manual"] = "rich",
+ postprocess: Callable[[APIResponse], APIResponse] | None = None
  ) -> _LLMClient: ...


@@ -843,6 +787,7 @@ def LLMClient(
  top_logprobs: int | None = None,
  force_local_mcp: bool = False,
  progress: Literal["rich", "tqdm", "manual"] = "rich",
+ postprocess: Callable[[APIResponse], APIResponse] | None = None
  ) -> _LLMClient:
  """
  Create an LLMClient with model_names as a positional argument.

@@ -879,4 +824,5 @@ def LLMClient(
  top_logprobs=top_logprobs,
  force_local_mcp=force_local_mcp,
  progress=progress,
+ postprocess=postprocess
  )
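With the factory overloads updated, the hook can be passed alongside a positional model name. A small usage sketch (the model name is one of the registry keys added below; strip_whitespace is the illustrative hook from earlier):

    from lm_deluge.client import LLMClient

    client = LLMClient(
        "gpt-5-mini",                  # positional model name
        progress="tqdm",
        postprocess=strip_whitespace,  # any Callable[[APIResponse], APIResponse]
    )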
src/lm_deluge/models/__init__.py (renamed from src/lm_deluge/models.py):

@@ -3,7 +3,7 @@ from __future__ import annotations
  import random
  from dataclasses import dataclass, field

- from .request_context import RequestContext
+ from ..request_context import RequestContext

  BUILTIN_MODELS = {
  # `7MMM. ,MMF' mm
@@ -267,6 +267,62 @@ BUILTIN_MODELS = {
  # ░███
  # █████
  # ░░░░░
+ "gpt-5": {
+ "id": "gpt-5",
+ "name": "gpt-5",
+ "api_base": "https://api.openai.com/v1",
+ "api_key_env_var": "OPENAI_API_KEY",
+ "supports_json": False,
+ "supports_logprobs": True,
+ "supports_responses": True,
+ "api_spec": "openai",
+ "input_cost": 1.25,
+ "cached_input_cost": 0.125,
+ "output_cost": 10.0,
+ "reasoning_model": True,
+ },
+ "gpt-5-chat": {
+ "id": "gpt-5-chat",
+ "name": "gpt-5-chat-latest",
+ "api_base": "https://api.openai.com/v1",
+ "api_key_env_var": "OPENAI_API_KEY",
+ "supports_json": False,
+ "supports_logprobs": True,
+ "supports_responses": True,
+ "api_spec": "openai",
+ "input_cost": 1.25,
+ "cached_input_cost": 0.125,
+ "output_cost": 10.0,
+ "reasoning_model": False,
+ },
+ "gpt-5-mini": {
+ "id": "gpt-5-mini",
+ "name": "gpt-5-mini",
+ "api_base": "https://api.openai.com/v1",
+ "api_key_env_var": "OPENAI_API_KEY",
+ "supports_json": False,
+ "supports_logprobs": True,
+ "supports_responses": True,
+ "api_spec": "openai",
+ "input_cost": 0.25,
+ "cached_input_cost": 0.025,
+ "output_cost": 2.0,
+ "reasoning_model": True,
+ },
+ "gpt-5-nano": {
+ "id": "gpt-5-nano",
+ "name": "gpt-5-nano",
+ "api_base": "https://api.openai.com/v1",
+ "api_key_env_var": "OPENAI_API_KEY",
+ "supports_json": False,
+ "supports_logprobs": True,
+ "supports_responses": True,
+ "api_spec": "openai",
+ "input_cost": 0.05,
+ "cached_input_cost": 0.005,
+ "output_cost": 0.40,
+ "reasoning_model": True,
+ },
  "openai-computer-use-preview": {
  "id": "openai-computer-use-preview",
  "name": "computer-use-preview",
@@ -971,6 +1027,32 @@ BUILTIN_MODELS = {
  "requests_per_minute": None,
  "tokens_per_minute": None,
  },
+ "gpt-oss-120b-together": {
+ "id": "gpt-oss-120b-together",
+ "name": "openai/gpt-oss-120b",
+ "api_base": "https://api.together.xyz/v1",
+ "api_key_env_var": "TOGETHER_API_KEY",
+ "supports_json": False,
+ "api_spec": "openai",
+ "input_cost": 0.18,
+ "output_cost": 0.59,
+ "requests_per_minute": None,
+ "tokens_per_minute": None,
+ "reasoning_model": True
+ },
+ "gpt-oss-20b-together": {
+ "id": "gpt-oss-20b-together",
+ "name": "openai/gpt-oss-20b",
+ "api_base": "https://api.together.xyz/v1",
+ "api_key_env_var": "TOGETHER_API_KEY",
+ "supports_json": False,
+ "api_spec": "openai",
+ "input_cost": 0.18,
+ "output_cost": 0.59,
+ "requests_per_minute": None,
+ "tokens_per_minute": None,
+ "reasoning_model": True
+ },
  # █████████ █████
  # ███░░░░░███ ░░███
  # ███ ░░░ ██████ ░███████ ██████ ████████ ██████
@@ -1210,6 +1292,7 @@ class APIModel:
  api_base: str
  api_key_env_var: str
  api_spec: str
+ cached_input_cost: float | None = 0
  input_cost: float | None = 0 # $ per million input tokens
  output_cost: float | None = 0 # $ per million output tokens
  supports_json: bool = False

@@ -1242,7 +1325,7 @@ class APIModel:
  random.sample(regions, 1, counts=weights)[0]

  def make_request(self, context: RequestContext): # -> "APIRequestBase"
- from .api_requests.common import CLASSES
+ from ..api_requests.common import CLASSES

  api_spec = self.api_spec
  if (
@@ -1268,6 +1351,7 @@ def register_model(
  api_key_env_var: str,
  api_spec: str,
  input_cost: float | None = 0, # $ per million input tokens
+ cached_input_cost: float | None = 0,
  output_cost: float | None = 0, # $ per million output tokens
  supports_json: bool = False,
  supports_logprobs: bool = False,

@@ -1284,6 +1368,7 @@
  api_base=api_base,
  api_key_env_var=api_key_env_var,
  api_spec=api_spec,
+ cached_input_cost=cached_input_cost,
  input_cost=input_cost,
  output_cost=output_cost,
  supports_json=supports_json,
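cached_input_cost is expressed in the same units as input_cost and output_cost: dollars per million tokens. A worked example using the gpt-5 prices registered above (the token counts are made up):

    # $1.25/M input, $0.125/M cached input, $10.00/M output for gpt-5.
    input_tokens, cached_input_tokens, output_tokens = 10_000, 40_000, 2_000
    cost = (
        input_tokens * 1.25 / 1_000_000
        + cached_input_tokens * 0.125 / 1_000_000
        + output_tokens * 10.0 / 1_000_000
    )
    print(f"${cost:.4f}")  # $0.0375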
src/lm_deluge/util/harmony.py (new file):

@@ -0,0 +1,45 @@
+ # sample thing we'd want to parse from llama.cpp
+ # the goal here is: barebones inference implementation returns
+ # raw harmony string; we parse into content blocks
+
+ # implied: <|start|>assistant
+ # <|channel|>analysis<|message|>We need to respond as a helpful assistant. The user says "who are you and what do you want with my family?" This is a normal question. We should answer that we are ChatGPT, an AI language model, and we don't want anything with their family. We reassure them.<|start|>assistant<|channel|>final<|message|>I’m ChatGPT, a large language‑model AI created by OpenAI. I don’t have personal intentions or desires, and I’m not able to interact with anyone outside of this chat. My only goal here is to provide information, answer questions, and help you with whatever you need—nothing more, nothing less. If you have any concerns or need help with something specific, just let me know!
+ #
+ import copy
+ from lm_deluge.api_requests.response import APIResponse
+ from lm_deluge.prompt import Text, Thinking
+
+ SAMPLE_INPUT = '''
+ <|channel|>analysis<|message|>We need to respond as a helpful assistant. The user says "who are you and what do you want with my family?" This is a normal question. We should answer that we are ChatGPT, an AI language model, and we don't want anything with their family. We reassure them.<|start|>assistant<|channel|>final<|message|>I’m ChatGPT, a large language‑model AI created by OpenAI. I don’t have personal intentions or desires, and I’m not able to interact with anyone outside of this chat. My only goal here is to provide information, answer questions, and help you with whatever you need—nothing more, nothing less. If you have any concerns or need help with something specific, just let me know!
+ '''.strip()
+
+ def _split_messages(response: str):
+ raw_messages = response.split("<|start|>")
+ messages = []
+ for msg in raw_messages:
+ channel, content = msg.split("<|message|>")
+ channel = channel.split("<|channel|>")[1]
+ messages.append((channel, content))
+
+ return messages
+
+ def postprocess_harmony(response: APIResponse) -> APIResponse:
+ if not response.content:
+ return response
+
+ parts = response.content.parts
+ assert len(parts) == 1, "expected 1 parts to convert harmony"
+ text = parts[0].text # type: ignore
+ messages = _split_messages(text)
+
+ new_parts = []
+ for channel, content in messages:
+ if channel == "analysis":
+ new_parts.append(Thinking(content=content))
+ elif channel == "final":
+ new_parts.append(Text(text=content))
+
+ new_response = copy.deepcopy(response)
+ new_response.content.parts = new_parts # type: ignore
+
+ return new_response
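The new postprocess_harmony utility splits a raw Harmony transcript on its channel markers and rebuilds the response as Thinking and Text parts, which pairs naturally with the postprocess hook and the Together-hosted gpt-oss models above. A sketch of the intended wiring; the assumption that process_prompts_async accepts a list of plain strings and returns a list of APIResponse objects is not confirmed by this diff:

    import asyncio

    from lm_deluge.client import LLMClient
    from lm_deluge.util.harmony import postprocess_harmony

    # Every APIResponse is routed through the Harmony parser, so analysis/final
    # channels come back as Thinking/Text parts instead of one raw string.
    client = LLMClient("gpt-oss-120b-together", postprocess=postprocess_harmony)

    async def main():
        responses = await client.process_prompts_async(["Who are you?"])  # assumed signature
        print(responses[0].content.parts)

    asyncio.run(main())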
src/lm_deluge.egg-info/PKG-INFO:

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lm_deluge
- Version: 0.0.34
+ Version: 0.0.35
  Summary: Python utility for using LLM API models.
  Author-email: Benjamin Anderson <ben@trytaylor.ai>
  Requires-Python: >=3.10
src/lm_deluge.egg-info/SOURCES.txt:

@@ -12,7 +12,6 @@ src/lm_deluge/errors.py
  src/lm_deluge/file.py
  src/lm_deluge/gemini_limits.py
  src/lm_deluge/image.py
- src/lm_deluge/models.py
  src/lm_deluge/prompt.py
  src/lm_deluge/request_context.py
  src/lm_deluge/rerank.py

@@ -51,6 +50,8 @@ src/lm_deluge/llm_tools/locate.py
  src/lm_deluge/llm_tools/ocr.py
  src/lm_deluge/llm_tools/score.py
  src/lm_deluge/llm_tools/translate.py
+ src/lm_deluge/models/__init__.py
+ src/lm_deluge/util/harmony.py
  src/lm_deluge/util/json.py
  src/lm_deluge/util/logprobs.py
  src/lm_deluge/util/spatial.py