lm-deluge 0.0.5__py3-none-any.whl → 0.0.6__py3-none-any.whl

@@ -58,13 +58,18 @@ class OpenAIRequest(APIRequestBase):
         self.request_header = {
             "Authorization": f"Bearer {os.getenv(self.model.api_key_env_var)}"
         }
+
         self.request_json = {
             "model": self.model.name,
             "messages": prompt.to_openai(),
             "temperature": sampling_params.temperature,
             "top_p": sampling_params.top_p,
-            "max_completion_tokens": sampling_params.max_new_tokens,
         }
+        # set max_tokens or max_completion_tokens dep. on provider
+        if "cohere" in self.model.api_base:
+            self.request_json["max_tokens"] = sampling_params.max_new_tokens
+        elif "openai" in self.model.api_base:
+            self.request_json["max_completion_tokens"] = sampling_params.max_new_tokens
         if self.model.reasoning_model:
             self.request_json["temperature"] = 1.0
             self.request_json["top_p"] = 1.0
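This split is needed because OpenAI's chat completions API deprecated `max_tokens` in favor of `max_completion_tokens`, while Cohere's OpenAI-compatible endpoint still accepts only `max_tokens`. A minimal sketch of the same dispatch, assuming `api_base` is a provider URL matched by substring as in the hunk (the helper name is illustrative, not part of the package); note that a base URL matching neither string sends the request with no token cap at all:

def token_limit_field(api_base: str) -> str | None:
    # Pick the JSON field that carries the completion-token cap.
    if "cohere" in api_base:
        return "max_tokens"
    if "openai" in api_base:
        return "max_completion_tokens"
    return None  # neither matched: request goes out uncapped

request_json = {"model": "gpt-4o-mini"}  # stand-in for the dict built above
field = token_limit_field("https://api.openai.com/v1")
if field is not None:
    request_json[field] = 512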
@@ -84,6 +89,7 @@ class OpenAIRequest(APIRequestBase):
     async def handle_response(self, http_response: ClientResponse) -> APIResponse:
         is_error = False
         error_message = None
+        thinking = None
         completion = None
         input_tokens = None
         output_tokens = None
@@ -103,6 +109,8 @@ class OpenAIRequest(APIRequestBase):
         assert data is not None, "data is None"
         try:
             completion = data["choices"][0]["message"]["content"]
+            if "reasoning_content" in data["choices"][0]["message"]:
+                thinking = data["choices"][0]["message"]["reasoning_content"]
             input_tokens = data["usage"]["prompt_tokens"]
             output_tokens = data["usage"]["completion_tokens"]
             if self.logprobs and "logprobs" in data["choices"][0]:
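Some OpenAI-compatible backends (DeepSeek's reasoning models, for example) return the model's reasoning trace in a separate `reasoning_content` field alongside `content`. The membership check above can equivalently be written with `dict.get`, which yields `None` when the field is absent; a minimal sketch, assuming `data` is the parsed response body as in the hunk:

message = data["choices"][0]["message"]
completion = message["content"]
thinking = message.get("reasoning_content")  # None when the backend sent no trace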
@@ -135,6 +143,7 @@ class OpenAIRequest(APIRequestBase):
             error_message=error_message,
             prompt=self.prompt,
             logprobs=logprobs,
+            thinking=thinking,
             completion=completion,
             model_internal=self.model_name,
             sampling_params=self.sampling_params,
lm_deluge/client.py CHANGED
@@ -85,6 +85,7 @@ class LLMClient:
     def __init__(
         self,
         model_names: list[str],
+        *,
        max_requests_per_minute: int,
        max_tokens_per_minute: int,
        max_concurrent_requests: int,
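The bare `*` makes every parameter after `model_names` keyword-only, so the rate-limit settings can no longer be passed positionally. This is a breaking change for positional callers; a usage sketch, with the model name purely illustrative:

client = LLMClient(
    ["gpt-4o-mini"],
    max_requests_per_minute=60,
    max_tokens_per_minute=100_000,
    max_concurrent_requests=10,
)
# LLMClient(["gpt-4o-mini"], 60, 100_000, 10) now raises TypeError.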
@@ -345,6 +346,7 @@ class LLMClient:

        # add cache hits back in
        for id, res in zip(cache_hit_ids, cache_hit_results):
+            res.cache_hit = True
            results[id] = res

        if return_completions_only:
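With the flag set, callers can tell cached responses apart from fresh API calls. A hypothetical downstream use; the `process_prompts_sync` method name and the `prompts` list are assumptions for illustration, not confirmed package API:

results = client.process_prompts_sync(prompts)  # hypothetical entry point
cached = [r for r in results if r.cache_hit]
print(f"{len(cached)} of {len(results)} responses came from the cache")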
lm_deluge/image.py CHANGED
@@ -191,6 +191,12 @@ class Image:
             },
         }

+    def mistral(self) -> dict:
+        return {
+            "type": "image_url",
+            "image_url": self._base64(),
+        }
+
     def gemini(self) -> dict:
         return {
             "inlineData": {