lm-deluge 0.0.5__py3-none-any.whl → 0.0.7__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects changes between the versions as they appear in their respective public registries.
Potentially problematic release.
This version of lm-deluge might be problematic.
- lm_deluge/__init__.py +2 -1
- lm_deluge/api_requests/base.py +1 -0
- lm_deluge/api_requests/common.py +2 -11
- lm_deluge/api_requests/deprecated/cohere.py +132 -0
- lm_deluge/api_requests/deprecated/vertex.py +361 -0
- lm_deluge/api_requests/{cohere.py → mistral.py} +37 -31
- lm_deluge/api_requests/openai.py +10 -1
- lm_deluge/client.py +2 -0
- lm_deluge/image.py +6 -0
- lm_deluge/models.py +348 -288
- lm_deluge/prompt.py +11 -9
- lm_deluge/util/json.py +4 -3
- lm_deluge/util/xml.py +11 -12
- lm_deluge-0.0.7.dist-info/METADATA +163 -0
- {lm_deluge-0.0.5.dist-info → lm_deluge-0.0.7.dist-info}/RECORD +17 -18
- lm_deluge/api_requests/google.py +0 -0
- lm_deluge/api_requests/vertex.py +0 -361
- lm_deluge/util/pdf.py +0 -45
- lm_deluge-0.0.5.dist-info/METADATA +0 -127
- {lm_deluge-0.0.5.dist-info → lm_deluge-0.0.7.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.5.dist-info → lm_deluge-0.0.7.dist-info}/top_level.txt +0 -0
lm_deluge/api_requests/openai.py
CHANGED
@@ -58,13 +58,18 @@ class OpenAIRequest(APIRequestBase):
         self.request_header = {
             "Authorization": f"Bearer {os.getenv(self.model.api_key_env_var)}"
         }
+
         self.request_json = {
             "model": self.model.name,
             "messages": prompt.to_openai(),
             "temperature": sampling_params.temperature,
             "top_p": sampling_params.top_p,
-            "max_completion_tokens": sampling_params.max_new_tokens,
         }
+        # set max_tokens or max_completion_tokens dep. on provider
+        if "cohere" in self.model.api_base:
+            self.request_json["max_tokens"] = sampling_params.max_new_tokens
+        elif "openai" in self.model.api_base:
+            self.request_json["max_completion_tokens"] = sampling_params.max_new_tokens
         if self.model.reasoning_model:
             self.request_json["temperature"] = 1.0
             self.request_json["top_p"] = 1.0
@@ -84,6 +89,7 @@ class OpenAIRequest(APIRequestBase):
     async def handle_response(self, http_response: ClientResponse) -> APIResponse:
         is_error = False
         error_message = None
+        thinking = None
         completion = None
         input_tokens = None
         output_tokens = None
@@ -103,6 +109,8 @@ class OpenAIRequest(APIRequestBase):
         assert data is not None, "data is None"
         try:
             completion = data["choices"][0]["message"]["content"]
+            if "reasoning_content" in data["choices"][0]["message"]:
+                thinking = data["choices"][0]["message"]["reasoning_content"]
             input_tokens = data["usage"]["prompt_tokens"]
             output_tokens = data["usage"]["completion_tokens"]
             if self.logprobs and "logprobs" in data["choices"][0]:
@@ -135,6 +143,7 @@ class OpenAIRequest(APIRequestBase):
             error_message=error_message,
             prompt=self.prompt,
             logprobs=logprobs,
+            thinking=thinking,
             completion=completion,
             model_internal=self.model_name,
             sampling_params=self.sampling_params,
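
The hunks above make the token-limit key provider-dependent ("max_tokens" for Cohere's OpenAI-compatible endpoint, "max_completion_tokens" for OpenAI) and thread an optional thinking value, taken from "reasoning_content" when the provider returns it, through handle_response into the APIResponse. A minimal standalone sketch of that logic follows; build_request_json and extract_completion_and_thinking are illustrative names, not functions in the package, and only the key names and field accesses mirror the diff.

# Hypothetical sketch of the provider-dependent token-limit selection and
# reasoning_content extraction shown in the diff above; names outside the
# diff are assumptions.
def build_request_json(model_name: str, api_base: str, messages: list[dict],
                       temperature: float, top_p: float,
                       max_new_tokens: int) -> dict:
    request_json = {
        "model": model_name,
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
    }
    # Cohere's OpenAI-compatible endpoint expects "max_tokens", while the
    # OpenAI API uses "max_completion_tokens".
    if "cohere" in api_base:
        request_json["max_tokens"] = max_new_tokens
    elif "openai" in api_base:
        request_json["max_completion_tokens"] = max_new_tokens
    return request_json

def extract_completion_and_thinking(data: dict) -> tuple[str, str | None]:
    # Mirrors the handle_response change: some providers return a separate
    # "reasoning_content" field alongside the normal message content.
    message = data["choices"][0]["message"]
    completion = message["content"]
    thinking = message.get("reasoning_content")
    return completion, thinking
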
lm_deluge/client.py
CHANGED
@@ -85,6 +85,7 @@ class LLMClient:
     def __init__(
         self,
         model_names: list[str],
+        *,
         max_requests_per_minute: int,
         max_tokens_per_minute: int,
         max_concurrent_requests: int,
@@ -345,6 +346,7 @@ class LLMClient:
 
         # add cache hits back in
         for id, res in zip(cache_hit_ids, cache_hit_results):
+            res.cache_hit = True
             results[id] = res
 
         if return_completions_only: