lm-deluge 0.0.5__py3-none-any.whl → 0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

@@ -1,361 +0,0 @@
- # consider: https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/call-gemini-using-openai-library#call-chat-completions-api
- import asyncio
- from aiohttp import ClientResponse
- import json
- import os
- import time
- from tqdm import tqdm
- from typing import Callable
-
- from lm_deluge.prompt import Conversation
- from .base import APIRequestBase, APIResponse
- from ..tracker import StatusTracker
- from ..sampling_params import SamplingParams
- from ..models import APIModel
-
- from google.oauth2 import service_account
- from google.auth.transport.requests import Request
-
-
- def get_access_token(service_account_file: str):
-     """
-     Get the access token from environment variables if another coroutine in
-     this process has already fetched one recently; otherwise refresh it from
-     the service account file.
-     """
-     last_refreshed = os.getenv("VERTEX_TOKEN_LAST_REFRESHED", None)
-     last_refreshed = int(last_refreshed) if last_refreshed is not None else 0
-     token = os.getenv("VERTEX_API_TOKEN", None)
-
-     # tokens live for 60 minutes; treat anything younger than 50 as fresh
-     if token is not None and time.time() - last_refreshed < 60 * 50:
-         return token
-     else:
-         credentials = service_account.Credentials.from_service_account_file(
-             service_account_file,
-             scopes=["https://www.googleapis.com/auth/cloud-platform"],
-         )
-         credentials.refresh(Request())
-         token = credentials.token
-         os.environ["VERTEX_API_TOKEN"] = token
-         os.environ["VERTEX_TOKEN_LAST_REFRESHED"] = str(int(time.time()))
-
-         return token
-
-
- class VertexAnthropicRequest(APIRequestBase):
-     """
-     For Claude on Vertex, you'll also have to set the PROJECT_ID environment variable.
-     """
-
-     def __init__(
-         self,
-         task_id: int,
-         model_name: str,  # must correspond to registry
-         prompt: Conversation,
-         attempts_left: int,
-         status_tracker: StatusTracker,
-         retry_queue: asyncio.Queue,
-         results_arr: list,
-         request_timeout: int = 30,
-         sampling_params: SamplingParams = SamplingParams(),
-         pbar: tqdm | None = None,
-         callback: Callable | None = None,
-         debug: bool = False,
-     ):
-         super().__init__(
-             task_id=task_id,
-             model_name=model_name,
-             prompt=prompt,
-             attempts_left=attempts_left,
-             status_tracker=status_tracker,
-             retry_queue=retry_queue,
-             results_arr=results_arr,
-             request_timeout=request_timeout,
-             sampling_params=sampling_params,
-             pbar=pbar,
-             callback=callback,
-             debug=debug,
-         )
-         creds = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
-         if not creds:
-             raise RuntimeError(
-                 "GOOGLE_APPLICATION_CREDENTIALS not provided in environment"
-             )
-         token = get_access_token(creds)
-
-         self.model = APIModel.from_registry(model_name)
-         project_id = os.getenv("PROJECT_ID")
-         region = self.model.sample_region()
-
-         endpoint = f"https://{region}-aiplatform.googleapis.com"
-         # Anthropic models on Vertex are invoked via :rawPredict; :generateContent is a Gemini method
-         self.url = f"{endpoint}/v1/projects/{project_id}/locations/{region}/publishers/anthropic/models/{self.model.name}:rawPredict"
-         self.request_header = {
-             "Authorization": f"Bearer {token}",
-             "Content-Type": "application/json",
-         }
-         self.system_message, messages = prompt.to_anthropic()
-
-         self.request_json = {
-             "anthropic_version": "vertex-2023-10-16",
-             "messages": messages,
-             "temperature": self.sampling_params.temperature,
-             "top_p": self.sampling_params.top_p,
-             "max_tokens": self.sampling_params.max_new_tokens,
-         }
-         if self.system_message is not None:
-             self.request_json["system"] = self.system_message
-
-     async def handle_response(self, http_response: ClientResponse) -> APIResponse:
-         is_error = False
-         error_message = None
-         completion = None
-         input_tokens = None
-         output_tokens = None
-         status_code = http_response.status
-         mimetype = http_response.headers.get("Content-Type", None)
-         if 200 <= status_code < 300:
-             try:
-                 data = await http_response.json()
-                 completion = data["content"][0]["text"]
-                 input_tokens = data["usage"]["input_tokens"]
-                 output_tokens = data["usage"]["output_tokens"]
-             except Exception as e:
-                 is_error = True
-                 error_message = (
-                     f"Error calling .json() on response w/ status {status_code}: {e}"
-                 )
-         elif "json" in (mimetype or "").lower():
-             is_error = True  # expected status is 2xx, otherwise it's an error
-             data = await http_response.json()
-             error_message = json.dumps(data)
-         else:
-             is_error = True
-             text = await http_response.text()
-             error_message = text
-
-         # handle special kinds of errors. TODO: make sure these are correct for anthropic
-         if is_error and error_message is not None:
-             if (
-                 "rate limit" in error_message.lower()
-                 or "overloaded" in error_message.lower()
-                 or status_code == 429
-             ):
-                 error_message += " (Rate limit error, triggering cooldown.)"
-                 self.status_tracker.rate_limit_exceeded()
-             if "context length" in error_message.lower():
-                 error_message += " (Context length exceeded, set retries to 0.)"
-                 self.attempts_left = 0
-
-         return APIResponse(
-             id=self.task_id,
-             status_code=status_code,
-             is_error=is_error,
-             error_message=error_message,
-             prompt=self.prompt,
-             completion=completion,
-             model_internal=self.model_name,
-             sampling_params=self.sampling_params,
-             input_tokens=input_tokens,
-             output_tokens=output_tokens,
-         )
-
-
- SAFETY_SETTING_CATEGORIES = [
-     "HARM_CATEGORY_DANGEROUS_CONTENT",
-     "HARM_CATEGORY_HARASSMENT",
-     "HARM_CATEGORY_HATE_SPEECH",
-     "HARM_CATEGORY_SEXUALLY_EXPLICIT",
- ]
-
-
- class GeminiRequest(APIRequestBase):
-     """
-     For Gemini, you'll also have to set the PROJECT_ID environment variable.
-     """
-
-     def __init__(
-         self,
-         task_id: int,
-         model_name: str,  # must correspond to registry
-         prompt: Conversation,
-         attempts_left: int,
-         status_tracker: StatusTracker,
-         retry_queue: asyncio.Queue,
-         results_arr: list,
-         request_timeout: int = 30,
-         sampling_params: SamplingParams = SamplingParams(),
-         pbar: tqdm | None = None,
-         callback: Callable | None = None,
-         debug: bool = False,
-         all_model_names: list[str] | None = None,
-         all_sampling_params: list[SamplingParams] | None = None,
-     ):
-         super().__init__(
-             task_id=task_id,
-             model_name=model_name,
-             prompt=prompt,
-             attempts_left=attempts_left,
-             status_tracker=status_tracker,
-             retry_queue=retry_queue,
-             results_arr=results_arr,
-             request_timeout=request_timeout,
-             sampling_params=sampling_params,
-             pbar=pbar,
-             callback=callback,
-             debug=debug,
-             all_model_names=all_model_names,
-             all_sampling_params=all_sampling_params,
-         )
-         self.model = APIModel.from_registry(model_name)
-         credentials_file = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
-         if not credentials_file:
-             raise RuntimeError(
-                 "No credentials file found. Provide a Google credentials file and "
-                 "point to it with the GOOGLE_APPLICATION_CREDENTIALS environment variable."
-             )
-         token = get_access_token(credentials_file)
-         self.project_id = os.getenv("PROJECT_ID")
-         # sample a region, weighted by region counts
-         self.region = self.model.sample_region()
-         assert self.region is not None, "unable to sample region"
-         self.url = f"https://{self.region}-aiplatform.googleapis.com/v1/projects/{self.project_id}/locations/{self.region}/publishers/google/models/{self.model.name}:generateContent"
-
-         self.request_header = {
-             "Authorization": f"Bearer {token}",
-             "Content-Type": "application/json",
-         }
-         self.system_message, contents = prompt.to_gemini()
-         self.request_json = {
-             "contents": contents,
-             "generationConfig": {
-                 "stopSequences": [],
-                 "temperature": sampling_params.temperature,
-                 "maxOutputTokens": sampling_params.max_new_tokens,
-                 "topP": sampling_params.top_p,
-                 "topK": None,
-             },
-             "safetySettings": [
-                 {"category": category, "threshold": "BLOCK_NONE"}
-                 for category in SAFETY_SETTING_CATEGORIES
-             ],
-         }
-         if sampling_params.json_mode and self.model.supports_json:
-             self.request_json["generationConfig"]["responseMimeType"] = (
-                 "application/json"
-             )
-
-         if self.system_message is not None:
-             # assign a dict, not a one-element tuple (the original trailing comma was a bug)
-             self.request_json["systemInstruction"] = {
-                 "role": "SYSTEM",
-                 "parts": [{"text": self.system_message}],
-             }
-
-     async def handle_response(self, http_response: ClientResponse) -> APIResponse:
-         is_error = False
-         error_message = None
-         completion = None
-         input_tokens = None
-         output_tokens = None
-         finish_reason = None
-         data = None
-         retry_with_different_model = False
-         give_up_if_no_other_models = False
-         status_code = http_response.status
-         mimetype = http_response.headers.get("Content-Type", None)
-         if 200 <= status_code < 300:
-             try:
-                 data = await http_response.json()
-                 if "candidates" not in data:
-                     is_error = True
-                     if "promptFeedback" in data:
-                         error_message = "Prompt rejected. Feedback: " + str(
-                             data["promptFeedback"]
-                         )
-                     else:
-                         error_message = "No candidates in response."
-                     retry_with_different_model = True
-                     give_up_if_no_other_models = True
-                 else:
-                     candidate = data["candidates"][0]
-                     finish_reason = candidate["finishReason"]
-                     if "content" in candidate:
-                         parts = candidate["content"]["parts"]
-                         completion = " ".join([part["text"] for part in parts])
-                         usage = data["usageMetadata"]
-                         input_tokens = usage["promptTokenCount"]
-                         output_tokens = usage["candidatesTokenCount"]
-                     elif finish_reason == "RECITATION":
-                         is_error = True
-                         citations = candidate.get("citationMetadata", {}).get(
-                             "citations", []
-                         )
-                         urls = ",".join(
-                             [citation.get("uri", "") for citation in citations]
-                         )
-                         error_message = "Finish reason RECITATION. URLS: " + urls
-                         retry_with_different_model = True
-                     elif finish_reason == "OTHER":
-                         is_error = True
-                         error_message = "Finish reason OTHER."
-                         retry_with_different_model = True
-                     elif finish_reason == "SAFETY":
-                         is_error = True
-                         error_message = "Finish reason SAFETY."
-                         retry_with_different_model = True
-                     else:
-                         print("Actual structure of response:", data)
-                         is_error = True
-                         error_message = "No content in response."
-             except Exception as e:
-                 is_error = True
-                 error_message = f"Error calling .json() on response w/ status {status_code}: {e.__class__} {e}"
-                 if isinstance(e, KeyError):
-                     print("Actual structure of response:", data)
-         elif "json" in (mimetype or "").lower():
-             is_error = True
-             data = await http_response.json()
-             error_message = json.dumps(data)
-         else:
-             is_error = True
-             text = await http_response.text()
-             error_message = text
-
-         old_region = self.region
-         if is_error and error_message is not None:
-             if (
-                 "rate limit" in error_message.lower()
-                 or "temporarily out of capacity" in error_message.lower()
-                 or "exceeded" in error_message.lower()
-                 or status_code == 429
-             ):
-                 error_message += " (Rate limit error, triggering cooldown & retrying with different model.)"
-                 self.status_tracker.rate_limit_exceeded()
-                 retry_with_different_model = True  # if possible, retry with a different model
-         if is_error:
-             # resample the region in case the error is due to regional unavailability
-             self.region = self.model.sample_region()
-             assert self.region is not None, "Unable to sample region"
-             self.url = f"https://{self.region}-aiplatform.googleapis.com/v1/projects/{self.project_id}/locations/{self.region}/publishers/google/models/{self.model.name}:generateContent"
-
-         return APIResponse(
-             id=self.task_id,
-             status_code=status_code,
-             is_error=is_error,
-             error_message=error_message,
-             prompt=self.prompt,
-             completion=completion,
-             model_internal=self.model_name,
-             sampling_params=self.sampling_params,
-             input_tokens=input_tokens,
-             output_tokens=output_tokens,
-             region=old_region,
-             finish_reason=finish_reason,
-             retry_with_different_model=retry_with_different_model,
-             give_up_if_no_other_models=give_up_if_no_other_models,
-         )
-
-
- # class LlamaEndpointRequest(APIRequestBase):
- #     raise NotImplementedError("Llama endpoints are not implemented and never will be because Vertex AI sucks ass.")
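
The env-var token cache in `get_access_token` above lets many concurrent request objects share one credential refresh per ~50-minute window. A minimal sketch of the intended behavior, assuming a valid service-account JSON path:

```python
# First call refreshes against Google and caches the token in os.environ;
# the second call, inside the 50-minute freshness window, reuses it.
token_a = get_access_token("service-account.json")
token_b = get_access_token("service-account.json")
assert token_a == token_b
```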
lm_deluge/util/pdf.py DELETED
@@ -1,45 +0,0 @@
- import io
-
-
- def text_from_pdf(pdf: str | bytes | io.BytesIO):
-     """
-     Extract text from a PDF. Does NOT use OCR; extracts the literal text.
-     The source can be:
-     - A file path (str)
-     - Bytes of a PDF file
-     - A BytesIO object containing a PDF file
-     """
-     try:
-         import pymupdf  # pyright: ignore
-     except ImportError:
-         raise ImportError(
-             "pymupdf is required to extract text from PDFs. Install lm_deluge[pdf] or lm_deluge[full]."
-         )
-     if isinstance(pdf, str):
-         # It's a file path
-         doc = pymupdf.open(pdf)
-     elif isinstance(pdf, (bytes, io.BytesIO)):
-         # It's bytes or a BytesIO object
-         if isinstance(pdf, bytes):
-             pdf = io.BytesIO(pdf)
-         doc = pymupdf.open(stream=pdf, filetype="pdf")
-     else:
-         raise ValueError("Unsupported pdf_source type. Must be str, bytes, or BytesIO.")
-
-     text_content = []
-     for page in doc:
-         blocks = page.get_text("blocks", sort=True)
-         for block in blocks:
-             # block[4] contains the text content
-             text_content.append(block[4].strip())
-         text_content.append("\n")  # Add extra newlines between blocks
-
-     # Join all text content with newlines
-     full_text = "\n".join(text_content).strip()
-     # Collapse runs of spaces within each line while preserving newlines
-     # (splitting on all whitespace here was a bug: it destroyed the line breaks)
-     lines = [" ".join(line.split()) for line in full_text.split("\n")]
-     # Drop any lines that ended up empty
-     full_text = "\n".join([line for line in lines if line])
-
-     return full_text
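
For reference, a short usage sketch of `text_from_pdf` covering the three accepted source types (the file name is illustrative):

```python
import io
from lm_deluge.util.pdf import text_from_pdf

text = text_from_pdf("paper.pdf")          # file path
with open("paper.pdf", "rb") as f:
    raw = f.read()
text = text_from_pdf(raw)                  # raw bytes
text = text_from_pdf(io.BytesIO(raw))      # BytesIO object
```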
@@ -1,127 +0,0 @@
- Metadata-Version: 2.4
- Name: lm_deluge
- Version: 0.0.5
- Summary: Python utility for using LLM API models.
- Author-email: Benjamin Anderson <ben@trytaylor.ai>
- Requires-Python: >=3.10
- Description-Content-Type: text/markdown
- Requires-Dist: python-dotenv
- Requires-Dist: json5
- Requires-Dist: PyYAML
- Requires-Dist: pandas
- Requires-Dist: aiohttp
- Requires-Dist: tiktoken
- Requires-Dist: xxhash
- Requires-Dist: tqdm
- Requires-Dist: google-auth
- Requires-Dist: requests-aws4auth
- Requires-Dist: pydantic
- Requires-Dist: bs4
- Requires-Dist: lxml
- Provides-Extra: image
- Requires-Dist: pdf2image; extra == "image"
- Requires-Dist: pillow; extra == "image"
- Provides-Extra: pdf
- Requires-Dist: pdf2image; extra == "pdf"
- Requires-Dist: pymupdf; extra == "pdf"
- Provides-Extra: translate
- Requires-Dist: fasttext-wheel; extra == "translate"
- Requires-Dist: fasttext-langdetect; extra == "translate"
- Provides-Extra: full
- Requires-Dist: pillow; extra == "full"
- Requires-Dist: pdf2image; extra == "full"
- Requires-Dist: pymupdf; extra == "full"
- Requires-Dist: fasttext-wheel; extra == "full"
- Requires-Dist: fasttext-langdetect; extra == "full"
-
- # lm_deluge
-
- `lm_deluge` is a lightweight helper library for talking to large language model APIs. It wraps several providers under a single interface, handles rate limiting, and exposes a few useful utilities for common NLP tasks.
-
- ## Features
-
- - **Unified client** – send prompts to OpenAI‑compatible models, Anthropic, Cohere, and Vertex-hosted Claude models using the same API.
- - **Async or sync** – process prompts concurrently with `process_prompts_async` or run them synchronously with `process_prompts_sync`.
- - **Spray across providers** – configure multiple model names with weighting so requests are distributed across different providers.
- - **Caching** – optional LevelDB, SQLite, or custom caches to avoid duplicate calls.
- - **Embeddings and reranking** – helper functions for embedding text and reranking documents via Cohere/OpenAI endpoints.
- - **Built‑in tools** – simple `extract`, `translate` and `score_llm` helpers for common patterns.
-
- ## Installation
-
- ```bash
- pip install lm_deluge
- ```
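
The extras declared in the package metadata turn on the optional PDF, image, and translation helpers, for example:

```bash
pip install "lm_deluge[pdf]"    # adds pymupdf + pdf2image for PDF text extraction
pip install "lm_deluge[full]"   # all optional dependencies
```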
-
- The package relies on environment variables for API keys. Typical variables include `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `COHERE_API_KEY`, `META_API_KEY` (for Llama), and `GOOGLE_APPLICATION_CREDENTIALS` for Vertex.
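
For example, with placeholder values:

```bash
export OPENAI_API_KEY="sk-..."
export ANTHROPIC_API_KEY="sk-ant-..."
export GOOGLE_APPLICATION_CREDENTIALS="/path/to/service-account.json"
export PROJECT_ID="my-gcp-project"  # required alongside the credentials for Vertex models
```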
-
- ## Quickstart
-
- ```python
- from lm_deluge import LLMClient
-
- client = LLMClient.basic(
-     model=["gpt-4o-mini"],  # any model id from lm_deluge.models.registry
-     temperature=0.2,
-     max_new_tokens=256,
- )
-
- resp = client.process_prompts_sync(["Hello, world!"])  # returns list[APIResponse]
- print(resp[0].completion)
- ```
-
- ### Asynchronous usage
-
- ```python
- import asyncio
-
- async def main():
-     responses = await client.process_prompts_async(
-         ["an async call"],
-         return_completions_only=True,
-     )
-     print(responses[0])
-
- asyncio.run(main())
- ```
-
- ### Distributing requests across models
-
- You can provide multiple `model_names` and optional `model_weights` when creating an `LLMClient`. Each prompt is sent to one of the models according to those weights.
-
- ```python
- client = LLMClient(
-     model_names=["gpt-4o-mini", "claude-haiku-anthropic"],
-     model_weights="rate_limit",  # or a list like [0.7, 0.3]
-     max_requests_per_minute=5000,
-     max_tokens_per_minute=1_000_000,
-     max_concurrent_requests=100,
- )
- ```
-
- ### Provider-specific notes
-
- - **OpenAI and compatible providers** – set `OPENAI_API_KEY`. Model ids in the registry include OpenAI models as well as Meta Llama, Grok, and many others that expose OpenAI-style APIs.
- - **Anthropic** – set `ANTHROPIC_API_KEY`. Use model ids such as `claude-haiku-anthropic` or `claude-sonnet-anthropic`.
- - **Cohere** – set `COHERE_API_KEY`. Models like `command-r` are available.
- - **Vertex Claude** – set `GOOGLE_APPLICATION_CREDENTIALS` and `PROJECT_ID`. Use a model id such as `claude-sonnet-vertex`.
-
- The [models.py](src/lm_deluge/models.py) file lists every supported model and the required environment variable.
-
- ## Built‑in tools
-
- The `lm_deluge.llm_tools` package exposes a few helper functions (a usage sketch follows below):
-
- - `extract` – structure text or images into a Pydantic model based on a schema.
- - `translate` – translate a list of strings to English if needed.
- - `score_llm` – simple yes/no style scoring with optional log-probability output.
-
- Embeddings (`embed.embed_parallel_async`) and document reranking (`rerank.rerank_parallel_async`) are also provided.
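
A rough sketch of what calling these helpers might look like; the exact signatures live in `lm_deluge.llm_tools`, and the call shapes below are assumptions inferred from the descriptions above:

```python
from lm_deluge.llm_tools import translate, score_llm  # assumed import path

# hypothetical call shapes, not verified against the source
english = translate(["Bonjour le monde"])                     # list in, English list out
verdict = score_llm("Is this review positive?", "Loved it!")  # yes/no style score
```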
-
- ## Caching results
-
- `lm_deluge.cache` includes LevelDB, SQLite, and custom dictionary-based caches. Pass an instance via `LLMClient(..., cache=my_cache)` and previously seen prompts will not be re‑sent.
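
A minimal sketch of wiring in a cache; `SqliteCache` and its constructor are assumptions here, so check `lm_deluge.cache` for the real class names:

```python
from lm_deluge import LLMClient
from lm_deluge.cache import SqliteCache  # hypothetical name

client = LLMClient(
    model_names=["gpt-4o-mini"],
    cache=SqliteCache("responses.db"),  # assumed constructor
)

# the second call should be served from the cache rather than re-hitting the API
client.process_prompts_sync(["What is 2 + 2?"])
client.process_prompts_sync(["What is 2 + 2?"])
```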
-
- ## Development notes
-
- Models and costs are defined in [src/lm_deluge/models.py](src/lm_deluge/models.py). Conversations are built using the `Conversation` and `Message` helpers in [src/lm_deluge/prompt.py](src/lm_deluge/prompt.py), which also support images.