lm-deluge 0.0.3 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lm-deluge might be problematic.

Files changed (43)
  1. lm_deluge-0.0.3/PKG-INFO +127 -0
  2. lm_deluge-0.0.3/README.md +91 -0
  3. lm_deluge-0.0.3/pyproject.toml +40 -0
  4. lm_deluge-0.0.3/setup.cfg +4 -0
  5. lm_deluge-0.0.3/src/lm_deluge/__init__.py +6 -0
  6. lm_deluge-0.0.3/src/lm_deluge/api_requests/__init__.py +3 -0
  7. lm_deluge-0.0.3/src/lm_deluge/api_requests/anthropic.py +177 -0
  8. lm_deluge-0.0.3/src/lm_deluge/api_requests/base.py +375 -0
  9. lm_deluge-0.0.3/src/lm_deluge/api_requests/cohere.py +138 -0
  10. lm_deluge-0.0.3/src/lm_deluge/api_requests/common.py +18 -0
  11. lm_deluge-0.0.3/src/lm_deluge/api_requests/deprecated/bedrock.py +288 -0
  12. lm_deluge-0.0.3/src/lm_deluge/api_requests/deprecated/deepseek.py +118 -0
  13. lm_deluge-0.0.3/src/lm_deluge/api_requests/deprecated/mistral.py +120 -0
  14. lm_deluge-0.0.3/src/lm_deluge/api_requests/google.py +0 -0
  15. lm_deluge-0.0.3/src/lm_deluge/api_requests/openai.py +145 -0
  16. lm_deluge-0.0.3/src/lm_deluge/api_requests/vertex.py +365 -0
  17. lm_deluge-0.0.3/src/lm_deluge/cache.py +144 -0
  18. lm_deluge-0.0.3/src/lm_deluge/client.py +760 -0
  19. lm_deluge-0.0.3/src/lm_deluge/embed.py +392 -0
  20. lm_deluge-0.0.3/src/lm_deluge/errors.py +8 -0
  21. lm_deluge-0.0.3/src/lm_deluge/gemini_limits.py +65 -0
  22. lm_deluge-0.0.3/src/lm_deluge/image.py +200 -0
  23. lm_deluge-0.0.3/src/lm_deluge/llm_tools/__init__.py +11 -0
  24. lm_deluge-0.0.3/src/lm_deluge/llm_tools/extract.py +111 -0
  25. lm_deluge-0.0.3/src/lm_deluge/llm_tools/score.py +71 -0
  26. lm_deluge-0.0.3/src/lm_deluge/llm_tools/translate.py +44 -0
  27. lm_deluge-0.0.3/src/lm_deluge/models.py +957 -0
  28. lm_deluge-0.0.3/src/lm_deluge/prompt.py +355 -0
  29. lm_deluge-0.0.3/src/lm_deluge/rerank.py +338 -0
  30. lm_deluge-0.0.3/src/lm_deluge/sampling_params.py +25 -0
  31. lm_deluge-0.0.3/src/lm_deluge/tool.py +106 -0
  32. lm_deluge-0.0.3/src/lm_deluge/tracker.py +12 -0
  33. lm_deluge-0.0.3/src/lm_deluge/util/json.py +167 -0
  34. lm_deluge-0.0.3/src/lm_deluge/util/logprobs.py +446 -0
  35. lm_deluge-0.0.3/src/lm_deluge/util/pdf.py +45 -0
  36. lm_deluge-0.0.3/src/lm_deluge/util/validation.py +46 -0
  37. lm_deluge-0.0.3/src/lm_deluge/util/xml.py +291 -0
  38. lm_deluge-0.0.3/src/lm_deluge.egg-info/PKG-INFO +127 -0
  39. lm_deluge-0.0.3/src/lm_deluge.egg-info/SOURCES.txt +41 -0
  40. lm_deluge-0.0.3/src/lm_deluge.egg-info/dependency_links.txt +1 -0
  41. lm_deluge-0.0.3/src/lm_deluge.egg-info/requires.txt +32 -0
  42. lm_deluge-0.0.3/src/lm_deluge.egg-info/top_level.txt +1 -0
  43. lm_deluge-0.0.3/tests/test_heal_json.py +65 -0
@@ -0,0 +1,127 @@
+ Metadata-Version: 2.4
+ Name: lm_deluge
+ Version: 0.0.3
+ Summary: Python utility for using LLM API models.
+ Author-email: Benjamin Anderson <ben@trytaylor.ai>
+ Requires-Python: >=3.9
+ Description-Content-Type: text/markdown
+ Requires-Dist: python-dotenv
+ Requires-Dist: json5
+ Requires-Dist: PyYAML
+ Requires-Dist: pandas
+ Requires-Dist: aiohttp
+ Requires-Dist: tiktoken
+ Requires-Dist: xxhash
+ Requires-Dist: tqdm
+ Requires-Dist: google-auth
+ Requires-Dist: requests-aws4auth
+ Requires-Dist: pydantic
+ Requires-Dist: bs4
+ Requires-Dist: lxml
+ Provides-Extra: image
+ Requires-Dist: pdf2image; extra == "image"
+ Requires-Dist: pillow; extra == "image"
+ Provides-Extra: pdf
+ Requires-Dist: pdf2image; extra == "pdf"
+ Requires-Dist: pymupdf; extra == "pdf"
+ Provides-Extra: translate
+ Requires-Dist: fasttext-wheel; extra == "translate"
+ Requires-Dist: fasttext-langdetect; extra == "translate"
+ Provides-Extra: full
+ Requires-Dist: pillow; extra == "full"
+ Requires-Dist: pdf2image; extra == "full"
+ Requires-Dist: pymupdf; extra == "full"
+ Requires-Dist: fasttext-wheel; extra == "full"
+ Requires-Dist: fasttext-langdetect; extra == "full"
+
+ # lm_deluge
+
+ `lm_deluge` is a lightweight helper library for talking to large language model APIs. It wraps several providers under a single interface, handles rate limiting, and exposes a few useful utilities for common NLP tasks.
+
+ ## Features
+
+ - **Unified client** – send prompts to OpenAI-compatible models, Anthropic, Cohere and Vertex-hosted Claude models using the same API.
+ - **Async or sync** – process prompts concurrently with `process_prompts_async` or run them synchronously with `process_prompts_sync`.
+ - **Spray across providers** – configure multiple model names with weights so requests are distributed across different providers.
+ - **Caching** – optional LevelDB, SQLite or custom caches to avoid duplicate calls.
+ - **Embeddings and reranking** – helper functions for embedding text and reranking documents via Cohere/OpenAI endpoints.
+ - **Built-in tools** – simple `extract`, `translate` and `score_llm` helpers for common patterns.
+
+ ## Installation
+
+ ```bash
+ pip install lm_deluge
+ ```
+
+ The package relies on environment variables for API keys. Typical variables include `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `COHERE_API_KEY`, `META_API_KEY` (for Llama) and `GOOGLE_APPLICATION_CREDENTIALS` for Vertex.
+
+ ## Quickstart
+
+ ```python
+ from lm_deluge import LLMClient
+
+ client = LLMClient.basic(
+     model=["gpt-4o-mini"],  # any model id from lm_deluge.models.registry
+     temperature=0.2,
+     max_new_tokens=256,
+ )
+
+ resp = client.process_prompts_sync(["Hello, world!"])  # returns list[APIResponse]
+ print(resp[0].completion)
+ ```
+
+ ### Asynchronous usage
+
+ ```python
+ import asyncio
+
+ async def main():
+     responses = await client.process_prompts_async(
+         ["an async call"],
+         return_completions_only=True,
+     )
+     print(responses[0])
+
+ asyncio.run(main())
+ ```
+
+ ### Distributing requests across models
+
+ You can provide multiple `model_names` and optional `model_weights` when creating an `LLMClient`. Each prompt will be sent to one of the models based on those weights.
+
+ ```python
+ client = LLMClient(
+     model_names=["gpt-4o-mini", "claude-haiku-anthropic"],
+     model_weights="rate_limit",  # or a list like [0.7, 0.3]
+     max_requests_per_minute=5000,
+     max_tokens_per_minute=1_000_000,
+     max_concurrent_requests=100,
+ )
+ ```
+
+ ### Provider-specific notes
+
+ - **OpenAI and compatible providers** – set `OPENAI_API_KEY`. Model ids in the registry include OpenAI models as well as Meta Llama, Grok and many others that expose OpenAI-style APIs.
+ - **Anthropic** – set `ANTHROPIC_API_KEY`. Use model ids such as `claude-haiku-anthropic` or `claude-sonnet-anthropic`.
+ - **Cohere** – set `COHERE_API_KEY`. Models like `command-r` are available.
+ - **Vertex Claude** – set `GOOGLE_APPLICATION_CREDENTIALS` and `PROJECT_ID`. Use a model id such as `claude-sonnet-vertex`.
+
+ The [models.py](src/lm_deluge/models.py) file lists every supported model and the required environment variable.
+
+ ## Built-in tools
+
+ The `lm_deluge.llm_tools` package exposes a few helper functions:
+
+ - `extract` – structure text or images into a Pydantic model based on a schema.
+ - `translate` – translate a list of strings to English if needed.
+ - `score_llm` – simple yes/no style scoring with optional log probability output.
+
+ Embeddings (`embed.embed_parallel_async`) and document reranking (`rerank.rerank_parallel_async`) are also provided.
+
+ ## Caching results
+
+ `lm_deluge.cache` includes LevelDB, SQLite and custom dictionary-based caches. Pass an instance via `LLMClient(..., cache=my_cache)` and previously seen prompts will not be re-sent.
+
+ ## Development notes
+
+ Models and costs are defined in [src/lm_deluge/models.py](src/lm_deluge/models.py). Conversations are built using the `Conversation` and `Message` helpers in [src/lm_deluge/prompt.py](src/lm_deluge/prompt.py), which also support images.
@@ -0,0 +1,91 @@
+ # lm_deluge
+
+ `lm_deluge` is a lightweight helper library for talking to large language model APIs. It wraps several providers under a single interface, handles rate limiting, and exposes a few useful utilities for common NLP tasks.
+
+ ## Features
+
+ - **Unified client** – send prompts to OpenAI-compatible models, Anthropic, Cohere and Vertex-hosted Claude models using the same API.
+ - **Async or sync** – process prompts concurrently with `process_prompts_async` or run them synchronously with `process_prompts_sync`.
+ - **Spray across providers** – configure multiple model names with weights so requests are distributed across different providers.
+ - **Caching** – optional LevelDB, SQLite or custom caches to avoid duplicate calls.
+ - **Embeddings and reranking** – helper functions for embedding text and reranking documents via Cohere/OpenAI endpoints.
+ - **Built-in tools** – simple `extract`, `translate` and `score_llm` helpers for common patterns.
+
+ ## Installation
+
+ ```bash
+ pip install lm_deluge
+ ```
+
+ The package relies on environment variables for API keys. Typical variables include `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `COHERE_API_KEY`, `META_API_KEY` (for Llama) and `GOOGLE_APPLICATION_CREDENTIALS` for Vertex.
+
+ ## Quickstart
+
+ ```python
+ from lm_deluge import LLMClient
+
+ client = LLMClient.basic(
+     model=["gpt-4o-mini"],  # any model id from lm_deluge.models.registry
+     temperature=0.2,
+     max_new_tokens=256,
+ )
+
+ resp = client.process_prompts_sync(["Hello, world!"])  # returns list[APIResponse]
+ print(resp[0].completion)
+ ```
+
+ ### Asynchronous usage
+
+ ```python
+ import asyncio
+
+ async def main():
+     responses = await client.process_prompts_async(
+         ["an async call"],
+         return_completions_only=True,
+     )
+     print(responses[0])
+
+ asyncio.run(main())
+ ```
+
+ ### Distributing requests across models
+
+ You can provide multiple `model_names` and optional `model_weights` when creating an `LLMClient`. Each prompt will be sent to one of the models based on those weights.
+
+ ```python
+ client = LLMClient(
+     model_names=["gpt-4o-mini", "claude-haiku-anthropic"],
+     model_weights="rate_limit",  # or a list like [0.7, 0.3]
+     max_requests_per_minute=5000,
+     max_tokens_per_minute=1_000_000,
+     max_concurrent_requests=100,
+ )
+ ```
+
+ ### Provider-specific notes
+
+ - **OpenAI and compatible providers** – set `OPENAI_API_KEY`. Model ids in the registry include OpenAI models as well as Meta Llama, Grok and many others that expose OpenAI-style APIs.
+ - **Anthropic** – set `ANTHROPIC_API_KEY`. Use model ids such as `claude-haiku-anthropic` or `claude-sonnet-anthropic`.
+ - **Cohere** – set `COHERE_API_KEY`. Models like `command-r` are available.
+ - **Vertex Claude** – set `GOOGLE_APPLICATION_CREDENTIALS` and `PROJECT_ID`. Use a model id such as `claude-sonnet-vertex`.
+
+ The [models.py](src/lm_deluge/models.py) file lists every supported model and the required environment variable.
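+
+ If you want to see which model ids are available at runtime, the registry referenced in the quickstart comment can be inspected directly. A minimal sketch, assuming `registry` in `lm_deluge.models` is a mapping keyed by model id (the README does not pin down its exact type):
+
+ ```python
+ from lm_deluge.models import registry
+
+ # print every model id the client will accept
+ for model_id in sorted(registry):
+     print(model_id)
+ ```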
+
+ ## Built-in tools
+
+ The `lm_deluge.llm_tools` package exposes a few helper functions:
+
+ - `extract` – structure text or images into a Pydantic model based on a schema.
+ - `translate` – translate a list of strings to English if needed.
+ - `score_llm` – simple yes/no style scoring with optional log probability output.
+
+ Embeddings (`embed.embed_parallel_async`) and document reranking (`rerank.rerank_parallel_async`) are also provided.
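+
+ As a rough illustration of how these helpers are meant to be used — the exact signatures live in `src/lm_deluge/llm_tools/`, and the argument names below are assumptions rather than the documented API:
+
+ ```python
+ from lm_deluge import LLMClient
+ from lm_deluge.llm_tools import translate
+
+ client = LLMClient.basic(model=["gpt-4o-mini"])
+
+ # hypothetical call: translate a batch of strings to English where needed
+ english = translate(client, ["Bonjour tout le monde", "Hola, mundo"])
+ print(english)
+ ```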
+
+ ## Caching results
+
+ `lm_deluge.cache` includes LevelDB, SQLite and custom dictionary-based caches. Pass an instance via `LLMClient(..., cache=my_cache)` and previously seen prompts will not be re-sent.
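+
+ A minimal sketch of wiring a cache into the client. The concrete class names in `lm_deluge.cache` are not listed in this README, so the `SqliteCache` name and constructor below are assumptions — check `src/lm_deluge/cache.py` for the real ones:
+
+ ```python
+ from lm_deluge import LLMClient
+ from lm_deluge.cache import SqliteCache  # assumed name
+
+ cache = SqliteCache("responses.db")  # hypothetical constructor
+ client = LLMClient(model_names=["gpt-4o-mini"], cache=cache)
+
+ # an identical prompt sent twice should only hit the API once
+ client.process_prompts_sync(["What is the capital of France?"])
+ client.process_prompts_sync(["What is the capital of France?"])
+ ```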
+
+ ## Development notes
+
+ Models and costs are defined in [src/lm_deluge/models.py](src/lm_deluge/models.py). Conversations are built using the `Conversation` and `Message` helpers in [src/lm_deluge/prompt.py](src/lm_deluge/prompt.py), which also support images.
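+
+ A short sketch of building a multi-message prompt with those helpers — the constructor arguments are assumptions, so treat `src/lm_deluge/prompt.py` as the source of truth:
+
+ ```python
+ from lm_deluge.prompt import Conversation, Message
+
+ # hypothetical construction of a two-message conversation
+ convo = Conversation(
+     messages=[
+         Message(role="system", content="You are a helpful assistant."),
+         Message(role="user", content="Summarize this paragraph in one sentence."),
+     ]
+ )
+
+ # a Conversation could then be passed to the client in place of a raw string (assumption)
+ # client.process_prompts_sync([convo])
+ ```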
@@ -0,0 +1,40 @@
+ [build-system]
+ requires = ["setuptools", "wheel"]
+
+ [project]
+ name = "lm_deluge"
+ version = "0.0.3"
+ authors = [{ name = "Benjamin Anderson", email = "ben@trytaylor.ai" }]
+ description = "Python utility for using LLM API models."
+ readme = "README.md"
+ requires-python = ">=3.9"
+ keywords = []
+ license = { text = "" }
+ classifiers = []
+ dependencies = [
+     "python-dotenv",
+     "json5",
+     "PyYAML",
+     "pandas",
+     "aiohttp",
+     "tiktoken",
+     "xxhash",
+     "tqdm",
+     "google-auth",
+     "requests-aws4auth",
+     "pydantic",
+     "bs4",
+     "lxml",
+ ]
+
+ [project.optional-dependencies]
+ image = ["pdf2image", "pillow"]
+ pdf = ["pdf2image", "pymupdf"]
+ translate = ["fasttext-wheel", "fasttext-langdetect"]
+ full = [
+     "pillow",
+     "pdf2image",
+     "pymupdf",
+     "fasttext-wheel",
+     "fasttext-langdetect",
+ ]
@@ -0,0 +1,4 @@
+ [egg_info]
+ tag_build =
+ tag_date = 0
+
@@ -0,0 +1,6 @@
+ from .client import LLMClient, SamplingParams, APIResponse
+ import dotenv
+
+ dotenv.load_dotenv()
+
+ __all__ = ["LLMClient", "SamplingParams", "APIResponse"]
@@ -0,0 +1,3 @@
+ from .base import create_api_request
+
+ __all__ = ["create_api_request"]
@@ -0,0 +1,177 @@
+ import asyncio
+ from aiohttp import ClientResponse
+ import json
+ import os
+ import warnings
+ import time
+ from tqdm import tqdm
+ from typing import Optional, Callable
+
+ from lm_deluge.prompt import Conversation
+ from .base import APIRequestBase, APIResponse
+
+ from ..tracker import StatusTracker
+ from ..sampling_params import SamplingParams
+ from ..models import APIModel
+
+
+ class AnthropicRequest(APIRequestBase):
+     def __init__(
+         self,
+         task_id: int,
+         # should always be 'role', 'content' keys.
+         # internal logic should handle translating to specific API format
+         model_name: str,  # must correspond to registry
+         prompt: Conversation,
+         attempts_left: int,
+         status_tracker: StatusTracker,
+         retry_queue: asyncio.Queue,
+         results_arr: list,
+         request_timeout: int = 30,
+         sampling_params: SamplingParams = SamplingParams(),
+         pbar: Optional[tqdm] = None,
+         callback: Optional[Callable] = None,
+         debug: bool = False,
+         # for retries
+         all_model_names: list[str] | None = None,
+         all_sampling_params: list[SamplingParams] | None = None,
+     ):
+         super().__init__(
+             task_id=task_id,
+             model_name=model_name,
+             prompt=prompt,
+             attempts_left=attempts_left,
+             status_tracker=status_tracker,
+             retry_queue=retry_queue,
+             results_arr=results_arr,
+             request_timeout=request_timeout,
+             sampling_params=sampling_params,
+             pbar=pbar,
+             callback=callback,
+             debug=debug,
+             all_model_names=all_model_names,
+             all_sampling_params=all_sampling_params,
+         )
+         self.model = APIModel.from_registry(model_name)
+         self.url = f"{self.model.api_base}/messages"
+
+         self.system_message, messages = prompt.to_anthropic()
+         self.request_header = {
+             "x-api-key": os.getenv(self.model.api_key_env_var),
+             "anthropic-version": "2023-06-01",
+             "content-type": "application/json",
+         }
+
+         self.request_json = {
+             "model": self.model.name,
+             "messages": messages,
+             "temperature": self.sampling_params.temperature,
+             "top_p": self.sampling_params.top_p,
+             "max_tokens": self.sampling_params.max_new_tokens,
+         }
+         # handle thinking
+         if self.model.reasoning_model:
+             if sampling_params.reasoning_effort:
+                 # translate reasoning effort of low, medium, high to budget tokens
+                 budget = {"low": 1024, "medium": 4096, "high": 16384}.get(
+                     sampling_params.reasoning_effort
+                 )
+                 self.request_json["thinking"] = {
+                     "type": "enabled",
+                     "budget_tokens": budget,
+                 }
+                 self.request_json.pop("top_p")
+                 self.request_json["temperature"] = 1.0
+                 self.request_json["max_tokens"] += (
+                     budget  # assume max tokens is max completion tokens
+                 )
+             else:
+                 # no thinking
+                 self.request_json["thinking"] = {"type": "disabled"}
+         else:
+             if sampling_params.reasoning_effort:
+                 warnings.warn(
+                     f"Ignoring reasoning_effort param for non-reasoning model: {model_name}"
+                 )
+         if self.system_message is not None:
+             self.request_json["system"] = self.system_message
+
+         # print("request data:", self.request_json)
+
+     async def handle_response(self, http_response: ClientResponse) -> APIResponse:
+         is_error = False
+         error_message = None
+         thinking = None
+         completion = None
+         input_tokens = None
+         output_tokens = None
+         status_code = http_response.status
+         mimetype = http_response.headers.get("Content-Type", None)
+         rate_limits = {}
+         for header in [
+             "anthropic-ratelimit-requests-limit",
+             "anthropic-ratelimit-requests-remaining",
+             "anthropic-ratelimit-requests-reset",
+             "anthropic-ratelimit-tokens-limit",
+             "anthropic-ratelimit-tokens-remaining",
+             "anthropic-ratelimit-tokens-reset",
+         ]:
+             rate_limits[header] = http_response.headers.get(header, None)
+         if self.debug:
+             print(f"Rate limits: {rate_limits}")
+         if status_code >= 200 and status_code < 300:
+             try:
+                 data = await http_response.json()
+                 print("response data:", data)
+                 content = data["content"]  # [0]["text"]
+                 print("content is length", len(content))
+                 for item in content:
+                     if item["type"] == "text":
+                         completion = item["text"]
+                     elif item["type"] == "thinking":
+                         thinking = item["thinking"]
+                     elif item["type"] == "tool_use":
+                         continue  # TODO: implement and report tool use
+                 input_tokens = data["usage"]["input_tokens"]
+                 output_tokens = data["usage"]["output_tokens"]
+             except Exception as e:
+                 is_error = True
+                 error_message = (
+                     f"Error calling .json() on response w/ status {status_code}: {e}"
+                 )
+         elif mimetype and "json" in mimetype.lower():
+             is_error = True  # expected status is 200, otherwise it's an error
+             data = await http_response.json()
+             error_message = json.dumps(data)
+         else:
+             is_error = True
+             text = await http_response.text()
+             error_message = text
+
+         # handle special kinds of errors. TODO: make sure these are correct for anthropic
+         if is_error and error_message is not None:
+             if (
+                 "rate limit" in error_message.lower()
+                 or "overloaded" in error_message.lower()
+             ):
+                 error_message += " (Rate limit error, triggering cooldown.)"
+                 self.status_tracker.time_of_last_rate_limit_error = time.time()
+                 self.status_tracker.num_rate_limit_errors += 1
+             if "context length" in error_message:
+                 error_message += " (Context length exceeded, set retries to 0.)"
+                 self.attempts_left = 0
+
+         return APIResponse(
+             id=self.task_id,
+             status_code=status_code,
+             is_error=is_error,
+             error_message=error_message,
+             prompt=self.prompt,
+             completion=completion,
+             thinking=thinking,
+             model_internal=self.model_name,
+             sampling_params=self.sampling_params,
+             input_tokens=input_tokens,
+             output_tokens=output_tokens,
+         )