lm-deluge 0.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lm_deluge-0.0.3/PKG-INFO +127 -0
- lm_deluge-0.0.3/README.md +91 -0
- lm_deluge-0.0.3/pyproject.toml +40 -0
- lm_deluge-0.0.3/setup.cfg +4 -0
- lm_deluge-0.0.3/src/lm_deluge/__init__.py +6 -0
- lm_deluge-0.0.3/src/lm_deluge/api_requests/__init__.py +3 -0
- lm_deluge-0.0.3/src/lm_deluge/api_requests/anthropic.py +177 -0
- lm_deluge-0.0.3/src/lm_deluge/api_requests/base.py +375 -0
- lm_deluge-0.0.3/src/lm_deluge/api_requests/cohere.py +138 -0
- lm_deluge-0.0.3/src/lm_deluge/api_requests/common.py +18 -0
- lm_deluge-0.0.3/src/lm_deluge/api_requests/deprecated/bedrock.py +288 -0
- lm_deluge-0.0.3/src/lm_deluge/api_requests/deprecated/deepseek.py +118 -0
- lm_deluge-0.0.3/src/lm_deluge/api_requests/deprecated/mistral.py +120 -0
- lm_deluge-0.0.3/src/lm_deluge/api_requests/google.py +0 -0
- lm_deluge-0.0.3/src/lm_deluge/api_requests/openai.py +145 -0
- lm_deluge-0.0.3/src/lm_deluge/api_requests/vertex.py +365 -0
- lm_deluge-0.0.3/src/lm_deluge/cache.py +144 -0
- lm_deluge-0.0.3/src/lm_deluge/client.py +760 -0
- lm_deluge-0.0.3/src/lm_deluge/embed.py +392 -0
- lm_deluge-0.0.3/src/lm_deluge/errors.py +8 -0
- lm_deluge-0.0.3/src/lm_deluge/gemini_limits.py +65 -0
- lm_deluge-0.0.3/src/lm_deluge/image.py +200 -0
- lm_deluge-0.0.3/src/lm_deluge/llm_tools/__init__.py +11 -0
- lm_deluge-0.0.3/src/lm_deluge/llm_tools/extract.py +111 -0
- lm_deluge-0.0.3/src/lm_deluge/llm_tools/score.py +71 -0
- lm_deluge-0.0.3/src/lm_deluge/llm_tools/translate.py +44 -0
- lm_deluge-0.0.3/src/lm_deluge/models.py +957 -0
- lm_deluge-0.0.3/src/lm_deluge/prompt.py +355 -0
- lm_deluge-0.0.3/src/lm_deluge/rerank.py +338 -0
- lm_deluge-0.0.3/src/lm_deluge/sampling_params.py +25 -0
- lm_deluge-0.0.3/src/lm_deluge/tool.py +106 -0
- lm_deluge-0.0.3/src/lm_deluge/tracker.py +12 -0
- lm_deluge-0.0.3/src/lm_deluge/util/json.py +167 -0
- lm_deluge-0.0.3/src/lm_deluge/util/logprobs.py +446 -0
- lm_deluge-0.0.3/src/lm_deluge/util/pdf.py +45 -0
- lm_deluge-0.0.3/src/lm_deluge/util/validation.py +46 -0
- lm_deluge-0.0.3/src/lm_deluge/util/xml.py +291 -0
- lm_deluge-0.0.3/src/lm_deluge.egg-info/PKG-INFO +127 -0
- lm_deluge-0.0.3/src/lm_deluge.egg-info/SOURCES.txt +41 -0
- lm_deluge-0.0.3/src/lm_deluge.egg-info/dependency_links.txt +1 -0
- lm_deluge-0.0.3/src/lm_deluge.egg-info/requires.txt +32 -0
- lm_deluge-0.0.3/src/lm_deluge.egg-info/top_level.txt +1 -0
- lm_deluge-0.0.3/tests/test_heal_json.py +65 -0
lm_deluge-0.0.3/PKG-INFO
ADDED
@@ -0,0 +1,127 @@
Metadata-Version: 2.4
Name: lm_deluge
Version: 0.0.3
Summary: Python utility for using LLM API models.
Author-email: Benjamin Anderson <ben@trytaylor.ai>
Requires-Python: >=3.9
Description-Content-Type: text/markdown
Requires-Dist: python-dotenv
Requires-Dist: json5
Requires-Dist: PyYAML
Requires-Dist: pandas
Requires-Dist: aiohttp
Requires-Dist: tiktoken
Requires-Dist: xxhash
Requires-Dist: tqdm
Requires-Dist: google-auth
Requires-Dist: requests-aws4auth
Requires-Dist: pydantic
Requires-Dist: bs4
Requires-Dist: lxml
Provides-Extra: image
Requires-Dist: pdf2image; extra == "image"
Requires-Dist: pillow; extra == "image"
Provides-Extra: pdf
Requires-Dist: pdf2image; extra == "pdf"
Requires-Dist: pymupdf; extra == "pdf"
Provides-Extra: translate
Requires-Dist: fasttext-wheel; extra == "translate"
Requires-Dist: fasttext-langdetect; extra == "translate"
Provides-Extra: full
Requires-Dist: pillow; extra == "full"
Requires-Dist: pdf2image; extra == "full"
Requires-Dist: pymupdf; extra == "full"
Requires-Dist: fasttext-wheel; extra == "full"
Requires-Dist: fasttext-langdetect; extra == "full"

# lm_deluge

`lm_deluge` is a lightweight helper library for talking to large language model APIs. It wraps several providers under a single interface, handles rate limiting, and exposes a few useful utilities for common NLP tasks.

## Features

- **Unified client** – send prompts to OpenAI‑compatible models, Anthropic, Cohere and Vertex hosted Claude models using the same API.
- **Async or sync** – process prompts concurrently with `process_prompts_async` or run them synchronously with `process_prompts_sync`.
- **Spray across providers** – configure multiple model names with weighting so requests are distributed across different providers.
- **Caching** – optional LevelDB, SQLite or custom caches to avoid duplicate calls.
- **Embeddings and reranking** – helper functions for embedding text and reranking documents via Cohere/OpenAI endpoints.
- **Built‑in tools** – simple `extract`, `translate` and `score_llm` helpers for common patterns.

## Installation

```bash
pip install lm_deluge
```

The package relies on environment variables for API keys. Typical variables include `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `COHERE_API_KEY`, `META_API_KEY` (for Llama) and `GOOGLE_APPLICATION_CREDENTIALS` for Vertex.

## Quickstart

```python
from lm_deluge import LLMClient

client = LLMClient.basic(
    model=["gpt-4o-mini"],  # any model id from lm_deluge.models.registry
    temperature=0.2,
    max_new_tokens=256,
)

resp = client.process_prompts_sync(["Hello, world!"])  # returns list[APIResponse]
print(resp[0].completion)
```

### Asynchronous usage

```python
import asyncio

async def main():
    responses = await client.process_prompts_async(
        ["an async call"],
        return_completions_only=True,
    )
    print(responses[0])

asyncio.run(main())
```

### Distributing requests across models

You can provide multiple `model_names` and optional `model_weights` when creating an `LLMClient`. Each prompt will be sent to one of the models based on those weights.

```python
client = LLMClient(
    model_names=["gpt-4o-mini", "claude-haiku-anthropic"],
    model_weights="rate_limit",  # or a list like [0.7, 0.3]
    max_requests_per_minute=5000,
    max_tokens_per_minute=1_000_000,
    max_concurrent_requests=100,
)
```

### Provider specific notes

- **OpenAI and compatible providers** – set `OPENAI_API_KEY`. Model ids in the registry include OpenAI models as well as Meta Llama, Grok and many others that expose OpenAI style APIs.
- **Anthropic** – set `ANTHROPIC_API_KEY`. Use model ids such as `claude-haiku-anthropic` or `claude-sonnet-anthropic`.
- **Cohere** – set `COHERE_API_KEY`. Models like `command-r` are available.
- **Vertex Claude** – set `GOOGLE_APPLICATION_CREDENTIALS` and `PROJECT_ID`. Use a model id such as `claude-sonnet-vertex`.

The [models.py](src/lm_deluge/models.py) file lists every supported model and the required environment variable.

## Built‑in tools

The `lm_deluge.llm_tools` package exposes a few helper functions:

- `extract` – structure text or images into a Pydantic model based on a schema.
- `translate` – translate a list of strings to English if needed.
- `score_llm` – simple yes/no style scoring with optional log probability output.

Embeddings (`embed.embed_parallel_async`) and document reranking (`rerank.rerank_parallel_async`) are also provided.

## Caching results

`lm_deluge.cache` includes LevelDB, SQLite and custom dictionary based caches. Pass an instance via `LLMClient(..., cache=my_cache)` and previously seen prompts will not be re‑sent.

## Development notes

Models and costs are defined in [src/lm_deluge/models.py](src/lm_deluge/models.py). Conversations are built using the `Conversation` and `Message` helpers in [src/lm_deluge/prompt.py](src/lm_deluge/prompt.py), which also support images.

lm_deluge-0.0.3/README.md
ADDED
@@ -0,0 +1,91 @@
# lm_deluge

`lm_deluge` is a lightweight helper library for talking to large language model APIs. It wraps several providers under a single interface, handles rate limiting, and exposes a few useful utilities for common NLP tasks.

## Features

- **Unified client** – send prompts to OpenAI‑compatible models, Anthropic, Cohere and Vertex hosted Claude models using the same API.
- **Async or sync** – process prompts concurrently with `process_prompts_async` or run them synchronously with `process_prompts_sync`.
- **Spray across providers** – configure multiple model names with weighting so requests are distributed across different providers.
- **Caching** – optional LevelDB, SQLite or custom caches to avoid duplicate calls.
- **Embeddings and reranking** – helper functions for embedding text and reranking documents via Cohere/OpenAI endpoints.
- **Built‑in tools** – simple `extract`, `translate` and `score_llm` helpers for common patterns.

## Installation

```bash
pip install lm_deluge
```

The package relies on environment variables for API keys. Typical variables include `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `COHERE_API_KEY`, `META_API_KEY` (for Llama) and `GOOGLE_APPLICATION_CREDENTIALS` for Vertex.
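
For a quick local setup these can simply be exported in the shell before running anything. The sketch below assumes only OpenAI, Anthropic and Vertex access are needed; since `python-dotenv` is a declared dependency, keeping the same variables in a `.env` file may also work.

```bash
# Only the variables for the providers you actually call are needed;
# models.py lists which variable each model id expects.
export OPENAI_API_KEY="sk-..."
export ANTHROPIC_API_KEY="sk-ant-..."

# For Vertex-hosted Claude models (see the provider notes below):
export GOOGLE_APPLICATION_CREDENTIALS="/path/to/service-account.json"
export PROJECT_ID="my-gcp-project"
```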

## Quickstart

```python
from lm_deluge import LLMClient

client = LLMClient.basic(
    model=["gpt-4o-mini"],  # any model id from lm_deluge.models.registry
    temperature=0.2,
    max_new_tokens=256,
)

resp = client.process_prompts_sync(["Hello, world!"])  # returns list[APIResponse]
print(resp[0].completion)
```

### Asynchronous usage

```python
import asyncio

async def main():
    responses = await client.process_prompts_async(
        ["an async call"],
        return_completions_only=True,
    )
    print(responses[0])

asyncio.run(main())
```

### Distributing requests across models

You can provide multiple `model_names` and optional `model_weights` when creating an `LLMClient`. Each prompt will be sent to one of the models based on those weights.

```python
client = LLMClient(
    model_names=["gpt-4o-mini", "claude-haiku-anthropic"],
    model_weights="rate_limit",  # or a list like [0.7, 0.3]
    max_requests_per_minute=5000,
    max_tokens_per_minute=1_000_000,
    max_concurrent_requests=100,
)
```

### Provider specific notes

- **OpenAI and compatible providers** – set `OPENAI_API_KEY`. Model ids in the registry include OpenAI models as well as Meta Llama, Grok and many others that expose OpenAI style APIs.
- **Anthropic** – set `ANTHROPIC_API_KEY`. Use model ids such as `claude-haiku-anthropic` or `claude-sonnet-anthropic`.
- **Cohere** – set `COHERE_API_KEY`. Models like `command-r` are available.
- **Vertex Claude** – set `GOOGLE_APPLICATION_CREDENTIALS` and `PROJECT_ID`. Use a model id such as `claude-sonnet-vertex`.

The [models.py](src/lm_deluge/models.py) file lists every supported model and the required environment variable.
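
To see those ids at runtime, the registry referenced in the quickstart comment can be inspected directly. This is a minimal sketch that assumes `lm_deluge.models.registry` is a dict-like mapping keyed by model id, which is not spelled out here:

```python
from lm_deluge import models

# Assumption: registry maps model id strings (the values passed to LLMClient)
# to their configuration entries; check src/lm_deluge/models.py for the
# actual structure.
for model_id in sorted(models.registry):
    print(model_id)
```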

## Built‑in tools

The `lm_deluge.llm_tools` package exposes a few helper functions:

- `extract` – structure text or images into a Pydantic model based on a schema.
- `translate` – translate a list of strings to English if needed.
- `score_llm` – simple yes/no style scoring with optional log probability output.

Embeddings (`embed.embed_parallel_async`) and document reranking (`rerank.rerank_parallel_async`) are also provided.
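
As an illustration of the `extract` pattern, the sketch below defines a Pydantic schema and hands it to the helper along with a client. The keyword names used here (`schema`, `client`) are hypothetical; the real signature lives in src/lm_deluge/llm_tools/extract.py:

```python
from pydantic import BaseModel

from lm_deluge import LLMClient
from lm_deluge.llm_tools import extract


class Invoice(BaseModel):
    vendor: str
    total: float


client = LLMClient.basic(model=["gpt-4o-mini"], temperature=0.0, max_new_tokens=512)

# Hypothetical keyword arguments -- adjust to the actual extract() signature.
invoices = extract(
    ["ACME Corp invoice. Total due: $1,200.00"],
    schema=Invoice,
    client=client,
)
```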

## Caching results

`lm_deluge.cache` includes LevelDB, SQLite and custom dictionary based caches. Pass an instance via `LLMClient(..., cache=my_cache)` and previously seen prompts will not be re‑sent.
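
A minimal sketch of wiring a cache into a client; the `SqliteCache` name and its constructor argument are assumptions, so check src/lm_deluge/cache.py for the real class names:

```python
from lm_deluge import LLMClient
from lm_deluge.cache import SqliteCache  # assumed class name

my_cache = SqliteCache("lm_responses.db")  # assumed constructor argument

client = LLMClient(
    model_names=["gpt-4o-mini"],
    cache=my_cache,
)

# Repeating an identical prompt should now be answered from the cache
# rather than being re-sent to the API.
resp = client.process_prompts_sync(["Hello, world!"])
```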

## Development notes

Models and costs are defined in [src/lm_deluge/models.py](src/lm_deluge/models.py). Conversations are built using the `Conversation` and `Message` helpers in [src/lm_deluge/prompt.py](src/lm_deluge/prompt.py), which also support images.
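
A hedged sketch of building a prompt by hand with those helpers; the constructor shape shown is an assumption based on the 'role'/'content' convention used elsewhere in the codebase, not a documented API:

```python
from lm_deluge.prompt import Conversation, Message

# Assumed constructor shape -- see src/lm_deluge/prompt.py for the real helpers,
# including image support.
convo = Conversation(
    messages=[
        Message(role="system", content="You are a terse assistant."),
        Message(role="user", content="Summarize the plot of Hamlet in one line."),
    ]
)
```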

lm_deluge-0.0.3/pyproject.toml
ADDED
@@ -0,0 +1,40 @@
[build-system]
requires = ["setuptools", "wheel"]

[project]
name = "lm_deluge"
version = "0.0.3"
authors = [{ name = "Benjamin Anderson", email = "ben@trytaylor.ai" }]
description = "Python utility for using LLM API models."
readme = "README.md"
requires-python = ">=3.9"
keywords = []
license = { text = "" }
classifiers = []
dependencies = [
    "python-dotenv",
    "json5",
    "PyYAML",
    "pandas",
    "aiohttp",
    "tiktoken",
    "xxhash",
    "tqdm",
    "google-auth",
    "requests-aws4auth",
    "pydantic",
    "bs4",
    "lxml",
]

[project.optional-dependencies]
image = ["pdf2image", "pillow"]
pdf = ["pdf2image", "pymupdf"]
translate = ["fasttext-wheel", "fasttext-langdetect"]
full = [
    "pillow",
    "pdf2image",
    "pymupdf",
    "fasttext-wheel",
    "fasttext-langdetect",
]
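
The optional dependency groups above correspond to pip extras, so image/PDF handling and translation support can be selected at install time:

```bash
pip install lm_deluge                 # core install
pip install "lm_deluge[pdf]"          # adds pdf2image + pymupdf
pip install "lm_deluge[translate]"    # adds fasttext-wheel + fasttext-langdetect
pip install "lm_deluge[full]"         # all optional dependencies
```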

lm_deluge-0.0.3/src/lm_deluge/api_requests/anthropic.py
ADDED
@@ -0,0 +1,177 @@
import asyncio
from aiohttp import ClientResponse
import json
import os
import warnings
import time
from tqdm import tqdm
from typing import Optional, Callable

from lm_deluge.prompt import Conversation
from .base import APIRequestBase, APIResponse

from ..tracker import StatusTracker
from ..sampling_params import SamplingParams
from ..models import APIModel


class AnthropicRequest(APIRequestBase):
    def __init__(
        self,
        task_id: int,
        # should always be 'role', 'content' keys.
        # internal logic should handle translating to specific API format
        model_name: str,  # must correspond to registry
        prompt: Conversation,
        attempts_left: int,
        status_tracker: StatusTracker,
        retry_queue: asyncio.Queue,
        results_arr: list,
        request_timeout: int = 30,
        sampling_params: SamplingParams = SamplingParams(),
        pbar: Optional[tqdm] = None,
        callback: Optional[Callable] = None,
        debug: bool = False,
        # for retries
        all_model_names: list[str] | None = None,
        all_sampling_params: list[SamplingParams] | None = None,
    ):
        super().__init__(
            task_id=task_id,
            model_name=model_name,
            prompt=prompt,
            attempts_left=attempts_left,
            status_tracker=status_tracker,
            retry_queue=retry_queue,
            results_arr=results_arr,
            request_timeout=request_timeout,
            sampling_params=sampling_params,
            pbar=pbar,
            callback=callback,
            debug=debug,
            all_model_names=all_model_names,
            all_sampling_params=all_sampling_params,
        )
        self.model = APIModel.from_registry(model_name)
        self.url = f"{self.model.api_base}/messages"

        self.system_message, messages = prompt.to_anthropic()
        self.request_header = {
            "x-api-key": os.getenv(self.model.api_key_env_var),
            "anthropic-version": "2023-06-01",
            "content-type": "application/json",
        }

        self.request_json = {
            "model": self.model.name,
            "messages": messages,
            "temperature": self.sampling_params.temperature,
            "top_p": self.sampling_params.top_p,
            "max_tokens": self.sampling_params.max_new_tokens,
        }
        # handle thinking
        if self.model.reasoning_model:
            if sampling_params.reasoning_effort:
                # translate reasoning effort of low, medium, high to budget tokens
                budget = {"low": 1024, "medium": 4096, "high": 16384}.get(
                    sampling_params.reasoning_effort
                )
                self.request_json["thinking"] = {
                    "type": "enabled",
                    "budget_tokens": budget,
                }
                self.request_json.pop("top_p")
                self.request_json["temperature"] = 1.0
                self.request_json["max_tokens"] += (
                    budget  # assume max tokens is max completion tokens
                )
            else:
                # no thinking
                self.request_json["thinking"] = {"type": "disabled"}
        else:
            if sampling_params.reasoning_effort:
                warnings.warn(
                    f"Ignoring reasoning_effort param for non-reasoning model: {model_name}"
                )
        if self.system_message is not None:
            self.request_json["system"] = self.system_message

        # print("request data:", self.request_json)

    async def handle_response(self, http_response: ClientResponse) -> APIResponse:
        is_error = False
        error_message = None
        thinking = None
        completion = None
        input_tokens = None
        output_tokens = None
        status_code = http_response.status
        mimetype = http_response.headers.get("Content-Type", None)
        rate_limits = {}
        for header in [
            "anthropic-ratelimit-requests-limit",
            "anthropic-ratelimit-requests-remaining",
            "anthropic-ratelimit-requests-reset",
            "anthropic-ratelimit-tokens-limit",
            "anthropic-ratelimit-tokens-remaining",
            "anthropic-ratelimit-tokens-reset",
        ]:
            rate_limits[header] = http_response.headers.get(header, None)
        if self.debug:
            print(f"Rate limits: {rate_limits}")
        if status_code >= 200 and status_code < 300:
            try:
                data = await http_response.json()
                print("response data:", data)
                content = data["content"]  # [0]["text"]
                print("content is length", len(content))
                for item in content:
                    if item["type"] == "text":
                        completion = item["text"]
                    elif item["type"] == "thinking":
                        thinking = item["thinking"]
                    elif item["type"] == "tool_use":
                        continue  # TODO: implement and report tool use
                input_tokens = data["usage"]["input_tokens"]
                output_tokens = data["usage"]["output_tokens"]
            except Exception as e:
                is_error = True
                error_message = (
                    f"Error calling .json() on response w/ status {status_code}: {e}"
                )
        elif mimetype and "json" in mimetype.lower():
            is_error = True  # expected status is 200, otherwise it's an error
            data = await http_response.json()
            error_message = json.dumps(data)

        else:
            is_error = True
            text = await http_response.text()
            error_message = text

        # handle special kinds of errors. TODO: make sure these are correct for anthropic
        if is_error and error_message is not None:
            if (
                "rate limit" in error_message.lower()
                or "overloaded" in error_message.lower()
            ):
                error_message += " (Rate limit error, triggering cooldown.)"
                self.status_tracker.time_of_last_rate_limit_error = time.time()
                self.status_tracker.num_rate_limit_errors += 1
            if "context length" in error_message:
                error_message += " (Context length exceeded, set retries to 0.)"
                self.attempts_left = 0

        return APIResponse(
            id=self.task_id,
            status_code=status_code,
            is_error=is_error,
            error_message=error_message,
            prompt=self.prompt,
            completion=completion,
            thinking=thinking,
            model_internal=self.model_name,
            sampling_params=self.sampling_params,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
        )