lm-deluge 0.0.15__tar.gz → 0.0.17__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lm-deluge might be problematic.

Files changed (94)
  1. {lm_deluge-0.0.15/src/lm_deluge.egg-info → lm_deluge-0.0.17}/PKG-INFO +35 -1
  2. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/README.md +34 -0
  3. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/pyproject.toml +1 -1
  4. lm_deluge-0.0.17/src/lm_deluge/api_requests/__init__.py +1 -0
  5. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/api_requests/anthropic.py +58 -84
  6. lm_deluge-0.0.17/src/lm_deluge/api_requests/base.py +120 -0
  7. lm_deluge-0.0.17/src/lm_deluge/api_requests/bedrock.py +298 -0
  8. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/api_requests/gemini.py +18 -44
  9. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/api_requests/mistral.py +30 -60
  10. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/api_requests/openai.py +147 -148
  11. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/api_requests/response.py +2 -1
  12. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/batches.py +1 -1
  13. lm_deluge-0.0.15/src/lm_deluge/computer_use/anthropic_tools.py → lm_deluge-0.0.17/src/lm_deluge/built_in_tools/anthropic/__init__.py +58 -5
  14. lm_deluge-0.0.17/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
  15. lm_deluge-0.0.17/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
  16. lm_deluge-0.0.17/src/lm_deluge/built_in_tools/anthropic/editor.py +559 -0
  17. lm_deluge-0.0.17/src/lm_deluge/built_in_tools/base.py +9 -0
  18. lm_deluge-0.0.17/src/lm_deluge/built_in_tools/openai.py +28 -0
  19. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/client.py +304 -150
  20. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/image.py +13 -8
  21. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/llm_tools/extract.py +23 -4
  22. lm_deluge-0.0.17/src/lm_deluge/llm_tools/ocr.py +1 -0
  23. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/models.py +39 -2
  24. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/prompt.py +43 -27
  25. lm_deluge-0.0.17/src/lm_deluge/request_context.py +75 -0
  26. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/tool.py +97 -15
  27. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/tracker.py +1 -0
  28. {lm_deluge-0.0.15 → lm_deluge-0.0.17/src/lm_deluge.egg-info}/PKG-INFO +35 -1
  29. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge.egg-info/SOURCES.txt +10 -33
  30. lm_deluge-0.0.17/tests/test_builtin_tools.py +58 -0
  31. lm_deluge-0.0.17/tests/test_native_mcp_server.py +66 -0
  32. lm_deluge-0.0.15/src/lm_deluge/api_requests/__init__.py +0 -3
  33. lm_deluge-0.0.15/src/lm_deluge/api_requests/base.py +0 -306
  34. lm_deluge-0.0.15/src/lm_deluge/api_requests/bedrock.py +0 -320
  35. lm_deluge-0.0.15/tests/test_all_models.py +0 -88
  36. lm_deluge-0.0.15/tests/test_batch_real.py +0 -95
  37. lm_deluge-0.0.15/tests/test_bedrock_computer_use.py +0 -378
  38. lm_deluge-0.0.15/tests/test_bedrock_models.py +0 -205
  39. lm_deluge-0.0.15/tests/test_cache.py +0 -56
  40. lm_deluge-0.0.15/tests/test_client_tracker_integration.py +0 -43
  41. lm_deluge-0.0.15/tests/test_computer_use.py +0 -103
  42. lm_deluge-0.0.15/tests/test_computer_use_integration.py +0 -277
  43. lm_deluge-0.0.15/tests/test_debug_format.py +0 -47
  44. lm_deluge-0.0.15/tests/test_file_integration.py +0 -156
  45. lm_deluge-0.0.15/tests/test_file_support.py +0 -210
  46. lm_deluge-0.0.15/tests/test_gemini_integration.py +0 -238
  47. lm_deluge-0.0.15/tests/test_image_models.py +0 -57
  48. lm_deluge-0.0.15/tests/test_image_utils.py +0 -21
  49. lm_deluge-0.0.15/tests/test_json_utils.py +0 -78
  50. lm_deluge-0.0.15/tests/test_logprobs_refactor.py +0 -306
  51. lm_deluge-0.0.15/tests/test_max_concurrent_requests.py +0 -38
  52. lm_deluge-0.0.15/tests/test_mcp_tools.py +0 -221
  53. lm_deluge-0.0.15/tests/test_openai_responses.py +0 -356
  54. lm_deluge-0.0.15/tests/test_prompt_caching.py +0 -257
  55. lm_deluge-0.0.15/tests/test_real_caching.py +0 -305
  56. lm_deluge-0.0.15/tests/test_real_caching_bedrock.py +0 -307
  57. lm_deluge-0.0.15/tests/test_retry_fix.py +0 -67
  58. lm_deluge-0.0.15/tests/test_rich_display.py +0 -114
  59. lm_deluge-0.0.15/tests/test_sampling_params.py +0 -13
  60. lm_deluge-0.0.15/tests/test_simple_gemini.py +0 -32
  61. lm_deluge-0.0.15/tests/test_tool_calls.py +0 -401
  62. lm_deluge-0.0.15/tests/test_tool_from_function.py +0 -150
  63. lm_deluge-0.0.15/tests/test_tool_validation.py +0 -36
  64. lm_deluge-0.0.15/tests/test_tracker_refactor.py +0 -99
  65. lm_deluge-0.0.15/tests/test_translate.py +0 -31
  66. lm_deluge-0.0.15/tests/test_xml_utils.py +0 -35
  67. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/LICENSE +0 -0
  68. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/setup.cfg +0 -0
  69. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/__init__.py +0 -0
  70. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/agent.py +0 -0
  71. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/api_requests/common.py +0 -0
  72. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
  73. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
  74. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
  75. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
  76. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
  77. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/cache.py +0 -0
  78. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/config.py +0 -0
  79. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/embed.py +0 -0
  80. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/errors.py +0 -0
  81. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/file.py +0 -0
  82. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/gemini_limits.py +0 -0
  83. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/llm_tools/__init__.py +0 -0
  84. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/llm_tools/score.py +0 -0
  85. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/llm_tools/translate.py +0 -0
  86. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/rerank.py +0 -0
  87. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/usage.py +0 -0
  88. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/util/json.py +0 -0
  89. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/util/logprobs.py +0 -0
  90. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/util/validation.py +0 -0
  91. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge/util/xml.py +0 -0
  92. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
  93. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge.egg-info/requires.txt +0 -0
  94. {lm_deluge-0.0.15 → lm_deluge-0.0.17}/src/lm_deluge.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lm_deluge
- Version: 0.0.15
+ Version: 0.0.17
  Summary: Python utility for using LLM API models.
  Author-email: Benjamin Anderson <ben@trytaylor.ai>
  Requires-Python: >=3.10
@@ -128,6 +128,30 @@ This just works. Images can be local images on disk, URLs, bytes, base64 data UR

  See a full multi-turn chat example in `examples/multiturn.md`.

+ ## Files
+
+ For models that support file uploads (OpenAI, Anthropic, and Gemini), you can easily include PDF files and other documents:
+
+ ```python
+ from lm_deluge import LLMClient, Conversation
+
+ # Simple file upload
+ client = LLMClient.basic("gpt-4.1-mini")
+ conversation = Conversation.user(
+     "Please summarize this document",
+     file="path/to/document.pdf"
+ )
+ resps = client.process_prompts_sync([conversation])
+
+ # You can also create File objects for more control
+ from lm_deluge import File
+ file = File("path/to/report.pdf", filename="Q4_Report.pdf")
+ conversation = Conversation.user("Analyze this financial report")
+ conversation.messages[0].parts.append(file)
+ ```
+
+ Files can be local paths, URLs, bytes, or base64 data URLs, just like images.
+
  ## Tool Use

  Define tools from Python functions and use them with any model:
@@ -187,6 +211,16 @@ for tool_call in resps[0].tool_calls:
  # this is dumb sorry will make it better
  tool_to_call = [x for x in tools if x.name == tool_call.name][0]
  tool_to_call.call(**tool_call.arguments) # in async code, use .acall()
+
+ # or use the built-in agent loop to handle this automatically
+ import asyncio
+
+ async def main():
+     conv = Conversation.user("List the files in the current directory")
+     conv, resp = await client.run_agent_loop(conv, tools=tools)
+     print(resp.content.completion)
+
+ asyncio.run(main())
  ```

  ### Prompt Caching (Anthropic)
@@ -101,6 +101,30 @@ This just works. Images can be local images on disk, URLs, bytes, base64 data UR

  See a full multi-turn chat example in `examples/multiturn.md`.

+ ## Files
+
+ For models that support file uploads (OpenAI, Anthropic, and Gemini), you can easily include PDF files and other documents:
+
+ ```python
+ from lm_deluge import LLMClient, Conversation
+
+ # Simple file upload
+ client = LLMClient.basic("gpt-4.1-mini")
+ conversation = Conversation.user(
+     "Please summarize this document",
+     file="path/to/document.pdf"
+ )
+ resps = client.process_prompts_sync([conversation])
+
+ # You can also create File objects for more control
+ from lm_deluge import File
+ file = File("path/to/report.pdf", filename="Q4_Report.pdf")
+ conversation = Conversation.user("Analyze this financial report")
+ conversation.messages[0].parts.append(file)
+ ```
+
+ Files can be local paths, URLs, bytes, or base64 data URLs, just like images.
+
  ## Tool Use

  Define tools from Python functions and use them with any model:
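The README text added above says that files, like images, can come from local paths, URLs, bytes, or base64 data URLs. A minimal sketch of the non-path case, assuming `File` accepts a URL source the same way the path example does (this usage is implied by the prose, not shown in the diff):

```python
from lm_deluge import Conversation, File

# Hypothetical usage: a File constructed from a URL instead of a local path.
remote_report = File("https://example.com/whitepaper.pdf", filename="whitepaper.pdf")
conv = Conversation.user("What are the key findings of this paper?")
conv.messages[0].parts.append(remote_report)
```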
@@ -160,6 +184,16 @@ for tool_call in resps[0].tool_calls:
  # this is dumb sorry will make it better
  tool_to_call = [x for x in tools if x.name == tool_call.name][0]
  tool_to_call.call(**tool_call.arguments) # in async code, use .acall()
+
+ # or use the built-in agent loop to handle this automatically
+ import asyncio
+
+ async def main():
+     conv = Conversation.user("List the files in the current directory")
+     conv, resp = await client.run_agent_loop(conv, tools=tools)
+     print(resp.content.completion)
+
+ asyncio.run(main())
  ```

  ### Prompt Caching (Anthropic)
@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]

  [project]
  name = "lm_deluge"
- version = "0.0.15"
+ version = "0.0.17"
  authors = [{ name = "Benjamin Anderson", email = "ben@trytaylor.ai" }]
  description = "Python utility for using LLM API models."
  readme = "README.md"
@@ -1,35 +1,39 @@
- from aiohttp import ClientResponse
  import json
  import os
- from typing import Callable
+
+ from aiohttp import ClientResponse

  from lm_deluge.prompt import (
+     CachePattern,
      Conversation,
      Message,
      Text,
-     ToolCall,
      Thinking,
-     CachePattern,
+     ToolCall,
  )
- from lm_deluge.tool import Tool
+ from lm_deluge.request_context import RequestContext
+ from lm_deluge.tool import MCPServer, Tool
  from lm_deluge.usage import Usage
- from .base import APIRequestBase, APIResponse

- from ..tracker import StatusTracker
  from ..config import SamplingParams
  from ..models import APIModel
- from ..computer_use.anthropic_tools import get_anthropic_cu_tools
+ from .base import APIRequestBase, APIResponse
+
+
+ def _add_beta(headers: dict, beta: str):
+     if "anthropic-beta" in headers and headers["anthropic-beta"]:
+         if beta not in headers["anthropic-beta"]:
+             headers["anthropic-beta"] += f",{beta}"
+     else:
+         headers["anthropic-beta"] = beta


  def _build_anthropic_request(
      model: APIModel,
      prompt: Conversation,
-     tools: list[Tool] | None,
+     tools: list[Tool | dict | MCPServer] | None,
      sampling_params: SamplingParams,
      cache_pattern: CachePattern | None = None,
-     computer_use: bool = False,
-     display_width: int = 1024,
-     display_height: int = 768,
  ):
      system_message, messages = prompt.to_anthropic(cache_pattern=cache_pattern)
      request_header = {
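For reference, a small runnable sketch (not part of the package) of how the `_add_beta` helper added above merges beta flags into a single comma-separated `anthropic-beta` header, skipping flags that are already present:

```python
def _add_beta(headers: dict, beta: str):
    # copied from the hunk above
    if "anthropic-beta" in headers and headers["anthropic-beta"]:
        if beta not in headers["anthropic-beta"]:
            headers["anthropic-beta"] += f",{beta}"
    else:
        headers["anthropic-beta"] = beta

headers = {"x-api-key": "sk-...", "anthropic-version": "2023-06-01"}
_add_beta(headers, "computer-use-2025-01-24")
_add_beta(headers, "mcp-client-2025-04-04")
_add_beta(headers, "computer-use-2025-01-24")  # already present, ignored
assert headers["anthropic-beta"] == "computer-use-2025-01-24,mcp-client-2025-04-04"
```

Note that the duplicate check is a substring test on the header value, which is sufficient for the fixed set of beta strings this module uses.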
@@ -38,10 +42,6 @@ def _build_anthropic_request(
          "content-type": "application/json",
      }

-     # Add beta header for Computer Use
-     if computer_use:
-         request_header["anthropic-beta"] = "computer-use-2025-01-24"
-
      request_json = {
          "model": model.name,
          "messages": messages,
@@ -69,89 +69,61 @@ def _build_anthropic_request(
          print("ignoring reasoning_effort for non-reasoning model")
      if system_message is not None:
          request_json["system"] = system_message
-     if tools or computer_use:
+     if tools:
+         mcp_servers = []
          tool_definitions = []
-         if tools:
-             tool_definitions.extend([tool.dump_for("anthropic") for tool in tools])
-         # Add Computer Use tools
-         if computer_use:
-             cu_tools = get_anthropic_cu_tools(
-                 model=model.id,
-                 display_width=display_width, # todo: set from ComputerUseParams
-                 display_height=display_height,
-             )
-             tool_definitions.extend(cu_tools)
+         for tool in tools:
+             if isinstance(tool, Tool):
+                 tool_definitions.append(tool.dump_for("anthropic"))
+             elif isinstance(tool, dict):
+                 tool_definitions.append(tool)
+                 # add betas if needed
+                 if tool["type"] in [
+                     "computer_20241022",
+                     "text_editor_20241022",
+                     "bash_20241022",
+                 ]:
+                     _add_beta(request_header, "computer-use-2024-10-22")
+                 elif tool["type"] == "computer_20250124":
+                     _add_beta(request_header, "computer-use-2025-01-24")
+                 elif tool["type"] == "code_execution_20250522":
+                     _add_beta(request_header, "code-execution-2025-05-22")
+             elif isinstance(tool, MCPServer):
+                 _add_beta(request_header, "mcp-client-2025-04-04")
+                 mcp_servers.append(tool.for_anthropic())

          # Add cache control to last tool if tools_only caching is specified
          if cache_pattern == "tools_only" and tool_definitions:
              tool_definitions[-1]["cache_control"] = {"type": "ephemeral"}

          request_json["tools"] = tool_definitions
+         if len(mcp_servers) > 0:
+             request_json["mcp_servers"] = mcp_servers

      return request_json, request_header


  class AnthropicRequest(APIRequestBase):
-     def __init__(
-         self,
-         task_id: int,
-         # should always be 'role', 'content' keys.
-         # internal logic should handle translating to specific API format
-         model_name: str, # must correspond to registry
-         prompt: Conversation,
-         attempts_left: int,
-         status_tracker: StatusTracker,
-         results_arr: list,
-         request_timeout: int = 30,
-         sampling_params: SamplingParams = SamplingParams(),
-         callback: Callable | None = None,
-         # for retries
-         all_model_names: list[str] | None = None,
-         all_sampling_params: list[SamplingParams] | None = None,
-         tools: list | None = None,
-         cache: CachePattern | None = None,
-         # Computer Use support
-         computer_use: bool = False,
-         display_width: int = 1024,
-         display_height: int = 768,
-     ):
-         super().__init__(
-             task_id=task_id,
-             model_name=model_name,
-             prompt=prompt,
-             attempts_left=attempts_left,
-             status_tracker=status_tracker,
-             results_arr=results_arr,
-             request_timeout=request_timeout,
-             sampling_params=sampling_params,
-             callback=callback,
-             all_model_names=all_model_names,
-             all_sampling_params=all_sampling_params,
-             tools=tools,
-             cache=cache,
-         )
-         self.computer_use = computer_use
-         self.display_width = display_width
-         self.display_height = display_height
-         self.model = APIModel.from_registry(model_name)
+     def __init__(self, context: RequestContext):
+         super().__init__(context=context)
+
+         self.model = APIModel.from_registry(self.context.model_name)
          self.url = f"{self.model.api_base}/messages"

          # Lock images as bytes if caching is enabled
-         if cache is not None:
-             prompt.lock_images_as_bytes()
+         if self.context.cache is not None:
+             self.context.prompt.lock_images_as_bytes()

          self.request_json, self.request_header = _build_anthropic_request(
              self.model,
-             prompt,
-             tools,
-             sampling_params,
-             cache,
-             computer_use,
-             display_width,
-             display_height,
+             self.context.prompt,
+             self.context.tools,
+             self.context.sampling_params,
+             self.context.cache,
          )

      async def handle_response(self, http_response: ClientResponse) -> APIResponse:
+         data = None
          is_error = False
          error_message = None
          thinking = None
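The new tool-handling loop above dispatches on the element type: `Tool` instances are serialized with `dump_for("anthropic")`, raw dicts are passed through as-is with a beta header attached for Anthropic's built-in tool types, and `MCPServer` entries are collected into a separate `mcp_servers` list under the `mcp-client-2025-04-04` beta. A compact restatement of the dict-tool mapping, extracted from the branch above for readability (not part of the package):

```python
# Beta header attached for each built-in tool "type" handled by the dict branch above.
BETA_BY_TOOL_TYPE = {
    "computer_20241022": "computer-use-2024-10-22",
    "text_editor_20241022": "computer-use-2024-10-22",
    "bash_20241022": "computer-use-2024-10-22",
    "computer_20250124": "computer-use-2025-01-24",
    "code_execution_20250522": "code-execution-2025-05-22",
}
```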
@@ -160,6 +132,7 @@ class AnthropicRequest(APIRequestBase):
          status_code = http_response.status
          mimetype = http_response.headers.get("Content-Type", None)
          rate_limits = {}
+         assert self.context.status_tracker
          for header in [
              "anthropic-ratelimit-requests-limit",
              "anthropic-ratelimit-requests-remaining",
@@ -215,20 +188,21 @@ class AnthropicRequest(APIRequestBase):
                  or "overloaded" in error_message.lower()
              ):
                  error_message += " (Rate limit error, triggering cooldown.)"
-                 self.status_tracker.rate_limit_exceeded()
+                 self.context.status_tracker.rate_limit_exceeded()
              if "context length" in error_message:
                  error_message += " (Context length exceeded, set retries to 0.)"
-                 self.attempts_left = 0
+                 self.context.attempts_left = 0

          return APIResponse(
-             id=self.task_id,
+             id=self.context.task_id,
              status_code=status_code,
              is_error=is_error,
              error_message=error_message,
-             prompt=self.prompt,
+             prompt=self.context.prompt,
              content=content,
              thinking=thinking,
-             model_internal=self.model_name,
-             sampling_params=self.sampling_params,
+             model_internal=self.context.model_name,
+             sampling_params=self.context.sampling_params,
              usage=usage,
+             raw_response=data,
          )
@@ -0,0 +1,120 @@
+ import asyncio
+ import traceback
+ from abc import ABC, abstractmethod
+
+ import aiohttp
+ from aiohttp import ClientResponse
+
+ from ..errors import raise_if_modal_exception
+ from ..request_context import RequestContext
+ from .response import APIResponse
+
+
+ class APIRequestBase(ABC):
+     """
+     Class for handling API requests. All model/endpoint-specific logic should be
+     handled by overriding __init__ and implementing the handle_response method.
+     For call_api to work, the __init__ must handle setting:
+         - url
+         - request_header
+         - request_json
+     """
+
+     def __init__(
+         self,
+         context: RequestContext,
+     ):
+         # If context is provided, use it; otherwise construct one from individual parameters
+         self.context = context
+
+         # Everything is now accessed through self.context - no copying!
+         self.system_prompt = None
+         self.result = []  # list of APIResponse objects from each attempt
+
+         # these should be set in the __init__ of the subclass
+         self.url = None
+         self.request_header = None
+         self.request_json = None
+         self.region = None
+
+     def increment_pbar(self):
+         if self.context.status_tracker:
+             self.context.status_tracker.increment_pbar()
+
+     def call_callback(self):
+         if self.context.callback is not None:
+             # the APIResponse in self.result includes all the information
+             self.context.callback(self.result[-1], self.context.status_tracker)
+
+     def handle_success(self, data):
+         self.call_callback()
+         if self.context.status_tracker:
+             self.context.status_tracker.task_succeeded(self.context.task_id)
+
+     async def execute_once(self) -> APIResponse:
+         """Send the HTTP request once and return the parsed APIResponse."""
+         assert self.context.status_tracker
+         try:
+             self.context.status_tracker.total_requests += 1
+             timeout = aiohttp.ClientTimeout(total=self.context.request_timeout)
+             async with aiohttp.ClientSession(timeout=timeout) as session:
+                 assert self.url is not None, "URL is not set"
+                 async with session.post(
+                     url=self.url,
+                     headers=self.request_header,
+                     json=self.request_json,
+                 ) as http_response:
+                     response: APIResponse = await self.handle_response(http_response)
+                     return response
+
+         except asyncio.TimeoutError:
+             return APIResponse(
+                 id=self.context.task_id,
+                 model_internal=self.context.model_name,
+                 prompt=self.context.prompt,
+                 sampling_params=self.context.sampling_params,
+                 status_code=None,
+                 is_error=True,
+                 error_message="Request timed out (terminated by client).",
+                 content=None,
+                 usage=None,
+             )
+
+         except Exception as e:
+             raise_if_modal_exception(e)
+             tb = traceback.format_exc()
+             print(tb)
+             return APIResponse(
+                 id=self.context.task_id,
+                 model_internal=self.context.model_name,
+                 prompt=self.context.prompt,
+                 sampling_params=self.context.sampling_params,
+                 status_code=None,
+                 is_error=True,
+                 error_message=f"Unexpected {type(e).__name__}: {str(e) or 'No message.'}",
+                 content=None,
+                 usage=None,
+             )
+
+     @abstractmethod
+     async def handle_response(self, http_response: ClientResponse) -> APIResponse:
+         raise NotImplementedError
+
+
+ def deduplicate_responses(results: list[APIRequestBase]) -> list[APIResponse]:
+     deduplicated = {}
+     for request in results:
+         if request.context.task_id not in deduplicated:
+             deduplicated[request.context.task_id] = request.result[-1]
+         else:
+             current_response: APIResponse = deduplicated[request.context.task_id]
+             # only replace if the current request has no completion and the new one does
+             if (
+                 request.result[-1].completion is not None
+                 and current_response.completion is None
+             ):
+                 deduplicated[request.context.task_id] = request.result[-1]
+
+     output = [deduplicated[request.context.task_id] for request in results]
+
+     return output
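The docstring in the new `APIRequestBase` spells out the subclass contract: `__init__` must set `url`, `request_header`, and `request_json`, and `handle_response` must turn the raw HTTP response into an `APIResponse`. A minimal hypothetical subclass sketch under those assumptions (the endpoint and payload are invented for illustration; the real providers live in the sibling modules such as `anthropic.py` and `openai.py`):

```python
from aiohttp import ClientResponse

from lm_deluge.api_requests.base import APIRequestBase
from lm_deluge.api_requests.response import APIResponse
from lm_deluge.request_context import RequestContext


class EchoRequest(APIRequestBase):
    """Hypothetical provider used only to illustrate the base-class contract."""

    def __init__(self, context: RequestContext):
        super().__init__(context=context)
        # the three attributes execute_once() needs, per the docstring above
        self.url = "https://api.example.com/v1/echo"  # assumed endpoint
        self.request_header = {"content-type": "application/json"}
        self.request_json = {"prompt": str(self.context.prompt)}

    async def handle_response(self, http_response: ClientResponse) -> APIResponse:
        data = await http_response.json()
        return APIResponse(
            id=self.context.task_id,
            model_internal=self.context.model_name,
            prompt=self.context.prompt,
            sampling_params=self.context.sampling_params,
            status_code=http_response.status,
            is_error=http_response.status != 200,
            error_message=None if http_response.status == 200 else str(data),
            content=None,  # a real subclass would parse `data` into message content
            usage=None,
        )
```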