lm-deluge 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lm-deluge might be problematic. Click here for more details.
- lm_deluge/__init__.py +6 -0
- lm_deluge/api_requests/__init__.py +3 -0
- lm_deluge/api_requests/anthropic.py +177 -0
- lm_deluge/api_requests/base.py +375 -0
- lm_deluge/api_requests/cohere.py +138 -0
- lm_deluge/api_requests/common.py +18 -0
- lm_deluge/api_requests/deprecated/bedrock.py +288 -0
- lm_deluge/api_requests/deprecated/deepseek.py +118 -0
- lm_deluge/api_requests/deprecated/mistral.py +120 -0
- lm_deluge/api_requests/google.py +0 -0
- lm_deluge/api_requests/openai.py +145 -0
- lm_deluge/api_requests/vertex.py +365 -0
- lm_deluge/cache.py +144 -0
- lm_deluge/client.py +760 -0
- lm_deluge/embed.py +392 -0
- lm_deluge/errors.py +8 -0
- lm_deluge/gemini_limits.py +65 -0
- lm_deluge/image.py +200 -0
- lm_deluge/llm_tools/__init__.py +11 -0
- lm_deluge/llm_tools/extract.py +111 -0
- lm_deluge/llm_tools/score.py +71 -0
- lm_deluge/llm_tools/translate.py +44 -0
- lm_deluge/models.py +957 -0
- lm_deluge/prompt.py +355 -0
- lm_deluge/rerank.py +338 -0
- lm_deluge/sampling_params.py +25 -0
- lm_deluge/tool.py +106 -0
- lm_deluge/tracker.py +12 -0
- lm_deluge/util/json.py +167 -0
- lm_deluge/util/logprobs.py +446 -0
- lm_deluge/util/pdf.py +45 -0
- lm_deluge/util/validation.py +46 -0
- lm_deluge/util/xml.py +291 -0
- lm_deluge-0.0.3.dist-info/METADATA +127 -0
- lm_deluge-0.0.3.dist-info/RECORD +37 -0
- lm_deluge-0.0.3.dist-info/WHEEL +5 -0
- lm_deluge-0.0.3.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import Literal
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
@dataclass
class SamplingParams:
    """
    Generation settings shared across backends.

    ``to_vllm`` forwards ``temperature``, ``top_p`` and ``max_new_tokens``;
    ``json_mode`` and ``reasoning_effort`` are not passed on by that method.
    """

    temperature: float = 0.0
    top_p: float = 1.0
    json_mode: bool = False
    max_new_tokens: int = 512
    reasoning_effort: Literal["low", "medium", "high", None] = None

    def to_vllm(self):
        """
        Build the matching ``vllm.SamplingParams`` instance.

        vLLM is imported lazily inside this method. If the import fails, an
        installation hint is printed and the ``ImportError`` is re-raised.
        """
        try:
            from vllm import SamplingParams as _VLLMParams  # pyright: ignore
        except ImportError:
            print(
                "Unable to import from vLLM. Make sure it's installed with `pip install vllm`."
            )
            raise

        # vLLM calls the generation budget ``max_tokens``.
        vllm_kwargs = {
            "temperature": self.temperature,
            "top_p": self.top_p,
            "max_tokens": self.max_new_tokens,
        }
        return _VLLMParams(**vllm_kwargs)
|
lm_deluge/tool.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
from typing import Any, Dict, Literal, Callable
|
|
2
|
+
from pydantic import BaseModel, Field
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class ToolSpec(BaseModel):
|
|
6
|
+
"""
|
|
7
|
+
Provider‑agnostic tool definition with no extra nesting.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
name: str
|
|
11
|
+
description: str
|
|
12
|
+
parameters: Dict[str, Any]
|
|
13
|
+
required: list[str] = Field(default_factory=list)
|
|
14
|
+
additionalProperties: bool | None = None # only
|
|
15
|
+
# if desired, can provide a callable to run the tool
|
|
16
|
+
run: Callable | None = None
|
|
17
|
+
|
|
18
|
+
def call(self, **kwargs):
|
|
19
|
+
if self.run is None:
|
|
20
|
+
raise ValueError("No run function provided")
|
|
21
|
+
return self.run(**kwargs)
|
|
22
|
+
|
|
23
|
+
def _json_schema(self, include_additional_properties=False) -> Dict[str, Any]:
|
|
24
|
+
return {
|
|
25
|
+
"type": "object",
|
|
26
|
+
"properties": self.parameters,
|
|
27
|
+
"required": self.required or [],
|
|
28
|
+
**(
|
|
29
|
+
{"additionalProperties": self.additionalProperties}
|
|
30
|
+
if self.additionalProperties is not None
|
|
31
|
+
and include_additional_properties
|
|
32
|
+
else {}
|
|
33
|
+
),
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
# ---------- dumpers ----------
|
|
37
|
+
def for_openai_responses(self) -> Dict[str, Any]:
|
|
38
|
+
return {
|
|
39
|
+
"type": "function",
|
|
40
|
+
"name": self.name,
|
|
41
|
+
"description": self.description,
|
|
42
|
+
"parameters": self._json_schema(include_additional_properties=True),
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
def for_openai_completions(self, *, strict: bool = True) -> Dict[str, Any]:
|
|
46
|
+
return {
|
|
47
|
+
"type": "function",
|
|
48
|
+
"function": {
|
|
49
|
+
"name": self.name,
|
|
50
|
+
"description": self.description,
|
|
51
|
+
"parameters": self._json_schema(),
|
|
52
|
+
"strict": strict,
|
|
53
|
+
},
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
def for_anthropic(self) -> Dict[str, Any]:
|
|
57
|
+
return {
|
|
58
|
+
"name": self.name,
|
|
59
|
+
"description": self.description,
|
|
60
|
+
"input_schema": self._json_schema(),
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
def for_google(self) -> Dict[str, Any]:
|
|
64
|
+
"""
|
|
65
|
+
Shape used by google.genai docs.
|
|
66
|
+
"""
|
|
67
|
+
return {
|
|
68
|
+
"name": self.name,
|
|
69
|
+
"description": self.description,
|
|
70
|
+
"parameters": self._json_schema(),
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
def dump_for(
|
|
74
|
+
self,
|
|
75
|
+
provider: Literal[
|
|
76
|
+
"openai-responses", "openai-completions", "anthropic", "google"
|
|
77
|
+
],
|
|
78
|
+
**kw,
|
|
79
|
+
) -> Dict[str, Any]:
|
|
80
|
+
if provider == "openai-responses":
|
|
81
|
+
return self.for_openai_responses()
|
|
82
|
+
if provider == "openai-completions":
|
|
83
|
+
return self.for_openai_completions(**kw)
|
|
84
|
+
if provider == "anthropic":
|
|
85
|
+
return self.for_anthropic()
|
|
86
|
+
if provider == "google":
|
|
87
|
+
return self.for_google()
|
|
88
|
+
raise ValueError(provider)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# ---- computer tools (for non-CUA models) ----
|
|
92
|
+
_BUTTONS = ["left", "right", "wheel", "back", "forward"]
|
|
93
|
+
|
|
94
|
+
# --- helpers ----
|
|
95
|
+
_COORD_OBJECT = {
|
|
96
|
+
"type": "object",
|
|
97
|
+
"properties": {
|
|
98
|
+
"x": {"type": "integer", "description": "X-coordinate in pixels"},
|
|
99
|
+
"y": {"type": "integer", "description": "Y-coordinate in pixels"},
|
|
100
|
+
},
|
|
101
|
+
"required": ["x", "y"],
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _coord_field(desc: str):
|
|
106
|
+
return {"type": "integer", "description": desc}
|
lm_deluge/tracker.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
@dataclass
|
|
5
|
+
class StatusTracker:
|
|
6
|
+
num_tasks_started: int = 0
|
|
7
|
+
num_tasks_in_progress: int = 0
|
|
8
|
+
num_tasks_succeeded: int = 0
|
|
9
|
+
num_tasks_failed: int = 0
|
|
10
|
+
num_rate_limit_errors: int = 0
|
|
11
|
+
time_of_last_rate_limit_error: int | float = 0
|
|
12
|
+
total_requests = 0
|
lm_deluge/util/json.py
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import json
|
|
3
|
+
import json5
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def extract_quoted_expressions(json_string: str):
    """
    Return the contents of every double-quoted string in ``json_string``.

    The surrounding quotes are dropped; backslash escapes inside a string
    are kept verbatim, not decoded.
    """
    # A string body is any run of escape pairs or characters that are
    # neither a quote nor a backslash.
    quoted = re.compile(r'"((?:\\.|[^"\\])*)"', re.DOTALL)
    return [match.group(1) for match in quoted.finditer(json_string)]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def string_list_to_dict(string_list: list[str]) -> dict:
    """
    Pair up a flat ``[key, value, key, value, ...]`` list into a dictionary.

    A trailing key without a value is ignored; when a key repeats, the
    later value wins.
    """
    keys = string_list[0::2]
    values = string_list[1::2]
    # zip stops at the shorter sequence, which drops an unpaired last key.
    return dict(zip(keys, values))
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def strip_json(json_string: str | None) -> str | None:
|
|
28
|
+
"""
|
|
29
|
+
Strips extra stuff from beginning & end of JSON string.
|
|
30
|
+
"""
|
|
31
|
+
if json_string is None:
|
|
32
|
+
return None
|
|
33
|
+
json_string = json_string.strip()
|
|
34
|
+
if json_string.startswith("```json"):
|
|
35
|
+
json_string = json_string.split("```json", 1)[1]
|
|
36
|
+
if "```json\n" in json_string:
|
|
37
|
+
json_string = json_string.split("```json\n", 1)[1]
|
|
38
|
+
json_string = json_string.strip("`").strip()
|
|
39
|
+
|
|
40
|
+
# not strict enough!
|
|
41
|
+
if "[" not in json_string and "{" not in json_string:
|
|
42
|
+
return None
|
|
43
|
+
|
|
44
|
+
# Find the first opening bracket/brace
|
|
45
|
+
start_idx = min(
|
|
46
|
+
(json_string.find("{") if "{" in json_string else len(json_string)),
|
|
47
|
+
(json_string.find("[") if "[" in json_string else len(json_string)),
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
# Find the last closing bracket/brace
|
|
51
|
+
end_idx = max(json_string.rfind("}"), json_string.rfind("]"))
|
|
52
|
+
|
|
53
|
+
if start_idx >= 0 and end_idx >= 0:
|
|
54
|
+
return json_string[start_idx : end_idx + 1]
|
|
55
|
+
|
|
56
|
+
return None
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def heal_json(json_string: str) -> str:
    """
    Attempts to heal malformed JSON by fixing common issues: trailing commas,
    an unterminated string, and unclosed brackets and braces.

    Brackets are tracked with a stack so the missing closers are appended in
    the correct nesting order. Characters inside double-quoted strings are
    skipped, so a ``{`` or ``[`` in string content is not treated as
    structure.

    :param json_string: The potentially malformed JSON string
    :return: A hopefully valid JSON string (the input itself if it is empty)
    """
    if not json_string:
        return json_string

    # Handle trailing commas before closing brackets.
    # NOTE: these substitutions are not string-aware and will also rewrite
    # ",}" or ",]" that appears inside a string value.
    json_string = re.sub(r",\s*}", "}", json_string)
    json_string = re.sub(r",\s*\]", "]", json_string)

    # One pass that tracks both the open-bracket stack and whether we are
    # currently inside a double-quoted string.
    stack = []
    in_string = False
    escaped = False
    for char in json_string:
        if in_string:
            if escaped:
                escaped = False
            elif char == "\\":
                escaped = True
            elif char == '"':
                in_string = False
            continue
        if char == '"':
            in_string = True
        elif char in "{[":
            stack.append(char)
        elif char == "}" and stack and stack[-1] == "{":
            stack.pop()
        elif char == "]" and stack and stack[-1] == "[":
            stack.pop()

    # Close a string that was still open at the end of the input. This must
    # happen before the brackets are appended.
    if in_string:
        if escaped:
            # A dangling backslash would escape the quote we add; drop it.
            json_string = json_string[:-1]
        json_string += '"'

    # Add missing closing braces/brackets, innermost first.
    closing = "".join("}" if bracket == "{" else "]" for bracket in reversed(stack))

    return json_string + closing
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def load_json(
    json_string: str | None,
    allow_json5: bool = True,
    allow_partial: bool = False,
    allow_healing: bool = True,
):
    """
    Loads a JSON string into a Python object.

    Parsing strategies are tried in order, and the first one that succeeds
    wins: strict ``json``, then ``json5``, then healing followed by
    ``json`` / ``json5``, then partial extraction of quoted strings.

    :param json_string: The JSON string to load.
    :param allow_json5: Whether to allow lax parsing of the JSON string.
    :param allow_partial: Whether to allow partial parsing of the JSON string.
        This will extract as many valid fields as possible.
    :param allow_healing: Whether to attempt to heal malformed JSON.
    :return: The loaded Python object.
    :raises ValueError: If the input is None, contains nothing that looks
        like JSON, or cannot be parsed by any enabled strategy.
    """
    if json_string is None:
        raise ValueError("Invalid (None) json_string")
    json_string = strip_json(json_string)
    # Only reject input that stripped down to nothing. Raising here
    # unconditionally made every call fail before any parsing was tried.
    if not json_string:
        raise ValueError("Invalid (empty) json_string")

    # Try standard JSON parsing
    try:
        return json.loads(json_string)
    except json.JSONDecodeError:
        pass  # fall through to the more lenient strategies

    # Try JSON5 parsing
    if allow_json5:
        try:
            return json5.loads(json_string)
        except Exception:
            pass  # best effort: any parser failure moves to the next strategy

    # Try healing the JSON
    if allow_healing:
        try:
            healed_json = heal_json(json_string)
            return json.loads(healed_json)
        except Exception:
            # If healing with standard JSON fails, try with JSON5
            if allow_json5:
                try:
                    healed_json = heal_json(json_string)
                    return json5.loads(healed_json)
                except Exception:
                    pass  # best effort: continue to partial parsing

    # Try partial parsing as a last resort
    if allow_partial:
        try:
            string_list = extract_quoted_expressions(json_string)
            return string_list_to_dict(string_list)
        except Exception:
            pass  # nothing left to try; report the failure below

    raise ValueError(f"Invalid JSON string: {json_string}")
|