llmcomp 1.0.0__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llmcomp/__init__.py +4 -0
- llmcomp/config.py +44 -38
- llmcomp/default_adapters.py +81 -0
- llmcomp/finetuning/__init__.py +2 -0
- llmcomp/finetuning/manager.py +490 -0
- llmcomp/finetuning/update_jobs.py +38 -0
- llmcomp/question/question.py +11 -31
- llmcomp/question/result.py +58 -6
- llmcomp/runner/chat_completion.py +6 -8
- llmcomp/runner/model_adapter.py +98 -0
- llmcomp/runner/runner.py +74 -63
- {llmcomp-1.0.0.dist-info → llmcomp-1.2.0.dist-info}/METADATA +87 -25
- llmcomp-1.2.0.dist-info/RECORD +19 -0
- llmcomp-1.2.0.dist-info/entry_points.txt +2 -0
- llmcomp-1.0.0.dist-info/RECORD +0 -13
- {llmcomp-1.0.0.dist-info → llmcomp-1.2.0.dist-info}/WHEEL +0 -0
- {llmcomp-1.0.0.dist-info → llmcomp-1.2.0.dist-info}/licenses/LICENSE +0 -0
llmcomp/__init__.py
CHANGED
@@ -1,3 +1,7 @@
 from llmcomp.config import Config
 from llmcomp.question.question import Question
+from llmcomp.runner.model_adapter import ModelAdapter
 from llmcomp.runner.runner import Runner
+
+# Import to register default model adapters
+import llmcomp.default_adapters  # noqa: F401
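With this change, simply importing the package registers the default model adapters as a side effect, and ModelAdapter becomes a top-level export. A minimal sketch of what user code can rely on after upgrading:

    import llmcomp  # pulls in llmcomp.default_adapters, which registers the default handlers

    # ModelAdapter is now re-exported alongside Config, Question, and Runner
    from llmcomp import Config, ModelAdapter, Question, Runner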
llmcomp/config.py
CHANGED
@@ -28,14 +28,14 @@ class NoClientForModel(Exception):
     pass


-def _get_api_keys(env_var_name: str, *, include_suffixed: bool = True) -> list[str]:
+def _get_api_keys(env_var_name: str, *, include_suffixed: bool = True) -> list[tuple[str, str]]:
     """Get API keys from environment variable(s).

     Args:
         env_var_name: Base environment variable name (e.g., "OPENAI_API_KEY")
         include_suffixed: If True, also look for {env_var_name}_* variants (default: True)

-    Returns list of
+    Returns list of (env_var_name, api_key) tuples found.
     """
     key_names = [env_var_name]

@@ -44,11 +44,10 @@ def _get_api_keys(env_var_name: str, *, include_suffixed: bool = True) -> list[s
            if env_var.startswith(f"{env_var_name}_"):
                key_names.append(env_var)

-
-    return [key for key in keys if key is not None]
+    return [(name, os.getenv(name)) for name in key_names if os.getenv(name) is not None]


-def _discover_url_key_pairs() -> list[tuple[str, str]]:
+def _discover_url_key_pairs() -> list[tuple[str, str, str]]:
     """Discover URL-key pairs from environment variables.

     Discovers (including _* suffix variants for each):
@@ -56,21 +55,21 @@ def _discover_url_key_pairs() -> list[tuple[str, str]]:
     - OPENROUTER_API_KEY for OpenRouter
     - TINKER_API_KEY for Tinker (OpenAI-compatible)

-    Returns list of (base_url, api_key) tuples.
+    Returns list of (base_url, api_key, env_var_name) tuples.
     """
     url_pairs = []

     # OpenAI
-    for key in _get_api_keys("OPENAI_API_KEY"):
-        url_pairs.append(("https://api.openai.com/v1", key))
+    for env_name, key in _get_api_keys("OPENAI_API_KEY"):
+        url_pairs.append(("https://api.openai.com/v1", key, env_name))

     # OpenRouter
-    for key in _get_api_keys("OPENROUTER_API_KEY"):
-        url_pairs.append(("https://openrouter.ai/api/v1", key))
+    for env_name, key in _get_api_keys("OPENROUTER_API_KEY"):
+        url_pairs.append(("https://openrouter.ai/api/v1", key, env_name))

     # Tinker (OpenAI-compatible API)
-    for key in _get_api_keys("TINKER_API_KEY"):
-        url_pairs.append(("https://tinker.thinkingmachines.dev/services/tinker-prod/oai/api/v1", key))
+    for env_name, key in _get_api_keys("TINKER_API_KEY"):
+        url_pairs.append(("https://tinker.thinkingmachines.dev/services/tinker-prod/oai/api/v1", key, env_name))

     return url_pairs

@@ -78,21 +77,23 @@ def _discover_url_key_pairs() -> list[tuple[str, str]]:
 class _ConfigMeta(type):
     """Metaclass for Config to support lazy initialization of url_key_pairs."""

-    _url_key_pairs: list[tuple[str, str]] | None = None
+    _url_key_pairs: list[tuple[str, str, str]] | None = None

     @property
-    def url_key_pairs(cls) -> list[tuple[str, str]]:
+    def url_key_pairs(cls) -> list[tuple[str, str, str]]:
         """URL-key pairs for client creation.

         Auto-discovered from environment variables on first access.
         Users can modify this list (add/remove pairs).
+
+        Returns list of (base_url, api_key, env_var_name) tuples.
         """
         if cls._url_key_pairs is None:
             cls._url_key_pairs = _discover_url_key_pairs()
         return cls._url_key_pairs

     @url_key_pairs.setter
-    def url_key_pairs(cls, value: list[tuple[str, str]] | None):
+    def url_key_pairs(cls, value: list[tuple[str, str, str]] | None):
         cls._url_key_pairs = value


@@ -106,6 +107,7 @@ class Config(metaclass=_ConfigMeta):
     # Default values for reset()
     _defaults = {
         "timeout": 60,
+        "reasoning_effort": "none",
         "max_workers": 100,
         "cache_dir": "llmcomp_cache",
         "yaml_dir": "questions",
@@ -115,6 +117,11 @@
     # API request timeout in seconds
     timeout: int = _defaults["timeout"]

+    # Reasoning effort for reasoning models (o1, o3, gpt-5, etc.)
+    # Available values: "none", "minimal", "low", "medium", "high", "xhigh"
+    # NOTE: with "none" (default), you don't get answers from models before gpt-5.1
+    reasoning_effort: str = _defaults["reasoning_effort"]
+
     # Maximum number of concurrent API requests (total across all models, not per model).
     # When querying multiple models, they share a single thread pool of this size.
     max_workers: int = _defaults["max_workers"]
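Because reasoning_effort is a plain Config attribute backed by _defaults, it can be set once, globally, before any requests are issued (and presumably restored by reset(), which the comment above ties to _defaults). A minimal sketch; the "medium" value is only an illustration:

    from llmcomp import Config

    # One of "none", "minimal", "low", "medium", "high", "xhigh".
    # Per the NOTE above, the "none" default yields no answers from
    # reasoning models older than gpt-5.1.
    Config.reasoning_effort = "medium"

The handler that actually attaches this value to requests is registered in the new default_adapters.py, shown below.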
@@ -188,7 +195,11 @@
     @classmethod
     def _find_openai_client(cls, model: str) -> openai.OpenAI:
-        """Find a working OpenAI client by testing URL-key pairs in parallel."""
+        """Find a working OpenAI client by testing URL-key pairs in parallel.
+
+        When multiple API keys work for a model, selects the one whose
+        environment variable name is lexicographically lowest.
+        """
         all_pairs = cls.url_key_pairs

         if not all_pairs:
@@ -197,42 +208,37 @@
                 "Set an API key (e.g. OPENAI_API_KEY) or Config.url_key_pairs."
             )

-        # Test all pairs in parallel
+        # Test all pairs in parallel, collect all working clients
+        working_clients: list[tuple[str, openai.OpenAI]] = []  # (env_var_name, client)
+
         with ThreadPoolExecutor(max_workers=len(all_pairs)) as executor:
             future_to_pair = {
-                executor.submit(cls._test_url_key_pair, model, url, key): (url, key
+                executor.submit(cls._test_url_key_pair, model, url, key): (url, key, env_name)
+                for url, key, env_name in all_pairs
             }

             for future in as_completed(future_to_pair):
+                url, key, env_name = future_to_pair[future]
                 client = future.result()
                 if client:
-
-
-
-
+                    working_clients.append((env_name, client))
+
+        if not working_clients:
+            raise NoClientForModel(f"No working API client found for model {model}")

-
+        # Select client with lexicographically lowest env var name
+        working_clients.sort(key=lambda x: x[0])
+        return working_clients[0][1]

     @classmethod
     def _test_url_key_pair(cls, model: str, url: str, key: str) -> openai.OpenAI | None:
         """Test if a url-key pair works for the given model."""
+        from llmcomp.runner.model_adapter import ModelAdapter
+
         try:
             client = openai.OpenAI(api_key=key, base_url=url)
-
-
-                "model": model,
-                "messages": [{"role": "user", "content": "Hi"}],
-                "timeout": 30,  # tinker sometimes takes a while
-            }
-            if not (model.startswith("o") or model.startswith("gpt-5")):
-                args["max_tokens"] = 1
-            else:
-                if model.startswith("gpt-5"):
-                    args["max_completion_tokens"] = 16
-                else:
-                    args["max_completion_tokens"] = 1
-
-            openai_chat_completion(**args)
+            params = ModelAdapter.test_request_params(model)
+            openai_chat_completion(client=client, **params)
         except (
             openai.NotFoundError,
             openai.BadRequestError,
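Two practical consequences of this hunk. First, url_key_pairs entries are now (base_url, api_key, env_var_name) triples, and since the docstring says users can modify the list, a custom OpenAI-compatible endpoint can be appended next to the auto-discovered ones. In this sketch the URL and MY_PROXY_API_KEY are hypothetical placeholders:

    import os
    from llmcomp import Config

    # Entries use the new (base_url, api_key, env_var_name) triple format.
    Config.url_key_pairs.append((
        "https://my-proxy.example.com/v1",   # hypothetical endpoint
        os.environ["MY_PROXY_API_KEY"],      # hypothetical key variable
        "MY_PROXY_API_KEY",
    ))

Second, the env var name makes client selection deterministic: if both OPENAI_API_KEY and OPENAI_API_KEY_2 work for a model, the lexicographically lowest name (OPENAI_API_KEY) wins on every run, rather than whichever probe happened to finish first.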
llmcomp/default_adapters.py
ADDED
@@ -0,0 +1,81 @@
+"""Model-specific logic.
+
+You might want to register your own handlers for specific models.
+Just add more ModelAdapter.register() calls somewhere in your code.
+
+Later-registered handlers can override earlier-registered handlers.
+"""
+
+from llmcomp.config import Config
+from llmcomp.runner.model_adapter import ModelAdapter
+
+
+# -----------------------------------------------------------------------------
+# Base handler: adds model to all requests
+# Note: runner also later adds timeout=Config.timeout
+# -----------------------------------------------------------------------------
+
+def base_prepare(params: dict, model: str) -> dict:
+    return {
+        "model": model,
+        **params,
+    }
+
+
+ModelAdapter.register(lambda model: True, base_prepare)
+
+
+# -----------------------------------------------------------------------------
+# Reasoning effort: adds reasoning_effort from Config for reasoning models
+# -----------------------------------------------------------------------------
+
+def supports_reasoning_effort(model: str) -> bool:
+    """o1, o3, o4 series and gpt-5 series."""
+    return (
+        model.startswith("o1")
+        or model.startswith("o3")
+        or model.startswith("o4")
+        or model.startswith("gpt-5")
+    )
+
+
+def reasoning_effort_prepare(params: dict, model: str) -> dict:
+    return {
+        "reasoning_effort": Config.reasoning_effort,
+        **params,
+    }
+
+
+ModelAdapter.register(supports_reasoning_effort, reasoning_effort_prepare)
+
+
+# -----------------------------------------------------------------------------
+# Max completion tokens: converts max_tokens to max_completion_tokens
+# -----------------------------------------------------------------------------
+
+def requires_max_completion_tokens(model: str) -> bool:
+    """o-series models (o1, o3, o4) and gpt-5 series don't support max_tokens."""
+    return (
+        model.startswith("o1")
+        or model.startswith("o3")
+        or model.startswith("o4")
+        or model.startswith("gpt-5")
+    )
+
+
+def max_completion_tokens_prepare(params: dict, model: str) -> dict:
+    if "max_tokens" not in params:
+        return params
+    if "max_completion_tokens" in params:
+        # User explicitly set max_completion_tokens, just remove max_tokens
+        result = dict(params)
+        del result["max_tokens"]
+        return result
+    # Convert max_tokens to max_completion_tokens
+    result = dict(params)
+    result["max_completion_tokens"] = result.pop("max_tokens")
+    return result
+
+
+ModelAdapter.register(requires_max_completion_tokens, max_completion_tokens_prepare)
+
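As the module docstring invites, custom handlers follow the same two-part pattern: a predicate on the model name and a prepare function with the (params: dict, model: str) -> dict signature. A sketch with an invented "local-" model prefix and an invented temperature override:

    from llmcomp import ModelAdapter

    def is_local_model(model: str) -> bool:
        return model.startswith("local-")  # hypothetical naming convention

    def local_prepare(params: dict, model: str) -> dict:
        # Same merge pattern as the default handlers: spread **params last,
        # so caller-supplied values always win over the injected default.
        return {"temperature": 0.0, **params}

    # Registered after the defaults, so per the docstring it can override
    # them for matching models.
    ModelAdapter.register(is_local_model, local_prepare)

Note the {override, **params} ordering mirrors base_prepare and reasoning_effort_prepare above: handlers fill in defaults without clobbering anything passed explicitly per request.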