llmcomp 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llmcomp/finetuning/manager.py +12 -2
- llmcomp/question/question.py +40 -5
- llmcomp/runner/runner.py +17 -1
- {llmcomp-1.2.0.dist-info → llmcomp-1.2.2.dist-info}/METADATA +2 -2
- {llmcomp-1.2.0.dist-info → llmcomp-1.2.2.dist-info}/RECORD +8 -8
- {llmcomp-1.2.0.dist-info → llmcomp-1.2.2.dist-info}/WHEEL +0 -0
- {llmcomp-1.2.0.dist-info → llmcomp-1.2.2.dist-info}/entry_points.txt +0 -0
- {llmcomp-1.2.0.dist-info → llmcomp-1.2.2.dist-info}/licenses/LICENSE +0 -0
llmcomp/finetuning/manager.py
CHANGED
```diff
@@ -363,10 +363,20 @@ class FinetuningManager:
         files = []
 
         md5 = self._get_file_md5(file_name)
+        client = openai.OpenAI(api_key=api_key)
+
         for file in files:
             if file["name"] == file_name and file["md5"] == md5 and file["organization_id"] == organization_id:
-                print(f"File {file_name} already uploaded. ID: {file['id']}")
-                return file["id"]
+                # Verify the file actually exists (it might be in a different project)
+                # See: https://github.com/johny-b/llmcomp/issues/31
+                try:
+                    client.files.retrieve(file["id"])
+                    print(f"File {file_name} already uploaded. ID: {file['id']}")
+                    return file["id"]
+                except openai.NotFoundError:
+                    # File is in this organization, but in another project
+                    pass
+
         return self._upload_file(file_name, api_key, organization_id)
 
     def _upload_file(self, file_name, api_key, organization_id):
```
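The fix re-validates cached OpenAI file IDs before reuse: an ID recorded earlier can belong to the same organization but a different project, in which case `files.retrieve` raises `NotFoundError` and the upload is redone. A minimal standalone sketch of the same pattern, assuming a hypothetical `cached_id` lookup (`client.files.retrieve`, `client.files.create`, and `openai.NotFoundError` are standard OpenAI SDK symbols):

```python
import openai

def reuse_or_upload(client: openai.OpenAI, cached_id: str | None, path: str) -> str:
    """Return a usable file ID, re-uploading if the cached one is unreachable."""
    if cached_id is not None:
        try:
            # Raises openai.NotFoundError if the ID belongs to another project.
            client.files.retrieve(cached_id)
            return cached_id
        except openai.NotFoundError:
            pass  # Fall through and upload again.
    with open(path, "rb") as f:
        return client.files.create(file=f, purpose="fine-tune").id
```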
llmcomp/question/question.py
CHANGED
```diff
@@ -1,12 +1,14 @@
 from __future__ import annotations
 
 import os
+import re
 import warnings
 from abc import ABC, abstractmethod
+from collections import defaultdict
 from concurrent.futures import ThreadPoolExecutor
 from copy import deepcopy
 from queue import Queue
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Literal, overload
 
 import pandas as pd
 import yaml
```
```diff
@@ -23,6 +25,7 @@ from llmcomp.question.result import JudgeCache, Result
 from llmcomp.runner.runner import Runner
 
 if TYPE_CHECKING:
+    from llmcomp.question.judge import FreeFormJudge, RatingJudge
     from llmcomp.question.question import Question
 
 
```
```diff
@@ -43,6 +46,13 @@ class Question(ABC):
         self.logit_bias = logit_bias
         self.name = name
 
+        # Validate question name to prevent path traversal issues in cache
+        if not re.match(r'^[a-zA-Z0-9_-]+$', name):
+            raise ValueError(
+                f"Invalid question name: {name!r}. "
+                f"Name must contain only letters, numbers, underscores, and hyphens."
+            )
+
     @property
     @abstractmethod
     def _runner_sampling_func_name(self) -> str:
```
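The added check stops a question name from acting as a path fragment when it is later used as a cache key on disk. A standalone illustration of what the pattern accepts and rejects (example names made up):

```python
import re

NAME_RE = re.compile(r'^[a-zA-Z0-9_-]+$')

for name in ["my_question-01", "../../etc/passwd", "results/2024", "q one"]:
    # Only letters, digits, underscores, and hyphens pass; dots, slashes,
    # and whitespace are rejected.
    print(f"{name!r}: {'ok' if NAME_RE.match(name) else 'rejected'}")
```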
```diff
@@ -56,6 +66,30 @@ class Question(ABC):
         """Type is snake_case version of the class name."""
         return "".join("_" + c.lower() if c.isupper() else c.lower() for c in cls.__name__).lstrip("_")
 
+    @overload
+    @classmethod
+    def create(cls, *, type: Literal["free_form"], **kwargs) -> "FreeForm": ...
+
+    @overload
+    @classmethod
+    def create(cls, *, type: Literal["rating"], **kwargs) -> "Rating": ...
+
+    @overload
+    @classmethod
+    def create(cls, *, type: Literal["next_token"], **kwargs) -> "NextToken": ...
+
+    @overload
+    @classmethod
+    def create(cls, *, type: Literal["free_form_judge"], **kwargs) -> "FreeFormJudge": ...
+
+    @overload
+    @classmethod
+    def create(cls, *, type: Literal["rating_judge"], **kwargs) -> "RatingJudge": ...
+
+    @overload
+    @classmethod
+    def create(cls, *, type: str, **kwargs) -> "Question": ...
+
     @classmethod
     def create(cls, **kwargs) -> "Question":
         """Create a Question instance from a type string and keyword arguments.
```
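The `@overload` stack is erased at runtime; its only effect is that static type checkers narrow the return type of `Question.create` from the `type` literal, so subclass-specific attributes type-check without casts. Roughly, under mypy or pyright (keyword arguments here are illustrative, not the real constructor signature):

```python
# reveal_type() is understood by type checkers; since Python 3.11 it also
# prints the type at runtime.
q1 = Question.create(type="rating", name="helpfulness")
reveal_type(q1)   # checker reports: Rating

q2 = Question.create(type="free_form", name="summary")
reveal_type(q2)   # checker reports: FreeForm
```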
```diff
@@ -761,8 +795,9 @@ class Rating(Question):
         """
         if score is None:
             return None
-
-        probs = {}
+
+        # Note: you might have multiple tokens mapping to the same integer key, e.g. " 100" and "100"
+        probs = defaultdict(float)
         total = 0
         for key, val in score.items():
             try:
@@ -770,9 +805,9 @@ class Rating(Question):
         except ValueError:
             continue
         if self.min_rating <= int_key <= self.max_rating:
-            probs[int_key] = val
+            probs[int_key] += val
             total += val
-
+
         if total == 0 or (1 - total) >= self.refusal_threshold:
             return None
 
```
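The switch to `defaultdict(float)` with `+=` matters because distinct top-logprob tokens such as `" 100"` and `"100"` both parse to the integer 100; plain assignment would let one probability silently overwrite the other. A standalone illustration with toy numbers:

```python
from collections import defaultdict

score = {" 100": 0.55, "100": 0.30, "banana": 0.05}  # token -> probability
probs: defaultdict[int, float] = defaultdict(float)
total = 0.0
for key, val in score.items():
    try:
        int_key = int(key)  # int(" 100") == int("100") == 100
    except ValueError:
        continue  # skip non-numeric tokens
    probs[int_key] += val
    total += val

print(dict(probs), total)  # {100: 0.85} 0.85, not the 0.30 assignment would leave
```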
llmcomp/runner/runner.py
CHANGED
```diff
@@ -10,6 +10,13 @@ from llmcomp.config import Config, NoClientForModel
 from llmcomp.runner.chat_completion import openai_chat_completion
 from llmcomp.runner.model_adapter import ModelAdapter
 
+
+class DuplicateTokenError(Exception):
+    """Raised when API returns duplicate tokens in logprobs (unexpected provider behavior)."""
+
+    pass
+
+
 NO_LOGPROBS_WARNING = """\
 Failed to get logprobs because {model} didn't send them.
 Returning empty dict, I hope you can handle it.
```
```diff
@@ -121,6 +128,15 @@ class Runner:
             print(NO_LOGPROBS_WARNING.format(model=self.model, completion=completion))
             return {}
 
+        # Check for duplicate tokens - this shouldn't happen with OpenAI but might with other providers
+        tokens = [el.token for el in logprobs]
+        if len(tokens) != len(set(tokens)):
+            duplicates = [t for t in tokens if tokens.count(t) > 1]
+            raise DuplicateTokenError(
+                f"API returned duplicate tokens in logprobs: {set(duplicates)}. "
+                f"Model: {self.model}. This is unexpected - please report this issue."
+            )
+
         result = {}
         for el in logprobs:
             result[el.token] = math.exp(el.logprob) if convert_to_probs else el.logprob
```
```diff
@@ -186,7 +202,7 @@ class Runner:
         func_kwargs = {key: val for key, val in kwargs.items() if not key.startswith("_")}
         try:
             result = func(**func_kwargs)
-        except NoClientForModel:
+        except (NoClientForModel, DuplicateTokenError):
             raise
         except Exception as e:
             # Truncate messages for readability
```
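Without the check, a provider returning the same token twice in the logprobs would have one entry silently overwrite the other in `result`; raising keeps bad data out of downstream probability sums, and `DuplicateTokenError` is re-raised alongside `NoClientForModel` rather than being swallowed by the generic handler. A standalone sketch of the detection, with a toy record type standing in for the SDK's logprob entries:

```python
import math
from dataclasses import dataclass

@dataclass
class Logprob:  # toy stand-in for the provider SDK's logprob entry
    token: str
    logprob: float

def to_probs(logprobs: list[Logprob]) -> dict[str, float]:
    tokens = [el.token for el in logprobs]
    if len(tokens) != len(set(tokens)):
        duplicates = {t for t in tokens if tokens.count(t) > 1}
        raise ValueError(f"duplicate tokens in logprobs: {duplicates}")
    return {el.token: math.exp(el.logprob) for el in logprobs}

print(to_probs([Logprob("yes", -0.1), Logprob("no", -2.5)]))
```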
{llmcomp-1.2.0.dist-info → llmcomp-1.2.2.dist-info}/METADATA
CHANGED

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llmcomp
-Version: 1.2.0
+Version: 1.2.2
 Summary: Research library for black-box experiments on language models.
 Project-URL: Homepage, https://github.com/johny-b/llmcomp
 Project-URL: Repository, https://github.com/johny-b/llmcomp
```
```diff
@@ -150,7 +150,7 @@ Suppose you have many prompts you want to send to models. There are three options:
 3. Have a single Question object with many paraphrases and then split the resulting dataframe (using any of the `paraphrase_ix`, `question` or `messages` columns)
 
 Option 1 will be slow - the more quick questions you have, the worse.
-Option 2 will be fast, but you need to write parallelization yourself.
+Option 2 will be fast, but you need to write parallelization yourself. Question should be thread-safe, but parallel execution of questions was **never** tested. One thing that won't work: `llmcomp.Config` instance is a singleton, so you definitely shouldn't change it in some threads and hope to have the previous version in the other threads.
 Option 3 will also be fast and is recommended. Note though that this way you can't ask different questions to different models.
 
 Parallelization within a single question is done via threads. Perhaps async would be faster. Prompting claude-opus-4.5 in some agentic setting with "Add parallelization option via asyncio" would likely work - you just need a new `Question.many_models_execute`.
```
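For anyone attempting Option 2 despite the caveat, the natural shape is a thread pool over questions. This is only a sketch under the README's stated assumptions (Question is thread-safe, `llmcomp.Config` is never mutated mid-run); `run_question` is a hypothetical stand-in for whatever executes one Question:

```python
from concurrent.futures import ThreadPoolExecutor

def run_question(name: str) -> str:
    # Hypothetical: replace the body with the call that executes one
    # Question (e.g. the one producing its results dataframe).
    return f"{name}: done"

question_names = ["q1", "q2", "q3"]

# Threads suit this I/O-bound work; never reconfigure the singleton
# llmcomp.Config from inside the workers.
with ThreadPoolExecutor(max_workers=4) as pool:
    for result in pool.map(run_question, question_names):
        print(result)
```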
{llmcomp-1.2.0.dist-info → llmcomp-1.2.2.dist-info}/RECORD
CHANGED

```diff
@@ -3,17 +3,17 @@ llmcomp/config.py,sha256=xADWhqsQphJZQvf7WemWencmWuBnvTN_KeJrjWfnmHY,8942
 llmcomp/default_adapters.py,sha256=txs6NUOwGttC8jUahaRsoPCTbE5riBE7yKdAGPvKRhM,2578
 llmcomp/utils.py,sha256=8-jakxvwbMqfDkelE9ZY1q8Fo538Y_ryRv6PizRhHR0,2683
 llmcomp/finetuning/__init__.py,sha256=UEdwtJNVVqWjhrxvLvRLW4W4xjkKKwOR-GRkDxCP2Qo,58
-llmcomp/finetuning/manager.py,sha256=
+llmcomp/finetuning/manager.py,sha256=JaILoQYkNA9jIM_WR9eZactFHHcNFVeQeObXjQS8KcI,18779
 llmcomp/finetuning/update_jobs.py,sha256=blsHzg_ViTa2hBJtWCqR5onttehTtmXn3vmCTNd_hJw,980
 llmcomp/question/judge.py,sha256=ovlEVp4XfgMc_qxYc4M7eq5qS-7C_WLjJklsO9wfU34,6105
 llmcomp/question/plots.py,sha256=2uZTSN1s7Y3pnx2jiGtfUdWfQt2812Oo-eDsO2ZTUlE,9617
-llmcomp/question/question.py,sha256=
+llmcomp/question/question.py,sha256=2CvE0xePLnD5SUJsE_ZyvAIE_36rjjW37fUqG3NHTV0,39171
 llmcomp/question/result.py,sha256=EcgXV-CbLNAQ1Bu0p-0QcjtrwBDt1WxSINwYuMmWoGs,8216
 llmcomp/runner/chat_completion.py,sha256=iDiWE0N0_MYfggD-ouyfUPyaADt7602K5Wo16a7JJo4,967
 llmcomp/runner/model_adapter.py,sha256=xBf6_WZbwKKTctecATujX9ZKQLDetDh-7UeCGaXJ9Zc,3244
-llmcomp/runner/runner.py,sha256=
-llmcomp-1.2.0.dist-info/METADATA,sha256=
-llmcomp-1.2.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-llmcomp-1.2.0.dist-info/entry_points.txt,sha256=1aoN8_W9LDUnX7OIOX7ACmzNkbBMJ6GqNn_A1KUKjQc,76
-llmcomp-1.2.0.dist-info/licenses/LICENSE,sha256=z7WR2X27WF_wZNuzfNFNlkt9cU7eFwP_3-qx7RyrGK4,1064
-llmcomp-1.2.0.dist-info/RECORD,,
+llmcomp/runner/runner.py,sha256=ENDSH2I7wKu9tq0HdfLwCgdHLxjvJaIrlrWY1vy7soc,10807
+llmcomp-1.2.2.dist-info/METADATA,sha256=DFiSsygEmaTNejveyEBEf-4wv47iqlXq0SWMTlRbf94,12518
+llmcomp-1.2.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+llmcomp-1.2.2.dist-info/entry_points.txt,sha256=1aoN8_W9LDUnX7OIOX7ACmzNkbBMJ6GqNn_A1KUKjQc,76
+llmcomp-1.2.2.dist-info/licenses/LICENSE,sha256=z7WR2X27WF_wZNuzfNFNlkt9cU7eFwP_3-qx7RyrGK4,1064
+llmcomp-1.2.2.dist-info/RECORD,,
```
{llmcomp-1.2.0.dist-info → llmcomp-1.2.2.dist-info}/WHEEL
File without changes

{llmcomp-1.2.0.dist-info → llmcomp-1.2.2.dist-info}/entry_points.txt
File without changes

{llmcomp-1.2.0.dist-info → llmcomp-1.2.2.dist-info}/licenses/LICENSE
File without changes