llmcomp 1.2.0__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llmcomp/finetuning/manager.py CHANGED
@@ -363,10 +363,20 @@ class FinetuningManager:
         files = []
 
         md5 = self._get_file_md5(file_name)
+        client = openai.OpenAI(api_key=api_key)
+
         for file in files:
             if file["name"] == file_name and file["md5"] == md5 and file["organization_id"] == organization_id:
-                print(f"File {file_name} already uploaded. ID: {file['id']}")
-                return file["id"]
+                # Verify the file actually exists (it might be in a different project)
+                # See: https://github.com/johny-b/llmcomp/issues/31
+                try:
+                    client.files.retrieve(file["id"])
+                    print(f"File {file_name} already uploaded. ID: {file['id']}")
+                    return file["id"]
+                except openai.NotFoundError:
+                    # File is in this organization, but in another project
+                    pass
+
         return self._upload_file(file_name, api_key, organization_id)
 
     def _upload_file(self, file_name, api_key, organization_id):
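This change makes the upload cache self-healing: a cached (name, md5, organization_id) entry may point at a file uploaded under a different OpenAI project, in which case `client.files.retrieve` raises `openai.NotFoundError` and the file is re-uploaded. A minimal standalone sketch of the same verification step (the function name here is hypothetical, not part of llmcomp):

import openai

def resolve_cached_file_id(client: openai.OpenAI, cached_id: str) -> str | None:
    # Return cached_id if this client can see the file, else None to force re-upload.
    try:
        client.files.retrieve(cached_id)  # raises openai.NotFoundError if not visible
        return cached_id
    except openai.NotFoundError:
        # Cache hit, but the file lives in another project of the same organization.
        return None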
llmcomp/question/question.py CHANGED
@@ -1,8 +1,10 @@
 from __future__ import annotations
 
 import os
+import re
 import warnings
 from abc import ABC, abstractmethod
+from collections import defaultdict
 from concurrent.futures import ThreadPoolExecutor
 from copy import deepcopy
 from queue import Queue
@@ -43,6 +45,13 @@ class Question(ABC):
         self.logit_bias = logit_bias
         self.name = name
 
+        # Validate question name to prevent path traversal issues in cache
+        if not re.match(r'^[a-zA-Z0-9_-]+$', name):
+            raise ValueError(
+                f"Invalid question name: {name!r}. "
+                f"Name must contain only letters, numbers, underscores, and hyphens."
+            )
+
     @property
     @abstractmethod
     def _runner_sampling_func_name(self) -> str:
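The character whitelist is what blocks path traversal: question names end up in cache paths, and the regex rejects separators, dots, and the empty string. A quick illustrative check (standalone code, `NAME_RE` is a local name for this sketch):

import re

NAME_RE = re.compile(r'^[a-zA-Z0-9_-]+$')  # same pattern as the validation above

for name in ["ratings_v2", "q-1", "../escape", "a/b.json", ""]:
    print(f"{name!r}: {'ok' if NAME_RE.match(name) else 'rejected'}")
# 'ratings_v2' and 'q-1' pass; path-like and empty names are rejected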
@@ -761,8 +770,9 @@ class Rating(Question):
         """
         if score is None:
             return None
-
-        probs = {}
+
+        # Note: you might have multiple tokens mapping to the same integer key, e.g. "100" and " 100"
+        probs = defaultdict(float)
         total = 0
         for key, val in score.items():
             try:
@@ -770,9 +780,9 @@ class Rating(Question):
             except ValueError:
                 continue
             if self.min_rating <= int_key <= self.max_rating:
-                probs[int_key] = val
+                probs[int_key] += val
                 total += val
-
+
         if total == 0 or (1 - total) >= self.refusal_threshold:
             return None
 
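Why `defaultdict` plus `+=` matters here: with token-level logprobs, "100" and " 100" (leading space) are distinct tokens, yet `int()` strips whitespace so both parse to the key 100. Plain assignment kept only the last token's probability while `total` still summed both, skewing the distribution. A simplified worked example with made-up probabilities (the rating-range check is omitted):

from collections import defaultdict

score = {"100": 0.55, " 100": 0.25, "99": 0.15}  # made-up token probabilities

probs = defaultdict(float)
total = 0
for key, val in score.items():
    try:
        int_key = int(key)  # int(" 100") == 100: whitespace is stripped
    except ValueError:
        continue
    probs[int_key] += val   # with plain `=`, 0.55 would be overwritten by 0.25
    total += val

print(dict(probs), total)   # {100: 0.8, 99: 0.15} 0.95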
llmcomp/runner/runner.py CHANGED
@@ -10,6 +10,13 @@ from llmcomp.config import Config, NoClientForModel
 from llmcomp.runner.chat_completion import openai_chat_completion
 from llmcomp.runner.model_adapter import ModelAdapter
 
+
+class DuplicateTokenError(Exception):
+    """Raised when API returns duplicate tokens in logprobs (unexpected provider behavior)."""
+
+    pass
+
+
 NO_LOGPROBS_WARNING = """\
 Failed to get logprobs because {model} didn't send them.
 Returning empty dict, I hope you can handle it.
@@ -121,6 +128,15 @@ class Runner:
             print(NO_LOGPROBS_WARNING.format(model=self.model, completion=completion))
             return {}
 
+        # Check for duplicate tokens - this shouldn't happen with OpenAI but might with other providers
+        tokens = [el.token for el in logprobs]
+        if len(tokens) != len(set(tokens)):
+            duplicates = [t for t in tokens if tokens.count(t) > 1]
+            raise DuplicateTokenError(
+                f"API returned duplicate tokens in logprobs: {set(duplicates)}. "
+                f"Model: {self.model}. This is unexpected - please report this issue."
+            )
+
         result = {}
         for el in logprobs:
             result[el.token] = math.exp(el.logprob) if convert_to_probs else el.logprob
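The guard exists because the loop below it builds a plain dict keyed by token: if a provider ever returned the same token twice, the later entry would silently overwrite the earlier one. A standalone sketch of the failure mode being caught (the dataclass is a hypothetical stand-in for the provider's logprob objects):

from dataclasses import dataclass

@dataclass
class TopLogprob:  # hypothetical stand-in for the API's logprob entry
    token: str
    logprob: float

logprobs = [TopLogprob("yes", -0.1), TopLogprob("yes", -2.3)]
tokens = [el.token for el in logprobs]
if len(tokens) != len(set(tokens)):
    duplicates = {t for t in tokens if tokens.count(t) > 1}
    raise RuntimeError(f"duplicate tokens in logprobs: {duplicates}")
# Without the check, result["yes"] would silently keep only -2.3, the last value.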
@@ -186,7 +202,7 @@ class Runner:
         func_kwargs = {key: val for key, val in kwargs.items() if not key.startswith("_")}
         try:
             result = func(**func_kwargs)
-        except NoClientForModel:
+        except (NoClientForModel, DuplicateTokenError):
             raise
         except Exception as e:
             # Truncate messages for readability
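This widens an escape hatch in the error handling: `NoClientForModel` and the new `DuplicateTokenError` are re-raised untouched, so they propagate to the caller instead of being processed by the generic `except Exception` branch below. The pattern in isolation (names here are illustrative, not llmcomp's):

def run_safely(func, passthrough=(ValueError,)):
    try:
        return func()
    except passthrough:
        raise  # known, non-recoverable errors propagate unchanged
    except Exception as exc:
        # everything else gets wrapped with extra context
        raise RuntimeError(f"call failed: {exc!r}") from exc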
llmcomp-1.2.0.dist-info/METADATA → llmcomp-1.2.1.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llmcomp
-Version: 1.2.0
+Version: 1.2.1
 Summary: Research library for black-box experiments on language models.
 Project-URL: Homepage, https://github.com/johny-b/llmcomp
 Project-URL: Repository, https://github.com/johny-b/llmcomp
@@ -150,7 +150,7 @@ Suppose you have many prompts you want to send to models. There are three option
 3. Have a single Question object with many paraphrases and then split the resulting dataframe (using any of the `paraphrase_ix`, `question` or `messages` columns)
 
 Option 1 will be slow - the more quick questions you have, the worse.
-Option 2 will be fast, but you need to write parallelization yourself. Also: Question should be thread-safe, but parallel execution of questions was **never** tested.
+Option 2 will be fast, but you need to write parallelization yourself. Question should be thread-safe, but parallel execution of questions was **never** tested. One thing that won't work: `llmcomp.Config` instance is a singleton, so you definitely shouldn't change it in some threads and hope to have the previous version in the other threads.
 Option 3 will also be fast and is recommended. Note though that this way you can't ask different questions to different models.
 
 Parallelization within a single question is done via threads. Perhaps async would be faster. Prompting claude-opus-4.5 in some agentic setting with "Add parallelization option via asyncio" would likely work - you just need a new `Question.many_models_execute`.
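On the singleton warning added to the README: because every thread sees the same `llmcomp.Config` object, mutating it anywhere mutates it everywhere. A generic demonstration of the hazard (an illustrative singleton, not llmcomp's actual Config class):

import threading

class Config:  # illustrative singleton
    _instance = None
    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance.timeout = 30
        return cls._instance

def worker():
    Config().timeout = 5  # intended as a "local" override...

t = threading.Thread(target=worker)
t.start()
t.join()
print(Config().timeout)  # 5 - ...but it changed for every thread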
llmcomp-1.2.0.dist-info/RECORD → llmcomp-1.2.1.dist-info/RECORD RENAMED
@@ -3,17 +3,17 @@ llmcomp/config.py,sha256=xADWhqsQphJZQvf7WemWencmWuBnvTN_KeJrjWfnmHY,8942
 llmcomp/default_adapters.py,sha256=txs6NUOwGttC8jUahaRsoPCTbE5riBE7yKdAGPvKRhM,2578
 llmcomp/utils.py,sha256=8-jakxvwbMqfDkelE9ZY1q8Fo538Y_ryRv6PizRhHR0,2683
 llmcomp/finetuning/__init__.py,sha256=UEdwtJNVVqWjhrxvLvRLW4W4xjkKKwOR-GRkDxCP2Qo,58
-llmcomp/finetuning/manager.py,sha256=vIM_FAswWr01KtfeFC6ffgvlimKgKUj4ij34tnBzBNk,18346
+llmcomp/finetuning/manager.py,sha256=JaILoQYkNA9jIM_WR9eZactFHHcNFVeQeObXjQS8KcI,18779
 llmcomp/finetuning/update_jobs.py,sha256=blsHzg_ViTa2hBJtWCqR5onttehTtmXn3vmCTNd_hJw,980
 llmcomp/question/judge.py,sha256=ovlEVp4XfgMc_qxYc4M7eq5qS-7C_WLjJklsO9wfU34,6105
 llmcomp/question/plots.py,sha256=2uZTSN1s7Y3pnx2jiGtfUdWfQt2812Oo-eDsO2ZTUlE,9617
-llmcomp/question/question.py,sha256=eZT1jQObp9VZ8E9QGx6XBo3Ms9OF2kG6b6l8kW8pma0,37919
+llmcomp/question/question.py,sha256=ljYxoYmWfWCyOm7sD8RPqT9m72g0s0GHF1Z_KDG28_w,38417
 llmcomp/question/result.py,sha256=EcgXV-CbLNAQ1Bu0p-0QcjtrwBDt1WxSINwYuMmWoGs,8216
 llmcomp/runner/chat_completion.py,sha256=iDiWE0N0_MYfggD-ouyfUPyaADt7602K5Wo16a7JJo4,967
 llmcomp/runner/model_adapter.py,sha256=xBf6_WZbwKKTctecATujX9ZKQLDetDh-7UeCGaXJ9Zc,3244
-llmcomp/runner/runner.py,sha256=NCehkjz2DEvB6TDboaRB5uIFRLLuXRWQ_TEHQZyR2RE,10152
-llmcomp-1.2.0.dist-info/METADATA,sha256=9vMgp2uYxyPAtsTjAFIMVQhuKBPTXbAFquCe-YlxxD8,12341
-llmcomp-1.2.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-llmcomp-1.2.0.dist-info/entry_points.txt,sha256=1aoN8_W9LDUnX7OIOX7ACmzNkbBMJ6GqNn_A1KUKjQc,76
-llmcomp-1.2.0.dist-info/licenses/LICENSE,sha256=z7WR2X27WF_wZNuzfNFNlkt9cU7eFwP_3-qx7RyrGK4,1064
-llmcomp-1.2.0.dist-info/RECORD,,
+llmcomp/runner/runner.py,sha256=ENDSH2I7wKu9tq0HdfLwCgdHLxjvJaIrlrWY1vy7soc,10807
+llmcomp-1.2.1.dist-info/METADATA,sha256=AJ4cBJPpW_sIjxZaLQm3_qjOs7Xzx4aY-9XC7TP3z2I,12518
+llmcomp-1.2.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+llmcomp-1.2.1.dist-info/entry_points.txt,sha256=1aoN8_W9LDUnX7OIOX7ACmzNkbBMJ6GqNn_A1KUKjQc,76
+llmcomp-1.2.1.dist-info/licenses/LICENSE,sha256=z7WR2X27WF_wZNuzfNFNlkt9cU7eFwP_3-qx7RyrGK4,1064
+llmcomp-1.2.1.dist-info/RECORD,,