llmcomp 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
llmcomp/finetuning/manager.py CHANGED
@@ -363,10 +363,20 @@ class FinetuningManager:
         files = []
 
         md5 = self._get_file_md5(file_name)
+        client = openai.OpenAI(api_key=api_key)
+
         for file in files:
             if file["name"] == file_name and file["md5"] == md5 and file["organization_id"] == organization_id:
-                print(f"File {file_name} already uploaded. ID: {file['id']}")
-                return file["id"]
+                # Verify the file actually exists (it might be in a different project)
+                # See: https://github.com/johny-b/llmcomp/issues/31
+                try:
+                    client.files.retrieve(file["id"])
+                    print(f"File {file_name} already uploaded. ID: {file['id']}")
+                    return file["id"]
+                except openai.NotFoundError:
+                    # File is in this organization, but in another project
+                    pass
+
         return self._upload_file(file_name, api_key, organization_id)
 
     def _upload_file(self, file_name, api_key, organization_id):
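To see the new logic outside the diff: a cached (name, md5, organization) match is no longer trusted on its own, because OpenAI file ids are scoped to a project within the organization. The id must still be retrievable through the API before it is reused. A minimal sketch of the same pattern, with a hypothetical `cached_files` list standing in for the manager's upload cache:

```python
import openai

def find_reusable_file_id(
    client: openai.OpenAI,
    cached_files: list[dict],  # hypothetical cache entries: {"name", "md5", "organization_id", "id"}
    file_name: str,
    md5: str,
    organization_id: str,
) -> str | None:
    for file in cached_files:
        if (file["name"] == file_name and file["md5"] == md5
                and file["organization_id"] == organization_id):
            try:
                # The id is only valid if the current project can still see the file.
                client.files.retrieve(file["id"])
                return file["id"]
            except openai.NotFoundError:
                # Same organization, different project: fall through and re-upload.
                continue
    return None  # caller uploads the file again
```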
llmcomp/question/question.py CHANGED
@@ -1,12 +1,14 @@
 from __future__ import annotations
 
 import os
+import re
 import warnings
 from abc import ABC, abstractmethod
+from collections import defaultdict
 from concurrent.futures import ThreadPoolExecutor
 from copy import deepcopy
 from queue import Queue
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Literal, overload
 
 import pandas as pd
 import yaml
@@ -23,6 +25,7 @@ from llmcomp.question.result import JudgeCache, Result
 from llmcomp.runner.runner import Runner
 
 if TYPE_CHECKING:
+    from llmcomp.question.judge import FreeFormJudge, RatingJudge
     from llmcomp.question.question import Question
 
 
@@ -43,6 +46,13 @@ class Question(ABC):
         self.logit_bias = logit_bias
         self.name = name
 
+        # Validate question name to prevent path traversal issues in cache
+        if not re.match(r'^[a-zA-Z0-9_-]+$', name):
+            raise ValueError(
+                f"Invalid question name: {name!r}. "
+                f"Name must contain only letters, numbers, underscores, and hyphens."
+            )
+
     @property
     @abstractmethod
     def _runner_sampling_func_name(self) -> str:
@@ -56,6 +66,30 @@ class Question(ABC):
         """Type is snake_case version of the class name."""
         return "".join("_" + c.lower() if c.isupper() else c.lower() for c in cls.__name__).lstrip("_")
 
+    @overload
+    @classmethod
+    def create(cls, *, type: Literal["free_form"], **kwargs) -> "FreeForm": ...
+
+    @overload
+    @classmethod
+    def create(cls, *, type: Literal["rating"], **kwargs) -> "Rating": ...
+
+    @overload
+    @classmethod
+    def create(cls, *, type: Literal["next_token"], **kwargs) -> "NextToken": ...
+
+    @overload
+    @classmethod
+    def create(cls, *, type: Literal["free_form_judge"], **kwargs) -> "FreeFormJudge": ...
+
+    @overload
+    @classmethod
+    def create(cls, *, type: Literal["rating_judge"], **kwargs) -> "RatingJudge": ...
+
+    @overload
+    @classmethod
+    def create(cls, *, type: str, **kwargs) -> "Question": ...
+
     @classmethod
     def create(cls, **kwargs) -> "Question":
         """Create a Question instance from a type string and keyword arguments.
@@ -761,8 +795,9 @@ class Rating(Question):
         """
         if score is None:
             return None
-
-        probs = {}
+
+        # Note: you might have multiple tokens mapping to the same integer key, e.g. "100" and " 100"
+        probs = defaultdict(float)
         total = 0
         for key, val in score.items():
             try:
@@ -770,9 +805,9 @@ class Rating(Question):
             except ValueError:
                 continue
             if self.min_rating <= int_key <= self.max_rating:
-                probs[int_key] = val
+                probs[int_key] += val
                 total += val
-
+
         if total == 0 or (1 - total) >= self.refusal_threshold:
             return None
 
llmcomp/runner/runner.py CHANGED
@@ -10,6 +10,13 @@ from llmcomp.config import Config, NoClientForModel
 from llmcomp.runner.chat_completion import openai_chat_completion
 from llmcomp.runner.model_adapter import ModelAdapter
 
+
+class DuplicateTokenError(Exception):
+    """Raised when API returns duplicate tokens in logprobs (unexpected provider behavior)."""
+
+    pass
+
+
 NO_LOGPROBS_WARNING = """\
 Failed to get logprobs because {model} didn't send them.
 Returning empty dict, I hope you can handle it.
@@ -121,6 +128,15 @@ class Runner:
             print(NO_LOGPROBS_WARNING.format(model=self.model, completion=completion))
             return {}
 
+        # Check for duplicate tokens - this shouldn't happen with OpenAI but might with other providers
+        tokens = [el.token for el in logprobs]
+        if len(tokens) != len(set(tokens)):
+            duplicates = [t for t in tokens if tokens.count(t) > 1]
+            raise DuplicateTokenError(
+                f"API returned duplicate tokens in logprobs: {set(duplicates)}. "
+                f"Model: {self.model}. This is unexpected - please report this issue."
+            )
+
         result = {}
         for el in logprobs:
             result[el.token] = math.exp(el.logprob) if convert_to_probs else el.logprob
@@ -186,7 +202,7 @@ class Runner:
         func_kwargs = {key: val for key, val in kwargs.items() if not key.startswith("_")}
         try:
             result = func(**func_kwargs)
-        except NoClientForModel:
+        except (NoClientForModel, DuplicateTokenError):
             raise
         except Exception as e:
             # Truncate messages for readability
llmcomp-1.2.2.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llmcomp
-Version: 1.2.0
+Version: 1.2.2
 Summary: Research library for black-box experiments on language models.
 Project-URL: Homepage, https://github.com/johny-b/llmcomp
 Project-URL: Repository, https://github.com/johny-b/llmcomp
@@ -150,7 +150,7 @@ Suppose you have many prompts you want to send to models. There are three option
 3. Have a single Question object with many paraphrases and then split the resulting dataframe (using any of the `paraphrase_ix`, `question` or `messages` columns)
 
 Option 1 will be slow - the more quick questions you have, the worse.
-Option 2 will be fast, but you need to write parallelization yourself. Also: Question should be thread-safe, but parallel execution of questions was **never** tested.
+Option 2 will be fast, but you need to write parallelization yourself. Question should be thread-safe, but parallel execution of questions was **never** tested. One thing that won't work: `llmcomp.Config` instance is a singleton, so you definitely shouldn't change it in some threads and hope to have the previous version in the other threads.
 Option 3 will also be fast and is recommended. Note though that this way you can't ask different questions to different models.
 
 Parallelization within a single question is done via threads. Perhaps async would be faster. Prompting claude-opus-4.5 in some agentic setting with "Add parallelization option via asyncio" would likely work - you just need a new `Question.many_models_execute`.
llmcomp-1.2.2.dist-info/RECORD CHANGED
@@ -3,17 +3,17 @@ llmcomp/config.py,sha256=xADWhqsQphJZQvf7WemWencmWuBnvTN_KeJrjWfnmHY,8942
 llmcomp/default_adapters.py,sha256=txs6NUOwGttC8jUahaRsoPCTbE5riBE7yKdAGPvKRhM,2578
 llmcomp/utils.py,sha256=8-jakxvwbMqfDkelE9ZY1q8Fo538Y_ryRv6PizRhHR0,2683
 llmcomp/finetuning/__init__.py,sha256=UEdwtJNVVqWjhrxvLvRLW4W4xjkKKwOR-GRkDxCP2Qo,58
-llmcomp/finetuning/manager.py,sha256=vIM_FAswWr01KtfeFC6ffgvlimKgKUj4ij34tnBzBNk,18346
+llmcomp/finetuning/manager.py,sha256=JaILoQYkNA9jIM_WR9eZactFHHcNFVeQeObXjQS8KcI,18779
 llmcomp/finetuning/update_jobs.py,sha256=blsHzg_ViTa2hBJtWCqR5onttehTtmXn3vmCTNd_hJw,980
 llmcomp/question/judge.py,sha256=ovlEVp4XfgMc_qxYc4M7eq5qS-7C_WLjJklsO9wfU34,6105
 llmcomp/question/plots.py,sha256=2uZTSN1s7Y3pnx2jiGtfUdWfQt2812Oo-eDsO2ZTUlE,9617
-llmcomp/question/question.py,sha256=eZT1jQObp9VZ8E9QGx6XBo3Ms9OF2kG6b6l8kW8pma0,37919
+llmcomp/question/question.py,sha256=2CvE0xePLnD5SUJsE_ZyvAIE_36rjjW37fUqG3NHTV0,39171
 llmcomp/question/result.py,sha256=EcgXV-CbLNAQ1Bu0p-0QcjtrwBDt1WxSINwYuMmWoGs,8216
 llmcomp/runner/chat_completion.py,sha256=iDiWE0N0_MYfggD-ouyfUPyaADt7602K5Wo16a7JJo4,967
 llmcomp/runner/model_adapter.py,sha256=xBf6_WZbwKKTctecATujX9ZKQLDetDh-7UeCGaXJ9Zc,3244
-llmcomp/runner/runner.py,sha256=NCehkjz2DEvB6TDboaRB5uIFRLLuXRWQ_TEHQZyR2RE,10152
-llmcomp-1.2.0.dist-info/METADATA,sha256=9vMgp2uYxyPAtsTjAFIMVQhuKBPTXbAFquCe-YlxxD8,12341
-llmcomp-1.2.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-llmcomp-1.2.0.dist-info/entry_points.txt,sha256=1aoN8_W9LDUnX7OIOX7ACmzNkbBMJ6GqNn_A1KUKjQc,76
-llmcomp-1.2.0.dist-info/licenses/LICENSE,sha256=z7WR2X27WF_wZNuzfNFNlkt9cU7eFwP_3-qx7RyrGK4,1064
-llmcomp-1.2.0.dist-info/RECORD,,
+llmcomp/runner/runner.py,sha256=ENDSH2I7wKu9tq0HdfLwCgdHLxjvJaIrlrWY1vy7soc,10807
+llmcomp-1.2.2.dist-info/METADATA,sha256=DFiSsygEmaTNejveyEBEf-4wv47iqlXq0SWMTlRbf94,12518
+llmcomp-1.2.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+llmcomp-1.2.2.dist-info/entry_points.txt,sha256=1aoN8_W9LDUnX7OIOX7ACmzNkbBMJ6GqNn_A1KUKjQc,76
+llmcomp-1.2.2.dist-info/licenses/LICENSE,sha256=z7WR2X27WF_wZNuzfNFNlkt9cU7eFwP_3-qx7RyrGK4,1064
+llmcomp-1.2.2.dist-info/RECORD,,