llmcomp 1.2.4__py3-none-any.whl → 1.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llmcomp/finetuning/manager.py +21 -0
- llmcomp/finetuning/validation.py +406 -0
- llmcomp/question/judge.py +11 -0
- llmcomp/question/plots.py +150 -71
- llmcomp/question/question.py +255 -190
- llmcomp/question/result.py +33 -10
- llmcomp/question/viewer.py +488 -0
- llmcomp/runner/runner.py +32 -18
- {llmcomp-1.2.4.dist-info → llmcomp-1.3.1.dist-info}/METADATA +8 -5
- llmcomp-1.3.1.dist-info/RECORD +21 -0
- llmcomp-1.2.4.dist-info/RECORD +0 -19
- {llmcomp-1.2.4.dist-info → llmcomp-1.3.1.dist-info}/WHEEL +0 -0
- {llmcomp-1.2.4.dist-info → llmcomp-1.3.1.dist-info}/entry_points.txt +0 -0
- {llmcomp-1.2.4.dist-info → llmcomp-1.3.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: llmcomp
|
|
3
|
-
Version: 1.2.4
|
|
3
|
+
Version: 1.3.1
|
|
4
4
|
Summary: Research library for black-box experiments on language models.
|
|
5
5
|
Project-URL: Homepage, https://github.com/johny-b/llmcomp
|
|
6
6
|
Project-URL: Repository, https://github.com/johny-b/llmcomp
|
|
@@ -9,12 +9,14 @@ License: MIT
|
|
|
9
9
|
License-File: LICENSE
|
|
10
10
|
Requires-Python: >=3.9
|
|
11
11
|
Requires-Dist: backoff
|
|
12
|
+
Requires-Dist: filelock
|
|
12
13
|
Requires-Dist: matplotlib
|
|
13
14
|
Requires-Dist: numpy
|
|
14
15
|
Requires-Dist: openai>=1.0.0
|
|
15
16
|
Requires-Dist: pandas
|
|
16
17
|
Requires-Dist: pyyaml
|
|
17
18
|
Requires-Dist: requests
|
|
19
|
+
Requires-Dist: streamlit>=1.20.0
|
|
18
20
|
Requires-Dist: tqdm
|
|
19
21
|
Description-Content-Type: text/markdown
|
|
20
22
|
|
|
@@ -49,9 +51,9 @@ question = Question.create(
|
|
|
49
51
|
samples_per_paraphrase=100,
|
|
50
52
|
temperature=1,
|
|
51
53
|
)
|
|
52
|
-
question.
|
|
53
|
-
|
|
54
|
-
|
|
54
|
+
df = question.df(MODELS) # Dataframe with the results
|
|
55
|
+
question.plot(MODELS, min_fraction=0.03) # Aggregated bar chart
|
|
56
|
+
question.view(MODELS) # Interactive browser for individual responses
|
|
55
57
|
```
|
|
56
58
|
|
|
57
59
|
## Main features
|
|
@@ -61,6 +63,7 @@ print(df.head(1).iloc[0])
|
|
|
61
63
|
* **Parallel requests** - configurable concurrency across models
|
|
62
64
|
* **Multi-key support** - use `OPENAI_API_KEY_0`, `OPENAI_API_KEY_1`, etc. to compare models from different orgs
|
|
63
65
|
* **Provider-agnostic** - works with any OpenAI-compatible API ([OpenRouter](https://openrouter.ai/docs/quickstart#using-the-openai-sdk), [Tinker](https://tinker-docs.thinkingmachines.ai/compatible-apis/openai), etc.)
|
|
66
|
+
* **Built-in viewer** - browse answers interactively with `question.view(MODELS)`
|
|
64
67
|
* **Extensible** - highly configurable as long as your goal is comparing LLMs
|
|
65
68
|
|
|
66
69
|
## Cookbook
|
|
@@ -148,7 +151,7 @@ You can send more parallel requests by increasing `Config.max_workers`.
|
|
|
148
151
|
Suppose you have many prompts you want to send to models. There are three options:
|
|
149
152
|
1. Have a separate Question object for each prompt and execute them in a loop
|
|
150
153
|
2. Have a separate Question object for each prompt and execute them in parallel
|
|
151
|
-
3. Have a single Question object with many paraphrases and then split the resulting dataframe (using any of the `paraphrase_ix
|
|
154
|
+
3. Have a single Question object with many paraphrases and then split the resulting dataframe (using any of the `paraphrase_ix` or `question` columns)
|
|
152
155
|
|
|
153
156
|
Option 1 will be slow - the more quick questions you have, the worse.
|
|
154
157
|
Option 2 will be fast, but you need to write parallelization yourself. Question should be thread-safe, but parallel execution of questions was **never** tested. One thing that won't work: `llmcomp.Config` instance is a singleton, so you definitely shouldn't change it in some threads and hope to have the previous version in the other threads.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
llmcomp/__init__.py,sha256=y_oUvd0Q3jhF-lf8UD3eF-2ppEuZmccqpYJItXEoTns,267
|
|
2
|
+
llmcomp/config.py,sha256=xADWhqsQphJZQvf7WemWencmWuBnvTN_KeJrjWfnmHY,8942
|
|
3
|
+
llmcomp/default_adapters.py,sha256=txs6NUOwGttC8jUahaRsoPCTbE5riBE7yKdAGPvKRhM,2578
|
|
4
|
+
llmcomp/utils.py,sha256=8-jakxvwbMqfDkelE9ZY1q8Fo538Y_ryRv6PizRhHR0,2683
|
|
5
|
+
llmcomp/finetuning/__init__.py,sha256=UEdwtJNVVqWjhrxvLvRLW4W4xjkKKwOR-GRkDxCP2Qo,58
|
|
6
|
+
llmcomp/finetuning/manager.py,sha256=6G0CW3NWK8vdfBoAjH0HATx_g16wwq5oU0mlHs-q28o,19083
|
|
7
|
+
llmcomp/finetuning/update_jobs.py,sha256=blsHzg_ViTa2hBJtWCqR5onttehTtmXn3vmCTNd_hJw,980
|
|
8
|
+
llmcomp/finetuning/validation.py,sha256=v4FoFw8woo5No9A01ktuALsMsXdgb3N2rS58ttBUmHY,14047
|
|
9
|
+
llmcomp/question/judge.py,sha256=tNY94AHqncrbl2gf-g_Y3lepJ_HrahJRH-WgQyokegk,6568
|
|
10
|
+
llmcomp/question/plots.py,sha256=rKh6U2CboznTPRlpBSgFW5-j3rWGw8QvngMkF1yVB6c,12468
|
|
11
|
+
llmcomp/question/question.py,sha256=EO6MAHqz46ksKAE4NysN5gyEoU4KAcrkJkTwqKvoT_Y,41799
|
|
12
|
+
llmcomp/question/result.py,sha256=UHpXVANR0jM7sJig2BtDDGh43ysBf8RiTZrXvx-Bi7c,8845
|
|
13
|
+
llmcomp/question/viewer.py,sha256=82a5iL_lFjRs3hDS0igoFrc5zedCAzJ23zrmY8G3bZM,17843
|
|
14
|
+
llmcomp/runner/chat_completion.py,sha256=iDiWE0N0_MYfggD-ouyfUPyaADt7602K5Wo16a7JJo4,967
|
|
15
|
+
llmcomp/runner/model_adapter.py,sha256=Dua98E7aBVrCaZ2Ep44vl164oFkpH1P78YqImQkns4U,3406
|
|
16
|
+
llmcomp/runner/runner.py,sha256=B8p9b3At9JWWIW-mlADwyelJKqHxW4CIorSWyaD3gHM,12294
|
|
17
|
+
llmcomp-1.3.1.dist-info/METADATA,sha256=A6fObtQ4qpYa9gWU8rAO5zH-sfyqJcXtiOwdkkla290,12987
|
|
18
|
+
llmcomp-1.3.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
19
|
+
llmcomp-1.3.1.dist-info/entry_points.txt,sha256=1aoN8_W9LDUnX7OIOX7ACmzNkbBMJ6GqNn_A1KUKjQc,76
|
|
20
|
+
llmcomp-1.3.1.dist-info/licenses/LICENSE,sha256=z7WR2X27WF_wZNuzfNFNlkt9cU7eFwP_3-qx7RyrGK4,1064
|
|
21
|
+
llmcomp-1.3.1.dist-info/RECORD,,
|
llmcomp-1.2.4.dist-info/RECORD
DELETED
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
llmcomp/__init__.py,sha256=y_oUvd0Q3jhF-lf8UD3eF-2ppEuZmccqpYJItXEoTns,267
|
|
2
|
-
llmcomp/config.py,sha256=xADWhqsQphJZQvf7WemWencmWuBnvTN_KeJrjWfnmHY,8942
|
|
3
|
-
llmcomp/default_adapters.py,sha256=txs6NUOwGttC8jUahaRsoPCTbE5riBE7yKdAGPvKRhM,2578
|
|
4
|
-
llmcomp/utils.py,sha256=8-jakxvwbMqfDkelE9ZY1q8Fo538Y_ryRv6PizRhHR0,2683
|
|
5
|
-
llmcomp/finetuning/__init__.py,sha256=UEdwtJNVVqWjhrxvLvRLW4W4xjkKKwOR-GRkDxCP2Qo,58
|
|
6
|
-
llmcomp/finetuning/manager.py,sha256=6U5oQkOL_aGclFyhlQduTUhx4i7rjk6TLI3OtgA8L_o,18257
|
|
7
|
-
llmcomp/finetuning/update_jobs.py,sha256=blsHzg_ViTa2hBJtWCqR5onttehTtmXn3vmCTNd_hJw,980
|
|
8
|
-
llmcomp/question/judge.py,sha256=ovlEVp4XfgMc_qxYc4M7eq5qS-7C_WLjJklsO9wfU34,6105
|
|
9
|
-
llmcomp/question/plots.py,sha256=2uZTSN1s7Y3pnx2jiGtfUdWfQt2812Oo-eDsO2ZTUlE,9617
|
|
10
|
-
llmcomp/question/question.py,sha256=2CvE0xePLnD5SUJsE_ZyvAIE_36rjjW37fUqG3NHTV0,39171
|
|
11
|
-
llmcomp/question/result.py,sha256=EcgXV-CbLNAQ1Bu0p-0QcjtrwBDt1WxSINwYuMmWoGs,8216
|
|
12
|
-
llmcomp/runner/chat_completion.py,sha256=iDiWE0N0_MYfggD-ouyfUPyaADt7602K5Wo16a7JJo4,967
|
|
13
|
-
llmcomp/runner/model_adapter.py,sha256=Dua98E7aBVrCaZ2Ep44vl164oFkpH1P78YqImQkns4U,3406
|
|
14
|
-
llmcomp/runner/runner.py,sha256=C_SQ8pVGaGO57_4B85PMYWyntznsJnIwUOwnH3zP7IA,11494
|
|
15
|
-
llmcomp-1.2.4.dist-info/METADATA,sha256=TN_1IckuzHBhHnObNRHwOFHZKEqfA2F6cR1KJzjazoI,12762
|
|
16
|
-
llmcomp-1.2.4.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
17
|
-
llmcomp-1.2.4.dist-info/entry_points.txt,sha256=1aoN8_W9LDUnX7OIOX7ACmzNkbBMJ6GqNn_A1KUKjQc,76
|
|
18
|
-
llmcomp-1.2.4.dist-info/licenses/LICENSE,sha256=z7WR2X27WF_wZNuzfNFNlkt9cU7eFwP_3-qx7RyrGK4,1064
|
|
19
|
-
llmcomp-1.2.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|