llmcomp 1.2.3__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llmcomp/finetuning/manager.py +29 -22
- llmcomp/finetuning/validation.py +406 -0
- llmcomp/question/judge.py +11 -0
- llmcomp/question/plots.py +123 -68
- llmcomp/question/question.py +235 -187
- llmcomp/question/result.py +1 -1
- llmcomp/question/viewer.py +459 -0
- llmcomp/runner/model_adapter.py +7 -2
- llmcomp/runner/runner.py +32 -18
- {llmcomp-1.2.3.dist-info → llmcomp-1.3.0.dist-info}/METADATA +12 -9
- llmcomp-1.3.0.dist-info/RECORD +21 -0
- llmcomp-1.2.3.dist-info/RECORD +0 -19
- {llmcomp-1.2.3.dist-info → llmcomp-1.3.0.dist-info}/WHEEL +0 -0
- {llmcomp-1.2.3.dist-info → llmcomp-1.3.0.dist-info}/entry_points.txt +0 -0
- {llmcomp-1.2.3.dist-info → llmcomp-1.3.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: llmcomp
|
|
3
|
-
Version: 1.2.3
|
|
3
|
+
Version: 1.3.0
|
|
4
4
|
Summary: Research library for black-box experiments on language models.
|
|
5
5
|
Project-URL: Homepage, https://github.com/johny-b/llmcomp
|
|
6
6
|
Project-URL: Repository, https://github.com/johny-b/llmcomp
|
|
@@ -15,6 +15,7 @@ Requires-Dist: openai>=1.0.0
|
|
|
15
15
|
Requires-Dist: pandas
|
|
16
16
|
Requires-Dist: pyyaml
|
|
17
17
|
Requires-Dist: requests
|
|
18
|
+
Requires-Dist: streamlit>=1.20.0
|
|
18
19
|
Requires-Dist: tqdm
|
|
19
20
|
Description-Content-Type: text/markdown
|
|
20
21
|
|
|
@@ -49,9 +50,9 @@ question = Question.create(
|
|
|
49
50
|
samples_per_paraphrase=100,
|
|
50
51
|
temperature=1,
|
|
51
52
|
)
|
|
52
|
-
question.
|
|
53
|
-
|
|
54
|
-
|
|
53
|
+
df = question.df(MODELS) # Dataframe with the results
|
|
54
|
+
question.plot(MODELS, min_fraction=0.03) # Aggregated bar chart
|
|
55
|
+
question.view(MODELS) # Interactive browser for individual responses
|
|
55
56
|
```
|
|
56
57
|
|
|
57
58
|
## Main features
|
|
@@ -61,6 +62,7 @@ print(df.head(1).iloc[0])
|
|
|
61
62
|
* **Parallel requests** - configurable concurrency across models
|
|
62
63
|
* **Multi-key support** - use `OPENAI_API_KEY_0`, `OPENAI_API_KEY_1`, etc. to compare models from different orgs
|
|
63
64
|
* **Provider-agnostic** - works with any OpenAI-compatible API ([OpenRouter](https://openrouter.ai/docs/quickstart#using-the-openai-sdk), [Tinker](https://tinker-docs.thinkingmachines.ai/compatible-apis/openai), etc.)
|
|
65
|
+
* **Built-in viewer** - browse answers interactively with `question.view(MODELS)`
|
|
64
66
|
* **Extensible** - highly configurable as long as your goal is comparing LLMs
|
|
65
67
|
|
|
66
68
|
## Cookbook
|
|
@@ -78,10 +80,11 @@ Examples 1-4 demonstrate all key functionalities of llmcomp.
|
|
|
78
80
|
| 7 | [tinker.py](examples/tinker.py) | Using Tinker models via OpenAI-compatible API. |
|
|
79
81
|
| 8 | [openrouter.py](examples/openrouter.py) | Using OpenRouter models via OpenAI-compatible API. |
|
|
80
82
|
| 9 | [model_adapter.py](examples/model_adapter.py) | Setting model-specific API parameters |
|
|
81
|
-
| 10 | [x_mod_57.py](examples/x_mod_57.py) | Complete script I used for a short blogpost. |
|
|
82
83
|
| 10 | [runner.py](examples/runner.py) | Direct Runner usage for low-level API interactions. |
|
|
83
84
|
| 11 | [create_finetuning_job.py](examples/create_finetuning_job.py) | Create an OpenAI [finetuning](#finetuning) job & manage models. |
|
|
85
|
+
| 12 | [emergent misalignment replication](https://github.com/emergent-misalignment/emergent-misalignment/blob/main/evaluation/evaluate_openai.py) | Complete script replicating results from a paper |
|
|
84
86
|
| 13 | [old bird names replication](https://github.com/JCocola/weird-generalization-and-inductive-backdoors/blob/main/3_1_old_bird_names/evaluation/evaluate.py) | Complete script replicating results from a paper |
|
|
87
|
+
| 14 | [x_mod_57.py](examples/x_mod_57.py) | Complete script I used for a short blogpost. |
|
|
85
88
|
|
|
86
89
|
## Model provider configuration
|
|
87
90
|
|
|
@@ -97,7 +100,7 @@ You can interfere with this process:
|
|
|
97
100
|
```
|
|
98
101
|
from llmcomp import Config
|
|
99
102
|
|
|
100
|
-
# See all pairs
|
|
103
|
+
# See all pairs read from the env variables
|
|
101
104
|
print(Config.url_key_pairs)
|
|
102
105
|
|
|
103
106
|
# Get the OpenAI client instance for a given model.
|
|
@@ -106,10 +109,10 @@ print(client.base_url, client.api_key[:16] + "...")
|
|
|
106
109
|
|
|
107
110
|
# Set the pairs to whatever you want.
|
|
108
111
|
# You can add other OpenAI-compatible providers, or e.g. local inference.
|
|
109
|
-
Config.url_key_pairs = [("http://localhost:8000/v1", "fake-key")]
|
|
112
|
+
Config.url_key_pairs = [("http://localhost:8000/v1", "fake-key", "FAKE_API_KEY")]
|
|
110
113
|
```
|
|
111
114
|
|
|
112
|
-
This has an unintended consequence: llmcomp sends some nonsensical requests. E.g. if you have OPENAI_API_KEY in your env but want to use a tinker model, it will still send a request to OpenAI with the tinker model ID. This is easy to improve, but also doesn't seem important.
|
|
115
|
+
This provider discovery process has an unintended consequence: llmcomp sends some nonsensical requests. E.g. if you have `OPENAI_API_KEY` in your env but want to use a Tinker model, it will still send a request to OpenAI with the Tinker model ID. This is easy to improve, but also doesn't seem important.
|
|
113
116
|
|
|
114
117
|
## API reference
|
|
115
118
|
|
|
@@ -147,7 +150,7 @@ You can send more parallel requests by increasing `Config.max_workers`.
|
|
|
147
150
|
Suppose you have many prompts you want to send to models. There are three options:
|
|
148
151
|
1. Have a separate Question object for each prompt and execute them in a loop
|
|
149
152
|
2. Have a separate Question object for each prompt and execute them in parallel
|
|
150
|
-
3. Have a single Question object with many paraphrases and then split the resulting dataframe (using any of the `paraphrase_ix
|
|
153
|
+
3. Have a single Question object with many paraphrases and then split the resulting dataframe (using either the `paraphrase_ix` or the `question` column)
|
|
151
154
|
|
|
152
155
|
Option 1 will be slow - the more quick questions you have, the worse.
|
|
153
156
|
Option 2 will be fast, but you need to write the parallelization yourself. Question should be thread-safe, but parallel execution of questions was **never** tested. One thing that won't work: the `llmcomp.Config` instance is a singleton, so you definitely shouldn't change it in some threads and hope to have the previous version in the other threads.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
llmcomp/__init__.py,sha256=y_oUvd0Q3jhF-lf8UD3eF-2ppEuZmccqpYJItXEoTns,267
|
|
2
|
+
llmcomp/config.py,sha256=xADWhqsQphJZQvf7WemWencmWuBnvTN_KeJrjWfnmHY,8942
|
|
3
|
+
llmcomp/default_adapters.py,sha256=txs6NUOwGttC8jUahaRsoPCTbE5riBE7yKdAGPvKRhM,2578
|
|
4
|
+
llmcomp/utils.py,sha256=8-jakxvwbMqfDkelE9ZY1q8Fo538Y_ryRv6PizRhHR0,2683
|
|
5
|
+
llmcomp/finetuning/__init__.py,sha256=UEdwtJNVVqWjhrxvLvRLW4W4xjkKKwOR-GRkDxCP2Qo,58
|
|
6
|
+
llmcomp/finetuning/manager.py,sha256=6G0CW3NWK8vdfBoAjH0HATx_g16wwq5oU0mlHs-q28o,19083
|
|
7
|
+
llmcomp/finetuning/update_jobs.py,sha256=blsHzg_ViTa2hBJtWCqR5onttehTtmXn3vmCTNd_hJw,980
|
|
8
|
+
llmcomp/finetuning/validation.py,sha256=v4FoFw8woo5No9A01ktuALsMsXdgb3N2rS58ttBUmHY,14047
|
|
9
|
+
llmcomp/question/judge.py,sha256=tNY94AHqncrbl2gf-g_Y3lepJ_HrahJRH-WgQyokegk,6568
|
|
10
|
+
llmcomp/question/plots.py,sha256=Izp9jxWzQDgRgycgM7_-lhIkqx7yr_WBQedUcUcpaFA,11164
|
|
11
|
+
llmcomp/question/question.py,sha256=cLOVp8ZD0O-Y1UI8RVpi6ZD3ulRtY8PeFwEgeAnLzvs,41100
|
|
12
|
+
llmcomp/question/result.py,sha256=psc9tQpwEEhS4LGxaI7GhqCE1CSAmCo39yrKap9cLjA,8216
|
|
13
|
+
llmcomp/question/viewer.py,sha256=hMHWr5cONWXF37ybXJTI_kudSz3xaA0shkQFRoNRZmI,16380
|
|
14
|
+
llmcomp/runner/chat_completion.py,sha256=iDiWE0N0_MYfggD-ouyfUPyaADt7602K5Wo16a7JJo4,967
|
|
15
|
+
llmcomp/runner/model_adapter.py,sha256=Dua98E7aBVrCaZ2Ep44vl164oFkpH1P78YqImQkns4U,3406
|
|
16
|
+
llmcomp/runner/runner.py,sha256=B8p9b3At9JWWIW-mlADwyelJKqHxW4CIorSWyaD3gHM,12294
|
|
17
|
+
llmcomp-1.3.0.dist-info/METADATA,sha256=CWC5sdrfuvQWWFOwjj7RJIzk0Rgb3EKCRPA75D5Wu4U,12963
|
|
18
|
+
llmcomp-1.3.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
19
|
+
llmcomp-1.3.0.dist-info/entry_points.txt,sha256=1aoN8_W9LDUnX7OIOX7ACmzNkbBMJ6GqNn_A1KUKjQc,76
|
|
20
|
+
llmcomp-1.3.0.dist-info/licenses/LICENSE,sha256=z7WR2X27WF_wZNuzfNFNlkt9cU7eFwP_3-qx7RyrGK4,1064
|
|
21
|
+
llmcomp-1.3.0.dist-info/RECORD,,
|
llmcomp-1.2.3.dist-info/RECORD
DELETED
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
llmcomp/__init__.py,sha256=y_oUvd0Q3jhF-lf8UD3eF-2ppEuZmccqpYJItXEoTns,267
|
|
2
|
-
llmcomp/config.py,sha256=xADWhqsQphJZQvf7WemWencmWuBnvTN_KeJrjWfnmHY,8942
|
|
3
|
-
llmcomp/default_adapters.py,sha256=txs6NUOwGttC8jUahaRsoPCTbE5riBE7yKdAGPvKRhM,2578
|
|
4
|
-
llmcomp/utils.py,sha256=8-jakxvwbMqfDkelE9ZY1q8Fo538Y_ryRv6PizRhHR0,2683
|
|
5
|
-
llmcomp/finetuning/__init__.py,sha256=UEdwtJNVVqWjhrxvLvRLW4W4xjkKKwOR-GRkDxCP2Qo,58
|
|
6
|
-
llmcomp/finetuning/manager.py,sha256=JaILoQYkNA9jIM_WR9eZactFHHcNFVeQeObXjQS8KcI,18779
|
|
7
|
-
llmcomp/finetuning/update_jobs.py,sha256=blsHzg_ViTa2hBJtWCqR5onttehTtmXn3vmCTNd_hJw,980
|
|
8
|
-
llmcomp/question/judge.py,sha256=ovlEVp4XfgMc_qxYc4M7eq5qS-7C_WLjJklsO9wfU34,6105
|
|
9
|
-
llmcomp/question/plots.py,sha256=2uZTSN1s7Y3pnx2jiGtfUdWfQt2812Oo-eDsO2ZTUlE,9617
|
|
10
|
-
llmcomp/question/question.py,sha256=2CvE0xePLnD5SUJsE_ZyvAIE_36rjjW37fUqG3NHTV0,39171
|
|
11
|
-
llmcomp/question/result.py,sha256=EcgXV-CbLNAQ1Bu0p-0QcjtrwBDt1WxSINwYuMmWoGs,8216
|
|
12
|
-
llmcomp/runner/chat_completion.py,sha256=iDiWE0N0_MYfggD-ouyfUPyaADt7602K5Wo16a7JJo4,967
|
|
13
|
-
llmcomp/runner/model_adapter.py,sha256=xBf6_WZbwKKTctecATujX9ZKQLDetDh-7UeCGaXJ9Zc,3244
|
|
14
|
-
llmcomp/runner/runner.py,sha256=C_SQ8pVGaGO57_4B85PMYWyntznsJnIwUOwnH3zP7IA,11494
|
|
15
|
-
llmcomp-1.2.3.dist-info/METADATA,sha256=NJ4ZqfAZo0SC5n1gfSKmPxAFjujTzRWlvkaeBrfUdg8,12518
|
|
16
|
-
llmcomp-1.2.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
17
|
-
llmcomp-1.2.3.dist-info/entry_points.txt,sha256=1aoN8_W9LDUnX7OIOX7ACmzNkbBMJ6GqNn_A1KUKjQc,76
|
|
18
|
-
llmcomp-1.2.3.dist-info/licenses/LICENSE,sha256=z7WR2X27WF_wZNuzfNFNlkt9cU7eFwP_3-qx7RyrGK4,1064
|
|
19
|
-
llmcomp-1.2.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|