llmcomp 1.2.4__py3-none-any.whl → 1.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llmcomp/finetuning/manager.py +21 -0
- llmcomp/finetuning/validation.py +406 -0
- llmcomp/question/judge.py +11 -0
- llmcomp/question/plots.py +150 -71
- llmcomp/question/question.py +255 -190
- llmcomp/question/result.py +33 -10
- llmcomp/question/viewer.py +488 -0
- llmcomp/runner/runner.py +32 -18
- {llmcomp-1.2.4.dist-info → llmcomp-1.3.1.dist-info}/METADATA +8 -5
- llmcomp-1.3.1.dist-info/RECORD +21 -0
- llmcomp-1.2.4.dist-info/RECORD +0 -19
- {llmcomp-1.2.4.dist-info → llmcomp-1.3.1.dist-info}/WHEEL +0 -0
- {llmcomp-1.2.4.dist-info → llmcomp-1.3.1.dist-info}/entry_points.txt +0 -0
- {llmcomp-1.2.4.dist-info → llmcomp-1.3.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: llmcomp
|
|
3
|
-
Version: 1.2.4
|
|
3
|
+
Version: 1.3.1
|
|
4
4
|
Summary: Research library for black-box experiments on language models.
|
|
5
5
|
Project-URL: Homepage, https://github.com/johny-b/llmcomp
|
|
6
6
|
Project-URL: Repository, https://github.com/johny-b/llmcomp
|
|
@@ -9,12 +9,14 @@ License: MIT
|
|
|
9
9
|
License-File: LICENSE
|
|
10
10
|
Requires-Python: >=3.9
|
|
11
11
|
Requires-Dist: backoff
|
|
12
|
+
Requires-Dist: filelock
|
|
12
13
|
Requires-Dist: matplotlib
|
|
13
14
|
Requires-Dist: numpy
|
|
14
15
|
Requires-Dist: openai>=1.0.0
|
|
15
16
|
Requires-Dist: pandas
|
|
16
17
|
Requires-Dist: pyyaml
|
|
17
18
|
Requires-Dist: requests
|
|
19
|
+
Requires-Dist: streamlit>=1.20.0
|
|
18
20
|
Requires-Dist: tqdm
|
|
19
21
|
Description-Content-Type: text/markdown
|
|
20
22
|
|
|
@@ -49,9 +51,9 @@ question = Question.create(
|
|
|
49
51
|
samples_per_paraphrase=100,
|
|
50
52
|
temperature=1,
|
|
51
53
|
)
|
|
52
|
-
question.
|
|
53
|
-
|
|
54
|
-
|
|
54
|
+
df = question.df(MODELS) # Dataframe with the results
|
|
55
|
+
question.plot(MODELS, min_fraction=0.03) # Aggregated bar chart
|
|
56
|
+
question.view(MODELS) # Interactive browser for individual responses
|
|
55
57
|
```
|
|
56
58
|
|
|
57
59
|
## Main features
|
|
@@ -61,6 +63,7 @@ print(df.head(1).iloc[0])
|
|
|
61
63
|
* **Parallel requests** - configurable concurrency across models
|
|
62
64
|
* **Multi-key support** - use `OPENAI_API_KEY_0`, `OPENAI_API_KEY_1`, etc. to compare models from different orgs
|
|
63
65
|
* **Provider-agnostic** - works with any OpenAI-compatible API ([OpenRouter](https://openrouter.ai/docs/quickstart#using-the-openai-sdk), [Tinker](https://tinker-docs.thinkingmachines.ai/compatible-apis/openai), etc.)
|
|
66
|
+
* **Built-in viewer** - browse answers interactively with `question.view(MODELS)`
|
|
64
67
|
* **Extensible** - highly configurable as long as your goal is comparing LLMs
|
|
65
68
|
|
|
66
69
|
## Cookbook
|
|
@@ -148,7 +151,7 @@ You can send more parallel requests by increasing `Config.max_workers`.
|
|
|
148
151
|
Suppose you have many prompts you want to send to models. There are three options:
|
|
149
152
|
1. Have a separate Question object for each prompt and execute them in a loop
|
|
150
153
|
2. Have a separate Question object for each prompt and execute them in parallel
|
|
151
|
-
3. Have a single Question object with many paraphrases and then split the resulting dataframe (using any of the `paraphrase_ix
|
|
154
|
+
3. Have a single Question object with many paraphrases and then split the resulting dataframe (using any of the `paraphrase_ix` or `question` columns)
|
|
152
155
|
|
|
153
156
|
Option 1 will be slow - the more quick questions you have, the worse.
|
|
154
157
|
Option 2 will be fast, but you need to write parallelization yourself. Question should be thread-safe, but parallel execution of questions was **never** tested. One thing that won't work: `llmcomp.Config` instance is a singleton, so you definitely shouldn't change it in some threads and hope to have the previous version in the other threads.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
llmcomp/__init__.py,sha256=y_oUvd0Q3jhF-lf8UD3eF-2ppEuZmccqpYJItXEoTns,267
|
|
2
|
+
llmcomp/config.py,sha256=xADWhqsQphJZQvf7WemWencmWuBnvTN_KeJrjWfnmHY,8942
|
|
3
|
+
llmcomp/default_adapters.py,sha256=txs6NUOwGttC8jUahaRsoPCTbE5riBE7yKdAGPvKRhM,2578
|
|
4
|
+
llmcomp/utils.py,sha256=8-jakxvwbMqfDkelE9ZY1q8Fo538Y_ryRv6PizRhHR0,2683
|
|
5
|
+
llmcomp/finetuning/__init__.py,sha256=UEdwtJNVVqWjhrxvLvRLW4W4xjkKKwOR-GRkDxCP2Qo,58
|
|
6
|
+
llmcomp/finetuning/manager.py,sha256=6G0CW3NWK8vdfBoAjH0HATx_g16wwq5oU0mlHs-q28o,19083
|
|
7
|
+
llmcomp/finetuning/update_jobs.py,sha256=blsHzg_ViTa2hBJtWCqR5onttehTtmXn3vmCTNd_hJw,980
|
|
8
|
+
llmcomp/finetuning/validation.py,sha256=v4FoFw8woo5No9A01ktuALsMsXdgb3N2rS58ttBUmHY,14047
|
|
9
|
+
llmcomp/question/judge.py,sha256=tNY94AHqncrbl2gf-g_Y3lepJ_HrahJRH-WgQyokegk,6568
|
|
10
|
+
llmcomp/question/plots.py,sha256=rKh6U2CboznTPRlpBSgFW5-j3rWGw8QvngMkF1yVB6c,12468
|
|
11
|
+
llmcomp/question/question.py,sha256=EO6MAHqz46ksKAE4NysN5gyEoU4KAcrkJkTwqKvoT_Y,41799
|
|
12
|
+
llmcomp/question/result.py,sha256=UHpXVANR0jM7sJig2BtDDGh43ysBf8RiTZrXvx-Bi7c,8845
|
|
13
|
+
llmcomp/question/viewer.py,sha256=82a5iL_lFjRs3hDS0igoFrc5zedCAzJ23zrmY8G3bZM,17843
|
|
14
|
+
llmcomp/runner/chat_completion.py,sha256=iDiWE0N0_MYfggD-ouyfUPyaADt7602K5Wo16a7JJo4,967
|
|
15
|
+
llmcomp/runner/model_adapter.py,sha256=Dua98E7aBVrCaZ2Ep44vl164oFkpH1P78YqImQkns4U,3406
|
|
16
|
+
llmcomp/runner/runner.py,sha256=B8p9b3At9JWWIW-mlADwyelJKqHxW4CIorSWyaD3gHM,12294
|
|
17
|
+
llmcomp-1.3.1.dist-info/METADATA,sha256=A6fObtQ4qpYa9gWU8rAO5zH-sfyqJcXtiOwdkkla290,12987
|
|
18
|
+
llmcomp-1.3.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
19
|
+
llmcomp-1.3.1.dist-info/entry_points.txt,sha256=1aoN8_W9LDUnX7OIOX7ACmzNkbBMJ6GqNn_A1KUKjQc,76
|
|
20
|
+
llmcomp-1.3.1.dist-info/licenses/LICENSE,sha256=z7WR2X27WF_wZNuzfNFNlkt9cU7eFwP_3-qx7RyrGK4,1064
|
|
21
|
+
llmcomp-1.3.1.dist-info/RECORD,,
|
llmcomp-1.2.4.dist-info/RECORD
DELETED
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
llmcomp/__init__.py,sha256=y_oUvd0Q3jhF-lf8UD3eF-2ppEuZmccqpYJItXEoTns,267
|
|
2
|
-
llmcomp/config.py,sha256=xADWhqsQphJZQvf7WemWencmWuBnvTN_KeJrjWfnmHY,8942
|
|
3
|
-
llmcomp/default_adapters.py,sha256=txs6NUOwGttC8jUahaRsoPCTbE5riBE7yKdAGPvKRhM,2578
|
|
4
|
-
llmcomp/utils.py,sha256=8-jakxvwbMqfDkelE9ZY1q8Fo538Y_ryRv6PizRhHR0,2683
|
|
5
|
-
llmcomp/finetuning/__init__.py,sha256=UEdwtJNVVqWjhrxvLvRLW4W4xjkKKwOR-GRkDxCP2Qo,58
|
|
6
|
-
llmcomp/finetuning/manager.py,sha256=6U5oQkOL_aGclFyhlQduTUhx4i7rjk6TLI3OtgA8L_o,18257
|
|
7
|
-
llmcomp/finetuning/update_jobs.py,sha256=blsHzg_ViTa2hBJtWCqR5onttehTtmXn3vmCTNd_hJw,980
|
|
8
|
-
llmcomp/question/judge.py,sha256=ovlEVp4XfgMc_qxYc4M7eq5qS-7C_WLjJklsO9wfU34,6105
|
|
9
|
-
llmcomp/question/plots.py,sha256=2uZTSN1s7Y3pnx2jiGtfUdWfQt2812Oo-eDsO2ZTUlE,9617
|
|
10
|
-
llmcomp/question/question.py,sha256=2CvE0xePLnD5SUJsE_ZyvAIE_36rjjW37fUqG3NHTV0,39171
|
|
11
|
-
llmcomp/question/result.py,sha256=EcgXV-CbLNAQ1Bu0p-0QcjtrwBDt1WxSINwYuMmWoGs,8216
|
|
12
|
-
llmcomp/runner/chat_completion.py,sha256=iDiWE0N0_MYfggD-ouyfUPyaADt7602K5Wo16a7JJo4,967
|
|
13
|
-
llmcomp/runner/model_adapter.py,sha256=Dua98E7aBVrCaZ2Ep44vl164oFkpH1P78YqImQkns4U,3406
|
|
14
|
-
llmcomp/runner/runner.py,sha256=C_SQ8pVGaGO57_4B85PMYWyntznsJnIwUOwnH3zP7IA,11494
|
|
15
|
-
llmcomp-1.2.4.dist-info/METADATA,sha256=TN_1IckuzHBhHnObNRHwOFHZKEqfA2F6cR1KJzjazoI,12762
|
|
16
|
-
llmcomp-1.2.4.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
17
|
-
llmcomp-1.2.4.dist-info/entry_points.txt,sha256=1aoN8_W9LDUnX7OIOX7ACmzNkbBMJ6GqNn_A1KUKjQc,76
|
|
18
|
-
llmcomp-1.2.4.dist-info/licenses/LICENSE,sha256=z7WR2X27WF_wZNuzfNFNlkt9cU7eFwP_3-qx7RyrGK4,1064
|
|
19
|
-
llmcomp-1.2.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|