ai2-olmo-eval 0.7.1__py3-none-any.whl → 0.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ai2_olmo_eval-0.7.1.dist-info → ai2_olmo_eval-0.7.2.dist-info}/METADATA +1 -1
- {ai2_olmo_eval-0.7.1.dist-info → ai2_olmo_eval-0.7.2.dist-info}/RECORD +19 -7
- {ai2_olmo_eval-0.7.1.dist-info → ai2_olmo_eval-0.7.2.dist-info}/WHEEL +1 -1
- olmo_eval/oe_eval_tasks/basic_skills_arithmetic/rc_5shot/config.json +23 -0
- olmo_eval/oe_eval_tasks/basic_skills_arithmetic/rc_5shot/requests.jsonl.gz +0 -0
- olmo_eval/oe_eval_tasks/basic_skills_coding/rc_5shot/config.json +23 -0
- olmo_eval/oe_eval_tasks/basic_skills_coding/rc_5shot/requests.jsonl.gz +0 -0
- olmo_eval/oe_eval_tasks/basic_skills_common_knowledge/rc_5shot/config.json +23 -0
- olmo_eval/oe_eval_tasks/basic_skills_common_knowledge/rc_5shot/requests.jsonl.gz +0 -0
- olmo_eval/oe_eval_tasks/basic_skills_logical_reasoning/rc_5shot/config.json +23 -0
- olmo_eval/oe_eval_tasks/basic_skills_logical_reasoning/rc_5shot/requests.jsonl.gz +0 -0
- olmo_eval/oe_eval_tasks/basic_skills_pattern/rc_5shot/config.json +23 -0
- olmo_eval/oe_eval_tasks/basic_skills_pattern/rc_5shot/requests.jsonl.gz +0 -0
- olmo_eval/oe_eval_tasks/basic_skills_string_operations/rc_5shot/config.json +23 -0
- olmo_eval/oe_eval_tasks/basic_skills_string_operations/rc_5shot/requests.jsonl.gz +0 -0
- olmo_eval/tasks.py +40 -0
- olmo_eval/version.py +1 -1
- {ai2_olmo_eval-0.7.1.dist-info → ai2_olmo_eval-0.7.2.dist-info}/licenses/LICENSE +0 -0
- {ai2_olmo_eval-0.7.1.dist-info → ai2_olmo_eval-0.7.2.dist-info}/top_level.txt +0 -0
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
ai2_olmo_eval-0.7.
|
|
1
|
+
ai2_olmo_eval-0.7.2.dist-info/licenses/LICENSE,sha256=YvuKOpYh3COIF0yqq-nCMXtpS7mh1GyYvPVlW2j1G-M,11359
|
|
2
2
|
olmo_eval/__init__.py,sha256=49RxnAaJNk8U9XP3SF5MjyFIxLSkxH0vXQuZgnEOi44,283
|
|
3
3
|
olmo_eval/metrics.py,sha256=NcI_1B3BV-DC9RXjsSIftU-2GeF8vvU6SNyJnlYlKwU,18705
|
|
4
|
-
olmo_eval/tasks.py,sha256=
|
|
4
|
+
olmo_eval/tasks.py,sha256=QGLyF7JA2-T9mkh-N4cZGNOQp9si90yQSS41T3x5Lak,79630
|
|
5
5
|
olmo_eval/tokenizer.py,sha256=PnkidE0nAtEA1QZjuQpE_bIwgAsHxodnaJRALAPqrJQ,5127
|
|
6
6
|
olmo_eval/util.py,sha256=ARmZmRQl8VOvnKQoUprb3cOunzcApeNhRdV4BMXZuvo,3856
|
|
7
|
-
olmo_eval/version.py,sha256=
|
|
7
|
+
olmo_eval/version.py,sha256=QWjPfx79C2NOQw2G7iDEsM4FKsLiGLCLNDzEx7EImf8,308
|
|
8
8
|
olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/data-00000-of-00001.arrow,sha256=TPWbMhBmticWjYp7TA3etcKbXbaoCDBWhxuqlD1bDJA,98080
|
|
9
9
|
olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/dataset_info.json,sha256=iZumP5Udu8LD7cbew3o7nNpnGu-o9jPaMxUrNDDNIVY,1795
|
|
10
10
|
olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/state.json,sha256=6Q1XhM-HMZcymuGAKBC_8RjMBKgJSaR_6lLUO9Z8XwE,255
|
|
@@ -600,6 +600,18 @@ olmo_eval/oe_eval_tasks/arc_easy/val_mc_5shot/config.json,sha256=CEgPNm226vxmMim
|
|
|
600
600
|
olmo_eval/oe_eval_tasks/arc_easy/val_mc_5shot/requests.jsonl.gz,sha256=LZ7XuWwDo6zJTqhgpZgHNj6yi-xOXb-TQxl9yxB9gVg,114271
|
|
601
601
|
olmo_eval/oe_eval_tasks/arc_easy/val_rc_5shot/config.json,sha256=LeNP534voujfcp9ph8SKHfnfYPjfSu8ik3HWiXt3TFM,761
|
|
602
602
|
olmo_eval/oe_eval_tasks/arc_easy/val_rc_5shot/requests.jsonl.gz,sha256=28UmHQnAB2DIlfYbqhuhJ4AjAVLDAHWWoEmaHlI-UKU,202290
|
|
603
|
+
olmo_eval/oe_eval_tasks/basic_skills_arithmetic/rc_5shot/config.json,sha256=_gSH-miyIWms4r3TSLCMihc42v7kt8tEPnqQJcgux-4,616
|
|
604
|
+
olmo_eval/oe_eval_tasks/basic_skills_arithmetic/rc_5shot/requests.jsonl.gz,sha256=iiVqzSVTiEk5lbq0WAiR8ujvBHHv73azRpvfuCIrEfI,215180
|
|
605
|
+
olmo_eval/oe_eval_tasks/basic_skills_coding/rc_5shot/config.json,sha256=19NZFpCouu7oEidoUBthKUekW87pT5pzR1bX1NJV77g,592
|
|
606
|
+
olmo_eval/oe_eval_tasks/basic_skills_coding/rc_5shot/requests.jsonl.gz,sha256=suXkEgQLUT-XK_EDyQKIoniNYNJvo4vUpe8-jyeNe-w,274302
|
|
607
|
+
olmo_eval/oe_eval_tasks/basic_skills_common_knowledge/rc_5shot/config.json,sha256=DOHsmlO6_OMBIl-oEfKT8O0yIj89I1gTV_uvOxdiT8M,652
|
|
608
|
+
olmo_eval/oe_eval_tasks/basic_skills_common_knowledge/rc_5shot/requests.jsonl.gz,sha256=ie673jV3ShxUhrqux3Y8YRNfAazKa8ayGEjo7hxEp1Y,237402
|
|
609
|
+
olmo_eval/oe_eval_tasks/basic_skills_logical_reasoning/rc_5shot/config.json,sha256=OB20jgxj00v3bvfsc1M1zyWGlEJvZdXBlg4L9NeGsZY,658
|
|
610
|
+
olmo_eval/oe_eval_tasks/basic_skills_logical_reasoning/rc_5shot/requests.jsonl.gz,sha256=5ElEBHtBq6tBQ1hqEbg9---XkUFV3GjcMGHFXxP_urs,284843
|
|
611
|
+
olmo_eval/oe_eval_tasks/basic_skills_pattern/rc_5shot/config.json,sha256=OAZyUX7pw7cEguIsSbs_fKXiuHh1sbEkpF7x9v6ZI80,598
|
|
612
|
+
olmo_eval/oe_eval_tasks/basic_skills_pattern/rc_5shot/requests.jsonl.gz,sha256=FY1pf-fTh5BNnN5H7uN0ksm21tdC6ewKsOhaOpN3760,71330
|
|
613
|
+
olmo_eval/oe_eval_tasks/basic_skills_string_operations/rc_5shot/config.json,sha256=TfwWhRHC_G17uqk60-pNROMNzzmd0rMTY5nPP0dje00,658
|
|
614
|
+
olmo_eval/oe_eval_tasks/basic_skills_string_operations/rc_5shot/requests.jsonl.gz,sha256=FIBdOQSDoQ99gDEpHYHYTmhW5qfClVP-rh3ll2I0fDQ,231341
|
|
603
615
|
olmo_eval/oe_eval_tasks/boolq/mc_5shot/config.json,sha256=87GTyDGser1tWfSWmktZ1X17jKXU1EZzHOJLMSbVspA,632
|
|
604
616
|
olmo_eval/oe_eval_tasks/boolq/mc_5shot/requests.jsonl.gz,sha256=uZ9ZkbFkiUn4XcCzypgPscTFTrVDexVC1L-e6zBiEMg,393249
|
|
605
617
|
olmo_eval/oe_eval_tasks/boolq/rc_0shot/config.json,sha256=d1GKQMIX1cUgnZHlUe9kgAZsgkMc1N2GnMlyhccO9pE,509
|
|
@@ -704,7 +716,7 @@ olmo_eval/oe_eval_tasks/winogrande/val_rc_5shot/config.json,sha256=ySjEVqTOj5GwC
|
|
|
704
716
|
olmo_eval/oe_eval_tasks/winogrande/val_rc_5shot/requests.jsonl.gz,sha256=knTzcqigWCfdYLN1Pl0TfCm0Fi1lRASWAo_SC6KtXsc,115262
|
|
705
717
|
olmo_eval/tokenizers/allenai_eleuther-ai-gpt-neox-20b-pii-special.json,sha256=yjXYcnpTO7Zjm_R4Gucrn9oA5paadiYM-ZZER5q_EXc,2114319
|
|
706
718
|
olmo_eval/tokenizers/allenai_gpt-neox-olmo-dolma-v1_5.json,sha256=mtM7Szmp-Dlzw_jEKgGUjdW4d6KKyaU1aVbE_07QtxQ,2115113
|
|
707
|
-
ai2_olmo_eval-0.7.
|
|
708
|
-
ai2_olmo_eval-0.7.
|
|
709
|
-
ai2_olmo_eval-0.7.
|
|
710
|
-
ai2_olmo_eval-0.7.
|
|
719
|
+
ai2_olmo_eval-0.7.2.dist-info/METADATA,sha256=PKJfkoDu4hrLzb6NA1MDfXOjZnUxQ4WFpJouWU1Cr_4,14398
|
|
720
|
+
ai2_olmo_eval-0.7.2.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
|
|
721
|
+
ai2_olmo_eval-0.7.2.dist-info/top_level.txt,sha256=Pryk28JTb89-j624Uy1gRZiE0YXI3czgbNIfJCl9-x0,10
|
|
722
|
+
ai2_olmo_eval-0.7.2.dist-info/RECORD,,
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
{
|
|
2
|
+
"task_name": "basic_skills_arithmetic",
|
|
3
|
+
"task_hash": "56711b967c78d896ef51ba00aef5cfb0",
|
|
4
|
+
"task_config": {
|
|
5
|
+
"dataset_path": "basic_skills_arithmetic",
|
|
6
|
+
"primary_metric": "acc_per_token",
|
|
7
|
+
"split": "validation",
|
|
8
|
+
"num_shots": 5,
|
|
9
|
+
"metadata": {
|
|
10
|
+
"regimes": [
|
|
11
|
+
"OLMES-v0.1"
|
|
12
|
+
],
|
|
13
|
+
"alias": "basic_skills_arithmetic:rc::olmes"
|
|
14
|
+
},
|
|
15
|
+
"generation_kwargs": {},
|
|
16
|
+
"context_kwargs": {},
|
|
17
|
+
"dataset_name": "arithmetic",
|
|
18
|
+
"task_name": "basic_skills_arithmetic",
|
|
19
|
+
"version": 0,
|
|
20
|
+
"task_core": "basic_skills_arithmetic"
|
|
21
|
+
},
|
|
22
|
+
"current_date": "2025-05-12 00:06:28 UTC"
|
|
23
|
+
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
{
|
|
2
|
+
"task_name": "basic_skills_coding",
|
|
3
|
+
"task_hash": "d748d1d8ba506d3d234eed529ef62c3e",
|
|
4
|
+
"task_config": {
|
|
5
|
+
"dataset_path": "basic_skills_coding",
|
|
6
|
+
"primary_metric": "acc_per_token",
|
|
7
|
+
"split": "validation",
|
|
8
|
+
"num_shots": 5,
|
|
9
|
+
"metadata": {
|
|
10
|
+
"regimes": [
|
|
11
|
+
"OLMES-v0.1"
|
|
12
|
+
],
|
|
13
|
+
"alias": "basic_skills_coding:rc::olmes"
|
|
14
|
+
},
|
|
15
|
+
"generation_kwargs": {},
|
|
16
|
+
"context_kwargs": {},
|
|
17
|
+
"dataset_name": "coding",
|
|
18
|
+
"task_name": "basic_skills_coding",
|
|
19
|
+
"version": 0,
|
|
20
|
+
"task_core": "basic_skills_coding"
|
|
21
|
+
},
|
|
22
|
+
"current_date": "2025-05-12 00:06:28 UTC"
|
|
23
|
+
}
|
|
Binary file
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
{
|
|
2
|
+
"task_name": "basic_skills_common_knowledge",
|
|
3
|
+
"task_hash": "51e88e759602f9085a8c779da375d833",
|
|
4
|
+
"task_config": {
|
|
5
|
+
"dataset_path": "basic_skills_common_knowledge",
|
|
6
|
+
"primary_metric": "acc_per_token",
|
|
7
|
+
"split": "validation",
|
|
8
|
+
"num_shots": 5,
|
|
9
|
+
"metadata": {
|
|
10
|
+
"regimes": [
|
|
11
|
+
"OLMES-v0.1"
|
|
12
|
+
],
|
|
13
|
+
"alias": "basic_skills_common_knowledge:rc::olmes"
|
|
14
|
+
},
|
|
15
|
+
"generation_kwargs": {},
|
|
16
|
+
"context_kwargs": {},
|
|
17
|
+
"dataset_name": "common_knowledge",
|
|
18
|
+
"task_name": "basic_skills_common_knowledge",
|
|
19
|
+
"version": 0,
|
|
20
|
+
"task_core": "basic_skills_common_knowledge"
|
|
21
|
+
},
|
|
22
|
+
"current_date": "2025-05-12 00:06:28 UTC"
|
|
23
|
+
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
{
|
|
2
|
+
"task_name": "basic_skills_logical_reasoning",
|
|
3
|
+
"task_hash": "a3d406a2f4224604b7e6bbf68050691d",
|
|
4
|
+
"task_config": {
|
|
5
|
+
"dataset_path": "basic_skills_logical_reasoning",
|
|
6
|
+
"primary_metric": "acc_per_token",
|
|
7
|
+
"split": "validation",
|
|
8
|
+
"num_shots": 5,
|
|
9
|
+
"metadata": {
|
|
10
|
+
"regimes": [
|
|
11
|
+
"OLMES-v0.1"
|
|
12
|
+
],
|
|
13
|
+
"alias": "basic_skills_logical_reasoning:rc::olmes"
|
|
14
|
+
},
|
|
15
|
+
"generation_kwargs": {},
|
|
16
|
+
"context_kwargs": {},
|
|
17
|
+
"dataset_name": "logical_reasoning",
|
|
18
|
+
"task_name": "basic_skills_logical_reasoning",
|
|
19
|
+
"version": 0,
|
|
20
|
+
"task_core": "basic_skills_logical_reasoning"
|
|
21
|
+
},
|
|
22
|
+
"current_date": "2025-05-12 00:06:28 UTC"
|
|
23
|
+
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
{
|
|
2
|
+
"task_name": "basic_skills_pattern",
|
|
3
|
+
"task_hash": "67983750bfb70a3b5cc34dcd67ee3c6a",
|
|
4
|
+
"task_config": {
|
|
5
|
+
"dataset_path": "basic_skills_pattern",
|
|
6
|
+
"primary_metric": "acc_per_token",
|
|
7
|
+
"split": "validation",
|
|
8
|
+
"num_shots": 5,
|
|
9
|
+
"metadata": {
|
|
10
|
+
"regimes": [
|
|
11
|
+
"OLMES-v0.1"
|
|
12
|
+
],
|
|
13
|
+
"alias": "basic_skills_pattern:rc::olmes"
|
|
14
|
+
},
|
|
15
|
+
"generation_kwargs": {},
|
|
16
|
+
"context_kwargs": {},
|
|
17
|
+
"dataset_name": "pattern",
|
|
18
|
+
"task_name": "basic_skills_pattern",
|
|
19
|
+
"version": 0,
|
|
20
|
+
"task_core": "basic_skills_pattern"
|
|
21
|
+
},
|
|
22
|
+
"current_date": "2025-05-12 00:06:28 UTC"
|
|
23
|
+
}
|
|
Binary file
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
{
|
|
2
|
+
"task_name": "basic_skills_string_operations",
|
|
3
|
+
"task_hash": "8e5fdc7697f1bc7b0c9487a6fa682e45",
|
|
4
|
+
"task_config": {
|
|
5
|
+
"dataset_path": "basic_skills_string_operations",
|
|
6
|
+
"primary_metric": "acc_per_token",
|
|
7
|
+
"split": "validation",
|
|
8
|
+
"num_shots": 5,
|
|
9
|
+
"metadata": {
|
|
10
|
+
"regimes": [
|
|
11
|
+
"OLMES-v0.1"
|
|
12
|
+
],
|
|
13
|
+
"alias": "basic_skills_string_operations:rc::olmes"
|
|
14
|
+
},
|
|
15
|
+
"generation_kwargs": {},
|
|
16
|
+
"context_kwargs": {},
|
|
17
|
+
"dataset_name": "string_operations",
|
|
18
|
+
"task_name": "basic_skills_string_operations",
|
|
19
|
+
"version": 0,
|
|
20
|
+
"task_core": "basic_skills_string_operations"
|
|
21
|
+
},
|
|
22
|
+
"current_date": "2025-05-12 00:06:28 UTC"
|
|
23
|
+
}
|
olmo_eval/tasks.py
CHANGED
|
@@ -1704,6 +1704,46 @@ LABEL_TO_TASK_MAP_ORIG = {
|
|
|
1704
1704
|
OEEvalTask,
|
|
1705
1705
|
{"dataset_path": "arc_easy", "dataset_name": "rc_5shot", "metric_type": "acc"},
|
|
1706
1706
|
),
|
|
1707
|
+
"basic_skills_arithmetic_rc_5shot": (
|
|
1708
|
+
OEEvalTask,
|
|
1709
|
+
{
|
|
1710
|
+
"dataset_path": "basic_skills_arithmetic",
|
|
1711
|
+
"dataset_name": "rc_5shot",
|
|
1712
|
+
"metric_type": "acc",
|
|
1713
|
+
},
|
|
1714
|
+
),
|
|
1715
|
+
"basic_skills_coding_rc_5shot": (
|
|
1716
|
+
OEEvalTask,
|
|
1717
|
+
{"dataset_path": "basic_skills_coding", "dataset_name": "rc_5shot", "metric_type": "acc"},
|
|
1718
|
+
),
|
|
1719
|
+
"basic_skills_common_knowledge_rc_5shot": (
|
|
1720
|
+
OEEvalTask,
|
|
1721
|
+
{
|
|
1722
|
+
"dataset_path": "basic_skills_common_knowledge",
|
|
1723
|
+
"dataset_name": "rc_5shot",
|
|
1724
|
+
"metric_type": "acc",
|
|
1725
|
+
},
|
|
1726
|
+
),
|
|
1727
|
+
"basic_skills_logical_reasoning_rc_5shot": (
|
|
1728
|
+
OEEvalTask,
|
|
1729
|
+
{
|
|
1730
|
+
"dataset_path": "basic_skills_logical_reasoning",
|
|
1731
|
+
"dataset_name": "rc_5shot",
|
|
1732
|
+
"metric_type": "acc",
|
|
1733
|
+
},
|
|
1734
|
+
),
|
|
1735
|
+
"basic_skills_pattern_rc_5shot": (
|
|
1736
|
+
OEEvalTask,
|
|
1737
|
+
{"dataset_path": "basic_skills_pattern", "dataset_name": "rc_5shot", "metric_type": "acc"},
|
|
1738
|
+
),
|
|
1739
|
+
"basic_skills_string_operations_rc_5shot": (
|
|
1740
|
+
OEEvalTask,
|
|
1741
|
+
{
|
|
1742
|
+
"dataset_path": "basic_skills_string_operations",
|
|
1743
|
+
"dataset_name": "rc_5shot",
|
|
1744
|
+
"metric_type": "acc",
|
|
1745
|
+
},
|
|
1746
|
+
),
|
|
1707
1747
|
"boolq_mc_5shot": (
|
|
1708
1748
|
OEEvalTask,
|
|
1709
1749
|
{"dataset_path": "boolq", "dataset_name": "mc_5shot", "metric_type": "acc"},
|
olmo_eval/version.py
CHANGED
|
File without changes
|
|
File without changes
|