ai2-olmo-eval 0.8.2__py3-none-any.whl → 0.8.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ai2_olmo_eval-0.8.2.dist-info → ai2_olmo_eval-0.8.4.dist-info}/METADATA +1 -1
- {ai2_olmo_eval-0.8.2.dist-info → ai2_olmo_eval-0.8.4.dist-info}/RECORD +8 -8
- {ai2_olmo_eval-0.8.2.dist-info → ai2_olmo_eval-0.8.4.dist-info}/WHEEL +1 -1
- olmo_eval/metrics.py +1 -1
- olmo_eval/tasks.py +33 -0
- olmo_eval/version.py +1 -1
- {ai2_olmo_eval-0.8.2.dist-info → ai2_olmo_eval-0.8.4.dist-info}/licenses/LICENSE +0 -0
- {ai2_olmo_eval-0.8.2.dist-info → ai2_olmo_eval-0.8.4.dist-info}/top_level.txt +0 -0
{ai2_olmo_eval-0.8.2.dist-info → ai2_olmo_eval-0.8.4.dist-info}/RECORD
CHANGED

@@ -1,10 +1,10 @@
-ai2_olmo_eval-0.8.
+ai2_olmo_eval-0.8.4.dist-info/licenses/LICENSE,sha256=YvuKOpYh3COIF0yqq-nCMXtpS7mh1GyYvPVlW2j1G-M,11359
 olmo_eval/__init__.py,sha256=49RxnAaJNk8U9XP3SF5MjyFIxLSkxH0vXQuZgnEOi44,283
-olmo_eval/metrics.py,sha256=
-olmo_eval/tasks.py,sha256=
+olmo_eval/metrics.py,sha256=xUnFUGho1Y99595G79chqv2iFZU6LU5KVACHRYcUI1k,20046
+olmo_eval/tasks.py,sha256=yONOV2rI8rDkmaUetdXgeub-shZaNBo9j6Pslu1fKXA,97851
 olmo_eval/tokenizer.py,sha256=PnkidE0nAtEA1QZjuQpE_bIwgAsHxodnaJRALAPqrJQ,5127
 olmo_eval/util.py,sha256=ARmZmRQl8VOvnKQoUprb3cOunzcApeNhRdV4BMXZuvo,3856
-olmo_eval/version.py,sha256=
+olmo_eval/version.py,sha256=wUkgiv1wrgtooOky_Dd4BYqHwcJ850V_jdiQ649cm9s,308
 olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/data-00000-of-00001.arrow,sha256=TPWbMhBmticWjYp7TA3etcKbXbaoCDBWhxuqlD1bDJA,98080
 olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/dataset_info.json,sha256=iZumP5Udu8LD7cbew3o7nNpnGu-o9jPaMxUrNDDNIVY,1795
 olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/state.json,sha256=6Q1XhM-HMZcymuGAKBC_8RjMBKgJSaR_6lLUO9Z8XwE,255
@@ -756,7 +756,7 @@ olmo_eval/oe_eval_tasks/winogrande/val_rc_5shot/config.json,sha256=ySjEVqTOj5GwC
 olmo_eval/oe_eval_tasks/winogrande/val_rc_5shot/requests.jsonl.gz,sha256=knTzcqigWCfdYLN1Pl0TfCm0Fi1lRASWAo_SC6KtXsc,115262
 olmo_eval/tokenizers/allenai_eleuther-ai-gpt-neox-20b-pii-special.json,sha256=yjXYcnpTO7Zjm_R4Gucrn9oA5paadiYM-ZZER5q_EXc,2114319
 olmo_eval/tokenizers/allenai_gpt-neox-olmo-dolma-v1_5.json,sha256=mtM7Szmp-Dlzw_jEKgGUjdW4d6KKyaU1aVbE_07QtxQ,2115113
-ai2_olmo_eval-0.8.
-ai2_olmo_eval-0.8.
-ai2_olmo_eval-0.8.
-ai2_olmo_eval-0.8.
+ai2_olmo_eval-0.8.4.dist-info/METADATA,sha256=vbYd0LaHbtdOtA9NAxjJVlV2EmZt4ch2QhP5D1OYk3k,14398
+ai2_olmo_eval-0.8.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ai2_olmo_eval-0.8.4.dist-info/top_level.txt,sha256=Pryk28JTb89-j624Uy1gRZiE0YXI3czgbNIfJCl9-x0,10
+ai2_olmo_eval-0.8.4.dist-info/RECORD,,
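Each RECORD line ties a file path to the urlsafe-base64 SHA-256 digest and byte size of that file, which is why every file whose content changed (metrics.py, tasks.py, version.py) and every renamed dist-info path receives a new entry. A minimal sketch of how one such entry is produced, following the wheel RECORD format; the file path in the usage comment is only an example:

import base64
import hashlib
from pathlib import Path

def record_entry(path: str) -> str:
    """Build a wheel RECORD line: path,sha256=<urlsafe b64 digest, unpadded>,size."""
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
    return f"{path},sha256={digest.decode()},{len(data)}"

# Hypothetical usage; prints something like "olmo_eval/version.py,sha256=...,308"
# print(record_entry("olmo_eval/version.py"))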
olmo_eval/metrics.py
CHANGED
@@ -103,7 +103,7 @@ class ICLMetric(Metric):
             choice_ids = batch["choice_ids"][idx]
         else:
             fast_mc = False
-            choice_ids = cont_tokens
+            choice_ids = [cont_tokens]

         # For each choice token, calculate metrics and append as separate entries
         for choice_idx, choice_token in enumerate(choice_ids):
olmo_eval/tasks.py
CHANGED
@@ -94,6 +94,17 @@ class ICLMultiChoiceTaskDataset(metaclass=abc.ABCMeta):
             label_id = self.doc_to_label(doc)
             doc_text = self.doc_to_text(doc)
             ctx = self.token_encode(doc_text)
+
+            # Add BOS token if it is exists in the tokenizer
+            if (
+                self.tokenizer.bos_token_id is not None
+                and ctx[0] != self.tokenizer.bos_token_id
+            ):
+                ctx = [self.tokenizer.bos_token_id] + ctx
+
+            if doc_id == 0:
+                log.info(f"First tokens of in-loop eval context: {ctx[:5]}")
+
             dc = self.token_encode(self.doc_to_domain_conditional(doc))
             if self.log_instances > 0:
                 self.log_instances -= 1
@@ -552,6 +563,17 @@ class WinoGrande(ICLMultiChoiceTaskDataset):

         for cont_id, (ctx, dc) in enumerate(zip(ctxs, dcs)):
             ctx = self.token_encode(ctx)
+
+            # Add BOS token if it is exists in the tokenizer
+            if (
+                self.tokenizer.bos_token_id is not None
+                and ctx[0] != self.tokenizer.bos_token_id
+            ):
+                ctx = [self.tokenizer.bos_token_id] + ctx
+
+            if doc_id == 0:
+                log.info(f"First tokens of in-loop eval context: {ctx[:5]}")
+
             dc = self.token_encode(dc)

             # query, remove last token from continuation, truncate from left is longer than model ctx length
@@ -1608,6 +1630,17 @@ class OEEvalTask(ICLMultiChoiceTaskDataset):
             label_id = 0
             doc_text = request_dict["context"]
             ctx = self.token_encode(doc_text)
+
+            # Add BOS token if it is exists in the tokenizer
+            if (
+                self.tokenizer.bos_token_id is not None
+                and ctx[0] != self.tokenizer.bos_token_id
+            ):
+                ctx = [self.tokenizer.bos_token_id] + ctx
+
+            if doc_id == 0:
+                log.info(f"First tokens of in-loop eval context: {ctx[:5]}")
+
             dc = self.token_encode(self.doc_to_domain_conditional(doc))
             if self.log_instances > 0:
                 self.log_instances -= 1
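All three tasks.py hunks add the same guard: prepend the tokenizer's BOS token to the encoded context when the tokenizer defines one and the context does not already start with it, and log the first context's leading tokens once. A minimal standalone sketch of that guard on plain token-ID lists (prepend_bos is a hypothetical helper, not part of the ai2-olmo-eval API, and it additionally tolerates an empty context):

from typing import List, Optional

def prepend_bos(ctx: List[int], bos_token_id: Optional[int]) -> List[int]:
    """Prepend BOS when the tokenizer has one and it is not already the first token."""
    if bos_token_id is not None and (not ctx or ctx[0] != bos_token_id):
        return [bos_token_id] + ctx
    return ctx

# Hypothetical BOS id of 1:
print(prepend_bos([42, 7, 99], bos_token_id=1))     # [1, 42, 7, 99]
print(prepend_bos([1, 42, 7], bos_token_id=1))      # unchanged: [1, 42, 7]
print(prepend_bos([42, 7, 99], bos_token_id=None))  # unchanged: [42, 7, 99]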
olmo_eval/version.py
CHANGED
File without changes