ai2-olmo-eval 0.8.2__py3-none-any.whl → 0.8.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ai2-olmo-eval
3
- Version: 0.8.2
3
+ Version: 0.8.4
4
4
  Summary: In-loop evaluation tasks for language modeling
5
5
  Author-email: Allen Institute for Artificial Intelligence <olmo@allenai.org>
6
6
  License: Apache License
@@ -1,10 +1,10 @@
1
- ai2_olmo_eval-0.8.2.dist-info/licenses/LICENSE,sha256=YvuKOpYh3COIF0yqq-nCMXtpS7mh1GyYvPVlW2j1G-M,11359
1
+ ai2_olmo_eval-0.8.4.dist-info/licenses/LICENSE,sha256=YvuKOpYh3COIF0yqq-nCMXtpS7mh1GyYvPVlW2j1G-M,11359
2
2
  olmo_eval/__init__.py,sha256=49RxnAaJNk8U9XP3SF5MjyFIxLSkxH0vXQuZgnEOi44,283
3
- olmo_eval/metrics.py,sha256=MfDXgHD0SXZIiwLsBeH1BSXa22ruyORFzEkQVgXWBQM,20044
4
- olmo_eval/tasks.py,sha256=eecUt07ww7lDuh9w974QXMIykV7RX6GhsI5iVoG4eQk,96636
3
+ olmo_eval/metrics.py,sha256=xUnFUGho1Y99595G79chqv2iFZU6LU5KVACHRYcUI1k,20046
4
+ olmo_eval/tasks.py,sha256=yONOV2rI8rDkmaUetdXgeub-shZaNBo9j6Pslu1fKXA,97851
5
5
  olmo_eval/tokenizer.py,sha256=PnkidE0nAtEA1QZjuQpE_bIwgAsHxodnaJRALAPqrJQ,5127
6
6
  olmo_eval/util.py,sha256=ARmZmRQl8VOvnKQoUprb3cOunzcApeNhRdV4BMXZuvo,3856
7
- olmo_eval/version.py,sha256=AL-S96WVnSUTcXCxNiQsZbHl0hTj0gYOE4z4HJAm8eQ,308
7
+ olmo_eval/version.py,sha256=wUkgiv1wrgtooOky_Dd4BYqHwcJ850V_jdiQ649cm9s,308
8
8
  olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/data-00000-of-00001.arrow,sha256=TPWbMhBmticWjYp7TA3etcKbXbaoCDBWhxuqlD1bDJA,98080
9
9
  olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/dataset_info.json,sha256=iZumP5Udu8LD7cbew3o7nNpnGu-o9jPaMxUrNDDNIVY,1795
10
10
  olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/state.json,sha256=6Q1XhM-HMZcymuGAKBC_8RjMBKgJSaR_6lLUO9Z8XwE,255
@@ -756,7 +756,7 @@ olmo_eval/oe_eval_tasks/winogrande/val_rc_5shot/config.json,sha256=ySjEVqTOj5GwC
756
756
  olmo_eval/oe_eval_tasks/winogrande/val_rc_5shot/requests.jsonl.gz,sha256=knTzcqigWCfdYLN1Pl0TfCm0Fi1lRASWAo_SC6KtXsc,115262
757
757
  olmo_eval/tokenizers/allenai_eleuther-ai-gpt-neox-20b-pii-special.json,sha256=yjXYcnpTO7Zjm_R4Gucrn9oA5paadiYM-ZZER5q_EXc,2114319
758
758
  olmo_eval/tokenizers/allenai_gpt-neox-olmo-dolma-v1_5.json,sha256=mtM7Szmp-Dlzw_jEKgGUjdW4d6KKyaU1aVbE_07QtxQ,2115113
759
- ai2_olmo_eval-0.8.2.dist-info/METADATA,sha256=ktpIXzDX50NF70lVVzXab_z5no0lIXWug1qybPvbtac,14398
760
- ai2_olmo_eval-0.8.2.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
761
- ai2_olmo_eval-0.8.2.dist-info/top_level.txt,sha256=Pryk28JTb89-j624Uy1gRZiE0YXI3czgbNIfJCl9-x0,10
762
- ai2_olmo_eval-0.8.2.dist-info/RECORD,,
759
+ ai2_olmo_eval-0.8.4.dist-info/METADATA,sha256=vbYd0LaHbtdOtA9NAxjJVlV2EmZt4ch2QhP5D1OYk3k,14398
760
+ ai2_olmo_eval-0.8.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
761
+ ai2_olmo_eval-0.8.4.dist-info/top_level.txt,sha256=Pryk28JTb89-j624Uy1gRZiE0YXI3czgbNIfJCl9-x0,10
762
+ ai2_olmo_eval-0.8.4.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.7.1)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
olmo_eval/metrics.py CHANGED
@@ -103,7 +103,7 @@ class ICLMetric(Metric):
103
103
  choice_ids = batch["choice_ids"][idx]
104
104
  else:
105
105
  fast_mc = False
106
- choice_ids = cont_tokens
106
+ choice_ids = [cont_tokens]
107
107
 
108
108
  # For each choice token, calculate metrics and append as separate entries
109
109
  for choice_idx, choice_token in enumerate(choice_ids):
olmo_eval/tasks.py CHANGED
@@ -94,6 +94,17 @@ class ICLMultiChoiceTaskDataset(metaclass=abc.ABCMeta):
94
94
  label_id = self.doc_to_label(doc)
95
95
  doc_text = self.doc_to_text(doc)
96
96
  ctx = self.token_encode(doc_text)
97
+
98
+ # Add BOS token if it exists in the tokenizer
99
+ if (
100
+ self.tokenizer.bos_token_id is not None
101
+ and ctx[0] != self.tokenizer.bos_token_id
102
+ ):
103
+ ctx = [self.tokenizer.bos_token_id] + ctx
104
+
105
+ if doc_id == 0:
106
+ log.info(f"First tokens of in-loop eval context: {ctx[:5]}")
107
+
97
108
  dc = self.token_encode(self.doc_to_domain_conditional(doc))
98
109
  if self.log_instances > 0:
99
110
  self.log_instances -= 1
@@ -552,6 +563,17 @@ class WinoGrande(ICLMultiChoiceTaskDataset):
552
563
 
553
564
  for cont_id, (ctx, dc) in enumerate(zip(ctxs, dcs)):
554
565
  ctx = self.token_encode(ctx)
566
+
567
+ # Add BOS token if it exists in the tokenizer
568
+ if (
569
+ self.tokenizer.bos_token_id is not None
570
+ and ctx[0] != self.tokenizer.bos_token_id
571
+ ):
572
+ ctx = [self.tokenizer.bos_token_id] + ctx
573
+
574
+ if doc_id == 0:
575
+ log.info(f"First tokens of in-loop eval context: {ctx[:5]}")
576
+
555
577
  dc = self.token_encode(dc)
556
578
 
557
579
  # query, remove last token from continuation, truncate from left if longer than model ctx length
@@ -1608,6 +1630,17 @@ class OEEvalTask(ICLMultiChoiceTaskDataset):
1608
1630
  label_id = 0
1609
1631
  doc_text = request_dict["context"]
1610
1632
  ctx = self.token_encode(doc_text)
1633
+
1634
+ # Add BOS token if it exists in the tokenizer
1635
+ if (
1636
+ self.tokenizer.bos_token_id is not None
1637
+ and ctx[0] != self.tokenizer.bos_token_id
1638
+ ):
1639
+ ctx = [self.tokenizer.bos_token_id] + ctx
1640
+
1641
+ if doc_id == 0:
1642
+ log.info(f"First tokens of in-loop eval context: {ctx[:5]}")
1643
+
1611
1644
  dc = self.token_encode(self.doc_to_domain_conditional(doc))
1612
1645
  if self.log_instances > 0:
1613
1646
  self.log_instances -= 1
olmo_eval/version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  _MAJOR = "0"
2
2
  _MINOR = "8"
3
- _PATCH = "2"
3
+ _PATCH = "4"
4
4
  _SUFFIX = ""
5
5
 
6
6
  VERSION_SHORT = "{0}.{1}".format(_MAJOR, _MINOR)