PyPI - ai2-olmo-eval - Versions diffs - 0.8.2__py3-none-any.whl → 0.8.4__py3-none-any.whl - Mend

ai2-olmo-eval 0.8.2__py3-none-any.whl → 0.8.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

{ai2_olmo_eval-0.8.2.dist-info → ai2_olmo_eval-0.8.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai2-olmo-eval
-Version: 0.8.2
+Version: 0.8.4
 Summary: In-loop evaluation tasks for language modeling
 Author-email: Allen Institute for Artificial Intelligence <olmo@allenai.org>
 License:                                  Apache License

{ai2_olmo_eval-0.8.2.dist-info → ai2_olmo_eval-0.8.4.dist-info}/RECORD RENAMED Viewed

@@ -1,10 +1,10 @@
-ai2_olmo_eval-0.8.2.dist-info/licenses/LICENSE,sha256=YvuKOpYh3COIF0yqq-nCMXtpS7mh1GyYvPVlW2j1G-M,11359
+ai2_olmo_eval-0.8.4.dist-info/licenses/LICENSE,sha256=YvuKOpYh3COIF0yqq-nCMXtpS7mh1GyYvPVlW2j1G-M,11359
 olmo_eval/__init__.py,sha256=49RxnAaJNk8U9XP3SF5MjyFIxLSkxH0vXQuZgnEOi44,283
-olmo_eval/metrics.py,sha256=MfDXgHD0SXZIiwLsBeH1BSXa22ruyORFzEkQVgXWBQM,20044
-olmo_eval/tasks.py,sha256=eecUt07ww7lDuh9w974QXMIykV7RX6GhsI5iVoG4eQk,96636
+olmo_eval/metrics.py,sha256=xUnFUGho1Y99595G79chqv2iFZU6LU5KVACHRYcUI1k,20046
+olmo_eval/tasks.py,sha256=yONOV2rI8rDkmaUetdXgeub-shZaNBo9j6Pslu1fKXA,97851
 olmo_eval/tokenizer.py,sha256=PnkidE0nAtEA1QZjuQpE_bIwgAsHxodnaJRALAPqrJQ,5127
 olmo_eval/util.py,sha256=ARmZmRQl8VOvnKQoUprb3cOunzcApeNhRdV4BMXZuvo,3856
-olmo_eval/version.py,sha256=AL-S96WVnSUTcXCxNiQsZbHl0hTj0gYOE4z4HJAm8eQ,308
+olmo_eval/version.py,sha256=wUkgiv1wrgtooOky_Dd4BYqHwcJ850V_jdiQ649cm9s,308
 olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/data-00000-of-00001.arrow,sha256=TPWbMhBmticWjYp7TA3etcKbXbaoCDBWhxuqlD1bDJA,98080
 olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/dataset_info.json,sha256=iZumP5Udu8LD7cbew3o7nNpnGu-o9jPaMxUrNDDNIVY,1795
 olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/state.json,sha256=6Q1XhM-HMZcymuGAKBC_8RjMBKgJSaR_6lLUO9Z8XwE,255
@@ -756,7 +756,7 @@ olmo_eval/oe_eval_tasks/winogrande/val_rc_5shot/config.json,sha256=ySjEVqTOj5GwC
 olmo_eval/oe_eval_tasks/winogrande/val_rc_5shot/requests.jsonl.gz,sha256=knTzcqigWCfdYLN1Pl0TfCm0Fi1lRASWAo_SC6KtXsc,115262
 olmo_eval/tokenizers/allenai_eleuther-ai-gpt-neox-20b-pii-special.json,sha256=yjXYcnpTO7Zjm_R4Gucrn9oA5paadiYM-ZZER5q_EXc,2114319
 olmo_eval/tokenizers/allenai_gpt-neox-olmo-dolma-v1_5.json,sha256=mtM7Szmp-Dlzw_jEKgGUjdW4d6KKyaU1aVbE_07QtxQ,2115113
-ai2_olmo_eval-0.8.2.dist-info/METADATA,sha256=ktpIXzDX50NF70lVVzXab_z5no0lIXWug1qybPvbtac,14398
-ai2_olmo_eval-0.8.2.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
-ai2_olmo_eval-0.8.2.dist-info/top_level.txt,sha256=Pryk28JTb89-j624Uy1gRZiE0YXI3czgbNIfJCl9-x0,10
-ai2_olmo_eval-0.8.2.dist-info/RECORD,,
+ai2_olmo_eval-0.8.4.dist-info/METADATA,sha256=vbYd0LaHbtdOtA9NAxjJVlV2EmZt4ch2QhP5D1OYk3k,14398
+ai2_olmo_eval-0.8.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ai2_olmo_eval-0.8.4.dist-info/top_level.txt,sha256=Pryk28JTb89-j624Uy1gRZiE0YXI3czgbNIfJCl9-x0,10
+ai2_olmo_eval-0.8.4.dist-info/RECORD,,

{ai2_olmo_eval-0.8.2.dist-info → ai2_olmo_eval-0.8.4.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.7.1)
+Generator: setuptools (80.9.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

olmo_eval/metrics.py CHANGED Viewed

@@ -103,7 +103,7 @@ class ICLMetric(Metric):
                 choice_ids = batch["choice_ids"][idx]
             else:
                 fast_mc = False
-                choice_ids = cont_tokens
+                choice_ids = [cont_tokens]
             # For each choice token, calculate metrics and append as separate entries
             for choice_idx, choice_token in enumerate(choice_ids):

olmo_eval/tasks.py CHANGED Viewed

@@ -94,6 +94,17 @@ class ICLMultiChoiceTaskDataset(metaclass=abc.ABCMeta):
                 label_id = self.doc_to_label(doc)
                 doc_text = self.doc_to_text(doc)
                 ctx = self.token_encode(doc_text)
+                # Add BOS token if it is exists in the tokenizer
+                if (
+                    self.tokenizer.bos_token_id is not None
+                    and ctx[0] != self.tokenizer.bos_token_id
+                ):
+                    ctx = [self.tokenizer.bos_token_id] + ctx
+                if doc_id == 0:
+                    log.info(f"First tokens of in-loop eval context: {ctx[:5]}")
                 dc = self.token_encode(self.doc_to_domain_conditional(doc))
                 if self.log_instances > 0:
                     self.log_instances -= 1
@@ -552,6 +563,17 @@ class WinoGrande(ICLMultiChoiceTaskDataset):
             for cont_id, (ctx, dc) in enumerate(zip(ctxs, dcs)):
                 ctx = self.token_encode(ctx)
+                # Add BOS token if it is exists in the tokenizer
+                if (
+                    self.tokenizer.bos_token_id is not None
+                    and ctx[0] != self.tokenizer.bos_token_id
+                ):
+                    ctx = [self.tokenizer.bos_token_id] + ctx
+                if doc_id == 0:
+                    log.info(f"First tokens of in-loop eval context: {ctx[:5]}")
                 dc = self.token_encode(dc)
                 # query, remove last token from continuation, truncate from left is longer than model ctx length
@@ -1608,6 +1630,17 @@ class OEEvalTask(ICLMultiChoiceTaskDataset):
                         label_id = 0
                 doc_text = request_dict["context"]
                 ctx = self.token_encode(doc_text)
+                # Add BOS token if it is exists in the tokenizer
+                if (
+                    self.tokenizer.bos_token_id is not None
+                    and ctx[0] != self.tokenizer.bos_token_id
+                ):
+                    ctx = [self.tokenizer.bos_token_id] + ctx
+                if doc_id == 0:
+                    log.info(f"First tokens of in-loop eval context: {ctx[:5]}")
                 dc = self.token_encode(self.doc_to_domain_conditional(doc))
                 if self.log_instances > 0:
                     self.log_instances -= 1

olmo_eval/version.py CHANGED Viewed

@@ -1,6 +1,6 @@
 _MAJOR = "0"
 _MINOR = "8"
-_PATCH = "2"
+_PATCH = "4"
 _SUFFIX = ""
 VERSION_SHORT = "{0}.{1}".format(_MAJOR, _MINOR)

{ai2_olmo_eval-0.8.2.dist-info → ai2_olmo_eval-0.8.4.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{ai2_olmo_eval-0.8.2.dist-info → ai2_olmo_eval-0.8.4.dist-info}/top_level.txt RENAMED Viewed

File without changes

ai2-olmo-eval 0.8.2__py3-none-any.whl → 0.8.4__py3-none-any.whl

ai2-olmo-eval 0.8.2__py3-none-any.whl → 0.8.4__py3-none-any.whl