PyPI - fusion-bench - Versions diffs - 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl - Mend

fusion-bench 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (105) hide show

fusion_bench/compat/method/__init__.py +2 -0
fusion_bench/compat/method/base_algorithm.py +7 -2
fusion_bench/compat/modelpool/__init__.py +3 -2
fusion_bench/compat/taskpool/__init__.py +1 -1
fusion_bench/dataset/arc_agi/__init__.py +6 -1
fusion_bench/dataset/arc_agi/arc.py +26 -7
fusion_bench/dataset/arc_agi/arc_agi.py +156 -25
fusion_bench/dataset/arc_agi/np_cache.py +0 -1
fusion_bench/dataset/arc_agi/preprocess.py +51 -9
fusion_bench/dataset/llama/__init__.py +1 -0
fusion_bench/dataset/llama/alpaca.py +93 -3
fusion_bench/dataset/llama/collate.py +72 -5
fusion_bench/dataset/llama/metamathqa.py +50 -0
fusion_bench/dataset/llama/preference_700k.py +70 -0
fusion_bench/dataset/llama/stanford_shp.py +90 -0
fusion_bench/dataset/llama/ultrachat.py +58 -0
fusion_bench/dataset/llama/utils/__init__.py +0 -0
fusion_bench/method/__init__.py +4 -1
fusion_bench/method/adamerging/__init__.py +1 -1
fusion_bench/method/adamerging/layer_wise_adamerging.py +11 -4
fusion_bench/method/adamerging/min_norm_solvers.py +4 -4
fusion_bench/method/linear/expo.py +39 -0
fusion_bench/method/lm_finetune/__init__.py +1 -0
fusion_bench/method/lm_finetune/bradley_terry_rm.py +432 -0
fusion_bench/method/lm_finetune/fullfinetune_sft.py +122 -150
fusion_bench/method/lm_finetune/peftfinetune_sft.py +102 -157
fusion_bench/method/pruning/llama_magnitude_prune.py +2 -2
fusion_bench/method/pruning/llama_random_prune.py +2 -2
fusion_bench/method/pruning/magnitude_diff_pruning.py +2 -1
fusion_bench/method/rankone_moe/__init__.py +3 -0
fusion_bench/method/rankone_moe/clip_rankone_moe.py +160 -0
fusion_bench/method/rankone_moe/rankone_moe.py +249 -0
fusion_bench/method/simple_average.py +1 -1
fusion_bench/method/surgery/__init__.py +3 -0
fusion_bench/method/surgery/clip_layer_wise_adamerging_surgery.py +157 -0
fusion_bench/mixins/__init__.py +2 -0
fusion_bench/mixins/clip_classification.py +60 -12
fusion_bench/mixins/fabric_training.py +320 -0
fusion_bench/mixins/lightning_fabric.py +11 -2
fusion_bench/modelpool/__init__.py +2 -0
fusion_bench/modelpool/causal_lm/__init__.py +1 -1
fusion_bench/modelpool/causal_lm/causal_lm.py +21 -22
fusion_bench/modelpool/seq_classification_lm/__init__.py +2 -0
fusion_bench/modelpool/seq_classification_lm/reward_model.py +15 -0
fusion_bench/modelpool/seq_classification_lm/seq_classification_lm.py +98 -0
fusion_bench/models/chat_templates/__init__.py +1 -0
fusion_bench/models/chat_templates/llama_3_Instruct.py +1 -0
fusion_bench/models/chat_templates/load_tokenizer.py +43 -0
fusion_bench/models/hf_clip.py +50 -9
fusion_bench/models/rankone_moe.py +410 -0
fusion_bench/models/surgery/surgerymodelwrapper.py +157 -0
fusion_bench/models/utils.py +8 -0
fusion_bench/models/wrappers/layer_wise_fusion.py +14 -5
fusion_bench/models/wrappers/task_wise_fusion.py +5 -5
fusion_bench/optim/__init__.py +2 -0
fusion_bench/optim/exception.py +47 -0
fusion_bench/optim/lr_scheduler/__init__.py +1 -0
fusion_bench/optim/lr_scheduler/linear_warmup.py +222 -0
fusion_bench/optim/lr_scheduler/utils/__init__.py +1 -0
fusion_bench/optim/lr_scheduler/utils/visualization.py +119 -0
fusion_bench/optim/mezo.py +0 -2
fusion_bench/programs/fabric_fusion_program.py +5 -1
fusion_bench/taskpool/__init__.py +10 -2
fusion_bench/taskpool/clip_vision/__init__.py +1 -0
fusion_bench/taskpool/clip_vision/clip_rankone_moe_taskpool.py +112 -0
fusion_bench/taskpool/clip_vision/taskpool.py +43 -6
fusion_bench/taskpool/llama/reward_model.py +157 -0
fusion_bench/taskpool/nyuv2_taskpool.py +2 -0
fusion_bench/tasks/flan_t5_text_generation/glue_load_dataset.py +2 -1
fusion_bench/utils/hydra_utils.py +22 -0
fusion_bench/utils/plot/__init__.py +0 -0
fusion_bench/utils/plot/token.py +52 -0
fusion_bench/utils/plot/token_notebook.py +127 -0
fusion_bench/utils/type.py +5 -3
{fusion_bench-0.2.5.dist-info → fusion_bench-0.2.7.dist-info}/METADATA +1 -1
{fusion_bench-0.2.5.dist-info → fusion_bench-0.2.7.dist-info}/RECORD +104 -57
fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml +1 -1
fusion_bench_config/dataset/llm_sft/alpaca_cleaned.yaml +6 -0
fusion_bench_config/dataset/llm_sft/ultrachat_200k.yaml +3 -0
fusion_bench_config/fabric/llama_peft_fsdp.yaml +16 -0
fusion_bench_config/fabric/loggers/wandb_logger.yaml +2 -0
fusion_bench_config/fabric/strategy/deepspeed.yaml +10 -0
fusion_bench_config/fabric/strategy/llama_peft_fsdp.yaml +9 -0
fusion_bench_config/fabric_model_fusion.yaml +1 -1
fusion_bench_config/llama_full_finetune.yaml +19 -0
fusion_bench_config/method/lm_finetune/bradley_terry_rm.yaml +47 -0
fusion_bench_config/method/lm_finetune/fullfinetune_sft.yaml +13 -6
fusion_bench_config/method/lm_finetune/peftfinetune_sft.yaml +17 -9
fusion_bench_config/method/rankone_moe/rankone_moe.yaml +26 -0
fusion_bench_config/method/regmean/clip_regmean.yaml +1 -0
fusion_bench_config/method/surgery/adamerging_surgery.yaml +27 -0
fusion_bench_config/modelpool/CausalLMPool/llama_alpaca_cleaned.yaml +21 -0
fusion_bench_config/modelpool/CausalLMPool/llama_codealpaca.yaml +21 -0
fusion_bench_config/modelpool/CausalLMPool/llama_metamathqa.yaml +19 -0
fusion_bench_config/modelpool/CausalLMPool/llama_ultrachat.yaml +18 -0
fusion_bench_config/modelpool/SeqenceClassificationModelPool/llama_preference700k.yaml +23 -0
fusion_bench_config/modelpool/SeqenceClassificationModelPool/single_reward_model.yaml +14 -0
fusion_bench_config/nyuv2_config.yaml +5 -1
fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_rankone_wemoe_clip-vit-classification_TA8.yaml +18 -0
fusion_bench_config/taskpool/reward_model_evaluation.yaml +18 -0
fusion_bench_config/llama_weighted_average.yaml +0 -26
{fusion_bench-0.2.5.dist-info → fusion_bench-0.2.7.dist-info}/LICENSE +0 -0
{fusion_bench-0.2.5.dist-info → fusion_bench-0.2.7.dist-info}/WHEEL +0 -0
{fusion_bench-0.2.5.dist-info → fusion_bench-0.2.7.dist-info}/entry_points.txt +0 -0
{fusion_bench-0.2.5.dist-info → fusion_bench-0.2.7.dist-info}/top_level.txt +0 -0

fusion_bench/compat/method/__init__.py CHANGED Viewed

@@ -21,6 +21,7 @@ class AlgorithmFactory:
         "clip_task_wise_adamerging": ".adamerging.clip_task_wise_adamerging.CLIPTaskWiseAdaMergingAlgorithm",
         "clip_layer_wise_adamerging": ".adamerging.clip_layer_wise_adamerging.CLIPLayerWiseAdaMergingAlgorithm",
         "singular_projection_merging": "fusion_bench.method.smile_upscaling.singular_projection_merging.SingularProjectionMergingAlgorithm",
+        "clip_layer_wise_adamerging_surgery": ".surgery.clip_layer_wise_adamerging_surgery.CLIPLayerWiseAdaMergingSurgeryAlgorithm",
         # plug-and-play model merging methods
         "clip_concrete_task_arithmetic": ".concrete_subspace.clip_concrete_task_arithmetic.ConcreteTaskArithmeticAlgorithmForCLIP",
         "clip_concrete_task_wise_adamerging": ".concrete_subspace.clip_concrete_adamerging.ConcreteTaskWiseAdaMergingForCLIP",
@@ -29,6 +30,7 @@ class AlgorithmFactory:
         "clip_weight_ensembling_moe": ".we_moe.clip_we_moe.CLIPWeightEnsemblingMoEAlgorithm",
         "sparse_clip_weight_ensembling_moe": "fusion_bench.method.SparseCLIPWeightEnsemblingMoEAlgorithm",
         "smile_mistral_upscaling": ".smile_upscaling.smile_mistral_upscaling.SmileMistralUpscalingAlgorithm",
+        "rankone_moe": ".rankone_moe.clip_rankone_moe.CLIPRankOneMoEAlgorithm",
     }
     @staticmethod

fusion_bench/compat/method/base_algorithm.py CHANGED Viewed

@@ -1,8 +1,11 @@
 from abc import ABC, abstractmethod
-from typing import Optional
+from typing import Optional, TYPE_CHECKING
 from omegaconf import DictConfig
+if TYPE_CHECKING:
+    from fusion_bench.programs.base_program import BaseHydraProgram
 __all__ = ["ModelFusionAlgorithm"]
@@ -18,6 +21,9 @@ class ModelFusionAlgorithm(ABC):
         config (DictConfig): Configuration for the algorithm.
     """
+    _program: "BaseHydraProgram" = None
+    """A reference to the program that is running the algorithm."""
     def __init__(self, algorithm_config: Optional[DictConfig] = None):
         """
         Initialize the model fusion algorithm with the given configuration.
@@ -26,7 +32,6 @@ class ModelFusionAlgorithm(ABC):
             algorithm_config (Optional[DictConfig]): Configuration for the algorithm. Defaults to an empty configuration if not provided.
                 Get access to the configuration using `self.config`.
         """
-        super().__init__()
         if algorithm_config is None:
             algorithm_config = DictConfig({})
         self.config = algorithm_config

fusion_bench/compat/modelpool/__init__.py CHANGED Viewed

@@ -1,4 +1,6 @@
 # flake8: noqa F401
+import warnings
 from omegaconf import DictConfig
 from fusion_bench.modelpool.huggingface_gpt2_classification import (
@@ -9,7 +11,6 @@ from fusion_bench.modelpool.PeftModelForSeq2SeqLM import PeftModelForSeq2SeqLMPo
 from .AutoModelForSeq2SeqLM import AutoModelForSeq2SeqLMPool
 from .base_pool import DictModelPool, ListModelPool, ModelPool, to_modelpool
 from .huggingface_clip_vision import HuggingFaceClipVisionPool
-import warnings
 class ModelPoolFactory:
@@ -21,7 +22,7 @@ class ModelPoolFactory:
     """
     _modelpool = {
-        "NYUv2ModelPool": ".nyuv2_modelpool.NYUv2ModelPool",
+        "NYUv2ModelPool": "fusion_bench.modelpool.nyuv2_modelpool.NYUv2ModelPool",
         "huggingface_clip_vision": HuggingFaceClipVisionPool,
         "HF_GPT2ForSequenceClassification": GPT2ForSequenceClassificationPool,
         "AutoModelPool": ".huggingface_automodel.AutoModelPool",

fusion_bench/compat/taskpool/__init__.py CHANGED Viewed

@@ -20,7 +20,7 @@ class TaskPoolFactory:
         "dummy": DummyTaskPool,
         "clip_vit_classification": ".clip_image_classification.CLIPImageClassificationTaskPool",
         "FlanT5GLUETextGenerationTaskPool": ".flan_t5_glue_text_generation.FlanT5GLUETextGenerationTaskPool",
-        "NYUv2TaskPool": ".nyuv2_taskpool.NYUv2TaskPool",
+        "NYUv2TaskPool": "fusion_bench.taskpool.nyuv2_taskpool.NYUv2TaskPool",
     }
     @staticmethod

fusion_bench/dataset/arc_agi/__init__.py CHANGED Viewed

@@ -1 +1,6 @@
-from .arc_agi import load_tokenized_arc_agi_dataset
+from .arc_agi import (
+    load_tokenized_arc_agi_dataset,
+    load_tokenized_arc_agi_dataset_for_ttt,
+    process_task,
+    process_task_for_ttt,
+)

fusion_bench/dataset/arc_agi/arc.py CHANGED Viewed

@@ -7,6 +7,7 @@ Task: a class to represent a task (task.test_example and task.train_examples are
 read_from_single_file: a function to read challenge problems and solutions from a single file
 make_submission: a function to create a submission file
 """
 import dataclasses
 import glob
 import json
@@ -15,7 +16,6 @@ from typing import List, Optional
 import numpy as np
 Grid = np.ndarray
@@ -66,7 +66,9 @@ class Example:
     def __eq__(self, other: object) -> bool:
         if not isinstance(other, Example):
             return NotImplemented
-        return np.array_equal(self.input, other.input) and np.array_equal(self.output, other.output)
+        return np.array_equal(self.input, other.input) and np.array_equal(
+            self.output, other.output
+        )
     @classmethod
     def deserialize(cls, data: dict, test: bool = False) -> "Example":
@@ -150,7 +152,16 @@ class Task:
         tasks = []
         for test_data in data["test"]:
             task = cls.deserialize(
-                {"train": data["train"], "test": [test_data], "name": data.get("name", "")},
+                {
+                    "train": data["train"],
+                    "test": [test_data],
+                    "name": data.get("name", ""),
+                },
+                {
+                    "train": data["train"],
+                    "test": [test_data],
+                    "name": data.get("name", ""),
+                },
                 test=test,
             )
             tasks.append(task)
@@ -245,7 +256,9 @@ def make_submission(
     """
     Make a submission
     """
-    assert len(tasks) == len(predictions), "Number of tasks and predictions should be the same"
+    assert len(tasks) == len(
+        predictions
+    ), "Number of tasks and predictions should be the same"
     # sort by task_name alphabetically to ensure order of subtasks
     indices = np.argsort([task.name for task in tasks])
@@ -259,8 +272,12 @@ def make_submission(
         if task_name not in submissions:
             submissions[task_name] = []
-        assert len(prediction) == number_of_attempts, "Number of attempts should be the same"
-        attempts = {f"attempt_{j+1}": to_list(pred) for j, pred in enumerate(prediction)}
+        assert (
+            len(prediction) == number_of_attempts
+        ), "Number of attempts should be the same"
+        attempts = {
+            f"attempt_{j+1}": to_list(pred) for j, pred in enumerate(prediction)
+        }
         while len(submissions[task_name]) <= task_no:
             submissions[task_name].append({"attempt_1": [[0]], "attempt_2": [[0]]})
@@ -277,7 +294,9 @@ if __name__ == "__main__":
     arc_path = "/kaggle/input/arc-prize-2024/"
     tasks = read_tasks_from_single_file(arc_path + "arc-agi_training_challenges.json")
     print(tasks[0])
-    tasks = read_tasks_from_single_file(arc_path + "arc-agi_evaluation_challenges.json", test=True)
+    tasks = read_tasks_from_single_file(
+        arc_path + "arc-agi_evaluation_challenges.json", test=True
+    )
     print(tasks[0])
     tasks = read_tasks_from_single_file(

fusion_bench/dataset/arc_agi/arc_agi.py CHANGED Viewed

@@ -5,15 +5,16 @@ import sys
 from multiprocessing import Pool
 from typing import Any, Dict, List, Literal, Optional
-import fusion_bench
 import numpy as np
 from datasets import Dataset, DatasetDict, load_dataset, load_from_disk
 from lightning.fabric.utilities import rank_zero_only
 from tqdm.auto import tqdm
 from typing_extensions import TYPE_CHECKING
+import fusion_bench
 from .arc import Example, Task
-from .preprocess import get_augmenters, process_task
+from .preprocess import get_augmenters, process_task, process_task_for_ttt
 if TYPE_CHECKING:
     from transformers import PreTrainedTokenizer
@@ -65,7 +66,7 @@ def _join_list(lists: List[List[Any]]) -> List[Any]:
     return ans
-def _to_task(
+def _to_tasks(
     train_data: List[Dict[str, Any]],
     test_data: List[Dict[str, Any]],
     name: str,
@@ -87,7 +88,7 @@ def _to_task(
     return tasks
-def _tokenizer_tasks(
+def tokenizer_tasks_for_ttt(
     tasks: List[Task],
     tokenizer: "PreTrainedTokenizer",
     use_data_augmentation: bool = True,
@@ -106,7 +107,7 @@ def _tokenizer_tasks(
     formatter = _get_formatter("new")
     processor = functools.partial(
-        process_task,
+        process_task_for_ttt,
         augmenters=augmenters_to_apply,
         formatter=formatter,
         tokenizer=tokenizer,
@@ -133,7 +134,34 @@ def _tokenizer_tasks(
     return dataset
-def load_tokenized_arc_agi_dataset(
+def tokenizer_tasks(
+    tasks: List[Task],
+    tokenizer: "PreTrainedTokenizer",
+):
+    formatter = _get_formatter("new")
+    processor = functools.partial(
+        process_task, formatter=formatter, tokenizer=tokenizer
+    )
+    # with Pool(multiprocessing.cpu_count()) as p:
+    #     data = p.map(processor, tasks)
+    data = _join_list(
+        [
+            processor(task)
+            for task in tqdm(
+                tasks,
+                desc="Processing tasks",
+                dynamic_ncols=True,
+                leave=False,
+                disable=not rank_zero_only.rank == 0,
+            )
+        ]
+    )
+    dataset = Dataset.from_list(data)
+    return dataset
+def load_tokenized_arc_agi_dataset_for_ttt(
     tokenizer: Optional["PreTrainedTokenizer"],
     path: str = "dataartist/arc-agi",
     split: Optional[str] = None,
@@ -144,47 +172,47 @@ def load_tokenized_arc_agi_dataset(
     max_num_tasks: Optional[int] = None,
 ):
     # regularize split
-    if split.lower() == "train":
-        split = "training"
-    if split.lower() == "test":
-        split = "evaluation"
+    split = split.lower() if split is not None else split
     # load cached dataset if available
     if cache_path is not None and fusion_bench.utils.path.path_is_dir_and_not_empty(
         cache_path
     ):
         datasets = load_from_disk(cache_path)
-        if split is None:
-            return datasets
-        else:
+        if split is None and split in datasets.column_names:
             return datasets[split]
+        else:
+            return datasets
     else:
         assert (
             tokenizer is not None
         ), "Cached dataset not found. Need tokenizer to process the raw data."
     # load raw dataset
-    datasets = load_dataset(path, split=split)
+    datasets = load_dataset(path)
+    datasets = DatasetDict(
+        {"train": datasets["training"], "test": datasets["evaluation"]}
+    )
     if split is None:
-        converted_datasets = {
+        converted_datasets: Dict[str, List[Task]] = {
             "train": _join_list(
                 [
-                    _to_task(
+                    _to_tasks(
                         task["train"],
                         task["test"],
                         task["id"],
                     )
-                    for task in datasets["training"]
+                    for task in datasets["train"]
                 ]
             ),
             "test": _join_list(
                 [
-                    _to_task(
+                    _to_tasks(
                         task["train"],
                         task["test"],
                         task["id"],
                     )
-                    for task in datasets["evaluation"]
+                    for task in datasets["test"]
                 ]
             ),
         }
@@ -195,7 +223,7 @@ def load_tokenized_arc_agi_dataset(
                 for split in converted_datasets
             }
         converted_datasets = {
-            split: _tokenizer_tasks(
+            split: tokenizer_tasks_for_ttt(
                 converted_datasets[split],
                 tokenizer,
                 use_data_augmentation,
@@ -210,25 +238,128 @@ def load_tokenized_arc_agi_dataset(
             )
         }
         converted_datasets = DatasetDict(converted_datasets)
-    else:
+    else:  # split is not None
         converted_datasets = _join_list(
             [
-                _to_task(
+                _to_tasks(
                     task["train"],
                     task["test"],
                     task["id"],
                 )
-                for task in datasets
+                for task in datasets[split]
             ]
         )
         if max_num_tasks is not None:
             # limit the number of tasks, useful for debugging
             converted_datasets = converted_datasets[:max_num_tasks]
-        converted_datasets = _tokenizer_tasks(
+        converted_datasets = tokenizer_tasks_for_ttt(
             converted_datasets, tokenizer, use_data_augmentation, permute_n, seed
         )
-    if cache_path is not None:
+    if cache_path is not None and rank_zero_only.rank == 0:
+        os.makedirs(cache_path, exist_ok=True)
+        converted_datasets.save_to_disk(cache_path)
+    return converted_datasets
+def load_tokenized_arc_agi_dataset(
+    tokenizer: Optional["PreTrainedTokenizer"],
+    path: str = "dataartist/arc-agi",
+    split: Optional[str] = None,
+    cache_path: Optional[str] = None,
+    max_num_tasks: Optional[int] = None,
+):
+    """
+    Loads and tokenizes the ARC-AGI dataset.
+    Args:
+        tokenizer (Optional[PreTrainedTokenizer]): The tokenizer to use for tokenizing the dataset.
+        path (str, optional): The path to the dataset. Defaults to "dataartist/arc-agi".
+        split (Optional[str], optional): The dataset split to load (e.g., "train", "test"). Defaults to None.
+        cache_path (Optional[str], optional): The path to cache the processed dataset. Defaults to None.
+        max_num_tasks (Optional[int], optional): The maximum number of tasks to load. Useful for debugging. Defaults to None.
+    Returns:
+        DatasetDict or Dataset: The tokenized dataset, either as a DatasetDict if split is None, or as a Dataset if a specific split is specified.
+    """
+    # regularize split
+    split = split.lower() if split is not None else split
+    # load cached dataset if available
+    if cache_path is not None and fusion_bench.utils.path.path_is_dir_and_not_empty(
+        cache_path
+    ):
+        datasets = load_from_disk(cache_path)
+        if split is None and split in datasets.column_names:
+            return datasets[split]
+        else:
+            return datasets
+    else:
+        assert (
+            tokenizer is not None
+        ), "Cached dataset not found. Need tokenizer to process the raw data."
+    # load raw dataset
+    datasets = load_dataset(path)
+    datasets = DatasetDict(
+        {"train": datasets["training"], "test": datasets["evaluation"]}
+    )
+    if split is None:
+        converted_datasets: Dict[str, List[Task]] = {
+            "train": _join_list(
+                [
+                    _to_tasks(
+                        task["train"],
+                        task["test"],
+                        task["id"],
+                    )
+                    for task in datasets["train"]
+                ]
+            ),
+            "test": _join_list(
+                [
+                    _to_tasks(
+                        task["train"],
+                        task["test"],
+                        task["id"],
+                    )
+                    for task in datasets["test"]
+                ]
+            ),
+        }
+        if max_num_tasks is not None:
+            # limit the number of tasks, useful for debugging
+            converted_datasets = {
+                split: converted_datasets[split][:max_num_tasks]
+                for split in converted_datasets
+            }
+        converted_datasets = {
+            split: tokenizer_tasks(converted_datasets[split], tokenizer)
+            for split in tqdm(
+                converted_datasets,
+                desc="Processing splits",
+                dynamic_ncols=True,
+                disable=not rank_zero_only.rank == 0,
+            )
+        }
+        converted_datasets = DatasetDict(converted_datasets)
+    else:  # split is not None
+        converted_datasets = _join_list(
+            [
+                _to_tasks(
+                    task["train"],
+                    task["test"],
+                    task["id"],
+                )
+                for task in datasets[split]
+            ]
+        )
+        if max_num_tasks is not None:
+            # limit the number of tasks, useful for debugging
+            converted_datasets = converted_datasets[:max_num_tasks]
+        converted_datasets = tokenizer_tasks(converted_datasets, tokenizer)
+    if cache_path is not None and rank_zero_only.rank == 0:
         os.makedirs(cache_path, exist_ok=True)
         converted_datasets.save_to_disk(cache_path)
     return converted_datasets

fusion_bench/dataset/arc_agi/np_cache.py CHANGED Viewed

@@ -6,7 +6,6 @@ from typing import Callable, Optional, TypeVar, cast
 import numpy as np
 from xxhash import xxh3_64_hexdigest
 __all__ = ["np_lru_cache"]
 TCallable = TypeVar("TCallable", bound=Callable)

fusion_bench/dataset/arc_agi/preprocess.py CHANGED Viewed

@@ -112,19 +112,41 @@ def get_augmenters(
 def format_and_filter(
     formatter: MessageRepresenter,
     tokenizer: "PreTrainedTokenizer",
-    task,
+    task: Task,
 ):
+    """
+    Formats and filters a task for model input.
+    Args:
+        formatter (MessageRepresenter): The formatter to encode the task.
+        tokenizer (PreTrainedTokenizer): The tokenizer to tokenize the conversation.
+        task: The task to be formatted and filtered.
+    Returns:
+        Dict[str, Any]: A dictionary containing the formatted data with keys:
+            - "input_ids": The tokenized input IDs.
+            - "attention_mask": The attention mask for the input IDs.
+            - "labels": The labels for the input IDs.
+            - "task_id": The task ID.
+            - "num_prompt_tokens": The number of prompt tokens.
+            - "num_output_tokens": The number of output tokens.
+    """
+    task_id = task.name
     task = formatter.encode(task)
     conversation = task[0] + [task[1]]
     assert conversation[-1]["role"] == "assistant", "Last message should be assistant"
     prompt_tokens = tokenizer.apply_chat_template(
         conversation[:-1], tokenize=True, add_generation_prompt=True
     )
-    output_tokens = tokenizer.encode(conversation[-1]["content"] + tokenizer.eos_token)
+    generation_tokens = tokenizer.apply_chat_template(conversation, tokenize=True)
+    output_tokens = generation_tokens[len(prompt_tokens) :]
     data = {
         "input_ids": prompt_tokens + output_tokens,
         "attention_mask": [1] * len(prompt_tokens) + [1] * len(output_tokens),
-        "labels": [-100] * len(prompt_tokens) + output_tokens,
+        "labels": prompt_tokens + output_tokens,
+        "task_id": task_id,
+        "num_prompt_tokens": len(prompt_tokens),
+        "num_output_tokens": len(output_tokens),
     }
     return data
@@ -136,6 +158,19 @@ def get_test_time_train_data(
     permute_n: int = 1,
     seed: int = 0,
 ) -> List[Task]:
+    """
+    Generates augmented training data for test-time training.
+    Args:
+        original_task (Task): The original task containing training examples.
+        augmenters (List[Augmenter]): A list of augmenters to apply to the tasks.
+        n (int, optional): The number of examples to leave out for testing. Defaults to 1.
+        permute_n (int, optional): The number of times to permute the augmented tasks. Defaults to 1.
+        seed (int, optional): The random seed for reproducibility. Defaults to 0.
+    Returns:
+        List[Task]: A list of augmented tasks.
+    """
     rng = np.random.RandomState(seed)
     train_examples = original_task.train_examples.copy()
     initial_tasks = []
@@ -150,7 +185,7 @@ def get_test_time_train_data(
         for comb in combs:
             initial_tasks.append(
                 Task(
-                    name="",
+                    name=original_task.name,
                     train_examples=[examples[j] for j in comb],
                     test_example=examples[i],
                 )
@@ -183,7 +218,6 @@ def get_test_time_train_data(
             color_and_permute_augmented_tasks.append(new_task)
     augmented_tasks = color_and_permute_augmented_tasks + augmented_tasks
     augmented_tasks = list(set(augmented_tasks))
     return augmented_tasks
@@ -193,13 +227,12 @@ def get_formatted_data(
     task: Task,
     augmenters: List[Augmenter],
     formatter: MessageRepresenter,
-    tokenizer,
+    tokenizer: "PreTrainedTokenizer",
     leave_n: int = 1,
     permute_n: int = 1,
     seed: int = 0,
     max_tokens: int = 8192,
 ):
     train_data = get_test_time_train_data(
         task, augmenters, n=leave_n, permute_n=permute_n, seed=seed
     )
@@ -213,11 +246,11 @@ def get_formatted_data(
     return formatted_data
-def process_task(
+def process_task_for_ttt(
     task: Task,
     augmenters: List[Augmenter],
     formatter: MessageRepresenter,
-    tokenizer,
+    tokenizer: "PreTrainedTokenizer",
     permute_n: int = 1,
     Nmax: int = 250,
     seed: int = 0,
@@ -254,3 +287,12 @@ def process_task(
         train = train[:Nmax]
     return train
+def process_task(
+    task: Task,
+    formatter: MessageRepresenter,
+    tokenizer: "PreTrainedTokenizer",
+):
+    formatted = format_and_filter(formatter, tokenizer, task)
+    return [formatted]

fusion_bench/dataset/llama/__init__.py CHANGED Viewed

@@ -0,0 +1 @@
+from . import collate

fusion-bench 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl

fusion-bench 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl