PyPI - fusion-bench - Versions diffs - 0.2.19__py3-none-any.whl → 0.2.21__py3-none-any.whl - Mend

fusion-bench 0.2.19__py3-none-any.whl → 0.2.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (193) hide show

fusion_bench/models/modeling_smile_llama/register.py ADDED Viewed

@@ -0,0 +1,8 @@
+from transformers import AutoConfig, AutoModel, AutoModelForCausalLM
+from .configuration_smile_llama import SmileLlamaConfig
+from .modeling_smile_llama import SmileLlamaForCausalLM, SmileLlamaModel
+AutoConfig.register("smile_llama", SmileLlamaConfig)
+AutoModel.register(SmileLlamaConfig, SmileLlamaModel)
+AutoModelForCausalLM.register(SmileLlamaConfig, SmileLlamaForCausalLM)

fusion_bench/models/modeling_smile_mistral/__init__.py CHANGED Viewed

@@ -1,48 +1,6 @@
-# flake8: noqa F401
-from typing import TYPE_CHECKING
-from transformers.utils.import_utils import (
-    OptionalDependencyNotAvailable,
-    _LazyModule,
-    is_flax_available,
-    is_tf_available,
-    is_torch_available,
+from .configuration_smile_mistral import SmileMistralConfig
+from .modeling_smile_mistral import (
+    SmileMistralForCausalLM,
+    SmileMistralModel,
 )
-_import_structure = {
-    "configuration_smile_mistral": ["SmileMistralConfig"],
-}
-try:
-    if not is_torch_available():
-        raise OptionalDependencyNotAvailable()
-except OptionalDependencyNotAvailable:
-    pass
-else:
-    _import_structure["modeling_smile_mistral"] = [
-        "SmileMistralForCausalLM",
-        "SmileMistralModel",
-        "SmileMistralPreTrainedModel",
-    ]
-if TYPE_CHECKING:
-    from .configuration_smile_mistral import SmileMistralConfig
-    try:
-        if not is_torch_available():
-            raise OptionalDependencyNotAvailable()
-    except OptionalDependencyNotAvailable:
-        pass
-    else:
-        from .modeling_smile_mistral import (
-            SmileMistralForCausalLM,
-            SmileMistralModel,
-            SmileMistralPreTrainedModel,
-        )
-else:
-    import sys
-    sys.modules[__name__] = _LazyModule(
-        __name__, globals()["__file__"], _import_structure, module_spec=__spec__
-    )
+from . import register

fusion_bench/models/modeling_smile_qwen2/__init__.py CHANGED Viewed

@@ -5,4 +5,4 @@ from .modeling_smile_qwen2 import (
     SmileQwen2ForQuestionAnswering,
     SmileQwen2ForSequenceClassification,
     SmileQwen2Model,
-)
+)

fusion_bench/models/modeling_smile_qwen2/modeling_smile_qwen2.py CHANGED Viewed

@@ -24,7 +24,6 @@ from transformers.modeling_outputs import (
 from transformers.modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
 from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
 from transformers.models.qwen2.modeling_qwen2 import (
-    QWEN2_INPUTS_DOCSTRING,
     Qwen2RMSNorm,
     Qwen2RotaryEmbedding,
     apply_rotary_pos_emb,
@@ -314,7 +313,6 @@ class SmileQwen2Model(SmileQwen2PreTrainedModel):
         self.embed_tokens = value
     @can_return_tuple
-    @add_start_docstrings_to_model_forward(QWEN2_INPUTS_DOCSTRING)
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,
@@ -646,7 +644,6 @@ class SmileQwen2ForCausalLM(SmileQwen2PreTrainedModel, GenerationMixin):
     @can_return_tuple
     @deprecate_kwarg("num_logits_to_keep", version="4.50", new_name="logits_to_keep")
-    @add_start_docstrings_to_model_forward(QWEN2_INPUTS_DOCSTRING)
     @replace_return_docstrings(
         output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC
     )
@@ -752,7 +749,9 @@ class SmileQwen2ForSequenceClassification(SmileQwen2PreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
         self.num_labels = config.num_labels
-        self.model = SmileQwen2Model(config)  #* replace Qwen2Model with SmileQwen2Model
+        self.model = SmileQwen2Model(
+            config
+        )  # * replace Qwen2Model with SmileQwen2Model
         self.score = nn.Linear(config.hidden_size, self.num_labels, bias=False)
         # Initialize weights and apply final processing
@@ -765,7 +764,6 @@ class SmileQwen2ForSequenceClassification(SmileQwen2PreTrainedModel):
         self.model.embed_tokens = value
     @can_return_tuple
-    @add_start_docstrings_to_model_forward(QWEN2_INPUTS_DOCSTRING)
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,
@@ -852,7 +850,9 @@ class SmileQwen2ForQuestionAnswering(SmileQwen2PreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
-        self.transformer = SmileQwen2Model(config)  #* replace Qwen2Model with SmileQwen2Model
+        self.transformer = SmileQwen2Model(
+            config
+        )  # * replace Qwen2Model with SmileQwen2Model
         self.qa_outputs = nn.Linear(config.hidden_size, 2)
         # Initialize weights and apply final processing
@@ -865,7 +865,6 @@ class SmileQwen2ForQuestionAnswering(SmileQwen2PreTrainedModel):
         self.transformer.embed_tokens = value
     @can_return_tuple
-    @add_start_docstrings_to_model_forward(QWEN2_INPUTS_DOCSTRING)
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,

fusion_bench/models/modeling_smile_qwen2/register.py CHANGED Viewed

@@ -1,10 +1,7 @@
 from transformers import AutoConfig, AutoModel, AutoModelForCausalLM
 from .configuration_smile_qwen2 import SmileQwen2Config
-from .modeling_smile_qwen2 import (
-    SmileQwen2ForCausalLM,
-    SmileQwen2Model,
-)
+from .modeling_smile_qwen2 import SmileQwen2ForCausalLM, SmileQwen2Model
 AutoConfig.register("smile_qwen2", SmileQwen2Config)
 AutoModel.register(SmileQwen2Config, SmileQwen2Model)

fusion_bench/models/parameter_dict.py CHANGED Viewed

@@ -74,7 +74,7 @@ class ParameterDictModel(nn.Module):
                     name.split("."),
                     param,
                     check_parent=True,
-                    parent_builder=self.__class__,
+                    parent_builder=__class__,
                 )
     def __repr__(self):

fusion_bench/models/sparse_we_moe.py CHANGED Viewed

@@ -11,6 +11,7 @@ from torch.func import functional_call
 from torch.nn import functional as F
 from tqdm.auto import tqdm
+from fusion_bench.models.utils import del_attr, get_attr, set_attr
 from fusion_bench.utils.state_dict_arithmetic import (
     state_dict_sub,
     state_dict_weighted_sum,
@@ -20,59 +21,6 @@ from fusion_bench.utils.type import StateDictType
 log = logging.getLogger(__name__)
-def join_list(list_of_list: List[List]):
-    ans = []
-    for l in list_of_list:
-        ans.extend(l)
-    return ans
-def del_attr(obj, names: List[str]):
-    """
-    Deletes an attribute from an object recursively.
-    Args:
-        obj (object): Object to delete attribute from.
-        names (list): List of attribute names to delete recursively.
-    """
-    if len(names) == 1:
-        delattr(obj, names[0])
-    else:
-        del_attr(getattr(obj, names[0]), names[1:])
-def set_attr(obj, names: List[str], val):
-    """
-    Sets an attribute of an object recursively.
-    Args:
-        obj (object): Object to set attribute of.
-        names (list): List of attribute names to set recursively.
-        val (object): Value to set the attribute to.
-    """
-    if len(names) == 1:
-        setattr(obj, names[0], val)
-    else:
-        set_attr(getattr(obj, names[0]), names[1:], val)
-def get_attr(obj, names: List[str]):
-    """
-    Gets an attribute of an object recursively.
-    Args:
-        obj (object): Object to get attribute of.
-        names (list): List of attribute names to get recursively.
-    Returns:
-        object: The attribute of the object.
-    """
-    if len(names) == 1:
-        return getattr(obj, names[0])
-    else:
-        return get_attr(getattr(obj, names[0]), names[1:])
 class Depth_0_Gate(nn.Module):
     def __init__(self, num_experts: int):
         super().__init__()

fusion_bench/models/utils.py CHANGED Viewed

@@ -3,6 +3,8 @@ from typing import List
 import torch
 from torch import nn
+from fusion_bench.utils.type import StateDictType
 def del_attr(obj, names: List[str]):
     """
@@ -50,6 +52,30 @@ def get_attr(obj, names: List[str]):
         return get_attr(getattr(obj, names[0]), names[1:])
+def check_parameterNamesMatch(checkpoints: List[StateDictType]) -> None:
+    """
+    Checks that the parameter names of the given checkpoints match.
+    Args:
+        checkpoints (List[Dict[str, float]]): A list of checkpoints, where each checkpoint is a dictionary of parameter names and their corresponding values.
+    Raises:
+        ValueError: If the number of checkpoints is less than 2 or if the parameter names of any two checkpoints differ.
+    """
+    parameter_names = set(checkpoints[0].keys())
+    if len(checkpoints) >= 2:
+        # raise ValueError("Number of models is less than 2.")
+        for checkpoint in checkpoints[1:]:
+            current_parameterNames = set(checkpoint.keys())
+            if current_parameterNames != parameter_names:
+                raise ValueError(
+                    "Differing parameter names in models. "
+                    f"The different parameters are {parameter_names.symmetric_difference(current_parameterNames)}"
+                )
 def find_layers_with_type(
     module: nn.Module,
     layer_types=[nn.Linear],

fusion_bench/models/we_moe.py CHANGED Viewed

@@ -8,64 +8,12 @@ from torch import Tensor, nn
 from torch.func import functional_call
 from torch.nn import functional as F
+from fusion_bench.models.utils import del_attr, get_attr, set_attr
 from fusion_bench.utils.type import StateDictType
 log = logging.getLogger(__name__)
-def join_list(list_of_list: List[List]):
-    ans = []
-    for l in list_of_list:
-        ans.extend(l)
-    return ans
-def del_attr(obj, names: List[str]):
-    """
-    Deletes an attribute from an object recursively.
-    Args:
-        obj (object): Object to delete attribute from.
-        names (list): List of attribute names to delete recursively.
-    """
-    if len(names) == 1:
-        delattr(obj, names[0])
-    else:
-        del_attr(getattr(obj, names[0]), names[1:])
-def set_attr(obj, names: List[str], val):
-    """
-    Sets an attribute of an object recursively.
-    Args:
-        obj (object): Object to set attribute of.
-        names (list): List of attribute names to set recursively.
-        val (object): Value to set the attribute to.
-    """
-    if len(names) == 1:
-        setattr(obj, names[0], val)
-    else:
-        set_attr(getattr(obj, names[0]), names[1:], val)
-def get_attr(obj, names: List[str]):
-    """
-    Gets an attribute of an object recursively.
-    Args:
-        obj (object): Object to get attribute of.
-        names (list): List of attribute names to get recursively.
-    Returns:
-        object: The attribute of the object.
-    """
-    if len(names) == 1:
-        return getattr(obj, names[0])
-    else:
-        return get_attr(getattr(obj, names[0]), names[1:])
 class Depth_0_Gate(nn.Module):
     def __init__(self, num_experts: int):
         super().__init__()

fusion_bench/models/wrappers/ensemble.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Any, Callable, Dict, List, cast
+from typing import Any, Callable, Dict, List, Union, cast
 import numpy as np
 import torch
@@ -6,7 +6,9 @@ from omegaconf import ListConfig
 from torch import Tensor, nn
-def aggregate_tensors(outputs: List[Any], aggregate_fn: Callable) -> Tensor:
+def aggregate_tensors(
+    outputs: List[Any], aggregate_fn: Callable
+) -> Union[Tensor, Dict, List, None]:
     """
     Aggregates a list of outputs using the provided aggregation function.
@@ -84,7 +86,7 @@ class EnsembleModule(nn.Module):
         """
         return torch.stack(outputs).mean(dim=0)
-    def forward(self, *args, **kwargs):
+    def forward(self, *args: Any, **kwargs: Any) -> Any:
         """
         Performs a forward pass by averaging the outputs of the models.
@@ -150,7 +152,7 @@ class WeightedEnsembleModule(nn.Module):
         weights = cast(Tensor, self.weights).view(-1, *([1] * outputs[0].dim()))
         return (torch.stack(outputs) * weights).sum(dim=0)
-    def forward(self, *args, **kwargs):
+    def forward(self, *args: Any, **kwargs: Any) -> Any:
         """
         Performs a forward pass by computing the weighted average of the models' outputs.

fusion_bench/models/wrappers/layer_wise_fusion.py CHANGED Viewed

@@ -49,7 +49,7 @@ def get_layer_wise_weights(
     return torch.full((num_models, num_layers), init_values, dtype=dtype)
-def _fuse_weights(layer_wise_weight: Tensor, tensors: List[Tensor]):
+def _fuse_weights(layer_wise_weight: Tensor, tensors: List[Tensor]) -> Tensor:
     """
     Fuse the layer-wise weights with the given state dictionaries.

fusion-bench 0.2.19__py3-none-any.whl → 0.2.21__py3-none-any.whl

fusion-bench 0.2.19__py3-none-any.whl → 0.2.21__py3-none-any.whl