fusion-bench 0.2.21__py3-none-any.whl → 0.2.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fusion_bench/__init__.py +21 -2
- fusion_bench/constants/__init__.py +1 -0
- fusion_bench/constants/runtime.py +57 -0
- fusion_bench/method/__init__.py +8 -2
- fusion_bench/method/bitdelta/__init__.py +1 -0
- fusion_bench/method/classification/clip_finetune.py +1 -1
- fusion_bench/method/fisher_merging/clip_fisher_merging.py +0 -4
- fusion_bench/method/fisher_merging/gpt2_fisher_merging.py +2 -2
- fusion_bench/method/linear/simple_average_for_llama.py +16 -11
- fusion_bench/method/simple_average.py +7 -7
- fusion_bench/method/smile_upscaling/causal_lm_upscaling.py +371 -0
- fusion_bench/method/smile_upscaling/projected_energy.py +1 -2
- fusion_bench/method/smile_upscaling/smile_mistral_upscaling.py +5 -1
- fusion_bench/method/smile_upscaling/smile_qwen2_upscaling.py +40 -31
- fusion_bench/method/smile_upscaling/smile_upscaling.py +1 -1
- fusion_bench/method/we_moe/__init__.py +1 -0
- fusion_bench/method/we_moe/entropy_loss.py +25 -0
- fusion_bench/method/we_moe/flan_t5_we_moe.py +331 -0
- fusion_bench/method/we_moe/utils.py +15 -0
- fusion_bench/method/weighted_average/llama.py +1 -1
- fusion_bench/mixins/clip_classification.py +11 -42
- fusion_bench/mixins/serialization.py +18 -8
- fusion_bench/modelpool/causal_lm/causal_lm.py +32 -33
- fusion_bench/models/__init__.py +5 -0
- fusion_bench/models/hf_utils.py +65 -87
- fusion_bench/models/model_card_templates/default.md +46 -0
- fusion_bench/models/modeling_smile_llama/__init__.py +7 -0
- fusion_bench/models/modeling_smile_llama/modeling_smile_llama.py +1 -8
- fusion_bench/models/modeling_smile_mistral/__init__.py +1 -1
- fusion_bench/models/modeling_smile_qwen2/modeling_smile_qwen2.py +1 -5
- fusion_bench/programs/fabric_fusion_program.py +29 -60
- fusion_bench/scripts/cli.py +34 -1
- fusion_bench/taskpool/clip_vision/taskpool.py +9 -4
- fusion_bench/utils/__init__.py +1 -0
- fusion_bench/utils/cache_utils.py +101 -1
- fusion_bench/utils/fabric.py +2 -2
- fusion_bench/utils/lazy_imports.py +23 -0
- fusion_bench/utils/lazy_state_dict.py +38 -3
- fusion_bench/utils/modelscope.py +3 -3
- fusion_bench/utils/path.py +56 -0
- fusion_bench/utils/pylogger.py +1 -1
- {fusion_bench-0.2.21.dist-info → fusion_bench-0.2.22.dist-info}/METADATA +1 -23
- {fusion_bench-0.2.21.dist-info → fusion_bench-0.2.22.dist-info}/RECORD +53 -45
- fusion_bench_config/method/fisher_merging/clip_fisher_merging.yaml +0 -1
- fusion_bench_config/method/linear/simple_average_for_llama.yaml +3 -2
- fusion_bench_config/method/smile_upscaling/causal_lm_upscaling.yaml +21 -0
- fusion_bench_config/method/smile_upscaling/smile_qwen2_upscaling.yaml +1 -1
- fusion_bench_config/method/wemoe/flan_t5_weight_ensembling_moe.yaml +20 -0
- fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-1.5B_math_and_coder.yaml +1 -1
- {fusion_bench-0.2.21.dist-info → fusion_bench-0.2.22.dist-info}/WHEEL +0 -0
- {fusion_bench-0.2.21.dist-info → fusion_bench-0.2.22.dist-info}/entry_points.txt +0 -0
- {fusion_bench-0.2.21.dist-info → fusion_bench-0.2.22.dist-info}/licenses/LICENSE +0 -0
- {fusion_bench-0.2.21.dist-info → fusion_bench-0.2.22.dist-info}/top_level.txt +0 -0
fusion_bench/mixins/serialization.py
CHANGED

@@ -90,14 +90,21 @@ def auto_register_config(cls):
     # Auto-register parameters in _config_mapping
     if not "_config_mapping" in cls.__dict__:
         cls._config_mapping = deepcopy(getattr(cls, "_config_mapping", {}))
+    registered_parameters = tuple(cls._config_mapping.values())
+
     for param_name in list(sig.parameters.keys())[1:]:  # Skip 'self'
-        if
-
-
-
+        if (
+            sig.parameters[param_name].kind
+            not in [
+                _ParameterKind.VAR_POSITIONAL,
+                _ParameterKind.VAR_KEYWORD,
+            ]
+        ) and (param_name not in registered_parameters):
             cls._config_mapping[param_name] = param_name
 
     def __init__(self, *args, **kwargs):
+        nonlocal original_init, registered_parameters
+
         # auto-register the attributes based on the signature
         sig = inspect.signature(original_init)
         param_names = list(sig.parameters.keys())[1:]  # Skip 'self'

@@ -114,10 +121,13 @@ def auto_register_config(cls):
 
         # Handle keyword arguments and defaults
         for param_name in param_names:
-            if
-
-
-
+            if (
+                sig.parameters[param_name].kind
+                not in [
+                    _ParameterKind.VAR_POSITIONAL,
+                    _ParameterKind.VAR_KEYWORD,
+                ]
+            ) and (param_name not in registered_parameters):
                 # Skip if already set by positional argument
                 param_index = param_names.index(param_name)
                 if param_index >= 0 and param_index < len(args):
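The `auto_register_config` change above skips `*args`/`**kwargs` and any parameter already present in an inherited `_config_mapping`. A minimal usage sketch (the toy class is hypothetical; only the decorator and its top-level export are taken from this diff):

```python
from fusion_bench import auto_register_config  # exported at the package top level in 0.2.22


@auto_register_config
class ToyPool:
    # After decoration, _config_mapping roughly gains {"models": "models",
    # "scaling_factor": "scaling_factor"}; *args and **kwargs are skipped,
    # as is any name an inherited _config_mapping already registers.
    def __init__(self, models, scaling_factor: float = 0.5, *args, **kwargs):
        pass
```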
fusion_bench/modelpool/causal_lm/causal_lm.py
CHANGED

@@ -8,7 +8,7 @@ from copy import deepcopy
 from typing import Any, Dict, Optional, TypeAlias, Union, cast  # noqa: F401
 
 import peft
-from omegaconf import DictConfig, flag_override
+from omegaconf import DictConfig, OmegaConf, flag_override
 from torch import nn
 from torch.nn.modules import Module
 from transformers import (

@@ -19,43 +19,32 @@ from transformers import (
 )
 from typing_extensions import override
 
-from fusion_bench
-
-
+from fusion_bench import (
+    BaseModelPool,
+    auto_register_config,
+    import_object,
+    instantiate,
+    parse_dtype,
+)
 from fusion_bench.utils.lazy_state_dict import LazyStateDict
-from fusion_bench.utils.packages import import_object
 
 log = logging.getLogger(__name__)
 
 
+@auto_register_config
 class CausalLMPool(BaseModelPool):
-    _config_mapping = BaseModelPool._config_mapping | {
-        "_tokenizer": "tokenizer",
-        "_model_kwargs": "model_kwargs",
-        "load_lazy": "load_lazy",
-    }
-
     def __init__(
         self,
         models,
         *,
-        tokenizer: Optional[DictConfig],
+        tokenizer: Optional[DictConfig | str],
         model_kwargs: Optional[DictConfig] = None,
-
+        enable_lazy_loading: bool = False,
         **kwargs,
     ):
         super().__init__(models, **kwargs)
-
-
-        self._model_kwargs = model_kwargs
-        if self._model_kwargs is None:
-            self._model_kwargs = DictConfig({})
-        with flag_override(self._model_kwargs, "allow_objects", True):
-            if hasattr(self._model_kwargs, "torch_dtype"):
-                self._model_kwargs.torch_dtype = parse_dtype(
-                    self._model_kwargs.torch_dtype
-                )
-        self.load_lazy = load_lazy
+        if model_kwargs is None:
+            self.model_kwargs = DictConfig({})
 
     def get_model_path(self, model_name: str):
         model_name_or_config = self._models[model_name]

@@ -66,6 +55,16 @@ class CausalLMPool(BaseModelPool):
         else:
             raise RuntimeError("Invalid model configuration")
 
+    def get_model_kwargs(self):
+        model_kwargs = (
+            OmegaConf.to_container(self.model_kwargs, resolve=True)
+            if isinstance(self.model_kwargs, DictConfig)
+            else self.model_kwargs
+        )
+        if "torch_dtype" in model_kwargs:
+            model_kwargs["torch_dtype"] = parse_dtype(model_kwargs["torch_dtype"])
+        return model_kwargs
+
     @override
     def load_model(
         self,

@@ -98,7 +97,7 @@ class CausalLMPool(BaseModelPool):
         pretrained_model_name_or_path: path_to_model_b
         ```
         """
-        model_kwargs =
+        model_kwargs = self.get_model_kwargs()
         model_kwargs.update(kwargs)
 
         if isinstance(model_name_or_config, str):

@@ -108,7 +107,7 @@ class CausalLMPool(BaseModelPool):
             model_config = self._models[model_name_or_config]
             if isinstance(model_config, str):
                 # model_config is a string
-                if not self.
+                if not self.enable_lazy_loading:
                     model = AutoModelForCausalLM.from_pretrained(
                         model_config,
                         *args,

@@ -126,7 +125,7 @@ class CausalLMPool(BaseModelPool):
         elif isinstance(model_name_or_config, (DictConfig, Dict)):
             model_config = model_name_or_config
 
-            if not self.
+            if not self.enable_lazy_loading:
                 model = instantiate(model_config, *args, **model_kwargs)
             else:
                 meta_module_class = model_config.pop("_target_")

@@ -158,12 +157,12 @@ class CausalLMPool(BaseModelPool):
         Returns:
             PreTrainedTokenizer: The tokenizer.
         """
-        assert self.
+        assert self.tokenizer is not None, "Tokenizer is not defined in the config"
         log.info("Loading tokenizer.", stacklevel=2)
-        if isinstance(self.
-            tokenizer = AutoTokenizer.from_pretrained(self.
+        if isinstance(self.tokenizer, str):
+            tokenizer = AutoTokenizer.from_pretrained(self.tokenizer, *args, **kwargs)
         else:
-            tokenizer = instantiate(self.
+            tokenizer = instantiate(self.tokenizer, *args, **kwargs)
         return tokenizer
 
     @override

@@ -213,12 +212,12 @@ class CausalLMBackbonePool(CausalLMPool):
     def load_model(
         self, model_name_or_config: str | DictConfig, *args, **kwargs
     ) -> Module:
-        if self.
+        if self.enable_lazy_loading:
             log.warning(
                 "CausalLMBackbonePool does not support lazy loading. "
                 "Falling back to normal loading."
             )
-            self.
+            self.enable_lazy_loading = False
         model: AutoModelForCausalLM = super().load_model(
             model_name_or_config, *args, **kwargs
         )
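Taken together, these changes rename `load_lazy` to `enable_lazy_loading`, let `tokenizer` be a plain path string, and route model kwargs through the new `get_model_kwargs()` helper. A hedged usage sketch; model names and paths are placeholders, and the import path simply mirrors the module location in this diff:

```python
from omegaconf import DictConfig

from fusion_bench.modelpool.causal_lm.causal_lm import CausalLMPool

pool = CausalLMPool(
    models=DictConfig({"math": "path_to_model_a", "coder": "path_to_model_b"}),
    tokenizer="path_to_model_a",                            # now a str or a DictConfig
    model_kwargs=DictConfig({"torch_dtype": "bfloat16"}),
    enable_lazy_loading=False,                              # formerly `load_lazy`
)

model_kwargs = pool.get_model_kwargs()  # plain dict; "torch_dtype" resolved via parse_dtype
model = pool.load_model("math")         # falls through to AutoModelForCausalLM.from_pretrained
tokenizer = pool.load_tokenizer()       # AutoTokenizer.from_pretrained, since tokenizer is a str
```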
fusion_bench/models/__init__.py
CHANGED
fusion_bench/models/hf_utils.py
CHANGED
@@ -5,23 +5,63 @@ This module contains utilities for working with Hugging Face models.
 import inspect
 import os
 import shutil
-from typing import Optional, cast
+from typing import List, Optional, cast
 
-from omegaconf import OmegaConf
+from omegaconf import DictConfig, OmegaConf
 from transformers.modeling_utils import PreTrainedModel
 
-from fusion_bench import
-from fusion_bench.utils.pylogger import getRankZeroLogger
+from fusion_bench.utils.pylogger import get_rankzero_logger
 
-log =
+log = get_rankzero_logger(__name__)
 
 __all__ = [
+    "load_model_card_template",
     "save_pretrained_with_remote_code",
-    "
-    "generate_readme_body",
-    "generate_complete_readme",
+    "create_default_model_card",
 ]
 
+MODEL_CARD_TEMPLATE_DIRS = [
+    os.path.join(os.path.dirname(__file__), "model_card_templates")
+]
+
+
+def load_model_card_template(basename: str) -> str:
+    """
+    Load a model card template from file.
+
+    Searches for a template file by name, first checking if the name is a direct file path,
+    then searching through predefined template directories.
+
+    Args:
+        name (str): The name of the template file or a direct file path to the template.
+
+    Returns:
+        str: The contents of the template file as a string.
+
+    Raises:
+        FileNotFoundError: If the template file is not found in any of the search locations.
+    """
+    if os.path.exists(basename):
+        return open(basename).read()
+
+    for template_dir in MODEL_CARD_TEMPLATE_DIRS:
+        template_path = os.path.join(template_dir, basename)
+        if os.path.exists(template_path):
+            return open(template_path).read()
+
+    raise FileNotFoundError(f"Model card template '{basename}' not found.")
+
+
+def try_to_yaml(config):
+    if config is None:
+        return None
+
+    try:
+        return OmegaConf.to_yaml(config, resolve=True, sort_keys=True)
+    except Exception as e:
+        log.error(f"Failed to convert config to YAML: {e}. Return `None`.")
+        return None
+
 
 def save_pretrained_with_remote_code(
     model: PreTrainedModel,

@@ -99,84 +139,22 @@ def save_pretrained_with_remote_code(
             f.write(f"from .{base_name} import {auto_map[key].__name__}\n")
 
 
-def
-    models: list[str]
-
-
-
-    text = "---\nbase_model:\n"
-    for model_name in models:
-        text += f"- {model_name}\n"
-    if library_name:
-        text += f"library_name: {library_name}\n"
-    text += "tags:\n"
-    for tag in tags:
-        text += f"- {tag}\n"
-    text += "---\n"
-    return text
-
-
-def generate_readme_body(
-    algorithm: BaseAlgorithm,
-    models_or_modelpool: Optional[list[str] | BaseModelPool] = None,
-    models: list[str] = None,
+def create_default_model_card(
+    models: list[str],
+    description=None,
+    algorithm_config: DictConfig = None,
+    modelpool_config: DictConfig = None,
 ):
-
-
-
-
-
-    ""
-
-
-
-
-
-The following models were included in the merge:
-
-    """
-    for model_name in models:
-        text += f"- {model_name}\n"
-    text += "\n"
-
-    try:
-        text += f"""\
-## Configuration
-
-The following YAML configuration was used to produce this model:
-
-```yaml
-{OmegaConf.to_yaml(algorithm.config, resolve=True, sort_keys=True)}
-```
-"""
-    except Exception as e:
-        return (
-            text  # If the algorithm config cannot be converted to YAML, we skip it.
-        )
-
-    if isinstance(models_or_modelpool, BaseModelPool):
-        try:
-            text += f"""
-```yaml
-{OmegaConf.to_yaml(models_or_modelpool.config, resolve=True, sort_keys=True)}
-```
-"""
-        except Exception as e:
-            pass  # If the model pool config cannot be converted to YAML, we skip it.
-    return text
-
-
-def generate_complete_readme(
-    algorithm: BaseAlgorithm, modelpool: BaseModelPool, models: list[str]
-):
-    # Generate the complete README content
-    text = generate_readme_head(
-        [modelpool.get_model_path(m) for m in modelpool.model_names]
-    )
-    readme_body = generate_readme_body(
-        algorithm,
-        models_or_modelpool=modelpool,
-        models=[modelpool.get_model_path(m) for m in modelpool.model_names],
+    from jinja2 import Template
+
+    template: Template = Template(load_model_card_template("default.md"))
+    card = template.render(
+        models=models,
+        library_name="transformers",
+        tags=["fusion-bench", "merge"],
+        title="Deep Model Fusion",
+        description=description,
+        algorithm_config_str=try_to_yaml(algorithm_config),
+        modelpool_config_str=try_to_yaml(modelpool_config),
     )
-
-    return complete_readme
+    return card
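The three `generate_readme_*` helpers collapse into a single template-driven `create_default_model_card`. A hedged sketch of the new API, using placeholder model paths and configs and only the signatures visible in this diff:

```python
from omegaconf import DictConfig

from fusion_bench.models.hf_utils import create_default_model_card, load_model_card_template

card_text = create_default_model_card(
    models=["path_to_model_a", "path_to_model_b"],
    description="Merged with a simple weighted average.",        # optional free-text section
    algorithm_config=DictConfig({"_target_": "SimpleAverage"}),  # rendered as a YAML block
    modelpool_config=None,                                        # None sections are omitted
)
open("README.md", "w").write(card_text)

# The underlying Jinja2 template can also be loaded (or overridden) directly:
template_source = load_model_card_template("default.md")
```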
fusion_bench/models/model_card_templates/default.md
ADDED

@@ -0,0 +1,46 @@
+---
+base_model:
+{%- for model in models %}
+- {{ model }}
+{%- endfor %}
+library_name: {{ library_name }}
+tags:
+{%- for tag in tags %}
+- {{ tag }}
+{%- endfor %}
+---
+# {{ title }}
+
+{% if description is not none %}{{ description }}{% endif %}
+
+## Models Merged
+
+This is a merged model created using [fusion-bench](https://github.com/tanganke/fusion_bench).
+
+The following models were included in the merge:
+{% for model in models %}
+- {{ model }}
+{%- endfor %}
+
+{% if algorithm_config_str is not none or modelpool_config_str is not none %}
+## Configuration
+
+The following YAML configuration was used to produce this model:
+
+{% if algorithm_config_str is not none -%}
+### Algorithm Configuration
+
+```yaml
+{{ algorithm_config_str -}}
+```
+{%- endif %}
+
+{% if modelpool_config_str is not none -%}
+### Model Pool Configuration
+
+```yaml
+{{ modelpool_config_str -}}
+```
+{%- endif %}
+
+{% endif %}
fusion_bench/models/modeling_smile_llama/modeling_smile_llama.py
CHANGED

@@ -17,7 +17,6 @@ from transformers.modeling_outputs import (
 )
 from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS
 from transformers.models.llama.modeling_llama import (
-    LLAMA_INPUTS_DOCSTRING,
     LlamaRMSNorm,
     LlamaRotaryEmbedding,
     apply_rotary_pos_emb,

@@ -25,7 +24,6 @@ from transformers.models.llama.modeling_llama import (
 )
 from transformers.processing_utils import Unpack
 from transformers.utils import (
-    LossKwargs,
     add_start_docstrings_to_model_forward,
     can_return_tuple,
     is_torch_flex_attn_available,

@@ -296,7 +294,6 @@ class SmileLlamaModel(SmileLlamaPreTrainedModel):
         self.embed_tokens = value
 
     @can_return_tuple
-    @add_start_docstrings_to_model_forward(LLAMA_INPUTS_DOCSTRING)
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,

@@ -566,9 +563,6 @@ class SmileLlamaModel(SmileLlamaPreTrainedModel):
         return causal_mask
 
 
-class KwargsForCausalLM(FlashAttentionKwargs, LossKwargs): ...
-
-
 class SmileLlamaForCausalLM(SmileLlamaPreTrainedModel, GenerationMixin):
     _tied_weights_keys = ["lm_head.weight"]
     _tp_plan = {"lm_head": "colwise_rep"}

@@ -603,7 +597,6 @@ class SmileLlamaForCausalLM(SmileLlamaPreTrainedModel, GenerationMixin):
 
     @can_return_tuple
     @deprecate_kwarg("num_logits_to_keep", version="4.50", new_name="logits_to_keep")
-    @add_start_docstrings_to_model_forward(LLAMA_INPUTS_DOCSTRING)
     @replace_return_docstrings(
         output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC
     )

@@ -620,7 +613,7 @@ class SmileLlamaForCausalLM(SmileLlamaPreTrainedModel, GenerationMixin):
         output_hidden_states: Optional[bool] = None,
         cache_position: Optional[torch.LongTensor] = None,
         logits_to_keep: Union[int, torch.Tensor] = 0,
-        **kwargs
+        **kwargs,
     ) -> CausalLMOutputWithPast:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
fusion_bench/models/modeling_smile_qwen2/modeling_smile_qwen2.py
CHANGED

@@ -31,7 +31,6 @@ from transformers.models.qwen2.modeling_qwen2 import (
 )
 from transformers.processing_utils import Unpack
 from transformers.utils import (
-    LossKwargs,
     add_code_sample_docstrings,
     add_start_docstrings,
     add_start_docstrings_to_model_forward,

@@ -607,9 +606,6 @@ class SmileQwen2Model(SmileQwen2PreTrainedModel):
         return causal_mask
 
 
-class KwargsForCausalLM(FlashAttentionKwargs, LossKwargs): ...
-
-
 class SmileQwen2ForCausalLM(SmileQwen2PreTrainedModel, GenerationMixin):
     _tied_weights_keys = ["lm_head.weight"]
     _tp_plan = {"lm_head": "colwise_rep"}

@@ -660,7 +656,7 @@ class SmileQwen2ForCausalLM(SmileQwen2PreTrainedModel, GenerationMixin):
         output_hidden_states: Optional[bool] = None,
         cache_position: Optional[torch.LongTensor] = None,
         logits_to_keep: Union[int, torch.Tensor] = 0,
-        **kwargs
+        **kwargs,
     ) -> CausalLMOutputWithPast:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
fusion_bench/programs/fabric_fusion_program.py
CHANGED

@@ -1,6 +1,7 @@
 import json
 import logging
 import os
+from pathlib import Path
 from typing import Any, Callable, Dict, Iterable, List, Optional, Union  # noqa: F401
 
 import lightning as L

@@ -9,19 +10,24 @@ from omegaconf import DictConfig, OmegaConf
 from torch import nn
 from tqdm.auto import tqdm
 
-import fusion_bench
-from fusion_bench
+import fusion_bench
+from fusion_bench import (
+    BaseAlgorithm,
+    BaseHydraProgram,
+    BaseModelPool,
+    BaseTaskPool,
+    RuntimeConstants,
+    import_object,
+    instantiate,
+    timeit_context,
+)
 from fusion_bench.mixins import LightningFabricMixin
-from fusion_bench.modelpool import BaseModelPool
-from fusion_bench.programs import BaseHydraProgram
-from fusion_bench.taskpool import BaseTaskPool
-from fusion_bench.utils import import_object, instantiate, timeit_context
 from fusion_bench.utils.hydra_utils import get_hydra_output_dir
 from fusion_bench.utils.json import print_json
-from fusion_bench.utils.
+from fusion_bench.utils.path import create_symlink
 from fusion_bench.utils.rich_utils import print_bordered, print_config_tree
 
-log =
+log = fusion_bench.get_rankzero_logger(__name__)
 
 
 class FabricModelFusionProgram(

@@ -60,6 +66,7 @@ class FabricModelFusionProgram(
         path: DictConfig = None,
         **kwargs,
     ):
+        super().__init__(**kwargs)
         self._method = method
         self._modelpool = modelpool
         self._taskpool = taskpool

@@ -70,8 +77,10 @@ class FabricModelFusionProgram(
         self.fast_dev_run = fast_dev_run
         self.seed = seed
         self.path = path
-
-
+        RuntimeConstants.debug = fast_dev_run
+        RuntimeConstants.print_function_call = print_function_call
+        if path is not None:
+            RuntimeConstants.cache_dir = path.get("cache_dir", None)
 
         if print_config:
             print_config_tree(

@@ -224,8 +233,16 @@ class FabricModelFusionProgram(
         fabric = self.fabric
         if self.seed is not None:
             L.seed_everything(self.seed)
-
-
+
+        # create symbol link to hydra output directory
+        if (
+            self.fabric.is_global_zero
+            and self.log_dir is not None
+            and os.path.abspath(self.log_dir) != os.path.abspath(get_hydra_output_dir())
+        ):
+            create_symlink(
+                get_hydra_output_dir(), self.log_dir, link_name="hydra_output"
+            )
 
         log.info("Running the model fusion program.")
         # setup the modelpool, method, and taskpool

@@ -278,51 +295,3 @@ class FabricModelFusionProgram(
             json.dump(report, open(self.report_save_path, "w"))
         else:
             log.info("No task pool specified. Skipping evaluation.")
-
-    @rank_zero_only
-    def _link_hydra_output(self):
-        """
-        Creates a symbolic link to the Hydra output directory within the specified log directory.
-
-        If `self.log_dir` is not None, this method will:
-        1. Retrieve the Hydra output directory using `get_hydra_output_dir()`.
-        2. Create the log directory if it does not already exist.
-        3. Create a symbolic link named "hydra_output_<basename_of_hydra_output_dir>"
-           within the log directory, pointing to the Hydra output directory.
-
-        Note:
-        - The symbolic link is created only if the Hydra output directory is not None.
-        - The `target_is_directory` parameter is set to True to indicate that the target is a directory.
-
-        Raises:
-            OSError: If the symbolic link creation fails.
-        """
-        if self.log_dir is not None:
-            # make symlink to the hydra output directory
-            try:
-                hydra_output_dir = get_hydra_output_dir()
-            except Exception as e:
-                hydra_output_dir = None
-
-            if hydra_output_dir is not None:
-                if os.path.abspath(hydra_output_dir) == os.path.abspath(self.log_dir):
-                    return
-
-                os.makedirs(self.log_dir, exist_ok=True)
-                try:
-                    # if the system is windows, use the `mklink` command in "CMD" to create the symlink
-                    if os.name == "nt":
-                        os.system(
-                            f"mklink /J {os.path.abspath(os.path.join(self.log_dir, 'hydra_output_' + os.path.basename(hydra_output_dir)))} {os.path.abspath(hydra_output_dir)}"
-                        )
-                    else:
-                        os.symlink(
-                            hydra_output_dir,
-                            os.path.join(
-                                self.log_dir,
-                                "hydra_output_" + os.path.basename(hydra_output_dir),
-                            ),
-                            target_is_directory=True,
-                        )
-                except OSError as e:
-                    log.warning(f"Failed to create symbolic link: {e}")