PyPI - fusion-bench - Versions diffs - 0.2.21__py3-none-any.whl → 0.2.23__py3-none-any.whl - Mend

fusion-bench 0.2.21__py3-none-any.whl → 0.2.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (89) hide show

fusion_bench/__init__.py +25 -2
fusion_bench/compat/method/__init__.py +5 -2
fusion_bench/compat/method/base_algorithm.py +3 -2
fusion_bench/compat/modelpool/base_pool.py +3 -3
fusion_bench/compat/taskpool/clip_image_classification.py +1 -1
fusion_bench/constants/__init__.py +1 -0
fusion_bench/constants/runtime.py +57 -0
fusion_bench/dataset/gpt2_glue.py +1 -1
fusion_bench/method/__init__.py +12 -4
fusion_bench/method/analysis/task_vector_cos_similarity.py +95 -12
fusion_bench/method/analysis/task_vector_violin_plot.py +160 -52
fusion_bench/method/bitdelta/__init__.py +1 -0
fusion_bench/method/bitdelta/bitdelta.py +7 -23
fusion_bench/method/classification/clip_finetune.py +1 -1
fusion_bench/method/expert_sparsity/mixtral/dynamic_skipping.py +2 -0
fusion_bench/method/expert_sparsity/mixtral/layer_wise_pruning.py +2 -0
fusion_bench/method/expert_sparsity/mixtral/progressive_pruning.py +2 -0
fusion_bench/method/fisher_merging/clip_fisher_merging.py +0 -4
fusion_bench/method/fisher_merging/gpt2_fisher_merging.py +2 -2
fusion_bench/method/linear/simple_average_for_llama.py +16 -11
fusion_bench/method/model_stock/__init__.py +1 -0
fusion_bench/method/model_stock/model_stock.py +309 -0
fusion_bench/method/regmean/clip_regmean.py +3 -6
fusion_bench/method/regmean/regmean.py +27 -56
fusion_bench/method/regmean/utils.py +56 -0
fusion_bench/method/regmean_plusplus/regmean_plusplus.py +21 -60
fusion_bench/method/simple_average.py +7 -7
fusion_bench/method/slerp/__init__.py +1 -1
fusion_bench/method/slerp/slerp.py +110 -14
fusion_bench/method/smile_upscaling/causal_lm_upscaling.py +371 -0
fusion_bench/method/smile_upscaling/projected_energy.py +1 -2
fusion_bench/method/smile_upscaling/smile_mistral_upscaling.py +5 -1
fusion_bench/method/smile_upscaling/smile_qwen2_upscaling.py +40 -31
fusion_bench/method/smile_upscaling/smile_upscaling.py +1 -1
fusion_bench/method/we_moe/__init__.py +1 -0
fusion_bench/method/we_moe/entropy_loss.py +25 -0
fusion_bench/method/we_moe/flan_t5_we_moe.py +320 -0
fusion_bench/method/we_moe/utils.py +15 -0
fusion_bench/method/weighted_average/llama.py +1 -1
fusion_bench/mixins/clip_classification.py +37 -48
fusion_bench/mixins/serialization.py +30 -10
fusion_bench/modelpool/base_pool.py +1 -1
fusion_bench/modelpool/causal_lm/causal_lm.py +293 -75
fusion_bench/modelpool/seq2seq_lm/modelpool.py +146 -0
fusion_bench/models/__init__.py +5 -0
fusion_bench/models/hf_utils.py +69 -86
fusion_bench/models/linearized/vision_model.py +6 -6
fusion_bench/models/model_card_templates/default.md +46 -0
fusion_bench/models/modeling_smile_llama/__init__.py +7 -0
fusion_bench/models/modeling_smile_llama/modeling_smile_llama.py +1 -8
fusion_bench/models/modeling_smile_mistral/__init__.py +2 -1
fusion_bench/models/modeling_smile_qwen2/modeling_smile_qwen2.py +1 -5
fusion_bench/models/we_moe.py +8 -8
fusion_bench/programs/fabric_fusion_program.py +29 -60
fusion_bench/scripts/cli.py +34 -1
fusion_bench/taskpool/base_pool.py +99 -17
fusion_bench/taskpool/clip_vision/taskpool.py +10 -5
fusion_bench/taskpool/dummy.py +101 -13
fusion_bench/taskpool/lm_eval_harness/taskpool.py +80 -0
fusion_bench/taskpool/nyuv2_taskpool.py +28 -0
fusion_bench/utils/__init__.py +2 -0
fusion_bench/utils/cache_utils.py +101 -1
fusion_bench/utils/data.py +6 -4
fusion_bench/utils/devices.py +7 -4
fusion_bench/utils/dtype.py +3 -2
fusion_bench/utils/fabric.py +2 -2
fusion_bench/utils/lazy_imports.py +23 -0
fusion_bench/utils/lazy_state_dict.py +117 -19
fusion_bench/utils/modelscope.py +3 -3
fusion_bench/utils/packages.py +3 -3
fusion_bench/utils/parameters.py +0 -2
fusion_bench/utils/path.py +56 -0
fusion_bench/utils/pylogger.py +1 -1
fusion_bench/utils/timer.py +92 -10
{fusion_bench-0.2.21.dist-info → fusion_bench-0.2.23.dist-info}/METADATA +1 -23
{fusion_bench-0.2.21.dist-info → fusion_bench-0.2.23.dist-info}/RECORD +89 -75
fusion_bench_config/_get_started/llm_slerp.yaml +12 -0
fusion_bench_config/method/fisher_merging/clip_fisher_merging.yaml +0 -1
fusion_bench_config/method/linear/simple_average_for_llama.yaml +3 -2
fusion_bench_config/method/model_stock/model_stock.yaml +12 -0
fusion_bench_config/method/slerp/slerp_lm.yaml +4 -0
fusion_bench_config/method/smile_upscaling/causal_lm_upscaling.yaml +21 -0
fusion_bench_config/method/smile_upscaling/smile_qwen2_upscaling.yaml +1 -1
fusion_bench_config/method/wemoe/flan_t5_weight_ensembling_moe.yaml +20 -0
fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-1.5B_math_and_coder.yaml +1 -1
{fusion_bench-0.2.21.dist-info → fusion_bench-0.2.23.dist-info}/WHEEL +0 -0
{fusion_bench-0.2.21.dist-info → fusion_bench-0.2.23.dist-info}/entry_points.txt +0 -0
{fusion_bench-0.2.21.dist-info → fusion_bench-0.2.23.dist-info}/licenses/LICENSE +0 -0
{fusion_bench-0.2.21.dist-info → fusion_bench-0.2.23.dist-info}/top_level.txt +0 -0

fusion_bench/models/hf_utils.py CHANGED Viewed

@@ -5,23 +5,65 @@ This module contains utilities for working with Hugging Face models.
 import inspect
 import os
 import shutil
-from typing import Optional, cast
+from typing import List, Optional, cast
-from omegaconf import OmegaConf
+from omegaconf import DictConfig, OmegaConf
 from transformers.modeling_utils import PreTrainedModel
-from fusion_bench import BaseAlgorithm, BaseModelPool
-from fusion_bench.utils.pylogger import getRankZeroLogger
+from fusion_bench.utils.pylogger import get_rankzero_logger
-log = getRankZeroLogger(__name__)
+log = get_rankzero_logger(__name__)
 __all__ = [
+    "load_model_card_template",
     "save_pretrained_with_remote_code",
-    "generate_readme_head",
-    "generate_readme_body",
-    "generate_complete_readme",
+    "create_default_model_card",
 ]
+MODEL_CARD_TEMPLATE_DIRS = [
+    os.path.join(os.path.dirname(__file__), "model_card_templates")
+]
+def load_model_card_template(basename: str) -> str:
+    """
+    Load a model card template from file.
+    Searches for a template file by name, first checking if the name is a direct file path,
+    then searching through predefined template directories.
+    Args:
+        name (str): The name of the template file or a direct file path to the template.
+    Returns:
+        str: The contents of the template file as a string.
+    Raises:
+        FileNotFoundError: If the template file is not found in any of the search locations.
+    """
+    if os.path.exists(basename):
+        with open(basename, "r") as f:
+            return f.read()
+    for template_dir in MODEL_CARD_TEMPLATE_DIRS:
+        template_path = os.path.join(template_dir, basename)
+        if os.path.exists(template_path):
+            with open(template_path, "r") as f:
+                return f.read()
+    raise FileNotFoundError(f"Model card template '{basename}' not found.")
+def try_to_yaml(config):
+    if config is None:
+        return None
+    try:
+        return OmegaConf.to_yaml(config, resolve=True, sort_keys=True)
+    except Exception as e:
+        log.error(f"Failed to convert config to YAML: {e}. Return `None`.")
+        return None
 def save_pretrained_with_remote_code(
     model: PreTrainedModel,
@@ -99,84 +141,25 @@ def save_pretrained_with_remote_code(
             f.write(f"from .{base_name} import {auto_map[key].__name__}\n")
-def generate_readme_head(
-    models: list[str] | BaseModelPool,
-    library_name: str = "transformers",
+def create_default_model_card(
+    models: list[str],
+    *,
+    title: str = "Deep Model Fusion",
     tags: list[str] = ["fusion-bench", "merge"],
+    description=None,
+    algorithm_config: DictConfig = None,
+    modelpool_config: DictConfig = None,
 ):
-    text = "---\nbase_model:\n"
-    for model_name in models:
-        text += f"- {model_name}\n"
-    if library_name:
-        text += f"library_name: {library_name}\n"
-    text += "tags:\n"
-    for tag in tags:
-        text += f"- {tag}\n"
-    text += "---\n"
-    return text
-def generate_readme_body(
-    algorithm: BaseAlgorithm,
-    models_or_modelpool: Optional[list[str] | BaseModelPool] = None,
-    models: list[str] = None,
-):
-    text = """\
-# Merge
-This is a merge of pre-trained language models created using [fusion-bench](https://github.com/tanganke/fusion_bench).
-"""
-    if models is not None:
-        text += """
-## Models Merged
-The following models were included in the merge:
-"""
-        for model_name in models:
-            text += f"- {model_name}\n"
-        text += "\n"
-        try:
-            text += f"""\
-    ## Configuration
-    The following YAML configuration was used to produce this model:
-    ```yaml
-    {OmegaConf.to_yaml(algorithm.config, resolve=True, sort_keys=True)}
-    ```
-    """
-        except Exception as e:
-            return (
-                text  # If the algorithm config cannot be converted to YAML, we skip it.
-            )
-    if isinstance(models_or_modelpool, BaseModelPool):
-        try:
-            text += f"""
-```yaml
-{OmegaConf.to_yaml(models_or_modelpool.config, resolve=True, sort_keys=True)}
-```
-"""
-        except Exception as e:
-            pass  # If the model pool config cannot be converted to YAML, we skip it.
-    return text
-def generate_complete_readme(
-    algorithm: BaseAlgorithm, modelpool: BaseModelPool, models: list[str]
-):
-    # Generate the complete README content
-    text = generate_readme_head(
-        [modelpool.get_model_path(m) for m in modelpool.model_names]
-    )
-    readme_body = generate_readme_body(
-        algorithm,
-        models_or_modelpool=modelpool,
-        models=[modelpool.get_model_path(m) for m in modelpool.model_names],
+    from jinja2 import Template
+    template: Template = Template(load_model_card_template("default.md"))
+    card = template.render(
+        models=models,
+        library_name="transformers",
+        title=title,
+        tags=tags,
+        description=description,
+        algorithm_config_str=try_to_yaml(algorithm_config),
+        modelpool_config_str=try_to_yaml(modelpool_config),
     )
-    complete_readme = text + "\n" + readme_body
-    return complete_readme
+    return card

fusion_bench/models/linearized/vision_model.py CHANGED Viewed

@@ -45,21 +45,21 @@ def linearize_lora_model_(model):
 def load_fft_vision_model_hf(
-    model_name: str, return_vison_model=True
+    model_name: str, return_vision_model=True
 ) -> Union[CLIPVisionTransformer, CLIPVisionModel]:
     """
     Load a CLIP vision model from Hugging Face.
     Args:
         model_name (str): The name of the CLIP vision model to load from Hugging Face.
-        return_vison_model (bool, optional): If False, the full CLIPVisionModel is returned. If True, only the vision model (`CLIPVisionTransformer`) is returned. Defaults to True.
+        return_vision_model (bool, optional): If False, the full CLIPVisionModel is returned. If True, only the vision model (`CLIPVisionTransformer`) is returned. Defaults to True.
     Returns:
         Union[CLIPVisionTransformer, CLIPVisionModel]: The vision model.
     """
     model = CLIPVisionModel.from_pretrained(model_name)
-    if return_vison_model:
+    if return_vision_model:
         return CLIPVisionModel.from_pretrained(model_name).vision_model
     else:
         return model
@@ -69,7 +69,7 @@ def load_lora_vision_model_hf(
     base_model_name: str,
     peft_name: str,
     merge_and_unload: bool = False,
-    return_vison_model=True,
+    return_vision_model=True,
 ) -> PeftModel:
     """
     Load a LoRA (Low-Rank Adaptation) vision model from Hugging Face.
@@ -80,7 +80,7 @@ def load_lora_vision_model_hf(
         base_model_name (str): The name of the base vision model to load from Hugging Face.
         peft_name (str): The name of the LoRA adaptation to apply to the base model.
         merge_and_unload (bool, optional): If True, the LoRA adaptation is merged into the base model and the LoRA layers are removed. Defaults to False.
-        return_vison_model (bool, optional): If False, the full CLIPVisionModel is returned. If True, only the vision model (`CLIPVisionTransformer`) is returned. Defaults to True.
+        return_vision_model (bool, optional): If False, the full CLIPVisionModel is returned. If True, only the vision model (`CLIPVisionTransformer`) is returned. Defaults to True.
     Returns:
         PeftModel: The adapted vision model, optionally merged and unloaded.
@@ -97,7 +97,7 @@ def load_lora_vision_model_hf(
         vision_model = peft_model
     # Return the vision model
-    if return_vison_model:
+    if return_vision_model:
         return vision_model
     else:
         model.vision_model = vision_model

fusion_bench/models/model_card_templates/default.md ADDED Viewed

@@ -0,0 +1,46 @@
+---
+base_model:
+{%- for model in models %}
+- {{ model }}
+{%- endfor %}
+library_name: {{ library_name }}
+tags:
+{%- for tag in tags %}
+- {{ tag }}
+{%- endfor %}
+---
+# {{ title }}
+{% if description is not none %}{{ description }}{% endif %}
+## Models Merged
+This is a merged model created using [fusion-bench](https://github.com/tanganke/fusion_bench).
+The following models were included in the merge:
+{% for model in models %}
+- {{ model }}
+{%- endfor %}
+{% if algorithm_config_str is not none or modelpool_config_str is not none %}
+## Configuration
+The following YAML configuration was used to produce this model:
+{% if algorithm_config_str is not none -%}
+### Algorithm Configuration
+```yaml
+{{ algorithm_config_str -}}
+```
+{%- endif %}
+{% if modelpool_config_str is not none -%}
+### Model Pool Configuration
+```yaml
+{{ modelpool_config_str -}}
+```
+{%- endif %}
+{% endif %}

fusion_bench/models/modeling_smile_llama/__init__.py CHANGED Viewed

@@ -0,0 +1,7 @@
+from . import register
+from .configuration_smile_llama import SmileLlamaConfig
+from .modeling_smile_llama import (
+    SmileLlamaDecoderLayer,
+    SmileLlamaForCausalLM,
+    SmileLlamaModel,
+)

fusion_bench/models/modeling_smile_llama/modeling_smile_llama.py CHANGED Viewed

@@ -17,7 +17,6 @@ from transformers.modeling_outputs import (
 )
 from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS
 from transformers.models.llama.modeling_llama import (
-    LLAMA_INPUTS_DOCSTRING,
     LlamaRMSNorm,
     LlamaRotaryEmbedding,
     apply_rotary_pos_emb,
@@ -25,7 +24,6 @@ from transformers.models.llama.modeling_llama import (
 )
 from transformers.processing_utils import Unpack
 from transformers.utils import (
-    LossKwargs,
     add_start_docstrings_to_model_forward,
     can_return_tuple,
     is_torch_flex_attn_available,
@@ -296,7 +294,6 @@ class SmileLlamaModel(SmileLlamaPreTrainedModel):
         self.embed_tokens = value
     @can_return_tuple
-    @add_start_docstrings_to_model_forward(LLAMA_INPUTS_DOCSTRING)
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,
@@ -566,9 +563,6 @@ class SmileLlamaModel(SmileLlamaPreTrainedModel):
         return causal_mask
-class KwargsForCausalLM(FlashAttentionKwargs, LossKwargs): ...
 class SmileLlamaForCausalLM(SmileLlamaPreTrainedModel, GenerationMixin):
     _tied_weights_keys = ["lm_head.weight"]
     _tp_plan = {"lm_head": "colwise_rep"}
@@ -603,7 +597,6 @@ class SmileLlamaForCausalLM(SmileLlamaPreTrainedModel, GenerationMixin):
     @can_return_tuple
     @deprecate_kwarg("num_logits_to_keep", version="4.50", new_name="logits_to_keep")
-    @add_start_docstrings_to_model_forward(LLAMA_INPUTS_DOCSTRING)
     @replace_return_docstrings(
         output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC
     )
@@ -620,7 +613,7 @@ class SmileLlamaForCausalLM(SmileLlamaPreTrainedModel, GenerationMixin):
         output_hidden_states: Optional[bool] = None,
         cache_position: Optional[torch.LongTensor] = None,
         logits_to_keep: Union[int, torch.Tensor] = 0,
-        **kwargs: Unpack[KwargsForCausalLM],
+        **kwargs,
     ) -> CausalLMOutputWithPast:
         r"""
             labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):

fusion_bench/models/modeling_smile_mistral/__init__.py CHANGED Viewed

@@ -1,6 +1,7 @@
+from . import register
 from .configuration_smile_mistral import SmileMistralConfig
 from .modeling_smile_mistral import (
+    SmileMistralDecoderLayer,
     SmileMistralForCausalLM,
     SmileMistralModel,
 )
-from . import register

fusion_bench/models/modeling_smile_qwen2/modeling_smile_qwen2.py CHANGED Viewed

@@ -31,7 +31,6 @@ from transformers.models.qwen2.modeling_qwen2 import (
 )
 from transformers.processing_utils import Unpack
 from transformers.utils import (
-    LossKwargs,
     add_code_sample_docstrings,
     add_start_docstrings,
     add_start_docstrings_to_model_forward,
@@ -607,9 +606,6 @@ class SmileQwen2Model(SmileQwen2PreTrainedModel):
         return causal_mask
-class KwargsForCausalLM(FlashAttentionKwargs, LossKwargs): ...
 class SmileQwen2ForCausalLM(SmileQwen2PreTrainedModel, GenerationMixin):
     _tied_weights_keys = ["lm_head.weight"]
     _tp_plan = {"lm_head": "colwise_rep"}
@@ -660,7 +656,7 @@ class SmileQwen2ForCausalLM(SmileQwen2PreTrainedModel, GenerationMixin):
         output_hidden_states: Optional[bool] = None,
         cache_position: Optional[torch.LongTensor] = None,
         logits_to_keep: Union[int, torch.Tensor] = 0,
-        **kwargs: Unpack[KwargsForCausalLM],
+        **kwargs,
     ) -> CausalLMOutputWithPast:
         r"""
             labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):

fusion_bench/models/we_moe.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import functools
 import logging
-from typing import List
+from typing import Generic, List
 import torch
 import torch.func
@@ -9,7 +9,7 @@ from torch.func import functional_call
 from torch.nn import functional as F
 from fusion_bench.models.utils import del_attr, get_attr, set_attr
-from fusion_bench.utils.type import StateDictType
+from fusion_bench.utils.type import StateDictType, TorchModelType
 log = logging.getLogger(__name__)
@@ -76,15 +76,15 @@ def construct_weight_ensembling_gate(
     return gate
-class WeightEnsemblingMoE(nn.Module):
+class WeightEnsemblingMoE(nn.Module, Generic[TorchModelType]):
     # variable to store the merged state dict temporarily
     _merged_state_dict: StateDictType = None
     def __init__(
         self,
         hidden_size: int,
-        base_model: nn.Module,
-        expert_models: List[nn.Module],
+        base_model: TorchModelType,
+        expert_models: List[TorchModelType],
         init_lambda: float = 0.2,
         batch_first: bool = False,
         router_hidden_layers: int = 2,
@@ -101,8 +101,8 @@ class WeightEnsemblingMoE(nn.Module):
         Args:
             hidden_size (int): The size of the hidden layer in the models.
-            base_model (nn.Module): The base model that will be used as a reference for the expert models.
-            expert_models (List[nn.Module]): A list of expert models that will be combined.
+            base_model (TorchModelType): The base model that will be used as a reference for the expert models.
+            expert_models (List[TorchModelType]): A list of expert models that will be combined.
             init_lambda (float, optional): The initial lambda value for the weight ensembling gate. Defaults to 0.2.
             batch_first (bool, optional): If True, the input tensors are expected to have the batch size as the first dimension. Defaults to False.
             router_hidden_layers (int, optional): The number of hidden layers in the router. Defaults to 2.
@@ -145,7 +145,7 @@ class WeightEnsemblingMoE(nn.Module):
             self._merged_state_dict,
         )
-    def merge_weights(self, expert_weights):
+    def merge_weights(self, expert_weights) -> StateDictType:
         state_dict = self.base_model.state_dict(keep_vars=True)
         for weight, task_vector in zip(expert_weights, self.task_vectors):
             for name, param in task_vector.named_parameters():

fusion_bench/programs/fabric_fusion_program.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import json
 import logging
 import os
+from pathlib import Path
 from typing import Any, Callable, Dict, Iterable, List, Optional, Union  # noqa: F401
 import lightning as L
@@ -9,19 +10,24 @@ from omegaconf import DictConfig, OmegaConf
 from torch import nn
 from tqdm.auto import tqdm
-import fusion_bench.utils.instantiate_utils
-from fusion_bench.method import BaseAlgorithm
+import fusion_bench
+from fusion_bench import (
+    BaseAlgorithm,
+    BaseHydraProgram,
+    BaseModelPool,
+    BaseTaskPool,
+    RuntimeConstants,
+    import_object,
+    instantiate,
+    timeit_context,
+)
 from fusion_bench.mixins import LightningFabricMixin
-from fusion_bench.modelpool import BaseModelPool
-from fusion_bench.programs import BaseHydraProgram
-from fusion_bench.taskpool import BaseTaskPool
-from fusion_bench.utils import import_object, instantiate, timeit_context
 from fusion_bench.utils.hydra_utils import get_hydra_output_dir
 from fusion_bench.utils.json import print_json
-from fusion_bench.utils.pylogger import getRankZeroLogger
+from fusion_bench.utils.path import create_symlink
 from fusion_bench.utils.rich_utils import print_bordered, print_config_tree
-log = getRankZeroLogger(__name__)
+log = fusion_bench.get_rankzero_logger(__name__)
 class FabricModelFusionProgram(
@@ -60,6 +66,7 @@ class FabricModelFusionProgram(
         path: DictConfig = None,
         **kwargs,
     ):
+        super().__init__(**kwargs)
         self._method = method
         self._modelpool = modelpool
         self._taskpool = taskpool
@@ -70,8 +77,10 @@ class FabricModelFusionProgram(
         self.fast_dev_run = fast_dev_run
         self.seed = seed
         self.path = path
-        fusion_bench.utils.instantiate_utils.PRINT_FUNCTION_CALL = print_function_call
-        super().__init__(**kwargs)
+        RuntimeConstants.debug = fast_dev_run
+        RuntimeConstants.print_function_call = print_function_call
+        if path is not None:
+            RuntimeConstants.cache_dir = path.get("cache_dir", None)
         if print_config:
             print_config_tree(
@@ -224,8 +233,16 @@ class FabricModelFusionProgram(
         fabric = self.fabric
         if self.seed is not None:
             L.seed_everything(self.seed)
-        if fabric.global_rank == 0:
-            self._link_hydra_output()
+        # create symbol link to hydra output directory
+        if (
+            self.fabric.is_global_zero
+            and self.log_dir is not None
+            and os.path.abspath(self.log_dir) != os.path.abspath(get_hydra_output_dir())
+        ):
+            create_symlink(
+                get_hydra_output_dir(), self.log_dir, link_name="hydra_output"
+            )
         log.info("Running the model fusion program.")
         # setup the modelpool, method, and taskpool
@@ -278,51 +295,3 @@ class FabricModelFusionProgram(
                     json.dump(report, open(self.report_save_path, "w"))
             else:
                 log.info("No task pool specified. Skipping evaluation.")
-    @rank_zero_only
-    def _link_hydra_output(self):
-        """
-        Creates a symbolic link to the Hydra output directory within the specified log directory.
-        If `self.log_dir` is not None, this method will:
-        1. Retrieve the Hydra output directory using `get_hydra_output_dir()`.
-        2. Create the log directory if it does not already exist.
-        3. Create a symbolic link named "hydra_output_<basename_of_hydra_output_dir>"
-           within the log directory, pointing to the Hydra output directory.
-        Note:
-            - The symbolic link is created only if the Hydra output directory is not None.
-            - The `target_is_directory` parameter is set to True to indicate that the target is a directory.
-        Raises:
-            OSError: If the symbolic link creation fails.
-        """
-        if self.log_dir is not None:
-            # make symlink to the hydra output directory
-            try:
-                hydra_output_dir = get_hydra_output_dir()
-            except Exception as e:
-                hydra_output_dir = None
-            if hydra_output_dir is not None:
-                if os.path.abspath(hydra_output_dir) == os.path.abspath(self.log_dir):
-                    return
-                os.makedirs(self.log_dir, exist_ok=True)
-                try:
-                    # if the system is windows, use the `mklink` command in "CMD" to create the symlink
-                    if os.name == "nt":
-                        os.system(
-                            f"mklink /J {os.path.abspath(os.path.join(self.log_dir, 'hydra_output_' + os.path.basename(hydra_output_dir)))} {os.path.abspath(hydra_output_dir)}"
-                        )
-                    else:
-                        os.symlink(
-                            hydra_output_dir,
-                            os.path.join(
-                                self.log_dir,
-                                "hydra_output_" + os.path.basename(hydra_output_dir),
-                            ),
-                            target_is_directory=True,
-                        )
-                except OSError as e:
-                    log.warning(f"Failed to create symbolic link: {e}")

fusion_bench/scripts/cli.py CHANGED Viewed

@@ -12,9 +12,9 @@ import os
 import hydra
 from omegaconf import DictConfig, OmegaConf
+from fusion_bench.constants import PROJECT_ROOT_PATH
 from fusion_bench.programs import BaseHydraProgram
 from fusion_bench.utils import instantiate
-from fusion_bench.constants import PROJECT_ROOT_PATH
 log = logging.getLogger(__name__)
@@ -34,6 +34,39 @@ def _get_default_config_path():
     version_base=None,
 )
 def main(cfg: DictConfig) -> None:
+    """
+    Main entry point for the FusionBench command-line interface.
+    This function serves as the primary entry point for the `fusion_bench` CLI command.
+    It is decorated with Hydra's main decorator to handle configuration management,
+    command-line argument parsing, and configuration file loading.
+    The function performs the following operations:
+    1. Resolves any interpolations in the configuration using OmegaConf
+    2. Instantiates the appropriate program class based on the configuration
+    3. Executes the program's run method to perform the fusion task
+    Args:
+        cfg (DictConfig): The Hydra configuration object containing all settings
+            for the fusion task. This includes method configuration, model pool
+            configuration, task pool configuration, and other runtime parameters.
+            The configuration is automatically loaded by Hydra from the specified
+            config files and command-line overrides.
+    Returns:
+        None: This function doesn't return a value but executes the fusion
+            program which may save results, log outputs, or perform other
+            side effects as configured.
+    Example:
+        This function is typically called automatically when running:
+        ```bash
+        fusion_bench method=... modelpool=... taskpool=...
+        ```
+        The Hydra decorator handles parsing these command-line arguments and
+        loading the corresponding configuration files to populate the cfg parameter.
+    """
     OmegaConf.resolve(cfg)
     program: BaseHydraProgram = instantiate(cfg)
     program.run()

fusion-bench 0.2.21__py3-none-any.whl → 0.2.23__py3-none-any.whl

fusion-bench 0.2.21__py3-none-any.whl → 0.2.23__py3-none-any.whl