onnx-diagnostic 0.7.1-py3-none-any.whl → 0.7.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onnx_diagnostic/__init__.py +1 -1
- onnx_diagnostic/_command_lines_parser.py +22 -5
- onnx_diagnostic/ext_test_case.py +31 -0
- onnx_diagnostic/helpers/cache_helper.py +23 -12
- onnx_diagnostic/helpers/config_helper.py +16 -1
- onnx_diagnostic/helpers/log_helper.py +308 -83
- onnx_diagnostic/helpers/rt_helper.py +11 -1
- onnx_diagnostic/helpers/torch_helper.py +7 -3
- onnx_diagnostic/tasks/__init__.py +2 -0
- onnx_diagnostic/tasks/text_generation.py +17 -8
- onnx_diagnostic/tasks/text_to_image.py +91 -0
- onnx_diagnostic/torch_export_patches/eval/__init__.py +3 -1
- onnx_diagnostic/torch_export_patches/onnx_export_errors.py +24 -7
- onnx_diagnostic/torch_export_patches/onnx_export_serialization.py +148 -351
- onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +89 -10
- onnx_diagnostic/torch_export_patches/serialization/__init__.py +46 -0
- onnx_diagnostic/torch_export_patches/serialization/diffusers_impl.py +34 -0
- onnx_diagnostic/torch_export_patches/serialization/transformers_impl.py +259 -0
- onnx_diagnostic/torch_models/hghub/hub_api.py +15 -4
- onnx_diagnostic/torch_models/hghub/hub_data.py +1 -0
- onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +28 -0
- onnx_diagnostic/torch_models/hghub/model_inputs.py +24 -5
- onnx_diagnostic/torch_models/validate.py +36 -12
- {onnx_diagnostic-0.7.1.dist-info → onnx_diagnostic-0.7.3.dist-info}/METADATA +26 -1
- {onnx_diagnostic-0.7.1.dist-info → onnx_diagnostic-0.7.3.dist-info}/RECORD +28 -24
- {onnx_diagnostic-0.7.1.dist-info → onnx_diagnostic-0.7.3.dist-info}/WHEEL +0 -0
- {onnx_diagnostic-0.7.1.dist-info → onnx_diagnostic-0.7.3.dist-info}/licenses/LICENSE.txt +0 -0
- {onnx_diagnostic-0.7.1.dist-info → onnx_diagnostic-0.7.3.dist-info}/top_level.txt +0 -0
onnx_diagnostic/torch_export_patches/patches/patch_transformers.py

@@ -2,6 +2,7 @@ import inspect
 from dataclasses import dataclass
 from functools import wraps
 from typing import Any, Callable, Dict, List, Optional, Tuple
+import packaging.version as pv
 import torch
 import transformers
 from transformers.modeling_attn_mask_utils import AttentionMaskConverter
@@ -20,18 +21,41 @@ def patched__vmap_for_bhqkv(mask_function: Callable, bh_indices: bool = True) ->
     ]
     if bh_indices:
         dimensions.extend([(None, 0, None, None), (0, None, None, None)])
+    # reshape
     dimensions = [tuple(1 if d is None else -1 for d in shape) for shape in dimensions]
     dimensions = tuple(reversed(dimensions))
     indices = tuple(shape.index(-1) for shape in dimensions)
 
+    # unsqueeze
+    udimensions = [tuple(di for di, d in enumerate(shape) if d == 1) for shape in dimensions]
+
     def vector_mask_function(
         *args, mask_function=mask_function, dimensions=dimensions, indices=indices
     ):
-        assert len(args) == len(
-            dimensions
-        )
+        assert len(args) == len(dimensions) == len(udimensions), (
+            f"Mismatch between args={string_type(args)} and dimensions={dimensions} "
+            f"and udimensions={udimensions}."
+        )
+        assert len(indices) == len(args), (
+            f"Mismatch between args={string_type(args)} and indices={indices}, "
+            f"they should have the same length."
+        )
+        for a in args:
+            assert (
+                a.ndim == 1
+            ), f"Expected a tensor with 1 dimension not {string_type(a, with_shape=True)}"
+            torch._check(a.shape[0] > 0)
+
         new_args = [a.reshape(shape) for a, shape in zip(args, dimensions)]
+        # new_args = [
+        #     a.unsqueeze(dims[0]).unsqueeze(dims[1]).unsqueeze(dims[2])
+        #     for a, dims in zip(args, udimensions)
+        # ]
         max_shape = tuple(args[i].shape[0] for i in indices)
+        # if is_torchdynamo_exporting():
+        #     for a in args:
+        #         # The exporter should export with a dimension > 1 to make sure it is dynamic.
+        #         torch._check(a.shape[0] > 1)
         expanded_args = [a.expand(max_shape) for a in new_args]
         return mask_function(*expanded_args)
 
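The patched `_vmap_for_bhqkv` builds the mask grid with plain `reshape`/`expand` broadcasting instead of `torch.vmap`. A minimal sketch of that trick with made-up sizes (the `dims` tuples mirror what the patch computes; nothing here comes from the package itself):

```python
import torch

def causal(batch, head, q_idx, kv_idx):
    # element-wise mask: a query may attend to keys at or before its position
    return kv_idx <= q_idx

# each 1-D index tensor gets its single dimension routed to a distinct axis ...
dims = [(-1, 1, 1, 1), (1, -1, 1, 1), (1, 1, -1, 1), (1, 1, 1, -1)]
args = [torch.arange(2), torch.arange(4), torch.arange(3), torch.arange(5)]
new_args = [a.reshape(shape) for a, shape in zip(args, dims)]
# ... then everything is expanded to the full (batch, head, q, kv) grid
max_shape = (2, 4, 3, 5)
expanded = [a.expand(max_shape) for a in new_args]
mask = causal(*expanded)
print(mask.shape)  # torch.Size([2, 4, 3, 5])
```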
@@ -190,8 +214,8 @@ class patched_DynamicCache:
         if len(self.key_cache) <= layer_idx:
             # There may be skipped layers, fill them with empty lists
             for _ in range(len(self.key_cache), layer_idx):
-                self.key_cache.append(torch.tensor([]))
-                self.value_cache.append(torch.tensor([]))
+                self.key_cache.append(torch.tensor([], dtype=key_states.dtype))
+                self.value_cache.append(torch.tensor([], dtype=key_states.dtype))
             self.key_cache.append(key_states)
             self.value_cache.append(value_states)
         elif not self.key_cache[
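The dtype change above matters because `torch.tensor([])` defaults to float32, so the placeholder entries inserted for skipped layers would not match a half-precision cache. A quick check:

```python
import torch

key_states = torch.zeros(1, 2, 4, 8, dtype=torch.float16)
placeholder = torch.tensor([])                    # float32 by default
fixed = torch.tensor([], dtype=key_states.dtype)  # follows the incoming key states
print(placeholder.dtype, fixed.dtype)  # torch.float32 torch.float16
```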
@@ -207,7 +231,6 @@ class patched_DynamicCache:
             self.value_cache[layer_idx] = torch.cat(
                 [self.value_cache[layer_idx], value_states], dim=-2
             )
-
         return self.key_cache[layer_idx], self.value_cache[layer_idx]
 
     def crop(self, max_length: int):
@@ -791,10 +814,7 @@ def patched_dynamic_rope_update(rope_forward):
     return wrapper
 
 
-class patched_Phi3RotaryEmbedding(torch.nn.Module):
-    _PATCHES_ = ["forward"]
-    _PATCHED_CLASS_ = transformers.models.phi3.modeling_phi3.Phi3RotaryEmbedding
-
+class common_RotaryEmbedding(torch.nn.Module):
     @torch.no_grad()
     @patched_dynamic_rope_update
     def forward(self, x, position_ids):
@@ -820,6 +840,65 @@ class patched_Phi3RotaryEmbedding(torch.nn.Module):
         return cos.to(dtype=x.dtype), sin.to(dtype=x.dtype)
 
 
+class patched_GemmaRotaryEmbedding(common_RotaryEmbedding):
+    _PATCHES_ = ["forward"]
+    _PATCHED_CLASS_ = transformers.models.gemma.modeling_gemma.GemmaRotaryEmbedding
+
+
+if pv.Version(transformers.__version__) >= pv.Version("4.52"):
+
+    class patched_Gemma2RotaryEmbedding(common_RotaryEmbedding):
+        _PATCHES_ = ["forward"]
+        _PATCHED_CLASS_ = transformers.models.gemma2.modeling_gemma2.Gemma2RotaryEmbedding
+
+    class patched_Gemma3RotaryEmbedding(common_RotaryEmbedding):
+        _PATCHES_ = ["forward"]
+        _PATCHED_CLASS_ = transformers.models.gemma3.modeling_gemma3.Gemma3RotaryEmbedding
+
+
+class patched_LlamaRotaryEmbedding(common_RotaryEmbedding):
+    _PATCHES_ = ["forward"]
+    _PATCHED_CLASS_ = transformers.models.llama.modeling_llama.LlamaRotaryEmbedding
+
+
+class patched_MistralRotaryEmbedding(common_RotaryEmbedding):
+    _PATCHES_ = ["forward"]
+    _PATCHED_CLASS_ = transformers.models.mistral.modeling_mistral.MistralRotaryEmbedding
+
+
+class patched_MixtralRotaryEmbedding(common_RotaryEmbedding):
+    _PATCHES_ = ["forward"]
+    _PATCHED_CLASS_ = transformers.models.mixtral.modeling_mixtral.MixtralRotaryEmbedding
+
+
+class patched_PhiRotaryEmbedding(common_RotaryEmbedding):
+    _PATCHES_ = ["forward"]
+    _PATCHED_CLASS_ = transformers.models.phi.modeling_phi.PhiRotaryEmbedding
+
+
+if pv.Version(transformers.__version__) >= pv.Version("4.51"):
+
+    class patched_Phi3RotaryEmbedding(common_RotaryEmbedding):
+        _PATCHES_ = ["forward"]
+        _PATCHED_CLASS_ = transformers.models.phi3.modeling_phi3.Phi3RotaryEmbedding
+
+
+if pv.Version(transformers.__version__) >= pv.Version("4.52"):
+
+    class patched_Phi4MultimodalRotaryEmbedding(common_RotaryEmbedding):
+        _PATCHES_ = ["forward"]
+        _PATCHED_CLASS_ = (
+            transformers.models.phi4_multimodal.modeling_phi4_multimodal.Phi4MultimodalRotaryEmbedding
+        )
+
+
+if pv.Version(transformers.__version__) >= pv.Version("4.53"):
+
+    class patched_SmolLM3RotaryEmbedding(common_RotaryEmbedding):
+        _PATCHES_ = ["forward"]
+        _PATCHED_CLASS_ = transformers.models.smollm3.modeling_smollm3.SmolLM3RotaryEmbedding
+
+
 class patched_IdeficsEmbedding(torch.nn.Module):
     _PATCHES_ = ["forward"]
     _PATCHED_CLASS_ = transformers.models.idefics.modeling_idefics.IdeficsEmbedding
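The version gates rely on `packaging.version`, which compares releases numerically rather than lexicographically, so each patch class is only defined when the installed transformers actually ships the target class. A quick check:

```python
import packaging.version as pv

print(pv.Version("4.52.1") >= pv.Version("4.52"))  # True
print(pv.Version("4.9") >= pv.Version("4.52"))     # False: 9 < 52, not a string compare
```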
onnx_diagnostic/torch_export_patches/serialization/__init__.py (new file)

@@ -0,0 +1,46 @@
+import re
+from typing import Any, Callable, List, Set, Tuple
+import torch
+
+
+def _lower_name_with_(name):
+    s1 = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name)
+    return re.sub("([a-z0-9])([A-Z])", r"\1_\2", s1).lower()
+
+
+def make_serialization_function_for_dataclass(
+    cls: type, supported_classes: Set[type]
+) -> Tuple[Callable, Callable, Callable]:
+    """
+    Automatically creates serialization function for a class decorated with
+    ``dataclasses.dataclass``.
+    """
+
+    def flatten_cls(obj: cls) -> Tuple[List[Any], torch.utils._pytree.Context]:  # type: ignore[valid-type]
+        """Serializes a ``%s`` with python objects."""
+        return list(obj.values()), list(obj.keys())
+
+    def flatten_with_keys_cls(
+        obj: cls,  # type: ignore[valid-type]
+    ) -> Tuple[List[Tuple[torch.utils._pytree.KeyEntry, Any]], torch.utils._pytree.Context]:
+        """Serializes a ``%s`` with python objects with keys."""
+        values, context = list(obj.values()), list(obj.keys())
+        return [
+            (torch.utils._pytree.MappingKey(k), v) for k, v in zip(context, values)
+        ], context
+
+    def unflatten_cls(
+        values: List[Any], context: torch.utils._pytree.Context, output_type=None
+    ) -> cls:  # type: ignore[valid-type]
+        """Restores an instance of ``%s`` from python objects."""
+        return cls(**dict(zip(context, values)))
+
+    name = _lower_name_with_(cls.__name__)
+    flatten_cls.__name__ = f"flatten_{name}"
+    flatten_with_keys_cls.__name__ = f"flatten_with_keys_{name}"
+    unflatten_cls.__name__ = f"unflatten_{name}"
+    flatten_cls.__doc__ = flatten_cls.__doc__ % cls.__name__
+    flatten_with_keys_cls.__doc__ = flatten_with_keys_cls.__doc__ % cls.__name__
+    unflatten_cls.__doc__ = unflatten_cls.__doc__ % cls.__name__
+    supported_classes.add(cls)
+    return flatten_cls, flatten_with_keys_cls, unflatten_cls
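The three generated functions are shaped for torch's pytree registry, which is what `torch.export` uses to flatten nested model outputs. A hypothetical usage sketch (the registration call is not part of this hunk, and the `flatten_with_keys_fn` keyword assumes a recent torch):

```python
import dataclasses
import torch
from onnx_diagnostic.torch_export_patches.serialization import (
    make_serialization_function_for_dataclass,
)

@dataclasses.dataclass
class DummyOutput:
    # stand-in for a transformers ModelOutput, which exposes keys()/values()
    logits: torch.Tensor

    def keys(self):
        return ["logits"]

    def values(self):
        return [self.logits]

supported: set = set()
flat, flat_keys, unflat = make_serialization_function_for_dataclass(DummyOutput, supported)
torch.utils._pytree.register_pytree_node(
    DummyOutput, flat, unflat, flatten_with_keys_fn=flat_keys
)
leaves, spec = torch.utils._pytree.tree_flatten(DummyOutput(torch.ones(2)))
print(leaves)  # [tensor([1., 1.])] -- the dataclass is now transparent to pytree
```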
onnx_diagnostic/torch_export_patches/serialization/diffusers_impl.py (new file)

@@ -0,0 +1,34 @@
+from typing import Dict, Optional, Set
+
+try:
+    from diffusers.models.unets.unet_2d_condition import UNet2DConditionOutput
+except ImportError as e:
+    try:
+        import diffusers
+    except ImportError:
+        diffusers = None
+        UNet2DConditionOutput = None
+    if diffusers:
+        raise e
+
+from . import make_serialization_function_for_dataclass
+
+
+def _make_wrong_registrations() -> Dict[type, Optional[str]]:
+    res: Dict[type, Optional[str]] = {}
+    for c in [UNet2DConditionOutput]:
+        if c is not None:
+            res[c] = None
+    return res
+
+
+SUPPORTED_DATACLASSES: Set[type] = set()
+WRONG_REGISTRATIONS = _make_wrong_registrations()
+
+
+if UNet2DConditionOutput is not None:
+    (
+        flatten_u_net2_d_condition_output,
+        flatten_with_keys_u_net2_d_condition_output,
+        unflatten_u_net2_d_condition_output,
+    ) = make_serialization_function_for_dataclass(UNet2DConditionOutput, SUPPORTED_DATACLASSES)
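The unusual generated names come from `_lower_name_with_`, which snake-cases the class name with two regex passes:

```python
from onnx_diagnostic.torch_export_patches.serialization import _lower_name_with_

print(_lower_name_with_("UNet2DConditionOutput"))  # u_net2_d_condition_output
```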
onnx_diagnostic/torch_export_patches/serialization/transformers_impl.py (new file)

@@ -0,0 +1,259 @@
+from typing import Any, List, Set, Tuple
+import torch
+import transformers
+from transformers.cache_utils import (
+    DynamicCache,
+    MambaCache,
+    EncoderDecoderCache,
+    SlidingWindowCache,
+    StaticCache,
+)
+from transformers.modeling_outputs import BaseModelOutput
+from ...helpers.cache_helper import make_static_cache
+from . import make_serialization_function_for_dataclass
+
+
+SUPPORTED_DATACLASSES: Set[type] = set()
+WRONG_REGISTRATIONS = {
+    DynamicCache: "4.50",
+    BaseModelOutput: None,
+}
+
+
+############
+# MambaCache
+############
+
+
+def flatten_mamba_cache(
+    mamba_cache: MambaCache,
+) -> Tuple[List[Any], torch.utils._pytree.Context]:
+    """Serializes a :class:`transformers.cache_utils.MambaCache` with python objects."""
+    flat = [
+        ("conv_states", mamba_cache.conv_states),
+        ("ssm_states", mamba_cache.ssm_states),
+    ]
+    return [f[1] for f in flat], [f[0] for f in flat]
+
+
+def unflatten_mamba_cache(
+    values: List[Any], context: torch.utils._pytree.Context, output_type=None
+) -> MambaCache:
+    """Restores a :class:`transformers.cache_utils.MambaCache` from python objects."""
+    conv_states, ssm_states = values
+
+    class _config:
+        def __init__(self):
+            if isinstance(conv_states, list):
+                self.intermediate_size = conv_states[0].shape[1]
+                self.state_size = ssm_states[0].shape[2]
+                self.conv_kernel = conv_states[0].shape[2]
+                self.num_hidden_layers = len(conv_states)
+            else:
+                self.intermediate_size = conv_states.shape[2]
+                self.state_size = ssm_states.shape[3]
+                self.conv_kernel = conv_states.shape[3]
+                self.num_hidden_layers = conv_states.shape[0]
+
+    cache = MambaCache(
+        _config(),
+        max_batch_size=1,
+        dtype=values[-1][0].dtype,
+        device="cpu" if values[-1][0].get_device() < 0 else "cuda",
+    )
+    values = dict(zip(context, values))
+    for k, v in values.items():
+        setattr(cache, k, v)
+    return cache
+
+
+def flatten_with_keys_mamba_cache(cache: MambaCache) -> Tuple[
+    List[Tuple[torch.utils._pytree.KeyEntry, Any]],
+    torch.utils._pytree.Context,
+]:
+    """Serializes a :class:`transformers.cache_utils.MambaCache` with python objects."""
+    values, context = flatten_mamba_cache(cache)
+    return [(torch.utils._pytree.MappingKey(k), v) for k, v in zip(context, values)], context
+
+
+##############
+# DynamicCache
+##############
+
+
+def flatten_dynamic_cache(
+    dynamic_cache: DynamicCache,
+) -> Tuple[List[Any], torch.utils._pytree.Context]:
+    """Serializes a :class:`transformers.cache_utils.DynamicCache` with python objects."""
+    if hasattr(transformers.cache_utils, "_flatten_dynamic_cache"):
+        return transformers.cache_utils._flatten_dynamic_cache(dynamic_cache)
+    flat = [("key_cache", dynamic_cache.key_cache), ("value_cache", dynamic_cache.value_cache)]
+    return [f[1] for f in flat], [f[0] for f in flat]
+
+
+def flatten_with_keys_dynamic_cache(
+    dynamic_cache: DynamicCache,
+) -> Tuple[List[Tuple[torch.utils._pytree.KeyEntry, Any]], torch.utils._pytree.Context]:
+    """Serializes a :class:`transformers.cache_utils.DynamicCache` with python objects."""
+    if hasattr(transformers.cache_utils, "_flatten_with_keys_dynamic_cache"):
+        return transformers.cache_utils._flatten_with_keys_dynamic_cache(dynamic_cache)
+    values, context = flatten_dynamic_cache(dynamic_cache)
+    return [(torch.utils._pytree.MappingKey(k), v) for k, v in zip(context, values)], context
+
+
+def unflatten_dynamic_cache(
+    values: List[Any], context: torch.utils._pytree.Context, output_type=None
+) -> DynamicCache:
+    """Restores a :class:`transformers.cache_utils.DynamicCache` from python objects."""
+    if hasattr(transformers.cache_utils, "_unflatten_dynamic_cache"):
+        assert output_type is None, f"output_type={output_type} not supported"
+        return transformers.cache_utils._unflatten_dynamic_cache(values, context)
+
+    cache = transformers.cache_utils.DynamicCache()
+    values = dict(zip(context, values))
+    for k, v in values.items():
+        setattr(cache, k, v)
+    return cache
+
+
+#############
+# StaticCache
+#############
+
+
+def flatten_static_cache(
+    cache: StaticCache,
+) -> Tuple[List[Any], torch.utils._pytree.Context]:
+    """Serializes a :class:`transformers.cache_utils.StaticCache` with python objects."""
+    assert not cache.key_cache or cache.max_cache_len == cache.key_cache[0].shape[2], (
+        f"Serialization doet not work when "
+        f"cache.max_cache_len={cache.max_cache_len} != "
+        f"cache.key_cache[0].shape[2]={cache.key_cache[0].shape[2]}"
+    )
+    flat = [("key_cache", cache.key_cache), ("value_cache", cache.value_cache)]
+    return [f[1] for f in flat], [f[0] for f in flat]
+
+
+def flatten_with_keys_static_cache(
+    cache: StaticCache,
+) -> Tuple[List[Tuple[torch.utils._pytree.KeyEntry, Any]], torch.utils._pytree.Context]:
+    """Serializes a :class:`transformers.cache_utils.StaticCache` with python objects."""
+    values, context = flatten_static_cache(cache)
+    return [(torch.utils._pytree.MappingKey(k), v) for k, v in zip(context, values)], context
+
+
+def unflatten_static_cache(
+    values: List[Any], context: torch.utils._pytree.Context, output_type=None
+) -> StaticCache:
+    """Restores a :class:`transformers.cache_utils.StaticCache` from python objects."""
+    return make_static_cache(
+        list(zip(values[0], values[1])), max_cache_len=values[0][0].shape[2]
+    )
+
+
+####################
+# SlidingWindowCache
+####################
+
+
+def flatten_sliding_window_cache(
+    cache: SlidingWindowCache,
+) -> Tuple[List[Any], torch.utils._pytree.Context]:
+    """
+    Serializes a :class:`transformers.cache_utils.SlidingWindowCache`
+    with python objects.
+    """
+    flat = [("key_cache", cache.key_cache), ("value_cache", cache.value_cache)]
+    return [f[1] for f in flat], [f[0] for f in flat]
+
+
+def flatten_with_keys_sliding_window_cache(
+    cache: SlidingWindowCache,
+) -> Tuple[List[Tuple[torch.utils._pytree.KeyEntry, Any]], torch.utils._pytree.Context]:
+    """
+    Serializes a :class:`transformers.cache_utils.SlidingWindowCache`
+    with python objects.
+    """
+    values, context = flatten_sliding_window_cache(cache)
+    return [(torch.utils._pytree.MappingKey(k), v) for k, v in zip(context, values)], context
+
+
+def unflatten_sliding_window_cache(
+    values: List[Any], context: torch.utils._pytree.Context, output_type=None
+) -> SlidingWindowCache:
+    """Restores a :class:`transformers.cache_utils.SlidingWindowCache` from python objects."""
+    key_cache, value_cache = values
+
+    class _config:
+        def __init__(self):
+            self.head_dim = key_cache[0].shape[-1]
+            self.num_attention_heads = key_cache[0].shape[1]
+            self.num_hidden_layers = len(key_cache)
+            self.sliding_window = key_cache[0].shape[2]
+
+    cache = SlidingWindowCache(
+        _config(),
+        max_batch_size=key_cache[0].shape[0],
+        max_cache_len=key_cache[0].shape[2],  # sligding window
+        device=key_cache[0].device,
+        dtype=key_cache[0].dtype,
+    )
+
+    values = dict(zip(context, values))
+    for k, v in values.items():
+        setattr(cache, k, v)
+    return cache
+
+
+#####################
+# EncoderDecoderCache
+#####################
+
+
+def flatten_encoder_decoder_cache(
+    ec_cache: EncoderDecoderCache,
+) -> Tuple[List[Any], torch.utils._pytree.Context]:
+    """
+    Serializes a :class:`transformers.cache_utils.EncoderDecoderCache`
+    with python objects.
+    """
+    dictionary = {
+        "self_attention_cache": ec_cache.self_attention_cache,
+        "cross_attention_cache": ec_cache.cross_attention_cache,
+    }
+    return torch.utils._pytree._dict_flatten(dictionary)
+
+
+def flatten_with_keys_encoder_decoder_cache(ec_cache: EncoderDecoderCache) -> Tuple[
+    List[Tuple[torch.utils._pytree.KeyEntry, Any]],
+    torch.utils._pytree.Context,
+]:
+    """
+    Serializes a :class:`transformers.cache_utils.EncoderDecoderCache`
+    with python objects.
+    """
+    dictionary = {
+        "self_attention_cache": ec_cache.self_attention_cache,
+        "cross_attention_cache": ec_cache.cross_attention_cache,
+    }
+    return torch.utils._pytree._dict_flatten_with_keys(dictionary)
+
+
+def unflatten_encoder_decoder_cache(
+    values: List[Any], context: torch.utils._pytree.Context, output_type=None
+) -> EncoderDecoderCache:
+    """Restores a :class:`transformers.cache_utils.EncoderDecoderCache` from python objects."""
+    dictionary = torch.utils._pytree._dict_unflatten(values, context)
+    return EncoderDecoderCache(**dictionary)
+
+
+#############
+# dataclasses
+#############
+
+
+(
+    flatten_base_model_output,
+    flatten_with_keys_base_model_output,
+    unflatten_base_model_output,
+) = make_serialization_function_for_dataclass(BaseModelOutput, SUPPORTED_DATACLASSES)
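A roundtrip sketch for the `DynamicCache` helpers above (assuming a transformers version where `DynamicCache` still exposes `key_cache`/`value_cache` lists):

```python
import torch
from transformers.cache_utils import DynamicCache
from onnx_diagnostic.torch_export_patches.serialization.transformers_impl import (
    flatten_dynamic_cache,
    unflatten_dynamic_cache,
)

cache = DynamicCache()
cache.update(torch.ones(1, 2, 3, 8), torch.zeros(1, 2, 3, 8), layer_idx=0)
values, context = flatten_dynamic_cache(cache)   # tensors out, keys as context
restored = unflatten_dynamic_cache(values, context)
print(context, restored.key_cache[0].shape)
```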
onnx_diagnostic/torch_models/hghub/hub_api.py

@@ -140,7 +140,10 @@ def _guess_task_from_config(config: Any) -> Optional[str]:
 
 
 @functools.cache
 def task_from_arch(
-    arch: str,
+    arch: str,
+    default_value: Optional[str] = None,
+    model_id: Optional[str] = None,
+    subfolder: Optional[str] = None,
 ) -> str:
     """
     This function relies on stored information. That information needs to be refresh.
@@ -148,6 +151,7 @@ def task_from_arch(
     :param arch: architecture name
     :param default_value: default value in case the task cannot be determined
     :param model_id: unused unless the architecture does not help.
+    :param subfolder: subfolder
     :return: task
 
     .. runpython::
@@ -162,7 +166,7 @@ def task_from_arch(
     data = load_architecture_task()
     if arch not in data and model_id:
         # Let's try with the model id.
-        return task_from_id(model_id)
+        return task_from_id(model_id, subfolder=subfolder)
     if default_value is not None:
         return data.get(arch, default_value)
     assert arch in data, (
@@ -178,6 +182,7 @@ def task_from_id(
     default_value: Optional[str] = None,
     pretrained: bool = False,
     fall_back_to_pretrained: bool = True,
+    subfolder: Optional[str] = None,
 ) -> str:
     """
     Returns the task attached to a model id.
@@ -187,7 +192,7 @@ def task_from_id(
         if the task cannot be determined
     :param pretrained: uses the config
     :param fall_back_to_pretrained: falls back to pretrained config
-    :param
+    :param subfolder: subfolder
     :return: task
     """
     if not pretrained:
@@ -196,7 +201,7 @@ def task_from_id(
         except RuntimeError:
             if not fall_back_to_pretrained:
                 raise
-    config = get_pretrained_config(model_id)
+    config = get_pretrained_config(model_id, subfolder=subfolder)
     try:
         return config.pipeline_tag
     except AttributeError:
@@ -206,6 +211,12 @@ def task_from_id(
     data = load_architecture_task()
     if model_id in data:
         return data[model_id]
+    if type(config) is dict and "_class_name" in config:
+        return task_from_arch(config["_class_name"], default_value=default_value)
+    if not config.architectures or not config.architectures:
+        # Some hardcoded values until a better solution is found.
+        if model_id.startswith("google/bert_"):
+            return "fill-mask"
     assert config.architectures is not None and len(config.architectures) == 1, (
         f"Cannot return the task of {model_id!r}, pipeline_tag is not setup, "
         f"architectures={config.architectures} in config={config}. "
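A local sketch of the new dict-config branch, no hub access needed: a diffusers-style config is a plain dict carrying `_class_name`, which is now routed through `task_from_arch` instead of `config.architectures`:

```python
# hypothetical dict config, shaped like the cached diffusers entry later in this diff
config = {"_class_name": "UNet2DConditionModel", "_diffusers_version": "0.8.0"}
if type(config) is dict and "_class_name" in config:
    arch = config["_class_name"]
print(arch)  # "UNet2DConditionModel", then looked up in load_architecture_task()
```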
onnx_diagnostic/torch_models/hghub/hub_data.py

@@ -22,6 +22,7 @@ __data_arch__ = textwrap.dedent(
     BlenderbotModel,feature-extraction
     BloomModel,feature-extraction
     CLIPModel,zero-shot-image-classification
+    CLIPTextModel,feature-extraction
     CLIPVisionModel,feature-extraction
     CamembertModel,feature-extraction
     CodeGenModel,feature-extraction
onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py

@@ -4302,3 +4302,31 @@ def _ccached_microsoft_phi_35_mini_instruct():
             "vocab_size": 32064,
         }
     )
+
+
+def _ccached_diffusers_tiny_torch_full_checker_unet():
+    "diffusers/tiny-torch-full-checker/unet"
+    return {
+        "_class_name": "UNet2DConditionModel",
+        "_diffusers_version": "0.8.0",
+        "_name_or_path": "https://huggingface.co/diffusers/tiny-torch-full-checker/blob/main/unet/config.json",
+        "act_fn": "silu",
+        "attention_head_dim": 8,
+        "block_out_channels": [32, 64],
+        "center_input_sample": False,
+        "cross_attention_dim": 32,
+        "down_block_types": ["DownBlock2D", "CrossAttnDownBlock2D"],
+        "downsample_padding": 1,
+        "dual_cross_attention": False,
+        "flip_sin_to_cos": True,
+        "freq_shift": 0,
+        "in_channels": 4,
+        "layers_per_block": 2,
+        "mid_block_scale_factor": 1,
+        "norm_eps": 1e-05,
+        "norm_num_groups": 32,
+        "out_channels": 4,
+        "sample_size": 32,
+        "up_block_types": ["CrossAttnUpBlock2D", "UpBlock2D"],
+        "use_linear_projection": False,
+    }
onnx_diagnostic/torch_models/hghub/model_inputs.py

@@ -106,7 +106,7 @@ def get_untrained_model_with_inputs(
         print(f"[get_untrained_model_with_inputs] architectures={archs!r}")
         print(f"[get_untrained_model_with_inputs] cls={config.__class__.__name__!r}")
     if task is None:
-        task = task_from_arch(archs[0], model_id=model_id)
+        task = task_from_arch(archs[0], model_id=model_id, subfolder=subfolder)
     if verbose:
         print(f"[get_untrained_model_with_inputs] task={task!r}")
 
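How the threaded-through `subfolder` is meant to be called; a sketch that may download a config from the Hugging Face hub, using the tiny model id whose config is cached later in this diff:

```python
from onnx_diagnostic.torch_models.hghub.hub_api import task_from_arch

task = task_from_arch(
    "UNet2DConditionModel",
    model_id="diffusers/tiny-torch-full-checker",
    subfolder="unet",
)
print(task)
```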
@@ -145,12 +145,19 @@ def get_untrained_model_with_inputs(
             f"{config._attn_implementation!r}"  # type: ignore[union-attr]
         )
 
+    if type(config) is dict and "_diffusers_version" in config:
+        import diffusers
+
+        package_source = diffusers
+    else:
+        package_source = transformers
+
     if use_pretrained:
         model = transformers.AutoModel.from_pretrained(model_id, **mkwargs)
     else:
         if archs is not None:
             try:
-                cls = getattr(transformers, archs[0])
+                cls_model = getattr(package_source, archs[0])
             except AttributeError as e:
                 # The code of the models is not in transformers but in the
                 # repository of the model. We need to download it.
@@ -174,10 +181,12 @@ def get_untrained_model_with_inputs(
                         f"[get_untrained_model_with_inputs] from folder "
                         f"{os.path.split(pyfiles[0])[0]!r}"
                     )
-                cls = transformers.dynamic_module_utils.get_class_from_dynamic_module(
-                    cls_name, pretrained_model_name_or_path=os.path.split(pyfiles[0])[0]
+                cls_model = (
+                    transformers.dynamic_module_utils.get_class_from_dynamic_module(
+                        cls_name,
+                        pretrained_model_name_or_path=os.path.split(pyfiles[0])[0],
+                    )
                 )
-            model = cls(config)
         else:
             raise AttributeError(
                 f"Unable to find class 'tranformers.{archs[0]}'. "
@@ -191,6 +200,16 @@ def get_untrained_model_with_inputs(
                 f"and use_pretrained=True."
             )
 
+    try:
+        if type(config) is dict:
+            model = cls_model(**config)
+        else:
+            model = cls_model(config)
+    except RuntimeError as e:
+        raise RuntimeError(
+            f"Unable to instantiate class {cls_model.__name__} with\n{config}"
+        ) from e
+
     # input kwargs
     kwargs, fct = random_input_kwargs(config, task)
     if verbose: