onnx-diagnostic 0.6.3__py3-none-any.whl → 0.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onnx_diagnostic/__init__.py +1 -1
- onnx_diagnostic/_command_lines_parser.py +281 -80
- onnx_diagnostic/doc.py +22 -0
- onnx_diagnostic/export/dynamic_shapes.py +48 -20
- onnx_diagnostic/export/shape_helper.py +126 -0
- onnx_diagnostic/ext_test_case.py +1 -1
- onnx_diagnostic/helpers/cache_helper.py +78 -8
- onnx_diagnostic/helpers/config_helper.py +8 -4
- onnx_diagnostic/helpers/helper.py +30 -3
- onnx_diagnostic/helpers/log_helper.py +1744 -0
- onnx_diagnostic/helpers/mini_onnx_builder.py +4 -1
- onnx_diagnostic/helpers/model_builder_helper.py +54 -73
- onnx_diagnostic/helpers/torch_helper.py +18 -2
- onnx_diagnostic/reference/__init__.py +1 -0
- onnx_diagnostic/reference/ort_evaluator.py +29 -4
- onnx_diagnostic/reference/report_results_comparison.py +95 -0
- onnx_diagnostic/reference/torch_evaluator.py +21 -0
- onnx_diagnostic/tasks/automatic_speech_recognition.py +3 -0
- onnx_diagnostic/tasks/feature_extraction.py +3 -0
- onnx_diagnostic/tasks/fill_mask.py +3 -0
- onnx_diagnostic/tasks/image_classification.py +7 -1
- onnx_diagnostic/tasks/image_text_to_text.py +72 -18
- onnx_diagnostic/tasks/mixture_of_expert.py +3 -0
- onnx_diagnostic/tasks/object_detection.py +3 -0
- onnx_diagnostic/tasks/sentence_similarity.py +3 -0
- onnx_diagnostic/tasks/summarization.py +3 -0
- onnx_diagnostic/tasks/text2text_generation.py +3 -0
- onnx_diagnostic/tasks/text_classification.py +3 -0
- onnx_diagnostic/tasks/text_generation.py +90 -43
- onnx_diagnostic/tasks/zero_shot_image_classification.py +3 -0
- onnx_diagnostic/torch_export_patches/onnx_export_errors.py +78 -25
- onnx_diagnostic/torch_export_patches/onnx_export_serialization.py +37 -0
- onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +365 -17
- onnx_diagnostic/torch_models/hghub/hub_api.py +81 -8
- onnx_diagnostic/torch_models/hghub/hub_data.py +6 -2
- onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +209 -0
- onnx_diagnostic/torch_models/hghub/model_inputs.py +58 -14
- onnx_diagnostic/torch_models/untrained/llm_tiny_llm.py +23 -50
- onnx_diagnostic/torch_models/{test_helper.py → validate.py} +166 -106
- {onnx_diagnostic-0.6.3.dist-info → onnx_diagnostic-0.7.1.dist-info}/METADATA +2 -2
- {onnx_diagnostic-0.6.3.dist-info → onnx_diagnostic-0.7.1.dist-info}/RECORD +44 -41
- {onnx_diagnostic-0.6.3.dist-info → onnx_diagnostic-0.7.1.dist-info}/WHEEL +0 -0
- {onnx_diagnostic-0.6.3.dist-info → onnx_diagnostic-0.7.1.dist-info}/licenses/LICENSE.txt +0 -0
- {onnx_diagnostic-0.6.3.dist-info → onnx_diagnostic-0.7.1.dist-info}/top_level.txt +0 -0
--- a/onnx_diagnostic/torch_export_patches/patches/patch_transformers.py
+++ b/onnx_diagnostic/torch_export_patches/patches/patch_transformers.py
@@ -11,7 +11,7 @@ from ...helpers.torch_helper import is_torchdynamo_exporting
 
 
 def patched__vmap_for_bhqkv(mask_function: Callable, bh_indices: bool = True) -> Callable:
-    """
+    """manual patch for function ``transformers.masking_utils._vmap_for_bhqkv``."""
     from ...helpers import string_type
 
     dimensions: List[Tuple[Optional[int], ...]] = [
@@ -534,19 +534,169 @@ class patched_GenerationMixin:
         return model_inputs
 
 
-def
+def patched__compute_dynamic_ntk_parameters(
+    config: Optional[transformers.PretrainedConfig] = None,
+    device: Optional["torch.device"] = None,
+    seq_len: Optional[int] = None,
+    **rope_kwargs,
+) -> Tuple["torch.Tensor", float]:
+    """
+    manual patch:
+    ``[patch:transformers.modeling_rope_utils._compute_dynamic_ntk_parameters]``
+
+    Computes the inverse frequencies with NTK scaling.
+    Credits to the Reddit users /u/bloc97 and /u/emozilla
+
+    Args:
+        config ([`~transformers.PretrainedConfig`]):
+            The model configuration.
+        device (`torch.device`):
+            The device to use for initialization of the inverse frequencies.
+        seq_len (`int`, *optional*):
+            The current sequence length,
+            used to update the dynamic RoPE at inference time.
+        rope_kwargs (`Dict`, *optional*):
+            BC compatibility with the previous
+            RoPE class instantiation, will be removed in v4.45.
+
+    Returns:
+        Tuple of (`torch.Tensor`, `float`),
+        containing the inverse frequencies for the RoPE embeddings and the
+        post-processing scaling factor applied to the
+        computed cos/sin (unused in this type of RoPE).
     """
-
+    if config is not None and len(rope_kwargs) > 0:
+        raise ValueError(
+            "Unexpected arguments: `**rope_kwargs` and `config` are mutually exclusive in "
+            f"`_compute_dynamic_ntk_parameters`, got "
+            f"`rope_kwargs`={rope_kwargs} and `config`={config}"
+        )
+    if len(rope_kwargs) > 0:
+        base = rope_kwargs["base"]
+        dim = rope_kwargs["dim"]
+        max_position_embeddings = rope_kwargs["max_position_embeddings"]
+        factor = rope_kwargs["factor"]
+    elif config is not None:
+        base = config.rope_theta
+        partial_rotary_factor = (
+            config.partial_rotary_factor if hasattr(config, "partial_rotary_factor") else 1.0
+        )
+        head_dim = getattr(
+            config, "head_dim", config.hidden_size // config.num_attention_heads
+        )
+        dim = int(head_dim * partial_rotary_factor)
+        max_position_embeddings = config.max_position_embeddings
+        factor = config.rope_scaling["factor"]
+
+    attention_factor = 1.0  # Unused in this type of RoPE
+
+    # seq_len: default to max_position_embeddings, e.g. at init time
+    # seq_len = seq_len if seq_len is not None and
+    # seq_len > max_position_embeddings else max_position_embeddings
+    if seq_len is None:
+        seq_len = max_position_embeddings
+    else:
+        torch._check(isinstance(seq_len, torch.Tensor))
+        seq_len = torch.maximum(
+            seq_len,
+            torch.tensor(max_position_embeddings, dtype=seq_len.dtype, device=seq_len.device),
+        )
+
+    # Compute the inverse frequencies
+    base = base * ((factor * seq_len / max_position_embeddings) - (factor - 1)) ** (
+        dim / (dim - 2)
+    )
+    inv_freq = 1.0 / (
+        base
+        ** (
+            torch.arange(0, dim, 2, dtype=torch.int64).to(device=device, dtype=torch.float)
+            / dim
+        )
+    )
+    return inv_freq, attention_factor
+
+
+def patched_dynamic_rope_update(rope_forward):
+    """manual patch: ``[patch:transformers.modeling_rope_utils.dynamic_rope_update]``
+
+    ``rope_type`` is determined in the constructor of class
+    :class:`transformers.models.phi3.modeling_phi3.Phi3RotaryEmbedding`.
+
+    .. code-block:: python
+
+        if hasattr(config, "rope_scaling") and config.rope_scaling is not None:
+            self.rope_type = config.rope_scaling.get(
+                "rope_type", config.rope_scaling.get("type"))
+        else:
+            self.rope_type = "default"
+
+    The original code of the patched function:
+
+    .. code-block:: python
+
+        def dynamic_rope_update(rope_forward):
+            def longrope_frequency_update(self, position_ids, device):
+                seq_len = torch.max(position_ids) + 1
+                if hasattr(self.config, "original_max_position_embeddings"):
+                    original_max_position_embeddings =
+                        self.config.original_max_position_embeddings
+                else:
+                    original_max_position_embeddings =
+                        self.config.max_position_embeddings
+                if seq_len > original_max_position_embeddings:
+                    if not hasattr(self, "long_inv_freq"):
+                        self.long_inv_freq, _ = self.rope_init_fn(
+                            self.config, device, seq_len=original_max_position_embeddings + 1
+                        )
+                    self.register_buffer("inv_freq", self.long_inv_freq, persistent=False)
+                else:
+                    self.original_inv_freq = self.original_inv_freq.to(device)
+                    self.register_buffer("inv_freq", self.original_inv_freq, persistent=False)
+
+            def dynamic_frequency_update(self, position_ids, device):
+                seq_len = torch.max(position_ids) + 1
+                if seq_len > self.max_seq_len_cached:  # growth
+                    inv_freq, self.attention_scaling = self.rope_init_fn(
+                        self.config, device, seq_len=seq_len)
+                    self.register_buffer("inv_freq", inv_freq, persistent=False)
+                    self.max_seq_len_cached = seq_len
+
+                if seq_len < self.original_max_seq_len and
+                        self.max_seq_len_cached > self.original_max_seq_len:
+                    self.original_inv_freq = self.original_inv_freq.to(device)
+                    self.register_buffer("inv_freq", self.original_inv_freq, persistent=False)
+                    self.max_seq_len_cached = self.original_max_seq_len
+
+            @wraps(rope_forward)
+            def wrapper(self, x, position_ids):
+                if "dynamic" in self.rope_type:
+                    dynamic_frequency_update(self, position_ids, device=x.device)
+                elif self.rope_type == "longrope":
+                    longrope_frequency_update(self, position_ids, device=x.device)
+                return rope_forward(self, x, position_ids)
+
+            return wrapper
+
     """
 
     def longrope_frequency_update(self, position_ids, device):
+        # It is no use to patch the function after the model is created
+        # as rope_init_fn is an attribute set to one function when the model
+        # is created and when no patch is applied yet.
+        # So we select the patched version here.
+        rope_init_fn = (
+            patched__compute_dynamic_ntk_parameters
+            if self.rope_init_fn
+            is transformers.modeling_rope_utils._compute_dynamic_ntk_parameters
+            else self.rope_init_fn
+        )
         seq_len = torch.max(position_ids) + 1
         if hasattr(self.config, "original_max_position_embeddings"):
             original_max_position_embeddings = self.config.original_max_position_embeddings
         else:
             original_max_position_embeddings = self.config.max_position_embeddings
         # At export time, seq_len is unknown.
-        long_inv_freq, _ =
+        long_inv_freq, _ = rope_init_fn(
             self.config, device, seq_len=original_max_position_embeddings + 1
         )
         original_inv_freq = self.original_inv_freq.to(device)
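The patched NTK helper above replaces the data-dependent `if seq_len > max_position_embeddings` branch with `torch.maximum` and `torch._check` so the computation stays traceable at export time. The following standalone sketch is not part of the package; it simply reproduces the same inverse-frequency formula eagerly, with illustrative values (`dim=64`, `base=10000.0`, `factor=2.0`):

```python
import torch


def ntk_inv_freq(dim: int, base: float, factor: float,
                 max_position_embeddings: int, seq_len: int) -> torch.Tensor:
    # NTK scaling: grow the RoPE base once the sequence exceeds the trained window.
    seq_len = max(seq_len, max_position_embeddings)
    scaled_base = base * (
        (factor * seq_len / max_position_embeddings) - (factor - 1)
    ) ** (dim / (dim - 2))
    # Standard RoPE inverse frequencies, computed from the rescaled base.
    exponent = torch.arange(0, dim, 2, dtype=torch.int64).float() / dim
    return 1.0 / (scaled_base ** exponent)


print(ntk_inv_freq(dim=64, base=10000.0, factor=2.0,
                   max_position_embeddings=2048, seq_len=4096).shape)  # torch.Size([32])
```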
@@ -565,21 +715,70 @@ def patched_dynamic_rope_update(rope_forward):
         # self.inv_freq = self.original_inv_freq
 
     def dynamic_frequency_update(self, position_ids, device):
+        # constructor:
+        # - self.max_seq_len_cached = config.max_position_embeddings
+        # - self.original_max_seq_len = config.max_position_embeddings
+        # - inv_freq, self.attention_scaling = self.rope_init_fn(self.config, device)
+
+        # It is no use to patch the function after the model is created
+        # as rope_init_fn is an attribute set to one function when the model
+        # is created and when no patch is applied yet.
+        # So we select the patched version here.
+        rope_init_fn = (
+            patched__compute_dynamic_ntk_parameters
+            if self.rope_init_fn
+            is transformers.modeling_rope_utils._compute_dynamic_ntk_parameters
+            else self.rope_init_fn
+        )
+
+        # This behaviour is difficult to translate.
+        # The sequence always grows.
+        # The test should always True.
+        # So: self.max_seq_len_cached = max(self.max_seq_len_cached, seq_len) --> seq_len
+        #
+        # if seq_len > self.max_seq_len_cached:  # growth
+        #     inv_freq, self.attention_scaling = self.rope_init_fn(
+        #         self.config, device, seq_len=seq_len
+        #     )
+        #     self.register_buffer("inv_freq", inv_freq, persistent=False)
+        #     self.max_seq_len_cached = seq_len
+        #
+        # So we should not need what follows.
+        #
+        # cond = (seq_len > self.max_seq_len_cached).item()
+        # self.attention_scaling = torch.cond(
+        #     cond,
+        #     (lambda x, y: x.clone()),
+        #     (lambda x, y: y.clone()),
+        #     [attention_scaling, self.attention_scaling],
+        # )
+
         seq_len = torch.max(position_ids) + 1
-
-
-
-        )
-        self.register_buffer("inv_freq", inv_freq, persistent=False)
-        self.max_seq_len_cached = seq_len
+        long_inv_freq, self.attention_scaling = rope_init_fn(
+            self.config, device, seq_len=seq_len
+        )
 
-
-
-
-
-
-
-
+        # Second test to translate.
+        # Let's keep in mind, self.max_seq_len_cached = seq_len is likely to be True.
+        # But in that case the following condition is a way to restore the original cache.
+
+        # if (
+        #     seq_len < self.original_max_seq_len
+        #     and self.max_seq_len_cached > self.original_max_seq_len
+        # ):
+        #     self.original_inv_freq = self.original_inv_freq.to(device)
+        #     self.register_buffer("inv_freq", self.original_inv_freq, persistent=False)
+        #     self.max_seq_len_cached = self.original_max_seq_len
+
+        original_inv_freq = self.original_inv_freq.to(device)
+        cond = (seq_len >= self.original_max_seq_len).item()
+        inv_freq = torch.cond(
+            cond,
+            (lambda x, y: x.clone()),
+            (lambda x, y: y.clone()),
+            [long_inv_freq, original_inv_freq],
+        )
+        self.inv_freq = inv_freq
 
     @wraps(rope_forward)
     def wrapper(self, x, position_ids):
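The rewrite above turns the cache-growth branch into a functional `torch.cond` so that `torch.export` captures both branches in one graph instead of failing on a data-dependent `if`. A minimal sketch of that pattern, with made-up tensors standing in for the real `inv_freq` buffers:

```python
import torch

long_inv_freq = torch.rand(32)       # frequencies recomputed for the longer sequence
original_inv_freq = torch.rand(32)   # frequencies cached at construction time
seq_len = torch.tensor(4096)
original_max_seq_len = 2048

# Both branches receive the same operands and return tensors with identical
# shape and dtype, which is what torch.cond (and the exporter) requires.
inv_freq = torch.cond(
    (seq_len >= original_max_seq_len).item(),
    lambda x, y: x.clone(),   # sequence grew past the original window
    lambda x, y: y.clone(),   # still inside the original window
    [long_inv_freq, original_inv_freq],
)
print(inv_freq.shape)  # torch.Size([32])
```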
@@ -619,3 +818,152 @@ class patched_Phi3RotaryEmbedding(torch.nn.Module):
         sin = emb.sin() * self.attention_scaling
 
         return cos.to(dtype=x.dtype), sin.to(dtype=x.dtype)
+
+
+class patched_IdeficsEmbedding(torch.nn.Module):
+    _PATCHES_ = ["forward"]
+    _PATCHED_CLASS_ = transformers.models.idefics.modeling_idefics.IdeficsEmbedding
+
+    def forward(self, x, seq_len=None):
+        # x: [bs, num_attention_heads, seq_len, head_size]
+        # if seq_len > self.max_seq_len_cached:
+        #     self._set_cos_sin_cache(seq_len=seq_len, device=x.device, dtype=x.dtype)
+
+        def _set_cos_sin_cache_then(x, inv_freq, seq_len, _cos_cached, _sin_cached):
+            t = torch.arange(seq_len, device=x.device, dtype=torch.int64).type_as(inv_freq)
+            freqs = torch.einsum("i,j->ij", t, inv_freq)
+            emb = torch.cat((freqs, freqs), dim=-1)
+            return emb.cos().to(x.dtype), emb.sin().to(x.dtype)
+
+        def _set_cos_sin_cache_else(_x, _inv_freq, _seq_len, cos_cached, sin_cached):
+            torch._check(seq_len.item() <= cos_cached.shape[0])
+            co = cos_cached[: seq_len.item()].detach().clone()
+            torch._check(seq_len.item() <= sin_cached.shape[0])
+            si = sin_cached[: seq_len.item()].detach().clone()
+            return co.to(dtype=x.dtype), si.to(dtype=x.dtype)
+
+        cos_cached, sin_cached = torch.cond(
+            (seq_len > self.max_seq_len_cached).item(),
+            _set_cos_sin_cache_then,
+            _set_cos_sin_cache_else,
+            [x, self.inv_freq, seq_len, self.cos_cached, self.sin_cached],
+        )
+        return cos_cached, sin_cached
+
+
+class patched_IdeficsAttention(torch.nn.Module):
+    _PATCHES_ = ["forward"]
+    _PATCHED_CLASS_ = transformers.models.idefics.modeling_idefics.IdeficsAttention
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        key_value_states: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        past_key_value: Optional[Tuple[torch.Tensor]] = None,
+        output_attentions: bool = False,
+        use_cache: bool = False,
+        cache_position: Optional[torch.LongTensor] = None,
+        **kwargs,
+    ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
+        # if key_value_states are provided this layer is used as a cross-attention layer
+        is_cross_attention = self.is_cross_attention or key_value_states is not None
+
+        bsz, q_len, _ = hidden_states.size()
+
+        query_states = (
+            self.q_proj(hidden_states)
+            .view(bsz, q_len, self.num_heads, self.head_dim)
+            .transpose(1, 2)
+        )
+        if not is_cross_attention:
+            key_states = (
+                self.k_proj(hidden_states)
+                .view(bsz, q_len, self.num_heads, self.head_dim)
+                .transpose(1, 2)
+            )
+            value_states = (
+                self.v_proj(hidden_states)
+                .view(bsz, q_len, self.num_heads, self.head_dim)
+                .transpose(1, 2)
+            )
+        else:
+            _, kv_len, _ = (
+                key_value_states.size()
+            )  # Note that, in this case, `kv_len` == `kv_seq_len`
+            key_states = (
+                self.k_proj(key_value_states)
+                .view(bsz, kv_len, self.num_heads, self.head_dim)
+                .transpose(1, 2)
+            )
+            value_states = (
+                self.v_proj(key_value_states)
+                .view(bsz, kv_len, self.num_heads, self.head_dim)
+                .transpose(1, 2)
+            )
+
+        kv_seq_len = key_states.shape[-2]
+        if past_key_value is not None:
+            kv_seq_len += cache_position[0]
+
+        if not is_cross_attention:
+            rotary_length = torch.maximum(
+                torch.tensor(kv_seq_len, dtype=torch.int64),
+                torch.tensor(q_len, dtype=torch.int64),
+            )
+            cos, sin = self.rotary_emb(value_states, seq_len=rotary_length)
+            query_states, key_states = (
+                transformers.models.idefics.modeling_idefics.apply_rotary_pos_emb(
+                    query_states, key_states, cos, sin, position_ids
+                )
+            )
+        # [bsz, nh, t, hd]
+
+        if past_key_value is not None:
+            # sin and cos are specific to RoPE models;
+            # cache_position needed for the static cache
+            cache_kwargs = {"cache_position": cache_position}
+            key_states, value_states = past_key_value.update(
+                key_states, value_states, self.layer_idx, cache_kwargs
+            )
+
+        if self.qk_layer_norms:
+            query_states = self.q_layer_norm(query_states)
+            key_states = self.k_layer_norm(key_states)
+
+        attention_interface: Callable = (
+            transformers.models.idefics.modeling_idefics.eager_attention_forward
+        )
+
+        if self.config._attn_implementation != "eager":
+            if self.config._attn_implementation == "sdpa" and output_attentions:
+                transformers.models.idefics.modeling_idefics.logger.warning_once(
+                    "`torch.nn.functional.scaled_dot_product_attention` does not support "
+                    "`output_attentions=True`. Falling back to "
+                    "eager attention. This warning can be removed using the argument "
+                    '`attn_implementation="eager"` when loading the model.'
+                )
+            else:
+                attention_interface = transformers.modeling_utils.ALL_ATTENTION_FUNCTIONS[
+                    self.config._attn_implementation
+                ]
+
+        attn_output, attn_weights = attention_interface(
+            self,
+            query_states,
+            key_states,
+            value_states,
+            attention_mask,
+            dropout=0.0 if not self.training else self.dropout,
+            scaling=self.scaling,
+            **kwargs,
+        )
+
+        attn_output = attn_output.reshape(bsz, q_len, -1).contiguous()
+        attn_output = self.o_proj(attn_output)
+
+        if output_attentions:
+            attn_weights = None
+
+        return attn_output, attn_weights, past_key_value
--- a/onnx_diagnostic/torch_models/hghub/hub_api.py
+++ b/onnx_diagnostic/torch_models/hghub/hub_api.py
@@ -2,9 +2,11 @@ import copy
 import functools
 import json
 import os
+import pprint
+import sys
 from typing import Any, Dict, List, Optional, Union
 import transformers
-from huggingface_hub import HfApi, model_info, hf_hub_download
+from huggingface_hub import HfApi, model_info, hf_hub_download, list_repo_files
 from ...helpers.config_helper import update_config
 from . import hub_data_cached_configs
 from .hub_data import __date__, __data_tasks__, load_architecture_task, __data_arch_values__
@@ -33,10 +35,14 @@ def _retrieve_cached_configurations() -> Dict[str, transformers.PretrainedConfig
     return res
 
 
-def get_cached_configuration(
+def get_cached_configuration(
+    name: str, exc: bool = False, **kwargs
+) -> Optional[transformers.PretrainedConfig]:
     """
     Returns cached configuration to avoid having to many accesses to internet.
     It returns None if not Cache. The list of cached models follows.
+    If *exc* is True or if environment variable ``NOHTTP`` is defined,
+    the function raises an exception if *name* is not found.
 
     .. runpython::
 
@@ -54,8 +60,11 @@ def get_cached_configuration(name: str, **kwargs) -> Optional[transformers.Pretr
         conf = copy.deepcopy(conf)
         update_config(conf, kwargs)
         return conf
-
-
+    assert not exc and not os.environ.get("NOHTTP", ""), (
+        f"Unable to find {name!r} (exc={exc}, "
+        f"NOHTTP={os.environ.get('NOHTTP', '')!r}) "
+        f"in {pprint.pformat(sorted(cached))}"
+    )
     return None
 
 
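With the new `exc` flag (and the `NOHTTP` environment variable), a cache miss now fails with an assertion listing the cached ids instead of silently returning `None`. A hedged usage sketch; the model id is only an example and has to be in the preinstalled list for the call to succeed:

```python
import os
from onnx_diagnostic.torch_models.hghub.hub_api import get_cached_configuration

os.environ["NOHTTP"] = "1"  # make any cache miss raise instead of returning None
config = get_cached_configuration("arnir0/Tiny-LLM", exc=True)  # illustrative id
if config is not None:
    print(type(config), config.architectures)
```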
@@ -64,6 +73,7 @@ def get_pretrained_config(
     trust_remote_code: bool = True,
     use_preinstalled: bool = True,
     subfolder: Optional[str] = None,
+    use_only_preinstalled: bool = False,
     **kwargs,
 ) -> Any:
     """
@@ -77,13 +87,20 @@ def get_pretrained_config(
         :func:`get_cached_configuration`, the cached list is mostly for
         unit tests
     :param subfolder: subfolder for the given model id
+    :param use_only_preinstalled: if True, raises an exception if not preinstalled
     :param kwargs: additional kwargs
     :return: a configuration
     """
     if use_preinstalled:
-        conf = get_cached_configuration(
+        conf = get_cached_configuration(
+            model_id, exc=use_only_preinstalled, subfolder=subfolder, **kwargs
+        )
         if conf is not None:
             return conf
+    assert not use_only_preinstalled, (
+        f"Inconsistencies: use_only_preinstalled={use_only_preinstalled}, "
+        f"use_preinstalled={use_preinstalled!r}"
+    )
     if subfolder:
         try:
             return transformers.AutoConfig.from_pretrained(
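`get_pretrained_config` gains a matching `use_only_preinstalled` flag: when set, the configuration must come from the preinstalled cache and the Hugging Face fallback is never attempted. A short sketch under the same assumption that the id is in the cached list:

```python
from onnx_diagnostic.torch_models.hghub.hub_api import get_pretrained_config

config = get_pretrained_config(
    "arnir0/Tiny-LLM",           # illustrative id, expected to be preinstalled
    use_preinstalled=True,
    use_only_preinstalled=True,  # assert rather than query huggingface.co
)
print(config.__class__.__name__)
```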
@@ -122,12 +139,15 @@ def _guess_task_from_config(config: Any) -> Optional[str]:
 
 
 @functools.cache
-def task_from_arch(
+def task_from_arch(
+    arch: str, default_value: Optional[str] = None, model_id: Optional[str] = None
+) -> str:
     """
     This function relies on stored information. That information needs to be refresh.
 
     :param arch: architecture name
     :param default_value: default value in case the task cannot be determined
+    :param model_id: unused unless the architecture does not help.
     :return: task
 
     .. runpython::
@@ -140,9 +160,16 @@ def task_from_arch(arch: str, default_value: Optional[str] = None) -> str:
         <onnx_diagnostic.torch_models.hghub.hub_data.load_architecture_task>`.
     """
     data = load_architecture_task()
+    if arch not in data and model_id:
+        # Let's try with the model id.
+        return task_from_id(model_id)
     if default_value is not None:
         return data.get(arch, default_value)
-    assert arch in data,
+    assert arch in data, (
+        f"Architecture {arch!r} is unknown, last refresh in {__date__}. "
+        f"``onnx_diagnostic.torch_models.hghub.hub_data.__data_arch__`` "
+        f"needs to be updated (model_id={(model_id or '?')!r})."
+    )
     return data[arch]
 
 
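The lookup now accepts a `model_id` fallback for architectures missing from the stored table (the diff also adds per-model rows such as `emilyalsentzer/Bio_ClinicalBERT,fill-mask`). A small sketch, assuming the architecture table shipped with 0.7.1:

```python
from onnx_diagnostic.torch_models.hghub.hub_api import task_from_arch

# Known architecture: resolved directly from the stored table.
print(task_from_arch("Phi3ForCausalLM"))  # text-generation
# Unknown architecture: default_value avoids the assertion; passing model_id
# instead would let task_from_arch defer to task_from_id.
print(task_from_arch("NotAnArchitecture", default_value="text-generation"))
```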
@@ -160,6 +187,7 @@ def task_from_id(
         if the task cannot be determined
     :param pretrained: uses the config
     :param fall_back_to_pretrained: falls back to pretrained config
+    :param exc: raises an exception if True
     :return: task
     """
     if not pretrained:
@@ -175,9 +203,14 @@ def task_from_id(
         guess = _guess_task_from_config(config)
         if guess is not None:
             return guess
+    data = load_architecture_task()
+    if model_id in data:
+        return data[model_id]
     assert config.architectures is not None and len(config.architectures) == 1, (
         f"Cannot return the task of {model_id!r}, pipeline_tag is not setup, "
-        f"architectures={config.architectures} in config={config}"
+        f"architectures={config.architectures} in config={config}. "
+        f"The task can be added in "
+        f"``onnx_diagnostic.torch_models.hghub.hub_data.__data_arch__``."
     )
     return task_from_arch(config.architectures[0], default_value=default_value)
 
@@ -295,3 +328,43 @@ def enumerate_model_list(
             n -= 1
             if n == 0:
                 break
+
+
+def download_code_modelid(
+    model_id: str, verbose: int = 0, add_path_to_sys_path: bool = True
+) -> List[str]:
+    """
+    Downloads the code for a given model id.
+
+    :param model_id: model id
+    :param verbose: verbosity
+    :param add_path_to_sys_path: add folder where the files are downloaded to sys.path
+    :return: list of downloaded files
+    """
+    if verbose:
+        print(f"[download_code_modelid] retrieve file list for {model_id!r}")
+    files = list_repo_files(model_id)
+    pyfiles = [name for name in files if os.path.splitext(name)[-1] == ".py"]
+    if verbose:
+        print(f"[download_code_modelid] python files {pyfiles}")
+    absfiles = []
+    paths = set()
+    for i, name in enumerate(pyfiles):
+        if verbose:
+            print(f"[download_code_modelid] download file {i+1}/{len(pyfiles)}: {name!r}")
+        r = hf_hub_download(repo_id=model_id, filename=name)
+        p = os.path.split(r)[0]
+        paths.add(p)
+        absfiles.append(r)
+    if add_path_to_sys_path:
+        for p in paths:
+            init = os.path.join(p, "__init__.py")
+            if not os.path.exists(init):
+                with open(init, "w"):
+                    pass
+            if p in sys.path:
+                continue
+            if verbose:
+                print(f"[download_code_modelid] add {p!r} to 'sys.path'")
+            sys.path.insert(0, p)
+    return absfiles
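`download_code_modelid` pulls every `*.py` file of a repository that ships custom modeling code, adds an `__init__.py` to the download folder if needed, and puts that folder on `sys.path` so the custom modules can be imported locally. A hedged usage sketch; the repository id is an example only and the download requires network access:

```python
from onnx_diagnostic.torch_models.hghub.hub_api import download_code_modelid

# Downloads the repository's Python files and registers their folder on sys.path.
files = download_code_modelid("microsoft/Phi-3.5-MoE-instruct", verbose=1)
for f in files:
    print(f)
```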
--- a/onnx_diagnostic/torch_models/hghub/hub_data.py
+++ b/onnx_diagnostic/torch_models/hghub/hub_data.py
@@ -3,7 +3,7 @@ import functools
 import textwrap
 from typing import Dict, List
 
-__date__ = "2025-
+__date__ = "2025-06-21"
 
 __data_arch_values__ = {"ResNetForImageClassification": dict(image_size=224)}
 
@@ -52,6 +52,8 @@ __data_arch__ = textwrap.dedent(
     GPTNeoModel,feature-extraction
     GPTNeoXForCausalLM,text-generation
     GemmaForCausalLM,text-generation
+    Gemma2ForCausalLM,text-generation
+    Gemma3ForConditionalGeneration,image-text-to-text
     GraniteForCausalLM,text-generation
     GroupViTModel,feature-extraction
     HieraForImageClassification,image-classification
@@ -97,6 +99,7 @@ __data_arch__ = textwrap.dedent(
     PegasusModel,feature-extraction
     Phi3ForCausalLM,text-generation
     PhiForCausalLM,text-generation
+    PhiMoEForCausalLM,text-generation
     Pix2StructForConditionalGeneration,image-to-text
     PLBartForConditionalGeneration,text2text-generation
     PoolFormerModel,image-feature-extraction
@@ -144,7 +147,8 @@ __data_arch__ = textwrap.dedent(
     XLMRobertaModel,sentence-similarity
     Wav2Vec2ForCTC,automatic-speech-recognition
     YolosForObjectDetection,object-detection
-    YolosModel,image-feature-extraction
+    YolosModel,image-feature-extraction
+    emilyalsentzer/Bio_ClinicalBERT,fill-mask"""
 )
 
 __data_tasks__ = [