onnx-diagnostic 0.7.11__py3-none-any.whl → 0.7.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. onnx_diagnostic/__init__.py +1 -1
  2. onnx_diagnostic/_command_lines_parser.py +5 -2
  3. onnx_diagnostic/export/dynamic_shapes.py +11 -2
  4. onnx_diagnostic/helpers/helper.py +11 -5
  5. onnx_diagnostic/helpers/log_helper.py +65 -12
  6. onnx_diagnostic/helpers/mini_onnx_builder.py +17 -0
  7. onnx_diagnostic/helpers/model_builder_helper.py +1 -0
  8. onnx_diagnostic/helpers/rt_helper.py +55 -37
  9. onnx_diagnostic/helpers/torch_helper.py +31 -7
  10. onnx_diagnostic/reference/torch_evaluator.py +2 -2
  11. onnx_diagnostic/tasks/data/__init__.py +13 -0
  12. onnx_diagnostic/tasks/data/dummies_imagetext2text_generation_gemma3.onnx +0 -0
  13. onnx_diagnostic/tasks/image_text_to_text.py +256 -141
  14. onnx_diagnostic/tasks/text_generation.py +15 -0
  15. onnx_diagnostic/torch_export_patches/eval/__init__.py +177 -150
  16. onnx_diagnostic/torch_export_patches/eval/model_cases.py +19 -1
  17. onnx_diagnostic/torch_export_patches/onnx_export_errors.py +40 -14
  18. onnx_diagnostic/torch_export_patches/patch_inputs.py +10 -6
  19. onnx_diagnostic/torch_export_patches/patches/patch_torch.py +116 -10
  20. onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +269 -4
  21. onnx_diagnostic/torch_models/hghub/hub_api.py +4 -10
  22. onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +36 -0
  23. onnx_diagnostic/torch_models/hghub/model_inputs.py +32 -4
  24. onnx_diagnostic/torch_models/validate.py +337 -113
  25. onnx_diagnostic/torch_onnx/sbs.py +2 -1
  26. {onnx_diagnostic-0.7.11.dist-info → onnx_diagnostic-0.7.13.dist-info}/METADATA +11 -31
  27. {onnx_diagnostic-0.7.11.dist-info → onnx_diagnostic-0.7.13.dist-info}/RECORD +30 -28
  28. {onnx_diagnostic-0.7.11.dist-info → onnx_diagnostic-0.7.13.dist-info}/WHEEL +0 -0
  29. {onnx_diagnostic-0.7.11.dist-info → onnx_diagnostic-0.7.13.dist-info}/licenses/LICENSE.txt +0 -0
  30. {onnx_diagnostic-0.7.11.dist-info → onnx_diagnostic-0.7.13.dist-info}/top_level.txt +0 -0

onnx_diagnostic/torch_export_patches/patches/patch_torch.py

@@ -1,7 +1,7 @@
 import inspect
 import os
 import traceback
-from typing import Any, Callable, Dict, List, Sequence, Tuple, Union
+from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
 import torch
 from torch._subclasses.fake_tensor import FakeTensorMode
 
@@ -65,6 +65,8 @@ def patch__check_input_constraints_for_graph(
     verbose: int = 0,
 ) -> None:
     try:
+        # PATCHED: catches exception and prints out the information instead of
+        # stopping the conversion.
         return previous_function(input_placeholders, flat_args_with_path, range_constraints)
     except Exception as e:
         if not int(os.environ.get("SKIP_SOLVE_CONSTRAINTS", "1")):
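
The patch only reports the constraint failure by default: the guarded branch runs when SKIP_SOLVE_CONSTRAINTS is set to a falsy integer, and the default value is "1". A minimal sketch (not from the package) of toggling that switch before exporting; the tiny model is only there to give torch.export.export something to trace:

    import os
    import torch

    # With the default "1", `not int(...)` is False and the stricter branch is skipped;
    # setting "0" before exporting makes the patched wrapper enter it.
    os.environ["SKIP_SOLVE_CONSTRAINTS"] = "0"

    class Tiny(torch.nn.Module):
        def forward(self, x):
            return x * 2

    ep = torch.export.export(Tiny(), (torch.randn(2, 3),))
    print(type(ep))
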
@@ -122,8 +124,7 @@ def patched_infer_size(a, b):
         if b1 or b2 or b3:
             expandedSizes[i] = sizeB if guard_size_oblivious(sizeA == 1) else sizeA
         else:
-            # In this case, the current implementation of torch fails (17/12/2024).
-            # Try model SmolLM.
+            # PATCHED: generic case, the dimension is known, no need to assert
             expandedSizes[i] = torch.sym_max(sizeA, sizeB)
     return tuple(expandedSizes)
 
@@ -132,7 +133,11 @@ def patched__broadcast_shapes(*_shapes):
     """Patches ``torch._refs._broadcast_shapes``."""
     from functools import reduce
     from torch._prims_common import IntLike
-    from torch.fx.experimental.symbolic_shapes import guard_size_oblivious
+    from torch.fx.experimental.symbolic_shapes import (
+        guard_size_oblivious,
+        guard_or_false,
+        is_nested_int,
+    )
 
     shapes = tuple(
         (x,) if isinstance(x, IntLike) else x for x in filter(lambda x: x is not None, _shapes)
@@ -142,17 +147,30 @@ def patched__broadcast_shapes(*_shapes):
     if len(shapes) == 0:
         return None
 
-    # Type checking
-    # TODO: make common validations available as utils
     for shape in shapes:
-        assert isinstance(shape, Sequence)
+        if not isinstance(shape, Sequence):
+            raise RuntimeError(
+                "Input shapes should be of type ints, a tuple of ints, "
+                "or a list of ints, got ",
+                shape,
+            )
 
     # Computes common shape
-    common_shape = [  # List[Union[int, torch.SymInt]]
-        1,
-    ] * reduce(max, (len(shape) for shape in shapes))
+    common_shape = [1] * reduce(max, (len(shape) for shape in shapes))
     for _arg_idx, shape in enumerate(shapes):
         for idx in range(-1, -1 - len(shape), -1):
+            if is_nested_int(shape[idx]):
+                # Broadcasting is allowed for (j0, 1) or (j0, j0);
+                # not (j0, j1), (j0, 5), etc.
+                if is_nested_int(common_shape[idx]) and guard_or_false(
+                    shape[idx] == common_shape[idx]
+                ):
+                    continue
+            else:
+                if guard_or_false(shape[idx] == common_shape[idx]):
+                    continue
+            # PATCHED: two cases, if == for sure, no broadcast,
+            # otherwise maybe broadcast with max(dimensions)
             if guard_size_oblivious(common_shape[idx] == 1):
                 if shape[idx] < 0:
                     raise ValueError(
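
The new branch relies on guard_or_false, which answers a symbolic comparison only when it is certainly true and otherwise returns False without recording a guard, so provably equal dimensions are passed through and everything else falls back to a size-oblivious broadcast. A simplified, plain-int sketch of the control flow described by the PATCHED comment (error handling and SymInt semantics are omitted, ordinary comparisons stand in for the guard helpers):

    from functools import reduce

    def broadcast_shapes_sketch(*shapes):
        common = [1] * reduce(max, (len(s) for s in shapes))
        for shape in shapes:
            for idx in range(-1, -1 - len(shape), -1):
                if shape[idx] == common[idx]:  # guard_or_false(...) in the patch
                    continue
                if common[idx] == 1:
                    common[idx] = shape[idx]
                else:
                    # "maybe broadcast with max(dimensions)", torch.sym_max in the patch
                    common[idx] = max(common[idx], shape[idx])
        return tuple(common)

    print(broadcast_shapes_sketch((3, 1), (1, 4)))  # (3, 4)
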
@@ -172,6 +190,7 @@ class patched_ShapeEnv:
     ) -> None:
         if self.frozen:
             self.counter["ignored_backward_guard"] += 1
+            # PATCHED: raised an exception instead of logging.
             raise AssertionError(
                 f"[patched_ShapeEnv] Ignored guard {expr} == {concrete_val}, "
                 f"this could result in accuracy problems"
@@ -338,11 +357,13 @@ class patched_ShapeEnv:
             },
         )
 
+        # PATCHED: removed lines
         # if config.print_specializations:
         #     self.log.warning(
         #         "Specializing %s to %s", self.var_to_sources[a][0].name(), tgt
         #     )
         #     self.log.debug("SPECIALIZATION", stack_info=True)
+        # PATCHED: replaces logging by raising an exception
         assert msg != "range_refined_to_singleton", (
             f"patched_ShapeEnv: A dynamic dimension becomes static! "
             f"a={a!r}, tgt={tgt!r}, msg={msg!r}, tgt_bound={tgt_bound}"
@@ -364,6 +385,7 @@ class patched_ShapeEnv:
         self, prefix: str, g: "SympyBoolean", forcing_spec: bool  # noqa: F821
     ) -> None:
         self._log_guard_remember(prefix=prefix, g=g, forcing_spec=forcing_spec)
+        # PATCHED: removed
         # It happens too often to be relevant.
         # sloc, _maybe_extra_debug = self._get_stack_summary(True)
         # warnings.warn(
@@ -464,3 +486,87 @@ def patched_vmap(func, in_dims=0, out_dims=0):
             return results
 
     return wrapped
+
+
+def patched__constrain_user_specified_dimhint_range(
+    symint: torch.SymInt,
+    hint: int,
+    dim: "_DimHint",  # noqa: F821
+    range_constraints,
+    shape_env,
+    keypath: "KeyPath",  # noqa: F821
+    i: Optional[int] = None,
+) -> Optional[str]:
+    """Patches ``torch._export.non_strict_utils._constrain_user_specified_dimhint_range``."""
+    from torch._export.non_strict_utils import is_int, int_oo, _DimHintType, ValueRanges
+
+    trace_vr = (
+        range_constraints[symint.node.expr]
+        if not is_int(symint)
+        else ValueRanges(int(symint), int(symint))
+    )
+    # warn on 0/1 specialization for Dim.AUTO; not an actual error
+    # PATCHED: remove logging
+    # if dim.type == _DimHintType.AUTO and trace_vr.is_singleton() and hint in (0, 1):
+    #     pathstr = f"inputs{pytree.keystr(keypath)}"
+    #     if i is not None:
+    #         pathstr += f".shape[{i}]"
+    #     msg = (
+    #         f"dimension {pathstr} 0/1 specialized; Dim.AUTO was specified along "
+    #         f"with a sample input with hint = {hint}."
+    #     )
+    #     log.warning(msg)
+
+    try:
+        user_vr = ValueRanges(
+            lower=0 if dim.min is None else dim.min,
+            upper=int_oo if dim.max is None else dim.max,
+        )
+        if is_int(symint):
+            out_vr = trace_vr & user_vr
+        else:
+            range_constraints[symint.node.expr] &= user_vr
+            shape_env.var_to_range[symint.node._expr] &= user_vr
+            out_vr = range_constraints[symint.node.expr]
+
+        # check for Dim.DYNAMIC specializations; special case error message on 0/1
+        if dim.type == _DimHintType.DYNAMIC and out_vr.is_singleton():
+            path = f"inputs{torch.utils._pytree.keystr(keypath)}"
+            if i is not None:
+                path += f".shape[{i}]"
+            if (
+                trace_vr.is_singleton()
+                and hint in (0, 1)
+                # PATCHED: line removed
+                # and not torch.fx.experimental._config.backed_size_oblivious
+            ):
+                return None
+                # PATCHED: line removed
+                # msg = (
+                #     f"- Received user-specified dim hint "
+                #     f"Dim.DYNAMIC(min={dim.min}, max={dim.max}), "
+                #     f"but export 0/1 specialized due to hint of "
+                #     f"{hint} for dimension {path}."
+                # )
+            else:
+                msg = (
+                    f"- Received user-specified dim hint "
+                    f"Dim.DYNAMIC(min={dim.min}, max={dim.max}), "
+                    f"but tracing inferred a static shape of "
+                    f"{out_vr.lower} for dimension {path}."
+                )
+            return msg
+
+    except torch.utils._sympy.value_ranges.ValueRangeError:
+        path = f"inputs{torch.utils._pytree.keystr(keypath)}"
+        if i is not None:
+            path += f".shape[{i}]"
+        msg = (
+            f"- Received user-specified min/max range of [{dim.min}, {dim.max}], "
+            f"conflicting with the inferred min/max range of "
+            f"[{trace_vr.lower}, {trace_vr.upper}], "
+            f"for {path}."
+        )
+        return msg
+
+    return None
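
Most of the helper's logic is ValueRanges arithmetic: the range traced for the symbol is intersected (&) with the user-specified [min, max], and a singleton result means the dimension ended up static despite a Dim.DYNAMIC hint. A minimal sketch of that intersection, assuming the ValueRanges and int_oo utilities from recent torch releases:

    from torch.utils._sympy.value_ranges import ValueRanges
    from torch.utils._sympy.numbers import int_oo

    traced_vr = ValueRanges(2, 2)                 # tracing pinned the size to 2
    user_vr = ValueRanges(lower=0, upper=int_oo)  # Dim.DYNAMIC with no explicit bounds

    out_vr = traced_vr & user_vr
    print(out_vr, out_vr.is_singleton())  # singleton -> the patched helper builds a message
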

onnx_diagnostic/torch_export_patches/patches/patch_transformers.py

@@ -1,13 +1,20 @@
 import inspect
 import math
+import os
 from dataclasses import dataclass
 from functools import wraps
-from typing import Callable, List, Optional, Tuple
+from typing import Callable, List, Optional, Tuple, Union
 import packaging.version as pv
 import torch
 import transformers
 from transformers.modeling_attn_mask_utils import AttentionMaskConverter
 from transformers.cache_utils import StaticCache, Cache
+from transformers.generation.utils import (
+    GenerateNonBeamOutput,
+    GenerationConfig,
+    StoppingCriteriaList,
+    LogitsProcessorList,
+)
 
 try:
     from transformers.cache_utils import parse_processor_args  # noqa: F401
@@ -114,6 +121,7 @@ if patch_masking_utils:
         """manual patch for function ``transformers.masking_utils.eager_mask``."""
         # The masks for eager attention are simply boolean mask from sdpa, casted to 0 and -inf
         _ = kwargs.pop("allow_is_causal_skip", None)
+        # PATCHED: this line called the patched version of sdpa_mask
         mask = patched_sdpa_mask_recent_torch(
             batch_size=batch_size,
             cache_position=cache_position,
@@ -126,7 +134,7 @@ if patch_masking_utils:
             **kwargs,
         )
         min_dtype = torch.finfo(dtype).min
-        # The patched line.
+        # PATCHED: the following line
         # we need 0s where the tokens should be taken into account,
         # and -inf otherwise (mask is already of boolean type)
         # mask =
@@ -158,6 +166,7 @@ if patch_masking_utils:
             mask_function = and_masks(mask_function, padding_mask_function(padding_mask))
         batch_arange = torch.arange(batch_size, device=cache_position.device)
         head_arange = torch.arange(1, device=cache_position.device)
+        # PATCHED: this line calls the patched version of vmap_for_bhqkv
         causal_mask = patched__vmap_for_bhqkv(mask_function)(
             batch_arange, head_arange, cache_position, kv_arange
         )
@@ -214,6 +223,7 @@ if patch_DynamicLayer:
             self.dtype, self.device = key_states.dtype, key_states.device
             new_shape = list(key_states.shape)
             new_shape[-2] = 0
+            # PATCHED: used a tensor with an empty shape and not an empty list to initialize
             self.keys = torch.empty(new_shape, dtype=self.dtype, device=self.device)
             self.values = torch.empty(new_shape, dtype=self.dtype, device=self.device)
             if patch_is_initialized:
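
Seeding the lazy cache with a tensor whose sequence dimension has length zero (instead of an empty Python list) keeps every later concatenation a plain tensor op the exporter can reason about. A standalone illustration with made-up shapes:

    import torch

    key_states = torch.randn(1, 8, 4, 64)  # (batch, heads, seq_len, head_dim), example only
    new_shape = list(key_states.shape)
    new_shape[-2] = 0
    keys = torch.empty(new_shape, dtype=key_states.dtype, device=key_states.device)

    # the first append works exactly like every later one
    keys = torch.cat([keys, key_states], dim=-2)
    print(keys.shape)  # torch.Size([1, 8, 4, 64])
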
@@ -248,6 +258,8 @@ def _patch_make_causal_mask(
         diagonal = past_key_values_length - sliding_window - 1
 
         context_mask = torch.tril(torch.ones_like(mask, dtype=torch.bool), diagonal=diagonal)
+        # PATCHED: removed if is_torchdynamo_compiling(): mask = mask.clone()
+        # and used masked_fill instead of masked_fill_
         # In this case, the current implementation of torch fails (17/12/2024).
         # Try model Phi-3.5-Mini-Instruct.
         mask = mask.masked_fill(context_mask, torch.finfo(dtype).min)
@@ -455,7 +467,16 @@ class patched_GenerationMixin:
     _PATCHES_ = [
         "_cache_dependant_input_preparation",
         "_cache_dependant_input_preparation_exporting",
-        "prepare_inputs_for_generation",
+        (
+            None
+            if pv.Version(transformers.__version__) >= pv.Version("4.56")
+            else "prepare_inputs_for_generation"
+        ),
+        (
+            "_sample"
+            if pv.Version(transformers.__version__) == pv.Version("4.57.0.dev0")
+            else None
+        ),
     ]
     _PATCHED_CLASS_ = transformers.generation.utils.GenerationMixin
 
@@ -588,7 +609,7 @@ class patched_GenerationMixin:
         model_inputs = {}
         # - some models don't have `Cache` support
         #   (which implies they don't expect `cache_position` in `forward`)
-        if self._supports_cache_class:
+        if getattr(self, "_supports_cache_class", False):
             model_inputs["cache_position"] = cache_position
         # - `cache_position` was not a mandatory input in
         #   `prepare_inputs_for_generation` for those models, and this
@@ -728,6 +749,192 @@ class patched_GenerationMixin:
         model_inputs.pop("labels", None)
         return model_inputs
 
+    def _sample(
+        self,
+        input_ids: torch.LongTensor,
+        logits_processor: "LogitsProcessorList",  # noqa: F821
+        stopping_criteria: "StoppingCriteriaList",  # noqa: F821
+        generation_config: "GenerationConfig",  # noqa: F821
+        synced_gpus: bool = False,
+        streamer: Optional["BaseStreamer"] = None,  # noqa: F821
+        **model_kwargs,
+    ) -> Union["GenerateNonBeamOutput", torch.LongTensor]:  # noqa: F821
+        """
+        2025/09/29: updates for Gemma3 models, fix for eager mode as well as the export.
+        """
+        # init values
+        pad_token_id = generation_config._pad_token_tensor
+        output_attentions = generation_config.output_attentions
+        output_hidden_states = generation_config.output_hidden_states
+        output_scores = generation_config.output_scores
+        output_logits = generation_config.output_logits
+        return_dict_in_generate = generation_config.return_dict_in_generate
+        has_eos_stopping_criteria = any(
+            hasattr(criteria, "eos_token_id") for criteria in stopping_criteria
+        )
+        do_sample = generation_config.do_sample
+
+        # init attention / hidden states / scores tuples
+        scores = () if (return_dict_in_generate and output_scores) else None
+        raw_logits = () if (return_dict_in_generate and output_logits) else None
+        decoder_attentions = () if (return_dict_in_generate and output_attentions) else None
+        cross_attentions = () if (return_dict_in_generate and output_attentions) else None
+        decoder_hidden_states = (
+            () if (return_dict_in_generate and output_hidden_states) else None
+        )
+
+        # if model is an encoder-decoder, retrieve encoder attention weights and hidden states
+        if return_dict_in_generate and self.config.is_encoder_decoder:
+            encoder_attentions = (
+                model_kwargs["encoder_outputs"].get("attentions")
+                if output_attentions
+                else None
+            )
+            encoder_hidden_states = (
+                model_kwargs["encoder_outputs"].get("hidden_states")
+                if output_hidden_states
+                else None
+            )
+
+        # keep track of which sequences are already finished
+        batch_size, cur_len = input_ids.shape[:2]
+        this_peer_finished = False
+        unfinished_sequences = torch.ones(
+            batch_size, dtype=torch.long, device=input_ids.device
+        )
+        model_kwargs = self._get_initial_cache_position(
+            cur_len, input_ids.device, model_kwargs
+        )
+
+        model_forward = self.__call__
+        compile_forward = self._valid_auto_compile_criteria(model_kwargs, generation_config)
+        if compile_forward:
+            os.environ["TOKENIZERS_PARALLELISM"] = "0"
+            # If we use FA2 and a static cache, we cannot compile with fullgraph
+            if self.config._attn_implementation == "flash_attention_2":
+                # only raise warning if the user passed an explicit compile-config
+                if (
+                    generation_config.compile_config is not None
+                    and generation_config.compile_config.fullgraph
+                ):
+                    generation_config.compile_config.fullgraph = False
+            model_forward = self.get_compiled_call(generation_config.compile_config)
+
+        if generation_config.prefill_chunk_size is not None:
+            model_kwargs = self._prefill_chunking(input_ids, generation_config, **model_kwargs)
+            is_prefill = False
+        else:
+            is_prefill = True
+
+        while self._has_unfinished_sequences(
+            this_peer_finished, synced_gpus, device=input_ids.device
+        ):
+            # prepare model inputs
+            model_inputs = self.prepare_inputs_for_generation(input_ids, **model_kwargs)
+
+            if is_prefill:
+                outputs = self(**model_inputs, return_dict=True)
+                is_prefill = False
+            else:
+                outputs = model_forward(**model_inputs, return_dict=True)
+
+            model_kwargs = self._update_model_kwargs_for_generation(
+                outputs,
+                model_kwargs,
+                is_encoder_decoder=self.config.is_encoder_decoder,
+            )
+            if synced_gpus and this_peer_finished:
+                continue
+
+            next_token_logits = outputs.logits[:, -1, :].to(
+                copy=True, dtype=torch.float32, device=input_ids.device
+            )
+
+            # pre-process distribution
+            next_token_scores = logits_processor(input_ids, next_token_logits)
+
+            # Store scores, attentions and hidden_states when required
+            if return_dict_in_generate:
+                if output_scores:
+                    scores += (next_token_scores,)
+                if output_logits:
+                    raw_logits += (next_token_logits,)
+                if output_attentions:
+                    decoder_attentions += (
+                        (outputs.decoder_attentions,)
+                        if self.config.is_encoder_decoder
+                        else (outputs.attentions,)
+                    )
+                    if self.config.is_encoder_decoder:
+                        cross_attentions += (outputs.cross_attentions,)
+
+                if output_hidden_states:
+                    decoder_hidden_states += (
+                        (outputs.decoder_hidden_states,)
+                        if self.config.is_encoder_decoder
+                        else (outputs.hidden_states,)
+                    )
+
+            # token selection
+            if do_sample:
+                probs = torch.nn.functional.softmax(next_token_scores, dim=-1)
+                next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
+            else:
+                next_tokens = torch.argmax(next_token_scores, dim=-1)
+
+            # finished sentences should have their next token be a padding token
+            if has_eos_stopping_criteria:
+                next_tokens = next_tokens * unfinished_sequences + pad_token_id * (
+                    1 - unfinished_sequences
+                )
+
+            # update generated ids, model inputs, and length for next step
+            # PATCHED: the two following lines, next_tokens can be 2D already for this model
+            next_tokens_2d = (
+                next_tokens if len(next_tokens.shape) == 2 else next_tokens[:, None]
+            )
+            input_ids = torch.cat([input_ids, next_tokens_2d], dim=-1)
+            if streamer is not None:
+                streamer.put(next_tokens.cpu())
+
+            unfinished_sequences = unfinished_sequences & ~stopping_criteria(input_ids, scores)
+            this_peer_finished = unfinished_sequences.max() == 0
+            cur_len += 1
+
+            # This is needed to properly delete outputs.logits which may be very large
+            # for first iteration
+            # Otherwise a reference to outputs is kept which keeps
+            # the logits alive in the next iteration
+            del outputs
+
+        if streamer is not None:
+            streamer.end()
+
+        if return_dict_in_generate:
+            if self.config.is_encoder_decoder:
+                return transformers.generation.utils.GenerateEncoderDecoderOutput(
+                    sequences=input_ids,
+                    scores=scores,
+                    logits=raw_logits,
+                    encoder_attentions=encoder_attentions,
+                    encoder_hidden_states=encoder_hidden_states,
+                    decoder_attentions=decoder_attentions,
+                    cross_attentions=cross_attentions,
+                    decoder_hidden_states=decoder_hidden_states,
+                    past_key_values=model_kwargs.get("past_key_values"),
+                )
+            else:
+                return transformers.generation.utils.GenerateDecoderOnlyOutput(
+                    sequences=input_ids,
+                    scores=scores,
+                    logits=raw_logits,
+                    attentions=decoder_attentions,
+                    hidden_states=decoder_hidden_states,
+                    past_key_values=model_kwargs.get("past_key_values"),
+                )
+        else:
+            return input_ids
+
 
 def patched__compute_dynamic_ntk_parameters(
     config: Optional[transformers.PretrainedConfig] = None,
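
The change called out by the PATCHED comment is that next_tokens may already be 2D for this model, so the loop only adds the trailing dimension when it is missing before appending to input_ids. A standalone sketch of that normalization:

    import torch

    input_ids = torch.tensor([[1, 2, 3]])

    for next_tokens in (torch.tensor([7]), torch.tensor([[8]])):  # 1D or already 2D
        next_tokens_2d = (
            next_tokens if len(next_tokens.shape) == 2 else next_tokens[:, None]
        )
        input_ids = torch.cat([input_ids, next_tokens_2d], dim=-1)

    print(input_ids)  # tensor([[1, 2, 3, 7, 8]])
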
@@ -791,6 +998,7 @@ def patched__compute_dynamic_ntk_parameters(
     if seq_len is None:
         seq_len = max_position_embeddings
     else:
+        # PATCHED: remove the line using max
         torch._check(isinstance(seq_len, torch.Tensor))
         seq_len = torch.maximum(
             seq_len,
@@ -896,6 +1104,7 @@ def patched_dynamic_rope_update(rope_forward):
         )
         original_inv_freq = self.original_inv_freq.to(device)
 
+        # PATCHED: uses torch.cond instead of a test
         cond = (seq_len > original_max_position_embeddings).item()
         inv_freq = torch.cond(
             cond,
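
Using torch.cond instead of a Python if keeps both the long-sequence and the short-sequence branch in the exported graph rather than baking in whichever branch the example input happened to take. A minimal, self-contained torch.cond sketch (the names are unrelated to the rope code):

    import torch

    def long_branch(inv_freq):
        return inv_freq / 2.0

    def short_branch(inv_freq):
        return inv_freq.clone()

    inv_freq = torch.arange(1.0, 5.0)
    cond = torch.tensor(True)  # stands in for `seq_len > original_max_position_embeddings`
    print(torch.cond(cond, long_branch, short_branch, (inv_freq,)))
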
@@ -967,6 +1176,7 @@ def patched_dynamic_rope_update(rope_forward):
 
         original_inv_freq = self.original_inv_freq.to(device)
         cond = (seq_len >= self.original_max_seq_len).item()
+        # PATCHED: uses torch.cond instead of a test
         inv_freq = torch.cond(
             cond,
             (lambda x, y: x.clone()),
@@ -1002,6 +1212,7 @@ def common_eager_attention_forward(
 
     attn_weights = torch.matmul(query, key.transpose(2, 3)) * scaling
     if attention_mask is not None:
+        # PATCHED
         # The two following lines were added.
        if attention_mask is not None and attention_mask.ndim == 4:
             attention_mask = attention_mask[:, :, :, : key.shape[-2]]
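
The two added lines trim a 4D attention mask down to the key length, so a mask built for a longer (padded or cached) sequence still lines up with key before it is added to the attention weights. Sketch with dummy shapes:

    import torch

    key = torch.randn(1, 8, 6, 64)             # (batch, heads, kv_len, head_dim)
    attention_mask = torch.zeros(1, 1, 4, 10)  # last dimension longer than kv_len

    if attention_mask is not None and attention_mask.ndim == 4:
        attention_mask = attention_mask[:, :, :, : key.shape[-2]]

    print(attention_mask.shape)  # torch.Size([1, 1, 4, 6])
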
@@ -1074,6 +1285,7 @@ def patched_modeling_marian_eager_attention_forward(
 class common_RotaryEmbedding(torch.nn.Module):
     # This may cause some issues.
     # @torch.no_grad()
+    # PATCHED: the decorator
     @patched_dynamic_rope_update
     def forward(self, x, position_ids):
         inv_freq_expanded = (
@@ -1629,3 +1841,56 @@ if patch_qwen3:
             batch_size, sequence_length, hidden_dim
         )
         return final_hidden_states, router_logits
+
+
+try:
+    from transformers.models.gemma3.modeling_gemma3 import Gemma3Model  # noqa: F401
+
+    patch_gemma3 = True
+except ImportError:
+    patch_gemma3 = False
+
+
+if patch_gemma3:
+
+    class patched_Gemma3Model(torch.nn.Module):
+        _PATCHES_ = ["get_placeholder_mask"]
+        _PATCHED_CLASS_ = transformers.models.gemma3.modeling_gemma3.Gemma3Model
+        _PATCHED_PR_ = "https://github.com/huggingface/transformers/pull/41319"
+
+        def get_placeholder_mask(
+            self,
+            input_ids: torch.LongTensor,
+            inputs_embeds: torch.FloatTensor,
+            image_features: torch.FloatTensor,
+        ):
+            if input_ids is None:
+                special_image_mask = inputs_embeds == self.get_input_embeddings()(
+                    torch.tensor(
+                        self.config.image_token_id,
+                        dtype=torch.long,
+                        device=inputs_embeds.device,
+                    )
+                )
+                special_image_mask = special_image_mask.all(-1)
+            else:
+                special_image_mask = input_ids == self.config.image_token_id
+
+            n_image_tokens = special_image_mask.sum()
+            special_image_mask = (
+                special_image_mask.unsqueeze(-1)
+                .expand_as(inputs_embeds)
+                .to(inputs_embeds.device)
+            )
+            n_image_features = image_features.shape[0] * image_features.shape[1]
+            # PATCHED: torch._check
+            # if inputs_embeds[special_image_mask].numel() != image_features.numel():
+            #     raise ValueError( ... )
+            torch._check(
+                inputs_embeds[special_image_mask].numel() == image_features.numel(),
+                lambda: (
+                    f"Image features and image tokens do not match: tokens: "
+                    f"{n_image_tokens}, features {n_image_features}"
+                ),
+            )
+            return special_image_mask
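
Replacing the ValueError with torch._check expresses the token/feature consistency requirement as an assertion torch.export can keep in the graph instead of tracing through a data-dependent Python branch. A minimal torch._check sketch:

    import torch

    a = torch.randn(4, 8)
    b = torch.randn(2, 16)

    # passes silently because the condition holds; raises with the lambda's message otherwise
    torch._check(
        a.numel() == b.numel(),
        lambda: f"element counts do not match: {a.numel()} vs {b.numel()}",
    )
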

onnx_diagnostic/torch_models/hghub/hub_api.py

@@ -289,21 +289,17 @@ def task_from_tags(tags: Union[str, List[str]]) -> str:
 
 def enumerate_model_list(
     n: int = 50,
-    task: Optional[str] = None,
-    library: Optional[str] = None,
-    tags: Optional[Union[str, List[str]]] = None,
+    pipeline_tag: Optional[str] = None,
     search: Optional[str] = None,
     dump: Optional[str] = None,
-    filter: Optional[str] = None,
+    filter: Optional[Union[str, List[str]]] = None,
     verbose: int = 0,
 ):
     """
     Enumerates models coming from :epkg:`huggingface_hub`.
 
     :param n: number of models to retrieve (-1 for all)
-    :param task: see :meth:`huggingface_hub.HfApi.list_models`
-    :param tags: see :meth:`huggingface_hub.HfApi.list_models`
-    :param library: see :meth:`huggingface_hub.HfApi.list_models`
+    :param pipeline_tag: see :meth:`huggingface_hub.HfApi.list_models`
     :param search: see :meth:`huggingface_hub.HfApi.list_models`
     :param filter: see :meth:`huggingface_hub.HfApi.list_models`
     :param dump: dumps the result in this csv file
@@ -311,9 +307,7 @@ def enumerate_model_list(
     """
     api = HfApi()
     models = api.list_models(
-        task=task,
-        library=library,
-        tags=tags,
+        pipeline_tag=pipeline_tag,
         search=search,
         full=True,
         filter=filter,
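
The task, library and tags arguments are folded into pipeline_tag and a more permissive filter, matching the current huggingface_hub.HfApi.list_models signature. A hedged usage sketch of the updated helper (the call queries the Hugging Face Hub, and the values passed here are only illustrative):

    from onnx_diagnostic.torch_models.hghub.hub_api import enumerate_model_list

    for info in enumerate_model_list(
        n=5,
        pipeline_tag="text-generation",
        filter="transformers",
        verbose=1,
    ):
        print(info)
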

onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py

@@ -4829,3 +4829,39 @@ def _ccached_microsoft_phi3_mini_128k_instruct():
             "vocab_size": 32064,
         }
     )
+
+
+def _ccached_google_gemma_3_4b_it_like():
+    "google/gemma-3-4b-it"
+    return transformers.Gemma3Config(
+        **{
+            "architectures": ["Gemma3ForConditionalGeneration"],
+            "boi_token_index": 255999,
+            "eoi_token_index": 256000,
+            "eos_token_id": [1, 106],
+            "image_token_index": 262144,
+            "initializer_range": 0.02,
+            "mm_tokens_per_image": 256,
+            "model_type": "gemma3",
+            "text_config": {
+                "hidden_size": 2560,
+                "intermediate_size": 10240,
+                "model_type": "gemma3_text",
+                "num_hidden_layers": 34,
+                "rope_scaling": {"factor": 8.0, "rope_type": "linear"},
+                "sliding_window": 1024,
+            },
+            "torch_dtype": "bfloat16",
+            "transformers_version": "4.50.0.dev0",
+            "vision_config": {
+                "hidden_size": 1152,
+                "image_size": 896,
+                "intermediate_size": 4304,
+                "model_type": "siglip_vision_model",
+                "num_attention_heads": 16,
+                "num_hidden_layers": 27,
+                "patch_size": 14,
+                "vision_use_head": False,
+            },
+        }
+    )
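
The new cached configuration reproduces the google/gemma-3-4b-it settings so dummy inputs can be built without downloading the real config. A short sketch of reading it back, assuming a transformers version that ships Gemma3Config (the helper is internal to the package):

    import transformers
    from onnx_diagnostic.torch_models.hghub.hub_data_cached_configs import (
        _ccached_google_gemma_3_4b_it_like,
    )

    config = _ccached_google_gemma_3_4b_it_like()
    assert isinstance(config, transformers.Gemma3Config)
    print(config.model_type, config.text_config.hidden_size, config.vision_config.image_size)
    # gemma3 2560 896
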