onnx-diagnostic 0.7.0-py3-none-any.whl → 0.7.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onnx_diagnostic/__init__.py +1 -1
- onnx_diagnostic/_command_lines_parser.py +213 -5
- onnx_diagnostic/export/dynamic_shapes.py +48 -20
- onnx_diagnostic/export/shape_helper.py +126 -0
- onnx_diagnostic/ext_test_case.py +31 -0
- onnx_diagnostic/helpers/cache_helper.py +42 -20
- onnx_diagnostic/helpers/config_helper.py +16 -1
- onnx_diagnostic/helpers/log_helper.py +1561 -177
- onnx_diagnostic/helpers/torch_helper.py +6 -2
- onnx_diagnostic/tasks/__init__.py +2 -0
- onnx_diagnostic/tasks/image_text_to_text.py +69 -18
- onnx_diagnostic/tasks/text_generation.py +17 -8
- onnx_diagnostic/tasks/text_to_image.py +91 -0
- onnx_diagnostic/torch_export_patches/onnx_export_errors.py +24 -7
- onnx_diagnostic/torch_export_patches/onnx_export_serialization.py +144 -349
- onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +87 -7
- onnx_diagnostic/torch_export_patches/serialization/__init__.py +46 -0
- onnx_diagnostic/torch_export_patches/serialization/diffusers_impl.py +34 -0
- onnx_diagnostic/torch_export_patches/serialization/transformers_impl.py +259 -0
- onnx_diagnostic/torch_models/hghub/hub_api.py +73 -5
- onnx_diagnostic/torch_models/hghub/hub_data.py +7 -2
- onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +28 -0
- onnx_diagnostic/torch_models/hghub/model_inputs.py +74 -14
- onnx_diagnostic/torch_models/validate.py +45 -16
- {onnx_diagnostic-0.7.0.dist-info → onnx_diagnostic-0.7.2.dist-info}/METADATA +1 -1
- {onnx_diagnostic-0.7.0.dist-info → onnx_diagnostic-0.7.2.dist-info}/RECORD +29 -24
- {onnx_diagnostic-0.7.0.dist-info → onnx_diagnostic-0.7.2.dist-info}/WHEEL +0 -0
- {onnx_diagnostic-0.7.0.dist-info → onnx_diagnostic-0.7.2.dist-info}/licenses/LICENSE.txt +0 -0
- {onnx_diagnostic-0.7.0.dist-info → onnx_diagnostic-0.7.2.dist-info}/top_level.txt +0 -0
@@ -735,7 +735,8 @@ def to_any(value: Any, to_value: Union[torch.dtype, torch.device, str]) -> Any:
                     [t.to(to_value) for t in value.key_cache],
                     [t.to(to_value) for t in value.value_cache],
                 )
-            )
+            ),
+            max_cache_len=value.max_cache_len,
         )
     if value.__class__.__name__ == "EncoderDecoderCache":
         return make_encoder_decoder_cache(
@@ -784,7 +785,10 @@ def torch_deepcopy(value: Any) -> Any:
             torch_deepcopy(list(zip(value.key_cache, value.value_cache)))
         )
     if value.__class__.__name__ == "StaticCache":
-        return make_static_cache(
+        return make_static_cache(
+            torch_deepcopy(list(zip(value.key_cache, value.value_cache))),
+            max_cache_len=value.max_cache_len,
+        )
     if value.__class__.__name__ == "SlidingWindowCache":
         return make_sliding_window_cache(
             torch_deepcopy(list(zip(value.key_cache, value.value_cache)))
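
For orientation, a minimal sketch of how the updated helper is called after this change (the import location is inferred from the file list above; shapes are arbitrary):

    import torch
    from onnx_diagnostic.helpers.cache_helper import make_static_cache  # assumed location

    # Two layers of (key, value) tensors shaped (batch, num_heads, cache_len, head_dim).
    pairs = [(torch.randn(2, 8, 16, 32), torch.randn(2, 8, 16, 32)) for _ in range(2)]

    # 0.7.2 forwards the cache capacity explicitly when rebuilding a StaticCache.
    cache = make_static_cache(pairs, max_cache_len=16)
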
@@ -11,6 +11,7 @@ from . import (
     summarization,
     text_classification,
     text_generation,
+    text_to_image,
     text2text_generation,
     zero_shot_image_classification,
 )
@@ -27,6 +28,7 @@ __TASKS__ = [
     summarization,
     text_classification,
     text_generation,
+    text_to_image,
     text2text_generation,
     zero_shot_image_classification,
 ]
@@ -96,10 +96,10 @@ def get_inputs(
                 for i in range(num_hidden_layers)
             ]
         ),
-
+        pixel_values=torch.ones((batch_size, n_images, num_channels, width, height)).to(
             torch.int64
         ),
-
+        image_attention_mask=torch.ones((batch_size, sequence_length2, n_images)).to(
             torch.int64
         ),
     )
@@ -132,16 +132,30 @@ def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
     If the configuration is None, the function selects typical dimensions.
     """
     if config is not None:
-
-
-
-
-
-
-
-
-
-
+        if hasattr(config, "text_config"):
+            check_hasattr(
+                config.text_config,
+                "vocab_size",
+                "hidden_size",
+                "num_attention_heads",
+                ("num_key_value_heads", "num_attention_heads"),
+                "intermediate_size",
+                "hidden_size",
+            )
+            check_hasattr(config, "vision_config")
+            text_config = True
+        else:
+            check_hasattr(
+                config,
+                "vocab_size",
+                "hidden_size",
+                "num_attention_heads",
+                ("num_key_value_heads", "num_attention_heads"),
+                "intermediate_size",
+                "hidden_size",
+                "vision_config",
+            )
+            text_config = False
         check_hasattr(config.vision_config, "image_size", "num_channels")
     kwargs = dict(
         batch_size=2,
@@ -150,17 +164,54 @@ def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
         head_dim=(
             16
             if config is None
-            else getattr(
+            else getattr(
+                config,
+                "head_dim",
+                (config.text_config.hidden_size if text_config else config.hidden_size)
+                // (
+                    config.text_config.num_attention_heads
+                    if text_config
+                    else config.num_attention_heads
+                ),
+            )
+        ),
+        dummy_max_token_id=(
+            31999
+            if config is None
+            else (config.text_config.vocab_size if text_config else config.vocab_size) - 1
+        ),
+        num_hidden_layers=(
+            4
+            if config is None
+            else (
+                config.text_config.num_hidden_layers
+                if text_config
+                else config.num_hidden_layers
+            )
         ),
-        dummy_max_token_id=31999 if config is None else config.vocab_size - 1,
-        num_hidden_layers=4 if config is None else config.num_hidden_layers,
         num_key_value_heads=(
             8
             if config is None
-            else
+            else (
+                _pick(config.text_config, "num_key_value_heads", "num_attention_heads")
+                if text_config
+                else _pick(config, "num_key_value_heads", "num_attention_heads")
+            )
+        ),
+        intermediate_size=(
+            1024
+            if config is None
+            else (
+                config.text_config.intermediate_size
+                if text_config
+                else config.intermediate_size
+            )
+        ),
+        hidden_size=(
+            512
+            if config is None
+            else (config.text_config.hidden_size if text_config else config.hidden_size)
         ),
-        intermediate_size=1024 if config is None else config.intermediate_size,
-        hidden_size=512 if config is None else config.hidden_size,
         width=224 if config is None else config.vision_config.image_size,
         height=224 if config is None else config.vision_config.image_size,
         num_channels=3 if config is None else config.vision_config.num_channels,
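
The branches above boil down to one fallback rule: multimodal configurations keep their text parameters under config.text_config, and head_dim is read from the configuration when present, otherwise derived as hidden_size // num_attention_heads. A sketch of that rule (guess_head_dim is a hypothetical helper, not part of the package):

    def guess_head_dim(config, text_config: bool) -> int:  # hypothetical helper
        # Pick the sub-config that actually carries the text parameters.
        sub = config.text_config if text_config else config
        return getattr(config, "head_dim", sub.hidden_size // sub.num_attention_heads)
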
@@ -109,7 +109,7 @@ def get_inputs(
     sequence_length2 = seq_length_multiple
 
     shapes = {
-        "input_ids": {0: batch, 1:
+        "input_ids": {0: batch, 1: "sequence_length"},
         "attention_mask": {
             0: batch,
             1: "cache+seq",  # cache_length + seq_length
@@ -176,8 +176,10 @@ def get_inputs(
             "attention_mask": {0: batch, 2: "seq"},
             "cache_position": {0: "seq"},
             "past_key_values": [
-                [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)],
-                [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)],
+                # [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)],
+                # [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)],
+                [{0: batch} for _ in range(num_hidden_layers)],
+                [{0: batch} for _ in range(num_hidden_layers)],
             ],
         }
         inputs = dict(
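
The switch from {0: batch, 2: cache_length} to {0: batch} presumably reflects that a StaticCache has a fixed capacity (max_cache_len), so only the batch axis remains dynamic. The resulting spec looks like this (illustrative values only):

    num_hidden_layers = 4  # placeholder
    past_key_values_shapes = [
        [{0: "batch"} for _ in range(num_hidden_layers)],  # one spec per layer of keys
        [{0: "batch"} for _ in range(num_hidden_layers)],  # one spec per layer of values
    ]
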
@@ -188,18 +190,25 @@ def get_inputs(
                 (batch_size, num_key_value_heads, sequence_length2, head_dim)
             ).to(torch.bool),
             cache_position=torch.arange(sequence_length2).to(torch.int64),
-            past_key_values=
+            past_key_values=make_static_cache(
                 [
                     (
                         torch.randn(
-                            batch_size,
+                            batch_size,
+                            num_key_value_heads,
+                            sequence_length + sequence_length2,
+                            head_dim,
                         ),
                         torch.randn(
-                            batch_size,
+                            batch_size,
+                            num_key_value_heads,
+                            sequence_length + sequence_length2,
+                            head_dim,
                         ),
                     )
                     for i in range(num_hidden_layers)
-                ]
+                ],
+                max_cache_len=max(sequence_length + sequence_length2, head_dim),
             ),
         )
     else:
@@ -230,7 +239,7 @@ def get_inputs(
             position_ids=torch.arange(sequence_length, sequence_length + sequence_length2)
             .to(torch.int64)
             .expand((batch_size, -1)),
-            past_key_values=make_cache(
+            past_key_values=make_cache(  # type: ignore[operator]
                 [
                     (
                         torch.randn(
@@ -0,0 +1,91 @@
+from typing import Any, Callable, Dict, Optional, Tuple
+import torch
+from ..helpers.config_helper import update_config, check_hasattr, pick
+
+__TASK__ = "text-to-image"
+
+
+def reduce_model_config(config: Any) -> Dict[str, Any]:
+    """Reduces a model size."""
+    check_hasattr(config, "sample_size", "cross_attention_dim")
+    kwargs = dict(
+        sample_size=min(config["sample_size"], 32),
+        cross_attention_dim=min(config["cross_attention_dim"], 64),
+    )
+    update_config(config, kwargs)
+    return kwargs
+
+
+def get_inputs(
+    model: torch.nn.Module,
+    config: Optional[Any],
+    batch_size: int,
+    sequence_length: int,
+    cache_length: int,
+    in_channels: int,
+    sample_size: int,
+    cross_attention_dim: int,
+    add_second_input: bool = False,
+    **kwargs,  # unused
+):
+    """
+    Generates inputs for task ``text-to-image``.
+    Example:
+
+    ::
+
+        sample:T10s2x4x96x96[-3.7734375,4.359375:A-0.043463995395642184]
+        timestep:T7s=101
+        encoder_hidden_states:T10s2x77x1024[-6.58203125,13.0234375:A-0.16780663634440257]
+    """
+    assert (
+        "cls_cache" not in kwargs
+    ), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
+    batch = "batch"
+    shapes = {
+        "sample": {0: batch},
+        "timestep": {},
+        "encoder_hidden_states": {0: batch, 1: "encoder_length"},
+    }
+    inputs = dict(
+        sample=torch.randn((batch_size, sequence_length, sample_size, sample_size)).to(
+            torch.float32
+        ),
+        timestep=torch.tensor([101], dtype=torch.int64),
+        encoder_hidden_states=torch.randn(
+            (batch_size, sequence_length, cross_attention_dim)
+        ).to(torch.float32),
+    )
+    res = dict(inputs=inputs, dynamic_shapes=shapes)
+    if add_second_input:
+        res["inputs2"] = get_inputs(
+            model=model,
+            config=config,
+            batch_size=batch_size + 1,
+            sequence_length=sequence_length,
+            cache_length=cache_length + 1,
+            in_channels=in_channels,
+            sample_size=sample_size,
+            cross_attention_dim=cross_attention_dim,
+            **kwargs,
+        )["inputs"]
+    return res
+
+
+def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
+    """
+    Inputs kwargs.
+
+    If the configuration is None, the function selects typical dimensions.
+    """
+    if config is not None:
+        check_hasattr(config, "sample_size", "cross_attention_dim", "in_channels")
+    kwargs = dict(
+        batch_size=2,
+        sequence_length=pick(config, "in_channels", 4),
+        cache_length=77,
+        in_channels=pick(config, "in_channels", 4),
+        sample_size=pick(config, "sample_size", 32),
+        cross_attention_dim=pick(config, "cross_attention_dim", 64),
+    )
+    return kwargs, get_inputs
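
As a reading aid, the dummy inputs the new task produces look roughly like this; shapes are taken from the docstring example above, and the snippet mirrors get_inputs rather than calling it:

    import torch

    inputs = dict(
        sample=torch.randn(2, 4, 96, 96),                # (batch, in_channels, sample_size, sample_size)
        timestep=torch.tensor([101], dtype=torch.int64),
        encoder_hidden_states=torch.randn(2, 77, 1024),  # (batch, encoder_length, cross_attention_dim)
    )
    dynamic_shapes = {
        "sample": {0: "batch"},
        "timestep": {},
        "encoder_hidden_states": {0: "batch", 1: "encoder_length"},
    }
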
@@ -134,11 +134,17 @@ def unpatch_module_or_classes(mod, info: Dict[type, Dict[type, Callable]], verbo
 
 @contextlib.contextmanager
 def register_additional_serialization_functions(
-    patch_transformers: bool = False, verbose: int = 0
+    patch_transformers: bool = False, patch_diffusers: bool = False, verbose: int = 0
 ) -> Callable:
     """The necessary modifications to run the fx Graph."""
-    fct_callable =
-
+    fct_callable = (
+        replacement_before_exporting
+        if patch_transformers or patch_diffusers
+        else (lambda x: x)
+    )
+    done = register_cache_serialization(
+        patch_transformers=patch_transformers, patch_diffusers=patch_diffusers, verbose=verbose
+    )
     try:
         yield fct_callable
     finally:
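
A minimal usage sketch of the extended context manager; only the patch_diffusers argument comes from this diff, and the import path is an assumption:

    from onnx_diagnostic.torch_export_patches import (  # assumed import path
        register_additional_serialization_functions,
    )

    # Registers cache serialization for both transformers and diffusers classes
    # without applying any other export patch.
    with register_additional_serialization_functions(
        patch_transformers=True, patch_diffusers=True, verbose=1
    ) as modificator:
        ...  # run torch.export.export here
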
@@ -150,6 +156,7 @@ def torch_export_patches(
     patch_sympy: bool = True,
     patch_torch: bool = True,
     patch_transformers: bool = False,
+    patch_diffusers: bool = False,
     catch_constraints: bool = True,
     stop_if_static: int = 0,
     verbose: int = 0,
@@ -165,6 +172,7 @@ def torch_export_patches(
     :param patch_sympy: fix missing method ``name`` for IntegerConstant
     :param patch_torch: patches :epkg:`torch` with supported implementation
     :param patch_transformers: patches :epkg:`transformers` with supported implementation
+    :param patch_diffusers: patches :epkg:`diffusers` with supported implementation
     :param catch_constraints: catch constraints related to dynamic shapes,
         as a result, some dynamic dimension may turn into static ones,
         the environment variable ``SKIP_SOLVE_CONSTRAINTS=0``
@@ -174,8 +182,8 @@ def torch_export_patches(
         and show a stack trace indicating the exact location of the issue,
         ``if stop_if_static > 1``, more methods are replace to catch more
         issues
-    :param patch: if False, disable all patches
-        serialization
+    :param patch: if False, disable all patches but keeps the registration of
+        serialization functions if other patch functions are enabled
     :param custom_patches: to apply custom patches,
         every patched class must define static attributes
         ``_PATCHES_``, ``_PATCHED_CLASS_``
@@ -249,6 +257,7 @@ def torch_export_patches(
             patch_sympy=patch_sympy,
             patch_torch=patch_torch,
             patch_transformers=patch_transformers,
+            patch_diffusers=patch_diffusers,
             catch_constraints=catch_constraints,
             stop_if_static=stop_if_static,
             verbose=verbose,
@@ -261,7 +270,11 @@ def torch_export_patches(
         pass
     elif not patch:
         fct_callable = lambda x: x  # noqa: E731
-        done = register_cache_serialization(
+        done = register_cache_serialization(
+            patch_transformers=patch_transformers,
+            patch_diffusers=patch_diffusers,
+            verbose=verbose,
+        )
         try:
             yield fct_callable
         finally:
@@ -281,7 +294,11 @@ def torch_export_patches(
         # caches
         ########
 
-        cache_done = register_cache_serialization(
+        cache_done = register_cache_serialization(
+            patch_transformers=patch_transformers,
+            patch_diffusers=patch_diffusers,
+            verbose=verbose,
+        )
 
         #############
         # patch sympy
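
Taken together, the new flag is meant to be used like this (a sketch only: model, inputs and dynamic_shapes are placeholders, and the import path is an assumption):

    import torch
    from onnx_diagnostic.torch_export_patches import torch_export_patches  # assumed import path

    with torch_export_patches(patch_transformers=True, patch_diffusers=True, verbose=1):
        ep = torch.export.export(
            model,                          # placeholder: the module under test
            (),
            kwargs=inputs,                  # placeholder: e.g. from a task's get_inputs
            dynamic_shapes=dynamic_shapes,  # placeholder: matching shape spec
        )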