onnx-diagnostic 0.8.10__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onnx_diagnostic/__init__.py +1 -1
- onnx_diagnostic/_command_lines_parser.py +136 -140
- onnx_diagnostic/ci_models/data/Blanca_Lake_Hudak.jpg +0 -0
- onnx_diagnostic/ci_models/data/Ice_worm_glacier.jpg +0 -0
- onnx_diagnostic/ci_models/data/__init__.py +0 -0
- onnx_diagnostic/ci_models/export_phi4_mm.py +10 -7
- onnx_diagnostic/export/api.py +13 -4
- onnx_diagnostic/export/dynamic_shapes.py +1 -1
- onnx_diagnostic/export/validate.py +2 -0
- onnx_diagnostic/ext_test_case.py +32 -15
- onnx_diagnostic/helpers/args_helper.py +1 -0
- onnx_diagnostic/helpers/bench_run.py +0 -1
- onnx_diagnostic/helpers/cache_helper.py +102 -36
- onnx_diagnostic/helpers/doc_helper.py +7 -4
- onnx_diagnostic/helpers/graph_helper.py +6 -6
- onnx_diagnostic/helpers/helper.py +39 -0
- onnx_diagnostic/helpers/log_helper.py +37 -14
- onnx_diagnostic/helpers/memory_peak.py +5 -1
- onnx_diagnostic/helpers/mini_onnx_builder.py +9 -14
- onnx_diagnostic/helpers/model_builder_helper.py +1 -1
- onnx_diagnostic/helpers/onnx_helper.py +283 -110
- onnx_diagnostic/helpers/ort_session.py +5 -2
- onnx_diagnostic/helpers/rt_helper.py +53 -9
- onnx_diagnostic/helpers/torch_helper.py +15 -11
- onnx_diagnostic/investigate/__init__.py +0 -0
- onnx_diagnostic/investigate/input_observer.py +970 -0
- onnx_diagnostic/reference/evaluator.py +0 -1
- onnx_diagnostic/reference/ort_evaluator.py +0 -1
- onnx_diagnostic/reference/report_results_comparison.py +9 -3
- onnx_diagnostic/reference/torch_evaluator.py +5 -1
- onnx_diagnostic/reference/torch_ops/_op_run.py +3 -5
- onnx_diagnostic/reference/torch_ops/sequence_ops.py +1 -1
- onnx_diagnostic/tasks/feature_extraction.py +0 -1
- onnx_diagnostic/torch_export_patches/__init__.py +0 -1
- onnx_diagnostic/torch_export_patches/onnx_export_errors.py +32 -14
- onnx_diagnostic/torch_export_patches/patch_module.py +1 -1
- onnx_diagnostic/torch_export_patches/patches/_patch_transformers_masking_utils.py +107 -6
- onnx_diagnostic/torch_export_patches/patches/_patch_transformers_rotary_embedding.py +2 -2
- onnx_diagnostic/torch_export_patches/patches/patch_torch.py +13 -3
- onnx_diagnostic/torch_export_patches/patches/patch_transformers.py +1 -0
- onnx_diagnostic/torch_export_patches/serialization/transformers_impl.py +70 -23
- onnx_diagnostic/torch_models/code_sample.py +5 -10
- onnx_diagnostic/torch_models/hghub/hub_data.py +2 -4
- onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +6 -12
- onnx_diagnostic/torch_models/validate.py +1 -1
- onnx_diagnostic/torch_onnx/compare.py +0 -1
- onnx_diagnostic/torch_onnx/runtime_info.py +1 -1
- onnx_diagnostic/torch_onnx/sbs.py +1 -1
- onnx_diagnostic/torch_onnx/sbs_dataclasses.py +2 -4
- onnx_diagnostic/typing.py +15 -0
- {onnx_diagnostic-0.8.10.dist-info → onnx_diagnostic-0.9.0.dist-info}/METADATA +2 -2
- {onnx_diagnostic-0.8.10.dist-info → onnx_diagnostic-0.9.0.dist-info}/RECORD +55 -50
- {onnx_diagnostic-0.8.10.dist-info → onnx_diagnostic-0.9.0.dist-info}/WHEEL +1 -1
- onnx_diagnostic/api.py +0 -15
- {onnx_diagnostic-0.8.10.dist-info → onnx_diagnostic-0.9.0.dist-info}/licenses/LICENSE.txt +0 -0
- {onnx_diagnostic-0.8.10.dist-info → onnx_diagnostic-0.9.0.dist-info}/top_level.txt +0 -0
```diff
@@ -115,7 +115,7 @@ def make_feeds(
 def _get_dim(i: int, s: Union[str, int], batch: int = 1) -> int:
     if isinstance(s, int):
         return s
-    if s == "batch":
+    if s == "batch" or i == 0:
         return batch
     # Everything else is cache length or sequence length.
     return 0
```
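The hunks in this section appear to come from onnx_diagnostic/helpers/rt_helper.py (the +53 -9 in the file list above matches the added/removed lines). This first change makes `_get_dim` resolve the first axis to the batch size even when its symbolic name is not `"batch"`. A minimal standalone sketch of the new behavior (the function body is copied from the hunk):

```python
from typing import Union

def _get_dim(i: int, s: Union[str, int], batch: int = 1) -> int:
    # Fixed dimensions are kept as-is.
    if isinstance(s, int):
        return s
    # The first axis is now always resolved to the batch size,
    # even if its symbolic name is not "batch".
    if s == "batch" or i == 0:
        return batch
    # Everything else is cache length or sequence length: empty.
    return 0

# A shape ("dyn_batch", "seq", 64) now resolves to (2, 0, 64) with batch=2;
# before this change it resolved to (0, 0, 64).
print(tuple(_get_dim(i, s, batch=2) for i, s in enumerate(("dyn_batch", "seq", 64))))
```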
```diff
@@ -153,9 +153,13 @@ def make_empty_cache(
         [i.type for i in sess.get_inputs()[2:]],
     )
     """
+    assert batch > 0, f"batch size = {batch} must be positive"
     feeds = {}
     for name, shape, dtype in zip(onnx_input_names, onnx_input_shapes, onnx_input_types):
         new_shape = tuple(_get_dim(i, s, batch=batch) for i, s in enumerate(shape))
+        assert (
+            new_shape and new_shape[0] > 0
+        ), f"new_shape={new_shape} cannot have a null batch size, name={name!r}, shape={shape}"
         feeds[name] = torch.empty(new_shape, dtype=rt_type_to_torch_dtype(dtype))
     return feeds
```
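`make_empty_cache` builds zero-length `past_key_values` feeds for the prefill call; the new assertions guarantee the batch axis is never empty. A sketch of the feed dictionary it produces, with illustrative names and shapes (batch 2, 8 heads, head size 64, past length 0):

```python
import torch

# Hypothetical cache inputs as they typically appear in an exported LLM,
# shaped (batch, num_heads, past_sequence_length, head_dim).
cache_names = ["past_key_values.0.key", "past_key_values.0.value"]
feeds = {name: torch.empty((2, 8, 0, 64), dtype=torch.float32) for name in cache_names}
print({k: tuple(v.shape) for k, v in feeds.items()})
# {'past_key_values.0.key': (2, 8, 0, 64), 'past_key_values.0.value': (2, 8, 0, 64)}
```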
```diff
@@ -272,6 +276,7 @@ def generate_and_validate(
 def onnx_generate(
     model_or_path: Union[onnx.ModelProto, str, InferenceSessionForTorch],
     input_ids: torch.Tensor,
+    attention_mask: Optional[torch.Tensor] = None,
     eos_token_id: int = 2,
     max_new_tokens=100,
     return_session: bool = False,
```
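`onnx_generate` gains an optional `attention_mask` argument; when it is left as None, the prefill hunk further down builds a mask of ones matching `input_ids`. A hedged usage sketch (the model path is hypothetical):

```python
import torch

input_ids = torch.tensor([[1, 5, 7, 9]], dtype=torch.int64)
# Equivalent to the default the function now builds when attention_mask is None:
attention_mask = torch.ones(input_ids.shape, dtype=input_ids.dtype)
# outputs = onnx_generate("model.onnx", input_ids, attention_mask=attention_mask)
```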
```diff
@@ -330,7 +335,9 @@ def onnx_generate(
     )

     print("-- generate with onnx")
-    onnx_outputs = onnx_generate(
+    onnx_outputs = onnx_generate(
+        model_name, input_ids[:1], eos_token_id=2, max_new_tokens=10
+    )
     print("-- onnx output", onnx_outputs)

     # The example continues with other functions doing the same.
```
```diff
@@ -364,6 +371,7 @@ def onnx_generate(
     input_names = session.input_names
     input_types = session.input_types
     has_position_ids = "position_ids" in session.input_names
+    has_cache_position = "cache_position" in session.input_names

     assert (
         len(input_names) > 2
```
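Optional inputs are detected by name from the session, mirroring what this change does for `cache_position`. The same check written against onnxruntime's standard API rather than the package's `InferenceSessionForTorch` wrapper (the model path is hypothetical):

```python
import onnxruntime as ort

sess = ort.InferenceSession("model.onnx", providers=["CPUExecutionProvider"])
input_names = [i.name for i in sess.get_inputs()]
has_position_ids = "position_ids" in input_names
has_cache_position = "cache_position" in input_names
print(has_position_ids, has_cache_position)
```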
```diff
@@ -377,21 +385,46 @@ def onnx_generate(
         not has_position_ids or input_names[2] == "position_ids"
     ), f"position_ids must the third input but input_names={input_names}"

+    cache_names, cache_shapes, cache_types = [], [], []
+    for name, shape, dt in zip(input_names, input_shapes, input_types):
+        if name.startswith("past_key_values"):
+            cache_names.append(name)
+            cache_shapes.append(shape)
+            cache_types.append(dt)
+
     # First call: prefill
+    empty_cache = make_empty_cache(input_ids.shape[0], cache_names, cache_shapes, cache_types)
     feeds = dict(
         input_ids=input_ids,
-        attention_mask=
-
-
-
-            input_ids.shape[0], input_names[2:], input_shapes[2:], input_types[2:]
+        attention_mask=(
+            attention_mask
+            if attention_mask is not None
+            else torch.ones(input_ids.shape, dtype=input_ids.dtype, device=input_ids.device)
         ),
+        **empty_cache,
     )
+
     if has_position_ids:
-
+        assert (
+            input_ids.shape[1] > 0
+        ), f"unexpected value for input_ids shape={input_ids.shape}"
+        position_ids = torch.unsqueeze(
             torch.arange(input_ids.shape[1], dtype=torch.int64, device=input_ids.device), 0
         )
+        feeds["position_ids"] = position_ids
+
+    if has_cache_position:
+        assert empty_cache, "no cache means no cache_position"
+        first_tensor = next(iter(empty_cache.values()))
+        cache_position = torch.arange(
+            first_tensor.shape[2],
+            input_ids.shape[1] + first_tensor.shape[2],
+            dtype=torch.int64,
+            device=input_ids.device,
+        )
+        feeds["cache_position"] = cache_position

+    # prefill step
     outputs = session.run(None, feeds)

     # Next calls: decode
```
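When the model exposes a `cache_position` input, the prefill feed derives it from the current past length (axis 2 of any cache tensor, 0 on the first call) through past length plus prompt length. The arithmetic in isolation:

```python
import torch

past_len = 0    # cache.shape[2] before prefill, i.e. an empty cache
prompt_len = 4  # input_ids.shape[1]
cache_position = torch.arange(past_len, prompt_len + past_len, dtype=torch.int64)
print(cache_position)  # tensor([0, 1, 2, 3])
```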
```diff
@@ -424,7 +457,18 @@ def onnx_generate(
         ),
         0,
     )
-
+    if has_cache_position:
+        feeds["cache_position"] = torch.arange(
+            input_ids.shape[1],
+            input_ids.shape[1] + 1,
+            dtype=torch.int64,
+            device=input_ids.device,
+        )
+
+    feeds.update(
+        dict(zip([n for n in input_names if n.startswith("past_key_values")], outputs[1:]))
+    )
+    # generate/decoding step
     outputs = session.run(None, feeds)

     if return_session:
```
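During decoding, the present-cache outputs of one run become the `past_key_values` inputs of the next; the `zip` pairs them positionally, assuming output 0 is the logits and outputs 1.. are the caches in input order. A sketch of the rewiring with hypothetical names (strings stand in for tensors):

```python
input_names = [
    "input_ids", "attention_mask",
    "past_key_values.0.key", "past_key_values.0.value",
]
outputs = ["logits", "present.0.key", "present.0.value"]
cache_inputs = [n for n in input_names if n.startswith("past_key_values")]
feeds = dict(zip(cache_inputs, outputs[1:]))
print(feeds)
# {'past_key_values.0.key': 'present.0.key',
#  'past_key_values.0.value': 'present.0.value'}
```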
```diff
@@ -19,12 +19,7 @@ from .cache_helper import (
     CacheKeyValue,
 )
 from .mini_onnx_builder import create_onnx_model_from_input_tensors
-from .onnx_helper import (
-    to_array_extended,
-    tensor_dtype_to_np_dtype,
-    _STORAGE_TYPE,
-    onnx_dtype_name,
-)
+from .onnx_helper import to_array_extended, tensor_dtype_to_np_dtype, onnx_dtype_name


 def proto_from_tensor(
```
```diff
@@ -84,13 +79,17 @@ def proto_from_tensor(
         byte_data = (ctypes.c_ubyte * numel * element_size).from_address(np_arr.data_ptr())
         tensor.raw_data = bytes(byte_data)
         if sys.byteorder == "big":
-
-
+            storage_type = {
+                onnx.TensorProto.FLOAT16: np.int16,
+                onnx.TensorProto.BFLOAT16: np.int16,
+            }
+            np_dtype = storage_type[tensor.data_type]  # type: ignore
+            np.frombuffer(tensor.raw_data, dtype=np_dtype).byteswap(inplace=True)  # type: ignore
     else:
         tensor.raw_data = np_arr.tobytes()
         if sys.byteorder == "big":
             np_dtype = tensor_dtype_to_np_dtype(tensor.data_type)
-            np.
+            np.frombuffer(tensor.raw_data, dtype=np_dtype).byteswap(inplace=True)
     return tensor
```
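Together with the import cleanup above, the `_STORAGE_TYPE` lookup is now a local mapping: on big-endian hosts, float16/bfloat16 payloads are byteswapped by viewing the raw bytes as 16-bit integers, since numpy has no bfloat16 dtype of its own. The core idea in isolation (2.0 in float16 is the bytes 0x00 0x40 little-endian):

```python
import numpy as np

buf = bytearray(b"\x00\x40")  # float16 2.0, little-endian
# A writable bytearray lets byteswap(inplace=True) mutate the buffer directly.
np.frombuffer(buf, dtype=np.int16).byteswap(inplace=True)
print(buf.hex())  # '4000' -- the same value, big-endian byte order
```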
```diff
@@ -852,9 +851,14 @@ def torch_deepcopy(value: Any) -> Any:
         from .cache_helper import CacheKeyValue

         ca = CacheKeyValue(value)
-
-
+        pairs = list(zip(ca.key_cache, ca.value_cache))
+        assert not hasattr(value, "layers") or len(value.layers) == len(pairs), (
+            f"Size mismatch between {len(value.layers)=} and {len(pairs)=}. "
+            f"value={string_type(value, with_shape=True)}, "
+            f"first key={value.layers[0].keys}, "
+            f"first value={value.layers[0].values}"
         )
+        return make_dynamic_cache(torch_deepcopy(pairs), cls_layers=ca.cls_layers)
     if value.__class__.__name__ == "StaticCache":
         from .cache_helper import CacheKeyValue
```
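For a transformers `DynamicCache`, `torch_deepcopy` now splits the cache into per-layer `(key, value)` pairs, deep-copies those, and rebuilds the cache, asserting the pair count matches the cache's `layers`. A sketch of the pair-splitting step with plain tensors (`CacheKeyValue` and `make_dynamic_cache` are the package's own helpers; the shapes are illustrative):

```python
import copy
import torch

# One (key, value) pair per layer, shaped (batch, heads, past_seq, head_dim).
key_cache = [torch.zeros(1, 2, 3, 4) for _ in range(2)]
value_cache = [torch.zeros(1, 2, 3, 4) for _ in range(2)]
pairs = list(zip(key_cache, value_cache))
assert len(pairs) == 2  # must match the number of layers
copied = copy.deepcopy(pairs)  # stand-in for torch_deepcopy(pairs)
print(len(copied), tuple(copied[0][0].shape))  # 2 (1, 2, 3, 4)
```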