onnx2tf 1.29.19__py3-none-any.whl → 1.29.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onnx2tf/__init__.py +1 -1
- onnx2tf/onnx2tf.py +967 -27
- onnx2tf/ops/GatherElements.py +25 -7
- onnx2tf/ops/GatherND.py +28 -1
- onnx2tf/ops/ScatterElements.py +25 -7
- onnx2tf/ops/ScatterND.py +45 -6
- onnx2tf/ops/TensorScatter.py +20 -6
- onnx2tf/utils/common_functions.py +99 -2
- {onnx2tf-1.29.19.dist-info → onnx2tf-1.29.20.dist-info}/METADATA +25 -3
- {onnx2tf-1.29.19.dist-info → onnx2tf-1.29.20.dist-info}/RECORD +12 -12
- {onnx2tf-1.29.19.dist-info → onnx2tf-1.29.20.dist-info}/WHEEL +0 -0
- {onnx2tf-1.29.19.dist-info → onnx2tf-1.29.20.dist-info}/entry_points.txt +0 -0
onnx2tf/onnx2tf.py
CHANGED
@@ -2,6 +2,8 @@
 
 import os
 import re
+import shutil
+import tempfile
 __path__ = (os.path.dirname(__file__), )
 with open(os.path.join(__path__[0], '__init__.py')) as f:
     init_text = f.read()
@@ -51,6 +53,7 @@ from onnx2tf.utils.common_functions import (
     get_tf_model_outputs,
     rewrite_tflite_inout_opname,
     check_cuda_enabled,
+    check_has_external_data,
 )
 from onnx2tf.utils.json_auto_generator import (
     generate_auto_replacement_json,
@@ -62,6 +65,349 @@ from onnx2tf.utils.enums import (
 from onnx2tf.utils.logging import *
 from sng4onnx import generate as op_name_auto_generate
 
+def _sanitize_split_input_name(name: str) -> str:
+    if not name:
+        return 'tensor'
+    return re.sub(r'[^0-9A-Za-z._-]+', '_', name)
+
+def _write_memmap_array(path: str, array: np.ndarray) -> str:
+    mm = np.lib.format.open_memmap(
+        path,
+        mode='w+',
+        dtype=array.dtype,
+        shape=array.shape,
+    )
+    mm[...] = array
+    mm.flush()
+    return path
+
+
+def _tensorproto_nbytes(tensor: onnx.TensorProto) -> int:
+    if tensor is None:
+        return 0
+    if tensor.HasField('raw_data'):
+        return len(tensor.raw_data)
+    try:
+        np_dtype = onnx.helper.tensor_dtype_to_np_dtype(tensor.data_type)
+    except Exception:
+        np_dtype = None
+    if np_dtype is None:
+        return 0
+    elem_size = np.dtype(np_dtype).itemsize
+    num_elems = int(np.prod(tensor.dims)) if len(tensor.dims) > 0 else 0
+    if num_elems == 0:
+        try:
+            field_name = onnx.helper.tensor_dtype_to_field(tensor.data_type)
+            if hasattr(tensor, field_name):
+                num_elems = len(getattr(tensor, field_name))
+        except Exception:
+            num_elems = 0
+    return num_elems * elem_size
+
+def _collect_initializer_sizes(onnx_graph: onnx.ModelProto) -> Dict[str, int]:
+    initializer_sizes: Dict[str, int] = {}
+    if onnx_graph is None:
+        return initializer_sizes
+    for initializer in onnx_graph.graph.initializer:
+        if not initializer.name:
+            continue
+        try:
+            initializer_sizes[initializer.name] = _tensorproto_nbytes(initializer)
+        except Exception:
+            initializer_sizes[initializer.name] = 0
+    return initializer_sizes
+
+def _collect_node_weight_keys(
+    *,
+    graph: gs.Graph,
+    initializer_sizes: Dict[str, int],
+) -> tuple[List[List[str]], Dict[str, int]]:
+    weight_sizes = dict(initializer_sizes)
+    node_weight_keys: List[List[str]] = []
+    for node in graph.nodes:
+        keys: List[str] = []
+        for inp in node.inputs:
+            if isinstance(inp, gs.Constant):
+                if isinstance(getattr(inp, 'values', None), np.ndarray):
+                    key = f'const:{id(inp)}'
+                    if key not in weight_sizes:
+                        weight_sizes[key] = int(inp.values.nbytes)
+                    keys.append(key)
+                continue
+            name = getattr(inp, 'name', '')
+            if name and name in initializer_sizes:
+                keys.append(name)
+        node_weight_keys.append(keys)
+    return node_weight_keys, weight_sizes
+
+def _auto_partition_ranges(
+    *,
+    node_weight_keys: List[List[str]],
+    weight_sizes: Dict[str, int],
+    max_size_bytes: int,
+    reachable_node_indices: Optional[set] = None,
+) -> List[tuple]:
+    ranges: List[tuple] = []
+    if max_size_bytes <= 0 or not node_weight_keys:
+        return ranges
+    current_keys: set = set()
+    current_bytes = 0
+    start_idx = 0
+    for idx, keys in enumerate(node_weight_keys):
+        new_bytes = 0
+        for key in keys:
+            if key not in current_keys:
+                new_bytes += weight_sizes.get(key, 0)
+                current_keys.add(key)
+        current_bytes += new_bytes
+        if current_bytes >= max_size_bytes and idx > start_idx:
+            if reachable_node_indices is not None and idx not in reachable_node_indices:
+                continue
+            ranges.append((start_idx, idx))
+            start_idx = idx + 1
+            current_keys = set()
+            current_bytes = 0
+    if start_idx <= len(node_weight_keys) - 1:
+        ranges.append((start_idx, len(node_weight_keys) - 1))
+    return ranges
+
+def _collect_reachable_node_indices(
+    graph: gs.Graph,
+    initializer_names: Optional[set] = None,
+) -> set:
+    reachable_nodes: set = set()
+    reachable_vars: set = set()
+    initializer_names = initializer_names or set()
+    for graph_input in graph.inputs:
+        name = getattr(graph_input, 'name', '')
+        if name and name not in initializer_names:
+            reachable_vars.add(name)
+    for idx, node in enumerate(graph.nodes):
+        is_reachable = False
+        for inp in node.inputs:
+            if isinstance(inp, gs.Variable):
+                name = getattr(inp, 'name', '')
+                if name in reachable_vars and name not in initializer_names:
+                    is_reachable = True
+                    break
+        if is_reachable:
+            reachable_nodes.add(idx)
+            for out in node.outputs:
+                name = getattr(out, 'name', '')
+                if name:
+                    reachable_vars.add(name)
+    return reachable_nodes
+
+def _collect_constant_only_node_indices(
+    graph: gs.Graph,
+    initializer_names: Optional[set] = None,
+) -> set:
+    initializer_names = initializer_names or set()
+    const_only_nodes: set = set()
+    for idx, node in enumerate(graph.nodes):
+        has_variable_input = False
+        for inp in node.inputs:
+            if isinstance(inp, gs.Constant):
+                continue
+            name = getattr(inp, 'name', '')
+            if name and name not in initializer_names:
+                has_variable_input = True
+                break
+        if not has_variable_input:
+            const_only_nodes.add(idx)
+    return const_only_nodes
+
+def _complete_custom_inputs_for_graph(
+    *,
+    onnx_graph: onnx.ModelProto,
+    custom_inputs: List[List[Any]],
+    output_dir: str,
+    file_prefix: str,
+    shape_hints: Optional[List[str]] = None,
+    require_mean_std: bool = False,
+) -> List[List[Any]]:
+    gs_graph = gs.import_onnx(onnx_graph)
+    input_names: List[str] = [inp.name for inp in gs_graph.inputs]
+    input_sizes: List[List[Any]] = [inp.shape for inp in gs_graph.inputs]
+    input_dtypes: List[Any] = [inp.dtype for inp in gs_graph.inputs]
+
+    if shape_hints is None:
+        new_input_sizes = []
+        for input_size in input_sizes:
+            new_input_size = []
+            for idx, dim in enumerate(input_size):
+                if idx == 0 and input_sizes and input_sizes[0][0] is not None \
+                    and not isinstance(input_sizes[0][0], str) \
+                    and len(input_sizes[0]) == len(input_size) \
+                    and (dim is None or isinstance(dim, str)):
+                    new_input_size.append(input_sizes[0][0])
+                elif dim is None or isinstance(dim, str):
+                    new_input_size.append(1)
+                else:
+                    new_input_size.append(dim)
+            new_input_sizes.append(new_input_size)
+        input_sizes = new_input_sizes
+    else:
+        shape_hints_dict = {}
+        for hint in shape_hints:
+            parts = hint.split(':')
+            if len(parts) == 2:
+                input_name = parts[0]
+                shape_values = [int(val) for val in parts[1].split(',')]
+                shape_hints_dict[input_name] = shape_values
+        for i, (input_name, original_shape) in enumerate(zip(input_names, input_sizes)):
+            if input_name in shape_hints_dict:
+                updated_shape = shape_hints_dict[input_name]
+                for j, (orig_dim, hint_dim) in enumerate(zip(original_shape, updated_shape)):
+                    if orig_dim is not None and not isinstance(orig_dim, str):
+                        updated_shape[j] = orig_dim
+                    else:
+                        updated_shape[j] = hint_dim
+                input_sizes[i] = updated_shape
+
+    custom_map = {}
+    for item in custom_inputs or []:
+        if len(item) >= 2:
+            custom_map[item[0]] = item
+
+    results: List[List[Any]] = []
+    for input_name, input_size, input_dtype in zip(input_names, input_sizes, input_dtypes):
+        if input_name in custom_map:
+            item = list(custom_map[input_name])
+            if require_mean_std and len(item) == 2:
+                item = [item[0], item[1], 0.0, 1.0]
+            results.append(item)
+            continue
+        dtype = input_dtype if input_dtype is not None else np.float32
+        file_name = f'{file_prefix}_{_sanitize_split_input_name(input_name)}.npy'
+        file_path = os.path.join(output_dir, file_name)
+        mm = np.lib.format.open_memmap(
+            file_path,
+            mode='w+',
+            dtype=dtype,
+            shape=tuple(input_size),
+        )
+        mm[...] = 1
+        mm.flush()
+        if require_mean_std:
+            results.append([input_name, file_path, 0.0, 1.0])
+        else:
+            results.append([input_name, file_path])
+    return results
+
+def _estimate_partition_weight_bytes(
+    *,
+    ranges: List[tuple],
+    node_weight_keys: List[List[str]],
+    weight_sizes: Dict[str, int],
+) -> List[int]:
+    partition_sizes: List[int] = []
+    for start_idx, end_idx in ranges:
+        seen: set = set()
+        total_bytes = 0
+        for idx in range(start_idx, end_idx + 1):
+            for key in node_weight_keys[idx]:
+                if key not in seen:
+                    total_bytes += weight_sizes.get(key, 0)
+                    seen.add(key)
+        partition_sizes.append(total_bytes)
+    return partition_sizes
+
+def _build_partition_io(
+    *,
+    graph: gs.Graph,
+    ranges: List[tuple],
+    const_only_nodes: Optional[set] = None,
+) -> List[Dict[str, Any]]:
+    if not ranges:
+        return []
+    const_only_nodes = const_only_nodes or set()
+    producer_by_tensor: Dict[str, int] = {}
+    consumers_by_tensor: Dict[str, set] = {}
+    graph_output_names = [o.name for o in graph.outputs if o.name]
+    for idx, node in enumerate(graph.nodes):
+        for out in node.outputs:
+            name = getattr(out, 'name', '')
+            if name:
+                producer_by_tensor[name] = idx
+        for inp in node.inputs:
+            if isinstance(inp, gs.Constant):
+                continue
+            name = getattr(inp, 'name', '')
+            if not name:
+                continue
+            consumers_by_tensor.setdefault(name, set()).add(idx)
+
+    partitions: List[Dict[str, Any]] = []
+    for start_idx, end_idx in ranges:
+        node_idx_set = set(range(start_idx, end_idx + 1))
+        part_inputs: set = set()
+        part_outputs: set = set()
+        for idx in node_idx_set:
+            node = graph.nodes[idx]
+            for inp in node.inputs:
+                if isinstance(inp, gs.Constant):
+                    continue
+                name = getattr(inp, 'name', '')
+                if not name:
+                    continue
+                producer_idx = producer_by_tensor.get(name)
+                if producer_idx is None or producer_idx not in node_idx_set:
+                    if producer_idx is not None and producer_idx in const_only_nodes:
+                        continue
+                    part_inputs.add(name)
+            for out in node.outputs:
+                name = getattr(out, 'name', '')
+                if not name:
+                    continue
+                consumers = consumers_by_tensor.get(name, set())
+                if name in graph_output_names or any(c not in node_idx_set for c in consumers):
+                    if idx in const_only_nodes and name not in graph_output_names:
+                        continue
+                    part_outputs.add(name)
+        partitions.append({
+            'inputs': sorted(part_inputs),
+            'outputs': sorted(part_outputs),
+            'node_count': end_idx - start_idx + 1,
+            'start_idx': start_idx,
+            'end_idx': end_idx,
+        })
+    return partitions
+
+def _merge_ranges_with_missing_io(
+    *,
+    graph: gs.Graph,
+    ranges: List[tuple],
+    const_only_nodes: Optional[set] = None,
+) -> tuple[List[tuple], List[Dict[str, Any]]]:
+    if not ranges:
+        return ranges, []
+    ranges = list(ranges)
+    const_only_nodes = const_only_nodes or set()
+    while True:
+        partitions = _build_partition_io(
+            graph=graph,
+            ranges=ranges,
+            const_only_nodes=const_only_nodes,
+        ) or []
+        if all(part['inputs'] and part['outputs'] for part in partitions):
+            return ranges, partitions
+        if len(ranges) <= 1:
+            return ranges, partitions
+        merged = False
+        for idx, part in enumerate(partitions):
+            if not part['inputs'] or not part['outputs']:
+                if idx > 0:
+                    ranges[idx - 1] = (ranges[idx - 1][0], ranges[idx][1])
+                    del ranges[idx]
+                else:
+                    ranges[idx] = (ranges[idx][0], ranges[idx + 1][1])
+                    del ranges[idx + 1]
+                merged = True
+                break
+        if not merged:
+            return ranges, partitions
+
 def fuse_expanded_qdq_to_qdq(
     *,
     graph: gs.Graph,
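The greedy partitioner above walks the toposorted node list once and cuts whenever the cumulative unique-weight byte count reaches the budget, deferring cuts until a reachable node. A minimal standalone sketch with hypothetical byte sizes (not part of the diff):

    node_weight_keys = [['w0'], ['w0', 'w1'], ['w2'], ['w3'], []]
    weight_sizes = {'w0': 600, 'w1': 500, 'w2': 300, 'w3': 200}
    ranges = _auto_partition_ranges(
        node_weight_keys=node_weight_keys,
        weight_sizes=weight_sizes,
        max_size_bytes=1000,
        reachable_node_indices={0, 1, 2, 3, 4},
    )
    # ranges == [(0, 1), (2, 4)]: node 1 lifts the unique-weight total to
    # 600 + 500 >= 1000, so the first cut lands there; nodes 2..4 carry the
    # remaining 500 bytes and become the final partition.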
@@ -285,6 +631,7 @@ def convert(
     quant_norm_std: Optional[str] = '[[[[0.229, 0.224, 0.225]]]]',
     quant_type: Optional[str] = 'per-channel',
     custom_input_op_name_np_data_path: Optional[List] = None,
+    tf_input_cache: Optional[Dict[str, np.ndarray]] = None,
     input_quant_dtype: Optional[str] = 'int8',
     output_quant_dtype: Optional[str] = 'int8',
     not_use_onnxsim: Optional[bool] = False,
@@ -321,6 +668,8 @@ def convert(
     param_replacement_file: Optional[str] = '',
     auto_generate_json: Optional[bool] = False,
     auto_generate_json_on_error: Optional[bool] = False,
+    enable_auto_split_model: Optional[bool] = False,
+    auto_split_max_size_mb: Optional[int] = 1024,
     check_gpu_delegate_compatibility: Optional[bool] = False,
     check_onnx_tf_outputs_elementwise_close: Optional[bool] = False,
     check_onnx_tf_outputs_elementwise_close_full: Optional[bool] = False,
@@ -451,6 +800,10 @@ def convert(
         ["input2","input2.npy",[0.3],[0.07]],\n
         ]
 
+    tf_input_cache: Optional[Dict[str, np.ndarray]]
+        Cache of TF dummy inference inputs keyed by TF input tensor name.\n
+        Used to propagate TF outputs between auto-split partitions.\n
+
     input_quant_dtype: Optional[str]
         Input dtypes when doing Full INT8 Quantization.\n
         "int8"(default) or "uint8" or "float32"
@@ -682,6 +1035,15 @@ def convert(
         This is now opt-in and requires explicitly enabling the feature.\n
         Default: False
 
+    enable_auto_split_model: Optional[bool]
+        Force auto split regardless of the ONNX file size.\n
+        The target size is controlled by auto_split_max_size_mb.\n
+        Default: False
+
+    auto_split_max_size_mb: Optional[int]
+        Target maximum size per partition in MB based on ONNX initializer sizes.\n
+        Default: 1024
+
     check_gpu_delegate_compatibility: Optional[bool]
         Run TFLite ModelAnalyzer on the generated Float16 tflite model\n
        to check if the model can be supported by GPU Delegate.
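A minimal Python-side sketch of the two new arguments documented above (file and folder names are placeholders):

    import onnx2tf

    model = onnx2tf.convert(
        input_onnx_file_path='large_model.onnx',
        output_folder_path='saved_model',
        enable_auto_split_model=True,  # force auto split even below the 2GB trigger
        auto_split_max_size_mb=512,    # target roughly 512 MB of weights per partition
    )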
@@ -771,6 +1133,23 @@ def convert(
             f'input_onnx_file_path: {input_onnx_file_path}'
         )
         sys.exit(1)
+    auto_split_model = bool(enable_auto_split_model)
+    if auto_split_model:
+        info(
+            Color.GREEN('Auto split forced by --enable_auto_split_model. ') +
+            f'target={auto_split_max_size_mb} MB'
+        )
+    if onnx_graph is None and input_onnx_file_path and os.path.exists(input_onnx_file_path):
+        try:
+            onnx_file_size = os.path.getsize(input_onnx_file_path)
+            if not auto_split_model and onnx_file_size > 2 * 1024 * 1024 * 1024:
+                info(
+                    Color.GREEN('ONNX file exceeds 2GB; switching to auto-split mode. ') +
+                    f'size={onnx_file_size / (1024 * 1024 * 1024):.2f} GB'
+                )
+                auto_split_model = True
+        except Exception:
+            pass
 
     # Extracting onnx filenames
     output_file_name = ''
@@ -917,9 +1296,8 @@ def convert(
         exported_onnx_graph = gs.export_onnx(graph, do_type_check=False, **meta_data)
         if metadata_props is not None:
             exported_onnx_graph.metadata_props.extend(metadata_props)
-
-
-        del estimated_graph
+        onnx.save(exported_onnx_graph, f=input_onnx_file_path)
+        del exported_onnx_graph
     except:
         if tmp_graph is not None:
             del tmp_graph
@@ -957,6 +1335,7 @@ def convert(
             input_onnx_file_path=f'{input_onnx_file_path}',
             onnx_graph=onnx_graph,
             output_onnx_file_path=f'{input_onnx_file_path}',
+            has_external_data=has_external_data,
             non_verbose=True,
         )
         info(Color.GREEN(f'Automatic generation of each OP name complete!'))
@@ -978,9 +1357,24 @@ def convert(
 
     # Loading Graphs
     # onnx_graph If specified, onnx_graph is processed first
+    has_external_data = False
     if not onnx_graph:
+        has_external_data = check_has_external_data(input_onnx_file_path)
         onnx_graph = onnx.load(input_onnx_file_path)
 
+    if not auto_split_model and onnx_graph is not None:
+        try:
+            initializer_sizes = _collect_initializer_sizes(onnx_graph)
+            total_init_bytes = sum(initializer_sizes.values())
+            if total_init_bytes > 2 * 1024 * 1024 * 1024:
+                info(
+                    Color.GREEN('ONNX graph estimated initializer size exceeds 2GB; ') +
+                    f'switching to auto-split mode. size={total_init_bytes / (1024 * 1024 * 1024):.2f} GB'
+                )
+                auto_split_model = True
+        except Exception:
+            pass
+
     domain: str = onnx_graph.domain
     ir_version: int = onnx_graph.ir_version
     meta_data = {'domain': domain, 'ir_version': ir_version}
@@ -990,6 +1384,522 @@ def convert(
     graph = gs.import_onnx(onnx_graph)
     fuse_expanded_qdq_to_qdq(graph=graph)
 
+    # Auto split model by estimated weight size
+    if auto_split_model:
+        if input_names_to_interrupt_model_conversion or output_names_to_interrupt_model_conversion:
+            error(
+                'Auto split cannot be used together with input_names_to_interrupt_model_conversion '
+                'or output_names_to_interrupt_model_conversion.'
+            )
+            sys.exit(1)
+        if auto_split_max_size_mb is None or auto_split_max_size_mb <= 0:
+            error(
+                f'auto_split_max_size_mb must be greater than 0. auto_split_max_size_mb: {auto_split_max_size_mb}'
+            )
+            sys.exit(1)
+        try:
+            import sne4onnx
+        except Exception:
+            error(
+                'Auto split requires sne4onnx. pip install sne4onnx'
+            )
+            sys.exit(1)
+        try:
+            graph.toposort()
+        except Exception:
+            pass
+
+        onnx_graph_for_split = onnx_graph
+        try:
+            onnx_graph_for_split = gs.export_onnx(
+                graph=graph,
+                do_type_check=False,
+                **meta_data,
+            )
+            if metadata_props is not None:
+                onnx_graph_for_split.metadata_props.extend(metadata_props)
+        except Exception:
+            onnx_graph_for_split = onnx_graph
+
+        initializer_sizes = _collect_initializer_sizes(onnx_graph_for_split)
+        node_weight_keys, weight_sizes = _collect_node_weight_keys(
+            graph=graph,
+            initializer_sizes=initializer_sizes,
+        )
+        const_only_nodes = _collect_constant_only_node_indices(
+            graph,
+            initializer_names=set(initializer_sizes.keys()),
+        )
+        reachable_node_indices = _collect_reachable_node_indices(
+            graph,
+            initializer_names=set(initializer_sizes.keys()),
+        )
+        max_size_bytes = int(auto_split_max_size_mb) * 1024 * 1024
+        ranges = _auto_partition_ranges(
+            node_weight_keys=node_weight_keys,
+            weight_sizes=weight_sizes,
+            max_size_bytes=max_size_bytes,
+            reachable_node_indices=reachable_node_indices,
+        )
+        if len(ranges) > 1:
+            ranges, partitions = _merge_ranges_with_missing_io(
+                graph=graph,
+                ranges=ranges,
+                const_only_nodes=const_only_nodes,
+            )
+            if not partitions:
+                warn(
+                    'Auto split failed to determine partition boundaries. Proceeding without split.'
+                )
+            else:
+                if any([not p['inputs'] or not p['outputs'] for p in partitions]):
+                    warn(
+                        'Auto split produced partitions with missing inputs or outputs. '
+                        'Some partitions may not be inferable.'
+                    )
+                partition_sizes = _estimate_partition_weight_bytes(
+                    ranges=ranges,
+                    node_weight_keys=node_weight_keys,
+                    weight_sizes=weight_sizes,
+                )
+                try:
+                    op_type_list = list(set([node.op for node in graph.nodes]))
+                    local_use_cuda = sum(
+                        [1 if op_type in CUDA_ONLY_OPS else 0 for op_type in op_type_list]
+                    ) > 0
+                except Exception:
+                    local_use_cuda = False
+                info('')
+                info(Color.REVERSE(f'Auto model partitioning enabled'), '=' * 44)
+                info(
+                    Color.GREEN(f'Target partition size (estimated weights): ') +
+                    f'{auto_split_max_size_mb} MB'
+                )
+                for idx, part in enumerate(partitions):
+                    size_mb = partition_sizes[idx] / (1024 * 1024)
+                    info(
+                        f'  part {idx+1}: nodes={part["node_count"]}, '
+                        f'est_weights={size_mb:.2f} MB, '
+                        f'inputs={len(part["inputs"])}, outputs={len(part["outputs"])}'
+                    )
+                    info(
+                        f'    inputs: {", ".join(part["inputs"]) if part["inputs"] else "(none)"}'
+                    )
+                    info(
+                        f'    outputs: {", ".join(part["outputs"]) if part["outputs"] else "(none)"}'
+                    )
+
+                split_input_cache: Dict[str, str] = {}
+                split_input_dir = tempfile.mkdtemp(prefix='onnx2tf_split_')
+                split_tf_input_cache: Dict[str, np.ndarray] = {}
+                split_output_layouts: Dict[str, bool] = {}
+
+                def _sanitize_tf_input_name(name: str) -> str:
+                    if name is None:
+                        return ''
+                    sanitized = name.replace(':', '__')
+                    if output_signaturedefs or output_integer_quantized_tflite:
+                        sanitized = re.sub('^/', 'wa/', sanitized)
+                    return f'{sanitized}:0'
+
+                def _normalize_onnx_output_name(name: str) -> str:
+                    if name is None:
+                        return ''
+                    normalized = name.replace(':', '__')
+                    if output_signaturedefs or output_integer_quantized_tflite:
+                        normalized = re.sub('^/', '', normalized)
+                    return normalized
+
+                def _common_layout_perms(rank: int) -> List[tuple]:
+                    if rank == 3:
+                        return [(0, 1, 2), (0, 2, 1)]
+                    if rank == 4:
+                        return [(0, 1, 2, 3), (0, 2, 3, 1), (0, 3, 1, 2)]
+                    if rank == 5:
+                        return [(0, 1, 2, 3, 4), (0, 2, 3, 4, 1), (0, 4, 1, 2, 3)]
+                    return [tuple(range(rank))]
+
+                def _build_onnx_tf_output_map(
+                    *,
+                    onnx_output_names: List[str],
+                    tf_output_tensors: List[tf.Tensor],
+                    onnx_output_values: Optional[Dict[str, np.ndarray]] = None,
+                    tf_output_values: Optional[Dict[str, np.ndarray]] = None,
+                ) -> Dict[str, tf.Tensor]:
+                    tf_by_base = {t.name.split(':')[0]: t for t in tf_output_tensors}
+                    tf_by_full = {t.name: t for t in tf_output_tensors}
+                    mapping: Dict[str, tf.Tensor] = {}
+                    used_tf: set = set()
+                    missing: List[str] = []
+                    for onnx_name in onnx_output_names:
+                        normalized = _normalize_onnx_output_name(onnx_name)
+                        candidates = [
+                            normalized,
+                            f'wa/{normalized}',
+                        ]
+                        tf_tensor = None
+                        for cand in candidates:
+                            if cand in tf_by_base:
+                                tf_tensor = tf_by_base[cand]
+                                break
+                            full_name = f'{cand}:0'
+                            if full_name in tf_by_full:
+                                tf_tensor = tf_by_full[full_name]
+                                break
+                        if tf_tensor is None:
+                            if onnx_name in tf_by_base:
+                                tf_tensor = tf_by_base[onnx_name]
+                            else:
+                                full_name = f'{onnx_name}:0'
+                                if full_name in tf_by_full:
+                                    tf_tensor = tf_by_full[full_name]
+                        if tf_tensor is not None:
+                            mapping[onnx_name] = tf_tensor
+                            used_tf.add(tf_tensor.name)
+                        else:
+                            missing.append(onnx_name)
+
+                    if onnx_output_values and tf_output_values and missing:
+                        tf_candidates = []
+                        for tf_tensor in tf_output_tensors:
+                            tf_val = tf_output_values.get(tf_tensor.name)
+                            if tf_val is None:
+                                tf_val = tf_output_values.get(tf_tensor.name.split(':')[0])
+                            if tf_val is not None:
+                                tf_candidates.append((tf_tensor, tf_val))
+                        still_missing = []
+                        for onnx_name in list(missing):
+                            onnx_val = onnx_output_values.get(onnx_name)
+                            if onnx_val is None:
+                                still_missing.append(onnx_name)
+                                continue
+                            best = None
+                            best_err = None
+                            for tf_tensor, tf_val in tf_candidates:
+                                if tf_tensor.name in used_tf:
+                                    continue
+                                if tf_val.shape != onnx_val.shape:
+                                    continue
+                                err = np.max(np.abs(onnx_val - tf_val))
+                                if best is None or err < best_err:
+                                    best = tf_tensor
+                                    best_err = err
+                            if best is None:
+                                for tf_tensor, tf_val in tf_candidates:
+                                    if tf_tensor.name in used_tf:
+                                        continue
+                                    if tf_val.ndim != onnx_val.ndim:
+                                        continue
+                                    for perm in _common_layout_perms(tf_val.ndim):
+                                        if tf_val.transpose(perm).shape != onnx_val.shape:
+                                            continue
+                                        err = np.max(np.abs(onnx_val - tf_val.transpose(perm)))
+                                        if best is None or err < best_err:
+                                            best = tf_tensor
+                                            best_err = err
+                            if best is not None and best_err is not None and best_err <= 1e-3:
+                                mapping[onnx_name] = best
+                                used_tf.add(best.name)
+                            else:
+                                still_missing.append(onnx_name)
+                        missing = still_missing
+                    if missing:
+                        warn(
+                            'Auto split output mapping failed for: ' +
+                            ', '.join(missing) +
+                            '. Output cache/layout may be incomplete.'
+                        )
+                    return mapping
+
+                def _onnx_output_shape_map(onnx_model: onnx.ModelProto) -> Dict[str, List[Optional[int]]]:
+                    shape_map: Dict[str, List[Optional[int]]] = {}
+                    try:
+                        for out in onnx_model.graph.output:
+                            dims: List[Optional[int]] = []
+                            t = out.type.tensor_type
+                            if t.HasField('shape'):
+                                for d in t.shape.dim:
+                                    if d.dim_value > 0:
+                                        dims.append(int(d.dim_value))
+                                    elif d.dim_param:
+                                        dims.append(None)
+                                    else:
+                                        dims.append(None)
+                            if dims:
+                                shape_map[out.name] = dims
+                    except Exception:
+                        pass
+                    return shape_map
+
+                def _infer_keep_shape(onnx_shape: List[Optional[int]], tf_shape: List[int]) -> Optional[bool]:
+                    if not onnx_shape or any(d is None for d in onnx_shape):
+                        return None
+                    if list(onnx_shape) == list(tf_shape):
+                        return True
+                    rank = len(onnx_shape)
+                    if rank == 3:
+                        if list(tf_shape) == [onnx_shape[0], onnx_shape[2], onnx_shape[1]]:
+                            return False
+                    elif rank == 4:
+                        if list(tf_shape) == [onnx_shape[0], onnx_shape[2], onnx_shape[3], onnx_shape[1]]:
+                            return False
+                    elif rank == 5:
+                        if list(tf_shape) == [onnx_shape[0], onnx_shape[2], onnx_shape[3], onnx_shape[4], onnx_shape[1]]:
+                            return False
+                    return None
+
+                def _merge_custom_inputs(user_inputs, auto_inputs):
+                    merged = []
+                    seen = set()
+                    if user_inputs:
+                        for item in user_inputs:
+                            if len(item) >= 2:
+                                merged.append(item)
+                                seen.add(item[0])
+                    for item in auto_inputs:
+                        if len(item) >= 2 and item[0] not in seen:
+                            merged.append(item)
+                            seen.add(item[0])
+                    return merged
+
+                base_kwargs = {
+                    'input_onnx_file_path': input_onnx_file_path if input_onnx_file_path is not None else None,
+                    'onnx_graph': onnx_graph,
+                    'output_folder_path': output_folder_path,
+                    'output_signaturedefs': output_signaturedefs,
+                    'output_h5': output_h5,
+                    'output_keras_v3': output_keras_v3,
+                    'output_tfv1_pb': output_tfv1_pb,
+                    'output_weights': output_weights,
+                    'copy_onnx_input_output_names_to_tflite': copy_onnx_input_output_names_to_tflite,
+                    'output_dynamic_range_quantized_tflite': output_dynamic_range_quantized_tflite,
+                    'output_integer_quantized_tflite': output_integer_quantized_tflite,
+                    'quant_norm_mean': quant_norm_mean,
+                    'quant_norm_std': quant_norm_std,
+                    'quant_type': quant_type,
+                    'custom_input_op_name_np_data_path': custom_input_op_name_np_data_path,
+                    'tf_input_cache': split_tf_input_cache,
+                    'input_quant_dtype': input_quant_dtype,
+                    'output_quant_dtype': output_quant_dtype,
+                    'not_use_onnxsim': not_use_onnxsim,
+                    'not_use_opname_auto_generate': not_use_opname_auto_generate,
+                    'batch_size': batch_size,
+                    'overwrite_input_shape': overwrite_input_shape,
+                    'shape_hints': shape_hints,
+                    'no_large_tensor': no_large_tensor,
+                    'output_nms_with_dynamic_tensor': output_nms_with_dynamic_tensor,
+                    'switch_nms_version': switch_nms_version,
+                    'keep_ncw_or_nchw_or_ncdhw_input_names': keep_ncw_or_nchw_or_ncdhw_input_names,
+                    'keep_nwc_or_nhwc_or_ndhwc_input_names': keep_nwc_or_nhwc_or_ndhwc_input_names,
+                    'keep_shape_absolutely_input_names': keep_shape_absolutely_input_names,
+                    'input_names_to_interrupt_model_conversion': None,
+                    'output_names_to_interrupt_model_conversion': None,
+                    'disable_group_convolution': disable_group_convolution,
+                    'enable_accumulation_type_float16': enable_accumulation_type_float16,
+                    'enable_batchmatmul_unfold': enable_batchmatmul_unfold,
+                    'enable_rnn_unroll': enable_rnn_unroll,
+                    'disable_suppression_flextranspose': disable_suppression_flextranspose,
+                    'disable_strict_mode': disable_strict_mode,
+                    'onnxruntime_output_memmap': onnxruntime_output_memmap,
+                    'onnxruntime_output_memmap_dir': onnxruntime_output_memmap_dir,
+                    'number_of_dimensions_after_flextranspose_compression': number_of_dimensions_after_flextranspose_compression,
+                    'disable_suppression_flexstridedslice': disable_suppression_flexstridedslice,
+                    'number_of_dimensions_after_flexstridedslice_compression': number_of_dimensions_after_flexstridedslice_compression,
+                    'optimization_for_gpu_delegate': optimization_for_gpu_delegate,
+                    'replace_argmax_to_reducemax_and_indices_is_int64': replace_argmax_to_reducemax_and_indices_is_int64,
+                    'replace_argmax_to_reducemax_and_indices_is_float32': replace_argmax_to_reducemax_and_indices_is_float32,
+                    'replace_argmax_to_fused_argmax_and_indices_is_int64': replace_argmax_to_fused_argmax_and_indices_is_int64,
+                    'replace_argmax_to_fused_argmax_and_indices_is_float32': replace_argmax_to_fused_argmax_and_indices_is_float32,
+                    'fused_argmax_scale_ratio': fused_argmax_scale_ratio,
+                    'replace_to_pseudo_operators': replace_to_pseudo_operators,
+                    'param_replacement_file': param_replacement_file,
+                    'auto_generate_json': auto_generate_json,
+                    'auto_generate_json_on_error': auto_generate_json_on_error,
+                    'enable_auto_split_model': False,
+                    'auto_split_max_size_mb': auto_split_max_size_mb,
+                    'check_gpu_delegate_compatibility': check_gpu_delegate_compatibility,
+                    'check_onnx_tf_outputs_elementwise_close': check_onnx_tf_outputs_elementwise_close,
+                    'check_onnx_tf_outputs_elementwise_close_full': check_onnx_tf_outputs_elementwise_close_full,
+                    'check_onnx_tf_outputs_sample_data_normalization': check_onnx_tf_outputs_sample_data_normalization,
+                    'check_onnx_tf_outputs_elementwise_close_rtol': check_onnx_tf_outputs_elementwise_close_rtol,
+                    'check_onnx_tf_outputs_elementwise_close_atol': check_onnx_tf_outputs_elementwise_close_atol,
+                    'mvn_epsilon': mvn_epsilon,
+                    'disable_model_save': disable_model_save,
+                    'non_verbose': non_verbose,
+                    'verbosity': verbosity,
+                }
+                base_kwargs['input_names_to_interrupt_model_conversion'] = None
+                base_kwargs['output_names_to_interrupt_model_conversion'] = None
+
+                model_ret = None
+                try:
+                    for idx, part in enumerate(partitions):
+                        part_output_values: Optional[Dict[str, np.ndarray]] = None
+                        part_output_folder = os.path.join(
+                            output_folder_path,
+                            f'part_{idx+1:04d}',
+                        )
+                        base_name = os.path.splitext(os.path.basename(input_onnx_file_path))[0] \
+                            if input_onnx_file_path else 'model'
+                        os.makedirs(part_output_folder, exist_ok=True)
+                        split_onnx_path = os.path.join(
+                            part_output_folder,
+                            f'{base_name}_part_{idx+1:04d}.onnx'
+                        )
+                        part_graph = sne4onnx.extraction(
+                            input_op_names=part['inputs'],
+                            output_op_names=part['outputs'],
+                            onnx_graph=onnx_graph_for_split,
+                            output_onnx_file_path=split_onnx_path,
+                            has_external_data=has_external_data,
+                        )
+                        auto_custom_inputs = []
+                        if split_input_cache:
+                            for input_name in part['inputs']:
+                                if input_name in split_input_cache:
+                                    auto_custom_inputs.append([
+                                        input_name,
+                                        split_input_cache[input_name],
+                                    ])
+                        merged_custom_inputs = _merge_custom_inputs(
+                            custom_input_op_name_np_data_path,
+                            auto_custom_inputs,
+                        )
+                        # For the first partition, keep the same behavior as non-split conversion.
+                        # Only user-provided custom inputs are used.
+                        if idx == 0 and not auto_custom_inputs:
+                            custom_inputs_for_part = merged_custom_inputs
+                        else:
+                            require_mean_std = bool(output_integer_quantized_tflite)
+                            custom_inputs_for_part = _complete_custom_inputs_for_graph(
+                                onnx_graph=part_graph,
+                                custom_inputs=merged_custom_inputs,
+                                output_dir=split_input_dir,
+                                file_prefix=f'part_{idx+1:04d}',
+                                shape_hints=shape_hints,
+                                require_mean_std=require_mean_std,
+                            )
+
+                        # Propagate dummy outputs to next partitions
+                        try:
+                            has_inputs = len(part_graph.graph.input) > 0
+                            has_outputs = len(part_graph.graph.output) > 0
+                            if has_inputs and has_outputs:
+                                part_input_datas = {}
+                                part_outputs = dummy_onnx_inference(
+                                    onnx_graph=part_graph,
+                                    output_names=part['outputs'],
+                                    test_data_nhwc=None,
+                                    custom_input_op_name_np_data_path=custom_inputs_for_part,
+                                    tf_layers_dict={},
+                                    use_cuda=local_use_cuda,
+                                    disable_strict_mode=disable_strict_mode,
+                                    enable_ort_output_memmap=False,
+                                    ort_output_memmap_dir=None,
+                                    shape_hints=shape_hints,
+                                    input_datas_for_validation=part_input_datas,
+                                )
+                                for input_name, input_value in part_input_datas.items():
+                                    file_name = (
+                                        f'part_{idx+1:04d}_' +
+                                        f'{_sanitize_split_input_name(input_name)}.npy'
+                                    )
+                                    file_path = os.path.join(split_input_dir, file_name)
+                                    split_input_cache[input_name] = _write_memmap_array(
+                                        file_path,
+                                        input_value,
+                                    )
+                                part_output_values = {
+                                    name: value for name, value in zip(part['outputs'], part_outputs)
+                                }
+                                for output_name, output_value in zip(part['outputs'], part_outputs):
+                                    file_name = (
+                                        f'part_{idx+1:04d}_' +
+                                        f'{_sanitize_split_input_name(output_name)}.npy'
+                                    )
+                                    file_path = os.path.join(split_input_dir, file_name)
+                                    split_input_cache[output_name] = _write_memmap_array(
+                                        file_path,
+                                        output_value,
+                                    )
+                            else:
+                                warn(
+                                    'Auto split input propagation skipped for this partition '
+                                    'because it has no inputs or outputs.'
+                                )
+                        except Exception as ex:
+                            warn(
+                                'Auto split input propagation failed for this partition. '
+                                'Subsequent partitions may use default dummy inputs.'
+                            )
+                            warn(f'{ex}')
+
+                        part_kwargs = dict(base_kwargs)
+                        if split_output_layouts:
+                            part_keep_shape_abs = set(keep_shape_absolutely_input_names or [])
+                            for input_name in part['inputs']:
+                                if split_output_layouts.get(input_name, False):
+                                    part_keep_shape_abs.add(input_name)
+                            part_kwargs['keep_shape_absolutely_input_names'] = \
+                                list(part_keep_shape_abs) if part_keep_shape_abs else None
+                        part_kwargs['input_onnx_file_path'] = split_onnx_path
+                        part_kwargs['onnx_graph'] = part_graph
+                        part_kwargs['output_folder_path'] = part_output_folder
+                        if custom_inputs_for_part:
+                            part_kwargs['custom_input_op_name_np_data_path'] = custom_inputs_for_part
+                        model_ret = convert(**part_kwargs)
+
+                        if hasattr(model_ret, 'onnx_output_layouts') \
+                            and isinstance(model_ret.onnx_output_layouts, dict):
+                            for out_name in part['outputs']:
+                                if out_name in model_ret.onnx_output_layouts:
+                                    split_output_layouts[out_name] = \
+                                        bool(model_ret.onnx_output_layouts[out_name])
+
+                        # Cache TF outputs for the next partition's TF dummy inference.
+                        try:
+                            tf_outputs = dummy_tf_inference(
+                                model=model_ret,
+                                inputs=model_ret.inputs,
+                                test_data_nhwc=None,
+                                custom_input_op_name_np_data_path=custom_input_op_name_np_data_path,
+                                prefilled_input_datas=split_tf_input_cache,
+                                shape_hints=shape_hints,
+                                keep_shape_absolutely_input_names=keep_shape_absolutely_input_names,
+                                keep_ncw_or_nchw_or_ncdhw_input_names=keep_ncw_or_nchw_or_ncdhw_input_names,
+                                keep_nwc_or_nhwc_or_ndhwc_input_names=keep_nwc_or_nhwc_or_ndhwc_input_names,
+                            )
+                            onnx_output_shapes = _onnx_output_shape_map(part_graph)
+                            if model_ret.outputs and part['outputs']:
+                                tf_output_map = _build_onnx_tf_output_map(
+                                    onnx_output_names=part['outputs'],
+                                    tf_output_tensors=model_ret.outputs,
+                                    onnx_output_values=part_output_values,
+                                    tf_output_values=tf_outputs,
+                                )
+                                for onnx_out, tf_tensor in tf_output_map.items():
+                                    tf_val = tf_outputs.get(tf_tensor.name)
+                                    if tf_val is None:
+                                        continue
+                                    # Store both full and base TF names to maximize cache hits.
+                                    split_tf_input_cache[tf_tensor.name] = tf_val
+                                    split_tf_input_cache[tf_tensor.name.split(':')[0]] = tf_val
+                                    # Keep legacy key for compatibility with existing lookups.
+                                    sanitized = _sanitize_tf_input_name(onnx_out)
+                                    split_tf_input_cache[sanitized] = tf_val
+                                    split_tf_input_cache[sanitized.split(':')[0]] = tf_val
+                                    keep_shape = _infer_keep_shape(
+                                        onnx_output_shapes.get(onnx_out),
+                                        list(tf_val.shape),
+                                    )
+                                    if keep_shape is not None:
+                                        split_output_layouts[onnx_out] = keep_shape
+                        except Exception:
+                            pass
+                finally:
+                    shutil.rmtree(split_input_dir, ignore_errors=True)
+                return model_ret
+
     # Cut the ONNX graph when an input name is specified that interrupts the conversion
     if not input_names_to_interrupt_model_conversion:
         input_names = [
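For reference, the layout probe `_infer_keep_shape` defined above only decides when every ONNX dim is static; illustrative calls of its contract (the helper is nested inside convert(), shown standalone here):

    _infer_keep_shape([1, 3, 224, 224], [1, 224, 224, 3])     # -> False: NCHW->NHWC transpose detected
    _infer_keep_shape([1, 3, 224, 224], [1, 3, 224, 224])     # -> True: layout preserved
    _infer_keep_shape([1, None, 224, 224], [1, 224, 224, 3])  # -> None: dynamic dim, undecidable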
@@ -1218,6 +2128,7 @@ def convert(
         'relu_relu6_merge_op_names': {},
         'mul_div_replace_op_names': {},
         'use_cuda': use_cuda,
+        'tf_input_cache': tf_input_cache,
     }
 
     tf_layers_dict = {}
@@ -1291,30 +2202,29 @@ def convert(
     )
 
     # download test data
-    all_four_dim = sum(
-        [
-            1 for input in inputs \
-            if len(input.shape) == 4 \
-            and input.shape[0] is not None \
-            and input.shape[0] <= 20 \
-            and input.shape[-1] == 3 \
-            and input.shape[1] is not None \
-            and input.shape[2] is not None
-        ]
-    ) == len(inputs)
-    same_batch_dim = False
-    if all_four_dim:
-        batch_size = inputs[0].shape[0]
-        for input in inputs:
-            same_batch_dim = batch_size == input.shape[0]
     test_data_nhwc = None
-    if all_four_dim and same_batch_dim:
-        test_data: np.ndarray = download_test_image_data()
-        test_data_nhwc = test_data[:inputs[0].shape[0], ...]
-        if check_onnx_tf_outputs_sample_data_normalization == "denorm":
-            test_data_nhwc = test_data_nhwc * 255.0
+    if inputs:
+        all_four_dim = sum(
+            [
+                1 for input in inputs \
+                if len(input.shape) == 4 \
+                and input.shape[0] is not None \
+                and input.shape[0] <= 20 \
+                and input.shape[-1] == 3 \
+                and input.shape[1] is not None \
+                and input.shape[2] is not None
+            ]
+        ) == len(inputs)
+        same_batch_dim = False
+        if all_four_dim:
+            batch_size = inputs[0].shape[0]
+            for input in inputs:
+                same_batch_dim = batch_size == input.shape[0]
+        if all_four_dim and same_batch_dim:
+            test_data: np.ndarray = download_test_image_data()
+            test_data_nhwc = test_data[:inputs[0].shape[0], ...]
+            if check_onnx_tf_outputs_sample_data_normalization == "denorm":
+                test_data_nhwc = test_data_nhwc * 255.0
 
     # ONNX dummy inference
     # Generate output for all OPs.
@@ -1400,7 +2310,10 @@ def convert(
         exported_onnx_graph = gs.export_onnx(graph, do_type_check=False, **meta_data)
         if metadata_props is not None:
             exported_onnx_graph.metadata_props.extend(metadata_props)
-        estimated_graph = onnx.shape_inference.infer_shapes(exported_onnx_graph)
+        if not has_external_data:
+            estimated_graph = onnx.shape_inference.infer_shapes(exported_onnx_graph)
+        else:
+            estimated_graph = exported_onnx_graph
         if input_onnx_file_path is not None:
             onnx.save(estimated_graph, input_onnx_file_path)
     if not not_use_onnxsim:
@@ -1580,6 +2493,14 @@ def convert(
                 outputs[oidx] = tf_keras.layers.Lambda(lambda x: tf.constant(y))(x)
 
     model = tf_keras.Model(inputs=inputs, outputs=outputs)
+    try:
+        onnx_output_layouts = {
+            name: tf_layers_dict.get(name, {}).get('nhwc', False)
+            for name in onnx_graph_output_names
+        }
+        model.onnx_output_layouts = onnx_output_layouts
+    except Exception:
+        pass
     debug('')
 
     # The process ends normally without saving the model.
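After conversion, each returned Keras model now carries a best-effort map from ONNX output name to its NHWC flag, attached in the try-block above; a minimal sketch of reading it (model path and output names are hypothetical):

    import onnx2tf

    model = onnx2tf.convert(input_onnx_file_path='large_model.onnx')
    print(getattr(model, 'onnx_output_layouts', {}))
    # e.g. {'logits': False, 'feature_map': True}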
@@ -3050,6 +3971,23 @@ def main():
         'e.g. \n' +
         '--output_names_to_interrupt_model_conversion "output0" "output1" "output2"'
     )
+    parser.add_argument(
+        '-easm',
+        '--enable_auto_split_model',
+        action='store_true',
+        help=\
+            'Force auto split regardless of the ONNX file size. \n' +
+            'Uses --auto_split_max_size_mb as the target partition size.'
+    )
+    parser.add_argument(
+        '-asmsm',
+        '--auto_split_max_size_mb',
+        type=int,
+        default=1024,
+        help=\
+            'Target maximum size per partition in MB based on ONNX initializer sizes. \n' +
+            'Used when auto-split is triggered or forced.'
+    )
     parser.add_argument(
         '-dgc',
         '--disable_group_convolution',
@@ -3450,6 +4388,8 @@ def main():
         param_replacement_file=args.param_replacement_file,
         auto_generate_json=args.auto_generate_json,
         auto_generate_json_on_error=args.auto_generate_json_on_error,
+        enable_auto_split_model=args.enable_auto_split_model,
+        auto_split_max_size_mb=args.auto_split_max_size_mb,
         check_gpu_delegate_compatibility=args.check_gpu_delegate_compatibility,
         check_onnx_tf_outputs_elementwise_close=args.check_onnx_tf_outputs_elementwise_close,
         check_onnx_tf_outputs_elementwise_close_full=args.check_onnx_tf_outputs_elementwise_close_full,
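On the CLI side, the same behavior is reachable through the two new arguments registered above; a typical invocation might look like this (model filename is a placeholder):

    onnx2tf -i large_model.onnx -easm -asmsm 512
    # equivalent long form:
    onnx2tf -i large_model.onnx --enable_auto_split_model --auto_split_max_size_mb 512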