onnxslim 0.1.46__tar.gz → 0.1.77__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. onnxslim-0.1.77/PKG-INFO +146 -0
  2. onnxslim-0.1.77/README.md +112 -0
  3. onnxslim-0.1.77/VERSION +1 -0
  4. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/__init__.py +0 -1
  5. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/argparser.py +36 -8
  6. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/cli/_main.py +21 -8
  7. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/__init__.py +17 -9
  8. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/optimization/__init__.py +71 -14
  9. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/optimization/dead_node_elimination.py +31 -17
  10. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/optimization/subexpression_elimination.py +11 -20
  11. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/optimization/weight_tying.py +19 -0
  12. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/pattern/__init__.py +68 -24
  13. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/pattern/elimination/__init__.py +2 -0
  14. onnxslim-0.1.77/onnxslim/core/pattern/elimination/concat.py +61 -0
  15. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/pattern/elimination/reshape.py +1 -1
  16. onnxslim-0.1.77/onnxslim/core/pattern/elimination/reshape_as.py +64 -0
  17. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/pattern/elimination/slice.py +5 -5
  18. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/pattern/elimination/unsqueeze.py +12 -3
  19. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/pattern/fusion/__init__.py +2 -0
  20. onnxslim-0.1.77/onnxslim/core/pattern/fusion/concat_reshape.py +50 -0
  21. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/pattern/fusion/convadd.py +1 -1
  22. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/pattern/fusion/convbn.py +7 -7
  23. onnxslim-0.1.77/onnxslim/core/pattern/fusion/convmul.py +69 -0
  24. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/pattern/fusion/gemm.py +157 -3
  25. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/pattern/fusion/padconv.py +9 -6
  26. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/pattern/fusion/reduce.py +19 -8
  27. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/pattern/registry.py +3 -1
  28. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/misc/tabulate.py +12 -10
  29. onnxslim-0.1.77/onnxslim/third_party/_sympy/functions.py +205 -0
  30. onnxslim-0.1.77/onnxslim/third_party/_sympy/numbers.py +397 -0
  31. onnxslim-0.1.77/onnxslim/third_party/_sympy/printers.py +491 -0
  32. onnxslim-0.1.77/onnxslim/third_party/_sympy/solve.py +172 -0
  33. onnxslim-0.1.77/onnxslim/third_party/_sympy/symbol.py +102 -0
  34. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/exporters/onnx_exporter.py +103 -53
  35. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/graph_pattern/graph_pattern.py +12 -13
  36. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/importers/onnx_importer.py +30 -27
  37. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/ir/function.py +13 -12
  38. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/ir/graph.py +16 -15
  39. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/ir/node.py +32 -37
  40. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/ir/tensor.py +25 -14
  41. onnxslim-0.1.77/onnxslim/third_party/onnx_graphsurgeon/util/__init__.py +0 -0
  42. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/util/misc.py +9 -8
  43. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/symbolic_shape_infer.py +270 -178
  44. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/utils.py +232 -88
  45. onnxslim-0.1.77/onnxslim/version.py +1 -0
  46. onnxslim-0.1.77/onnxslim.egg-info/PKG-INFO +146 -0
  47. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim.egg-info/SOURCES.txt +11 -1
  48. onnxslim-0.1.77/onnxslim.egg-info/requires.txt +5 -0
  49. onnxslim-0.1.77/pyproject.toml +9 -0
  50. {onnxslim-0.1.46 → onnxslim-0.1.77}/setup.py +1 -1
  51. onnxslim-0.1.46/PKG-INFO +0 -101
  52. onnxslim-0.1.46/README.md +0 -81
  53. onnxslim-0.1.46/VERSION +0 -1
  54. onnxslim-0.1.46/onnxslim/misc/font.py +0 -3
  55. onnxslim-0.1.46/onnxslim/version.py +0 -1
  56. onnxslim-0.1.46/onnxslim.egg-info/PKG-INFO +0 -101
  57. onnxslim-0.1.46/onnxslim.egg-info/requires.txt +0 -3
  58. {onnxslim-0.1.46 → onnxslim-0.1.77}/LICENSE +0 -0
  59. {onnxslim-0.1.46 → onnxslim-0.1.77}/MANIFEST.in +0 -0
  60. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/__main__.py +0 -0
  61. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/cli/__init__.py +0 -0
  62. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/pattern/fusion/gelu.py +0 -0
  63. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/misc/__init__.py +0 -0
  64. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/__init__.py +0 -0
  65. {onnxslim-0.1.46/onnxslim/third_party/onnx_graphsurgeon/ir → onnxslim-0.1.77/onnxslim/third_party/_sympy}/__init__.py +0 -0
  66. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/__init__.py +0 -0
  67. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/exporters/__init__.py +0 -0
  68. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/exporters/base_exporter.py +0 -0
  69. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/graph_pattern/__init__.py +0 -0
  70. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/importers/__init__.py +0 -0
  71. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/importers/base_importer.py +0 -0
  72. {onnxslim-0.1.46/onnxslim/third_party/onnx_graphsurgeon/util → onnxslim-0.1.77/onnxslim/third_party/onnx_graphsurgeon/ir}/__init__.py +0 -0
  73. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/logger/__init__.py +0 -0
  74. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/logger/logger.py +0 -0
  75. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/util/exception.py +0 -0
  76. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim.egg-info/dependency_links.txt +0 -0
  77. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim.egg-info/entry_points.txt +0 -0
  78. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim.egg-info/top_level.txt +0 -0
  79. {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim.egg-info/zip-safe +0 -0
  80. {onnxslim-0.1.46 → onnxslim-0.1.77}/setup.cfg +0 -0
@@ -0,0 +1,146 @@
1
+ Metadata-Version: 2.4
2
+ Name: onnxslim
3
+ Version: 0.1.77
4
+ Summary: OnnxSlim: A Toolkit to Help Optimize Onnx Model
5
+ Home-page: https://github.com/inisis/OnnxSlim
6
+ Author: inisis
7
+ Author-email: desmond.yao@buaa.edu.cn
8
+ License: MIT
9
+ Project-URL: Bug Tracker, https://github.com/inisis/OnnxSlim/issues
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
14
+ Requires-Python: >=3.6
15
+ Description-Content-Type: text/markdown
16
+ License-File: LICENSE
17
+ Requires-Dist: onnx
18
+ Requires-Dist: sympy>=1.13.1
19
+ Requires-Dist: packaging
20
+ Requires-Dist: colorama
21
+ Requires-Dist: ml_dtypes
22
+ Dynamic: author
23
+ Dynamic: author-email
24
+ Dynamic: classifier
25
+ Dynamic: description
26
+ Dynamic: description-content-type
27
+ Dynamic: home-page
28
+ Dynamic: license
29
+ Dynamic: license-file
30
+ Dynamic: project-url
31
+ Dynamic: requires-dist
32
+ Dynamic: requires-python
33
+ Dynamic: summary
34
+
35
+ # OnnxSlim
36
+
37
+ <p align="center">
38
+ <a href="https://pypi.org/project/onnxslim">
39
+ <img src="https://img.shields.io/pypi/v/onnxslim?color=blue" />
40
+ </a>
41
+ <a href="https://pypi.org/project/onnxslim">
42
+ <img src="https://static.pepy.tech/badge/onnxslim/week" />
43
+ </a>
44
+ <a href="https://pypi.org/project/onnxslim">
45
+ <img src="https://static.pepy.tech/badge/onnxslim/month" />
46
+ </a>
47
+ <a href="https://pypi.org/project/onnxslim">
48
+ <img src="https://static.pepy.tech/badge/onnxslim" />
49
+ </a>
50
+ <a href="https://github.com/inisis/onnxslim/actions/workflows/ci.yaml">
51
+ <img src="https://github.com/inisis/onnxslim/actions/workflows/ci.yml/badge.svg" />
52
+ </a>
53
+ <a href="https://codecov.io/gh/inisis/onnxslim" >
54
+ <img src="https://codecov.io/gh/inisis/onnxslim/branch/main/graph/badge.svg?token=C69ZH6802N"/>
55
+ </a>
56
+ <a href="https://muhammadrizwanmunawar.medium.com/boost-onnx-load-speed-by-10-15-with-onnxslims-python-package-d401eb8c2e69">
57
+ <img src="https://img.shields.io/badge/Blog-OnnxSlim?style=flat&label=OnnxSlim" />
58
+ </a>
59
+ <a href="https://deepwiki.com/inisis/OnnxSlim"><img src="https://img.shields.io/badge/DeepWiki-inisis%2FOnnxSlim-blue.svg?logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACwAAAAyCAYAAAAnWDnqAAAAAXNSR0IArs4c6QAAA05JREFUaEPtmUtyEzEQhtWTQyQLHNak2AB7ZnyXZMEjXMGeK/AIi+QuHrMnbChYY7MIh8g01fJoopFb0uhhEqqcbWTp06/uv1saEDv4O3n3dV60RfP947Mm9/SQc0ICFQgzfc4CYZoTPAswgSJCCUJUnAAoRHOAUOcATwbmVLWdGoH//PB8mnKqScAhsD0kYP3j/Yt5LPQe2KvcXmGvRHcDnpxfL2zOYJ1mFwrryWTz0advv1Ut4CJgf5uhDuDj5eUcAUoahrdY/56ebRWeraTjMt/00Sh3UDtjgHtQNHwcRGOC98BJEAEymycmYcWwOprTgcB6VZ5JK5TAJ+fXGLBm3FDAmn6oPPjR4rKCAoJCal2eAiQp2x0vxTPB3ALO2CRkwmDy5WohzBDwSEFKRwPbknEggCPB/imwrycgxX2NzoMCHhPkDwqYMr9tRcP5qNrMZHkVnOjRMWwLCcr8ohBVb1OMjxLwGCvjTikrsBOiA6fNyCrm8V1rP93iVPpwaE+gO0SsWmPiXB+jikdf6SizrT5qKasx5j8ABbHpFTx+vFXp9EnYQmLx02h1QTTrl6eDqxLnGjporxl3NL3agEvXdT0WmEost648sQOYAeJS9Q7bfUVoMGnjo4AZdUMQku50McDcMWcBPvr0SzbTAFDfvJqwLzgxwATnCgnp4wDl6Aa+Ax283gghmj+vj7feE2KBBRMW3FzOpLOADl0Isb5587h/U4gGvkt5v60Z1VLG8BhYjbzRwyQZemwAd6cCR5/XFWLYZRIMpX39AR0tjaGGiGzLVyhse5C9RKC6ai42ppWPKiBagOvaYk8lO7DajerabOZP46Lby5wKjw1HCRx7p9sVMOWGzb/vA1hwiWc6jm3MvQDTogQkiqIhJV0nBQBTU+3okKCFDy9WwferkHjtxib7t3xIUQtHxnIwtx4mpg26/HfwVNVDb4oI9RHmx5WGelRVlrtiw43zboCLaxv46AZeB3IlTkwouebTr1y2NjSpHz68WNFjHvupy3q8TFn3Hos2IAk4Ju5dCo8B3wP7VPr/FGaKiG+T+v+TQqIrOqMTL1VdWV1DdmcbO8KXBz6esmYWYKPwDL5b5FA1a0hwapHiom0r/cKaoqr+27/XcrS5UwSMbQAAAABJRU5ErkJggg==" alt="DeepWiki"></a>
60
+ </p>
61
+
62
+ OnnxSlim can help you slim your onnx model, with fewer operators, but same accuracy, better inference speed.
63
+
64
+ - 🚀 2025/05/17: OnnxSlim is merged into [optimum](https://github.com/huggingface/optimum) 🤗🤗🤗
65
+ - 🚀 2025/04/30: Rank 1st in the [AICAS 2025 LLM inference optimization challenge](https://tianchi.aliyun.com/competition/entrance/532289/customize588)
66
+ - 🚀 2025/01/28: Achieved 1M downloads
67
+ - 🚀 2024/06/23: OnnxSlim is merged into [transformers.js](https://github.com/huggingface/transformers.js) 🤗🤗🤗
68
+ - 🚀 2024/06/02: OnnxSlim is merged into [ultralytics](https://github.com/ultralytics/ultralytics) ❤️❤️❤️
69
+ - 🚀 2024/04/30: Rank 1st in the [AICAS 2024 LLM inference optimization challenge](https://tianchi.aliyun.com/competition/entrance/532170/customize440) held by Arm and T-head
70
+ - 🚀 2024/01/25: OnnxSlim is merged into [mnn-llm](https://github.com/wangzhaode/mnn-llm), performance increased by 5%
71
+
72
+ # Benchmark
73
+
74
+ ![Image](https://github.com/user-attachments/assets/fefc79f1-5d8d-486b-935a-a088846b3900)
75
+
76
+ # Installation
77
+
78
+ ## Using Prebuilt
79
+
80
+ ```bash
81
+ pip install onnxslim
82
+ ```
83
+
84
+ ## Install From Source
85
+
86
+ ```bash
87
+ pip install git+https://github.com/inisis/OnnxSlim@main
88
+ ```
89
+
90
+ ## Install From Local
91
+
92
+ ```bash
93
+ git clone https://github.com/inisis/OnnxSlim && cd OnnxSlim/
94
+ pip install .
95
+ ```
96
+
97
+ # How to use
98
+
99
+ ## Bash
100
+
101
+ ```bash
102
+ onnxslim your_onnx_model slimmed_onnx_model
103
+ ```
104
+
105
+ <div align=left><img src="https://raw.githubusercontent.com/inisis/onnxslim/main/images/onnxslim.gif"></div>
106
+
107
+ ## Inscript
108
+
109
+ ```inscript
110
+ import onnx
111
+ import onnxslim
112
+
113
+ model = onnx.load("model.onnx")
114
+ slimmed_model = onnxslim.slim(model)
115
+ onnx.save(slimmed_model, "slimmed_model.onnx")
116
+ ```
117
+
118
+ For more usage, see onnxslim -h or refer to our [examples](./examples)
119
+
120
+ # Projects using OnnxSlim
121
+
122
+ - <img src="https://avatars.githubusercontent.com/u/131524?s=48&v=4" width="22" height="22"/>[Mozilla/smart_autofill](https://github.com/mozilla/smart_autofill)
123
+ - <img src="https://avatars.githubusercontent.com/u/1961952?s=48&v=4" width="22" height="22"/>[alibaba/MNN](https://github.com/alibaba/MNN)
124
+ - <img src="https://avatars.githubusercontent.com/u/23534030?s=48&v=4" width="22" height="22"/>[PaddlePaddle/PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR)
125
+ - <img src="https://avatars.githubusercontent.com/u/25720743?s=48&v=4" width="22" height="22"/>[huggingface/transformers.js](https://github.com/huggingface/transformers.js)
126
+ - <img src="https://avatars.githubusercontent.com/u/25720743?s=48&v=4" width="22" height="22"/>[huggingface/optimum](https://github.com/huggingface/optimum)
127
+ - <img src="https://avatars.githubusercontent.com/u/86091366?s=48&v=4" width="22" height="22"/>[THU-MIG/yolov10](https://github.com/THU-MIG/yolov10)
128
+ - <img src="https://avatars.githubusercontent.com/u/26833451?s=48&v=4" width="22" height="22"/>[ultralytics/ultralytics](https://github.com/ultralytics/ultralytics)
129
+ - <img src="https://avatars.githubusercontent.com/u/109945100?s=48&v=4" width="22" height="22"/>[ModelScope/FunASR](https://github.com/modelscope/FunASR)
130
+ - <img src="https://avatars.githubusercontent.com/u/1961952?s=48&v=4" width="22" height="22"/>[alibaba/MNN-LLM](https://github.com/wangzhaode/mnn-llm)
131
+ - <img src="https://avatars.githubusercontent.com/u/126587470?s=48&v=4" width="22" height="22"/>[deepghs/imgutils](https://github.com/deepghs/imgutils)
132
+ - <img src="https://avatars.githubusercontent.com/u/48153283?s=48&v=4" width="22" height="22"/>[sunsmarterjie/yolov12](https://github.com/sunsmarterjie/yolov12)
133
+ - <img src="https://avatars.githubusercontent.com/u/147458884?s=48&v=4" width="22" height="22"/>[nndeploy/nndeploy](https://github.com/nndeploy/nndeploy)
134
+ - <img src="https://avatars.githubusercontent.com/u/111754012?s=48&v=4" width="22" height="22"/>[CVCUDA/CV-CUDA](https://github.com/CVCUDA/CV-CUDA)
135
+
136
+ # References
137
+
138
+ > - [onnx-graphsurgeon](https://github.com/NVIDIA/TensorRT/tree/main/tools/onnx-graphsurgeon)
139
+ > - [Polygraphy](https://github.com/NVIDIA/TensorRT/tree/main/tools/Polygraphy/polygraphy)
140
+ > - [onnx-simplifier](https://github.com/daquexian/onnx-simplifier)
141
+ > - [tabulate](https://github.com/astanin/python-tabulate)
142
+ > - [onnxruntime](https://github.com/microsoft/onnxruntime)
143
+
144
+ # Contact
145
+
146
+ Discord: https://discord.gg/nRw2Fd3VUS QQ Group: `873569894`
@@ -0,0 +1,112 @@
1
+ # OnnxSlim
2
+
3
+ <p align="center">
4
+ <a href="https://pypi.org/project/onnxslim">
5
+ <img src="https://img.shields.io/pypi/v/onnxslim?color=blue" />
6
+ </a>
7
+ <a href="https://pypi.org/project/onnxslim">
8
+ <img src="https://static.pepy.tech/badge/onnxslim/week" />
9
+ </a>
10
+ <a href="https://pypi.org/project/onnxslim">
11
+ <img src="https://static.pepy.tech/badge/onnxslim/month" />
12
+ </a>
13
+ <a href="https://pypi.org/project/onnxslim">
14
+ <img src="https://static.pepy.tech/badge/onnxslim" />
15
+ </a>
16
+ <a href="https://github.com/inisis/onnxslim/actions/workflows/ci.yaml">
17
+ <img src="https://github.com/inisis/onnxslim/actions/workflows/ci.yml/badge.svg" />
18
+ </a>
19
+ <a href="https://codecov.io/gh/inisis/onnxslim" >
20
+ <img src="https://codecov.io/gh/inisis/onnxslim/branch/main/graph/badge.svg?token=C69ZH6802N"/>
21
+ </a>
22
+ <a href="https://muhammadrizwanmunawar.medium.com/boost-onnx-load-speed-by-10-15-with-onnxslims-python-package-d401eb8c2e69">
23
+ <img src="https://img.shields.io/badge/Blog-OnnxSlim?style=flat&label=OnnxSlim" />
24
+ </a>
25
+ <a href="https://deepwiki.com/inisis/OnnxSlim"><img src="https://img.shields.io/badge/DeepWiki-inisis%2FOnnxSlim-blue.svg?logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACwAAAAyCAYAAAAnWDnqAAAAAXNSR0IArs4c6QAAA05JREFUaEPtmUtyEzEQhtWTQyQLHNak2AB7ZnyXZMEjXMGeK/AIi+QuHrMnbChYY7MIh8g01fJoopFb0uhhEqqcbWTp06/uv1saEDv4O3n3dV60RfP947Mm9/SQc0ICFQgzfc4CYZoTPAswgSJCCUJUnAAoRHOAUOcATwbmVLWdGoH//PB8mnKqScAhsD0kYP3j/Yt5LPQe2KvcXmGvRHcDnpxfL2zOYJ1mFwrryWTz0advv1Ut4CJgf5uhDuDj5eUcAUoahrdY/56ebRWeraTjMt/00Sh3UDtjgHtQNHwcRGOC98BJEAEymycmYcWwOprTgcB6VZ5JK5TAJ+fXGLBm3FDAmn6oPPjR4rKCAoJCal2eAiQp2x0vxTPB3ALO2CRkwmDy5WohzBDwSEFKRwPbknEggCPB/imwrycgxX2NzoMCHhPkDwqYMr9tRcP5qNrMZHkVnOjRMWwLCcr8ohBVb1OMjxLwGCvjTikrsBOiA6fNyCrm8V1rP93iVPpwaE+gO0SsWmPiXB+jikdf6SizrT5qKasx5j8ABbHpFTx+vFXp9EnYQmLx02h1QTTrl6eDqxLnGjporxl3NL3agEvXdT0WmEost648sQOYAeJS9Q7bfUVoMGnjo4AZdUMQku50McDcMWcBPvr0SzbTAFDfvJqwLzgxwATnCgnp4wDl6Aa+Ax283gghmj+vj7feE2KBBRMW3FzOpLOADl0Isb5587h/U4gGvkt5v60Z1VLG8BhYjbzRwyQZemwAd6cCR5/XFWLYZRIMpX39AR0tjaGGiGzLVyhse5C9RKC6ai42ppWPKiBagOvaYk8lO7DajerabOZP46Lby5wKjw1HCRx7p9sVMOWGzb/vA1hwiWc6jm3MvQDTogQkiqIhJV0nBQBTU+3okKCFDy9WwferkHjtxib7t3xIUQtHxnIwtx4mpg26/HfwVNVDb4oI9RHmx5WGelRVlrtiw43zboCLaxv46AZeB3IlTkwouebTr1y2NjSpHz68WNFjHvupy3q8TFn3Hos2IAk4Ju5dCo8B3wP7VPr/FGaKiG+T+v+TQqIrOqMTL1VdWV1DdmcbO8KXBz6esmYWYKPwDL5b5FA1a0hwapHiom0r/cKaoqr+27/XcrS5UwSMbQAAAABJRU5ErkJggg==" alt="DeepWiki"></a>
26
+ </p>
27
+
28
+ OnnxSlim can help you slim your onnx model, with fewer operators, but same accuracy, better inference speed.
29
+
30
+ - 🚀 2025/05/17: OnnxSlim is merged into [optimum](https://github.com/huggingface/optimum) 🤗🤗🤗
31
+ - 🚀 2025/04/30: Rank 1st in the [AICAS 2025 LLM inference optimization challenge](https://tianchi.aliyun.com/competition/entrance/532289/customize588)
32
+ - 🚀 2025/01/28: Achieved 1M downloads
33
+ - 🚀 2024/06/23: OnnxSlim is merged into [transformers.js](https://github.com/huggingface/transformers.js) 🤗🤗🤗
34
+ - 🚀 2024/06/02: OnnxSlim is merged into [ultralytics](https://github.com/ultralytics/ultralytics) ❤️❤️❤️
35
+ - 🚀 2024/04/30: Rank 1st in the [AICAS 2024 LLM inference optimization challenge](https://tianchi.aliyun.com/competition/entrance/532170/customize440) held by Arm and T-head
36
+ - 🚀 2024/01/25: OnnxSlim is merged into [mnn-llm](https://github.com/wangzhaode/mnn-llm), performance increased by 5%
37
+
38
+ # Benchmark
39
+
40
+ ![Image](https://github.com/user-attachments/assets/fefc79f1-5d8d-486b-935a-a088846b3900)
41
+
42
+ # Installation
43
+
44
+ ## Using Prebuilt
45
+
46
+ ```bash
47
+ pip install onnxslim
48
+ ```
49
+
50
+ ## Install From Source
51
+
52
+ ```bash
53
+ pip install git+https://github.com/inisis/OnnxSlim@main
54
+ ```
55
+
56
+ ## Install From Local
57
+
58
+ ```bash
59
+ git clone https://github.com/inisis/OnnxSlim && cd OnnxSlim/
60
+ pip install .
61
+ ```
62
+
63
+ # How to use
64
+
65
+ ## Bash
66
+
67
+ ```bash
68
+ onnxslim your_onnx_model slimmed_onnx_model
69
+ ```
70
+
71
+ <div align=left><img src="https://raw.githubusercontent.com/inisis/onnxslim/main/images/onnxslim.gif"></div>
72
+
73
+ ## Python
74
+
75
+ ```python
76
+ import onnx
77
+ import onnxslim
78
+
79
+ model = onnx.load("model.onnx")
80
+ slimmed_model = onnxslim.slim(model)
81
+ onnx.save(slimmed_model, "slimmed_model.onnx")
82
+ ```
83
+
84
+ For more usage, see onnxslim -h or refer to our [examples](./examples)
85
+
86
+ # Projects using OnnxSlim
87
+
88
+ - <img src="https://avatars.githubusercontent.com/u/131524?s=48&v=4" width="22" height="22"/>[Mozilla/smart_autofill](https://github.com/mozilla/smart_autofill)
89
+ - <img src="https://avatars.githubusercontent.com/u/1961952?s=48&v=4" width="22" height="22"/>[alibaba/MNN](https://github.com/alibaba/MNN)
90
+ - <img src="https://avatars.githubusercontent.com/u/23534030?s=48&v=4" width="22" height="22"/>[PaddlePaddle/PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR)
91
+ - <img src="https://avatars.githubusercontent.com/u/25720743?s=48&v=4" width="22" height="22"/>[huggingface/transformers.js](https://github.com/huggingface/transformers.js)
92
+ - <img src="https://avatars.githubusercontent.com/u/25720743?s=48&v=4" width="22" height="22"/>[huggingface/optimum](https://github.com/huggingface/optimum)
93
+ - <img src="https://avatars.githubusercontent.com/u/86091366?s=48&v=4" width="22" height="22"/>[THU-MIG/yolov10](https://github.com/THU-MIG/yolov10)
94
+ - <img src="https://avatars.githubusercontent.com/u/26833451?s=48&v=4" width="22" height="22"/>[ultralytics/ultralytics](https://github.com/ultralytics/ultralytics)
95
+ - <img src="https://avatars.githubusercontent.com/u/109945100?s=48&v=4" width="22" height="22"/>[ModelScope/FunASR](https://github.com/modelscope/FunASR)
96
+ - <img src="https://avatars.githubusercontent.com/u/1961952?s=48&v=4" width="22" height="22"/>[alibaba/MNN-LLM](https://github.com/wangzhaode/mnn-llm)
97
+ - <img src="https://avatars.githubusercontent.com/u/126587470?s=48&v=4" width="22" height="22"/>[deepghs/imgutils](https://github.com/deepghs/imgutils)
98
+ - <img src="https://avatars.githubusercontent.com/u/48153283?s=48&v=4" width="22" height="22"/>[sunsmarterjie/yolov12](https://github.com/sunsmarterjie/yolov12)
99
+ - <img src="https://avatars.githubusercontent.com/u/147458884?s=48&v=4" width="22" height="22"/>[nndeploy/nndeploy](https://github.com/nndeploy/nndeploy)
100
+ - <img src="https://avatars.githubusercontent.com/u/111754012?s=48&v=4" width="22" height="22"/>[CVCUDA/CV-CUDA](https://github.com/CVCUDA/CV-CUDA)
101
+
102
+ # References
103
+
104
+ > - [onnx-graphsurgeon](https://github.com/NVIDIA/TensorRT/tree/main/tools/onnx-graphsurgeon)
105
+ > - [Polygraphy](https://github.com/NVIDIA/TensorRT/tree/main/tools/Polygraphy/polygraphy)
106
+ > - [onnx-simplifier](https://github.com/daquexian/onnx-simplifier)
107
+ > - [tabulate](https://github.com/astanin/python-tabulate)
108
+ > - [onnxruntime](https://github.com/microsoft/onnxruntime)
109
+
110
+ # Contact
111
+
112
+ Discord: https://discord.gg/nRw2Fd3VUS QQ Group: `873569894`
@@ -0,0 +1 @@
1
+ 0.1.77
@@ -3,7 +3,6 @@ import warnings
3
3
 
4
4
  from onnxslim.cli import slim
5
5
  from onnxslim.core.pattern.registry import (
6
- DEFAULT_FUSION_PATTERNS,
7
6
  register_fusion_pattern,
8
7
  )
9
8
  from onnxslim.version import __version__
@@ -2,10 +2,28 @@ import argparse
2
2
  import dataclasses
3
3
  from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
4
4
  from dataclasses import dataclass, field
5
- from typing import List, Optional, Type, Union, get_args, get_origin
6
-
7
- import onnxslim
8
-
5
+ from typing import List, Optional, Type, Union, get_args, get_origin, TypedDict, Dict, Literal
6
+
7
+ from .core.optimization import OptimizationSettings
8
+ from .core.pattern.registry import DEFAULT_FUSION_PATTERNS
9
+ from .version import __version__
10
+
11
+
12
+ class OnnxSlimKwargs(TypedDict, total=False):
13
+ model_check: bool
14
+ input_shapes: Dict[str, List[int]]
15
+ inputs: List[str]
16
+ outputs: List[str]
17
+ no_shape_infer: bool
18
+ skip_optimizations: List[str]
19
+ dtype: Literal["float16", "float32", "uint8", "int8"]
20
+ skip_fusion_patterns: List[str]
21
+ size_threshold: int
22
+ inspect: bool
23
+ dump_to_disk: bool
24
+ save_as_external_data: bool
25
+ model_check_inputs: Optional[List[str]]
26
+ verbose: bool
9
27
 
10
28
  def _get_inner_type(arg_type):
11
29
  if get_origin(arg_type) is Union:
@@ -38,14 +56,24 @@ class OptimizationArguments:
38
56
  """
39
57
 
40
58
  no_shape_infer: bool = field(default=False, metadata={"help": "whether to disable shape_infer, default false."})
41
- no_constant_folding: bool = field(
42
- default=False, metadata={"help": "whether to disable constant_folding, default false."}
59
+ skip_optimizations: Optional[List[str]] = field(
60
+ default=None,
61
+ metadata={
62
+ "help": "whether to skip some optimizations",
63
+ "choices": list(OptimizationSettings.keys()),
64
+ },
43
65
  )
44
66
  skip_fusion_patterns: Optional[List[str]] = field(
45
67
  default=None,
46
68
  metadata={
47
69
  "help": "whether to skip the fusion of some patterns",
48
- "choices": list(onnxslim.DEFAULT_FUSION_PATTERNS.keys()),
70
+ "choices": list(DEFAULT_FUSION_PATTERNS.keys()),
71
+ },
72
+ )
73
+ size_threshold: int = field(
74
+ default=None,
75
+ metadata={
76
+ "help": "size threshold in bytes, size larger than this value will not be folded, default None, which means fold all constants",
49
77
  },
50
78
  )
51
79
 
@@ -163,7 +191,7 @@ class OnnxSlimArgumentParser(ArgumentParser):
163
191
  # Add positional arguments separately for ModelArguments
164
192
  self.parser.add_argument("input_model", help="input onnx model")
165
193
  self.parser.add_argument("output_model", nargs="?", default=None, help="output onnx model")
166
- self.parser.add_argument("-v", "--version", action="version", version=onnxslim.__version__)
194
+ self.parser.add_argument("-v", "--version", action="version", version=__version__)
167
195
 
168
196
  def parse_args_into_dataclasses(self):
169
197
  # Pre-parse arguments to check for `--inspect`
@@ -1,14 +1,17 @@
1
- from typing import List, Union
1
+ from __future__ import annotations
2
2
 
3
3
  import onnx
4
4
 
5
+ from onnxslim.argparser import OnnxSlimKwargs
5
6
 
6
- def slim(model: Union[str, onnx.ModelProto, List[Union[str, onnx.ModelProto]]], *args, **kwargs):
7
+
8
+ def slim(model: str | onnx.ModelProto | list[str | onnx.ModelProto], *args, **kwargs: OnnxSlimKwargs):
7
9
  import os
8
10
  import time
9
11
  from pathlib import Path
10
12
 
11
13
  from onnxslim.core import (
14
+ OptimizationSettings,
12
15
  convert_data_format,
13
16
  freeze,
14
17
  input_modification,
@@ -18,6 +21,7 @@ def slim(model: Union[str, onnx.ModelProto, List[Union[str, onnx.ModelProto]]],
18
21
  shape_infer,
19
22
  )
20
23
  from onnxslim.utils import (
24
+ TensorInfo,
21
25
  check_onnx,
22
26
  check_point,
23
27
  check_result,
@@ -27,6 +31,7 @@ def slim(model: Union[str, onnx.ModelProto, List[Union[str, onnx.ModelProto]]],
27
31
  print_model_info_as_table,
28
32
  save,
29
33
  summarize_model,
34
+ update_outputs_dims,
30
35
  )
31
36
 
32
37
  output_model = args[0] if len(args) > 0 else kwargs.get("output_model", None)
@@ -35,9 +40,11 @@ def slim(model: Union[str, onnx.ModelProto, List[Union[str, onnx.ModelProto]]],
35
40
  inputs = kwargs.get("inputs", None)
36
41
  outputs = kwargs.get("outputs", None)
37
42
  no_shape_infer = kwargs.get("no_shape_infer", False)
38
- no_constant_folding = kwargs.get("no_constant_folding", False)
43
+ skip_optimizations = kwargs.get("skip_optimizations", None)
39
44
  dtype = kwargs.get("dtype", None)
40
45
  skip_fusion_patterns = kwargs.get("skip_fusion_patterns", None)
46
+ size_threshold = kwargs.get("size_threshold", None)
47
+ size_threshold = int(size_threshold) if size_threshold else None
41
48
  kwargs.get("inspect", False)
42
49
  dump_to_disk = kwargs.get("dump_to_disk", False)
43
50
  save_as_external_data = kwargs.get("save_as_external_data", False)
@@ -92,14 +99,17 @@ def slim(model: Union[str, onnx.ModelProto, List[Union[str, onnx.ModelProto]]],
92
99
  if model_check:
93
100
  input_data_dict, raw_onnx_output, model = check_onnx(model, model_check_inputs)
94
101
 
102
+ output_info = {TensorInfo(o).name: TensorInfo(o).shape for o in model.graph.output}
103
+
95
104
  if not no_shape_infer:
96
105
  model = shape_infer(model)
97
106
 
98
- if not no_constant_folding:
107
+ OptimizationSettings.reset(skip_optimizations)
108
+ if OptimizationSettings.enabled():
99
109
  graph_check_point = check_point(model)
100
110
  while MAX_ITER > 0:
101
111
  logger.debug(f"iter: {MAX_ITER}")
102
- model = optimize(model, skip_fusion_patterns)
112
+ model = optimize(model, skip_fusion_patterns, size_threshold)
103
113
  if not no_shape_infer:
104
114
  model = shape_infer(model)
105
115
  graph = check_point(model)
@@ -114,6 +124,8 @@ def slim(model: Union[str, onnx.ModelProto, List[Union[str, onnx.ModelProto]]],
114
124
  if dtype:
115
125
  model = convert_data_format(model, dtype)
116
126
 
127
+ model = update_outputs_dims(model, output_dims=output_info)
128
+
117
129
  if model_check:
118
130
  slimmed_onnx_output, model = onnxruntime_inference(model, input_data_dict)
119
131
  if not check_result(raw_onnx_output, slimmed_onnx_output):
@@ -151,10 +163,11 @@ def main():
151
163
  if not checker_args.inspect and checker_args.dump_to_disk:
152
164
  argument_parser.error("dump_to_disk can only be used with --inspect")
153
165
 
154
- if not optimization_args.no_shape_infer or optimization_args.no_constant_folding:
155
- from onnxslim.utils import check_onnx_compatibility
166
+ if not optimization_args.no_shape_infer:
167
+ from onnxslim.utils import check_onnx_compatibility, is_onnxruntime_available
156
168
 
157
- check_onnx_compatibility()
169
+ if is_onnxruntime_available():
170
+ check_onnx_compatibility()
158
171
 
159
172
  slim(
160
173
  model_args.input_model,
@@ -1,15 +1,18 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
4
  import os
3
5
  import tempfile
6
+ from typing import Optional
4
7
 
5
8
  import numpy as np
6
9
  import onnx
7
10
  from onnx import checker
8
11
 
9
12
  import onnxslim.third_party.onnx_graphsurgeon as gs
10
- from onnxslim.core.optimization import optimize_model
13
+ from onnxslim.core.optimization import OptimizationSettings, optimize_model
11
14
  from onnxslim.third_party.onnx_graphsurgeon.exporters.onnx_exporter import dtype_to_onnx
12
- from onnxslim.third_party.onnx_graphsurgeon.ir.tensor import Constant, Variable
15
+ from onnxslim.third_party.onnx_graphsurgeon.ir.tensor import Constant
13
16
  from onnxslim.third_party.symbolic_shape_infer import SymbolicShapeInference
14
17
  from onnxslim.utils import save
15
18
 
@@ -18,6 +21,7 @@ logger = logging.getLogger("onnxslim")
18
21
 
19
22
  DEBUG = bool(os.getenv("ONNXSLIM_DEBUG"))
20
23
  AUTO_MERGE = True if os.getenv("ONNXSLIM_AUTO_MERGE") is None else bool(int(os.getenv("ONNXSLIM_AUTO_MERGE")))
24
+ FORCE_ONNXRUNTIME_SHAPE_INFERENCE = bool(os.getenv("ONNXSLIM_FORCE_ONNXRUNTIME_SHAPE_INFERENCE"))
21
25
 
22
26
 
23
27
  def input_shape_modification(model: onnx.ModelProto, input_shapes: str) -> onnx.ModelProto:
@@ -122,6 +126,9 @@ def input_modification(model: onnx.ModelProto, inputs: str) -> onnx.ModelProto:
122
126
  def shape_infer(model: onnx.ModelProto):
123
127
  """Infer tensor shapes in an ONNX model using symbolic and static shape inference techniques."""
124
128
  logger.debug("Start shape inference.")
129
+ if FORCE_ONNXRUNTIME_SHAPE_INFERENCE:
130
+ logger.debug("force onnxruntime shape infer.")
131
+ return SymbolicShapeInference.infer_shapes(model, auto_merge=AUTO_MERGE)
125
132
  try:
126
133
  logger.debug("try onnxruntime shape infer.")
127
134
  model = SymbolicShapeInference.infer_shapes(model, auto_merge=AUTO_MERGE)
@@ -142,14 +149,15 @@ def shape_infer(model: onnx.ModelProto):
142
149
  return model
143
150
 
144
151
 
145
- def optimize(model: onnx.ModelProto, skip_fusion_patterns: str = None):
152
+ def optimize(model: onnx.ModelProto, skip_fusion_patterns: str | None = None, size_threshold: int | None = None):
146
153
  """Optimize the given ONNX model with options to skip specific fusion patterns and return the optimized model."""
147
154
  logger.debug("Start converting model to gs.")
148
155
  graph = gs.import_onnx(model).toposort()
149
156
  logger.debug("Finish converting model to gs.")
150
- logger.debug("Start constant folding.")
151
- graph.fold_constants().cleanup().toposort()
152
- logger.debug("Finish constant folding.")
157
+ if OptimizationSettings.constant_folding:
158
+ logger.debug("Start constant folding.")
159
+ graph.fold_constants(size_threshold=size_threshold).cleanup().toposort()
160
+ logger.debug("Finish constant folding.")
153
161
  logger.debug("Start optimize model.")
154
162
  model = optimize_model(graph, skip_fusion_patterns)
155
163
  logger.debug("Finish optimize model.")
@@ -170,11 +178,11 @@ def convert_data_format(model: onnx.ModelProto, dtype: str) -> onnx.ModelProto:
170
178
 
171
179
  for node in graph.nodes:
172
180
  if node.op == "Cast":
173
- inp_dtype = [input.dtype for input in node.inputs][0]
181
+ inp_dtype = next(input.dtype for input in node.inputs)
174
182
  if inp_dtype in [np.float16, np.float32]:
175
- node.replace_all_uses_with(node.inputs[0])
183
+ node.erase()
176
184
  else:
177
- outp_dtype = [output.dtype for output in node.outputs][0]
185
+ outp_dtype = next(output.dtype for output in node.outputs)
178
186
  if outp_dtype == np.float16:
179
187
  node.attrs["to"] = dtype_to_onnx(np.float32)
180
188
  node.outputs[0].dtype = np.float32
@@ -1,6 +1,8 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
4
  from collections import Counter
3
- from typing import List, Union
5
+ from typing import List, Optional, Union
4
6
 
5
7
  import onnx
6
8
 
@@ -15,19 +17,62 @@ from .subexpression_elimination import subexpression_elimination
15
17
  from .weight_tying import tie_weights
16
18
 
17
19
 
18
- def optimize_model(model: Union[onnx.ModelProto, gs.Graph], skip_fusion_patterns: str = None) -> onnx.ModelProto:
20
+ class OptimizationSettings:
21
+ constant_folding = True
22
+ graph_fusion = True
23
+ dead_node_elimination = True
24
+ subexpression_elimination = True
25
+ weight_tying = True
26
+
27
+ @classmethod
28
+ def keys(cls):
29
+ return [
30
+ "constant_folding",
31
+ "graph_fusion",
32
+ "dead_node_elimination",
33
+ "subexpression_elimination",
34
+ "weight_tying",
35
+ ]
36
+
37
+ @classmethod
38
+ def reset(cls, skip_optimizations: list[str] | None = None):
39
+ for key in cls.keys():
40
+ if skip_optimizations and key in skip_optimizations:
41
+ setattr(cls, key, False)
42
+ else:
43
+ setattr(cls, key, True)
44
+
45
+ @classmethod
46
+ def stats(cls):
47
+ return {key: getattr(cls, key) for key in cls.keys()}
48
+
49
+ @classmethod
50
+ def enabled(cls):
51
+ return any([getattr(cls, key) for key in cls.keys()])
52
+
53
+
54
+ def optimize_model(model: onnx.ModelProto | gs.Graph, skip_fusion_patterns: str | None = None) -> onnx.ModelProto:
19
55
  """Optimize and transform the given ONNX model using various fusion patterns and graph rewriting techniques."""
20
56
  graph = model if isinstance(model, gs.Graph) else gs.import_onnx(model)
21
- fusion_patterns = get_fusion_patterns(skip_fusion_patterns)
22
- fusion_pairs = find_matches(graph, fusion_patterns)
23
- for match in fusion_pairs.values():
24
- graph.replace_custom_layer(**match)
25
- graph.cleanup(remove_unused_graph_inputs=True).toposort()
26
- dead_node_elimination(graph)
27
- graph.cleanup(remove_unused_graph_inputs=True).toposort()
28
- subexpression_elimination(graph)
29
- graph.cleanup(remove_unused_graph_inputs=True).toposort()
30
- tie_weights(graph)
57
+ if OptimizationSettings.graph_fusion:
58
+ logger.debug("Start graph_fusion.")
59
+ fusion_patterns = get_fusion_patterns(skip_fusion_patterns)
60
+ graph_fusion(graph, fusion_patterns)
61
+ logger.debug("Finish graph_fusion.")
62
+ if OptimizationSettings.dead_node_elimination:
63
+ logger.debug("Start dead_node_elimination.")
64
+ dead_node_elimination(graph)
65
+ graph.cleanup(remove_unused_graph_inputs=True).toposort()
66
+ logger.debug("Finish dead_node_elimination.")
67
+ if OptimizationSettings.subexpression_elimination:
68
+ logger.debug("Start subexpression_elimination.")
69
+ subexpression_elimination(graph)
70
+ graph.cleanup(remove_unused_graph_inputs=True).toposort()
71
+ logger.debug("Finish subexpression_elimination.")
72
+ if OptimizationSettings.weight_tying:
73
+ logger.debug("Start weight_tying.")
74
+ tie_weights(graph)
75
+ logger.debug("Finish weight_tying.")
31
76
  model = gs.export_onnx(graph)
32
77
 
33
78
  return model
@@ -38,9 +83,9 @@ def replace_custom_layer(
38
83
  self,
39
84
  op: str,
40
85
  inputs,
41
- outputs: List[str],
86
+ outputs: list[str],
42
87
  name: str,
43
- attrs: dict = None,
88
+ attrs: dict | None = None,
44
89
  domain: str = "ai.onnx.contrib",
45
90
  ):
46
91
  """Replace a custom layer in the computational graph with specified parameters and domain."""
@@ -54,9 +99,21 @@ def replace_custom_layer(
54
99
  )
55
100
 
56
101
 
102
+ def graph_fusion(graph: Graph, fusion_patterns: dict, is_subgraph=False):
103
+ for subgraph in graph.subgraphs():
104
+ graph_fusion(subgraph, fusion_patterns, is_subgraph=True)
105
+
106
+ fusion_pairs = find_matches(graph, fusion_patterns)
107
+ for match in fusion_pairs.values():
108
+ graph.replace_custom_layer(**match)
109
+
110
+ graph.cleanup(remove_unused_graph_inputs=True if not is_subgraph else False).toposort()
111
+
112
+
57
113
  def find_matches(graph: Graph, fusion_patterns: dict):
58
114
  """Find matching patterns in the graph based on provided fusion patterns."""
59
115
  match_map = {}
116
+
60
117
  counter = Counter()
61
118
  for node in reversed(graph.nodes):
62
119
  if node.name not in match_map: