onnxslim 0.1.46__tar.gz → 0.1.77__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- onnxslim-0.1.77/PKG-INFO +146 -0
- onnxslim-0.1.77/README.md +112 -0
- onnxslim-0.1.77/VERSION +1 -0
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/__init__.py +0 -1
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/argparser.py +36 -8
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/cli/_main.py +21 -8
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/__init__.py +17 -9
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/optimization/__init__.py +71 -14
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/optimization/dead_node_elimination.py +31 -17
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/optimization/subexpression_elimination.py +11 -20
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/optimization/weight_tying.py +19 -0
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/pattern/__init__.py +68 -24
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/pattern/elimination/__init__.py +2 -0
- onnxslim-0.1.77/onnxslim/core/pattern/elimination/concat.py +61 -0
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/pattern/elimination/reshape.py +1 -1
- onnxslim-0.1.77/onnxslim/core/pattern/elimination/reshape_as.py +64 -0
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/pattern/elimination/slice.py +5 -5
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/pattern/elimination/unsqueeze.py +12 -3
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/pattern/fusion/__init__.py +2 -0
- onnxslim-0.1.77/onnxslim/core/pattern/fusion/concat_reshape.py +50 -0
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/pattern/fusion/convadd.py +1 -1
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/pattern/fusion/convbn.py +7 -7
- onnxslim-0.1.77/onnxslim/core/pattern/fusion/convmul.py +69 -0
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/pattern/fusion/gemm.py +157 -3
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/pattern/fusion/padconv.py +9 -6
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/pattern/fusion/reduce.py +19 -8
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/pattern/registry.py +3 -1
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/misc/tabulate.py +12 -10
- onnxslim-0.1.77/onnxslim/third_party/_sympy/functions.py +205 -0
- onnxslim-0.1.77/onnxslim/third_party/_sympy/numbers.py +397 -0
- onnxslim-0.1.77/onnxslim/third_party/_sympy/printers.py +491 -0
- onnxslim-0.1.77/onnxslim/third_party/_sympy/solve.py +172 -0
- onnxslim-0.1.77/onnxslim/third_party/_sympy/symbol.py +102 -0
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/exporters/onnx_exporter.py +103 -53
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/graph_pattern/graph_pattern.py +12 -13
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/importers/onnx_importer.py +30 -27
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/ir/function.py +13 -12
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/ir/graph.py +16 -15
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/ir/node.py +32 -37
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/ir/tensor.py +25 -14
- onnxslim-0.1.77/onnxslim/third_party/onnx_graphsurgeon/util/__init__.py +0 -0
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/util/misc.py +9 -8
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/symbolic_shape_infer.py +270 -178
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/utils.py +232 -88
- onnxslim-0.1.77/onnxslim/version.py +1 -0
- onnxslim-0.1.77/onnxslim.egg-info/PKG-INFO +146 -0
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim.egg-info/SOURCES.txt +11 -1
- onnxslim-0.1.77/onnxslim.egg-info/requires.txt +5 -0
- onnxslim-0.1.77/pyproject.toml +9 -0
- {onnxslim-0.1.46 → onnxslim-0.1.77}/setup.py +1 -1
- onnxslim-0.1.46/PKG-INFO +0 -101
- onnxslim-0.1.46/README.md +0 -81
- onnxslim-0.1.46/VERSION +0 -1
- onnxslim-0.1.46/onnxslim/misc/font.py +0 -3
- onnxslim-0.1.46/onnxslim/version.py +0 -1
- onnxslim-0.1.46/onnxslim.egg-info/PKG-INFO +0 -101
- onnxslim-0.1.46/onnxslim.egg-info/requires.txt +0 -3
- {onnxslim-0.1.46 → onnxslim-0.1.77}/LICENSE +0 -0
- {onnxslim-0.1.46 → onnxslim-0.1.77}/MANIFEST.in +0 -0
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/__main__.py +0 -0
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/cli/__init__.py +0 -0
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/core/pattern/fusion/gelu.py +0 -0
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/misc/__init__.py +0 -0
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/__init__.py +0 -0
- {onnxslim-0.1.46/onnxslim/third_party/onnx_graphsurgeon/ir → onnxslim-0.1.77/onnxslim/third_party/_sympy}/__init__.py +0 -0
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/__init__.py +0 -0
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/exporters/__init__.py +0 -0
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/exporters/base_exporter.py +0 -0
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/graph_pattern/__init__.py +0 -0
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/importers/__init__.py +0 -0
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/importers/base_importer.py +0 -0
- {onnxslim-0.1.46/onnxslim/third_party/onnx_graphsurgeon/util → onnxslim-0.1.77/onnxslim/third_party/onnx_graphsurgeon/ir}/__init__.py +0 -0
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/logger/__init__.py +0 -0
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/logger/logger.py +0 -0
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim/third_party/onnx_graphsurgeon/util/exception.py +0 -0
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim.egg-info/dependency_links.txt +0 -0
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim.egg-info/entry_points.txt +0 -0
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim.egg-info/top_level.txt +0 -0
- {onnxslim-0.1.46 → onnxslim-0.1.77}/onnxslim.egg-info/zip-safe +0 -0
- {onnxslim-0.1.46 → onnxslim-0.1.77}/setup.cfg +0 -0
onnxslim-0.1.77/PKG-INFO
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: onnxslim
|
|
3
|
+
Version: 0.1.77
|
|
4
|
+
Summary: OnnxSlim: A Toolkit to Help Optimize Onnx Model
|
|
5
|
+
Home-page: https://github.com/inisis/OnnxSlim
|
|
6
|
+
Author: inisis
|
|
7
|
+
Author-email: desmond.yao@buaa.edu.cn
|
|
8
|
+
License: MIT
|
|
9
|
+
Project-URL: Bug Tracker, https://github.com/inisis/OnnxSlim/issues
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
14
|
+
Requires-Python: >=3.6
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
License-File: LICENSE
|
|
17
|
+
Requires-Dist: onnx
|
|
18
|
+
Requires-Dist: sympy>=1.13.1
|
|
19
|
+
Requires-Dist: packaging
|
|
20
|
+
Requires-Dist: colorama
|
|
21
|
+
Requires-Dist: ml_dtypes
|
|
22
|
+
Dynamic: author
|
|
23
|
+
Dynamic: author-email
|
|
24
|
+
Dynamic: classifier
|
|
25
|
+
Dynamic: description
|
|
26
|
+
Dynamic: description-content-type
|
|
27
|
+
Dynamic: home-page
|
|
28
|
+
Dynamic: license
|
|
29
|
+
Dynamic: license-file
|
|
30
|
+
Dynamic: project-url
|
|
31
|
+
Dynamic: requires-dist
|
|
32
|
+
Dynamic: requires-python
|
|
33
|
+
Dynamic: summary
|
|
34
|
+
|
|
35
|
+
# OnnxSlim
|
|
36
|
+
|
|
37
|
+
<p align="center">
|
|
38
|
+
<a href="https://pypi.org/project/onnxslim">
|
|
39
|
+
<img src="https://img.shields.io/pypi/v/onnxslim?color=blue" />
|
|
40
|
+
</a>
|
|
41
|
+
<a href="https://pypi.org/project/onnxslim">
|
|
42
|
+
<img src="https://static.pepy.tech/badge/onnxslim/week" />
|
|
43
|
+
</a>
|
|
44
|
+
<a href="https://pypi.org/project/onnxslim">
|
|
45
|
+
<img src="https://static.pepy.tech/badge/onnxslim/month" />
|
|
46
|
+
</a>
|
|
47
|
+
<a href="https://pypi.org/project/onnxslim">
|
|
48
|
+
<img src="https://static.pepy.tech/badge/onnxslim" />
|
|
49
|
+
</a>
|
|
50
|
+
<a href="https://github.com/inisis/onnxslim/actions/workflows/ci.yaml">
|
|
51
|
+
<img src="https://github.com/inisis/onnxslim/actions/workflows/ci.yml/badge.svg" />
|
|
52
|
+
</a>
|
|
53
|
+
<a href="https://codecov.io/gh/inisis/onnxslim" >
|
|
54
|
+
<img src="https://codecov.io/gh/inisis/onnxslim/branch/main/graph/badge.svg?token=C69ZH6802N"/>
|
|
55
|
+
</a>
|
|
56
|
+
<a href="https://muhammadrizwanmunawar.medium.com/boost-onnx-load-speed-by-10-15-with-onnxslims-python-package-d401eb8c2e69">
|
|
57
|
+
<img src="https://img.shields.io/badge/Blog-OnnxSlim?style=flat&label=OnnxSlim" />
|
|
58
|
+
</a>
|
|
59
|
+
<a href="https://deepwiki.com/inisis/OnnxSlim"><img src="https://img.shields.io/badge/DeepWiki-inisis%2FOnnxSlim-blue.svg?logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACwAAAAyCAYAAAAnWDnqAAAAAXNSR0IArs4c6QAAA05JREFUaEPtmUtyEzEQhtWTQyQLHNak2AB7ZnyXZMEjXMGeK/AIi+QuHrMnbChYY7MIh8g01fJoopFb0uhhEqqcbWTp06/uv1saEDv4O3n3dV60RfP947Mm9/SQc0ICFQgzfc4CYZoTPAswgSJCCUJUnAAoRHOAUOcATwbmVLWdGoH//PB8mnKqScAhsD0kYP3j/Yt5LPQe2KvcXmGvRHcDnpxfL2zOYJ1mFwrryWTz0advv1Ut4CJgf5uhDuDj5eUcAUoahrdY/56ebRWeraTjMt/00Sh3UDtjgHtQNHwcRGOC98BJEAEymycmYcWwOprTgcB6VZ5JK5TAJ+fXGLBm3FDAmn6oPPjR4rKCAoJCal2eAiQp2x0vxTPB3ALO2CRkwmDy5WohzBDwSEFKRwPbknEggCPB/imwrycgxX2NzoMCHhPkDwqYMr9tRcP5qNrMZHkVnOjRMWwLCcr8ohBVb1OMjxLwGCvjTikrsBOiA6fNyCrm8V1rP93iVPpwaE+gO0SsWmPiXB+jikdf6SizrT5qKasx5j8ABbHpFTx+vFXp9EnYQmLx02h1QTTrl6eDqxLnGjporxl3NL3agEvXdT0WmEost648sQOYAeJS9Q7bfUVoMGnjo4AZdUMQku50McDcMWcBPvr0SzbTAFDfvJqwLzgxwATnCgnp4wDl6Aa+Ax283gghmj+vj7feE2KBBRMW3FzOpLOADl0Isb5587h/U4gGvkt5v60Z1VLG8BhYjbzRwyQZemwAd6cCR5/XFWLYZRIMpX39AR0tjaGGiGzLVyhse5C9RKC6ai42ppWPKiBagOvaYk8lO7DajerabOZP46Lby5wKjw1HCRx7p9sVMOWGzb/vA1hwiWc6jm3MvQDTogQkiqIhJV0nBQBTU+3okKCFDy9WwferkHjtxib7t3xIUQtHxnIwtx4mpg26/HfwVNVDb4oI9RHmx5WGelRVlrtiw43zboCLaxv46AZeB3IlTkwouebTr1y2NjSpHz68WNFjHvupy3q8TFn3Hos2IAk4Ju5dCo8B3wP7VPr/FGaKiG+T+v+TQqIrOqMTL1VdWV1DdmcbO8KXBz6esmYWYKPwDL5b5FA1a0hwapHiom0r/cKaoqr+27/XcrS5UwSMbQAAAABJRU5ErkJggg==" alt="DeepWiki"></a>
|
|
60
|
+
</p>
|
|
61
|
+
|
|
62
|
+
OnnxSlim can help you slim your ONNX model: fewer operators, the same accuracy, and better inference speed.
|
|
63
|
+
|
|
64
|
+
- 🚀 2025/05/17: OnnxSlim is merged into [optimum](https://github.com/huggingface/optimum) 🤗🤗🤗
|
|
65
|
+
- 🚀 2025/04/30: Rank 1st in the [AICAS 2025 LLM inference optimization challenge](https://tianchi.aliyun.com/competition/entrance/532289/customize588)
|
|
66
|
+
- 🚀 2025/01/28: Achieved 1M downloads
|
|
67
|
+
- 🚀 2024/06/23: OnnxSlim is merged into [transformers.js](https://github.com/huggingface/transformers.js) 🤗🤗🤗
|
|
68
|
+
- 🚀 2024/06/02: OnnxSlim is merged into [ultralytics](https://github.com/ultralytics/ultralytics) ❤️❤️❤️
|
|
69
|
+
- 🚀 2024/04/30: Rank 1st in the [AICAS 2024 LLM inference optimization challenge](https://tianchi.aliyun.com/competition/entrance/532170/customize440) held by Arm and T-head
|
|
70
|
+
- 🚀 2024/01/25: OnnxSlim is merged to [mnn-llm](https://github.com/wangzhaode/mnn-llm), performance increased by 5%
|
|
71
|
+
|
|
72
|
+
# Benchmark
|
|
73
|
+
|
|
74
|
+

|
|
75
|
+
|
|
76
|
+
# Installation
|
|
77
|
+
|
|
78
|
+
## Using Prebuilt
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
pip install onnxslim
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## Install From Source
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
pip install git+https://github.com/inisis/OnnxSlim@main
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Install From Local
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
git clone https://github.com/inisis/OnnxSlim && cd OnnxSlim/
|
|
94
|
+
pip install .
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
# How to use
|
|
98
|
+
|
|
99
|
+
## Bash
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
onnxslim your_onnx_model slimmed_onnx_model
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
<div align=left><img src="https://raw.githubusercontent.com/inisis/onnxslim/main/images/onnxslim.gif"></div>
|
|
106
|
+
|
|
107
|
+
## Inscript
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
import onnx
|
|
111
|
+
import onnxslim
|
|
112
|
+
|
|
113
|
+
model = onnx.load("model.onnx")
|
|
114
|
+
slimmed_model = onnxslim.slim(model)
|
|
115
|
+
onnx.save(slimmed_model, "slimmed_model.onnx")
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
For more usage, see onnxslim -h or refer to our [examples](./examples)
|
|
119
|
+
|
|
120
|
+
# Projects using OnnxSlim
|
|
121
|
+
|
|
122
|
+
- <img src="https://avatars.githubusercontent.com/u/131524?s=48&v=4" width="22" height="22"/>[Mozilla/smart_autofill](https://github.com/mozilla/smart_autofill)
|
|
123
|
+
- <img src="https://avatars.githubusercontent.com/u/1961952?s=48&v=4" width="22" height="22"/>[alibaba/MNN](https://github.com/alibaba/MNN)
|
|
124
|
+
- <img src="https://avatars.githubusercontent.com/u/23534030?s=48&v=4" width="22" height="22"/>[PaddlePaddle/PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR)
|
|
125
|
+
- <img src="https://avatars.githubusercontent.com/u/25720743?s=48&v=4" width="22" height="22"/>[huggingface/transformers.js](https://github.com/huggingface/transformers.js)
|
|
126
|
+
- <img src="https://avatars.githubusercontent.com/u/25720743?s=48&v=4" width="22" height="22"/>[huggingface/optimum](https://github.com/huggingface/optimum)
|
|
127
|
+
- <img src="https://avatars.githubusercontent.com/u/86091366?s=48&v=4" width="22" height="22"/>[THU-MIG/yolov10](https://github.com/THU-MIG/yolov10)
|
|
128
|
+
- <img src="https://avatars.githubusercontent.com/u/26833451?s=48&v=4" width="22" height="22"/>[ultralytics/ultralytics](https://github.com/ultralytics/ultralytics)
|
|
129
|
+
- <img src="https://avatars.githubusercontent.com/u/109945100?s=48&v=4" width="22" height="22"/>[ModelScope/FunASR](https://github.com/modelscope/FunASR)
|
|
130
|
+
- <img src="https://avatars.githubusercontent.com/u/1961952?s=48&v=4" width="22" height="22"/>[alibaba/MNN-LLM](https://github.com/wangzhaode/mnn-llm)
|
|
131
|
+
- <img src="https://avatars.githubusercontent.com/u/126587470?s=48&v=4" width="22" height="22"/>[deepghs/imgutils](https://github.com/deepghs/imgutils)
|
|
132
|
+
- <img src="https://avatars.githubusercontent.com/u/48153283?s=48&v=4" width="22" height="22"/>[sunsmarterjie/yolov12](https://github.com/sunsmarterjie/yolov12)
|
|
133
|
+
- <img src="https://avatars.githubusercontent.com/u/147458884?s=48&v=4" width="22" height="22"/>[nndeploy/nndeploy](https://github.com/nndeploy/nndeploy)
|
|
134
|
+
- <img src="https://avatars.githubusercontent.com/u/111754012?s=48&v=4" width="22" height="22"/>[CVCUDA/CV-CUDA](https://github.com/CVCUDA/CV-CUDA)
|
|
135
|
+
|
|
136
|
+
# References
|
|
137
|
+
|
|
138
|
+
> - [onnx-graphsurgeon](https://github.com/NVIDIA/TensorRT/tree/main/tools/onnx-graphsurgeon)
|
|
139
|
+
> - [Polygraphy](https://github.com/NVIDIA/TensorRT/tree/main/tools/Polygraphy/polygraphy)
|
|
140
|
+
> - [onnx-simplifier](https://github.com/daquexian/onnx-simplifier)
|
|
141
|
+
> - [tabulate](https://github.com/astanin/python-tabulate)
|
|
142
|
+
> - [onnxruntime](https://github.com/microsoft/onnxruntime)
|
|
143
|
+
|
|
144
|
+
# Contact
|
|
145
|
+
|
|
146
|
+
Discord: https://discord.gg/nRw2Fd3VUS QQ Group: `873569894`
|
|
onnxslim-0.1.77/README.md
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
# OnnxSlim
|
|
2
|
+
|
|
3
|
+
<p align="center">
|
|
4
|
+
<a href="https://pypi.org/project/onnxslim">
|
|
5
|
+
<img src="https://img.shields.io/pypi/v/onnxslim?color=blue" />
|
|
6
|
+
</a>
|
|
7
|
+
<a href="https://pypi.org/project/onnxslim">
|
|
8
|
+
<img src="https://static.pepy.tech/badge/onnxslim/week" />
|
|
9
|
+
</a>
|
|
10
|
+
<a href="https://pypi.org/project/onnxslim">
|
|
11
|
+
<img src="https://static.pepy.tech/badge/onnxslim/month" />
|
|
12
|
+
</a>
|
|
13
|
+
<a href="https://pypi.org/project/onnxslim">
|
|
14
|
+
<img src="https://static.pepy.tech/badge/onnxslim" />
|
|
15
|
+
</a>
|
|
16
|
+
<a href="https://github.com/inisis/onnxslim/actions/workflows/ci.yaml">
|
|
17
|
+
<img src="https://github.com/inisis/onnxslim/actions/workflows/ci.yml/badge.svg" />
|
|
18
|
+
</a>
|
|
19
|
+
<a href="https://codecov.io/gh/inisis/onnxslim" >
|
|
20
|
+
<img src="https://codecov.io/gh/inisis/onnxslim/branch/main/graph/badge.svg?token=C69ZH6802N"/>
|
|
21
|
+
</a>
|
|
22
|
+
<a href="https://muhammadrizwanmunawar.medium.com/boost-onnx-load-speed-by-10-15-with-onnxslims-python-package-d401eb8c2e69">
|
|
23
|
+
<img src="https://img.shields.io/badge/Blog-OnnxSlim?style=flat&label=OnnxSlim" />
|
|
24
|
+
</a>
|
|
25
|
+
<a href="https://deepwiki.com/inisis/OnnxSlim"><img src="https://img.shields.io/badge/DeepWiki-inisis%2FOnnxSlim-blue.svg?logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACwAAAAyCAYAAAAnWDnqAAAAAXNSR0IArs4c6QAAA05JREFUaEPtmUtyEzEQhtWTQyQLHNak2AB7ZnyXZMEjXMGeK/AIi+QuHrMnbChYY7MIh8g01fJoopFb0uhhEqqcbWTp06/uv1saEDv4O3n3dV60RfP947Mm9/SQc0ICFQgzfc4CYZoTPAswgSJCCUJUnAAoRHOAUOcATwbmVLWdGoH//PB8mnKqScAhsD0kYP3j/Yt5LPQe2KvcXmGvRHcDnpxfL2zOYJ1mFwrryWTz0advv1Ut4CJgf5uhDuDj5eUcAUoahrdY/56ebRWeraTjMt/00Sh3UDtjgHtQNHwcRGOC98BJEAEymycmYcWwOprTgcB6VZ5JK5TAJ+fXGLBm3FDAmn6oPPjR4rKCAoJCal2eAiQp2x0vxTPB3ALO2CRkwmDy5WohzBDwSEFKRwPbknEggCPB/imwrycgxX2NzoMCHhPkDwqYMr9tRcP5qNrMZHkVnOjRMWwLCcr8ohBVb1OMjxLwGCvjTikrsBOiA6fNyCrm8V1rP93iVPpwaE+gO0SsWmPiXB+jikdf6SizrT5qKasx5j8ABbHpFTx+vFXp9EnYQmLx02h1QTTrl6eDqxLnGjporxl3NL3agEvXdT0WmEost648sQOYAeJS9Q7bfUVoMGnjo4AZdUMQku50McDcMWcBPvr0SzbTAFDfvJqwLzgxwATnCgnp4wDl6Aa+Ax283gghmj+vj7feE2KBBRMW3FzOpLOADl0Isb5587h/U4gGvkt5v60Z1VLG8BhYjbzRwyQZemwAd6cCR5/XFWLYZRIMpX39AR0tjaGGiGzLVyhse5C9RKC6ai42ppWPKiBagOvaYk8lO7DajerabOZP46Lby5wKjw1HCRx7p9sVMOWGzb/vA1hwiWc6jm3MvQDTogQkiqIhJV0nBQBTU+3okKCFDy9WwferkHjtxib7t3xIUQtHxnIwtx4mpg26/HfwVNVDb4oI9RHmx5WGelRVlrtiw43zboCLaxv46AZeB3IlTkwouebTr1y2NjSpHz68WNFjHvupy3q8TFn3Hos2IAk4Ju5dCo8B3wP7VPr/FGaKiG+T+v+TQqIrOqMTL1VdWV1DdmcbO8KXBz6esmYWYKPwDL5b5FA1a0hwapHiom0r/cKaoqr+27/XcrS5UwSMbQAAAABJRU5ErkJggg==" alt="DeepWiki"></a>
|
|
26
|
+
</p>
|
|
27
|
+
|
|
28
|
+
OnnxSlim can help you slim your ONNX model: fewer operators, the same accuracy, and better inference speed.
|
|
29
|
+
|
|
30
|
+
- 🚀 2025/05/17: OnnxSlim is merged into [optimum](https://github.com/huggingface/optimum) 🤗🤗🤗
|
|
31
|
+
- 🚀 2025/04/30: Rank 1st in the [AICAS 2025 LLM inference optimization challenge](https://tianchi.aliyun.com/competition/entrance/532289/customize588)
|
|
32
|
+
- 🚀 2025/01/28: Achieved 1M downloads
|
|
33
|
+
- 🚀 2024/06/23: OnnxSlim is merged into [transformers.js](https://github.com/huggingface/transformers.js) 🤗🤗🤗
|
|
34
|
+
- 🚀 2024/06/02: OnnxSlim is merged into [ultralytics](https://github.com/ultralytics/ultralytics) ❤️❤️❤️
|
|
35
|
+
- 🚀 2024/04/30: Rank 1st in the [AICAS 2024 LLM inference optimization challenge](https://tianchi.aliyun.com/competition/entrance/532170/customize440) held by Arm and T-head
|
|
36
|
+
- 🚀 2024/01/25: OnnxSlim is merged to [mnn-llm](https://github.com/wangzhaode/mnn-llm), performance increased by 5%
|
|
37
|
+
|
|
38
|
+
# Benchmark
|
|
39
|
+
|
|
40
|
+

|
|
41
|
+
|
|
42
|
+
# Installation
|
|
43
|
+
|
|
44
|
+
## Using Prebuilt
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
pip install onnxslim
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Install From Source
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
pip install git+https://github.com/inisis/OnnxSlim@main
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Install From Local
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
git clone https://github.com/inisis/OnnxSlim && cd OnnxSlim/
|
|
60
|
+
pip install .
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
# How to use
|
|
64
|
+
|
|
65
|
+
## Bash
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
onnxslim your_onnx_model slimmed_onnx_model
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
<div align=left><img src="https://raw.githubusercontent.com/inisis/onnxslim/main/images/onnxslim.gif"></div>
|
|
72
|
+
|
|
73
|
+
## Inscript
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
import onnx
|
|
77
|
+
import onnxslim
|
|
78
|
+
|
|
79
|
+
model = onnx.load("model.onnx")
|
|
80
|
+
slimmed_model = onnxslim.slim(model)
|
|
81
|
+
onnx.save(slimmed_model, "slimmed_model.onnx")
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
For more usage, see onnxslim -h or refer to our [examples](./examples)
|
|
85
|
+
|
|
86
|
+
# Projects using OnnxSlim
|
|
87
|
+
|
|
88
|
+
- <img src="https://avatars.githubusercontent.com/u/131524?s=48&v=4" width="22" height="22"/>[Mozilla/smart_autofill](https://github.com/mozilla/smart_autofill)
|
|
89
|
+
- <img src="https://avatars.githubusercontent.com/u/1961952?s=48&v=4" width="22" height="22"/>[alibaba/MNN](https://github.com/alibaba/MNN)
|
|
90
|
+
- <img src="https://avatars.githubusercontent.com/u/23534030?s=48&v=4" width="22" height="22"/>[PaddlePaddle/PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR)
|
|
91
|
+
- <img src="https://avatars.githubusercontent.com/u/25720743?s=48&v=4" width="22" height="22"/>[huggingface/transformers.js](https://github.com/huggingface/transformers.js)
|
|
92
|
+
- <img src="https://avatars.githubusercontent.com/u/25720743?s=48&v=4" width="22" height="22"/>[huggingface/optimum](https://github.com/huggingface/optimum)
|
|
93
|
+
- <img src="https://avatars.githubusercontent.com/u/86091366?s=48&v=4" width="22" height="22"/>[THU-MIG/yolov10](https://github.com/THU-MIG/yolov10)
|
|
94
|
+
- <img src="https://avatars.githubusercontent.com/u/26833451?s=48&v=4" width="22" height="22"/>[ultralytics/ultralytics](https://github.com/ultralytics/ultralytics)
|
|
95
|
+
- <img src="https://avatars.githubusercontent.com/u/109945100?s=48&v=4" width="22" height="22"/>[ModelScope/FunASR](https://github.com/modelscope/FunASR)
|
|
96
|
+
- <img src="https://avatars.githubusercontent.com/u/1961952?s=48&v=4" width="22" height="22"/>[alibaba/MNN-LLM](https://github.com/wangzhaode/mnn-llm)
|
|
97
|
+
- <img src="https://avatars.githubusercontent.com/u/126587470?s=48&v=4" width="22" height="22"/>[deepghs/imgutils](https://github.com/deepghs/imgutils)
|
|
98
|
+
- <img src="https://avatars.githubusercontent.com/u/48153283?s=48&v=4" width="22" height="22"/>[sunsmarterjie/yolov12](https://github.com/sunsmarterjie/yolov12)
|
|
99
|
+
- <img src="https://avatars.githubusercontent.com/u/147458884?s=48&v=4" width="22" height="22"/>[nndeploy/nndeploy](https://github.com/nndeploy/nndeploy)
|
|
100
|
+
- <img src="https://avatars.githubusercontent.com/u/111754012?s=48&v=4" width="22" height="22"/>[CVCUDA/CV-CUDA](https://github.com/CVCUDA/CV-CUDA)
|
|
101
|
+
|
|
102
|
+
# References
|
|
103
|
+
|
|
104
|
+
> - [onnx-graphsurgeon](https://github.com/NVIDIA/TensorRT/tree/main/tools/onnx-graphsurgeon)
|
|
105
|
+
> - [Polygraphy](https://github.com/NVIDIA/TensorRT/tree/main/tools/Polygraphy/polygraphy)
|
|
106
|
+
> - [onnx-simplifier](https://github.com/daquexian/onnx-simplifier)
|
|
107
|
+
> - [tabulate](https://github.com/astanin/python-tabulate)
|
|
108
|
+
> - [onnxruntime](https://github.com/microsoft/onnxruntime)
|
|
109
|
+
|
|
110
|
+
# Contact
|
|
111
|
+
|
|
112
|
+
Discord: https://discord.gg/nRw2Fd3VUS QQ Group: `873569894`
|
onnxslim-0.1.77/VERSION
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0.1.77
|
|
@@ -2,10 +2,28 @@ import argparse
|
|
|
2
2
|
import dataclasses
|
|
3
3
|
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
|
|
4
4
|
from dataclasses import dataclass, field
|
|
5
|
-
from typing import List, Optional, Type, Union, get_args, get_origin
|
|
6
|
-
|
|
7
|
-
import
|
|
8
|
-
|
|
5
|
+
from typing import List, Optional, Type, Union, get_args, get_origin, TypedDict, Dict, Literal
|
|
6
|
+
|
|
7
|
+
from .core.optimization import OptimizationSettings
|
|
8
|
+
from .core.pattern.registry import DEFAULT_FUSION_PATTERNS
|
|
9
|
+
from .version import __version__
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class OnnxSlimKwargs(TypedDict, total=False):
|
|
13
|
+
model_check: bool
|
|
14
|
+
input_shapes: Dict[str, List[int]]
|
|
15
|
+
inputs: List[str]
|
|
16
|
+
outputs: List[str]
|
|
17
|
+
no_shape_infer: bool
|
|
18
|
+
skip_optimizations: List[str]
|
|
19
|
+
dtype: Literal["float16", "float32", "uint8", "int8"]
|
|
20
|
+
skip_fusion_patterns: List[str]
|
|
21
|
+
size_threshold: int
|
|
22
|
+
inspect: bool
|
|
23
|
+
dump_to_disk: bool
|
|
24
|
+
save_as_external_data: bool
|
|
25
|
+
model_check_inputs: Optional[List[str]]
|
|
26
|
+
verbose: bool
|
|
9
27
|
|
|
10
28
|
def _get_inner_type(arg_type):
|
|
11
29
|
if get_origin(arg_type) is Union:
|
|
@@ -38,14 +56,24 @@ class OptimizationArguments:
|
|
|
38
56
|
"""
|
|
39
57
|
|
|
40
58
|
no_shape_infer: bool = field(default=False, metadata={"help": "whether to disable shape_infer, default false."})
|
|
41
|
-
|
|
42
|
-
default=
|
|
59
|
+
skip_optimizations: Optional[List[str]] = field(
|
|
60
|
+
default=None,
|
|
61
|
+
metadata={
|
|
62
|
+
"help": "whether to skip some optimizations",
|
|
63
|
+
"choices": list(OptimizationSettings.keys()),
|
|
64
|
+
},
|
|
43
65
|
)
|
|
44
66
|
skip_fusion_patterns: Optional[List[str]] = field(
|
|
45
67
|
default=None,
|
|
46
68
|
metadata={
|
|
47
69
|
"help": "whether to skip the fusion of some patterns",
|
|
48
|
-
"choices": list(
|
|
70
|
+
"choices": list(DEFAULT_FUSION_PATTERNS.keys()),
|
|
71
|
+
},
|
|
72
|
+
)
|
|
73
|
+
size_threshold: int = field(
|
|
74
|
+
default=None,
|
|
75
|
+
metadata={
|
|
76
|
+
"help": "size threshold in bytes, size larger than this value will not be folded, default None, which means fold all constants",
|
|
49
77
|
},
|
|
50
78
|
)
|
|
51
79
|
|
|
@@ -163,7 +191,7 @@ class OnnxSlimArgumentParser(ArgumentParser):
|
|
|
163
191
|
# Add positional arguments separately for ModelArguments
|
|
164
192
|
self.parser.add_argument("input_model", help="input onnx model")
|
|
165
193
|
self.parser.add_argument("output_model", nargs="?", default=None, help="output onnx model")
|
|
166
|
-
self.parser.add_argument("-v", "--version", action="version", version=
|
|
194
|
+
self.parser.add_argument("-v", "--version", action="version", version=__version__)
|
|
167
195
|
|
|
168
196
|
def parse_args_into_dataclasses(self):
|
|
169
197
|
# Pre-parse arguments to check for `--inspect`
|
|
@@ -1,14 +1,17 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import onnx
|
|
4
4
|
|
|
5
|
+
from onnxslim.argparser import OnnxSlimKwargs
|
|
5
6
|
|
|
6
|
-
|
|
7
|
+
|
|
8
|
+
def slim(model: str | onnx.ModelProto | list[str | onnx.ModelProto], *args, **kwargs: OnnxSlimKwargs):
|
|
7
9
|
import os
|
|
8
10
|
import time
|
|
9
11
|
from pathlib import Path
|
|
10
12
|
|
|
11
13
|
from onnxslim.core import (
|
|
14
|
+
OptimizationSettings,
|
|
12
15
|
convert_data_format,
|
|
13
16
|
freeze,
|
|
14
17
|
input_modification,
|
|
@@ -18,6 +21,7 @@ def slim(model: Union[str, onnx.ModelProto, List[Union[str, onnx.ModelProto]]],
|
|
|
18
21
|
shape_infer,
|
|
19
22
|
)
|
|
20
23
|
from onnxslim.utils import (
|
|
24
|
+
TensorInfo,
|
|
21
25
|
check_onnx,
|
|
22
26
|
check_point,
|
|
23
27
|
check_result,
|
|
@@ -27,6 +31,7 @@ def slim(model: Union[str, onnx.ModelProto, List[Union[str, onnx.ModelProto]]],
|
|
|
27
31
|
print_model_info_as_table,
|
|
28
32
|
save,
|
|
29
33
|
summarize_model,
|
|
34
|
+
update_outputs_dims,
|
|
30
35
|
)
|
|
31
36
|
|
|
32
37
|
output_model = args[0] if len(args) > 0 else kwargs.get("output_model", None)
|
|
@@ -35,9 +40,11 @@ def slim(model: Union[str, onnx.ModelProto, List[Union[str, onnx.ModelProto]]],
|
|
|
35
40
|
inputs = kwargs.get("inputs", None)
|
|
36
41
|
outputs = kwargs.get("outputs", None)
|
|
37
42
|
no_shape_infer = kwargs.get("no_shape_infer", False)
|
|
38
|
-
|
|
43
|
+
skip_optimizations = kwargs.get("skip_optimizations", None)
|
|
39
44
|
dtype = kwargs.get("dtype", None)
|
|
40
45
|
skip_fusion_patterns = kwargs.get("skip_fusion_patterns", None)
|
|
46
|
+
size_threshold = kwargs.get("size_threshold", None)
|
|
47
|
+
size_threshold = int(size_threshold) if size_threshold else None
|
|
41
48
|
kwargs.get("inspect", False)
|
|
42
49
|
dump_to_disk = kwargs.get("dump_to_disk", False)
|
|
43
50
|
save_as_external_data = kwargs.get("save_as_external_data", False)
|
|
@@ -92,14 +99,17 @@ def slim(model: Union[str, onnx.ModelProto, List[Union[str, onnx.ModelProto]]],
|
|
|
92
99
|
if model_check:
|
|
93
100
|
input_data_dict, raw_onnx_output, model = check_onnx(model, model_check_inputs)
|
|
94
101
|
|
|
102
|
+
output_info = {TensorInfo(o).name: TensorInfo(o).shape for o in model.graph.output}
|
|
103
|
+
|
|
95
104
|
if not no_shape_infer:
|
|
96
105
|
model = shape_infer(model)
|
|
97
106
|
|
|
98
|
-
|
|
107
|
+
OptimizationSettings.reset(skip_optimizations)
|
|
108
|
+
if OptimizationSettings.enabled():
|
|
99
109
|
graph_check_point = check_point(model)
|
|
100
110
|
while MAX_ITER > 0:
|
|
101
111
|
logger.debug(f"iter: {MAX_ITER}")
|
|
102
|
-
model = optimize(model, skip_fusion_patterns)
|
|
112
|
+
model = optimize(model, skip_fusion_patterns, size_threshold)
|
|
103
113
|
if not no_shape_infer:
|
|
104
114
|
model = shape_infer(model)
|
|
105
115
|
graph = check_point(model)
|
|
@@ -114,6 +124,8 @@ def slim(model: Union[str, onnx.ModelProto, List[Union[str, onnx.ModelProto]]],
|
|
|
114
124
|
if dtype:
|
|
115
125
|
model = convert_data_format(model, dtype)
|
|
116
126
|
|
|
127
|
+
model = update_outputs_dims(model, output_dims=output_info)
|
|
128
|
+
|
|
117
129
|
if model_check:
|
|
118
130
|
slimmed_onnx_output, model = onnxruntime_inference(model, input_data_dict)
|
|
119
131
|
if not check_result(raw_onnx_output, slimmed_onnx_output):
|
|
@@ -151,10 +163,11 @@ def main():
|
|
|
151
163
|
if not checker_args.inspect and checker_args.dump_to_disk:
|
|
152
164
|
argument_parser.error("dump_to_disk can only be used with --inspect")
|
|
153
165
|
|
|
154
|
-
if not optimization_args.no_shape_infer
|
|
155
|
-
from onnxslim.utils import check_onnx_compatibility
|
|
166
|
+
if not optimization_args.no_shape_infer:
|
|
167
|
+
from onnxslim.utils import check_onnx_compatibility, is_onnxruntime_available
|
|
156
168
|
|
|
157
|
-
|
|
169
|
+
if is_onnxruntime_available():
|
|
170
|
+
check_onnx_compatibility()
|
|
158
171
|
|
|
159
172
|
slim(
|
|
160
173
|
model_args.input_model,
|
|
@@ -1,15 +1,18 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
4
|
import os
|
|
3
5
|
import tempfile
|
|
6
|
+
from typing import Optional
|
|
4
7
|
|
|
5
8
|
import numpy as np
|
|
6
9
|
import onnx
|
|
7
10
|
from onnx import checker
|
|
8
11
|
|
|
9
12
|
import onnxslim.third_party.onnx_graphsurgeon as gs
|
|
10
|
-
from onnxslim.core.optimization import optimize_model
|
|
13
|
+
from onnxslim.core.optimization import OptimizationSettings, optimize_model
|
|
11
14
|
from onnxslim.third_party.onnx_graphsurgeon.exporters.onnx_exporter import dtype_to_onnx
|
|
12
|
-
from onnxslim.third_party.onnx_graphsurgeon.ir.tensor import Constant
|
|
15
|
+
from onnxslim.third_party.onnx_graphsurgeon.ir.tensor import Constant
|
|
13
16
|
from onnxslim.third_party.symbolic_shape_infer import SymbolicShapeInference
|
|
14
17
|
from onnxslim.utils import save
|
|
15
18
|
|
|
@@ -18,6 +21,7 @@ logger = logging.getLogger("onnxslim")
|
|
|
18
21
|
|
|
19
22
|
DEBUG = bool(os.getenv("ONNXSLIM_DEBUG"))
|
|
20
23
|
AUTO_MERGE = True if os.getenv("ONNXSLIM_AUTO_MERGE") is None else bool(int(os.getenv("ONNXSLIM_AUTO_MERGE")))
|
|
24
|
+
FORCE_ONNXRUNTIME_SHAPE_INFERENCE = bool(os.getenv("ONNXSLIM_FORCE_ONNXRUNTIME_SHAPE_INFERENCE"))
|
|
21
25
|
|
|
22
26
|
|
|
23
27
|
def input_shape_modification(model: onnx.ModelProto, input_shapes: str) -> onnx.ModelProto:
|
|
@@ -122,6 +126,9 @@ def input_modification(model: onnx.ModelProto, inputs: str) -> onnx.ModelProto:
|
|
|
122
126
|
def shape_infer(model: onnx.ModelProto):
|
|
123
127
|
"""Infer tensor shapes in an ONNX model using symbolic and static shape inference techniques."""
|
|
124
128
|
logger.debug("Start shape inference.")
|
|
129
|
+
if FORCE_ONNXRUNTIME_SHAPE_INFERENCE:
|
|
130
|
+
logger.debug("force onnxruntime shape infer.")
|
|
131
|
+
return SymbolicShapeInference.infer_shapes(model, auto_merge=AUTO_MERGE)
|
|
125
132
|
try:
|
|
126
133
|
logger.debug("try onnxruntime shape infer.")
|
|
127
134
|
model = SymbolicShapeInference.infer_shapes(model, auto_merge=AUTO_MERGE)
|
|
@@ -142,14 +149,15 @@ def shape_infer(model: onnx.ModelProto):
|
|
|
142
149
|
return model
|
|
143
150
|
|
|
144
151
|
|
|
145
|
-
def optimize(model: onnx.ModelProto, skip_fusion_patterns: str = None):
|
|
152
|
+
def optimize(model: onnx.ModelProto, skip_fusion_patterns: str | None = None, size_threshold: int | None = None):
|
|
146
153
|
"""Optimize the given ONNX model with options to skip specific fusion patterns and return the optimized model."""
|
|
147
154
|
logger.debug("Start converting model to gs.")
|
|
148
155
|
graph = gs.import_onnx(model).toposort()
|
|
149
156
|
logger.debug("Finish converting model to gs.")
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
157
|
+
if OptimizationSettings.constant_folding:
|
|
158
|
+
logger.debug("Start constant folding.")
|
|
159
|
+
graph.fold_constants(size_threshold=size_threshold).cleanup().toposort()
|
|
160
|
+
logger.debug("Finish constant folding.")
|
|
153
161
|
logger.debug("Start optimize model.")
|
|
154
162
|
model = optimize_model(graph, skip_fusion_patterns)
|
|
155
163
|
logger.debug("Finish optimize model.")
|
|
@@ -170,11 +178,11 @@ def convert_data_format(model: onnx.ModelProto, dtype: str) -> onnx.ModelProto:
|
|
|
170
178
|
|
|
171
179
|
for node in graph.nodes:
|
|
172
180
|
if node.op == "Cast":
|
|
173
|
-
inp_dtype =
|
|
181
|
+
inp_dtype = next(input.dtype for input in node.inputs)
|
|
174
182
|
if inp_dtype in [np.float16, np.float32]:
|
|
175
|
-
node.
|
|
183
|
+
node.erase()
|
|
176
184
|
else:
|
|
177
|
-
outp_dtype =
|
|
185
|
+
outp_dtype = next(output.dtype for output in node.outputs)
|
|
178
186
|
if outp_dtype == np.float16:
|
|
179
187
|
node.attrs["to"] = dtype_to_onnx(np.float32)
|
|
180
188
|
node.outputs[0].dtype = np.float32
|
|
@@ -1,6 +1,8 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
4
|
from collections import Counter
|
|
3
|
-
from typing import List, Union
|
|
5
|
+
from typing import List, Optional, Union
|
|
4
6
|
|
|
5
7
|
import onnx
|
|
6
8
|
|
|
@@ -15,19 +17,62 @@ from .subexpression_elimination import subexpression_elimination
|
|
|
15
17
|
from .weight_tying import tie_weights
|
|
16
18
|
|
|
17
19
|
|
|
18
|
-
|
|
20
|
+
class OptimizationSettings:
|
|
21
|
+
constant_folding = True
|
|
22
|
+
graph_fusion = True
|
|
23
|
+
dead_node_elimination = True
|
|
24
|
+
subexpression_elimination = True
|
|
25
|
+
weight_tying = True
|
|
26
|
+
|
|
27
|
+
@classmethod
|
|
28
|
+
def keys(cls):
|
|
29
|
+
return [
|
|
30
|
+
"constant_folding",
|
|
31
|
+
"graph_fusion",
|
|
32
|
+
"dead_node_elimination",
|
|
33
|
+
"subexpression_elimination",
|
|
34
|
+
"weight_tying",
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
@classmethod
|
|
38
|
+
def reset(cls, skip_optimizations: list[str] | None = None):
|
|
39
|
+
for key in cls.keys():
|
|
40
|
+
if skip_optimizations and key in skip_optimizations:
|
|
41
|
+
setattr(cls, key, False)
|
|
42
|
+
else:
|
|
43
|
+
setattr(cls, key, True)
|
|
44
|
+
|
|
45
|
+
@classmethod
|
|
46
|
+
def stats(cls):
|
|
47
|
+
return {key: getattr(cls, key) for key in cls.keys()}
|
|
48
|
+
|
|
49
|
+
@classmethod
|
|
50
|
+
def enabled(cls):
|
|
51
|
+
return any([getattr(cls, key) for key in cls.keys()])
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def optimize_model(model: onnx.ModelProto | gs.Graph, skip_fusion_patterns: str | None = None) -> onnx.ModelProto:
|
|
19
55
|
"""Optimize and transform the given ONNX model using various fusion patterns and graph rewriting techniques."""
|
|
20
56
|
graph = model if isinstance(model, gs.Graph) else gs.import_onnx(model)
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
graph
|
|
25
|
-
|
|
26
|
-
dead_node_elimination
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
57
|
+
if OptimizationSettings.graph_fusion:
|
|
58
|
+
logger.debug("Start graph_fusion.")
|
|
59
|
+
fusion_patterns = get_fusion_patterns(skip_fusion_patterns)
|
|
60
|
+
graph_fusion(graph, fusion_patterns)
|
|
61
|
+
logger.debug("Finish graph_fusion.")
|
|
62
|
+
if OptimizationSettings.dead_node_elimination:
|
|
63
|
+
logger.debug("Start dead_node_elimination.")
|
|
64
|
+
dead_node_elimination(graph)
|
|
65
|
+
graph.cleanup(remove_unused_graph_inputs=True).toposort()
|
|
66
|
+
logger.debug("Finish dead_node_elimination.")
|
|
67
|
+
if OptimizationSettings.subexpression_elimination:
|
|
68
|
+
logger.debug("Start subexpression_elimination.")
|
|
69
|
+
subexpression_elimination(graph)
|
|
70
|
+
graph.cleanup(remove_unused_graph_inputs=True).toposort()
|
|
71
|
+
logger.debug("Finish subexpression_elimination.")
|
|
72
|
+
if OptimizationSettings.weight_tying:
|
|
73
|
+
logger.debug("Start weight_tying.")
|
|
74
|
+
tie_weights(graph)
|
|
75
|
+
logger.debug("Finish weight_tying.")
|
|
31
76
|
model = gs.export_onnx(graph)
|
|
32
77
|
|
|
33
78
|
return model
|
|
@@ -38,9 +83,9 @@ def replace_custom_layer(
|
|
|
38
83
|
self,
|
|
39
84
|
op: str,
|
|
40
85
|
inputs,
|
|
41
|
-
outputs:
|
|
86
|
+
outputs: list[str],
|
|
42
87
|
name: str,
|
|
43
|
-
attrs: dict = None,
|
|
88
|
+
attrs: dict | None = None,
|
|
44
89
|
domain: str = "ai.onnx.contrib",
|
|
45
90
|
):
|
|
46
91
|
"""Replace a custom layer in the computational graph with specified parameters and domain."""
|
|
@@ -54,9 +99,21 @@ def replace_custom_layer(
|
|
|
54
99
|
)
|
|
55
100
|
|
|
56
101
|
|
|
102
|
+
def graph_fusion(graph: Graph, fusion_patterns: dict, is_subgraph=False):
|
|
103
|
+
for subgraph in graph.subgraphs():
|
|
104
|
+
graph_fusion(subgraph, fusion_patterns, is_subgraph=True)
|
|
105
|
+
|
|
106
|
+
fusion_pairs = find_matches(graph, fusion_patterns)
|
|
107
|
+
for match in fusion_pairs.values():
|
|
108
|
+
graph.replace_custom_layer(**match)
|
|
109
|
+
|
|
110
|
+
graph.cleanup(remove_unused_graph_inputs=True if not is_subgraph else False).toposort()
|
|
111
|
+
|
|
112
|
+
|
|
57
113
|
def find_matches(graph: Graph, fusion_patterns: dict):
|
|
58
114
|
"""Find matching patterns in the graph based on provided fusion patterns."""
|
|
59
115
|
match_map = {}
|
|
116
|
+
|
|
60
117
|
counter = Counter()
|
|
61
118
|
for node in reversed(graph.nodes):
|
|
62
119
|
if node.name not in match_map:
|