Defuser 0.0.3__tar.gz → 0.0.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {defuser-0.0.3 → defuser-0.0.4}/Defuser.egg-info/PKG-INFO +6 -4
- defuser-0.0.4/Defuser.egg-info/SOURCES.txt +28 -0
- {defuser-0.0.3 → defuser-0.0.4}/PKG-INFO +6 -4
- {defuser-0.0.3 → defuser-0.0.4}/README.md +5 -3
- defuser-0.0.4/defuser/__init__.py +13 -0
- defuser-0.0.4/defuser/defuser.py +100 -0
- defuser-0.0.4/defuser/model_registry.py +35 -0
- defuser-0.0.4/defuser/modeling/fused_moe/moe_experts_interface.py +643 -0
- defuser-0.0.4/defuser/modeling/fused_moe/qwen3_5_moe.py +116 -0
- defuser-0.0.4/defuser/modeling/fused_moe/replace_modules.py +442 -0
- defuser-0.0.4/defuser/modeling/fused_moe/update_module.py +22 -0
- {defuser-0.0.3 → defuser-0.0.4}/defuser/modeling/unfused_moe/qwen3_moe.py +3 -0
- defuser-0.0.4/defuser/utils/__init__.py +0 -0
- defuser-0.0.4/defuser/utils/common.py +35 -0
- defuser-0.0.4/defuser/utils/device.py +110 -0
- {defuser-0.0.3 → defuser-0.0.4}/defuser/utils/hf.py +18 -29
- defuser-0.0.4/defuser/utils/model.py +19 -0
- {defuser-0.0.3 → defuser-0.0.4}/pyproject.toml +12 -12
- defuser-0.0.4/tests/test_convert_model.py +63 -0
- defuser-0.0.4/tests/test_device_utils.py +43 -0
- defuser-0.0.4/tests/test_replace_modules_tracker.py +107 -0
- defuser-0.0.3/Defuser.egg-info/SOURCES.txt +0 -18
- defuser-0.0.3/defuser/__init__.py +0 -6
- defuser-0.0.3/defuser/defuser.py +0 -14
- defuser-0.0.3/defuser/logger.py +0 -8
- defuser-0.0.3/tests/test_convert_model.py +0 -19
- {defuser-0.0.3 → defuser-0.0.4}/Defuser.egg-info/dependency_links.txt +0 -0
- {defuser-0.0.3 → defuser-0.0.4}/Defuser.egg-info/requires.txt +0 -0
- {defuser-0.0.3 → defuser-0.0.4}/Defuser.egg-info/top_level.txt +0 -0
- {defuser-0.0.3 → defuser-0.0.4}/LICENSE +0 -0
- {defuser-0.0.3 → defuser-0.0.4}/defuser/modeling/__init__.py +0 -0
- {defuser-0.0.3/defuser/modeling/unfused_moe → defuser-0.0.4/defuser/modeling/fused_moe}/__init__.py +0 -0
- {defuser-0.0.3/defuser/utils → defuser-0.0.4/defuser/modeling/unfused_moe}/__init__.py +0 -0
- {defuser-0.0.3 → defuser-0.0.4}/setup.cfg +0 -0
- {defuser-0.0.3 → defuser-0.0.4}/setup.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: Defuser
|
|
3
|
-
Version: 0.0.3
|
|
3
|
+
Version: 0.0.4
|
|
4
4
|
Summary: Model defuser helper for HF Transformers.
|
|
5
5
|
Author-email: ModelCloud <qubitium@modelcloud.ai>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -22,7 +22,6 @@ License-File: LICENSE
|
|
|
22
22
|
Requires-Dist: transformers
|
|
23
23
|
Dynamic: license-file
|
|
24
24
|
|
|
25
|
-
|
|
26
25
|
<div align=center>
|
|
27
26
|
<img width="50%" alt="image" src="https://github.com/user-attachments/assets/f801617b-8959-474a-a565-6b8897e2fcbf" />
|
|
28
27
|
<h1 align="center">Defuser</h1>
|
|
@@ -37,5 +36,8 @@ Dynamic: license-file
|
|
|
37
36
|
</p>
|
|
38
37
|
Model defuser helper for HF Transformers >= 5.0. In HF Transformers 5.x releases, many MoE modules became auto-stacked or auto-fused by new modeling code which has benefits but also downsides.
|
|
39
38
|
|
|
40
|
-
* Goal is to provide naive module/layer forwarding code for all models supported by HF transformers where run-time
|
|
41
|
-
|
|
39
|
+
* Goal is to provide naive module/layer forwarding code for all models supported by HF transformers where run-time
|
|
40
|
+
weight and structure level optimizations such as weight merging, stacking, fusing are reversed so the model is operating
|
|
41
|
+
in a simple naive state.
|
|
42
|
+
* There are cases, such as quantization libraries, where we need to run inference where module input/output needs to be
|
|
43
|
+
individually captured and this pkg can help complete this task.
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
setup.py
|
|
5
|
+
Defuser.egg-info/PKG-INFO
|
|
6
|
+
Defuser.egg-info/SOURCES.txt
|
|
7
|
+
Defuser.egg-info/dependency_links.txt
|
|
8
|
+
Defuser.egg-info/requires.txt
|
|
9
|
+
Defuser.egg-info/top_level.txt
|
|
10
|
+
defuser/__init__.py
|
|
11
|
+
defuser/defuser.py
|
|
12
|
+
defuser/model_registry.py
|
|
13
|
+
defuser/modeling/__init__.py
|
|
14
|
+
defuser/modeling/fused_moe/__init__.py
|
|
15
|
+
defuser/modeling/fused_moe/moe_experts_interface.py
|
|
16
|
+
defuser/modeling/fused_moe/qwen3_5_moe.py
|
|
17
|
+
defuser/modeling/fused_moe/replace_modules.py
|
|
18
|
+
defuser/modeling/fused_moe/update_module.py
|
|
19
|
+
defuser/modeling/unfused_moe/__init__.py
|
|
20
|
+
defuser/modeling/unfused_moe/qwen3_moe.py
|
|
21
|
+
defuser/utils/__init__.py
|
|
22
|
+
defuser/utils/common.py
|
|
23
|
+
defuser/utils/device.py
|
|
24
|
+
defuser/utils/hf.py
|
|
25
|
+
defuser/utils/model.py
|
|
26
|
+
tests/test_convert_model.py
|
|
27
|
+
tests/test_device_utils.py
|
|
28
|
+
tests/test_replace_modules_tracker.py
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: Defuser
|
|
3
|
-
Version: 0.0.3
|
|
3
|
+
Version: 0.0.4
|
|
4
4
|
Summary: Model defuser helper for HF Transformers.
|
|
5
5
|
Author-email: ModelCloud <qubitium@modelcloud.ai>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -22,7 +22,6 @@ License-File: LICENSE
|
|
|
22
22
|
Requires-Dist: transformers
|
|
23
23
|
Dynamic: license-file
|
|
24
24
|
|
|
25
|
-
|
|
26
25
|
<div align=center>
|
|
27
26
|
<img width="50%" alt="image" src="https://github.com/user-attachments/assets/f801617b-8959-474a-a565-6b8897e2fcbf" />
|
|
28
27
|
<h1 align="center">Defuser</h1>
|
|
@@ -37,5 +36,8 @@ Dynamic: license-file
|
|
|
37
36
|
</p>
|
|
38
37
|
Model defuser helper for HF Transformers >= 5.0. In HF Transformers 5.x releases, many MoE modules became auto-stacked or auto-fused by new modeling code which has benefits but also downsides.
|
|
39
38
|
|
|
40
|
-
* Goal is to provide naive module/layer forwarding code for all models supported by HF transformers where run-time
|
|
41
|
-
|
|
39
|
+
* Goal is to provide naive module/layer forwarding code for all models supported by HF transformers where run-time
|
|
40
|
+
weight and structure level optimizations such as weight merging, stacking, fusing are reversed so the model is operating
|
|
41
|
+
in a simple naive state.
|
|
42
|
+
* There are cases, such as quantization libraries, where we need to run inference where module input/output needs to be
|
|
43
|
+
individually captured and this pkg can help complete this task.
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
|
|
2
1
|
<div align=center>
|
|
3
2
|
<img width="50%" alt="image" src="https://github.com/user-attachments/assets/f801617b-8959-474a-a565-6b8897e2fcbf" />
|
|
4
3
|
<h1 align="center">Defuser</h1>
|
|
@@ -13,5 +12,8 @@
|
|
|
13
12
|
</p>
|
|
14
13
|
Model defuser helper for HF Transformers >= 5.0. In HF Transformers 5.x releases, many MoE modules became auto-stacked or auto-fused by new modeling code which has benefits but also downsides.
|
|
15
14
|
|
|
16
|
-
* Goal is to provide naive module/layer forwarding code for all models supported by HF transformers where run-time
|
|
17
|
-
|
|
15
|
+
* Goal is to provide naive module/layer forwarding code for all models supported by HF transformers where run-time
|
|
16
|
+
weight and structure level optimizations such as weight merging, stacking, fusing are reversed so the model is operating
|
|
17
|
+
in a simple naive state.
|
|
18
|
+
* There are cases, such as quantization libraries, where we need to run inference where module input/output needs to be
|
|
19
|
+
individually captured and this pkg can help complete this task.
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 ModelCloud.ai
|
|
2
|
+
# SPDX-FileCopyrightText: 2026 qubitium@modelcloud.ai
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
# Contact: qubitium@modelcloud.ai, x.com/qubitium
|
|
5
|
+
|
|
6
|
+
def convert_hf_model(*args, **kwargs):
|
|
7
|
+
"""Lazily import conversion entrypoint to avoid import-time cycles."""
|
|
8
|
+
from .defuser import convert_hf_model as _convert_hf_model
|
|
9
|
+
|
|
10
|
+
return _convert_hf_model(*args, **kwargs)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
__all__ = ["convert_hf_model"]
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 ModelCloud.ai
|
|
2
|
+
# SPDX-FileCopyrightText: 2026 qubitium@modelcloud.ai
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
# Contact: qubitium@modelcloud.ai, x.com/qubitium
|
|
5
|
+
|
|
6
|
+
from torch import nn
|
|
7
|
+
|
|
8
|
+
from defuser.modeling.fused_moe.update_module import update_module
|
|
9
|
+
from defuser.utils.hf import patch
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def convert_hf_model(
|
|
13
|
+
model: nn.Module,
|
|
14
|
+
cleanup_original: bool = False,
|
|
15
|
+
max_layers: int | None = None,
|
|
16
|
+
) -> nn.Module:
|
|
17
|
+
if max_layers is not None and max_layers < 1:
|
|
18
|
+
raise ValueError("max_layers must be >= 1 when provided")
|
|
19
|
+
|
|
20
|
+
# Patch modeling structure for legacy Qwen3 MoE
|
|
21
|
+
#
|
|
22
|
+
# There are two slightly different checkpoint formats we need to support:
|
|
23
|
+
# 1) Qwen3 MoE
|
|
24
|
+
# 2) Qwen3.5 MoE
|
|
25
|
+
#
|
|
26
|
+
# The key difference is how the expert MLP weights are stored in the original
|
|
27
|
+
# checkpoint (fused vs. defused). Because of that, the amount of work needed
|
|
28
|
+
# after replacing the modeling structure is different.
|
|
29
|
+
#
|
|
30
|
+
# ---------------------------------------------------------------------------
|
|
31
|
+
# Step 1: Try applying a lightweight modeling patch
|
|
32
|
+
# ---------------------------------------------------------------------------
|
|
33
|
+
# `apply_modeling_patch(model)` only replaces the *modeling structure*
|
|
34
|
+
# (module definitions / forward logic) to match our runtime implementation.
|
|
35
|
+
#
|
|
36
|
+
# For **Qwen3 MoE**, this is sufficient because:
|
|
37
|
+
# - The original checkpoint already stores `mlp.experts` weights in a
|
|
38
|
+
# **defused format**.
|
|
39
|
+
# - In other words, the tensors are already separated as:
|
|
40
|
+
#
|
|
41
|
+
# gate_proj
|
|
42
|
+
# up_proj
|
|
43
|
+
# down_proj
|
|
44
|
+
#
|
|
45
|
+
# - Therefore we only need to swap the modeling implementation so that the
|
|
46
|
+
# module structure matches the expected layout, without touching the
|
|
47
|
+
# underlying tensors.
|
|
48
|
+
#
|
|
49
|
+
# If this patch succeeds, it means the model is in the Qwen3 MoE format and
|
|
50
|
+
# no further tensor transformation is required.
|
|
51
|
+
is_applied = patch(model, max_layers=max_layers)
|
|
52
|
+
if not is_applied:
|
|
53
|
+
# -----------------------------------------------------------------------
|
|
54
|
+
# Step 2: Handle Qwen3.5 MoE checkpoints
|
|
55
|
+
# -----------------------------------------------------------------------
|
|
56
|
+
#
|
|
57
|
+
# If `apply_modeling_patch` fails, we assume the checkpoint corresponds to
|
|
58
|
+
# **Qwen3.5 MoE**.
|
|
59
|
+
#
|
|
60
|
+
# In Qwen3.5 MoE, the expert MLP weights are stored in a **fused format**.
|
|
61
|
+
# Specifically, the checkpoint keeps tensors such as:
|
|
62
|
+
#
|
|
63
|
+
# gate_up_proj
|
|
64
|
+
# down_proj
|
|
65
|
+
#
|
|
66
|
+
# where `gate_proj` and `up_proj` are fused together.
|
|
67
|
+
#
|
|
68
|
+
# Because our runtime modeling expects **defused tensors**, simply replacing
|
|
69
|
+
# the module structure is not enough. We must also convert the stored
|
|
70
|
+
# parameters.
|
|
71
|
+
#
|
|
72
|
+
# `update_module()` performs two tasks:
|
|
73
|
+
#
|
|
74
|
+
# 1) Replace the modeling structure so that it matches the expected
|
|
75
|
+
# defused MoE implementation.
|
|
76
|
+
#
|
|
77
|
+
# 2) Prepare the module for **tensor defusion** of the expert weights.
|
|
78
|
+
#
|
|
79
|
+
# After the structure update, `materialize_model_()` will be invoked to
|
|
80
|
+
# actually split the fused tensors:
|
|
81
|
+
#
|
|
82
|
+
# gate_up_proj --> gate_proj + up_proj
|
|
83
|
+
#
|
|
84
|
+
# and ensure the module finally contains the expected parameters:
|
|
85
|
+
#
|
|
86
|
+
# gate_proj
|
|
87
|
+
# up_proj
|
|
88
|
+
# down_proj
|
|
89
|
+
#
|
|
90
|
+
# This ensures compatibility between the Qwen3.5 fused checkpoint format
|
|
91
|
+
# and the runtime model implementation that operates on defused weights.
|
|
92
|
+
model = update_module(
|
|
93
|
+
model,
|
|
94
|
+
cleanup_original=cleanup_original,
|
|
95
|
+
max_layers=max_layers,
|
|
96
|
+
)
|
|
97
|
+
return model
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
__all__ = ["convert_hf_model"]
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 ModelCloud.ai
|
|
2
|
+
# SPDX-FileCopyrightText: 2026 qubitium@modelcloud.ai
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
# Contact: qubitium@modelcloud.ai, x.com/qubitium
|
|
5
|
+
|
|
6
|
+
from enum import Enum
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class PATCH(str, Enum):
|
|
10
|
+
DEFUSE = "defuse"
|
|
11
|
+
REPLACE_MODULE = "replace_module"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
MODEL_CONFIG = {
|
|
15
|
+
"qwen3_moe": {
|
|
16
|
+
"min_transformers_version": "5.0.0",
|
|
17
|
+
# structure path only replaces modeling structure
|
|
18
|
+
PATCH.REPLACE_MODULE: [
|
|
19
|
+
(
|
|
20
|
+
"transformers.models.qwen3_moe.modeling_qwen3_moe.Qwen3MoeSparseMoeBlock",
|
|
21
|
+
"defuser.modeling.unfused_moe.qwen3_moe.LinearQwen3MoeSparseMoeBlock",
|
|
22
|
+
)
|
|
23
|
+
],
|
|
24
|
+
},
|
|
25
|
+
"qwen3_5_moe": {
|
|
26
|
+
"min_transformers_version": "5.2.0",
|
|
27
|
+
# Replacement module path imported only when the defuse workflow runs
|
|
28
|
+
PATCH.DEFUSE: "defuser.modeling.fused_moe.qwen3_5_moe",
|
|
29
|
+
},
|
|
30
|
+
"qwen3_5_moe_text": {
|
|
31
|
+
"min_transformers_version": "5.2.0",
|
|
32
|
+
# Replacement module path imported only when the defuse workflow runs
|
|
33
|
+
PATCH.DEFUSE: "defuser.modeling.fused_moe.qwen3_5_moe",
|
|
34
|
+
},
|
|
35
|
+
}
|