Defuser 0.0.3__tar.gz → 0.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. {defuser-0.0.3 → defuser-0.0.4}/Defuser.egg-info/PKG-INFO +6 -4
  2. defuser-0.0.4/Defuser.egg-info/SOURCES.txt +28 -0
  3. {defuser-0.0.3 → defuser-0.0.4}/PKG-INFO +6 -4
  4. {defuser-0.0.3 → defuser-0.0.4}/README.md +5 -3
  5. defuser-0.0.4/defuser/__init__.py +13 -0
  6. defuser-0.0.4/defuser/defuser.py +100 -0
  7. defuser-0.0.4/defuser/model_registry.py +35 -0
  8. defuser-0.0.4/defuser/modeling/fused_moe/moe_experts_interface.py +643 -0
  9. defuser-0.0.4/defuser/modeling/fused_moe/qwen3_5_moe.py +116 -0
  10. defuser-0.0.4/defuser/modeling/fused_moe/replace_modules.py +442 -0
  11. defuser-0.0.4/defuser/modeling/fused_moe/update_module.py +22 -0
  12. {defuser-0.0.3 → defuser-0.0.4}/defuser/modeling/unfused_moe/qwen3_moe.py +3 -0
  13. defuser-0.0.4/defuser/utils/__init__.py +0 -0
  14. defuser-0.0.4/defuser/utils/common.py +35 -0
  15. defuser-0.0.4/defuser/utils/device.py +110 -0
  16. {defuser-0.0.3 → defuser-0.0.4}/defuser/utils/hf.py +18 -29
  17. defuser-0.0.4/defuser/utils/model.py +19 -0
  18. {defuser-0.0.3 → defuser-0.0.4}/pyproject.toml +12 -12
  19. defuser-0.0.4/tests/test_convert_model.py +63 -0
  20. defuser-0.0.4/tests/test_device_utils.py +43 -0
  21. defuser-0.0.4/tests/test_replace_modules_tracker.py +107 -0
  22. defuser-0.0.3/Defuser.egg-info/SOURCES.txt +0 -18
  23. defuser-0.0.3/defuser/__init__.py +0 -6
  24. defuser-0.0.3/defuser/defuser.py +0 -14
  25. defuser-0.0.3/defuser/logger.py +0 -8
  26. defuser-0.0.3/tests/test_convert_model.py +0 -19
  27. {defuser-0.0.3 → defuser-0.0.4}/Defuser.egg-info/dependency_links.txt +0 -0
  28. {defuser-0.0.3 → defuser-0.0.4}/Defuser.egg-info/requires.txt +0 -0
  29. {defuser-0.0.3 → defuser-0.0.4}/Defuser.egg-info/top_level.txt +0 -0
  30. {defuser-0.0.3 → defuser-0.0.4}/LICENSE +0 -0
  31. {defuser-0.0.3 → defuser-0.0.4}/defuser/modeling/__init__.py +0 -0
  32. {defuser-0.0.3/defuser/modeling/unfused_moe → defuser-0.0.4/defuser/modeling/fused_moe}/__init__.py +0 -0
  33. {defuser-0.0.3/defuser/utils → defuser-0.0.4/defuser/modeling/unfused_moe}/__init__.py +0 -0
  34. {defuser-0.0.3 → defuser-0.0.4}/setup.cfg +0 -0
  35. {defuser-0.0.3 → defuser-0.0.4}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: Defuser
3
- Version: 0.0.3
3
+ Version: 0.0.4
4
4
  Summary: Model defuser helper for HF Transformers.
5
5
  Author-email: ModelCloud <qubitium@modelcloud.ai>
6
6
  License-Expression: Apache-2.0
@@ -22,7 +22,6 @@ License-File: LICENSE
22
22
  Requires-Dist: transformers
23
23
  Dynamic: license-file
24
24
 
25
-
26
25
  <div align=center>
27
26
  <img width="50%" alt="image" src="https://github.com/user-attachments/assets/f801617b-8959-474a-a565-6b8897e2fcbf" />
28
27
  <h1 align="center">Defuser</h1>
@@ -37,5 +36,8 @@ Dynamic: license-file
37
36
  </p>
38
37
  Model defuser helper for HF Transformers >= 5.0. In HF Transformers 5.x releases, many MoE modules became auto-stacked or auto-fused by new modeling code which has benefits but also downsides.
39
38
 
40
- * Goal is to provide naive module/layer forwarding code for all models supported by HF transformers where run-time weight and structure level optimizations such weight merging, stacking, fusing are reversed so the model is operating in a simple naive state.
41
- * There are cases, quantization libraries, where we need to run inference where module input/output needs to be individually captured and this pkg can help complete this task.
39
+ * Goal is to provide naive module/layer forwarding code for all models supported by HF transformers where run-time
40
+ weight and structure level optimizations such as weight merging, stacking, fusing are reversed so the model is operating
41
+ in a simple naive state.
42
+ * There are cases, such as quantization libraries, where we need to run inference where module input/output needs to be
43
+ individually captured and this pkg can help complete this task.
@@ -0,0 +1,28 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ setup.py
5
+ Defuser.egg-info/PKG-INFO
6
+ Defuser.egg-info/SOURCES.txt
7
+ Defuser.egg-info/dependency_links.txt
8
+ Defuser.egg-info/requires.txt
9
+ Defuser.egg-info/top_level.txt
10
+ defuser/__init__.py
11
+ defuser/defuser.py
12
+ defuser/model_registry.py
13
+ defuser/modeling/__init__.py
14
+ defuser/modeling/fused_moe/__init__.py
15
+ defuser/modeling/fused_moe/moe_experts_interface.py
16
+ defuser/modeling/fused_moe/qwen3_5_moe.py
17
+ defuser/modeling/fused_moe/replace_modules.py
18
+ defuser/modeling/fused_moe/update_module.py
19
+ defuser/modeling/unfused_moe/__init__.py
20
+ defuser/modeling/unfused_moe/qwen3_moe.py
21
+ defuser/utils/__init__.py
22
+ defuser/utils/common.py
23
+ defuser/utils/device.py
24
+ defuser/utils/hf.py
25
+ defuser/utils/model.py
26
+ tests/test_convert_model.py
27
+ tests/test_device_utils.py
28
+ tests/test_replace_modules_tracker.py
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: Defuser
3
- Version: 0.0.3
3
+ Version: 0.0.4
4
4
  Summary: Model defuser helper for HF Transformers.
5
5
  Author-email: ModelCloud <qubitium@modelcloud.ai>
6
6
  License-Expression: Apache-2.0
@@ -22,7 +22,6 @@ License-File: LICENSE
22
22
  Requires-Dist: transformers
23
23
  Dynamic: license-file
24
24
 
25
-
26
25
  <div align=center>
27
26
  <img width="50%" alt="image" src="https://github.com/user-attachments/assets/f801617b-8959-474a-a565-6b8897e2fcbf" />
28
27
  <h1 align="center">Defuser</h1>
@@ -37,5 +36,8 @@ Dynamic: license-file
37
36
  </p>
38
37
  Model defuser helper for HF Transformers >= 5.0. In HF Transformers 5.x releases, many MoE modules became auto-stacked or auto-fused by new modeling code which has benefits but also downsides.
39
38
 
40
- * Goal is to provide naive module/layer forwarding code for all models supported by HF transformers where run-time weight and structure level optimizations such weight merging, stacking, fusing are reversed so the model is operating in a simple naive state.
41
- * There are cases, quantization libraries, where we need to run inference where module input/output needs to be individually captured and this pkg can help complete this task.
39
+ * Goal is to provide naive module/layer forwarding code for all models supported by HF transformers where run-time
40
+ weight and structure level optimizations such as weight merging, stacking, fusing are reversed so the model is operating
41
+ in a simple naive state.
42
+ * There are cases, such as quantization libraries, where we need to run inference where module input/output needs to be
43
+ individually captured and this pkg can help complete this task.
@@ -1,4 +1,3 @@
1
-
2
1
  <div align=center>
3
2
  <img width="50%" alt="image" src="https://github.com/user-attachments/assets/f801617b-8959-474a-a565-6b8897e2fcbf" />
4
3
  <h1 align="center">Defuser</h1>
@@ -13,5 +12,8 @@
13
12
  </p>
14
13
  Model defuser helper for HF Transformers >= 5.0. In HF Transformers 5.x releases, many MoE modules became auto-stacked or auto-fused by new modeling code which has benefits but also downsides.
15
14
 
16
- * Goal is to provide naive module/layer forwarding code for all models supported by HF transformers where run-time weight and structure level optimizations such weight merging, stacking, fusing are reversed so the model is operating in a simple naive state.
17
- * There are cases, quantization libraries, where we need to run inference where module input/output needs to be individually captured and this pkg can help complete this task.
15
+ * Goal is to provide naive module/layer forwarding code for all models supported by HF transformers where run-time
16
+ weight and structure level optimizations such as weight merging, stacking, fusing are reversed so the model is operating
17
+ in a simple naive state.
18
+ * There are cases, such as quantization libraries, where we need to run inference where module input/output needs to be
19
+ individually captured and this pkg can help complete this task.
@@ -0,0 +1,13 @@
1
+ # SPDX-FileCopyrightText: 2026 ModelCloud.ai
2
+ # SPDX-FileCopyrightText: 2026 qubitium@modelcloud.ai
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ # Contact: qubitium@modelcloud.ai, x.com/qubitium
5
+
6
+ def convert_hf_model(*args, **kwargs):
7
+ """Lazily import conversion entrypoint to avoid import-time cycles."""
8
+ from .defuser import convert_hf_model as _convert_hf_model
9
+
10
+ return _convert_hf_model(*args, **kwargs)
11
+
12
+
13
+ __all__ = ["convert_hf_model"]
@@ -0,0 +1,100 @@
1
+ # SPDX-FileCopyrightText: 2026 ModelCloud.ai
2
+ # SPDX-FileCopyrightText: 2026 qubitium@modelcloud.ai
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ # Contact: qubitium@modelcloud.ai, x.com/qubitium
5
+
6
+ from torch import nn
7
+
8
+ from defuser.modeling.fused_moe.update_module import update_module
9
+ from defuser.utils.hf import patch
10
+
11
+
12
+ def convert_hf_model(
13
+ model: nn.Module,
14
+ cleanup_original: bool = False,
15
+ max_layers: int | None = None,
16
+ ) -> nn.Module:
17
+ if max_layers is not None and max_layers < 1:
18
+ raise ValueError("max_layers must be >= 1 when provided")
19
+
20
+ # Patch modeling structure for legacy Qwen3 MoE
21
+ #
22
+ # There are two slightly different checkpoint formats we need to support:
23
+ # 1) Qwen3 MoE
24
+ # 2) Qwen3.5 MoE
25
+ #
26
+ # The key difference is how the expert MLP weights are stored in the original
27
+ # checkpoint (fused vs. defused). Because of that, the amount of work needed
28
+ # after replacing the modeling structure is different.
29
+ #
30
+ # ---------------------------------------------------------------------------
31
+ # Step 1: Try applying a lightweight modeling patch
32
+ # ---------------------------------------------------------------------------
33
+ # `patch(model)` only replaces the *modeling structure*
34
+ # (module definitions / forward logic) to match our runtime implementation.
35
+ #
36
+ # For **Qwen3 MoE**, this is sufficient because:
37
+ # - The original checkpoint already stores `mlp.experts` weights in a
38
+ # **defused format**.
39
+ # - In other words, the tensors are already separated as:
40
+ #
41
+ # gate_proj
42
+ # up_proj
43
+ # down_proj
44
+ #
45
+ # - Therefore we only need to swap the modeling implementation so that the
46
+ # module structure matches the expected layout, without touching the
47
+ # underlying tensors.
48
+ #
49
+ # If this patch succeeds, it means the model is in the Qwen3 MoE format and
50
+ # no further tensor transformation is required.
51
+ is_applied = patch(model, max_layers=max_layers)
52
+ if not is_applied:
53
+ # -----------------------------------------------------------------------
54
+ # Step 2: Handle Qwen3.5 MoE checkpoints
55
+ # -----------------------------------------------------------------------
56
+ #
57
+ # If `patch()` reports that nothing was applied, we assume the checkpoint corresponds to
58
+ # **Qwen3.5 MoE**.
59
+ #
60
+ # In Qwen3.5 MoE, the expert MLP weights are stored in a **fused format**.
61
+ # Specifically, the checkpoint keeps tensors such as:
62
+ #
63
+ # gate_up_proj
64
+ # down_proj
65
+ #
66
+ # where `gate_proj` and `up_proj` are fused together.
67
+ #
68
+ # Because our runtime modeling expects **defused tensors**, simply replacing
69
+ # the module structure is not enough. We must also convert the stored
70
+ # parameters.
71
+ #
72
+ # `update_module()` performs two tasks:
73
+ #
74
+ # 1) Replace the modeling structure so that it matches the expected
75
+ # defused MoE implementation.
76
+ #
77
+ # 2) Prepare the module for **tensor defusion** of the expert weights.
78
+ #
79
+ # After the structure update, `materialize_model_()` will be invoked to
80
+ # actually split the fused tensors:
81
+ #
82
+ # gate_up_proj --> gate_proj + up_proj
83
+ #
84
+ # and ensure the module finally contains the expected parameters:
85
+ #
86
+ # gate_proj
87
+ # up_proj
88
+ # down_proj
89
+ #
90
+ # This ensures compatibility between the Qwen3.5 fused checkpoint format
91
+ # and the runtime model implementation that operates on defused weights.
92
+ model = update_module(
93
+ model,
94
+ cleanup_original=cleanup_original,
95
+ max_layers=max_layers,
96
+ )
97
+ return model
98
+
99
+
100
+ __all__ = ["convert_hf_model"]
@@ -0,0 +1,35 @@
1
+ # SPDX-FileCopyrightText: 2026 ModelCloud.ai
2
+ # SPDX-FileCopyrightText: 2026 qubitium@modelcloud.ai
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ # Contact: qubitium@modelcloud.ai, x.com/qubitium
5
+
6
+ from enum import Enum
7
+
8
+
9
+ class PATCH(str, Enum):
10
+ DEFUSE = "defuse"
11
+ REPLACE_MODULE = "replace_module"
12
+
13
+
14
+ MODEL_CONFIG = {
15
+ "qwen3_moe": {
16
+ "min_transformers_version": "5.0.0",
17
+ # structure path only replaces modeling structure
18
+ PATCH.REPLACE_MODULE: [
19
+ (
20
+ "transformers.models.qwen3_moe.modeling_qwen3_moe.Qwen3MoeSparseMoeBlock",
21
+ "defuser.modeling.unfused_moe.qwen3_moe.LinearQwen3MoeSparseMoeBlock",
22
+ )
23
+ ],
24
+ },
25
+ "qwen3_5_moe": {
26
+ "min_transformers_version": "5.2.0",
27
+ # Replacement module path imported only when the defuse workflow runs
28
+ PATCH.DEFUSE: "defuser.modeling.fused_moe.qwen3_5_moe",
29
+ },
30
+ "qwen3_5_moe_text": {
31
+ "min_transformers_version": "5.2.0",
32
+ # Replacement module path imported only when the defuse workflow runs
33
+ PATCH.DEFUSE: "defuser.modeling.fused_moe.qwen3_5_moe",
34
+ },
35
+ }