Defuser 0.0.2.tar.gz → 0.0.3.tar.gz
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {defuser-0.0.2 → defuser-0.0.3}/Defuser.egg-info/PKG-INFO +8 -2
- {defuser-0.0.2 → defuser-0.0.3}/Defuser.egg-info/SOURCES.txt +5 -0
- defuser-0.0.3/Defuser.egg-info/top_level.txt +1 -0
- {defuser-0.0.2 → defuser-0.0.3}/PKG-INFO +8 -2
- {defuser-0.0.2 → defuser-0.0.3}/README.md +7 -1
- defuser-0.0.3/defuser/modeling/__init__.py +0 -0
- defuser-0.0.3/defuser/modeling/unfused_moe/__init__.py +0 -0
- defuser-0.0.3/defuser/modeling/unfused_moe/qwen3_moe.py +67 -0
- defuser-0.0.3/defuser/utils/__init__.py +0 -0
- defuser-0.0.3/defuser/utils/hf.py +149 -0
- {defuser-0.0.2 → defuser-0.0.3}/pyproject.toml +6 -3
- {defuser-0.0.2 → defuser-0.0.3}/tests/test_convert_model.py +2 -1
- defuser-0.0.2/Defuser.egg-info/top_level.txt +0 -2
- {defuser-0.0.2 → defuser-0.0.3}/Defuser.egg-info/dependency_links.txt +0 -0
- {defuser-0.0.2 → defuser-0.0.3}/Defuser.egg-info/requires.txt +0 -0
- {defuser-0.0.2 → defuser-0.0.3}/LICENSE +0 -0
- {defuser-0.0.2 → defuser-0.0.3}/defuser/__init__.py +0 -0
- {defuser-0.0.2 → defuser-0.0.3}/defuser/defuser.py +0 -0
- {defuser-0.0.2 → defuser-0.0.3}/defuser/logger.py +0 -0
- {defuser-0.0.2 → defuser-0.0.3}/setup.cfg +0 -0
- {defuser-0.0.2 → defuser-0.0.3}/setup.py +0 -0
{defuser-0.0.2 → defuser-0.0.3}/Defuser.egg-info/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: Defuser
-Version: 0.0.2
+Version: 0.0.3
 Summary: Model defuser helper for HF Transformers.
 Author-email: ModelCloud <qubitium@modelcloud.ai>
 License-Expression: Apache-2.0
@@ -28,7 +28,13 @@ Dynamic: license-file
 <h1 align="center">Defuser</h1>
 </div>
 
-
+<p align="center">
+<a href="https://github.com/ModelCloud/Defuser/releases" style="text-decoration:none;"><img alt="GitHub release" src="https://img.shields.io/github/release/ModelCloud/Defuser.svg"></a>
+<a href="https://pypi.org/project/Defuser/" style="text-decoration:none;"><img alt="PyPI - Version" src="https://img.shields.io/pypi/v/Defuser"></a>
+<a href="https://pepy.tech/projects/Defuser" style="text-decoration:none;"><img src="https://static.pepy.tech/badge/Defuser" alt="PyPI Downloads"></a>
+<a href="https://github.com/ModelCloud/Defuser/blob/main/LICENSE"><img src="https://img.shields.io/pypi/l/Defuser"></a>
+<a href="https://huggingface.co/modelcloud/"><img src="https://img.shields.io/badge/🤗%20Hugging%20Face-ModelCloud-%23ff8811.svg"></a>
+</p>
 Model defuser helper for HF Transformers >= 5.0. In HF Transformers 5.x releases, many MoE modules became auto-stacked or auto-fused by new modeling code which has benefits but also downsides.
 
 * Goal is to provide naive module/layer forwarding code for all models supported by HF transformers where run-time weight and structure level optimizations such weight merging, stacking, fusing are reversed so the model is operating in a simple naive state.
```
{defuser-0.0.2 → defuser-0.0.3}/Defuser.egg-info/SOURCES.txt

```diff
@@ -10,4 +10,9 @@ Defuser.egg-info/top_level.txt
 defuser/__init__.py
 defuser/defuser.py
 defuser/logger.py
+defuser/modeling/__init__.py
+defuser/modeling/unfused_moe/__init__.py
+defuser/modeling/unfused_moe/qwen3_moe.py
+defuser/utils/__init__.py
+defuser/utils/hf.py
 tests/test_convert_model.py
```
defuser-0.0.3/Defuser.egg-info/top_level.txt (new file)

```diff
@@ -0,0 +1 @@
+defuser
```
{defuser-0.0.2 → defuser-0.0.3}/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: Defuser
-Version: 0.0.2
+Version: 0.0.3
 Summary: Model defuser helper for HF Transformers.
 Author-email: ModelCloud <qubitium@modelcloud.ai>
 License-Expression: Apache-2.0
@@ -28,7 +28,13 @@ Dynamic: license-file
 <h1 align="center">Defuser</h1>
 </div>
 
-
+<p align="center">
+<a href="https://github.com/ModelCloud/Defuser/releases" style="text-decoration:none;"><img alt="GitHub release" src="https://img.shields.io/github/release/ModelCloud/Defuser.svg"></a>
+<a href="https://pypi.org/project/Defuser/" style="text-decoration:none;"><img alt="PyPI - Version" src="https://img.shields.io/pypi/v/Defuser"></a>
+<a href="https://pepy.tech/projects/Defuser" style="text-decoration:none;"><img src="https://static.pepy.tech/badge/Defuser" alt="PyPI Downloads"></a>
+<a href="https://github.com/ModelCloud/Defuser/blob/main/LICENSE"><img src="https://img.shields.io/pypi/l/Defuser"></a>
+<a href="https://huggingface.co/modelcloud/"><img src="https://img.shields.io/badge/🤗%20Hugging%20Face-ModelCloud-%23ff8811.svg"></a>
+</p>
 Model defuser helper for HF Transformers >= 5.0. In HF Transformers 5.x releases, many MoE modules became auto-stacked or auto-fused by new modeling code which has benefits but also downsides.
 
 * Goal is to provide naive module/layer forwarding code for all models supported by HF transformers where run-time weight and structure level optimizations such weight merging, stacking, fusing are reversed so the model is operating in a simple naive state.
```
{defuser-0.0.2 → defuser-0.0.3}/README.md

```diff
@@ -4,7 +4,13 @@
 <h1 align="center">Defuser</h1>
 </div>
 
-
+<p align="center">
+<a href="https://github.com/ModelCloud/Defuser/releases" style="text-decoration:none;"><img alt="GitHub release" src="https://img.shields.io/github/release/ModelCloud/Defuser.svg"></a>
+<a href="https://pypi.org/project/Defuser/" style="text-decoration:none;"><img alt="PyPI - Version" src="https://img.shields.io/pypi/v/Defuser"></a>
+<a href="https://pepy.tech/projects/Defuser" style="text-decoration:none;"><img src="https://static.pepy.tech/badge/Defuser" alt="PyPI Downloads"></a>
+<a href="https://github.com/ModelCloud/Defuser/blob/main/LICENSE"><img src="https://img.shields.io/pypi/l/Defuser"></a>
+<a href="https://huggingface.co/modelcloud/"><img src="https://img.shields.io/badge/🤗%20Hugging%20Face-ModelCloud-%23ff8811.svg"></a>
+</p>
 Model defuser helper for HF Transformers >= 5.0. In HF Transformers 5.x releases, many MoE modules became auto-stacked or auto-fused by new modeling code which has benefits but also downsides.
 
 * Goal is to provide naive module/layer forwarding code for all models supported by HF transformers where run-time weight and structure level optimizations such weight merging, stacking, fusing are reversed so the model is operating in a simple naive state.
```
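The README stops at the goal statement; the API this release actually ships (convert_hf_model, re-exported from the defuser package per the test file further down) suggests a usage pattern like the sketch below. The checkpoint id is a placeholder, not taken from this diff, and assumes a Qwen3-MoE model whose checkpoint stores per-expert (unfused) weights:

```python
# Usage sketch based only on the API visible in this diff. The
# checkpoint id below is a placeholder (hypothetical); substitute any
# Qwen3-MoE model with per-expert (unfused) checkpoint weights.
from transformers import AutoModelForCausalLM

from defuser import convert_hf_model
from defuser.modeling.unfused_moe.qwen3_moe import LinearQwen3MoeSparseMoeBlock

model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-30B-A3B")  # placeholder

# Swaps every fused Qwen3MoeSparseMoeBlock for the naive per-expert block in place.
convert_hf_model(model)

assert any(isinstance(m, LinearQwen3MoeSparseMoeBlock) for m in model.modules())
```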
File without changes: defuser-0.0.3/defuser/modeling/__init__.py (new, empty)

File without changes: defuser-0.0.3/defuser/modeling/unfused_moe/__init__.py (new, empty)
defuser-0.0.3/defuser/modeling/unfused_moe/qwen3_moe.py (new file)

```diff
@@ -0,0 +1,67 @@
+# SPDX-FileCopyrightText: 2026 ModelCloud.ai
+# SPDX-FileCopyrightText: 2026 qubitium@modelcloud.ai
+# SPDX-License-Identifier: Apache-2.0
+# Contact: qubitium@modelcloud.ai, x.com/qubitium
+
+import torch
+import torch.nn as nn
+from torch.nn import functional as F
+
+
+class LinearQwen3MoeSparseMoeBlock(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        from transformers.models.qwen3_moe.modeling_qwen3_moe import Qwen3MoeMLP
+
+        self.num_experts = config.num_experts
+        self.top_k = config.num_experts_per_tok
+        self.norm_topk_prob = config.norm_topk_prob
+
+        # This must be linear for vllm alignment
+        self.gate = nn.Linear(config.hidden_size, config.num_experts, bias=False)
+        self.experts = nn.ModuleList(
+            [Qwen3MoeMLP(config, intermediate_size=config.moe_intermediate_size) for _ in range(self.num_experts)]
+        )
+
+    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
+        """ """
+        batch_size, sequence_length, hidden_dim = hidden_states.shape
+        hidden_states = hidden_states.view(-1, hidden_dim)
+        # router_logits: (batch * sequence_length, n_experts)
+        router_logits = self.gate(hidden_states)
+
+        routing_weights = F.softmax(router_logits, dim=1, dtype=torch.float)
+        routing_weights, selected_experts = torch.topk(routing_weights, self.top_k, dim=-1)
+        if self.norm_topk_prob:  # only diff with mixtral sparse moe block!
+            routing_weights /= routing_weights.sum(dim=-1, keepdim=True)
+        # we cast back to the input dtype
+        routing_weights = routing_weights.to(hidden_states.dtype)
+
+        final_hidden_states = torch.zeros(
+            (batch_size * sequence_length, hidden_dim), dtype=hidden_states.dtype, device=hidden_states.device
+        )
+
+        # One hot encode the selected experts to create an expert mask
+        # this will be used to easily index which expert is going to be solicited
+        with torch.no_grad():
+            expert_mask = torch.nn.functional.one_hot(selected_experts, num_classes=self.num_experts).permute(2, 1, 0)
+
+        # Loop over all available experts in the model and perform the computation on each expert
+        expert_hit = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
+        for expert_idx in expert_hit:
+            if expert_idx == self.num_experts:
+                continue
+            expert_layer = self.experts[expert_idx]
+            idx, top_x = torch.where(expert_mask[expert_idx].squeeze(0))
+
+            # Index the correct hidden states and compute the expert hidden state for
+            # the current expert. We need to make sure to multiply the output hidden
+            # states by `routing_weights` on the corresponding tokens (top-1 and top-2)
+            current_state = hidden_states[None, top_x].reshape(-1, hidden_dim)
+            current_hidden_states = expert_layer(current_state) * routing_weights[top_x, idx, None]
+
+            # However `index_add_` only support torch tensors for indexing so we'll use
+            # the `top_x` tensor here.
+            final_hidden_states.index_add_(0, top_x, current_hidden_states.to(hidden_states.dtype))
+        final_hidden_states = final_hidden_states.reshape(batch_size, sequence_length, hidden_dim)
+        return final_hidden_states
```
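The forward pass above is the standard Mixtral-style sparse dispatch: softmax over all experts, keep the top-k weights per token, optionally renormalize them, then scatter each expert's output back with index_add_. A self-contained sketch of just the router math, with illustrative shapes that are not taken from the diff:

```python
# Standalone sketch of the top-k routing math used by
# LinearQwen3MoeSparseMoeBlock; tensor shapes here are illustrative.
import torch
import torch.nn.functional as F

tokens, n_experts, top_k = 6, 8, 2
router_logits = torch.randn(tokens, n_experts)  # output of self.gate(hidden_states)

routing_weights = F.softmax(router_logits, dim=1, dtype=torch.float)
routing_weights, selected_experts = torch.topk(routing_weights, top_k, dim=-1)
# norm_topk_prob: renormalize so the k surviving weights sum to 1 per token
routing_weights /= routing_weights.sum(dim=-1, keepdim=True)

# expert_mask[e, slot, token] == 1 iff expert e was chosen for that token/slot;
# summing over (slot, token) finds experts that received at least one token
expert_mask = F.one_hot(selected_experts, num_classes=n_experts).permute(2, 1, 0)
expert_hit = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
print(expert_hit.flatten().tolist())  # indices of experts with work to do
```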
File without changes: defuser-0.0.3/defuser/utils/__init__.py (new, empty)
defuser-0.0.3/defuser/utils/hf.py (new file)

```diff
@@ -0,0 +1,149 @@
+# SPDX-FileCopyrightText: 2026 ModelCloud.ai
+# SPDX-FileCopyrightText: 2026 qubitium@modelcloud.ai
+# SPDX-License-Identifier: Apache-2.0
+# Contact: qubitium@modelcloud.ai, x.com/qubitium
+
+import torch
+from torch import nn
+
+from packaging import version
+from transformers import AutoConfig
+import transformers
+import os
+import importlib
+
+from typing import Final
+
+from ..logger import logger
+
+MODEL_CONFIG = {
+    "qwen3_moe": {
+        "min_transformers_version": "5.0.0",
+        "checkpoint_mapping": [],
+        "block_patch": [
+            (
+                "transformers.models.qwen3_moe.modeling_qwen3_moe.Qwen3MoeSparseMoeBlock",
+                "defuser.modeling.unfused_moe.qwen3_moe.LinearQwen3MoeSparseMoeBlock",
+            )
+        ],
+    },
+}
+
+_ENV_VAR: Final[str] = "GPTQMODEL_USE_MODELSCOPE"
+
+
+TRUTHFUL = {"1", "true", "yes", "on", "y"}
+
+
+def env_flag(name: str, default: str | bool | None = "0") -> bool:
+    """Return ``True`` when an env var is set to a truthy value."""
+
+    value = os.getenv(name)
+    if value is None:
+        if default is None:
+            return False
+        if isinstance(default, bool):
+            return default
+        value = default
+    return str(value).strip().lower() in TRUTHFUL
+
+def modelscope_requested() -> bool:
+    """
+    Return ``True`` when the user explicitly enabled ModelScope integration
+    via the GPTQMODEL_USE_MODELSCOPE environment variable.
+    """
+    return env_flag(_ENV_VAR, default="0")
+
+
+def get_file_path_via_model_name(model_or_path: str, file_name):
+    from huggingface_hub import hf_hub_download
+
+    # 1) local folder
+    if os.path.isdir(model_or_path):
+        index_path = os.path.join(model_or_path, file_name)
+
+    # 2) HF model name
+    elif not modelscope_requested():
+        index_path = hf_hub_download(
+            repo_id=model_or_path,
+            filename=file_name,
+            repo_type="model",
+        )
+    elif modelscope_requested():
+        from modelscope import snapshot_download  # pylint: disable=E0401
+
+        # ModelSCOPE is different, it returns the folder path
+        folder = snapshot_download(model_or_path, allow_patterns=[file_name])
+        index_path = os.path.join(folder, file_name)
+    else:
+        index_path = None
+
+    return index_path
+
+
+def pre_check_config(model_name: str | torch.nn.Module):
+    if isinstance(model_name, str):
+        config = AutoConfig.from_pretrained(model_name)
+    elif isinstance(model_name, torch.nn.Module):
+        config = getattr(model_name, "config", None)
+        if config is None:
+            return False
+
+    model_type = getattr(config, "model_type", None)
+    if model_type is None or model_type not in MODEL_CONFIG:
+        return False
+
+    cfg = MODEL_CONFIG[model_type]
+
+    min_ver = cfg.get("min_transformers_version")
+    tf_ver = version.parse(transformers.__version__)
+    if min_ver and tf_ver < version.parse(min_ver):
+        return False
+    try:
+        file_path = get_file_path_via_model_name(model_name, "model.safetensors.index.json")
+        if os.path.exists(file_path):
+            import json
+
+            with open(file_path, "r") as f:
+                index_data = json.load(f)
+            model_keys = list(index_data.get("weight_map", {}).keys())
+            for key in model_keys:
+                if "gate_up_proj" in key:
+                    return False
+    except:
+        return True
+    return True
+
+
+def apply_modeling_patch(model: torch.nn.Module) -> bool:
+    res = pre_check_config(model)
+    if not res:
+        return False
+    model_type = getattr(model.config, "model_type")
+    cfg = MODEL_CONFIG[model_type]
+    # patch blocks
+    for orig_path, custom_path in cfg.get("block_patch", []):
+        orig_module_path, orig_class_name = orig_path.rsplit(".", 1)
+        custom_module_path, custom_class_name = custom_path.rsplit(".", 1)
+        try:
+            orig_module = importlib.import_module(orig_module_path)
+            custom_module = importlib.import_module(custom_module_path)
+            custom_class = getattr(custom_module, custom_class_name)
+            orig_class = getattr(orig_module, orig_class_name)
+            names = []
+            for n, m in model.named_modules():
+                if isinstance(m, orig_class):
+                    names.append((n, next(m.parameters()).dtype))
+            for (n, orig_dtype) in names:
+                model.set_submodule(n, custom_class(model.config).to(orig_dtype), True)
+            logger.info(f"Patched module: {orig_path} -> {custom_path}")
+            return True
+        except Exception as e:
+            logger.warning(f"Failed to patch {orig_path}: {e}")
+            return False
+    return False
+
+
+def convert_hf_model(model: nn.Module):
+    apply_modeling_patch(model)
+
```
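The heart of apply_modeling_patch is generic: resolve both classes from their dotted paths with importlib, collect matching submodules by isinstance, then replace each one via Module.set_submodule. A toy sketch of the same swap, where FusedBlock and NaiveBlock are hypothetical stand-ins for the real classes (assumes a PyTorch version that provides Module.set_submodule):

```python
# Toy illustration of the block_patch swap in apply_modeling_patch:
# resolve classes from dotted paths, find instances by isinstance, and
# replace them via set_submodule. FusedBlock/NaiveBlock are hypothetical
# stand-ins, not classes from this diff.
import importlib

import torch.nn as nn


class FusedBlock(nn.Module):   # stand-in for Qwen3MoeSparseMoeBlock
    def __init__(self):
        super().__init__()
        self.w = nn.Linear(4, 4)


class NaiveBlock(nn.Module):   # stand-in for LinearQwen3MoeSparseMoeBlock
    def __init__(self):
        super().__init__()
        self.w = nn.Linear(4, 4)


class TinyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.block1 = FusedBlock()
        self.act = nn.ReLU()
        self.block2 = FusedBlock()


model = TinyModel()
orig_path, custom_path = "__main__.FusedBlock", "__main__.NaiveBlock"

orig_mod, orig_name = orig_path.rsplit(".", 1)
custom_mod, custom_name = custom_path.rsplit(".", 1)
orig_cls = getattr(importlib.import_module(orig_mod), orig_name)
custom_cls = getattr(importlib.import_module(custom_mod), custom_name)

# Collect names first, then swap, so the module tree is not mutated
# while named_modules() is still iterating.
targets = [n for n, m in model.named_modules() if isinstance(m, orig_cls)]
for name in targets:
    model.set_submodule(name, custom_cls())

print([type(m).__name__ for m in model.children()])
# ['NaiveBlock', 'ReLU', 'NaiveBlock']
```

Collecting names before swapping avoids mutating the module tree mid-iteration, which is presumably why the shipped code builds its names list (with the original dtype alongside) before calling set_submodule.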
{defuser-0.0.2 → defuser-0.0.3}/pyproject.toml

```diff
@@ -9,7 +9,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "Defuser"
-version = "0.0.2"
+version = "0.0.3"
 description = "Model defuser helper for HF Transformers."
 readme = "README.md"
 requires-python = ">=3.9"
@@ -38,8 +38,11 @@ dependencies = [
 Homepage = "https://github.com/ModelCloud/Defuser"
 
 [tool.setuptools]
-
-
+include-package-data = false
+
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["defuser*"]
 
 [tool.pytest.ini_options]
 pythonpath = ["."]
```
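The new [tool.setuptools.packages.find] table is equivalent to calling setuptools' find_packages; a quick way to preview what it should discover, assuming the package layout shown in SOURCES.txt above:

```python
# Preview of what the new packages.find table will pick up; mirrors
# [tool.setuptools.packages.find] with where=["."], include=["defuser*"].
# Run from the project root; expected output assumes the SOURCES.txt layout.
from setuptools import find_packages

print(sorted(find_packages(where=".", include=["defuser*"])))
# Expected: ['defuser', 'defuser.modeling', 'defuser.modeling.unfused_moe',
#            'defuser.utils']
```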
{defuser-0.0.2 → defuser-0.0.3}/tests/test_convert_model.py

```diff
@@ -3,13 +3,14 @@
 # SPDX-License-Identifier: Apache-2.0
 # Contact: qubitium@modelcloud.ai, x.com/qubitium
 from transformers import AutoModelForCausalLM
+
 from defuser import convert_hf_model
 
 
 def test_qwen3_moe():
     from defuser.modeling.unfused_moe.qwen3_moe import LinearQwen3MoeSparseMoeBlock
 
-    model = AutoModelForCausalLM.from_pretrained("
+    model = AutoModelForCausalLM.from_pretrained("/monster/data/model/Qwen3-30B-A3B")
 
     assert model.config.model_type == "qwen3_moe"
 
```
File without changes: {defuser-0.0.2 → defuser-0.0.3}/Defuser.egg-info/dependency_links.txt

File without changes: {defuser-0.0.2 → defuser-0.0.3}/Defuser.egg-info/requires.txt

File without changes: {defuser-0.0.2 → defuser-0.0.3}/LICENSE

File without changes: {defuser-0.0.2 → defuser-0.0.3}/defuser/__init__.py

File without changes: {defuser-0.0.2 → defuser-0.0.3}/defuser/defuser.py

File without changes: {defuser-0.0.2 → defuser-0.0.3}/defuser/logger.py

File without changes: {defuser-0.0.2 → defuser-0.0.3}/setup.cfg

File without changes: {defuser-0.0.2 → defuser-0.0.3}/setup.py