Defuser 0.0.2__tar.gz → 0.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: Defuser
3
- Version: 0.0.2
3
+ Version: 0.0.3
4
4
  Summary: Model defuser helper for HF Transformers.
5
5
  Author-email: ModelCloud <qubitium@modelcloud.ai>
6
6
  License-Expression: Apache-2.0
@@ -28,7 +28,13 @@ Dynamic: license-file
28
28
  <h1 align="center">Defuser</h1>
29
29
  </div>
30
30
 
31
-
31
+ <p align="center">
32
+ <a href="https://github.com/ModelCloud/Defuser/releases" style="text-decoration:none;"><img alt="GitHub release" src="https://img.shields.io/github/release/ModelCloud/Defuser.svg"></a>
33
+ <a href="https://pypi.org/project/Defuser/" style="text-decoration:none;"><img alt="PyPI - Version" src="https://img.shields.io/pypi/v/Defuser"></a>
34
+ <a href="https://pepy.tech/projects/Defuser" style="text-decoration:none;"><img src="https://static.pepy.tech/badge/Defuser" alt="PyPI Downloads"></a>
35
+ <a href="https://github.com/ModelCloud/Defuser/blob/main/LICENSE"><img src="https://img.shields.io/pypi/l/Defuser"></a>
36
+ <a href="https://huggingface.co/modelcloud/"><img src="https://img.shields.io/badge/🤗%20Hugging%20Face-ModelCloud-%23ff8811.svg"></a>
37
+ </p>
32
38
  Model defuser helper for HF Transformers >= 5.0. In HF Transformers 5.x releases, many MoE modules became auto-stacked or auto-fused by new modeling code which has benefits but also downsides.
33
39
 
34
40
  * Goal is to provide naive module/layer forwarding code for all models supported by HF transformers where run-time weight and structure level optimizations such as weight merging, stacking, and fusing are reversed so the model is operating in a simple naive state.
@@ -10,4 +10,9 @@ Defuser.egg-info/top_level.txt
10
10
  defuser/__init__.py
11
11
  defuser/defuser.py
12
12
  defuser/logger.py
13
+ defuser/modeling/__init__.py
14
+ defuser/modeling/unfused_moe/__init__.py
15
+ defuser/modeling/unfused_moe/qwen3_moe.py
16
+ defuser/utils/__init__.py
17
+ defuser/utils/hf.py
13
18
  tests/test_convert_model.py
@@ -0,0 +1 @@
1
+ defuser
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: Defuser
3
- Version: 0.0.2
3
+ Version: 0.0.3
4
4
  Summary: Model defuser helper for HF Transformers.
5
5
  Author-email: ModelCloud <qubitium@modelcloud.ai>
6
6
  License-Expression: Apache-2.0
@@ -28,7 +28,13 @@ Dynamic: license-file
28
28
  <h1 align="center">Defuser</h1>
29
29
  </div>
30
30
 
31
-
31
+ <p align="center">
32
+ <a href="https://github.com/ModelCloud/Defuser/releases" style="text-decoration:none;"><img alt="GitHub release" src="https://img.shields.io/github/release/ModelCloud/Defuser.svg"></a>
33
+ <a href="https://pypi.org/project/Defuser/" style="text-decoration:none;"><img alt="PyPI - Version" src="https://img.shields.io/pypi/v/Defuser"></a>
34
+ <a href="https://pepy.tech/projects/Defuser" style="text-decoration:none;"><img src="https://static.pepy.tech/badge/Defuser" alt="PyPI Downloads"></a>
35
+ <a href="https://github.com/ModelCloud/Defuser/blob/main/LICENSE"><img src="https://img.shields.io/pypi/l/Defuser"></a>
36
+ <a href="https://huggingface.co/modelcloud/"><img src="https://img.shields.io/badge/🤗%20Hugging%20Face-ModelCloud-%23ff8811.svg"></a>
37
+ </p>
32
38
  Model defuser helper for HF Transformers >= 5.0. In HF Transformers 5.x releases, many MoE modules became auto-stacked or auto-fused by new modeling code which has benefits but also downsides.
33
39
 
34
40
  * Goal is to provide naive module/layer forwarding code for all models supported by HF transformers where run-time weight and structure level optimizations such as weight merging, stacking, and fusing are reversed so the model is operating in a simple naive state.
@@ -4,7 +4,13 @@
4
4
  <h1 align="center">Defuser</h1>
5
5
  </div>
6
6
 
7
-
7
+ <p align="center">
8
+ <a href="https://github.com/ModelCloud/Defuser/releases" style="text-decoration:none;"><img alt="GitHub release" src="https://img.shields.io/github/release/ModelCloud/Defuser.svg"></a>
9
+ <a href="https://pypi.org/project/Defuser/" style="text-decoration:none;"><img alt="PyPI - Version" src="https://img.shields.io/pypi/v/Defuser"></a>
10
+ <a href="https://pepy.tech/projects/Defuser" style="text-decoration:none;"><img src="https://static.pepy.tech/badge/Defuser" alt="PyPI Downloads"></a>
11
+ <a href="https://github.com/ModelCloud/Defuser/blob/main/LICENSE"><img src="https://img.shields.io/pypi/l/Defuser"></a>
12
+ <a href="https://huggingface.co/modelcloud/"><img src="https://img.shields.io/badge/🤗%20Hugging%20Face-ModelCloud-%23ff8811.svg"></a>
13
+ </p>
8
14
  Model defuser helper for HF Transformers >= 5.0. In HF Transformers 5.x releases, many MoE modules became auto-stacked or auto-fused by new modeling code which has benefits but also downsides.
9
15
 
10
16
  * Goal is to provide naive module/layer forwarding code for all models supported by HF transformers where run-time weight and structure level optimizations such as weight merging, stacking, and fusing are reversed so the model is operating in a simple naive state.
File without changes
File without changes
@@ -0,0 +1,67 @@
1
+ # SPDX-FileCopyrightText: 2026 ModelCloud.ai
2
+ # SPDX-FileCopyrightText: 2026 qubitium@modelcloud.ai
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ # Contact: qubitium@modelcloud.ai, x.com/qubitium
5
+
6
+ import torch
7
+ import torch.nn as nn
8
+ from torch.nn import functional as F
9
+
10
+
11
class LinearQwen3MoeSparseMoeBlock(nn.Module):
    """Naive (unfused) Qwen3 MoE sparse block.

    Re-implements the sparse MoE block with one per-expert ``Qwen3MoeMLP``
    held in an ``nn.ModuleList`` and a plain ``nn.Linear`` router, reversing
    the stacked/fused layout used by newer HF Transformers modeling code.
    """

    def __init__(self, config):
        """Build the router and one MLP per expert from *config*."""
        super().__init__()
        # Imported lazily so this module can be imported without pulling in
        # the qwen3_moe modeling file until a block is actually constructed.
        from transformers.models.qwen3_moe.modeling_qwen3_moe import Qwen3MoeMLP

        self.num_experts = config.num_experts
        self.top_k = config.num_experts_per_tok
        self.norm_topk_prob = config.norm_topk_prob

        # This must be nn.Linear for vllm alignment (weight-name compatibility).
        self.gate = nn.Linear(config.hidden_size, config.num_experts, bias=False)
        self.experts = nn.ModuleList(
            [Qwen3MoeMLP(config, intermediate_size=config.moe_intermediate_size) for _ in range(self.num_experts)]
        )

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        """Route each token to its top-k experts and sum the weighted outputs.

        Args:
            hidden_states: tensor of shape ``(batch, seq_len, hidden_dim)``.

        Returns:
            Tensor of the same shape as *hidden_states*.
        """
        batch_size, sequence_length, hidden_dim = hidden_states.shape
        hidden_states = hidden_states.view(-1, hidden_dim)
        # router_logits: (batch * sequence_length, n_experts)
        router_logits = self.gate(hidden_states)

        routing_weights = F.softmax(router_logits, dim=1, dtype=torch.float)
        routing_weights, selected_experts = torch.topk(routing_weights, self.top_k, dim=-1)
        if self.norm_topk_prob:  # only diff with mixtral sparse moe block!
            routing_weights /= routing_weights.sum(dim=-1, keepdim=True)
        # cast back to the input dtype
        routing_weights = routing_weights.to(hidden_states.dtype)

        final_hidden_states = torch.zeros(
            (batch_size * sequence_length, hidden_dim), dtype=hidden_states.dtype, device=hidden_states.device
        )

        # One-hot encode the selected experts to create an expert mask; used to
        # easily index which expert is going to be solicited.
        # expert_mask: (num_experts, top_k, tokens)
        with torch.no_grad():
            expert_mask = torch.nn.functional.one_hot(selected_experts, num_classes=self.num_experts).permute(2, 1, 0)

        # Only visit experts that received at least one token. Note: one_hot
        # with num_classes=num_experts yields indices strictly below
        # num_experts, so no padding-expert guard is needed here.
        expert_hit = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
        for expert_idx in expert_hit:
            e = int(expert_idx)  # plain int index for ModuleList / mask lookup
            expert_layer = self.experts[e]
            idx, top_x = torch.where(expert_mask[e])

            # Gather the hidden states for this expert's tokens and scale the
            # expert output by the token's routing weight for this expert.
            current_state = hidden_states[None, top_x].reshape(-1, hidden_dim)
            current_hidden_states = expert_layer(current_state) * routing_weights[top_x, idx, None]

            # `index_add_` only supports tensor indices, so use `top_x` here.
            final_hidden_states.index_add_(0, top_x, current_hidden_states.to(hidden_states.dtype))
        final_hidden_states = final_hidden_states.reshape(batch_size, sequence_length, hidden_dim)
        return final_hidden_states
File without changes
@@ -0,0 +1,149 @@
1
+ # SPDX-FileCopyrightText: 2026 ModelCloud.ai
2
+ # SPDX-FileCopyrightText: 2026 qubitium@modelcloud.ai
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ # Contact: qubitium@modelcloud.ai, x.com/qubitium
5
+
6
+ import torch
7
+ from torch import nn
8
+
9
+ from packaging import version
10
+ from transformers import AutoConfig
11
+ import transformers
12
+ import os
13
+ import importlib
14
+
15
+ from typing import Final
16
+
17
+ from ..logger import logger
18
+
19
# Registry of model types eligible for defusing.
#   min_transformers_version: the patch only applies at or above this
#       transformers version (pre_check_config returns False below it).
#   checkpoint_mapping: reserved for weight-name remapping; currently unused.
#   block_patch: pairs of (fused class import path, unfused replacement path).
MODEL_CONFIG = {
    "qwen3_moe": {
        "min_transformers_version": "5.0.0",
        "checkpoint_mapping": [],
        "block_patch": [
            (
                "transformers.models.qwen3_moe.modeling_qwen3_moe.Qwen3MoeSparseMoeBlock",
                "defuser.modeling.unfused_moe.qwen3_moe.LinearQwen3MoeSparseMoeBlock",
            )
        ],
    },
}

# NOTE(review): the env var name looks copy-pasted from GPTQModel rather than
# being Defuser-specific — confirm whether this is intentional for shared
# tooling or whether a DEFUSER_* variable should be introduced (keeping this
# one as a backward-compatible alias).
_ENV_VAR: Final[str] = "GPTQMODEL_USE_MODELSCOPE"
33
+
34
+
35
+ TRUTHFUL = {"1", "true", "yes", "on", "y"}
36
+
37
+
38
+ def env_flag(name: str, default: str | bool | None = "0") -> bool:
39
+ """Return ``True`` when an env var is set to a truthy value."""
40
+
41
+ value = os.getenv(name)
42
+ if value is None:
43
+ if default is None:
44
+ return False
45
+ if isinstance(default, bool):
46
+ return default
47
+ value = default
48
+ return str(value).strip().lower() in TRUTHFUL
49
+
50
def modelscope_requested() -> bool:
    """Return ``True`` when the user explicitly enabled ModelScope integration
    via the GPTQMODEL_USE_MODELSCOPE environment variable."""
    return env_flag(_ENV_VAR, default="0")
56
+
57
+
58
def get_file_path_via_model_name(model_or_path: str, file_name):
    """Resolve *file_name* relative to a local folder, HF Hub repo, or
    ModelScope repo.

    Returns the local filesystem path of the file. For remote repos the file
    is downloaded (or served from cache) first; download errors from the hub
    clients propagate to the caller.
    """
    # 1) local folder — join directly; existence is the caller's concern.
    if os.path.isdir(model_or_path):
        return os.path.join(model_or_path, file_name)

    # 2) ModelScope repo (opt-in via env var). ModelScope is different: its
    # snapshot_download returns the containing folder path, not the file.
    if modelscope_requested():
        from modelscope import snapshot_download  # pylint: disable=E0401

        folder = snapshot_download(model_or_path, allow_patterns=[file_name])
        return os.path.join(folder, file_name)

    # 3) default: treat as a Hugging Face model name. Imported lazily so the
    # local-folder path works without huggingface_hub installed. (The old
    # trailing `index_path = None` branch was unreachable and is removed.)
    from huggingface_hub import hf_hub_download

    return hf_hub_download(
        repo_id=model_or_path,
        filename=file_name,
        repo_type="model",
    )
82
+
83
+
84
def pre_check_config(model_name: str | torch.nn.Module):
    """Return ``True`` when *model_name* is eligible for defusing.

    Eligibility requires: a resolvable config with a supported ``model_type``,
    an installed transformers version at or above the registry minimum, and a
    checkpoint whose safetensors index does not already contain fused
    ("gate_up_proj") weights. Index inspection is best-effort: any failure
    there allows the patch rather than blocking it.
    """
    if isinstance(model_name, str):
        config = AutoConfig.from_pretrained(model_name)
    elif isinstance(model_name, torch.nn.Module):
        config = getattr(model_name, "config", None)
        if config is None:
            return False
    else:
        # Fix: previously fell through with `config` unbound -> NameError.
        return False

    model_type = getattr(config, "model_type", None)
    if model_type is None or model_type not in MODEL_CONFIG:
        return False

    cfg = MODEL_CONFIG[model_type]

    min_ver = cfg.get("min_transformers_version")
    tf_ver = version.parse(transformers.__version__)
    if min_ver and tf_ver < version.parse(min_ver):
        return False

    try:
        file_path = get_file_path_via_model_name(model_name, "model.safetensors.index.json")
        # Guard None before os.path.exists (exists(None) raises TypeError).
        if file_path is not None and os.path.exists(file_path):
            import json

            with open(file_path, "r") as f:
                index_data = json.load(f)
            for key in index_data.get("weight_map", {}):
                if "gate_up_proj" in key:
                    # Checkpoint already ships fused weights: do not defuse.
                    return False
    except Exception:
        # Best-effort: missing/unreadable index must not block the patch.
        # (Was a bare `except:`, which also swallowed SystemExit et al.)
        return True
    return True
116
+
117
+
118
def apply_modeling_patch(model: torch.nn.Module) -> bool:
    """Swap fused module instances in *model* for their naive counterparts.

    Iterates every ``block_patch`` entry for the model type and replaces each
    matching submodule with the unfused class, preserving the original
    parameter dtype. Returns ``True`` when at least one entry was applied.

    Fix: the early ``return`` statements previously sat inside the loop, so
    only the first ``block_patch`` entry was ever processed and one failing
    entry aborted all the rest.
    """
    if not pre_check_config(model):
        return False
    model_type = getattr(model.config, "model_type")
    cfg = MODEL_CONFIG[model_type]

    patched = False
    # patch blocks
    for orig_path, custom_path in cfg.get("block_patch", []):
        orig_module_path, orig_class_name = orig_path.rsplit(".", 1)
        custom_module_path, custom_class_name = custom_path.rsplit(".", 1)
        try:
            orig_class = getattr(importlib.import_module(orig_module_path), orig_class_name)
            custom_class = getattr(importlib.import_module(custom_module_path), custom_class_name)

            # Collect targets (name + dtype) first: mutating the module tree
            # while iterating named_modules() would be unsafe.
            targets = [
                (name, next(module.parameters()).dtype)
                for name, module in model.named_modules()
                if isinstance(module, orig_class)
            ]
            for name, orig_dtype in targets:
                model.set_submodule(name, custom_class(model.config).to(orig_dtype), True)
            logger.info(f"Patched module: {orig_path} -> {custom_path}")
            patched = True
        except Exception as e:
            # Keep going: a failure on one entry must not block the others.
            logger.warning(f"Failed to patch {orig_path}: {e}")
    return patched
145
+
146
+
147
def convert_hf_model(model: nn.Module):
    """Defuse *model* in place.

    Returns the result of :func:`apply_modeling_patch` (``True`` when a patch
    was applied) instead of discarding it, so callers can tell whether the
    model was actually modified. Backward compatible: previously ``None`` was
    returned and callers ignored it.
    """
    return apply_modeling_patch(model)
149
+
@@ -9,7 +9,7 @@ build-backend = "setuptools.build_meta"
9
9
 
10
10
  [project]
11
11
  name = "Defuser"
12
- version = "0.0.2"
12
+ version = "0.0.3"
13
13
  description = "Model defuser helper for HF Transformers."
14
14
  readme = "README.md"
15
15
  requires-python = ">=3.9"
@@ -38,8 +38,11 @@ dependencies = [
38
38
  Homepage = "https://github.com/ModelCloud/Defuser"
39
39
 
40
40
  [tool.setuptools]
41
- packages = ["defuser"]
42
- py-modules = ["setup_utils"]
41
+ include-package-data = false
42
+
43
+ [tool.setuptools.packages.find]
44
+ where = ["."]
45
+ include = ["defuser*"]
43
46
 
44
47
  [tool.pytest.ini_options]
45
48
  pythonpath = ["."]
@@ -3,13 +3,14 @@
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  # Contact: qubitium@modelcloud.ai, x.com/qubitium
5
5
  from transformers import AutoModelForCausalLM
6
+
6
7
  from defuser import convert_hf_model
7
8
 
8
9
 
9
10
  def test_qwen3_moe():
10
11
  from defuser.modeling.unfused_moe.qwen3_moe import LinearQwen3MoeSparseMoeBlock
11
12
 
12
- model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-30B-A3B")
13
+ model = AutoModelForCausalLM.from_pretrained("/monster/data/model/Qwen3-30B-A3B")
13
14
 
14
15
  assert model.config.model_type == "qwen3_moe"
15
16
 
@@ -1,2 +0,0 @@
1
- defuser
2
- setup_utils
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes