potnn-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
potnn/modules/conv.py ADDED
@@ -0,0 +1,203 @@
+ """PoT-quantized Conv2d layer with Integer Simulation.
+
+ v2: Added integer simulation for C-compatible QAT
+ - Forward pass can simulate C integer operations exactly
+ - Matches C inference bit-for-bit when use_integer_sim=True
+ - Eliminates QAT-C accuracy gap
+ """
+
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ from typing import Optional, Union, Tuple
+
+ from .base import PoTLayerBase
+ from ..quantize.pot import quantize_to_pot_ste, quantize_to_pot, quantize_activation_ste, apply_5level_zero_constraint
+ from ..quantize.integer_ops import (
+     round_half_up_ste, clamp_ste,
+     fake_quantize_input, fake_quantize_input_uint8,
+     fake_requantize
+ )
+
+
+ class PoTConv2d(PoTLayerBase):
+     """Power-of-Two quantized Conv2d layer.
+
+     This layer implements a Conv2d layer with PoT weight quantization.
+
+     [Integer-Only QAT Mode]
+     The forward pass simulates C integer arithmetic EXACTLY:
+     1. Input Quantization: float -> int8 (or uint8 for first layer)
+     2. Integer Conv: int8 * int8 -> int32
+     3. Requantize: (int32 * scale_int + round) >> shift
+     4. Bias Add: + round(bias_adjusted * act_scale)
+     5. Clamp: [0, 127]
+
+     This ensures that training accuracy matches C deployment accuracy.
+     """
+
+     def __init__(
+         self,
+         in_channels: int,
+         out_channels: int,
+         kernel_size: Union[int, Tuple[int, int]],
+         stride: Union[int, Tuple[int, int]] = 1,
+         padding: Union[int, Tuple[int, int]] = 0,
+         dilation: Union[int, Tuple[int, int]] = 1,
+         groups: int = 1,
+         bias: bool = True,
+         encoding: str = 'unroll'
+     ):
+         super().__init__(encoding)
+
+         self.in_channels = in_channels
+         self.out_channels = out_channels
+         self.kernel_size = kernel_size if isinstance(kernel_size, tuple) else (kernel_size, kernel_size)
+         self.stride = stride if isinstance(stride, tuple) else (stride, stride)
+         self.padding = padding if isinstance(padding, tuple) else (padding, padding)
+         self.dilation = dilation if isinstance(dilation, tuple) else (dilation, dilation)
+         self.groups = groups
+
+         # Initialize weight parameter
+         self.weight = nn.Parameter(torch.empty(
+             out_channels, in_channels // groups, *self.kernel_size
+         ))
+
+         # Initialize bias parameter
+         if bias:
+             self.bias = nn.Parameter(torch.zeros(out_channels))
+         else:
+             self.register_parameter('bias', None)
+
+         # Initialize weights using Kaiming normal
+         nn.init.kaiming_normal_(self.weight, mode='fan_out', nonlinearity='relu')
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         """Forward pass with three modes:
+         1. Float warmup (quantize=False): Standard conv
+         2. Float QAT (use_integer_sim=False): PoT weight + float activation
+         3. Integer sim (use_integer_sim=True): C-identical integer ops
+         """
+         if not self.quantize:
+             # Float mode (warmup training)
+             return F.conv2d(
+                 x, self.weight, self.bias,
+                 self.stride, self.padding, self.dilation, self.groups
+             )
+
+         if not getattr(self, 'use_integer_sim', False):
+             # Float QAT: PoT weight + float activation
+             # ReLU is applied externally by the model (torch.relu(conv(x)))
+             w_pot = quantize_to_pot_ste(self.weight, self.alpha, encoding=self.encoding)
+
+             # 5level constraint
+             if self.encoding == '5level' and self.enforce_5level_constraint:
+                 w_pot = apply_5level_zero_constraint(w_pot)
+
+             out = F.conv2d(
+                 x, w_pot * self.alpha, self.bias,
+                 self.stride, self.padding, self.dilation, self.groups
+             )
+             return out
+
+         # === Integer Simulation Mode (C-identical) ===
+
+         # === 1. Prepare Integer Parameters ===
+         # Always compute dynamically to ensure consistency with export
+         scale_int, shift, _ = self._compute_scale_and_shift()
+
+         is_first = self.is_first_layer.item() if self.is_first_layer is not None else False
+         is_last = self.is_last_layer.item() if self.is_last_layer is not None else False
+
+         # === 2. Input Quantization ===
+         if is_first:
+             # First layer: Input is normalized float (x - mean) / std
+             # We must simulate C behavior: raw uint8 input
+             if self.input_mean is not None and self.input_std is not None:
+                 # Denormalize: x_raw = x * avg_std + mean
+                 avg_std = self.input_std.mean().item()
+                 mean = self.input_mean.view(1, -1, 1, 1).to(x.device)
+                 x_raw = x * avg_std + mean
+                 x_raw = clamp_ste(x_raw, 0.0, 1.0)
+             else:
+                 x_raw = x
+
+             # Quantize to uint8 [0, 255]
+             # Match C test data generation (img * 255.0)
+             x_int = fake_quantize_input_uint8(x_raw, 255.0)
+         else:
+             # Other layers: Input is already int8 from previous layer
+             # No quantization needed
+             x_int = x
+
+         # === 3. Weight Quantization ===
+         w_pot = quantize_to_pot_ste(self.weight, self.alpha, encoding=self.encoding)
+
+         # 5level constraint (always apply for 5level encoding to match export)
+         if self.encoding == '5level':
+             w_pot = apply_5level_zero_constraint(w_pot)
+
+         # === 4. Integer Convolution ===
+         # F.conv2d with integer-valued inputs/weights -> integer-valued output (float dtype)
+         acc = F.conv2d(
+             x_int, w_pot,
+             None,  # Bias added separately
+             self.stride, self.padding, self.dilation, self.groups
+         )
+
+         # === 5. Requantize ===
+         # (acc * scale_int + round) >> shift
+         acc_scaled = fake_requantize(acc, scale_int, shift)
+
+         # === 6. Bias Addition ===
+         if self.bias is not None:
+             act_scale = self.act_scale if self.act_scale is not None else torch.tensor(1.0)
+
+             if is_first and self.input_mean is not None and self.input_std is not None:
+                 # Absorb mean/std into bias (Dynamic for training)
+                 # bias_adj = bias - (mean/std) * sum(W) * alpha
+                 avg_std = self.input_std.mean().item()
+                 alpha = self.alpha
+
+                 # Calculate weight sum per channel
+                 # w_pot shape: [out, in, k, k]
+                 w_sum = w_pot.sum(dim=(2, 3))  # [out, in]
+
+                 # We need to sum over input channels weighted by mean[c]
+                 # bias_correction = sum_c (mean[c]/avg_std * w_sum[:, c])
+                 mean_vec = self.input_mean.view(1, -1).to(x.device)  # [1, in]
+                 bias_correction = (mean_vec / avg_std * w_sum).sum(dim=1)  # [out]
+
+                 bias_adjusted = self.bias - bias_correction * alpha
+             else:
+                 bias_adjusted = self.bias
+
+             # Quantize bias: round(bias * act_scale)
+             bias_int = round_half_up_ste(bias_adjusted * act_scale)
+
+             # Add bias
+             acc_scaled = acc_scaled + bias_int.view(1, -1, 1, 1)
+
+         # === 7. Clamp (ReLU) ===
+         if not is_last:
+             out = clamp_ste(acc_scaled, 0.0, 127.0)
+         else:
+             out = acc_scaled
+
+         # === 8. Output ===
+         # Round to ensure exact integer (floating point precision)
+         # Use STE to maintain gradient flow during training
+         # Return the int8-valued tensor as-is (same as C)
+         out = round_half_up_ste(out)
+
+         return out
+
+     def extra_repr(self) -> str:
+         s = super().extra_repr()
+         s += f', quantize={self.quantize}'
+         return s
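The requantization in the integer-simulation path above, `(acc * scale_int + round) >> shift`, is the usual fixed-point stand-in for multiplying an int32 accumulator by a float scale. Below is a minimal standalone sketch of that arithmetic in plain Python; the `to_fixed_point` helper and its 15-bit precision are illustrative assumptions, not the package's actual `_compute_scale_and_shift`:

def to_fixed_point(scale, bits=15):
    # Illustrative: approximate a float scale as scale_int / 2**shift.
    shift = bits
    scale_int = round(scale * (1 << shift))
    return scale_int, shift

def requantize(acc, scale_int, shift):
    # C-style ((int64_t)acc * scale_int + round) >> shift; the added constant
    # gives round-half-up, and Python's >> on negative ints is arithmetic.
    rounding = 1 << (shift - 1)
    return (acc * scale_int + rounding) >> shift

scale_int, shift = to_fixed_point(0.0123)    # illustrative float requantization scale
for acc in (12345, -6789):                   # example int32 accumulators from the PoT convolution
    print(acc, requantize(acc, scale_int, shift), acc * 0.0123)

The integer result tracks the float product to within rounding, which is why the QAT forward pass and the C kernel can agree once both use the same scale_int and shift.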
@@ -0,0 +1,317 @@
+ """PoT-quantized Conv1d layer with Integer Simulation.
+
+ v1: 1D convolution support for time-series and audio processing
+ - Forward pass can simulate C integer operations exactly
+ - Matches C inference bit-for-bit when use_integer_sim=True
+ - Eliminates QAT-C accuracy gap
+ """
+
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ from typing import Optional, Union
+
+ from .base import PoTLayerBase
+ from ..quantize.pot import quantize_to_pot_ste, quantize_to_pot, quantize_activation_ste
+ from ..quantize.integer_sim import (
+     round_ste, floor_ste, clamp_ste,
+     quantize_to_int8_ste, quantize_to_uint8_ste,
+     requantize_ste
+ )
+
+
+ class PoTConv1d(PoTLayerBase):
+     """Power-of-Two quantized Conv1d layer.
+
+     This layer implements a Conv1d layer with PoT weight quantization
+     and alpha scaling. It can be used as a drop-in replacement for
+     nn.Conv1d in QAT-aware models.
+
+     Supports two modes:
+     - Float QAT (default): Standard fake quantization with float operations
+     - Integer Simulation: C-compatible integer operations for exact match
+     """
+
+     def __init__(
+         self,
+         in_channels: int,
+         out_channels: int,
+         kernel_size: int,
+         stride: int = 1,
+         padding: int = 0,
+         dilation: int = 1,
+         groups: int = 1,
+         bias: bool = True,
+         encoding: str = 'unroll'
+     ):
+         """Initialize PoTConv1d layer.
+
+         Args:
+             in_channels: Number of input channels
+             out_channels: Number of output channels
+             kernel_size: Size of the convolution kernel
+             stride: Stride of the convolution (default: 1)
+             padding: Zero-padding added to both sides (default: 0)
+             dilation: Spacing between kernel elements (default: 1)
+             groups: Number of blocked connections (default: 1)
+             bias: If True, adds a learnable bias (default: True)
+             encoding: Encoding type ('unroll', 'fp130', '5level', '2bit', 'ternary')
+         """
+         super().__init__(encoding)
+
+         self.in_channels = in_channels
+         self.out_channels = out_channels
+         self.kernel_size = kernel_size
+         self.stride = stride
+         self.padding = padding
+         self.dilation = dilation
+         self.groups = groups
+
+         # Initialize weight parameter: (out_channels, in_channels/groups, kernel_size)
+         self.weight = nn.Parameter(torch.empty(
+             out_channels, in_channels // groups, self.kernel_size
+         ))
+
+         # Initialize bias parameter
+         if bias:
+             self.bias = nn.Parameter(torch.zeros(out_channels))
+         else:
+             self.register_parameter('bias', None)
+
+         # Initialize weights using Kaiming normal
+         nn.init.kaiming_normal_(self.weight, mode='fan_out', nonlinearity='relu')
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         """Forward pass with optional PoT quantization.
+
+         Args:
+             x: Input tensor of shape (N, C_in, L)
+
+         Returns:
+             Output tensor of shape (N, C_out, L_out)
+         """
+         if not self.quantize:
+             # Float mode (warmup training)
+             return F.conv1d(
+                 x, self.weight, self.bias,
+                 self.stride, self.padding, self.dilation, self.groups
+             )
+
+         if self.use_integer_sim and self.scale_int is not None:
+             if self.training:
+                 # Training: use float QAT for gradient flow
+                 # Integer simulation doesn't support alpha gradients
+                 return self._forward_float_qat(x)
+             else:
+                 # Eval: use integer sim for C-exact match
+                 return self._forward_integer_sim(x)
+         else:
+             # Standard Float QAT Mode
+             return self._forward_float_qat(x)
+
+     def _forward_float_qat(self, x: torch.Tensor) -> torch.Tensor:
+         """Original float QAT forward.
+
+         NOTE: Input is already normalized as (data * 256/255 - mean) / avg_std
+         so mean is already subtracted. NO bias absorption needed here.
+         Bias absorption is only for Integer Sim (raw uint8 input) and C export.
+         """
+         DEBUG_QAT = False  # Set to True to print QAT debug output
+
+         # PoT quantization
+         w_q = quantize_to_pot_ste(self.weight, self.alpha, encoding=self.encoding)
+
+         if DEBUG_QAT:
+             print(f"\n[DEBUG QAT] input: range=[{x.min():.4f}, {x.max():.4f}]")
+             print(f" w_q: unique_vals={torch.unique(w_q).tolist()[:10]}...")
+             print(f" alpha={self.alpha.item():.4f}")
+             print(f" w_effective (w_q*alpha): range=[{(w_q*self.alpha).min():.4f}, {(w_q*self.alpha).max():.4f}]")
+
+         # Convolution with scaled weights (NO bias adjustment - input already normalized)
+         out = F.conv1d(
+             x, w_q * self.alpha, self.bias,
+             self.stride, self.padding, self.dilation, self.groups
+         )
+
+         if DEBUG_QAT:
+             print(f" conv output: range=[{out.min():.4f}, {out.max():.4f}]")
+
+         # Activation quantization
+         if self.act_scale is not None:
+             out = quantize_activation_ste(out, self.act_scale)
+             if DEBUG_QAT:
+                 print(f" after act_quant (scale={self.act_scale.item():.4f}): range=[{out.min():.4f}, {out.max():.4f}]")
+
+         return out
+
+     def _forward_integer_sim(self, x: torch.Tensor) -> torch.Tensor:
+         """Integer simulation forward - matches C inference exactly.
+
+         C code equivalent:
+             // Step 1: PoT convolution
+             int32_t acc = 0;
+             for (...) {
+                 acc += input[i] << k;  // or -= for negative weights
+             }
+
+             // Step 2: Requantize
+             acc = ((int64_t)acc * scale_int + round) >> shift;
+
+             // Step 3: Add bias (with mean absorption for first layer)
+             acc += bias_int;
+
+             // Step 4: Clamp (ReLU)
+             output = clamp(acc, 0, 127);  // or -128,127 if no ReLU
+         """
+         DEBUG = False  # Set to True to print detailed debug output
+
+         is_first = self.is_first_layer.item() if self.is_first_layer is not None else False
+         is_last = self.is_last_layer.item() if self.is_last_layer is not None else False
+
+         if DEBUG:
+             print(f"\n[DEBUG _forward_integer_sim] is_first={is_first}, is_last={is_last}")
+             print(f" input: shape={x.shape}, range=[{x.min():.4f}, {x.max():.4f}]")
+
+         # === Step 1: Quantize input to integer ===
+         if is_first:
+             # First layer: input is NORMALIZED (x - mean) / avg_std
+             # C code receives raw uint8 [0,255], so we denormalize first
+             if self.input_mean is not None and self.input_std is not None:
+                 # Denormalize: x_raw = x_norm * avg_std + mean (channel-wise mean!)
+                 # QAT normalized with channel-wise mean, so denorm with channel-wise mean
+                 avg_std = self.input_std.mean().item()
+                 mean = self.input_mean.view(1, -1, 1).to(x.device)  # [1, C, 1]
+                 x_raw = x * avg_std + mean  # channel-wise mean
+                 x_raw = torch.clamp(x_raw, 0.0, 1.0)
+             else:
+                 x_raw = x
+             # [0,1] → [0,255] (uint8), /256 absorbed in shift (+8)
+             x_int = quantize_to_uint8_ste(x_raw, 256.0)
+             if DEBUG:
+                 print(f" x_int (uint8): range=[{x_int.min():.0f}, {x_int.max():.0f}]")
+         else:
+             # Other layers: convert float back to int8
+             # Input was divided by prev_act_scale in previous layer
+             prev_scale = self.prev_act_scale if self.prev_act_scale is not None else torch.tensor(1.0)
+             x_int = quantize_to_int8_ste(x, prev_scale)
+             if DEBUG:
+                 print(f" x_int (int8): prev_scale={prev_scale.item():.4f}, range=[{x_int.min():.0f}, {x_int.max():.0f}]")
+
+         # === Step 2: PoT Convolution (integer) ===
+         # Get PoT weights with STE for gradient flow
+         w_pot = quantize_to_pot_ste(self.weight, self.alpha, encoding=self.encoding)
+
+         if DEBUG:
+             print(f" w_pot: shape={w_pot.shape}, unique_vals={torch.unique(w_pot).tolist()[:10]}...")
+             print(f" alpha={self.alpha.item():.4f}")
+
+         # Integer convolution
+         # In C: acc += input << k (shift operation)
+         # In Python: float tensor but values are integers
+         acc = F.conv1d(
+             x_int, w_pot,
+             None,  # bias added separately
+             self.stride, self.padding, self.dilation, self.groups
+         )
+
+         if DEBUG:
+             print(f" acc after conv: range=[{acc.min():.0f}, {acc.max():.0f}]")
+
+         # === Step 3: Requantize ===
+         # C: ((int64_t)acc * scale_int + round) >> shift
+         scale_int = self.scale_int.item() if self.scale_int is not None else 1
+         shift = self.shift.item() if self.shift is not None else 0
+
+         if DEBUG:
+             print(f" scale_int={scale_int}, shift={shift}")
+
+         acc = requantize_ste(acc, scale_int, shift)
+
+         if DEBUG:
+             print(f" acc after requantize: range=[{acc.min():.0f}, {acc.max():.0f}]")
+
+         # === Step 4: Add bias (with mean absorption for first layer) ===
+         if self.bias is not None:
+             act_scale = self.act_scale if self.act_scale is not None else torch.tensor(1.0)
+
+             if is_first:
+                 # First layer: absorb mean into bias
+                 # MUST match export.py absorb_standardization exactly
+                 # Use avg_std to match QAT and C export
+
+                 if self.input_mean is not None and self.input_std is not None:
+                     avg_std = self.input_std.mean().item()
+                     in_ch = w_pot.shape[1]
+                     alpha = self.alpha
+                     bias_adjusted = self.bias.clone()
+
+                     for c in range(in_ch):
+                         mean_c = self.input_mean[c].item()
+                         weight_sum_c = w_pot[:, c, :].sum(dim=1) * alpha  # [out_ch]
+                         bias_adjusted = bias_adjusted - (mean_c / avg_std) * weight_sum_c
+
+                     if DEBUG:
+                         print(f" [First layer bias absorption - {in_ch} channels, avg_std={avg_std:.4f}]")
+                         print(f" input_mean={self.input_mean.tolist()}")
+                         print(f" original bias sample: [{self.bias[0].item():.4f}, {self.bias[1].item():.4f}, ...]")
+                         print(f" adjusted bias sample: [{bias_adjusted[0].item():.4f}, {bias_adjusted[1].item():.4f}, ...]")
+                 else:
+                     # No standardization - use bias as-is
+                     bias_adjusted = self.bias
+                     if DEBUG:
+                         print(f" [First layer - no standardization]")
+
+                 bias_int = round_ste(bias_adjusted * act_scale)
+
+                 if DEBUG:
+                     print(f" bias_int sample: [{bias_int[0].item():.0f}, {bias_int[1].item():.0f}, ...]")
+             else:
+                 # Other layers: simple bias scaling
+                 bias_int = round_ste(self.bias * act_scale)
+                 if DEBUG:
+                     print(f" bias_int: act_scale={act_scale.item():.4f}, range=[{bias_int.min():.0f}, {bias_int.max():.0f}]")
+
+             # Add bias (broadcast over length dimension)
+             acc = acc + bias_int.view(1, -1, 1)
+
+             if DEBUG:
+                 print(f" acc after bias: range=[{acc.min():.0f}, {acc.max():.0f}]")
+
+         # === Step 5: Clamp (ReLU) ===
+         if not is_last:
+             # ReLU: clamp to [0, 127]
+             out = clamp_ste(acc, 0.0, 127.0)
+             if DEBUG:
+                 print(f" out after ReLU clamp: range=[{out.min():.0f}, {out.max():.0f}]")
+         else:
+             # Last layer: no ReLU, output raw logits
+             out = acc
+             if DEBUG:
+                 print(f" out (last layer, no clamp): range=[{out.min():.0f}, {out.max():.0f}]")
+
+         # === Step 6: Convert back to float for next layer ===
+         # Next layer expects: int_value / act_scale
+         if self.act_scale is not None and not is_last:
+             out = out / self.act_scale
+             if DEBUG:
+                 print(f" out after /act_scale: range=[{out.min():.4f}, {out.max():.4f}]")
+
+         return out
+
+     def extra_repr(self) -> str:
+         """String representation of layer configuration."""
+         s = (f'{self.in_channels}, {self.out_channels}, '
+              f'kernel_size={self.kernel_size}, stride={self.stride}')
+         if self.padding != 0:
+             s += f', padding={self.padding}'
+         if self.dilation != 1:
+             s += f', dilation={self.dilation}'
+         if self.groups != 1:
+             s += f', groups={self.groups}'
+         if self.bias is None:
+             s += ', bias=False'
+         if self.quantize:
+             s += f', quantize=True, encoding={self.encoding}'
+         if self.use_integer_sim:
+             s += ', integer_sim=True'
+         return s
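The per-channel loop in Step 4 above computes the same first-layer bias correction as the vectorized form used in PoTConv2d's bias-addition step (bias_adjusted = bias - sum_c (mean[c] / avg_std) * sum_k W[:, c, k] * alpha). A small self-contained check of that equivalence, using made-up shapes and ImageNet-style mean/std values and only torch (no potnn imports):

import torch

torch.manual_seed(0)
out_ch, in_ch, k = 8, 3, 5
w_pot = torch.randint(-4, 5, (out_ch, in_ch, k)).float()  # stand-in for PoT weight levels
bias = torch.randn(out_ch)
alpha = 0.05
input_mean = torch.tensor([0.485, 0.456, 0.406])
avg_std = torch.tensor([0.229, 0.224, 0.225]).mean().item()

# Loop form, as in PoTConv1d._forward_integer_sim Step 4
bias_loop = bias.clone()
for c in range(in_ch):
    weight_sum_c = w_pot[:, c, :].sum(dim=1) * alpha        # [out_ch]
    bias_loop = bias_loop - (input_mean[c].item() / avg_std) * weight_sum_c

# Vectorized form, as in PoTConv2d's bias addition
w_sum = w_pot.sum(dim=2)                                     # [out_ch, in_ch]
bias_vec = bias - (input_mean.view(1, -1) / avg_std * w_sum).sum(dim=1) * alpha

print(torch.allclose(bias_loop, bias_vec))                   # True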