diffcb 0.1.7__tar.gz → 0.1.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. {diffcb-0.1.7 → diffcb-0.1.8}/PKG-INFO +13 -11
  2. {diffcb-0.1.7 → diffcb-0.1.8}/README.md +12 -10
  3. {diffcb-0.1.7 → diffcb-0.1.8}/dcb/__init__.py +1 -1
  4. {diffcb-0.1.7 → diffcb-0.1.8}/dcb/layer.py +22 -31
  5. {diffcb-0.1.7 → diffcb-0.1.8}/pyproject.toml +1 -1
  6. {diffcb-0.1.7 → diffcb-0.1.8}/tests/test_gradcheck.py +14 -22
  7. {diffcb-0.1.7 → diffcb-0.1.8}/tests/test_layer.py +9 -6
  8. {diffcb-0.1.7 → diffcb-0.1.8}/.gitignore +0 -0
  9. {diffcb-0.1.7 → diffcb-0.1.8}/.zenodo.json +0 -0
  10. {diffcb-0.1.7 → diffcb-0.1.8}/LICENSE +0 -0
  11. {diffcb-0.1.7 → diffcb-0.1.8}/dcb/diagnostics.py +0 -0
  12. {diffcb-0.1.7 → diffcb-0.1.8}/dcb/fft_kde.py +0 -0
  13. {diffcb-0.1.7 → diffcb-0.1.8}/dcb/kde.py +0 -0
  14. {diffcb-0.1.7 → diffcb-0.1.8}/dcb/solver.py +0 -0
  15. {diffcb-0.1.7 → diffcb-0.1.8}/dcb/training.py +0 -0
  16. {diffcb-0.1.7 → diffcb-0.1.8}/dcb/utils.py +0 -0
  17. {diffcb-0.1.7 → diffcb-0.1.8}/notebooks/.gitkeep +0 -0
  18. {diffcb-0.1.7 → diffcb-0.1.8}/round24_cumulative_bench.py +0 -0
  19. {diffcb-0.1.7 → diffcb-0.1.8}/round24_v016_test.py +0 -0
  20. {diffcb-0.1.7 → diffcb-0.1.8}/round25_full_range_sweep.py +0 -0
  21. {diffcb-0.1.7 → diffcb-0.1.8}/round25_write_csv.py +0 -0
  22. {diffcb-0.1.7 → diffcb-0.1.8}/tests/test_kde.py +0 -0
  23. {diffcb-0.1.7 → diffcb-0.1.8}/tests/test_r18c_denom_audit.py +0 -0
  24. {diffcb-0.1.7 → diffcb-0.1.8}/tests/test_r18c_deprecation_warn.py +0 -0
  25. {diffcb-0.1.7 → diffcb-0.1.8}/tests/test_r19_default_fft.py +0 -0
  26. {diffcb-0.1.7 → diffcb-0.1.8}/tests/test_r19_diagnostics.py +0 -0
  27. {diffcb-0.1.7 → diffcb-0.1.8}/tests/test_solver.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: diffcb
3
- Version: 0.1.7
3
+ Version: 0.1.8
4
4
  Summary: Differentiable Critical Bandwidth: Silverman's modality test as a differentiable PyTorch layer with IFT backward pass.
5
5
  Project-URL: Homepage, https://github.com/ryZhangHason/differentiable-critical-bandwidth
6
6
  Project-URL: Repository, https://github.com/ryZhangHason/differentiable-critical-bandwidth
@@ -295,15 +295,17 @@ Cumulative speedup vs v0.1.4 on CPU: 1.1× (100K), 1.7× (1M), **4.2× (10M)**.
295
295
 
296
296
  ```python
297
297
  DCBLayer(
298
- target_modes=1, # target number of modes (default 1)
299
- use_fft=True, # FFT path for n > 50K (default True)
300
- max_n_exact=None, # sketch above this n (None = always exact)
301
- G_min=16384, # minimum FFT histogram bins (accuracy ↑ with G)
302
- use_richardson=True, # Richardson extrapolation on h_crit (30% accuracy gain)
303
- direct_n_max=25_000, # use direct-KDE (no histogram) for n ≤ this
304
- direct_M=2048, # direct-KDE evaluation grid size
305
- use_compile=False, # infrastructure flag; use TrainingLayer for compile
306
- safe_backward=False, # clamp IFT denominator near bifurcations
298
+ target_modes=1, # target number of modes (default 1)
299
+ use_fft=True, # FFT path for n > 50K (default True)
300
+ max_n_exact=None, # sketch above this n (None = always exact)
301
+ G_min=16384, # minimum FFT histogram bins (accuracy ↑ with G)
302
+ use_richardson="auto", # Richardson on CPU, off on GPU (30% accuracy gain on CPU)
303
+ direct_n_max=25_000, # direct-KDE active only when forward_path='auto'/'direct'
304
+ direct_M=2048, # direct-KDE evaluation grid size
305
+ forward_path='smooth', # 'smooth' (default, strictly differentiable) |
306
+ # 'auto' (direct-KDE at n≤25K, surrogate gradient) |
307
+ # 'direct' (force direct-KDE, accuracy benchmarks)
308
+ safe_backward=False, # clamp IFT denominator near bifurcations
307
309
  )
308
310
  ```
309
311
 
@@ -343,7 +345,7 @@ By default (`use_richardson=True`), DCB runs a second bisection at G/2=8192 and
343
345
 
344
346
  - **`compile=True` on MPS**: blocked by float64 in `_refine_hcrit` fallback (fix in v0.1.7)
345
347
  - **`compile=True` on CUDA with Python 3.12**: requires torch ≥ 2.4 or Python ≤ 3.11
346
- - **`gradcheck` with direct-KDE forward**: forward (n ≤ 25K) uses exact KDE; backward uses smooth IFT surrogate — gradcheck will fail by design; use `DCBLayer(direct_n_max=0)` for gradcheck
348
+ - **`gradcheck`**: passes with the default `forward_path='smooth'`; the default is strictly differentiable at all n. Opt into `forward_path='auto'` only for forward-only accuracy benchmarks (surrogate gradient at n≤25K)
347
349
  - **n > 100M**: requires streaming histogram (not yet public API); use `max_n_exact=1_000_000` sketch as workaround
348
350
 
349
351
  ## Confirmed Experimental Results
@@ -73,15 +73,17 @@ Cumulative speedup vs v0.1.4 on CPU: 1.1× (100K), 1.7× (1M), **4.2× (10M)**.
73
73
 
74
74
  ```python
75
75
  DCBLayer(
76
- target_modes=1, # target number of modes (default 1)
77
- use_fft=True, # FFT path for n > 50K (default True)
78
- max_n_exact=None, # sketch above this n (None = always exact)
79
- G_min=16384, # minimum FFT histogram bins (accuracy ↑ with G)
80
- use_richardson=True, # Richardson extrapolation on h_crit (30% accuracy gain)
81
- direct_n_max=25_000, # use direct-KDE (no histogram) for n ≤ this
82
- direct_M=2048, # direct-KDE evaluation grid size
83
- use_compile=False, # infrastructure flag; use TrainingLayer for compile
84
- safe_backward=False, # clamp IFT denominator near bifurcations
76
+ target_modes=1, # target number of modes (default 1)
77
+ use_fft=True, # FFT path for n > 50K (default True)
78
+ max_n_exact=None, # sketch above this n (None = always exact)
79
+ G_min=16384, # minimum FFT histogram bins (accuracy ↑ with G)
80
+ use_richardson="auto", # Richardson on CPU, off on GPU (30% accuracy gain on CPU)
81
+ direct_n_max=25_000, # direct-KDE active only when forward_path='auto'/'direct'
82
+ direct_M=2048, # direct-KDE evaluation grid size
83
+ forward_path='smooth', # 'smooth' (default, strictly differentiable) |
84
+ # 'auto' (direct-KDE at n≤25K, surrogate gradient) |
85
+ # 'direct' (force direct-KDE, accuracy benchmarks)
86
+ safe_backward=False, # clamp IFT denominator near bifurcations
85
87
  )
86
88
  ```
87
89
 
@@ -121,7 +123,7 @@ By default (`use_richardson=True`), DCB runs a second bisection at G/2=8192 and
121
123
 
122
124
  - **`compile=True` on MPS**: blocked by float64 in `_refine_hcrit` fallback (fix in v0.1.7)
123
125
  - **`compile=True` on CUDA with Python 3.12**: requires torch ≥ 2.4 or Python ≤ 3.11
124
- - **`gradcheck` with direct-KDE forward**: forward (n ≤ 25K) uses exact KDE; backward uses smooth IFT surrogate — gradcheck will fail by design; use `DCBLayer(direct_n_max=0)` for gradcheck
126
+ - **`gradcheck`**: passes with the default `forward_path='smooth'`; the default is strictly differentiable at all n. Opt into `forward_path='auto'` only for forward-only accuracy benchmarks (surrogate gradient at n≤25K)
125
127
  - **n > 100M**: requires streaming histogram (not yet public API); use `max_n_exact=1_000_000` sketch as workaround
126
128
 
127
129
  ## Confirmed Experimental Results
@@ -21,4 +21,4 @@ __all__ = [
21
21
  "TrainingLayer",
22
22
  "anneal_eps_tau", "soft_mode_count_cross", "soft_mode_count",
23
23
  ]
24
- __version__ = "0.1.7"
24
+ __version__ = "0.1.8"
@@ -14,16 +14,16 @@ the computational graph of the iterative solver and maintaining O(1) memory
14
14
  cost relative to the number of solver iterations. Hyperparameters ε and τ
15
15
  may be supplied explicitly or computed adaptively via `dcb.utils`.
16
16
 
17
- Forward/backward path mismatch (by design)
18
- -------------------------------------------
19
- At n ≤ direct_n_max AND forward_path='auto' (default), the FORWARD pass uses
20
- direct KDE (no histogram) for zero binning bias. The BACKWARD pass always
21
- uses the smooth IFT on M̃ (soft mode count) at all n. These are different
22
- implicit functions, so ``torch.autograd.gradcheck`` will fail for
23
- n ≤ direct_n_max with the default settings. Use ``forward_path='smooth'``
24
- to force the smooth path in the forward pass — gradcheck will then pass.
25
- For ML training the mismatch is correct by design: the smooth gradient is
26
- the appropriate object for gradient descent.
17
+ Strict differentiability
18
+ ------------------------
19
+ ``forward_path='smooth'`` (the default) forces both the forward and backward
20
+ to use the same smooth surrogate at all n — ``torch.autograd.gradcheck``
21
+ passes and ∂h_crit/∂X is the exact IFT gradient of the computed h_crit.
22
+
23
+ ``forward_path='auto'`` opts into the direct-KDE forward at n ≤ direct_n_max
24
+ for zero-bias accuracy, but forward and backward then use different implicit
25
+ functions — gradcheck will fail and the gradient is a surrogate. Only use
26
+ 'auto' for forward-only inference benchmarks.
27
27
  """
28
28
 
29
29
  from __future__ import annotations
@@ -189,26 +189,17 @@ class DCBLayer(nn.Module):
189
189
  forward_path : str
190
190
  Controls forward-pass routing. One of:
191
191
 
192
- - ``'auto'`` (default): use direct-KDE for n ≤ direct_n_max, FFT
193
- histogram for n > 50K, legacy chunked-KDE for 50K >= n > direct_n_max.
194
- This is the current behaviour.
195
- - ``'smooth'``: always use the FFT histogram or chunked-KDE path
196
- (never direct-KDE). Internally sets direct_n_max=0.
197
- **Use this when you need ``torch.autograd.gradcheck`` to pass**,
198
- because it forces forward and backward to use the same smooth M̃.
199
- - ``'direct'``: force direct-KDE even for large n. Slow for large n;
200
- intended for accuracy benchmarks. Internally sets direct_n_max=inf.
201
-
202
- Notes
203
- -----
204
- Forward/backward path at n <= direct_n_max:
205
- The forward pass uses direct KDE (no histogram) for accuracy.
206
- The backward pass uses the smooth IFT on M̃ (soft mode count) at all n.
207
- These are different implicit functions, so ``torch.autograd.gradcheck``
208
- will fail for n <= direct_n_max. Use ``forward_path='smooth'`` to
209
- force the smooth path at all n — gradcheck will then pass.
210
- For ML training this mismatch is correct by design: the smooth
211
- gradient is the appropriate object for gradient descent.
192
+ - ``'smooth'`` **(default)**: always use the FFT histogram or
193
+ chunked-KDE path. Both forward and backward use the same smooth
194
+ surrogate — ``torch.autograd.gradcheck`` passes and gradients
195
+ are the exact IFT derivatives of the computed h_crit.
196
+ - ``'auto'``: use direct-KDE for n ≤ direct_n_max (zero histogram
197
+ bias), FFT for n > 50K, chunked-KDE otherwise. Forward and
198
+ backward use different implicit functions at n ≤ direct_n_max,
199
+ so gradcheck will fail and the gradient is a surrogate.
200
+ Use only for forward-only inference / accuracy benchmarks.
201
+ - ``'direct'``: force direct-KDE at all n (accuracy benchmark only;
202
+ slow for large n; surrogate gradient at all n).
212
203
 
213
204
  Examples
214
205
  --------
@@ -244,7 +235,7 @@ class DCBLayer(nn.Module):
244
235
  use_compile: bool = False,
245
236
  direct_n_max: int = 25_000,
246
237
  direct_M: int = 2048,
247
- forward_path: str = 'auto',
238
+ forward_path: str = 'smooth',
248
239
  ):
249
240
  super().__init__()
250
241
  if forward_path not in ('auto', 'smooth', 'direct'):
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "diffcb"
7
- version = "0.1.7"
7
+ version = "0.1.8"
8
8
  description = "Differentiable Critical Bandwidth: Silverman's modality test as a differentiable PyTorch layer with IFT backward pass."
9
9
  readme = "README.md"
10
10
  license = { file = "LICENSE" }
@@ -17,37 +17,29 @@ from dcb.layer import DCBLayer
17
17
  # ---------------------------------------------------------------------------
18
18
 
19
19
  def test_gradcheck_smooth_path():
20
- """gradcheck passes when forward_path='smooth' forces smooth M̃ at all n.
21
-
22
- With forward_path='smooth', direct_n_max is set to 0 internally, so the
23
- forward pass uses the same chunked-KDE path as the IFT backward (both use
24
- the smooth M̃ without direct-KDE). The forward and backward are consistent
25
- implicit functions, so gradcheck passes.
26
-
27
- Tolerances are intentionally loose (atol=0.05, rtol=0.2) because:
28
- 1. Hard bisection finds h_crit by discrete mode-count steps, introducing
29
- quantisation noise of ~O(bisection_tol) in h_crit.
30
- 2. The IFT backward uses smooth M̃ evaluated at this h_crit, which is
31
- not perfectly aligned with the bisection steps.
32
- 3. These two effects cause O(1%) discrepancies between the analytical
33
- and numerical Jacobians — well within the intended ML-training use case,
34
- but requiring loose gradcheck tolerances to pass reliably.
20
+ """gradcheck passes with the default DCBLayer (forward_path='smooth').
21
+
22
+ forward_path='smooth' (the default since v0.1.8) forces both the forward
23
+ pass and the IFT backward to use the same smooth surrogate — they are
24
+ the same implicit function, so gradcheck is internally consistent.
25
+
26
+ Tolerances are loose (atol=0.05, rtol=0.2) because the hard bisection
27
+ root-finder introduces O(1%) quantisation noise between the analytical
28
+ IFT gradient and the finite-difference Jacobian. This is the expected
29
+ residual for the discrete-step bisection and does not affect ML training.
35
30
  """
36
31
  torch.manual_seed(42)
37
32
  X = torch.cat([torch.randn(30) - 1.0, torch.randn(30) + 1.0]).double()
38
33
  X.requires_grad_(True)
34
+ # Default forward_path='smooth' — strictly differentiable at all n
39
35
  layer = DCBLayer(
40
- use_fft=False, # use chunked-KDE (smooth) at small n
41
- forward_path='smooth', # disable direct-KDE routing; sets direct_n_max=0
42
- use_richardson=False, # Richardson adds a second backward; skip for gradcheck
36
+ use_fft=False,
37
+ use_richardson=False, # skip Richardson for gradcheck clarity
43
38
  )
44
39
 
45
- # eps=1e-3 for finite differences; atol/rtol are loose because the smooth
46
- # IFT gradient and FD Jacobian can differ by ~1–5% due to bisection
47
- # quantisation — verified reliable across 20 seeds.
48
40
  result = torch.autograd.gradcheck(layer, (X,), eps=1e-3, atol=0.05, rtol=0.2,
49
41
  raise_exception=True)
50
- assert result, "gradcheck failed with forward_path='smooth'"
42
+ assert result, "gradcheck failed with default DCBLayer (forward_path='smooth')"
51
43
 
52
44
 
53
45
  # ---------------------------------------------------------------------------
@@ -121,24 +121,27 @@ def test_dcblayer_state_dict():
121
121
 
122
122
  @pytest.mark.xfail(
123
123
  reason=(
124
- "IFT gradient is an approximation (soft M̃_cross at h_crit found by hard bisection). "
125
- "gradcheck at atol=1e-3 is too strict for the soft/hard mismatch at small n. "
126
- "Qualitative correctness verified in test_ift_gradient_matches_finite_diff."
124
+ "Hard bisection introduces quantisation noise even with forward_path='smooth'. "
125
+ "atol=1e-3 is too strict for the bisection step-function discretisation. "
126
+ "Qualitative correctness verified in test_ift_gradient_matches_finite_diff; "
127
+ "loose-tolerance gradcheck passes in tests/test_gradcheck.py."
127
128
  ),
128
129
  strict=False,
129
130
  )
130
131
  def test_dcblayer_gradcheck():
131
132
  """torch.autograd.gradcheck with double precision, eps=1e-4, atol=1e-3.
132
133
 
133
- Uses bimodal X with n=30 (small for speed). This is the strictest criterion.
134
- gradcheck verifies that the custom IFT backward matches the numerical Jacobian.
134
+ Uses bimodal X with n=30 (small for speed). Default forward_path='smooth'
135
+ means forward and backward use the same surrogate — the remaining xfail
136
+ is due to bisection quantisation noise (step-function root-finding), not a
137
+ forward/backward path mismatch. Loose-tolerance gradcheck passes separately.
135
138
  """
136
139
  torch.manual_seed(42)
137
140
  n = 30
138
141
  X_base = torch.cat([torch.randn(15) - 1.0, torch.randn(15) + 1.0])
139
142
  X = X_base.double().requires_grad_(True)
140
143
 
141
- layer = DCBLayer(target_modes=1, G=64)
144
+ layer = DCBLayer(target_modes=1, G=64) # default forward_path='smooth'
142
145
 
143
146
  def fn(x):
144
147
  return layer(x)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes