PyPI - diffcb - Versions diffs - 0.1.8__tar.gz → 0.1.9__tar.gz - Mend

diffcb 0.1.8tar.gz → 0.1.9tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

{diffcb-0.1.8 → diffcb-0.1.9}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: diffcb
-Version: 0.1.8
+Version: 0.1.9
 Summary: Differentiable Critical Bandwidth: Silverman's modality test as a differentiable PyTorch layer with IFT backward pass.
 Project-URL: Homepage, https://github.com/ryZhangHason/differentiable-critical-bandwidth
 Project-URL: Repository, https://github.com/ryZhangHason/differentiable-critical-bandwidth

{diffcb-0.1.8 → diffcb-0.1.9}/dcb/__init__.py RENAMED Viewed

@@ -21,4 +21,4 @@ __all__ = [
     "TrainingLayer",
     "anneal_eps_tau", "soft_mode_count_cross", "soft_mode_count",
 ]
-__version__ = "0.1.8"
+__version__ = "0.1.9"

{diffcb-0.1.8 → diffcb-0.1.9}/dcb/fft_kde.py RENAMED Viewed

@@ -137,12 +137,15 @@ def mode_count_from_C(
     if C.numel() == 0:
         return 1  # degenerate single-point distribution
+    # Caller must pass C computed with fft_dtype=torch.float64 (complex128).
+    # Float32 histogram errors (~1e-4 relative) create spurious sign changes
+    # near small h_crit that cannot be filtered without killing genuine lobes.
+    # A relative threshold of 1e-12 removes machine-epsilon edge-bin noise
+    # (empty histogram bins at domain boundaries contribute ~eps_f64 to f′).
     K_deriv = 1j * omega * torch.exp(-0.5 * (omega * h) ** 2)
     f_prime = torch.fft.irfft(C * K_deriv, n=N).real[:G]
-    # Exact zeros are measure-zero for smooth KDE on non-degenerate data; strict
-    # inequalities match the masked path in all practical cases while avoiding
-    # one host sync (.any()) and two allocations (nonzero_mask, s) per call.
-    return int(((f_prime[:-1] > 0) & (f_prime[1:] < 0)).sum().item())
+    thresh = f_prime.abs().max() * 1e-12
+    return int(((f_prime[:-1] > thresh) & (f_prime[1:] < -thresh)).sum().item())
 def mode_count_from_C_batch(
@@ -186,12 +189,15 @@ def mode_count_from_C_batch(
     else:
         h_t = h_batch.to(dtype=omega.dtype, device=omega.device)              # (B,)
+    # Caller must pass C in float64 (complex128) — same rationale as mode_count_from_C.
     # Build (B, M) kernel matrix in one vectorised op
     omega_h = omega.unsqueeze(0) * h_t.unsqueeze(1)                       # (B, M)
     K_batch = 1j * omega.unsqueeze(0) * torch.exp(-0.5 * omega_h ** 2)    # (B, M)
     # One batched irfft dispatch instead of B separate calls
-    f_prime_batch = torch.fft.irfft(C.unsqueeze(0) * K_batch, n=N)[:, :G]  # (B, G)
-    return ((f_prime_batch[:, :-1] > 0) & (f_prime_batch[:, 1:] < 0)).sum(dim=1)
+    f_prime_batch = torch.fft.irfft(C.unsqueeze(0) * K_batch, n=N)[:, :G] # (B, G)
+    # Per-bandwidth threshold: remove edge-bin machine-epsilon noise.
+    thresholds = f_prime_batch.abs().amax(dim=1, keepdim=True) * 1e-12    # (B, 1)
+    return ((f_prime_batch[:, :-1] > thresholds) & (f_prime_batch[:, 1:] < -thresholds)).sum(dim=1)
 def fft_mode_count(

{diffcb-0.1.8 → diffcb-0.1.9}/dcb/solver.py RENAMED Viewed

@@ -238,28 +238,36 @@ def find_h_crit_hard(
         with torch.no_grad():
             # Worker 1: precomputed C — hoist histogram + rfft out of bisection.
-            # Worker 3: float32 FFT by default — 2× faster; _refine_hcrit uses float64 independently.
+            # Worker 3: float32 FFT for _refine_hcrit (2× faster; refinement is
+            # accuracy-insensitive to histogram dtype).  Mode counting requires
+            # float64 histogram precision: float32 FFT errors (~1e-4 relative)
+            # create spurious sign changes near small h_crit that a relative
+            # threshold cannot remove without also killing genuine small lobes.
             C, omega, _domain = precompute_fft(
                 X, G=G_fft, domain=_domain, pad_factor=pad_factor, fft_dtype=fft_dtype,
             )
+            C_mc, omega_mc, _ = precompute_fft(
+                X, G=G_fft, domain=_domain, pad_factor=pad_factor,
+                fft_dtype=torch.float64,
+            )
             # Verify bracket using FFT mode count on full X
-            count_lo = mode_count_from_C(C, omega, h_lo, G_fft, N)
+            count_lo = mode_count_from_C(C_mc, omega_mc, h_lo, G_fft, N)
             if count_lo <= target_modes:
                 h_lo_try = h_lo
                 for _ in range(30):
                     h_lo_try *= 0.5
                     if h_lo_try < 1e-10:
                         break
-                    if mode_count_from_C(C, omega, h_lo_try, G_fft, N) > target_modes:
+                    if mode_count_from_C(C_mc, omega_mc, h_lo_try, G_fft, N) > target_modes:
                         h_lo = h_lo_try
                         break
-            count_hi = mode_count_from_C(C, omega, h_hi, G_fft, N)
+            count_hi = mode_count_from_C(C_mc, omega_mc, h_hi, G_fft, N)
             if count_hi > target_modes:
                 for _ in range(30):
                     h_hi *= 2.0
-                    if mode_count_from_C(C, omega, h_hi, G_fft, N) <= target_modes:
+                    if mode_count_from_C(C_mc, omega_mc, h_hi, G_fft, N) <= target_modes:
                         break
             # Compile-friendly trisection: lo/hi are 0-d tensors, no .item()
@@ -267,8 +275,8 @@ def find_h_crit_hard(
             # than enough for any bracket).  torch.where replaces the Python
             # if/elif/else so the loop body is a pure tensor computation that
             # torch.compile(mode="reduce-overhead") can trace and replay.
-            _dtype = omega.dtype
-            _dev   = C.device
+            _dtype = omega_mc.dtype
+            _dev   = C_mc.device
             lo_t = torch.tensor(h_lo, dtype=_dtype, device=_dev)
             hi_t = torch.tensor(h_hi, dtype=_dtype, device=_dev)
             _target = torch.tensor(target_modes, dtype=torch.long, device=_dev)
@@ -277,7 +285,7 @@ def find_h_crit_hard(
                 h1 = lo_t + width * (1.0 / 3.0)
                 h2 = lo_t + width * (2.0 / 3.0)
                 counts = mode_count_from_C_batch(
-                    C, omega, torch.stack([h1, h2]), G_fft, N
+                    C_mc, omega_mc, torch.stack([h1, h2]), G_fft, N
                 )
                 c1 = counts[0]
                 c2 = counts[1]

{diffcb-0.1.8 → diffcb-0.1.9}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "diffcb"
-version = "0.1.8"
+version = "0.1.9"
 description = "Differentiable Critical Bandwidth: Silverman's modality test as a differentiable PyTorch layer with IFT backward pass."
 readme = "README.md"
 license = { file = "LICENSE" }

diffcb-0.1.9/v018_local_bench.py ADDED Viewed

@@ -0,0 +1,394 @@
+"""
+v018_local_bench.py
+Comprehensive benchmark for diffcb v0.1.8 (forward_path='smooth' default).
+Produces 3 CSV files and prints summary tables.
+"""
+import os
+import time
+import math
+import subprocess
+import tempfile
+import statistics
+import csv
+import torch
+from dcb import DCBLayer
+# Output directory for the benchmark CSVs.  The default is the location the
+# script was originally written for; set DCB_BENCH_RESULTS_DIR to write
+# somewhere else without editing this file.
+RESULTS_DIR = os.environ.get(
+    "DCB_BENCH_RESULTS_DIR",
+    "/Users/h/Downloads/DCB-workspace/02_projects/01_dcb_proposal/04_analysis/results",
+)
+os.makedirs(RESULTS_DIR, exist_ok=True)
+# Sample sizes timed in the speed benchmark (Part 1).
+SPEED_NS = [1_000, 2_000, 5_000, 10_000, 25_000, 50_000, 100_000,
+            500_000, 1_000_000, 5_000_000, 10_000_000]
+# Sample sizes used for the accuracy comparison against R and the summary table.
+ACCURACY_NS = [1_000, 5_000, 10_000, 25_000, 50_000, 100_000, 500_000, 1_000_000]
+# Devices to benchmark: CPU always, MPS only when torch reports it available.
+DEVICES = ['cpu']
+if torch.backends.mps.is_available():
+    DEVICES.append('mps')
+    print("MPS is available — will benchmark both CPU and MPS.")
+else:
+    print("MPS not available — benchmarking CPU only.")
+# Rscript executable used for the reference bw.crit computation; set
+# DCB_BENCH_RSCRIPT when R is installed somewhere else.
+R_BINARY = os.environ.get("DCB_BENCH_RSCRIPT", '/usr/local/bin/Rscript')
+# ─────────────────────────────────────────────────────────
+# Calibration: T_fft_one_ms per device
+# ─────────────────────────────────────────────────────────
+def calibrate_fft(device: str) -> float:
+    """Return the average wall-clock time, in ms, of one ``torch.fft.rfft`` call.
+
+    The transform is applied to a 16384-element float32 vector of ones that
+    lives on *device*.  500 untimed warm-up calls are followed by 5000 timed
+    calls; the result is the total timed duration divided by 5000.
+    """
+    n_warmup, n_timed = 500, 5000
+    signal = torch.ones(16384, dtype=torch.float32, device=device)
+    on_mps = device == 'mps'
+    # Untimed warm-up calls.
+    for _ in range(n_warmup):
+        torch.fft.rfft(signal)
+    # On MPS the timed window is bracketed by synchronize() calls, so pending
+    # work is drained before the clock starts and before it stops.
+    if on_mps:
+        torch.mps.synchronize()
+    started = time.perf_counter()
+    for _ in range(n_timed):
+        torch.fft.rfft(signal)
+    if on_mps:
+        torch.mps.synchronize()
+    stopped = time.perf_counter()
+    elapsed_ms = (stopped - started) * 1000.0
+    return elapsed_ms / n_timed
+# Measure the per-device FFT time unit once; Part 1 later divides its median
+# latencies by this value to report them as multiples of one FFT.
+print("\n=== Calibrating FFT units ===")
+T_fft = {}
+for dev in DEVICES:
+    T_fft[dev] = calibrate_fft(dev)
+    print("  {}: T_fft_one_ms = {:.6f} ms".format(dev, T_fft[dev])
+          if False else f"  {dev}: T_fft_one_ms = {T_fft[dev]:.6f} ms")
+# ─────────────────────────────────────────────────────────
+# Part 1 — Speed benchmark
+# ─────────────────────────────────────────────────────────
+# Times DCBLayer forward calls for every (device, n) pair and appends one row
+# per pair to speed_rows.  Pairs that are skipped or raise RuntimeError get an
+# all-NaN row, so the CSV has an entry for every combination.
+print("\n=== Part 1: Speed Benchmark ===")
+speed_rows = []
+for device in DEVICES:
+    # Build a shared DCBLayer for this device (reuse across n — it's stateless)
+    layer = DCBLayer()
+    if device == 'mps':
+        layer = layer.to(device)
+    # Calibrated cost of one FFT on this device (ms), used to normalise latencies.
+    T_fft_one_ms = T_fft[device]
+    for n in SPEED_NS:
+        # Sizes of 5M and above are not attempted on MPS; record a NaN placeholder.
+        if n >= 5_000_000 and device == 'mps':
+            print(f"  [mps, n={n:,}] — skipping (too large for MPS RAM guard)")
+            speed_rows.append({
+                'device': device, 'n': n,
+                't_median_ms': float('nan'), 't_mean_ms': float('nan'),
+                't_std_ms': float('nan'), 'throughput_ns': float('nan'),
+                'fft_norm_cost': float('nan'), 'T_fft_one_ms': T_fft_one_ms,
+            })
+            continue
+        try:
+            X = torch.randn(n, device=device)
+            # 3 warm-up calls
+            for _ in range(3):
+                _ = layer(X)
+            if device == 'mps':
+                torch.mps.synchronize()
+            # 15 timed calls
+            times_ms = []
+            REPS = 15
+            for _ in range(REPS):
+                if device == 'mps':
+                    # Synchronise on both sides of the call so the measured
+                    # interval brackets the completed MPS work.
+                    torch.mps.synchronize()
+                    t0 = time.perf_counter()
+                    layer(X)
+                    torch.mps.synchronize()
+                    t1 = time.perf_counter()
+                else:
+                    t0 = time.perf_counter()
+                    layer(X)
+                    t1 = time.perf_counter()
+                times_ms.append((t1 - t0) * 1000.0)
+            t_median_ms = statistics.median(times_ms)
+            t_mean_ms = statistics.mean(times_ms)
+            t_std_ms = statistics.stdev(times_ms) if len(times_ms) > 1 else 0.0
+            # Despite the "_ns" suffix this is samples per second: n / seconds.
+            throughput_ns = n / (t_median_ms / 1000.0)
+            # Median latency expressed in multiples of one calibrated FFT.
+            fft_norm_cost = t_median_ms / T_fft_one_ms
+            speed_rows.append({
+                'device': device, 'n': n,
+                't_median_ms': t_median_ms, 't_mean_ms': t_mean_ms,
+                't_std_ms': t_std_ms, 'throughput_ns': throughput_ns,
+                'fft_norm_cost': fft_norm_cost, 'T_fft_one_ms': T_fft_one_ms,
+            })
+            print(f"  [{device}, n={n:>10,}] median={t_median_ms:.3f}ms  tput={throughput_ns/1e6:.2f}M/s  fft_cost={fft_norm_cost:.1f}")
+        except RuntimeError as e:
+            # Any RuntimeError (the message guesses at out-of-memory) is logged
+            # and recorded as a NaN row; the sweep continues with the next n.
+            print(f"  [{device}, n={n:,}] RuntimeError (OOM?): {e}")
+            speed_rows.append({
+                'device': device, 'n': n,
+                't_median_ms': float('nan'), 't_mean_ms': float('nan'),
+                't_std_ms': float('nan'), 'throughput_ns': float('nan'),
+                'fft_norm_cost': float('nan'), 'T_fft_one_ms': T_fft_one_ms,
+            })
+# Write speed CSV
+speed_csv = os.path.join(RESULTS_DIR, 'v018_local_speed.csv')
+speed_fields = ['device', 'n', 't_median_ms', 't_mean_ms', 't_std_ms',
+                'throughput_ns', 'fft_norm_cost', 'T_fft_one_ms']
+with open(speed_csv, 'w', newline='') as f:
+    w = csv.DictWriter(f, fieldnames=speed_fields)
+    w.writeheader()
+    w.writerows(speed_rows)
+print(f"\nSpeed CSV saved: {speed_csv}")
+# ─────────────────────────────────────────────────────────
+# Helper: call R bw.crit on a tensor
+# ─────────────────────────────────────────────────────────
+def r_bwcrit(X_tensor: torch.Tensor) -> float:
+    """Compute the critical bandwidth of *X_tensor* with R's ``multimode::bw.crit``.
+
+    The values are written (10 decimal places, one per line under an ``x``
+    header) to a temporary CSV, which an ``Rscript`` subprocess reads and
+    passes to ``bw.crit(x, mod0=1L)``.
+
+    Returns:
+        The bandwidth printed by R, or NaN when the R call fails, times out
+        (300 s), prints nothing, or any other exception occurs.  Failures are
+        reported with a WARNING line on stdout rather than raised.
+    """
+    fname = None
+    try:
+        with tempfile.NamedTemporaryFile(suffix='.csv', mode='w', delete=False) as f:
+            # Record the path before writing so the file is still cleaned up
+            # if a write fails partway through.
+            fname = f.name
+            f.write('x\n')
+            f.writelines(f'{v:.10f}\n' for v in X_tensor.tolist())
+        result = subprocess.run(
+            [R_BINARY, '--vanilla', '-e',
+             f'library(multimode); x<-read.csv("{fname}")$x; cat(bw.crit(x,mod0=1L))'],
+            capture_output=True, text=True, timeout=300
+        )
+        if result.returncode != 0 or not result.stdout.strip():
+            print(f"  WARNING: R call failed. stderr: {result.stderr.strip()[:200]}")
+            return float('nan')
+        return float(result.stdout.strip())
+    except Exception as e:
+        # Deliberately best-effort: one failed comparison must not abort the
+        # whole benchmark run, so report it and fall back to NaN.
+        print(f"  WARNING: R call exception: {e}")
+        return float('nan')
+    finally:
+        # Single cleanup path for success, failure and exception alike.
+        if fname is not None:
+            try:
+                os.unlink(fname)
+            except OSError:
+                pass
+# ─────────────────────────────────────────────────────────
+# Part 2 — Accuracy: Independent-sample
+# ─────────────────────────────────────────────────────────
+# For each n in ACCURACY_NS and each of 20 seeds: draw X ~ randn(n), compute
+# the DCB output and the R bw.crit value, and record the absolute percentage
+# difference relative to R.  Any failure on either side yields NaN for that row.
+# NOTE(review): X is generated exactly as in Part 3 (torch.manual_seed(seed)
+# followed by torch.randn(n)), so for seeds 0-9 both parts appear to evaluate
+# the same data despite the "independent-sample" label — confirm intent.
+print("\n=== Part 2: Accuracy — Independent-sample ===")
+indep_rows = []
+for n in ACCURACY_NS:
+    for seed in range(20):
+        torch.manual_seed(seed)
+        X = torch.randn(n)
+        # DCB
+        try:
+            layer_cpu = DCBLayer()
+            h_dcb = layer_cpu(X).item()
+        except Exception as e:
+            print(f"  WARNING: DCB failed n={n}, seed={seed}: {e}")
+            h_dcb = float('nan')
+        # R (skip if n > 1_000_000)
+        # NOTE(review): the largest value in ACCURACY_NS is 1_000_000, so this
+        # skip branch is never taken with the current configuration.
+        if n > 1_000_000:
+            h_r = float('nan')
+            err_pct = float('nan')
+        else:
+            h_r = r_bwcrit(X)
+            if math.isnan(h_r) or math.isnan(h_dcb):
+                err_pct = float('nan')
+            else:
+                # Percentage error relative to the R reference value.
+                err_pct = abs(h_dcb - h_r) / h_r * 100.0
+        indep_rows.append({'n': n, 'seed': seed, 'h_dcb': h_dcb,
+                           'h_r': h_r, 'err_pct': err_pct})
+    # Per-n progress line: mean error over the seeds that produced a comparison.
+    valid = [r for r in indep_rows if r['n'] == n and not math.isnan(r.get('err_pct', float('nan')))]
+    if valid:
+        mean_e = statistics.mean(r['err_pct'] for r in valid)
+        print(f"  n={n:>9,}: mean_err={mean_e:.4f}% over {len(valid)} seeds with R comparison")
+    else:
+        dcb_vals = [r['h_dcb'] for r in indep_rows if r['n'] == n]
+        print(f"  n={n:>9,}: no R comparison (skipped); h_dcb range [{min(dcb_vals):.4f}, {max(dcb_vals):.4f}]")
+indep_csv = os.path.join(RESULTS_DIR, 'v018_local_accuracy_indep.csv')
+with open(indep_csv, 'w', newline='') as f:
+    w = csv.DictWriter(f, fieldnames=['n', 'seed', 'h_dcb', 'h_r', 'err_pct'])
+    w.writeheader()
+    w.writerows(indep_rows)
+print(f"\nIndep accuracy CSV saved: {indep_csv}")
+# ─────────────────────────────────────────────────────────
+# Part 3 — Accuracy: Same-sample
+# ─────────────────────────────────────────────────────────
+# For each n in SAME_NS and each of 10 seeds: draw X ~ randn(n), write it to a
+# temporary CSV, compute the DCB output from the tensor and the R bw.crit value
+# from the CSV, and record the absolute percentage difference relative to R.
+print("\n=== Part 3: Accuracy — Same-sample ===")
+same_rows = []
+SAME_NS = [1_000, 5_000, 10_000, 25_000, 50_000, 100_000, 500_000, 1_000_000]
+for n in SAME_NS:
+    for seed in range(10):
+        torch.manual_seed(seed)
+        X = torch.randn(n)
+        # Write to temp CSV once — both DCB and R use same data
+        # NOTE(review): DCB below is called on the in-memory tensor X, while R
+        # reads the values back from the CSV (written with 10 decimal places).
+        try:
+            with tempfile.NamedTemporaryFile(suffix='.csv', mode='w', delete=False) as f:
+                f.write('x\n')
+                for v in X.tolist():
+                    f.write(f'{v:.10f}\n')
+                fname = f.name
+            # DCB
+            try:
+                layer_cpu = DCBLayer()
+                h_dcb = layer_cpu(X).item()
+            except Exception as e:
+                print(f"  WARNING: DCB failed n={n}, seed={seed}: {e}")
+                h_dcb = float('nan')
+            # R (using same file)
+            try:
+                result = subprocess.run(
+                    [R_BINARY, '--vanilla', '-e',
+                     f'library(multimode); x<-read.csv("{fname}")$x; cat(bw.crit(x,mod0=1L))'],
+                    capture_output=True, text=True, timeout=300
+                )
+                if result.returncode != 0 or not result.stdout.strip():
+                    print(f"  WARNING: R call failed n={n}, seed={seed}. stderr: {result.stderr.strip()[:200]}")
+                    h_r = float('nan')
+                else:
+                    h_r = float(result.stdout.strip())
+            except Exception as e:
+                print(f"  WARNING: R call exception n={n}, seed={seed}: {e}")
+                h_r = float('nan')
+            # Both inner failures are caught above, so the temp file is removed
+            # on every path that gets past its creation.
+            os.unlink(fname)
+        except Exception as e:
+            # Reached when creating/writing the temp file or unlinking it fails;
+            # the row is then recorded as NaN for both methods.
+            print(f"  WARNING: Temp file error n={n}, seed={seed}: {e}")
+            h_dcb = float('nan')
+            h_r = float('nan')
+        if math.isnan(h_r) or math.isnan(h_dcb):
+            err_pct = float('nan')
+        else:
+            # Percentage error relative to the R reference value.
+            err_pct = abs(h_dcb - h_r) / h_r * 100.0
+        same_rows.append({'n': n, 'seed': seed, 'h_dcb': h_dcb,
+                          'h_r': h_r, 'err_pct': err_pct})
+    # Per-n progress line: mean error over the seeds that produced a comparison.
+    valid = [r for r in same_rows if r['n'] == n and not math.isnan(r.get('err_pct', float('nan')))]
+    if valid:
+        mean_e = statistics.mean(r['err_pct'] for r in valid)
+        print(f"  n={n:>9,}: mean_err={mean_e:.4f}% over {len(valid)} seeds")
+    else:
+        print(f"  n={n:>9,}: no valid comparisons")
+same_csv = os.path.join(RESULTS_DIR, 'v018_local_accuracy_same.csv')
+with open(same_csv, 'w', newline='') as f:
+    w = csv.DictWriter(f, fieldnames=['n', 'seed', 'h_dcb', 'h_r', 'err_pct'])
+    w.writeheader()
+    w.writerows(same_rows)
+print(f"\nSame-sample accuracy CSV saved: {same_csv}")
+# ─────────────────────────────────────────────────────────
+# Summary Tables
+# ─────────────────────────────────────────────────────────
+def fmt_float(x, fmt='.3f'):
+    """Format *x* with the format spec *fmt*; NaN is rendered as 'N/A'."""
+    return 'N/A' if math.isnan(x) else format(x, fmt)
+# Final report: calibration units, the speed table (one line per n, with CPU
+# columns and, when benchmarked, MPS columns), then the accuracy table header.
+print("\n" + "="*110)
+print("SUMMARY")
+print("="*110)
+# Calibration
+print("\n--- Calibration Units ---")
+print(f"{'Device':<8} | {'T_fft_one_ms':>14}")
+print("-" * 28)
+for dev in DEVICES:
+    print(f"{dev:<8} | {T_fft[dev]:>14.6f}")
+# Speed table
+print("\n--- Speed Benchmark ---")
+col_w = 14
+hdr = (f"{'n':>12} | {'CPU t_med(ms)':>{col_w}} | {'CPU fft_cost':>{col_w}} | "
+       f"{'CPU n/s':>{col_w}}")
+if 'mps' in DEVICES:
+    hdr += (f" | {'MPS t_med(ms)':>{col_w}} | {'MPS fft_cost':>{col_w}} | "
+            f"{'MPS n/s':>{col_w}}")
+print(hdr)
+print("-" * len(hdr))
+# Index the speed rows by (n, device) so each table line can look up both devices.
+speed_by_n = {}
+for r in speed_rows:
+    key = (r['n'], r['device'])
+    speed_by_n[key] = r
+for n in SPEED_NS:
+    # A missing row falls back to {} and therefore to NaN, which prints as 'N/A'.
+    cpu_r = speed_by_n.get((n, 'cpu'), {})
+    cpu_med = fmt_float(cpu_r.get('t_median_ms', float('nan')))
+    cpu_fft = fmt_float(cpu_r.get('fft_norm_cost', float('nan')), '.1f')
+    cpu_tput_raw = cpu_r.get('throughput_ns', float('nan'))
+    cpu_tput = 'N/A' if math.isnan(cpu_tput_raw) else f"{cpu_tput_raw/1e6:.2f}M/s"
+    line = f"{n:>12,} | {cpu_med:>{col_w}} | {cpu_fft:>{col_w}} | {cpu_tput:>{col_w}}"
+    if 'mps' in DEVICES:
+        mps_r = speed_by_n.get((n, 'mps'), {})
+        mps_med = fmt_float(mps_r.get('t_median_ms', float('nan')))
+        mps_fft = fmt_float(mps_r.get('fft_norm_cost', float('nan')), '.1f')
+        mps_tput_raw = mps_r.get('throughput_ns', float('nan'))
+        mps_tput = 'N/A' if math.isnan(mps_tput_raw) else f"{mps_tput_raw/1e6:.2f}M/s"
+        line += f" | {mps_med:>{col_w}} | {mps_fft:>{col_w}} | {mps_tput:>{col_w}}"
+    print(line)
+# Accuracy table
+print("\n--- Accuracy (mean % error vs R bw.crit) ---")
+print(f"{'n':>12} | {'Indep mean_err%':>16} | {'Indep std_err%':>15} | {'Same mean_err%':>15} | {'Same std_err%':>14}")
+print("-" * 90)
+def acc_stats(rows, n_val):
+    """Return ``(mean, std)`` of the non-NaN ``err_pct`` values of rows with ``n == n_val``.
+
+    Rows without an ``err_pct`` key are ignored.  The standard deviation is
+    the sample standard deviation, or 0.0 when only one value qualifies.
+    ``(nan, nan)`` is returned when no row qualifies.
+    """
+    errors = []
+    for row in rows:
+        if row['n'] != n_val:
+            continue
+        if math.isnan(row.get('err_pct', float('nan'))):
+            continue
+        errors.append(row['err_pct'])
+    if len(errors) == 0:
+        return float('nan'), float('nan')
+    average = statistics.mean(errors)
+    spread = 0.0 if len(errors) == 1 else statistics.stdev(errors)
+    return average, spread
+# One accuracy line per n: mean and standard deviation of the percentage error
+# for the Part 2 rows (indep_rows) and the Part 3 rows (same_rows); entries
+# with no valid comparison print as 'N/A'.
+for n in ACCURACY_NS:
+    indep_mean, indep_std = acc_stats(indep_rows, n)
+    same_mean, same_std = acc_stats(same_rows, n)
+    print(f"{n:>12,} | {fmt_float(indep_mean, '.4f'):>16} | {fmt_float(indep_std, '.4f'):>15} | "
+          f"{fmt_float(same_mean, '.4f'):>15} | {fmt_float(same_std, '.4f'):>14}")
+print("\n" + "="*110)
+print("Benchmark complete.")
+print(f"CSVs saved to: {RESULTS_DIR}")
+# File names come from the paths the script actually wrote, so this listing
+# cannot drift from the real output files.
+print(f"  Speed:          {os.path.basename(speed_csv)}")
+print(f"  Accuracy indep: {os.path.basename(indep_csv)}")
+print(f"  Accuracy same:  {os.path.basename(same_csv)}")