PyPI - dpdfnet - Versions diffs - 0.2.0__py3-none-any.whl - Mend

dpdfnet 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

dpdfnet/__init__.py +19 -0
dpdfnet/__main__.py +5 -0
dpdfnet/api.py +151 -0
dpdfnet/audio.py +103 -0
dpdfnet/cli.py +268 -0
dpdfnet/models.py +515 -0
dpdfnet/onnx_backend.py +111 -0
dpdfnet-0.2.0.dist-info/METADATA +144 -0
dpdfnet-0.2.0.dist-info/RECORD +13 -0
dpdfnet-0.2.0.dist-info/WHEEL +5 -0
dpdfnet-0.2.0.dist-info/entry_points.txt +2 -0
dpdfnet-0.2.0.dist-info/licenses/LICENSE +201 -0
dpdfnet-0.2.0.dist-info/top_level.txt +1 -0

dpdfnet/__init__.py ADDED Viewed

@@ -0,0 +1,19 @@
+from typing import TYPE_CHECKING
+__all__ = [
+    "enhance",
+    "enhance_file",
+    "available_models",
+    "download",
+]
+if TYPE_CHECKING:
+    from .api import available_models, download, enhance, enhance_file
def __getattr__(name: str):
    """Resolve the public API names lazily (module-level ``__getattr__``).

    The ``api`` submodule is imported only when one of the exported names is
    first requested, so importing the package alone does not import ``api``.

    Raises:
        AttributeError: if ``name`` is not one of the lazily exported names.
    """
    lazy_names = frozenset(("enhance", "enhance_file", "available_models", "download"))
    if name not in lazy_names:
        raise AttributeError(f"module 'dpdfnet' has no attribute '{name}'")
    # Deferred import: ``api`` is loaded on first use, not at package import.
    from . import api
    return getattr(api, name)

dpdfnet/__main__.py ADDED Viewed

@@ -0,0 +1,5 @@
+from .cli import main
# Entry point for ``python -m dpdfnet``: the CLI's integer return value
# becomes the process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)

dpdfnet/api.py ADDED Viewed

@@ -0,0 +1,151 @@
+from __future__ import annotations
+from pathlib import Path
+from typing import Any, Callable, Dict, List, Optional, Union
+import numpy as np
+from .models import (
+    DEFAULT_MODEL,
+    available_model_entries,
+    download_model,
+    download_models,
+    resolve_model,
+)
def available_models() -> List[Dict[str, Any]]:
    """Return the model listing produced by ``available_model_entries``.

    Thin public wrapper: the entries are passed through unchanged.
    """
    entries = available_model_entries()
    return entries
def download(
    model: Optional[str] = None,
    *,
    force: bool = False,
    quiet: bool = False,
    verbose: bool = False,
) -> Union[Path, Dict[str, Path]]:
    """Download one model, or every model when ``model`` is ``None``.

    Args:
        model: Name of the model to fetch; ``None`` fetches all models.
        force: Forwarded to the downloader as ``force``.
        quiet: When true, a notifier that discards every message is passed
            to the downloader; otherwise ``None`` is passed as the notifier.
        verbose: Forwarded to the downloader as ``verbose``.

    Returns:
        For a single model, the directory containing its ONNX file. For all
        models, a dict mapping each model name to that directory.

    Raises:
        ValueError: if both ``quiet`` and ``verbose`` are true.
    """
    if quiet and verbose:
        raise ValueError("quiet=True and verbose=True are mutually exclusive.")

    def _discard(_message):
        # Swallow downloader messages in quiet mode.
        return None

    shared_options = {
        "force": force,
        "verbose": verbose,
        "notifier": _discard if quiet else None,
    }

    if model is not None:
        single = download_model(model=model, **shared_options)
        return single.onnx_path.parent

    everything = download_models(models=None, **shared_options)
    return {entry.info.name: entry.onnx_path.parent for entry in everything}
def enhance(
    audio: np.ndarray,
    sample_rate: int,
    *,
    model: str = DEFAULT_MODEL,
    onnx_path: Optional[Union[str, Path]] = None,
    state_path: Optional[Union[str, Path]] = None,
    verbose: bool = False,
    progress_callback: Optional[Callable[[int, int], None]] = None,
) -> np.ndarray:
    """Enhance an in-memory waveform with a streaming ONNX model.

    The input is converted to float32 and reduced to mono, resampled to the
    model's sample rate, transformed to a spectrogram, pushed through the
    ONNX session one frame at a time (carrying the recurrent state between
    frames), transformed back to a waveform and resampled to ``sample_rate``.

    Args:
        audio: 1-D samples or a 2-D array that ``to_mono`` averages over axis 1.
        sample_rate: Sample rate of ``audio``.
        model: Model name handed to ``resolve_model``.
        onnx_path: Optional explicit ONNX file, handed to ``resolve_model``.
        state_path: Optional explicit state file, handed to ``resolve_model``.
        verbose: Forwarded to ``resolve_model``.
        progress_callback: Called as ``(0, total)`` before the first frame and
            ``(done, total)`` after every processed frame.

    Returns:
        A float32 waveform with the same number of samples as the mono input.
    """
    from .audio import (
        ensure_sample_rate,
        fit_length,
        make_stft_config,
        postprocess_spec,
        preprocess_waveform,
        to_mono,
    )
    from .onnx_backend import build_runtime_model, infer_win_len

    mono = to_mono(np.asarray(audio, dtype=np.float32))
    input_sr = int(sample_rate)

    resolved = resolve_model(
        model=model,
        onnx_path=onnx_path,
        state_path=state_path,
        auto_download=True,
        verbose=verbose,
    )
    model_sr = resolved.info.sample_rate
    runtime = build_runtime_model(resolved.onnx_path, resolved.state_path)

    at_model_rate = ensure_sample_rate(mono, input_sr, model_sr)
    cfg = make_stft_config(infer_win_len(runtime.session, model_sr))

    # Keep alignment behavior from the original scripts.
    padded = np.pad(at_model_rate, (0, cfg.win_len), mode="constant")
    spec_in = preprocess_waveform(padded, cfg)

    n_frames = int(spec_in.shape[1])

    def _report(done: int) -> None:
        if progress_callback is not None:
            progress_callback(done, n_frames)

    session = runtime.session
    output_names = [runtime.out_spec_name, runtime.out_state_name]
    state = runtime.init_state.copy()
    enhanced_frames = []

    _report(0)
    for index in range(n_frames):
        # Feed a single time step; the returned state is fed into the next step.
        frame_in = np.ascontiguousarray(
            spec_in[:, index : index + 1, :, :], dtype=np.float32
        )
        frame_out, state = session.run(
            output_names,
            {runtime.in_spec_name: frame_in, runtime.in_state_name: state},
        )
        enhanced_frames.append(np.ascontiguousarray(frame_out, dtype=np.float32))
        _report(index + 1)

    if not enhanced_frames:
        return mono.copy()

    spec_out = np.concatenate(enhanced_frames, axis=1)
    at_model_rate_out = postprocess_spec(spec_out, cfg)
    restored = ensure_sample_rate(at_model_rate_out, model_sr, input_sr)
    return fit_length(restored, mono.shape[0]).astype(np.float32, copy=False)
def enhance_file(
    input_path: Union[str, Path],
    output_path: Optional[Union[str, Path]] = None,
    *,
    model: str = DEFAULT_MODEL,
    onnx_path: Optional[Union[str, Path]] = None,
    state_path: Optional[Union[str, Path]] = None,
    verbose: bool = False,
    progress_callback: Optional[Callable[[int, int], None]] = None,
) -> Path:
    """Enhance an audio file on disk and write the result as 16-bit PCM.

    Args:
        input_path: Audio file to read with ``soundfile``.
        output_path: Destination file. When omitted, the result is written
            next to the input as ``<stem>_enhanced.wav``.
        model, onnx_path, state_path, verbose, progress_callback: Forwarded
            unchanged to ``enhance``.

    Returns:
        The resolved path of the written file.

    Raises:
        FileNotFoundError: if ``input_path`` is not an existing file.
    """
    import soundfile as sf
    from .audio import pcm16_safe

    source = Path(input_path).expanduser().resolve()
    if not source.is_file():
        raise FileNotFoundError(f"Input file not found: {source}")

    samples, rate = sf.read(str(source), always_2d=False)
    rate = int(rate)
    enhanced = enhance(
        audio=samples,
        sample_rate=rate,
        model=model,
        onnx_path=onnx_path,
        state_path=state_path,
        verbose=verbose,
        progress_callback=progress_callback,
    )

    if output_path is not None:
        target = Path(output_path).expanduser().resolve()
    else:
        target = source.with_name(f"{source.stem}_enhanced.wav")

    # Create missing parent directories so the write below cannot fail on them.
    target.parent.mkdir(parents=True, exist_ok=True)
    sf.write(str(target), pcm16_safe(enhanced), rate, subtype="PCM_16")
    return target

dpdfnet/audio.py ADDED Viewed

@@ -0,0 +1,103 @@
+from __future__ import annotations
+from dataclasses import dataclass
+import librosa
+import numpy as np
def to_mono(audio: np.ndarray) -> np.ndarray:
    """Return ``audio`` as a 1-D float32 signal.

    1-D input is returned as float32 unchanged; 2-D input is averaged over
    axis 1. Any other rank is rejected.

    Raises:
        ValueError: if ``audio`` is neither 1-D nor 2-D.
    """
    samples = np.asarray(audio, dtype=np.float32)
    if samples.ndim == 2:
        return np.mean(samples, axis=1, dtype=np.float32)
    if samples.ndim == 1:
        return samples
    raise ValueError(f"Expected mono/stereo audio, got shape {samples.shape}")
def ensure_sample_rate(audio: np.ndarray, sample_rate: int, target_sample_rate: int) -> np.ndarray:
    """Return ``audio`` as float32 at ``target_sample_rate``.

    When the two rates already match, the samples are only converted to
    float32; otherwise they are resampled with ``librosa.resample``.
    """
    samples = np.asarray(audio, dtype=np.float32)
    if sample_rate != target_sample_rate:
        resampled = librosa.resample(
            samples,
            orig_sr=sample_rate,
            target_sr=target_sample_rate,
        )
        return resampled.astype(np.float32, copy=False)
    return samples
def fit_length(audio: np.ndarray, target_len: int) -> np.ndarray:
    """Flatten ``audio`` to float32 and force it to ``target_len`` samples.

    Longer input is truncated at the end; shorter input is right-padded
    with zeros; input of exactly the right length is returned as is.
    """
    flat = np.asarray(audio, dtype=np.float32).reshape(-1)
    current_len = flat.shape[0]
    if current_len >= target_len:
        return flat if current_len == target_len else flat[:target_len]
    padded = np.zeros(target_len, dtype=np.float32)
    padded[:current_len] = flat
    return padded
def pcm16_safe(audio: np.ndarray) -> np.ndarray:
    """Convert float audio to int16 PCM sample values.

    Samples are clipped to [-1.0, 1.0], scaled by 32767 and rounded to the
    nearest integer. NaN samples are written as 0, because NaN passes
    through ``np.clip`` unchanged and casting NaN to an integer type is
    undefined.

    Args:
        audio: Array-like of float samples, nominally in [-1.0, 1.0].

    Returns:
        An int16 array with the same shape as ``audio``.
    """
    samples = np.asarray(audio, dtype=np.float32)
    # Replace NaN before clipping; infinities are pinned to the clip limits.
    samples = np.nan_to_num(samples, nan=0.0, posinf=1.0, neginf=-1.0)
    samples = np.clip(samples, -1.0, 1.0)
    # Round to nearest instead of truncating toward zero, which would bias
    # every non-integer sample toward silence by up to one LSB.
    return np.rint(samples * 32767.0).astype(np.int16)
def vorbis_window(window_len: int) -> np.ndarray:
    """Build a float32 window of ``window_len`` samples.

    For sample index ``i`` the value is
    ``sin(pi/2 * sin(pi/2 * (i + 0.5) / (window_len / 2)) ** 2)``.
    """
    half_len = window_len / 2
    centers = np.arange(window_len) + 0.5
    inner = np.sin(0.5 * np.pi * centers / half_len)
    outer = np.sin(0.5 * np.pi * inner * inner)
    return outer.astype(np.float32)
def get_wnorm(window_len: int, frame_size: int) -> float:
    """Return the normalization factor ``1 / (window_len**2 / (2 * frame_size))``."""
    denominator = window_len**2 / (2 * frame_size)
    return 1.0 / denominator
@dataclass(frozen=True)
class StftConfig:
    """Immutable bundle of STFT parameters shared by analysis and synthesis."""

    # Window length in samples; also used as the FFT size for the forward STFT.
    win_len: int
    # Hop between consecutive frames, in samples.
    hop_size: int
    # Window samples applied in both the forward and the inverse transform.
    window: np.ndarray
    # Scale multiplied into the spectrum on analysis and divided out of the
    # waveform on synthesis.
    wnorm: float
def make_stft_config(win_len: int) -> StftConfig:
    """Derive the full STFT configuration from a window length.

    The hop is half the window (integer division); the window comes from
    ``vorbis_window`` and the normalization from ``get_wnorm``.
    """
    half_window = win_len // 2
    return StftConfig(
        win_len=win_len,
        hop_size=half_window,
        window=vorbis_window(win_len),
        wnorm=get_wnorm(win_len, half_window),
    )
def preprocess_waveform(waveform: np.ndarray, cfg: StftConfig) -> np.ndarray:
    """Turn a waveform into a real/imaginary float32 spectrogram tensor.

    The flattened float32 waveform goes through ``librosa.stft`` (centered,
    reflect-padded), is transposed and scaled by ``cfg.wnorm``, and its real
    and imaginary parts are stacked on a new last axis. A leading axis of
    size 1 is added to the result.
    """
    samples = np.asarray(waveform, dtype=np.float32).reshape(-1)
    stft_options = {
        "n_fft": cfg.win_len,
        "hop_length": cfg.hop_size,
        "win_length": cfg.win_len,
        "window": cfg.window,
        "center": True,
        "pad_mode": "reflect",
    }
    spectrum = librosa.stft(y=samples, **stft_options)
    scaled = (spectrum.T * cfg.wnorm).astype(np.complex64, copy=False)
    real_imag = np.stack([scaled.real, scaled.imag], axis=-1)
    real_imag = real_imag.astype(np.float32, copy=False)
    # Prepend the size-1 leading axis.
    return real_imag[None, ...]
def postprocess_spec(spec_e: np.ndarray, cfg: StftConfig) -> np.ndarray:
    """Turn a real/imaginary spectrogram tensor back into a float32 waveform.

    The first entry along the leading axis is recombined into a complex
    spectrogram, inverted with ``librosa.istft`` and divided by ``cfg.wnorm``.
    The first ``2 * cfg.win_len`` samples are then dropped and the same
    number of zeros is appended, so the output keeps the inverse transform's
    length.
    """
    real_imag = np.asarray(spec_e[0], dtype=np.float32)
    complex_spec = real_imag[..., 0] + 1j * real_imag[..., 1]
    complex_spec = complex_spec.T.astype(np.complex64, copy=False)

    audio = librosa.istft(
        complex_spec,
        hop_length=cfg.hop_size,
        win_length=cfg.win_len,
        window=cfg.window,
        center=True,
        length=None,
    ).astype(np.float32, copy=False)
    audio = audio / cfg.wnorm

    # Shift the signal left by two windows and zero-fill the vacated tail.
    shift = cfg.win_len * 2
    tail = np.zeros(shift, dtype=np.float32)
    return np.concatenate([audio[shift:], tail], axis=0)

dpdfnet/cli.py ADDED Viewed

@@ -0,0 +1,268 @@
+from __future__ import annotations
+import argparse
+from importlib import metadata
+from pathlib import Path
+import sys
+from typing import Callable, List, Optional
+from tqdm import tqdm
+from .models import DEFAULT_MODEL, get_cache_model_dir, supported_models
def _build_frame_progress_callback(
    bar: tqdm,
) -> Callable[[int, int], None]:
    """Create a ``(done, total)`` callback that drives ``bar``.

    The callback keeps ``bar.total`` in sync with the reported total and
    advances the bar by the increase in ``done`` since the previous call;
    a ``done`` value lower than the previous one moves nothing.
    """
    # One-element list used as a mutable cell shared with the closure.
    previous_done = [0]

    def _on_progress(done: int, total: int) -> None:
        if bar.total != total:
            bar.total = total
            bar.refresh()
        advanced = done - previous_done[0]
        if advanced > 0:
            bar.update(advanced)
        previous_done[0] = done

    return _on_progress
def _version_string() -> str:
    """Return the text shown by ``--version``.

    Uses the installed distribution's version when package metadata is
    available, and a ``(local)`` marker otherwise.
    """
    try:
        installed = metadata.version("dpdfnet")
    except metadata.PackageNotFoundError:
        return "dpdfnet (local)"
    return f"dpdfnet {installed}"
def _add_model_resolution_args(parser: argparse.ArgumentParser) -> None:
    """Add the ``--model`` and ``-v/--verbose`` options to ``parser``.

    Shared by the subcommands that run a model.
    """
    model_option = {
        "default": DEFAULT_MODEL,
        "choices": supported_models(),
        "help": "Model name to run.",
    }
    parser.add_argument("--model", **model_option)

    verbose_option = {
        "action": "store_true",
        "help": "Enable verbose model-resolution/download logs.",
    }
    parser.add_argument("-v", "--verbose", **verbose_option)
def _build_parser() -> argparse.ArgumentParser:
    """Assemble the ``dpdfnet`` argument parser with all of its subcommands.

    Subcommands are ``models``, ``enhance``, ``enhance-dir`` and ``download``;
    the chosen subcommand name is stored in ``args.command``.
    """
    root = argparse.ArgumentParser(
        prog="dpdfnet",
        description="DPDFNet CPU-only ONNX speech enhancement toolkit.",
    )
    root.add_argument("--version", action="version", version=_version_string())
    commands = root.add_subparsers(dest="command")

    # models: takes no arguments of its own.
    commands.add_parser(
        "models",
        help="List supported models and local availability.",
    )

    # enhance: one input file, one output file.
    single = commands.add_parser("enhance", help="Enhance a single wav file.")
    single.add_argument("input", type=Path, help="Input wav file path.")
    single.add_argument("output", type=Path, help="Output wav file path.")
    _add_model_resolution_args(single)

    # enhance-dir: every .wav directly inside a directory.
    batch = commands.add_parser(
        "enhance-dir",
        help="Enhance all .wav files from one directory (non-recursive).",
    )
    batch.add_argument("input_dir", type=Path, help="Input directory.")
    batch.add_argument("output_dir", type=Path, help="Output directory.")
    _add_model_resolution_args(batch)

    # download: model may be given positionally or through a hidden --model flag.
    fetch = commands.add_parser(
        "download",
        help="Download all models by default, or a single model if provided.",
    )
    fetch.add_argument(
        "model",
        nargs="?",
        choices=supported_models(),
        default=None,
        help="Optional model name to download. If omitted, all models are fetched.",
    )
    fetch.add_argument(
        "--model",
        dest="model_flag",
        choices=supported_models(),
        default=None,
        help=argparse.SUPPRESS,
    )
    fetch.add_argument(
        "--force",
        "--refresh",
        action="store_true",
        help="Force re-download even if files are already cached.",
    )
    verbosity = fetch.add_mutually_exclusive_group()
    verbosity.add_argument(
        "-q",
        "--quiet",
        action="store_true",
        help="Suppress download progress messages.",
    )
    verbosity.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="Enable verbose download logs.",
    )
    return root
def _print_model_table() -> int:
    """Print the cache directory and one status line per model; return 0."""
    from .api import available_models

    entries = available_models()
    cache_dir = get_cache_model_dir().resolve()
    print(f"cache_dir={cache_dir}")
    for entry in entries:
        status_line = (
            f"{entry['name']}: sr={entry['sample_rate']}Hz, "
            f"ready={entry['ready']}, "
            f"onnx_found={entry['onnx_found']}, state_found={entry['state_found']}, "
            f"cached={entry['cached']}"
        )
        print(status_line)
    return 0
def _run_enhance(args: argparse.Namespace) -> int:
    """Handle the ``enhance`` subcommand: enhance one file with a progress bar.

    The frame progress bar is written to stderr; the final confirmation line
    goes to stdout. Returns 0 on success.
    """
    from .api import enhance_file

    bar_options = {
        "total": 0,
        "unit": "frame",
        "desc": "Enhancing",
        "dynamic_ncols": True,
        "file": sys.stderr,
    }
    with tqdm(**bar_options) as progress:
        on_frame = _build_frame_progress_callback(progress)
        enhance_file(
            input_path=args.input,
            output_path=args.output,
            model=args.model,
            verbose=args.verbose,
            progress_callback=on_frame,
        )

    written_to = Path(args.output).expanduser().resolve()
    print(f"Wrote enhanced audio: {written_to}")
    return 0
def _run_enhance_dir(args: argparse.Namespace) -> int:
    """Handle ``enhance-dir``: enhance every ``.wav`` directly inside a directory.

    Files are processed in sorted order and written to the output directory
    as ``<stem>_enhanced.wav``. Two stderr progress bars are shown: one
    counting files and one counting frames across all files.

    Raises:
        FileNotFoundError: if the input directory does not exist or holds
            no ``.wav`` files.
    """
    from .api import enhance_file

    input_dir = Path(args.input_dir).expanduser().resolve()
    output_dir = Path(args.output_dir).expanduser().resolve()
    if not input_dir.is_dir():
        raise FileNotFoundError(f"Input directory not found: {input_dir}")

    wav_files = sorted(
        entry
        for entry in input_dir.iterdir()
        if entry.is_file() and entry.suffix.lower() == ".wav"
    )
    if not wav_files:
        raise FileNotFoundError(f"No .wav files found in {input_dir}")
    output_dir.mkdir(parents=True, exist_ok=True)

    def _frame_callback_for(frames_bar):
        """Build a per-file callback that feeds the shared frame bar."""
        seen = 0

        def _on_progress(done: int, total: int) -> None:
            nonlocal seen
            if done == 0:
                # Start of a file: grow the shared total by this file's frames.
                frames_bar.total = (frames_bar.total or 0) + total
                frames_bar.refresh()
                seen = 0
                return
            step = max(0, done - seen)
            if step:
                frames_bar.update(step)
            seen = done

        return _on_progress

    files_bar_options = {
        "total": len(wav_files),
        "unit": "file",
        "desc": "Files",
        "dynamic_ncols": True,
        "file": sys.stderr,
    }
    frames_bar_options = {
        "total": 0,
        "unit": "frame",
        "desc": "Frames",
        "dynamic_ncols": True,
        "file": sys.stderr,
    }
    with tqdm(**files_bar_options) as files_progress, tqdm(
        **frames_bar_options
    ) as frames_progress:
        for wav_path in wav_files:
            destination = output_dir / f"{wav_path.stem}_enhanced.wav"
            enhance_file(
                input_path=wav_path,
                output_path=destination,
                model=args.model,
                verbose=args.verbose,
                progress_callback=_frame_callback_for(frames_progress),
            )
            files_progress.update(1)
            files_progress.set_postfix_str(wav_path.name)
    return 0
def _run_download(args: argparse.Namespace) -> int:
    """Handle the ``download`` subcommand and print where models were stored.

    The model may come from the positional argument or from ``--model``; the
    positional value wins when both are given and agree. Returns 0 on success.

    Raises:
        ValueError: if the positional argument and ``--model`` name
            different models.
    """
    from .api import download

    positional, flagged = args.model, args.model_flag
    if positional is not None and flagged is not None and positional != flagged:
        raise ValueError("Conflicting model names provided in positional argument and --model.")
    model = flagged if positional is None else positional

    destination = download(
        model=model,
        force=args.force,
        quiet=args.quiet,
        verbose=args.verbose,
    )

    if not isinstance(destination, dict):
        model_name = "<unknown>" if model is None else model
        print(f"Downloaded '{model_name}' to: {destination}")
        return 0

    print("Downloaded models:")
    for model_name, model_path in destination.items():
        print(f"- {model_name}: {model_path}")
    return 0
def main(argv: Optional[List[str]] = None) -> int:
    """Run the ``dpdfnet`` command-line interface.

    Args:
        argv: Argument list to parse; ``None`` lets ``argparse`` read
            ``sys.argv``.

    Returns:
        0 when no subcommand is given (help is printed) or the subcommand
        succeeds; 2 when the subcommand raises (the error is printed to
        stderr) or the command name is not recognized.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)
    command = args.command

    if command is None:
        parser.print_help()
        return 0

    handlers = {
        "models": lambda: _print_model_table(),
        "enhance": lambda: _run_enhance(args),
        "enhance-dir": lambda: _run_enhance_dir(args),
        "download": lambda: _run_download(args),
    }
    handler = handlers.get(command)
    if handler is None:
        parser.print_help()
        return 2

    # Top-level boundary: report any failure as a message plus exit status 2.
    try:
        return handler()
    except Exception as exc:
        print(f"Error: {exc}", file=sys.stderr)
        return 2


if __name__ == "__main__":
    raise SystemExit(main())