ruopus 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. ruopus-0.1.1/.github/workflows/ci.yml +116 -0
  2. ruopus-0.1.1/.github/workflows/python-wheels.yml +83 -0
  3. ruopus-0.1.1/.gitignore +21 -0
  4. ruopus-0.1.1/CHANGELOG.md +65 -0
  5. ruopus-0.1.1/Cargo.lock +511 -0
  6. ruopus-0.1.1/Cargo.toml +78 -0
  7. ruopus-0.1.1/LICENSE +20 -0
  8. ruopus-0.1.1/PKG-INFO +171 -0
  9. ruopus-0.1.1/README.md +129 -0
  10. ruopus-0.1.1/docs/encoder-future-work.md +58 -0
  11. ruopus-0.1.1/docs/python/api.rst +20 -0
  12. ruopus-0.1.1/docs/python/conf.py +45 -0
  13. ruopus-0.1.1/docs/python/index.rst +30 -0
  14. ruopus-0.1.1/docs/unsafe.md +76 -0
  15. ruopus-0.1.1/pyproject.toml +68 -0
  16. ruopus-0.1.1/rustfmt.toml +17 -0
  17. ruopus-0.1.1/src/celt/bands.rs +1684 -0
  18. ruopus-0.1.1/src/celt/cwrs.rs +321 -0
  19. ruopus-0.1.1/src/celt/decoder.rs +1043 -0
  20. ruopus-0.1.1/src/celt/encoder.rs +1688 -0
  21. ruopus-0.1.1/src/celt/energy.rs +297 -0
  22. ruopus-0.1.1/src/celt/laplace.rs +179 -0
  23. ruopus-0.1.1/src/celt/mdct.rs +577 -0
  24. ruopus-0.1.1/src/celt/mod.rs +46 -0
  25. ruopus-0.1.1/src/celt/modes.rs +126 -0
  26. ruopus-0.1.1/src/celt/pitch.rs +390 -0
  27. ruopus-0.1.1/src/celt/plc.rs +288 -0
  28. ruopus-0.1.1/src/celt/rate.rs +722 -0
  29. ruopus-0.1.1/src/celt/tables.rs +205 -0
  30. ruopus-0.1.1/src/celt/vq.rs +491 -0
  31. ruopus-0.1.1/src/celt/vq_simd.rs +431 -0
  32. ruopus-0.1.1/src/decoder.rs +640 -0
  33. ruopus-0.1.1/src/encoder.rs +3044 -0
  34. ruopus-0.1.1/src/encoder_analysis.rs +239 -0
  35. ruopus-0.1.1/src/float.rs +172 -0
  36. ruopus-0.1.1/src/lib.rs +72 -0
  37. ruopus-0.1.1/src/multistream.rs +107 -0
  38. ruopus-0.1.1/src/ogg/crc.rs +63 -0
  39. ruopus-0.1.1/src/ogg/mod.rs +22 -0
  40. ruopus-0.1.1/src/ogg/opus.rs +545 -0
  41. ruopus-0.1.1/src/ogg/page.rs +451 -0
  42. ruopus-0.1.1/src/packet.rs +547 -0
  43. ruopus-0.1.1/src/python/decoder.rs +194 -0
  44. ruopus-0.1.1/src/python/encoder.rs +482 -0
  45. ruopus-0.1.1/src/python/enums.rs +250 -0
  46. ruopus-0.1.1/src/python/errors.rs +66 -0
  47. ruopus-0.1.1/src/python/lowlevel/celt.rs +240 -0
  48. ruopus-0.1.1/src/python/lowlevel/mod.rs +6 -0
  49. ruopus-0.1.1/src/python/lowlevel/silk.rs +455 -0
  50. ruopus-0.1.1/src/python/mod.rs +75 -0
  51. ruopus-0.1.1/src/python/multistream.rs +109 -0
  52. ruopus-0.1.1/src/python/numpy_io.rs +92 -0
  53. ruopus-0.1.1/src/python/ogg.rs +170 -0
  54. ruopus-0.1.1/src/python/packet.rs +216 -0
  55. ruopus-0.1.1/src/range/decoder.rs +318 -0
  56. ruopus-0.1.1/src/range/encoder.rs +368 -0
  57. ruopus-0.1.1/src/range/mod.rs +90 -0
  58. ruopus-0.1.1/src/silk/api.rs +458 -0
  59. ruopus-0.1.1/src/silk/decoder.rs +473 -0
  60. ruopus-0.1.1/src/silk/encode/api.rs +910 -0
  61. ruopus-0.1.1/src/silk/encode/control.rs +90 -0
  62. ruopus-0.1.1/src/silk/encode/dsp.rs +146 -0
  63. ruopus-0.1.1/src/silk/encode/frame.rs +929 -0
  64. ruopus-0.1.1/src/silk/encode/gains.rs +167 -0
  65. ruopus-0.1.1/src/silk/encode/lpc.rs +224 -0
  66. ruopus-0.1.1/src/silk/encode/ltp.rs +403 -0
  67. ruopus-0.1.1/src/silk/encode/mod.rs +28 -0
  68. ruopus-0.1.1/src/silk/encode/nlsf.rs +617 -0
  69. ruopus-0.1.1/src/silk/encode/noise_shape.rs +377 -0
  70. ruopus-0.1.1/src/silk/encode/nsq.rs +617 -0
  71. ruopus-0.1.1/src/silk/encode/pitch_analysis.rs +675 -0
  72. ruopus-0.1.1/src/silk/encode/resample.rs +177 -0
  73. ruopus-0.1.1/src/silk/encode/resample_in.rs +215 -0
  74. ruopus-0.1.1/src/silk/encode/stereo.rs +435 -0
  75. ruopus-0.1.1/src/silk/encode/vad.rs +329 -0
  76. ruopus-0.1.1/src/silk/gains.rs +155 -0
  77. ruopus-0.1.1/src/silk/indices.rs +468 -0
  78. ruopus-0.1.1/src/silk/lpc.rs +181 -0
  79. ruopus-0.1.1/src/silk/math.rs +305 -0
  80. ruopus-0.1.1/src/silk/mod.rs +39 -0
  81. ruopus-0.1.1/src/silk/nlsf.rs +252 -0
  82. ruopus-0.1.1/src/silk/params.rs +205 -0
  83. ruopus-0.1.1/src/silk/pitch.rs +61 -0
  84. ruopus-0.1.1/src/silk/plc.rs +498 -0
  85. ruopus-0.1.1/src/silk/pulses.rs +476 -0
  86. ruopus-0.1.1/src/silk/resampler.rs +709 -0
  87. ruopus-0.1.1/src/silk/stereo.rs +158 -0
  88. ruopus-0.1.1/src/silk/tables.rs +554 -0
  89. ruopus-0.1.1/src/simd.rs +617 -0
@@ -0,0 +1,116 @@
1
+ name: ci
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+
8
+ env:
9
+ CARGO_TERM_COLOR: always
10
+
11
+ jobs:
12
+ fmt:
13
+ name: rustfmt
14
+ runs-on: ubuntu-latest
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+ # rustfmt.toml uses nightly-only options, so format checking needs nightly.
18
+ - uses: dtolnay/rust-toolchain@nightly
19
+ with:
20
+ components: rustfmt
21
+ - run: cargo +nightly fmt --all --check
22
+
23
+ clippy:
24
+ name: clippy
25
+ runs-on: ubuntu-latest
26
+ steps:
27
+ - uses: actions/checkout@v4
28
+ - uses: dtolnay/rust-toolchain@stable
29
+ with:
30
+ components: clippy
31
+ - uses: Swatinem/rust-cache@v2
32
+ # The vs_libopus bench (an --all-targets target) links libopus via FFI.
33
+ - run: sudo apt-get update && sudo apt-get install -y libopus-dev pkg-config cmake
34
+ - name: clippy (default features)
35
+ run: cargo clippy --all-targets --features std -- -D warnings
36
+ - name: clippy (zero-dependency build)
37
+ run: cargo clippy --lib --tests --no-default-features --features std -- -D warnings
38
+
39
+ test:
40
+ name: test
41
+ runs-on: ubuntu-latest
42
+ steps:
43
+ - uses: actions/checkout@v4
44
+ - uses: dtolnay/rust-toolchain@stable
45
+ - uses: Swatinem/rust-cache@v2
46
+ - name: test (default features)
47
+ run: cargo test --features std
48
+ - name: test (zero-dependency build)
49
+ run: cargo test --no-default-features --features std
50
+
51
+ no-std:
52
+ name: no_std (rlib) builds
53
+ runs-on: ubuntu-latest
54
+ steps:
55
+ - uses: actions/checkout@v4
56
+ - uses: dtolnay/rust-toolchain@stable
57
+ with:
58
+ targets: thumbv7em-none-eabihf
59
+ - uses: Swatinem/rust-cache@v2
60
+ # A bare-metal target with no std proves the core crate is no_std + alloc
61
+ # clean (the cdylib/Python path needs std and is not built here).
62
+ - run: cargo build -p ruopus --no-default-features --features libm --target thumbv7em-none-eabihf
63
+
64
+ conformance:
65
+ name: conformance vectors
66
+ runs-on: ubuntu-latest
67
+ steps:
68
+ - uses: actions/checkout@v4
69
+ - uses: dtolnay/rust-toolchain@stable
70
+ - uses: Swatinem/rust-cache@v2
71
+ - name: cache official test vectors (~121 MB, RFC 8251 set)
72
+ id: vectors
73
+ uses: actions/cache@v4
74
+ with:
75
+ path: tests/vectors
76
+ key: opus-testvectors-rfc8251
77
+ - name: fetch test vectors
78
+ if: steps.vectors.outputs.cache-hit != 'true'
79
+ run: sh tools/fetch-testvectors.sh
80
+ - name: conformance (including the slow opus_compare quality metric)
81
+ run: cargo test --release --features std --test conformance -- --include-ignored
82
+
83
+ miri:
84
+ name: miri (unsafe SIMD kernels)
85
+ runs-on: ubuntu-latest
86
+ steps:
87
+ - uses: actions/checkout@v4
88
+ - uses: dtolnay/rust-toolchain@nightly
89
+ with:
90
+ components: miri
91
+ - uses: Swatinem/rust-cache@v2
92
+ - run: tools/miri.sh
93
+
94
+ bench:
95
+ name: perf vs libopus
96
+ runs-on: ubuntu-latest
97
+ steps:
98
+ - uses: actions/checkout@v4
99
+ - uses: dtolnay/rust-toolchain@stable
100
+ - uses: Swatinem/rust-cache@v2
101
+ - run: sudo apt-get update && sudo apt-get install -y libopus-dev pkg-config cmake
102
+ # Shared CI runners are noisy and not core-pinned, so the absolute ratios
103
+ # are not a hard gate; the table is printed on every run so a reviewer can
104
+ # spot a large regression (e.g. the kind a per-frame analysis pass caused).
105
+ - run: cargo bench --bench vs_libopus --features std
106
+
107
+ python:
108
+ name: python tests
109
+ runs-on: ubuntu-latest
110
+ steps:
111
+ - uses: actions/checkout@v4
112
+ - uses: dtolnay/rust-toolchain@stable
113
+ - uses: Swatinem/rust-cache@v2
114
+ - uses: astral-sh/setup-uv@v5
115
+ # uv builds the extension (maturin backend) as part of the sync.
116
+ - run: uv run --project . --extra test python -m pytest tests/python -q
@@ -0,0 +1,83 @@
1
+ name: python-wheels
2
+
3
+ # Build per-version wheels (cp39-cp313) for Linux, macOS, and Windows, each with
4
+ # PyO3-introspected type stubs patched by tools/build_python.py (NumPy imports +
5
+ # the typed exception hierarchy). No abi3: subclassing PyException for the
6
+ # exception hierarchy requires per-version builds.
7
+
8
+ on:
9
+ workflow_dispatch:
10
+ push:
11
+ tags: ["v*"]
12
+ pull_request:
13
+ paths:
14
+ - "src/python/**"
15
+ - "tools/build_python.py"
16
+ - "Cargo.toml"
17
+ - "pyproject.toml"
18
+ - ".github/workflows/python-wheels.yml"
19
+
20
+ jobs:
21
+ linux:
22
+ name: linux ${{ matrix.py }}
23
+ runs-on: ubuntu-latest
24
+ container: quay.io/pypa/manylinux_2_28_x86_64
25
+ strategy:
26
+ fail-fast: false
27
+ matrix:
28
+ py: ["cp39-cp39", "cp310-cp310", "cp311-cp311", "cp312-cp312", "cp313-cp313"]
29
+ steps:
30
+ - uses: actions/checkout@v4
31
+ - name: Install Rust
32
+ run: |
33
+ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
34
+ echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
35
+ - name: Build + patch wheel
36
+ run: |
37
+ PY=/opt/python/${{ matrix.py }}/bin/python
38
+ $PY -m pip install --upgrade pip maturin numpy
39
+ $PY tools/build_python.py --release -i "$PY"
40
+ - name: Repair to manylinux
41
+ run: |
42
+ /opt/python/${{ matrix.py }}/bin/pip install auditwheel
43
+ for whl in target/wheels/*.whl; do auditwheel repair "$whl" -w dist/; done
44
+ # Re-patch stubs in the repaired wheel (auditwheel rewrites the wheel).
45
+ /opt/python/${{ matrix.py }}/bin/python tools/build_python.py --patch-only dist/*.whl
46
+ - uses: actions/upload-artifact@v4
47
+ with:
48
+ name: wheels-linux-${{ matrix.py }}
49
+ path: dist/*.whl
50
+
51
+ macos-windows:
52
+ name: ${{ matrix.os }} ${{ matrix.py }}
53
+ runs-on: ${{ matrix.os }}
54
+ strategy:
55
+ fail-fast: false
56
+ matrix:
57
+ os: [macos-latest, windows-latest]
58
+ py: ["3.9", "3.10", "3.11", "3.12", "3.13"]
59
+ steps:
60
+ - uses: actions/checkout@v4
61
+ - uses: actions/setup-python@v5
62
+ with:
63
+ python-version: ${{ matrix.py }}
64
+ - uses: dtolnay/rust-toolchain@stable
65
+ - name: Build + patch wheel
66
+ run: |
67
+ python -m pip install --upgrade pip maturin numpy
68
+ python tools/build_python.py --release
69
+ - uses: actions/upload-artifact@v4
70
+ with:
71
+ name: wheels-${{ matrix.os }}-${{ matrix.py }}
72
+ path: target/wheels/*.whl
73
+
74
+ sdist:
75
+ runs-on: ubuntu-latest
76
+ steps:
77
+ - uses: actions/checkout@v4
78
+ - uses: dtolnay/rust-toolchain@stable
79
+ - run: pipx run maturin sdist -o dist
80
+ - uses: actions/upload-artifact@v4
81
+ with:
82
+ name: sdist
83
+ path: dist/*.tar.gz
@@ -0,0 +1,21 @@
1
+ /target
2
+ /tests/vectors/
3
+
4
+ # Python build / test artifacts
5
+ /dist/
6
+ /docs/python/_build/
7
+ __pycache__/
8
+ *.py[cod]
9
+ .pytest_cache/
10
+ *.egg-info/
11
+
12
+ *.aux
13
+ *.bbl
14
+ *.blg
15
+ *.bcf
16
+ *.log
17
+ *.out
18
+ *.run.xml
19
+ *.toc
20
+ manual/build
21
+ *.lock
@@ -0,0 +1,65 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ ## [Unreleased]
6
+
7
+ ### Changed
8
+ - The published crate ships only its sources, license and docs - the reference manual PDF, tests, examples, benches and dev tooling are excluded, cutting the package from 877 KiB to 316 KiB compressed
9
+ - SILK/hybrid encode now beats libopus 1.6.1 at equal complexity (matched complexity-0: SILK wideband ~1.08×, hybrid fullband ~1.03×, up from ~0.87-0.90×; CELT at parity 0.99×; at default complexity-10 `ruopus` is 1.6-3.4× faster). Five bit-exact hot-path rewrites, found by profiling against libopus's own debuginfo function-for-function (pinned to one core): (1) the NSQ inner loop picks between the two quantization levels with a precomputed rate-distortion table (a port of `silk_NSQ_sse4_1`'s `table[64][4]` - one lookup/multiply/compare replacing a four-way branch with four multiplies); (2) the LPC analysis filter dispatches on the prediction order to compiler-unrolled per-order FIRs instead of a generic SIMD dot per output sample (whose per-call horizontal fold dominated); (3) the NSQ noise-shape feedback loop shifts its state in place fused with the dot, with no per-sample scratch array; (4) the shell-coder pulse tree is built on the stack instead of allocating four Vecs per 16-sample block (~80 allocations per frame); (5) the Burg LPC work vectors are stack-allocated. All leave the SILK bitstream bit-identical (conformance range + PCM and all encoder round-trips unchanged)
10
+ - SILK NSQ (noise-shaping quantiser) no longer allocates on the hot path: its three per-call work buffers are reused across calls (it runs once per rate-control iteration, not just once per frame) and the voiced re-whitening branch's per-subframe input clone is gone (it filters directly from the output history). Bit-identical - SILK conformance range and PCM unchanged
11
+ - SILK encode hot loops shed remaining per-iteration overhead: the front-end resampler drops a libm `roundf` per output sample (round-half-away-from-zero via a `copysign` add and the saturating `as i16` cast), and the LPC analysis whitening filter pre-reverses its taps so the inner prediction is a bounds-check-free forward windowed dot. Both stay bit-identical (SILK conformance range and PCM unchanged)
12
+ - SILK and hybrid encode are substantially faster (wideband 16 kb/s ~370×→~520× realtime, fullband hybrid 32 kb/s ~270×→~320×): the pitch analysis that dominates SILK encode now runs its cross-correlations, autocorrelation, energy, and LPC whitening filter through SIMD dot products (AVX2+FMA / SSE2, `f64` accumulator to preserve the pinned-against-reference pitch decisions). Both are now well over 2× faster than libopus at its default complexity
13
+ - CELT encode is now faster than libopus at its default complexity (fullband 64 kb/s ~590× vs ~400× realtime, up from ~305×): the pitch-analysis correlations that dominate the CELT pre-filter (`celt_pitch_xcorr` and the doubling search - ~two-thirds of CELT encode) now run through SIMD dot products (AVX2+FMA with runtime detection, SSE2 baseline, scalar fallback off x86-64). The pitch values feed encoder decisions only, which the reference float build does not require to be bit-exact
14
+ - CELT encode is ~15-20% faster (fullband 64 kb/s ~305×→~355× realtime) from an SSE2 port of the PVQ pulse search (`op_pvq_search`, the O(K·N) band-encode hot loop). The crate now permits `unsafe` at explicitly-annotated SIMD sites - the lint is relaxed from `forbid` to `deny`, and every site carries a `// SAFETY:` justification listed in `docs/unsafe.md`. Still no FFI and no dependencies; decode conformance is unchanged (the range coder, not the encoder's pulse choice, defines the bitstream)
15
+ - **BREAKING:** the `spectrograms` FFT backend is now a default feature - the default build decodes at ~410-730× realtime (one core) vs ~10× with the built-in evaluation; use `default-features = false` for the zero-dependency build
16
+
17
+ ### Fixed
18
+ - Decoder no longer panics on malformed/corrupt packets (a DoS surface): `force_tell` underflowed past the silence-path force-fill target, and the hybrid redundancy split underflowed `len - redundancy_bytes` in unsigned arithmetic - both now match the reference's signed semantics. Found by differential fuzzing (1M+ iterations clean); guarded by `tests/robustness.rs`
19
+ - Hybrid encoding no longer panics on loud frames: the CELT coarse-energy quantiser compared `budget - tell` as unsigned, which underflowed when the SILK low band pushed the range coder past the budget; it now uses signed arithmetic like the reference, and a genuine SILK/CELT budget overrun returns `EncodeError::InvalidBudget` instead of panicking
20
+
21
+ ### Added
22
+ - `no_std` support for the decoder: the Opus decoder, the CELT and SILK decode paths, the range coder and the packet layer build for `no_std` + `alloc` via a new `libm` feature (`default-features = false, features = ["libm"]`), with `libm` supplying the float transcendentals the `std` `f32`/`f64` methods provide otherwise. The `std` build is byte-for-byte unchanged (conformance unaffected). The encoder, its analysis and the SIMD kernels still require `std`. CI builds the decoder for `thumbv7em-none-eabihf`
23
+ - Encoder control-surface parity with libopus (the classic `OPUS_SET_*` surface), each verified against the bit-exact range oracle and the existing `cargo test --lib` suite: **in-band FEC** (`set_inband_fec` + `set_packet_loss_perc`) - the SILK encoder generates a redundant LBRR copy of the previous packet's frame across the **mono, stereo, and hybrid** paths, so `OpusDecoder::decode_fec` genuinely reconstructs a lost frame (recovery-vs-concealment correlation ≈0.996/0.18 mono, ≈0.85/0.22 stereo, ≈0.97/0.25 hybrid; the non-FEC output stays byte-identical, hybrid LBRR shares the SILK byte budget so the CELT high band keeps its floor). `set_packet_loss_perc` drives loss-robust coding: it raises the LTP scaling of independently-coded voiced frames and codes the LBRR copy at a reduced rate (a second NSQ pass at the `LBRR_GainIncreases` gain offset); an **analysis-driven mode/bandwidth decision** (`src/encoder_analysis.rs`: FFT-free 6-band energies, spectral tilt, zero-crossing rate → a smoothed music-vs-speech probability with mode hysteresis and automatic NB/MB/WB/SWB/FB selection) replacing the previous fixed heuristic, with `set_signal` (voice/music), `set_application` (VoIP/Audio/RestrictedLowDelay → CELT-only) and `set_max_bandwidth`; `set_force_channels` (a stereo encoder downmixes to mono and codes mono packets); `set_vbr`/CBR (a fixed byte count per CELT frame at the target rate); `OPUS_RESET_STATE` (`reset`); `OPUS_GET_LOOKAHEAD` (`lookahead`, the measured 120-sample CELT delay); and `OPUS_GET_*` getters for every setter. All are mirrored on the Python `OpusEncoder`. (Deliberately not implemented, to avoid hollow knobs that don't behave: `OPUS_SET_LSB_DEPTH`, `OPUS_SET_PREDICTION_DISABLED`, `OPUS_SET_PHASE_INVERSION_DISABLED`.)
24
+ - Python bindings (PyO3 0.29 + rust-numpy 0.29), in-crate behind an optional `python` feature so the default `cargo` build stays zero-dependency and pure Rust. The full public surface is exposed and idiomatic: `OpusEncoder`/`OpusDecoder` (config as properties, keyword-only constructors), `MultistreamDecoder`, the `Packet`/`Toc`/`OpusHead` introspection types (`Packet` is a read-only sequence of frame `bytes`), the `Mode`/`Bandwidth`/`FrameSize` enums, an `OpusError`-rooted exception hierarchy, the `encode_ogg_opus`/`decode_ogg_opus` functions, and an `ruopus.lowlevel` submodule for the SILK encoders/decoder, LPC analysis/synthesis, and the top-level CELT encoder/decoder. PCM crosses the boundary as NumPy `float32`/`int16` arrays shaped `(frames, channels)` with **no extra copy** - decode output moves the codec's `Vec` into NumPy (`PyArray1::from_vec`), encode input borrows the array's buffer (`as_slice`), and the GIL is released around every codec kernel (decode scales 810→2308× realtime across 1-4 threads; a 10 s decode's move-out avoids a ~68 µs / 28 GB/s copy a copying binding would pay). Fully type-hinted (`.pyi` generated from the binary by PyO3's `experimental-inspect` and shipped in the wheel, checked clean by `pyright`) and documented (one set of docstrings, sourced from the Rust `///` comments, served to both IDEs and Sphinx). Per-version wheels cp39-cp313 on Linux/macOS/Windows (`.github/workflows/python-wheels.yml`); build locally with `python tools/build_python.py`, benchmark with `tools/bench_python.py`, tests in `tests/python/`, runnable examples in `examples/python/`
25
+ - Miri soundness coverage for every `unsafe` SIMD kernel: `tools/miri.sh` runs focused tests that drive the PVQ pulse search, the dot/FIR/convert kernels, and the forward-MDCT pre-rotation through Miri (Rust's UB detector) over boundary-straddling sizes - once on the SSE2 baseline and once with AVX2+FMA forced - so each vector load/store is checked for out-of-bounds and misalignment on both dispatch paths. Documented in `docs/unsafe.md`
26
+ - SILK rate control gained the reference's lambda/quant-offset adjustment and per-subframe gain locking for frames stuck against the byte cap, lifting hybrid super-wideband 24 kb/s to +1.6 dB vs libopus and removing its CELT fallbacks
27
+ - Discontinuous transmission (`OpusEncoder::set_dtx`): after 200 ms of inactivity `encode_auto` emits 1-byte TOC-only packets the decoder conceals, dropping the silence bitrate to ~0.4 kb/s. Activity uses an adaptive energy detector that tracks the background-noise floor, so DTX engages during pauses with audible ambient noise, not just digital silence
28
+ - SILK LPC analysis now runs on the gain-normalised, LTP-pre-whitened signal (`silk_find_pred_coefs_FLP`: noise-shape gains first, then Burg on `LPC_in_pre` with the per-subframe `minInvGain` cap) instead of the raw frame. Measured on real speech, every SILK and hybrid config now beats libopus (e.g. wideband 24 kb/s SNR +2.6 → +5.3 dB vs libopus; fullband hybrid 32 kb/s +0.9 → +3.8 dB)
29
+ - `encode_ogg_opus` - encodes 48 kHz PCM into a complete standard Ogg Opus file (RFC 7845: `OpusHead`/`OpusTags` + audio pages), the symmetric counterpart to `decode_ogg_opus`. The codec now reads and writes `.opus` files; output is verified to decode in ffmpeg/libopus at the correct duration with 0.997 correlation at zero lag
30
+ - SILK per-frame hard bit cap (libopus's `silk_encode_frame_FLP` gain-multiplier rate control): scales the gains coarser - geometric then bisection-interpolated toward the cap - re-running NSQ until the coded frame fits, restoring the best fitting attempt. Applies to mono and stereo (mid + side share a cumulative cap). Improves hybrid quality at matched bitrate (fullband 32 kb/s +0.9 dB vs libopus, no longer needing the CELT fallback), keeps stereo hybrid within budget on real speech (super-wideband 32 kb/s: 467/468 frames vs 385/468 uncapped), and underpins the hybrid SILK/CELT split
31
+ - Hybrid SILK/CELT bitrate split (`compute_silk_rate_for_hybrid`, libopus's rate table) with a byte-capped SILK low band so the CELT high band always has room - hybrid now encodes real speech at its natural 24-32 kb/s rates (previously it overran and failed), tracking libopus within ~1 dB SNR; `encode_auto` falls back to CELT-only for a rare loud frame SILK cannot squeeze under the cap
32
+ - SILK encoder: started - building the speech/low-bitrate encoder kernel by kernel, each validated by round-tripping through the existing bit-exact decoder: modified Burg LPC analysis (`silk_burg_modified_FLP`), LPC→NLSF (`silk_A2NLSF`), the NLSF vector quantiser (`silk_NLSF_encode`: Laroia weights, first-stage VQ, delayed-decision trellis), subframe gain quantisation (`silk_gains_quant`), the excitation pulse/shell coder (`silk_encode_pulses`), the side-information index coder (`silk_encode_indices`), and the noise-shaping quantiser (`silk_NSQ_c` - the central kernel, validated by encoder/decoder reconstruction consistency); these are now assembled into the first complete encode path (`encode_frame`, unvoiced configuration) that round-trips a coded frame through the bit-exact decoder sample-for-sample, with real noise-shaping analysis (`silk_noise_shape_analysis_FLP`: windowed short-term analysis, Schur/k2a AR shaping with bandwidth expansion, SNR-driven gain control, sparseness-based quantiser offset, and low-frequency/tilt/harmonic shapers) colouring the quantisation noise; and the voiced long-term-prediction analysis and gain VQ (`silk_find_LTP_FLP` correlation matrices, `silk_VQ_WMat_EC` rate-distortion search, `silk_quant_LTP_gains` across the three periodicity codebooks); the encode path now covers both unvoiced and voiced frames (the voiced path adds pitch prediction, harmonic/pitch-dependent noise shaping and the NSQ long-term-prediction branch), each round-tripping through the bit-exact decoder - the pitch lag/contour is supplied as input pending the pitch search; and the pitch-analysis decimating resamplers (`silk_resampler_down2` half-band, `silk_resampler_down2_3` 2/3) pinned bit-exactly against the reference; and the three-stage pitch estimator (`silk_pitch_analysis_core_FLP`: 4 kHz coarse correlation search, 8 kHz contour-codebook refinement, internal-rate final search) that decides voiced/unvoiced and finds the per-subframe lags, with the 
`silk_find_pitch_lags_FLP` whitening driver (short-term LPC analysis → residual → voicing threshold → lag search) on top, both pinned bit-exactly; the per-frame encode now decides voiced/unvoiced and picks the pitch lags itself, carrying cross-frame state (NSQ history, input history, previous lag/type) so consecutive frames track the decoder; voice-activity detection (`silk_VAD_GetSA_Q8`: four-band filter tree, noise-floor tracking, SNR→sigmoid) now drives the pitch threshold, noise shaping and gain tuning with real speech-activity/tilt/quality, pinned bit-exactly; and rate control (`silk_control_SNR`: target bitrate → coding SNR via the per-bandwidth tables) is wired in behind a settable target bitrate, replacing the fixed coding SNR; and a public mono SILK encoder (`ruopus::SilkEncoder`, `silk_Encode` framing - VAD/LBRR header + coded frame) produces a SILK payload that decodes through the full `SilkDecoder` API and reconstructs the encoder's output sample-for-sample (the first end-to-end SILK encode→decode); and the encoder front-end resampler (`silk_resampler` forEnc, API rate → internal rate, adding the 1:3/1:4/1:6 down ratios) pinned bit-exactly; and multi-frame SILK packets (40/60 ms - later frames conditionally coded), exercising the conditional gains/lag coding end to end through the decoder
33
+ - `OpusEncoder::encode_silk` - SILK-mode Opus packets (mono **and stereo**, 10/20/40/60 ms, narrowband/mediumband/wideband): the 48 kHz input is resampled to the SILK internal rate and coded with the SILK TOC, producing packets that decode through the full `OpusDecoder` with matching final range, and which **libopus `opus_demo` decodes without error** (mono and stereo). The first speech-capable Opus output from the encoder
34
+ - `OpusEncoder::encode_hybrid` - hybrid Opus packets (mono **and stereo**, 10/20 ms, super-wideband/fullband): SILK codes the wideband low band (stereo via `SilkStereoEncoder`) and CELT the high band (bands 17..end) in one shared range coder, decoding through `OpusDecoder` with matching final range and accepted by libopus `opus_demo`. The CELT encoder gained a borrowed-range-coder core (`encode_core`/`encode_hybrid_into`) to share the coder
35
+ - `OpusEncoder::encode_auto` - automatic per-frame SILK/CELT/hybrid mode selection from the frame size and target bitrate (a single "just works" entry point)
36
+ - Encoder input DC-reject high-pass (libopus `dc_reject`): a 3 Hz one-pole high-pass applied to the 48 kHz input ahead of every mode, with per-channel state across frames, so DC and sub-audible rumble are stripped before coding rather than reproduced
37
+ - SILK closed-loop rate control (`SilkEncoder::encode_capped`): re-encode from a snapshot at a lower coding SNR until the payload fits a byte budget, so the mono SILK path honours `max_bytes` instead of erroring
38
+ - SILK encoder: stereo predictor quantisation and coding (`silk_stereo_quant_pred`/`silk_stereo_encode_pred`), round-tripping bit-exactly through the decoder, and the LR→MS analysis (`silk_stereo_find_predictor`, `silk_stereo_LR_to_MS`: mid/side conversion, predictor estimation, rate split and mid-only/width decisions, interpolated predictor application) pinned bit-exactly against the reference, and the 2-channel stereo encoder (`SilkStereoEncoder`: LR→MS framing, predictor/mid-only coding, conditional side channel with the mid-only→side transition reset) - a stereo SILK stream round-trips through the decoder finishing each packet on the encoder's exact range state
39
+ - CELT encoder: unconstrained variable bitrate (`OpusEncoder::set_bitrate`) - a `compute_vbr`-derived per-frame target (dynalloc/transient boost, band-depth floor, 2× cap) with the range coder shrunk to size; achieved rate tracks the target within a few percent
40
+ - CELT encoder: selectable audio bandwidth (narrowband/wideband/super-wideband/fullband) via `OpusEncoder::set_bandwidth`, spanning the CELT-only TOC config range 16-31
41
+ - Public `OpusEncoder` API producing CELT-only Opus packets (TOC + payload) at 48 kHz, mono or stereo - the encode-side counterpart to `OpusDecoder`, with `final_range()` and a typed `EncodeError`
42
+ - CELT encoder: rate-driven intensity-stereo band and dual-stereo decisions (`stereo_analysis` + hysteresis), replacing the forced full-stereo placeholder
43
+ - CELT encoder: gain-stabilising energy-error bias - nudges coarse energy toward the previous frame's quantisation error when energy is stable, matching the reference
44
+ - CELT encoder: pitch comb pre-filter - whitens the harmonic structure before the MDCT (3-tap FIR with a windowed cross-fade) and codes the post-filter octave/period/gain/tapset, the inverse of the decoder's post-filter; lifts tonal-signal SNR to 36.3 dB at 64 kbps mono
45
+ - CELT encoder: pitch analysis for the pre-filter - a float-build port of `pitch_downsample`/`pitch_search`/`remove_doubling` (LPC whitening, decimated cross-correlation, octave-error rejection)
46
+ - CELT encoder: spreading decision (`spreading_decision`) and per-band time/frequency resolution analysis (`tf_analysis` + Viterbi `tf_select`), replacing the fixed normal-spread / no-tf-change defaults
47
+ - CELT encoder: dynamic-allocation and trim analysis - per-band importance boosts (`dynalloc_analysis`) and spectral-tilt/transient-driven allocation trim (`alloc_trim_analysis`), lifting tonal-signal SNR by ~9 dB (23.7 → 32.7 dB at 64 kbps mono)
48
+ - CELT encoder: transient detection (`transient_analysis`) with short-block coding - encode-side Haar/Hadamard band reshaping, the transient tf schedule and tf_select, and the anti-collapse bit; conformant at every CELT frame size (2.5-20 ms), mono and stereo
49
+ - CELT encoder (§5.3), mono and stereo, long blocks: conservative-but-conformant frame decisions with bit-exact energy quantisation, allocation, theta splits and PVQ search - encoded streams round-trip through the decoder with bit-identical range states and are accepted by libopus's `opus_demo` with zero range-coder mismatches across byte budgets 25-251
50
+ - `decode_throughput` example measuring decode speed (× realtime) over an `opus_demo` bitstream
51
+ - SILK decoder groundwork (§4.2): module scaffold, all 69 static tables extracted mechanically from the reference sources (cross-file spot pins), the fixed-point arithmetic kernels pinned against outputs of the compiled reference headers, excitation pulse decoding (shell coder, LSB planes, signs), and side-information decoding (gains, two-stage NLSF VQ, pitch lags/contour, LTP indices, seed) round-trip tested against ports of the reference encode side, gain dequantisation, and the NLSF chain (residual dequantiser, stabiliser, NLSF→LPC conversion with stability enforcement), pitch lag resolution with the contour codebooks, per-frame parameter assembly, the synthesis core (excitation reconstruction, rewhitening, LTP and LPC filters) - bit-exact against the compiled reference - the per-frame decode driver, stereo prediction/unmixing (MS→LR pinned against the reference), and the resampler to the API rate (allpass 2× + fractional FIR, pinned per rate pair) - fixed-point stages pinned against the compiled reference
52
+ - Optional `spectrograms` feature: routes the MDCT's inner FFT through the `spectrograms` crate's planned transforms (~10× faster decode of the conformance vectors); the default build stays dependency-free with the built-in evaluation
53
+ - Conformance: CELT-only vectors are also scored against the reference PCM decode (`.dec`), validating the synthesis chain end to end - testvector01/07/11 land at 96/83/104 dB SNR against the reference float build
54
+ - CELT frame decoder (§4.3): the full `celt_decode_with_ec` sequence from bitstream flags through PVQ shapes, anti-collapse, synthesis (inverse MDCT, comb post-filter, de-emphasis) and cross-frame state - every packet of the CELT-only official test vectors (testvector01/07/11) decodes bit-exactly, with the per-packet final-range oracle enforced in the conformance suite
55
+ - CELT low-overlap MDCT (§4.3.7), forward and backward, with the Vorbis power window and short-block interleaving; the inner FFT is isolated behind a seam so the `spectrograms` crate can become an optional accelerated backend while the default build stays dependency-free - validated by TDAC perfect-reconstruction round trips (zero delay, unit gain)
56
+ - CELT band shape decoding (§4.3.4): PVQ shape decode with spreading rotation (`vq`), and the full band loop (`bands`) - recursive theta splits with the step/uniform/triangular PDFs, bit-exact cos/log2tan steering, stereo (theta, intensity, dual, N=2 special case), time/frequency reshaping (Haar + Hadamard), spectral folding with LCG noise fill, and collapse-mask tracking
57
+ - CELT bit allocation (§4.3.3): the full decoder-side `compute_allocation`/`interp_bits2pulses` derivation - quality-vector interpolation, explicit band skipping, intensity/dual-stereo parameters, fine/shape budget split with cap rebalancing - plus the pulse cache (`bits2pulses`/`pulses2bits`) and allocation tables extracted mechanically from the reference sources
58
+ - CELT energy envelope decoding (§4.3.2): coarse energy with time/frequency prediction and the Laplace probability model, budget-starved fallback codes, fine refinement, and final-bit distribution; plus the standard 48 kHz mode tables (band layout, energy means, prediction coefficients)
59
+ - CELT decoder kernels (`celt` module): the Laplace coder for coarse energy deltas (§4.3.2.1) and the PVQ codeword enumeration (§4.3.4.2, table-free CWRS) - exhaustively tested for index bijection against the reference V(N,K) table and through range-coder round trips
60
+ - Conformance harness against the official Opus test vectors (RFC 8251 set, fetched by `tools/fetch-testvectors.sh`): `opus_demo` bitstream parsing, packet-level validation of all 20,075 packets across the twelve vectors, TOC-duration agreement with the reference PCM, full configuration coverage; skips cleanly when vectors are absent
61
+ - Ogg container (RFC 3533) and Ogg Opus mapping (RFC 7845): CRC-verified page parsing with capture-pattern resync, cross-page packet reassembly with the RFC 7845 continuity rules, page writing, `OpusHead`/`OpusTags` headers (all channel-mapping families), per-packet granule resolution (pre-skip, end trimming), and a conformant stream writer - interop-tested against an ffmpeg/libopus file
62
+ - `lpc` module: Levinson-Durbin, LP analysis/synthesis filters (stateless and cross-frame), pitch estimation, and single-tap LTP - ported from `audio_samples` and decoupled to plain slices
63
+ - `experimental` module (feature `experimental-codec`, on by default): the pre-conformance SILK-style frame codec, spectral-flatness mode detection, hybrid crossover, and mid/side helpers, with documented divergences from RFC 6716
64
+ - Packet framing layer (RFC 6716 §3): TOC byte introspection (mode/bandwidth/frame size per Table 2), frame packing codes 0-3, padding, and full [R1]-[R7] malformed-packet validation
65
+ - Range decoder and encoder (RFC 6716 §4.1/§5.1): symbol, binary, ICDF, raw-bits, and uniform-integer coding with `tell`/`tell_frac`, verified by encoder/decoder `rng`-agreement round-trips