rsplitcap 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. rsplitcap-0.1.0/.github/workflows/ci.yml +64 -0
  2. rsplitcap-0.1.0/.gitignore +40 -0
  3. rsplitcap-0.1.0/CLAUDE.md +92 -0
  4. rsplitcap-0.1.0/Cargo.lock +1334 -0
  5. rsplitcap-0.1.0/Cargo.toml +44 -0
  6. rsplitcap-0.1.0/EXECUTION_STEPS.md +161 -0
  7. rsplitcap-0.1.0/LICENSE +21 -0
  8. rsplitcap-0.1.0/PKG-INFO +7 -0
  9. rsplitcap-0.1.0/README.md +374 -0
  10. rsplitcap-0.1.0/bench/bench_report/report.html +351 -0
  11. rsplitcap-0.1.0/bench/bench_report/report.json +2119 -0
  12. rsplitcap-0.1.0/bench/bench_rsplitcap.py +1921 -0
  13. rsplitcap-0.1.0/bench/win_bench_report/report.html +346 -0
  14. rsplitcap-0.1.0/bench/win_bench_report/report.json +3169 -0
  15. rsplitcap-0.1.0/pyproject.toml +14 -0
  16. rsplitcap-0.1.0/python/rsplit_pipe_reader.py +76 -0
  17. rsplitcap-0.1.0/python/rsplitcap/__init__.py +44 -0
  18. rsplitcap-0.1.0/python/rsplitcap/__init__.pyi +83 -0
  19. rsplitcap-0.1.0/src/archive/mod.rs +526 -0
  20. rsplitcap-0.1.0/src/archive/reader.rs +229 -0
  21. rsplitcap-0.1.0/src/archive/writer.rs +265 -0
  22. rsplitcap-0.1.0/src/cli.rs +237 -0
  23. rsplitcap-0.1.0/src/filter.rs +52 -0
  24. rsplitcap-0.1.0/src/flow/mod.rs +125 -0
  25. rsplitcap-0.1.0/src/flow/strategy.rs +124 -0
  26. rsplitcap-0.1.0/src/lib.rs +15 -0
  27. rsplitcap-0.1.0/src/main.rs +631 -0
  28. rsplitcap-0.1.0/src/output/mod.rs +150 -0
  29. rsplitcap-0.1.0/src/output/split.rs +237 -0
  30. rsplitcap-0.1.0/src/packet.rs +426 -0
  31. rsplitcap-0.1.0/src/parser/mod.rs +51 -0
  32. rsplitcap-0.1.0/src/parser/pcap.rs +132 -0
  33. rsplitcap-0.1.0/src/parser/pcapng.rs +372 -0
  34. rsplitcap-0.1.0/src/python/mod.rs +576 -0
  35. rsplitcap-0.1.0/tests/fuzz_parsers.rs +262 -0
  36. rsplitcap-0.1.0/tests/integration.rs +333 -0
@@ -0,0 +1,64 @@
1
+ name: CI  # test on every push/PR to main; build wheels on push and release; publish on release only
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+ release:
9
+ types: [published]
10
+
11
+ jobs:
12
+ test:
13
+ name: Test (Rust ${{ matrix.rust }})
14
+ runs-on: ubuntu-latest
15
+ strategy:
16
+ matrix:
17
+ rust: [stable]
18
+ steps:
19
+ - uses: actions/checkout@v4
20
+ - uses: dtolnay/rust-toolchain@stable
21
+ with:
22
+ toolchain: ${{ matrix.rust }}
23
+ - name: Run tests
24
+ run: cargo test
25
+
26
+ build-python:
27
+ name: Build Python wheels
28
+ runs-on: ${{ matrix.os }}
29
+ needs: test  # wheels are built only after the Rust test job succeeds
30
+ if: github.event_name == 'release' || github.event_name == 'push'
31
+ strategy:
32
+ matrix:
33
+ os: [ubuntu-latest, macos-latest, windows-latest]
34
+ steps:
35
+ - uses: actions/checkout@v4
36
+ - uses: dtolnay/rust-toolchain@stable
37
+ - name: Build wheel
38
+ uses: PyO3/maturin-action@v1
39
+ with:
40
+ args: --release --out dist --features python  # no target input: the matrix defines only os, so each runner builds for its host
41
+ - name: Upload wheels
42
+ uses: actions/upload-artifact@v4
43
+ with:
44
+ name: wheels-${{ matrix.os }}  # one artifact per matrix OS
45
+ path: dist
46
+
47
+ publish:
48
+ name: Publish to PyPI
49
+ runs-on: ubuntu-latest
50
+ needs: build-python
51
+ if: github.event_name == 'release'
52
+ steps:
53
+ - uses: actions/download-artifact@v4  # fetch the wheels onto this runner; a store-side artifact merge leaves nothing on disk to upload
54
+ with:
55
+ path: wheels-all
56
+ pattern: wheels-*
57
+ merge-multiple: true  # extract every wheels-* artifact directly into wheels-all/
58
+ - name: Publish
59
+ uses: PyO3/maturin-action@v1
60
+ env:
61
+ MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
62
+ with:
63
+ command: upload
64
+ args: --non-interactive --skip-existing wheels-all/*.whl
@@ -0,0 +1,40 @@
1
+ # Build artifacts
2
+ /target/
3
+
4
+ # IDE
5
+ .vscode/
6
+ .idea/
7
+ *.swp
8
+ *.swo
9
+ *~
10
+
11
+ # OS
12
+ .DS_Store
13
+ Thumbs.db
14
+
15
+ # Test artifacts
16
+ *.rsplit
17
+ *.pcap
18
+ *.pcapng
19
+ *.cap
20
+ /tmp/
21
+
22
+ # Environment
23
+ .env
24
+ .env.local
25
+
26
+ # Benchmark data (keep script and reports, ignore datasets and binaries)
27
+ bench/USTC-TFC2016-master/
28
+ bench/USTC-TFC2016-master.rar
29
+ bench/SplitCap.exe
30
+ bench/**/runs/
31
+ bench/.wine32/
32
+
33
+ # Python
34
+ __pycache__/
35
+ *.pyc
36
+ .venv/
37
+ *.whl
38
+
39
+ # Cross-compile config (machine-specific paths)
40
+ .cargo/config.toml
@@ -0,0 +1,92 @@
1
+ # CLAUDE.md — RSplitCap project guide for Claude Code
2
+
3
+ ## Project Overview
4
+ RSplitCap is a Rust rewrite of SplitCap (Windows PCAP splitting tool), with added support for PCAP-NG and a custom single-file archive format (`.rsplit`).
5
+
6
+ ## Build & Test Commands
7
+ ```bash
8
+ cargo build # Debug build
9
+ cargo build --release # Release build (LTO, single codegen unit)
10
+ cargo test # Run all tests (16 tests: 6 integration + 10 fuzz/robustness)
11
+ cargo clippy -- -D warnings # Lint (zero warnings enforced)
12
+ RUST_LOG=debug cargo run -- <args> # Verbose run
13
+
14
+ # Python bindings build
15
+ cargo build --release --features python # Build native .so
16
+ # Copy to Python package:
17
+ cp target/release/librsplitcap.so python/rsplitcap/_rsplitcap.abi3.so
18
+ PYTHONPATH=$(pwd)/python python -c "import rsplitcap"
19
+
20
+ # Build Python wheel
21
+ pip install maturin
22
+ maturin build --release
23
+
24
+ # Cross-compile for Windows (from Linux/WSL2, requires mingw-w64)
25
+ rustup target add x86_64-pc-windows-gnu
26
+ cargo build --release --target x86_64-pc-windows-gnu
27
+
28
+ # Benchmark (requires Python 3.8+, matplotlib optional)
29
+ python bench/bench_rsplitcap.py --data-dir <path_to_pcaps> \
30
+ --rsplitcap ./target/release/rsplitcap \
31
+ --splitcap ./path/to/SplitCap.exe \
32
+ --output ./bench_report
33
+ ```
34
+
35
+ ## Benchmark Script (`bench/bench_rsplitcap.py`)
36
+ - **Shared tests**: 6 grouping strategies × up to 3 files, IP/port filter tests, L7 output — RSplitCap vs SplitCap
37
+ - **RSplitCap-unique tests**: BSSID strategy, archive create/extract, pipeline vs legacy, mmap vs no-mmap
38
+ - **Metrics**: wall time, CPU time, peak RSS (via `/usr/bin/time -v`), output file count/size, packet count
39
+ - **Output**: `report.html` (interactive charts with matplotlib) + `report.json` (machine-readable)
40
+ - **Key options**: `--quick` (1+1 runs), `--max-files N`, `--no-splitcap`, `--skip-*`, `--timeout N`
41
+ - On WSL2, SplitCap paths are auto-converted via `wslpath -w`; wine fallback if no WSL interop
42
+
43
+ ## Architecture (layered)
44
+ 1. **CLI** (`src/cli.rs`) — clap-based, normalizes SplitCap multi-char flags (`-ip`→`--ip`)
45
+ 2. **Parser** (`src/parser/`) — `CaptureReader` trait, PCAP + PCAP-NG (SHB/IDB/EPB/SPB)
46
+ 3. **Packet** (`src/packet.rs`) — unified Packet struct, Ethernet/IPv4/IPv6 parsing, WiFi 802.11 + radiotap
47
+ 4. **Filter** (`src/filter.rs`) — IP + port AND-logic whitelist
48
+ 5. **Flow Manager** (`src/flow/`) — 9 grouping strategies, LRU eviction via generation counter
49
+ 6. **Output** — Split mode (per-flow PCAP/L7), Archive mode (`.rsplit` with secondary indexes)
50
+
51
+ ## Key Design Decisions
52
+ - Input files are mmap'd via `memmap2` + `Bytes::from_owner` — zero-copy, OS-managed paging for >memory files.
53
+ - Split mode uses pipelined streaming: parser in background thread (crossbeam bounded channel) → classify + write in main thread via `SplitWriter`. Packets written as they arrive, not accumulated.
54
+ - Archive uses 2-phase write: sequential packets → post-positioned indexes (Delta+LEB128).
55
+ - FlowEntry is fixed 96 bytes — enables O(1) random access in Flow Table.
56
+ - IP addresses stored as 16 bytes (IPv4-mapped IPv6) for uniform handling.
57
+ - `-s seconds N` and `-s packets N` use the `-s` flag with a sub-argument — manually parsed.
58
+ - Secondary indexes: sorted key→[flow_id] maps for IP/port/protocol in archive (binary-searchable).
59
+ - All file writes use atomic temp-file + rename pattern to prevent corruption on interrupt.
60
+ - `--no-pipeline` flag falls back to legacy accumulate-then-write mode.
61
+
62
+ ## Module Map
63
+ ```
64
+ main.rs ── cli ── filter ── flow (strategy + manager) ── output (split + mod)
65
+ ── parser (pcap + pcapng) ── archive (writer + reader + format)
66
+ ── packet (ethernet, ipv4, ipv6 ext hdrs, wifi 802.11 + radiotap)
67
+ ── python (PyO3 bindings: Archive, Flow, Packet, read_flows, create_archive, split, pipe_archive)
68
+ ```
69
+
70
+ ## Python Bindings (`src/python/mod.rs`)
71
+
72
+ PyO3 feature (`--features python`, cfg-gated):
73
+ - **`Archive`** (`ArchivePy`): wraps `ArchiveReader`, open .rsplit files, `flows()`, `get_flow(id)`, `find_by_*`
74
+ - **`Flow`** (`FlowPy`): holds `Arc<ArchiveReader>` + `FlowEntry`, metadata + `packets()` → `Packet` list
75
+ - **`Packet`** (`PacketPy`): `ts_sec`, `ts_usec`, `ts`, `length`, `orig_len`, `data` (bytes)
76
+ - **`read_flows(path)`**: pcap → temp .rsplit → Archive (temp file auto-cleaned on Drop)
77
+ - **`create_archive(input, output, strategy, ip_filters, port_filters)`**: pcap → .rsplit
78
+ - **`split(input, output_dir, strategy, ip_filters, port_filters, output_type)`**: split mode
79
+ - **`pipe_archive(path)`** → `list[bytes]`: each flow as standalone pcap
80
+
81
+ Tracing writes to stderr (`with_writer(io::stderr)`) so stdout stays clean for `--pipe` mode.
82
+ Build requires `libpython3.12.so` symlink (dev package) or `RUSTFLAGS="-L <dir>"`.
83
+
84
+ ## Known Gaps
85
+ - LRU eviction is O(n) per eviction (fast in practice for default 10k max_sessions)
86
+ - PCAP-NG: single interface only, no name resolution blocks
87
+ - No WiFi data-frame IP parsing over LLC/SNAP (BSSID/MAC extraction works; five-tuple returns None)
88
+ - **Windows performance**: Cross-compiled `x86_64-pc-windows-gnu` binary is ~8× slower than Linux native. MSVC native build should restore full performance but has not been benchmarked yet. SplitCap runs 7.6× faster natively on Windows than via WSL2/Wine.
89
+
90
+ ## Test Coverage
91
+ - `tests/integration.rs`: 6 tests (split, filter, L7, PCAP-NG, archive roundtrip, list-flows)
92
+ - `tests/fuzz_parsers.rs`: 10 tests (random data PCAP/PCAP-NG parsers, malformed packets, LEB128 codec, FlowEntry roundtrip, corrupt archive rejection, WiFi radiotap/management BSSID, WiFi malformed, IPv6 extension header chain)