rsplitcap 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rsplitcap-0.1.0/.github/workflows/ci.yml +64 -0
- rsplitcap-0.1.0/.gitignore +40 -0
- rsplitcap-0.1.0/CLAUDE.md +92 -0
- rsplitcap-0.1.0/Cargo.lock +1334 -0
- rsplitcap-0.1.0/Cargo.toml +44 -0
- rsplitcap-0.1.0/EXECUTION_STEPS.md +161 -0
- rsplitcap-0.1.0/LICENSE +21 -0
- rsplitcap-0.1.0/PKG-INFO +7 -0
- rsplitcap-0.1.0/README.md +374 -0
- rsplitcap-0.1.0/bench/bench_report/report.html +351 -0
- rsplitcap-0.1.0/bench/bench_report/report.json +2119 -0
- rsplitcap-0.1.0/bench/bench_rsplitcap.py +1921 -0
- rsplitcap-0.1.0/bench/win_bench_report/report.html +346 -0
- rsplitcap-0.1.0/bench/win_bench_report/report.json +3169 -0
- rsplitcap-0.1.0/pyproject.toml +14 -0
- rsplitcap-0.1.0/python/rsplit_pipe_reader.py +76 -0
- rsplitcap-0.1.0/python/rsplitcap/__init__.py +44 -0
- rsplitcap-0.1.0/python/rsplitcap/__init__.pyi +83 -0
- rsplitcap-0.1.0/src/archive/mod.rs +526 -0
- rsplitcap-0.1.0/src/archive/reader.rs +229 -0
- rsplitcap-0.1.0/src/archive/writer.rs +265 -0
- rsplitcap-0.1.0/src/cli.rs +237 -0
- rsplitcap-0.1.0/src/filter.rs +52 -0
- rsplitcap-0.1.0/src/flow/mod.rs +125 -0
- rsplitcap-0.1.0/src/flow/strategy.rs +124 -0
- rsplitcap-0.1.0/src/lib.rs +15 -0
- rsplitcap-0.1.0/src/main.rs +631 -0
- rsplitcap-0.1.0/src/output/mod.rs +150 -0
- rsplitcap-0.1.0/src/output/split.rs +237 -0
- rsplitcap-0.1.0/src/packet.rs +426 -0
- rsplitcap-0.1.0/src/parser/mod.rs +51 -0
- rsplitcap-0.1.0/src/parser/pcap.rs +132 -0
- rsplitcap-0.1.0/src/parser/pcapng.rs +372 -0
- rsplitcap-0.1.0/src/python/mod.rs +576 -0
- rsplitcap-0.1.0/tests/fuzz_parsers.rs +262 -0
- rsplitcap-0.1.0/tests/integration.rs +333 -0
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:  # workflow triggers: pushes and pull requests targeting main, plus published releases
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
release:
|
|
9
|
+
types: [published]  # the publish job is gated on github.event_name == 'release'
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
test:  # runs the Rust test suite; build-python declares `needs: test`
|
|
13
|
+
name: Test (Rust ${{ matrix.rust }})
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
strategy:
|
|
16
|
+
matrix:
|
|
17
|
+
rust: [stable]  # single-entry matrix: the job runs once, on the stable toolchain
|
|
18
|
+
steps:
|
|
19
|
+
- uses: actions/checkout@v4
|
|
20
|
+
- uses: dtolnay/rust-toolchain@stable
|
|
21
|
+
with:
|
|
22
|
+
toolchain: ${{ matrix.rust }}
|
|
23
|
+
- name: Run tests
|
|
24
|
+
run: cargo test  # no --features flag is passed; NOTE(review): confirm whether the `python` feature needs test coverage here
|
|
25
|
+
|
|
26
|
+
build-python:  # builds wheels with maturin on each OS in the matrix and uploads them as artifacts
|
|
27
|
+
name: Build Python wheels
|
|
28
|
+
runs-on: ${{ matrix.os }}
|
|
29
|
+
needs: test
|
|
30
|
+
if: github.event_name == 'release' || github.event_name == 'push'  # excludes pull_request runs
|
|
31
|
+
strategy:
|
|
32
|
+
matrix:
|
|
33
|
+
os: [ubuntu-latest, macos-latest, windows-latest]
|
|
34
|
+
steps:
|
|
35
|
+
- uses: actions/checkout@v4
|
|
36
|
+
- uses: dtolnay/rust-toolchain@stable
|
|
37
|
+
- name: Build wheel
|
|
38
|
+
uses: PyO3/maturin-action@v1
|
|
39
|
+
with:
|
|
40
|
+
target: ${{ matrix.target }}  # NOTE(review): the matrix above defines only `os`, so matrix.target looks undefined here — confirm the default host target is intended
|
|
41
|
+
args: --release --out dist --features python
|
|
42
|
+
- name: Upload wheels
|
|
43
|
+
uses: actions/upload-artifact@v4
|
|
44
|
+
with:
|
|
45
|
+
name: wheels-${{ matrix.os }}  # one artifact per OS; the publish job matches these with the pattern wheels-*
|
|
46
|
+
path: dist
|
|
47
|
+
|
|
48
|
+
publish:  # uploads the wheels built by build-python to PyPI; runs only for release events
|
|
49
|
+
name: Publish to PyPI
|
|
50
|
+
runs-on: ubuntu-latest
|
|
51
|
+
needs: build-python
|
|
52
|
+
if: github.event_name == 'release'
|
|
53
|
+
steps:
|
|
54
|
+
- uses: actions/download-artifact@v4  # fetch the wheels into the workspace; upload-artifact/merge only re-uploads a combined artifact and leaves no files on disk for the upload step
|
|
55
|
+
with:
|
|
56
|
+
path: wheels-all  # without merge-multiple, each matched artifact is extracted to wheels-all/<artifact-name>/
|
|
57
|
+
pattern: wheels-*
|
|
58
|
+
- name: Publish
|
|
59
|
+
uses: PyO3/maturin-action@v1
|
|
60
|
+
env:
|
|
61
|
+
MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
|
|
62
|
+
with:  # the glob below has one extra directory level to match the per-artifact subdirectories
|
|
63
|
+
command: upload
|
|
64
|
+
args: --non-interactive --skip-existing wheels-all/*/*.whl
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# Build artifacts
|
|
2
|
+
/target/
|
|
3
|
+
|
|
4
|
+
# IDE
|
|
5
|
+
.vscode/
|
|
6
|
+
.idea/
|
|
7
|
+
*.swp
|
|
8
|
+
*.swo
|
|
9
|
+
*~
|
|
10
|
+
|
|
11
|
+
# OS
|
|
12
|
+
.DS_Store
|
|
13
|
+
Thumbs.db
|
|
14
|
+
|
|
15
|
+
# Test artifacts
|
|
16
|
+
*.rsplit
|
|
17
|
+
*.pcap
|
|
18
|
+
*.pcapng
|
|
19
|
+
*.cap
|
|
20
|
+
/tmp/
|
|
21
|
+
|
|
22
|
+
# Environment
|
|
23
|
+
.env
|
|
24
|
+
.env.local
|
|
25
|
+
|
|
26
|
+
# Benchmark data (keep script and reports, ignore datasets and binaries)
|
|
27
|
+
bench/USTC-TFC2016-master/
|
|
28
|
+
bench/USTC-TFC2016-master.rar
|
|
29
|
+
bench/SplitCap.exe
|
|
30
|
+
bench/**/runs/
|
|
31
|
+
bench/.wine32/
|
|
32
|
+
|
|
33
|
+
# Python
|
|
34
|
+
__pycache__/
|
|
35
|
+
*.pyc
|
|
36
|
+
.venv/
|
|
37
|
+
*.whl
|
|
38
|
+
|
|
39
|
+
# Cross-compile config (machine-specific paths)
|
|
40
|
+
.cargo/config.toml
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# CLAUDE.md — RSplitCap project guide for Claude Code
|
|
2
|
+
|
|
3
|
+
## Project Overview
|
|
4
|
+
RSplitCap is a Rust rewrite of SplitCap (Windows PCAP splitting tool), with added support for PCAP-NG and a custom single-file archive format (`.rsplit`).
|
|
5
|
+
|
|
6
|
+
## Build & Test Commands
|
|
7
|
+
```bash
|
|
8
|
+
cargo build # Debug build
|
|
9
|
+
cargo build --release # Release build (LTO, single codegen unit)
|
|
10
|
+
cargo test # Run all tests (16 tests: 6 integration + 10 fuzz/robustness)
|
|
11
|
+
cargo clippy -- -D warnings # Lint (zero warnings enforced)
|
|
12
|
+
RUST_LOG=debug cargo run -- <args> # Verbose run
|
|
13
|
+
|
|
14
|
+
# Python bindings build
|
|
15
|
+
cargo build --release --features python # Build native .so
|
|
16
|
+
# Copy to Python package:
|
|
17
|
+
cp target/release/librsplitcap.so python/rsplitcap/_rsplitcap.abi3.so
|
|
18
|
+
PYTHONPATH=$(pwd)/python python -c "import rsplitcap"
|
|
19
|
+
|
|
20
|
+
# Build Python wheel
|
|
21
|
+
pip install maturin
|
|
22
|
+
maturin build --release
|
|
23
|
+
|
|
24
|
+
# Cross-compile for Windows (from Linux/WSL2, requires mingw-w64)
|
|
25
|
+
rustup target add x86_64-pc-windows-gnu
|
|
26
|
+
cargo build --release --target x86_64-pc-windows-gnu
|
|
27
|
+
|
|
28
|
+
# Benchmark (requires Python 3.8+, matplotlib optional)
|
|
29
|
+
python bench/bench_rsplitcap.py --data-dir <path_to_pcaps> \
|
|
30
|
+
--rsplitcap ./target/release/rsplitcap \
|
|
31
|
+
--splitcap ./path/to/SplitCap.exe \
|
|
32
|
+
--output ./bench_report
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Benchmark Script (`bench/bench_rsplitcap.py`)
|
|
36
|
+
- **Shared tests**: 6 grouping strategies × up to 3 files, IP/port filter tests, L7 output — RSplitCap vs SplitCap
|
|
37
|
+
- **RSplitCap-unique tests**: BSSID strategy, archive create/extract, pipeline vs legacy, mmap vs no-mmap
|
|
38
|
+
- **Metrics**: wall time, CPU time, peak RSS (via `/usr/bin/time -v`), output file count/size, packet count
|
|
39
|
+
- **Output**: `report.html` (interactive charts with matplotlib) + `report.json` (machine-readable)
|
|
40
|
+
- **Key options**: `--quick` (1+1 runs), `--max-files N`, `--no-splitcap`, `--skip-*`, `--timeout N`
|
|
41
|
+
- On WSL2, SplitCap paths are auto-converted via `wslpath -w`; wine fallback if no WSL interop
|
|
42
|
+
|
|
43
|
+
## Architecture (layered)
|
|
44
|
+
1. **CLI** (`src/cli.rs`) — clap-based, normalizes SplitCap multi-char flags (`-ip`→`--ip`)
|
|
45
|
+
2. **Parser** (`src/parser/`) — `CaptureReader` trait, PCAP + PCAP-NG (SHB/IDB/EPB/SPB)
|
|
46
|
+
3. **Packet** (`src/packet.rs`) — unified Packet struct, Ethernet/IPv4/IPv6 parsing, WiFi 802.11 + radiotap
|
|
47
|
+
4. **Filter** (`src/filter.rs`) — IP + port AND-logic whitelist
|
|
48
|
+
5. **Flow Manager** (`src/flow/`) — 9 grouping strategies, LRU eviction via generation counter
|
|
49
|
+
6. **Output** — Split mode (per-flow PCAP/L7), Archive mode (`.rsplit` with secondary indexes)
|
|
50
|
+
|
|
51
|
+
## Key Design Decisions
|
|
52
|
+
- Input files are mmap'd via `memmap2` + `Bytes::from_owner` — zero-copy, with OS-managed paging for files larger than available memory.
|
|
53
|
+
- Split mode uses pipelined streaming: parser in background thread (crossbeam bounded channel) → classify + write in main thread via `SplitWriter`. Packets written as they arrive, not accumulated.
|
|
54
|
+
- Archive uses 2-phase write: sequential packets → post-positioned indexes (Delta+LEB128).
|
|
55
|
+
- FlowEntry is fixed 96 bytes — enables O(1) random access in Flow Table.
|
|
56
|
+
- IP addresses stored as 16 bytes (IPv4-mapped IPv6) for uniform handling.
|
|
57
|
+
- `-s seconds N` and `-s packets N` use the `-s` flag with a sub-argument — manually parsed.
|
|
58
|
+
- Secondary indexes: sorted key→[flow_id] maps for IP/port/protocol in archive (binary-searchable).
|
|
59
|
+
- All file writes use atomic temp-file + rename pattern to prevent corruption on interrupt.
|
|
60
|
+
- `--no-pipeline` flag falls back to legacy accumulate-then-write mode.
|
|
61
|
+
|
|
62
|
+
## Module Map
|
|
63
|
+
```
|
|
64
|
+
main.rs ── cli ── filter ── flow (strategy + manager) ── output (split + mod)
|
|
65
|
+
── parser (pcap + pcapng) ── archive (writer + reader + format)
|
|
66
|
+
── packet (ethernet, ipv4, ipv6 ext hdrs, wifi 802.11 + radiotap)
|
|
67
|
+
── python (PyO3 bindings: Archive, Flow, Packet, read_flows, create_archive, split, pipe_archive)
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Python Bindings (`src/python/mod.rs`)
|
|
71
|
+
|
|
72
|
+
PyO3 feature (`--features python`, cfg-gated):
|
|
73
|
+
- **`Archive`** (`ArchivePy`): wraps `ArchiveReader`; opens `.rsplit` files and exposes `flows()`, `get_flow(id)`, `find_by_*`
|
|
74
|
+
- **`Flow`** (`FlowPy`): holds `Arc<ArchiveReader>` + `FlowEntry`, metadata + `packets()` → `Packet` list
|
|
75
|
+
- **`Packet`** (`PacketPy`): `ts_sec`, `ts_usec`, `ts`, `length`, `orig_len`, `data` (bytes)
|
|
76
|
+
- **`read_flows(path)`**: pcap → temp .rsplit → Archive (temp file auto-cleaned on Drop)
|
|
77
|
+
- **`create_archive(input, output, strategy, ip_filters, port_filters)`**: pcap → .rsplit
|
|
78
|
+
- **`split(input, output_dir, strategy, ip_filters, port_filters, output_type)`**: split mode
|
|
79
|
+
- **`pipe_archive(path)`** → `list[bytes]`: each flow as standalone pcap
|
|
80
|
+
|
|
81
|
+
Tracing writes to stderr (`with_writer(io::stderr)`) so stdout stays clean for `--pipe` mode.
|
|
82
|
+
Build requires `libpython3.12.so` symlink (dev package) or `RUSTFLAGS="-L <dir>"`.
|
|
83
|
+
|
|
84
|
+
## Known Gaps
|
|
85
|
+
- LRU eviction is O(n) per eviction (fast in practice for default 10k max_sessions)
|
|
86
|
+
- PCAP-NG: single interface only, no name resolution blocks
|
|
87
|
+
- No WiFi data-frame IP parsing over LLC/SNAP (BSSID/MAC extraction works; five-tuple returns None)
|
|
88
|
+
- **Windows performance**: Cross-compiled `x86_64-pc-windows-gnu` binary is ~8× slower than Linux native. MSVC native build should restore full performance but has not been benchmarked yet. SplitCap runs 7.6× faster natively on Windows than via WSL2/Wine.
|
|
89
|
+
|
|
90
|
+
## Test Coverage
|
|
91
|
+
- `tests/integration.rs`: 6 tests (split, filter, L7, PCAP-NG, archive roundtrip, list-flows)
|
|
92
|
+
- `tests/fuzz_parsers.rs`: 10 tests (random data PCAP/PCAP-NG parsers, malformed packets, LEB128 codec, FlowEntry roundtrip, corrupt archive rejection, WiFi radiotap/management BSSID, WiFi malformed, IPv6 extension header chain)
|