PyPI - network-core - Versions diffs - 0.2.2__tar.gz → 0.3.1__tar.gz - Mend

network-core 0.2.2 (tar.gz) → 0.3.1 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

{network_core-0.2.2 → network_core-0.3.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: network_core
-Version: 0.2.2
+Version: 0.3.1
 Summary: Core networking utilities and data models
 Author: Your Name
 Requires-Python: >=3.9
@@ -13,3 +13,4 @@ Requires-Dist: scapy
 Requires-Dist: matplotlib
 Requires-Dist: brotli
 Requires-Dist: blackboxprotobuf
+Requires-Dist: cryptography

network_core-0.3.1/network_core/__init__.py ADDED Viewed

@@ -0,0 +1,18 @@
+from .conn import Connection
+from .connOps import (
+    ConnStats,
+    filterConnections,
+    combine_connections,
+    printConns,
+    getPacketsInInterval,
+    normalizePacketStream,
+)
+from .dataModels import PacketType, TransPortType, FiveTuple, PacketInfo, HttpUnit
+from .analysis import (
+    classify_http_version,
+    http_version_debug,
+    detect_requests,
+    burst_analysis,
+    conn_features,
+    nan_to_none,
+)

network_core-0.3.1/network_core/analysis.py ADDED Viewed

@@ -0,0 +1,649 @@
+"""Connection and packet stream analysis functions."""
+from __future__ import annotations
+import math
+import numpy as np
+from .dataModels import PacketInfo
+# ---------------------------------------------------------------------------
+# Internal statistical helpers
+# ---------------------------------------------------------------------------
+def _pct(arr: np.ndarray, p: float) -> float:
+    """Return the ``p``-th percentile of ``arr`` as a float, or NaN if ``arr`` is empty."""
+    if len(arr) == 0:
+        return float("nan")
+    return float(np.percentile(arr, p))
+def _safe_mean(arr: np.ndarray) -> float:
+    """Return the arithmetic mean of ``arr`` as a float, or NaN if ``arr`` is empty."""
+    if len(arr) == 0:
+        return float("nan")
+    return float(arr.mean())
+def _safe_median(arr: np.ndarray) -> float:
+    """Return the median of ``arr`` as a float, or NaN if ``arr`` is empty."""
+    if len(arr) == 0:
+        return float("nan")
+    return float(np.median(arr))
+def _safe_std(arr: np.ndarray) -> float:
+    """Return the population standard deviation of ``arr``, or NaN if ``arr`` is empty."""
+    if len(arr) == 0:
+        return float("nan")
+    return float(arr.std())
+def _entropy(arr: np.ndarray, bins: int = 50) -> float:
+    """Shannon entropy (bits) of the histogram of ``arr`` using ``bins`` bins.
+    Returns NaN when ``arr`` holds fewer than two values. Empty bins are
+    dropped before the probabilities are computed.
+    """
+    if len(arr) < 2:
+        return float("nan")
+    hist = np.histogram(arr, bins=bins)[0]
+    occupied = hist[hist > 0]
+    probs = occupied / occupied.sum()
+    return float(-np.sum(probs * np.log2(probs)))
+def _skewness(arr: np.ndarray) -> float:
+    """Fisher skewness of ``arr`` (0 = symmetric, >0 = right tail).
+    Returns NaN for fewer than three values and 0.0 when the standard
+    deviation is zero.
+    """
+    if len(arr) < 3:
+        return float("nan")
+    sigma = arr.std()
+    if sigma == 0:
+        return 0.0
+    centered = arr - arr.mean()
+    return float((centered ** 3).mean() / sigma ** 3)
+def _kurtosis(arr: np.ndarray) -> float:
+    """Excess kurtosis of ``arr`` (0 = normal, >0 = heavy tails).
+    Returns NaN for fewer than four values and 0.0 when the standard
+    deviation is zero.
+    """
+    if len(arr) < 4:
+        return float("nan")
+    sigma = arr.std()
+    if sigma == 0:
+        return 0.0
+    centered = arr - arr.mean()
+    return float((centered ** 4).mean() / sigma ** 4 - 3)
+def _autocorr(arr: np.ndarray, lag: int = 1) -> float:
+    """Pearson autocorrelation of ``arr`` against itself shifted by ``lag`` samples.
+    Returns NaN when the correlation is undefined: ``lag`` is not positive,
+    fewer than ``lag + 2`` samples are available, or either of the two
+    overlapping segments is constant (zero variance).
+    """
+    # lag < 1 must be rejected up front: arr[:-0] is an empty slice, so the two
+    # segments would have different lengths and np.corrcoef would fail.
+    if lag < 1 or len(arr) <= lag + 1:
+        return float("nan")
+    head = np.asarray(arr[:-lag])
+    tail = np.asarray(arr[lag:])
+    # A constant segment makes Pearson r undefined; answer NaN directly rather
+    # than letting np.corrcoef divide by zero and emit a RuntimeWarning.
+    if head.max() == head.min() or tail.max() == tail.min():
+        return float("nan")
+    return float(np.corrcoef(head, tail)[0, 1])
+def _burst_stats(ps: list[PacketInfo], direction: int, gap_ms: float = 500) -> dict:
+    """Summarise bursts of data-carrying packets travelling in one direction.
+    Only packets whose ``direction`` matches and whose length exceeds 100 B are
+    considered. Consecutive packets stay in the same burst until the gap
+    between two neighbours exceeds ``gap_ms`` milliseconds.
+    Returns a dict with:
+      n_bursts             — number of distinct bursts
+      burst_mean_bytes     — mean bytes per burst
+      burst_mean_pkts      — mean packets per burst
+      burst_mean_dur_ms    — mean burst duration (ms)
+      interburst_mean_ms   — mean gap between a burst's end and the next burst's start (ms)
+      interburst_cv        — coefficient of variation of the inter-burst gaps
+      interburst_p95_ms    — 95th-percentile inter-burst gap (ms)
+    With fewer than two qualifying packets, ``n_bursts`` is 0 and every other
+    field is NaN. The inter-burst fields are NaN when only one burst exists.
+    """
+    nan = float("nan")
+    data_pkts = [pkt for pkt in ps if pkt.direction == direction and pkt.length > 100]
+    if len(data_pkts) < 2:
+        return {
+            "n_bursts": 0,
+            "burst_mean_bytes": nan,
+            "burst_mean_pkts": nan,
+            "burst_mean_dur_ms": nan,
+            "interburst_mean_ms": nan,
+            "interburst_cv": nan,
+            "interburst_p95_ms": nan,
+        }
+    # Walk neighbouring packets; a gap above the threshold opens a new burst.
+    groups: list[list[PacketInfo]] = [[data_pkts[0]]]
+    for earlier, later in zip(data_pkts, data_pkts[1:]):
+        if (later.timestamp - earlier.timestamp) * 1000 > gap_ms:
+            groups.append([later])
+        else:
+            groups[-1].append(later)
+    total_bytes = np.array(
+        [sum(pkt.length for pkt in grp) for grp in groups], dtype=np.float64
+    )
+    pkt_counts = np.array([len(grp) for grp in groups], dtype=np.float64)
+    durations_ms = np.array(
+        [(grp[-1].timestamp - grp[0].timestamp) * 1000 for grp in groups],
+        dtype=np.float64,
+    )
+    ib_mean = ib_cv = ib_p95 = nan
+    if len(groups) > 1:
+        gaps = np.array(
+            [(nxt[0].timestamp - prev[-1].timestamp) * 1000
+             for prev, nxt in zip(groups, groups[1:])],
+            dtype=np.float64,
+        )
+        ib_mean = float(gaps.mean())
+        if gaps.mean() > 0:
+            ib_cv = float(gaps.std() / gaps.mean())
+        ib_p95 = float(np.percentile(gaps, 95))
+    return {
+        "n_bursts":           len(groups),
+        "burst_mean_bytes":   float(total_bytes.mean()),
+        "burst_mean_pkts":    float(pkt_counts.mean()),
+        "burst_mean_dur_ms":  float(durations_ms.mean()),
+        "interburst_mean_ms": ib_mean,
+        "interburst_cv":      ib_cv,
+        "interburst_p95_ms":  ib_p95,
+    }
+def _classify_pattern(
+    n_pkts: int,
+    up_frac: float,
+    bytes_per_sec: float,
+    iat_cv: float,
+    iat_autocorr_lag1: float,
+    up_n_bursts: int,
+    dn_pkt_p95: float,
+    n_requests: int,
+    http_ver: str,
+) -> str:
+    """Assign one of seven traffic-pattern labels to a flow.
+    Rules are evaluated top to bottom and the first match wins:
+      idle             — fewer than 10 packets
+      bulk_upload      — up_frac above 0.75
+      bulk_download    — up_frac below 0.15, dn_pkt_p95 above 1200 and
+                         bytes_per_sec above 50 000
+      streaming        — iat_cv below 1.2, iat_autocorr_lag1 above 0.25 and
+                         up_frac below 0.30
+      interactive      — at least two detected requests or two upload bursts
+      request_response — one upload burst or one detected request
+      mixed            — none of the above
+    A None or NaN value for dn_pkt_p95, bytes_per_sec, iat_cv or
+    iat_autocorr_lag1 never satisfies its threshold. ``http_ver`` is accepted
+    but not consulted by any rule.
+    """
+    def _known(value: float) -> bool:
+        return not (value is None or math.isnan(value))
+    if n_pkts < 10:
+        return "idle"
+    if up_frac > 0.75:
+        return "bulk_upload"
+    big_packets = _known(dn_pkt_p95) and dn_pkt_p95 > 1200
+    high_rate = _known(bytes_per_sec) and bytes_per_sec > 50_000
+    if up_frac < 0.15 and big_packets and high_rate:
+        return "bulk_download"
+    steady_iat = _known(iat_cv) and iat_cv < 1.2
+    correlated = _known(iat_autocorr_lag1) and iat_autocorr_lag1 > 0.25
+    if steady_iat and correlated and up_frac < 0.30:
+        return "streaming"
+    if up_n_bursts >= 2 or n_requests >= 2:
+        return "interactive"
+    if n_requests == 1 or up_n_bursts == 1:
+        return "request_response"
+    return "mixed"
+# ---------------------------------------------------------------------------
+# Public functions
+# ---------------------------------------------------------------------------
+def classify_http_version(ps: list[PacketInfo], alpn: str = "nan") -> str:
+    """Infer the HTTP version of a flow, trusting ALPN before a size heuristic.
+    A recognised ALPN value decides the result on its own:
+      'h2'                     → 'HTTP/2'
+      'http/1.1' or 'http/1.0' → 'HTTP/1.1'
+    Otherwise (ALPN is empty, 'nan', or any other value) the median length of
+    small (<200 B) direction-0 packets is used:
+      median ≤ 72 B → 'HTTP/1.1'  (pure TCP ACKs)
+      median > 72 B → 'HTTP/2'    (WINDOW_UPDATE/PING frames ~79-87 B)
+    Returns 'unknown' when fewer than three such packets exist.
+    """
+    has_alpn = bool(alpn) and alpn not in ("nan", "")
+    if has_alpn and alpn == "h2":
+        return "HTTP/2"
+    if has_alpn and alpn in ("http/1.1", "http/1.0"):
+        return "HTTP/1.1"
+    small_up = [pkt.length for pkt in ps if pkt.direction == 0 and pkt.length < 200]
+    if len(small_up) < 3:
+        return "unknown"
+    median_len = np.median(np.array(small_up, dtype=np.float32))
+    if median_len <= 72:
+        return "HTTP/1.1"
+    return "HTTP/2"
+def http_version_debug(ps: list[PacketInfo], alpn: str = "nan") -> dict:
+    """Return a detailed breakdown of the HTTP version classification.
+    Useful for auditing flows where the heuristic result seems wrong.
+    The returned dict holds the verdict from classify_http_version, where it
+    came from ('ALPN' or 'heuristic'), a confidence label, the raw ALPN value,
+    counts and summary statistics of small (<200 B) direction-0 packets, a
+    coarse size histogram and the 72 B decision threshold. Statistics are None
+    when there are no small upload packets.
+    """
+    all_up = [p.length for p in ps if p.direction == 0]
+    small = [n for n in all_up if n < 200]
+    verdict = classify_http_version(ps, alpn)
+    # Only these ALPN values are honoured by classify_http_version; any other
+    # value (e.g. 'h3') falls through to the packet-size heuristic, so the
+    # verdict must not be attributed to ALPN in that case.
+    source = "ALPN" if alpn in ("h2", "http/1.1", "http/1.0") else "heuristic"
+    buckets = {
+        "<54":     sum(1 for n in small if n < 54),
+        "54-66":   sum(1 for n in small if 54 <= n <= 66),
+        "67-80":   sum(1 for n in small if 67 <= n <= 80),
+        "81-100":  sum(1 for n in small if 81 <= n <= 100),
+        "101-199": sum(1 for n in small if 101 <= n <= 199),
+    }
+    med = float(np.median(small)) if small else None
+    if source == "ALPN":
+        confidence = "high (ALPN)"
+    elif med is None:
+        confidence = "unknown (no small packets)"
+    elif len(small) < 3:
+        # The heuristic needs at least three small packets and answers
+        # 'unknown' below that, so no confidence can be claimed.
+        confidence = "unknown (fewer than 3 small packets)"
+    elif abs(med - 72) > 12:
+        confidence = "high (heuristic)"
+    else:
+        confidence = "low (heuristic, median near threshold)"
+    return {
+        "verdict": verdict,
+        "source": source,
+        "confidence": confidence,
+        "alpn": alpn,
+        "small_up_count": len(small),
+        "total_up_count": len(all_up),
+        "small_up_median": med,
+        "small_up_p25": float(np.percentile(small, 25)) if small else None,
+        "small_up_p75": float(np.percentile(small, 75)) if small else None,
+        "small_up_min": float(min(small)) if small else None,
+        "small_up_max": float(max(small)) if small else None,
+        "histogram": buckets,
+        "threshold": 72,
+        "note": "ACKs cluster at ~66 B; HTTP/2 WINDOW_UPDATE/PING at ~79-87 B",
+    }
+def detect_requests(
+    ps: list[PacketInfo],
+    gap_ms: float = 800,
+    min_bytes: int = 5000,
+    min_pkts: int = 5,
+) -> list[dict]:
+    """Split the upload stream on idle gaps to find HTTP/1.1 request boundaries.
+    Only data-carrying packets (>100 B) are used for burst detection to avoid
+    pure TCP ACKs extending burst windows across server-sent heartbeats.
+    A burst counts as a request only if it carries at least ``min_bytes`` bytes
+    in at least ``min_pkts`` packets. ``ps`` is expected to be in timestamp
+    order: the response window is located with a binary search over the
+    download timestamps and the last packet's timestamp bounds the final window.
+    Returns one dict per detected request:
+      req_kb, req_pkts, req_dur_ms — upload burst stats
+      think_ms                     — server processing time (first dn packet after burst)
+      res_kb, res_pkts, res_dur_ms — downstream response stats
+      client_gap_ms                — gap before next request (None if last)
+      t_start, t_end               — burst timestamps
+      turn_idx                     — 0-based request index
+    Returns an empty list when fewer than two data-carrying upload packets
+    exist or no burst passes the size thresholds.
+    """
+    data_ups = [p for p in ps if p.direction == 0 and p.length > 100]
+    if len(data_ups) < 2:
+        return []
+    # Group neighbouring upload packets; a gap above gap_ms opens a new burst.
+    bursts: list[list[PacketInfo]] = [[data_ups[0]]]
+    for prev, cur in zip(data_ups, data_ups[1:]):
+        if (cur.timestamp - prev.timestamp) * 1000 > gap_ms:
+            bursts.append([cur])
+        else:
+            bursts[-1].append(cur)
+    real = [
+        b for b in bursts
+        if sum(p.length for p in b) >= min_bytes and len(b) >= min_pkts
+    ]
+    if not real:
+        return []
+    dns = [p for p in ps if p.direction == 1]
+    dn_ts = np.array([p.timestamp for p in dns], dtype=np.float64)
+    dn_lens = np.array([p.length for p in dns], dtype=np.float32)
+    last_ts = ps[-1].timestamp
+    rows = []
+    for idx, b in enumerate(real):
+        burst_end = b[-1].timestamp
+        next_start = real[idx + 1][0].timestamp if idx + 1 < len(real) else None
+        # Compare with None explicitly: a next burst starting at timestamp 0.0
+        # (e.g. relative timestamps) is falsy but is still a real boundary.
+        has_next = next_start is not None
+        res_win_end = next_start if has_next else last_ts + 1
+        # Response window = download packets strictly after the burst ends and
+        # strictly before the next request starts.
+        dn_s = int(np.searchsorted(dn_ts, burst_end, side="right"))
+        dn_e = int(np.searchsorted(dn_ts, res_win_end, side="left"))
+        think_ms = float((dn_ts[dn_s] - burst_end) * 1000) if dn_s < len(dn_ts) else None
+        res_lens = dn_lens[dn_s:dn_e]
+        res_kb = float(res_lens.sum()) / 1024
+        n_res = dn_e - dn_s
+        if n_res > 0:
+            res_dur_ms = float((dn_ts[dn_e - 1] - dn_ts[dn_s]) * 1000)
+            client_gap_ms = float((next_start - dn_ts[dn_e - 1]) * 1000) if has_next else None
+        else:
+            res_dur_ms = client_gap_ms = None
+        rows.append({
+            "req_kb":        sum(p.length for p in b) / 1024,
+            "req_pkts":      len(b),
+            "req_dur_ms":    (b[-1].timestamp - b[0].timestamp) * 1000,
+            "think_ms":      think_ms,
+            "res_kb":        res_kb,
+            "res_pkts":      n_res,
+            "res_dur_ms":    res_dur_ms,
+            "client_gap_ms": client_gap_ms,
+            "t_start":       b[0].timestamp,
+            "t_end":         burst_end,
+            "turn_idx":      idx,
+        })
+    return rows
+def burst_analysis(
+    ps: list[PacketInfo],
+    gap_ms: float = 500,
+) -> dict:
+    """Compute burst statistics separately for upload and download traffic.
+    Each direction's data packets are split into bursts on idle gaps longer
+    than ``gap_ms`` milliseconds, so the result does not depend on the
+    application protocol (unlike detect_requests, which targets HTTP/1.1).
+    Returns ``{'up': <stats for direction 0>, 'dn': <stats for direction 1>}``.
+    """
+    directions = (("up", 0), ("dn", 1))
+    return {
+        label: _burst_stats(ps, direction=code, gap_ms=gap_ms)
+        for label, code in directions
+    }
+def conn_features(
+    ps: list[PacketInfo],
+    http_ver: str,
+    requests: list[dict],
+) -> dict | None:
+    """Extract a comprehensive statistical feature set from a connection's packet stream.
+    Parameters:
+      ps        — the connection's packets; timestamps are differenced in list order
+      http_ver  — HTTP version label, copied into the output unchanged
+      requests  — per-request dicts with 'req_kb', 'res_kb' and optional 'think_ms'
+                  keys (the shape detect_requests returns)
+    Returns None if the connection has fewer than 5 packets or carries zero bytes
+    in total.
+    Fields that are undefined for a given flow appear as NaN (use nan_to_none before
+    serialising to JSON).
+    Output is organised into sections (all in a flat dict):
+    Flow summary
+      http_ver        — 'HTTP/1.1', 'HTTP/2', or 'unknown'
+      n_pkts          — total packet count
+      flow_dur_s      — connection duration in seconds
+      bytes_up/dn     — total bytes per direction
+      up_frac         — fraction of bytes that are upload (0-1)
+    Throughput  (NaN when the flow duration is zero)
+      bytes_per_sec, bytes_per_sec_up, bytes_per_sec_dn
+      pps, pps_up, pps_dn
+    Packet size — all packets
+      pkt_mean/median/std/min/max
+      pkt_p25/p75/p95/p99
+      pkt_skew        — Fisher skewness (>0 = right tail, i.e. many small + few large)
+      pkt_kurt        — excess kurtosis (>0 = heavier tails than normal)
+      pkt_entropy     — Shannon entropy of the size histogram (bits); low = repetitive sizes
+      frac_ack        — fraction of packets <100 B (pure TCP ACKs / control frames)
+      frac_medium     — fraction 100-999 B
+      frac_large      — fraction ≥1000 B
+      frac_mtu        — fraction ≥1400 B (near MTU, data-carrying)
+    Packet size — per direction
+      up_pkt_mean/median/std/p95
+      dn_pkt_mean/median/std/p95
+    Inter-arrival time (IAT) — all packets (milliseconds)
+      iat_mean/median/std/min/max_ms
+      iat_p25/p75/p95/p99_ms
+      iat_cv          — coefficient of variation (std/mean); high = bursty, low = smooth
+      iat_entropy     — entropy of the IAT histogram; low = periodic, high = irregular
+      iat_autocorr_lag1/lag2 — autocorrelation at lag 1 and 2; >0.3 suggests periodicity
+    IAT — per direction
+      up_iat_mean/median/std_ms
+      dn_iat_mean/median/std_ms
+    Upload burst analysis  (gap_ms=500 threshold)
+      up_n_bursts
+      up_burst_mean_bytes/pkts/dur_ms
+      up_interburst_mean_ms, up_interburst_cv, up_interburst_p95_ms
+    Timing landmarks
+      ttfb_ms         — time from first upload data packet to first download data packet (ms);
+                        NaN unless the first download data packet comes later
+      idle_up_med     — median size of small (<200 B) upload packets; used for HTTP version
+                        heuristic (~66 B = ACK → HTTP/1.1, ~82 B = H2 frame → HTTP/2);
+                        NaN with fewer than 3 such packets
+    HTTP/1.1 request-level features
+      n_requests      — len(requests)
+      mean_think_ms   — mean server processing time across requests (NaN if none available)
+      mean_res_kb     — mean response size per request (KB; NaN if no requests)
+      The remaining fields are NaN when fewer than 2 requests are given:
+      req_slope       — linear slope of request sizes over turns (KB/turn)
+      req_growth      — ratio of last to first request size
+      req_cv          — coefficient of variation of request sizes
+      mono_frac       — fraction of consecutive turns where request grew
+      delta_res_corr  — correlation between Δrequest and previous response size
+      delta_res_ratio — mean ratio of Δrequest to previous response
+    Traffic pattern  (derived label)
+      traffic_pattern — one of:
+        'idle'             <10 packets
+        'bulk_upload'      >75% bytes are upload
+        'bulk_download'    <15% bytes upload, large packets, high throughput
+        'streaming'        sustained download with regular IAT (LLM tokens, video, audio)
+        'interactive'      alternating request/response turns (LLM chat, API, browsing)
+        'request_response' single upload burst + download (one-shot API call)
+        'mixed'            does not fit cleanly
+    """
+    if len(ps) < 5:
+        return None
+    lens = np.array([p.length    for p in ps], dtype=np.float32)
+    dirs = np.array([p.direction for p in ps], dtype=np.int8)
+    ts   = np.array([p.timestamp for p in ps], dtype=np.float64)
+    iats = np.diff(ts) * 1000  # ms
+    # Direction 0 is upload; every other direction value is counted as download.
+    up_m   = dirs == 0
+    dn_m   = ~up_m
+    up_len = lens[up_m]
+    dn_len = lens[dn_m]
+    bytes_up = float(up_len.sum())
+    bytes_dn = float(dn_len.sum())
+    total    = bytes_up + bytes_dn
+    # total == 0 would make up_frac undefined; the len(iats) check is defensive,
+    # since at least 5 packets always yield at least 4 inter-arrival times.
+    if total == 0 or len(iats) == 0:
+        return None
+    dur    = float(ts[-1] - ts[0])
+    n_pkts = len(ps)
+    # --- throughput ---
+    bps    = total    / dur if dur > 0 else float("nan")
+    bps_up = bytes_up / dur if dur > 0 else float("nan")
+    bps_dn = bytes_dn / dur if dur > 0 else float("nan")
+    pps    = n_pkts         / dur if dur > 0 else float("nan")
+    pps_up = int(up_m.sum()) / dur if dur > 0 else float("nan")
+    pps_dn = int(dn_m.sum()) / dur if dur > 0 else float("nan")
+    # --- directional IATs ---
+    up_ts   = ts[up_m]
+    dn_ts_a = ts[dn_m]
+    up_iats = np.diff(up_ts)   * 1000 if len(up_ts)   > 1 else np.array([], dtype=np.float64)
+    dn_iats = np.diff(dn_ts_a) * 1000 if len(dn_ts_a) > 1 else np.array([], dtype=np.float64)
+    iat_cv = float(iats.std() / iats.mean()) if iats.mean() > 0 else float("nan")
+    ac1    = _autocorr(iats, 1)
+    ac2    = _autocorr(iats, 2)
+    # --- TTFB ---
+    # Data packets are those above 100 B; only the first one in each direction is compared.
+    up_data = [p for p in ps if p.direction == 0 and p.length > 100]
+    dn_data = [p for p in ps if p.direction == 1 and p.length > 100]
+    if up_data and dn_data and dn_data[0].timestamp > up_data[0].timestamp:
+        ttfb_ms = float((dn_data[0].timestamp - up_data[0].timestamp) * 1000)
+    else:
+        ttfb_ms = float("nan")
+    # --- upload burst stats ---
+    ub = _burst_stats(ps, direction=0, gap_ms=500)
+    # --- HTTP/1.1 request features ---
+    req_kbs = [r["req_kb"]   for r in requests]
+    res_kbs = [r["res_kb"]   for r in requests]
+    think_times = [r["think_ms"] for r in requests if r.get("think_ms") is not None]
+    if len(req_kbs) >= 2:
+        turns       = np.arange(len(req_kbs), dtype=float)
+        req_slope   = float(np.polyfit(turns, req_kbs, 1)[0])
+        # Denominators are floored at 0.01 so a zero-sized first request or a
+        # zero mean cannot cause a division by zero.
+        req_growth  = float(req_kbs[-1] / max(req_kbs[0], 0.01))
+        req_cv      = float(np.std(req_kbs) / max(np.mean(req_kbs), 0.01))
+        mono_frac   = float(
+            sum(req_kbs[i + 1] > req_kbs[i] for i in range(len(req_kbs) - 1))
+            / (len(req_kbs) - 1)
+        )
+        delta_reqs  = [req_kbs[i + 1] - req_kbs[i] for i in range(len(req_kbs) - 1)]
+        prev_res    = [res_kbs[i]                   for i in range(len(res_kbs) - 1)]
+        # Keep only turns whose previous response was non-empty, so the ratio
+        # below never divides by zero.
+        valid       = [(d, r) for d, r in zip(delta_reqs, prev_res) if r > 0]
+        if len(valid) >= 2:
+            d_arr, r_arr    = np.array([v[0] for v in valid]), np.array([v[1] for v in valid])
+            delta_res_corr  = float(np.corrcoef(d_arr, r_arr)[0, 1])
+            delta_res_ratio = float(np.mean(d_arr / r_arr))
+        else:
+            delta_res_corr = delta_res_ratio = float("nan")
+    else:
+        req_slope = req_growth = req_cv = float("nan")
+        mono_frac = delta_res_corr = delta_res_ratio = float("nan")
+    small_ups   = up_len[up_len < 200]
+    idle_up_med = float(np.median(small_ups)) if len(small_ups) >= 3 else float("nan")
+    # --- traffic pattern ---
+    pattern = _classify_pattern(
+        n_pkts=n_pkts,
+        up_frac=bytes_up / total,
+        bytes_per_sec=bps,
+        iat_cv=iat_cv,
+        iat_autocorr_lag1=ac1,
+        up_n_bursts=ub["n_bursts"],
+        dn_pkt_p95=_pct(dn_len, 95),
+        n_requests=len(requests),
+        http_ver=http_ver,
+    )
+    return {
+        # flow summary
+        "http_ver":             http_ver,
+        "n_pkts":               n_pkts,
+        "flow_dur_s":           dur,
+        "bytes_up":             bytes_up,
+        "bytes_dn":             bytes_dn,
+        "up_frac":              bytes_up / total,
+        # throughput
+        "bytes_per_sec":        bps,
+        "bytes_per_sec_up":     bps_up,
+        "bytes_per_sec_dn":     bps_dn,
+        "pps":                  pps,
+        "pps_up":               pps_up,
+        "pps_dn":               pps_dn,
+        # packet size — all
+        "pkt_mean":             float(lens.mean()),
+        "pkt_median":           float(np.median(lens)),
+        "pkt_std":              float(lens.std()),
+        "pkt_min":              float(lens.min()),
+        "pkt_max":              float(lens.max()),
+        "pkt_p25":              _pct(lens, 25),
+        "pkt_p75":              _pct(lens, 75),
+        "pkt_p95":              _pct(lens, 95),
+        "pkt_p99":              _pct(lens, 99),
+        "pkt_skew":             _skewness(lens),
+        "pkt_kurt":             _kurtosis(lens),
+        "pkt_entropy":          _entropy(lens),
+        "frac_ack":             float(np.mean(lens < 100)),
+        "frac_medium":          float(np.mean((lens >= 100) & (lens < 1000))),
+        "frac_large":           float(np.mean(lens >= 1000)),
+        "frac_mtu":             float(np.mean(lens >= 1400)),
+        # packet size — upload
+        "up_pkt_mean":          _safe_mean(up_len),
+        "up_pkt_median":        _safe_median(up_len),
+        "up_pkt_std":           _safe_std(up_len),
+        "up_pkt_p95":           _pct(up_len, 95),
+        # packet size — download
+        "dn_pkt_mean":          _safe_mean(dn_len),
+        "dn_pkt_median":        _safe_median(dn_len),
+        "dn_pkt_std":           _safe_std(dn_len),
+        "dn_pkt_p95":           _pct(dn_len, 95),
+        # IAT — all (ms)
+        "iat_mean_ms":          float(iats.mean()),
+        "iat_median_ms":        float(np.median(iats)),
+        "iat_std_ms":           float(iats.std()),
+        "iat_min_ms":           float(iats.min()),
+        "iat_max_ms":           float(iats.max()),
+        "iat_p25_ms":           _pct(iats, 25),
+        "iat_p75_ms":           _pct(iats, 75),
+        "iat_p95_ms":           _pct(iats, 95),
+        "iat_p99_ms":           _pct(iats, 99),
+        "iat_cv":               iat_cv,
+        "iat_entropy":          _entropy(iats),
+        "iat_autocorr_lag1":    ac1,
+        "iat_autocorr_lag2":    ac2,
+        # IAT — upload
+        "up_iat_mean_ms":       _safe_mean(up_iats),
+        "up_iat_median_ms":     _safe_median(up_iats),
+        "up_iat_std_ms":        _safe_std(up_iats),
+        # IAT — download
+        "dn_iat_mean_ms":       _safe_mean(dn_iats),
+        "dn_iat_median_ms":     _safe_median(dn_iats),
+        "dn_iat_std_ms":        _safe_std(dn_iats),
+        # upload bursts
+        "up_n_bursts":          ub["n_bursts"],
+        "up_burst_mean_bytes":  ub["burst_mean_bytes"],
+        "up_burst_mean_pkts":   ub["burst_mean_pkts"],
+        "up_burst_mean_dur_ms": ub["burst_mean_dur_ms"],
+        "up_interburst_mean_ms":ub["interburst_mean_ms"],
+        "up_interburst_cv":     ub["interburst_cv"],
+        "up_interburst_p95_ms": ub["interburst_p95_ms"],
+        # timing landmarks
+        "ttfb_ms":              ttfb_ms,
+        "idle_up_med":          idle_up_med,
+        # HTTP/1.1 request features
+        "n_requests":           len(requests),
+        "mean_think_ms":        float(np.mean(think_times)) if think_times else float("nan"),
+        "mean_res_kb":          float(np.mean(res_kbs))     if res_kbs    else float("nan"),
+        "req_slope":            req_slope,
+        "req_growth":           req_growth,
+        "req_cv":               req_cv,
+        "mono_frac":            mono_frac,
+        "delta_res_corr":       delta_res_corr,
+        "delta_res_ratio":      delta_res_ratio,
+        # derived pattern
+        "traffic_pattern":      pattern,
+    }
+def nan_to_none(obj: object) -> object:
+    """Recursively replace float NaN with None for JSON serialisation.
+    Dicts, lists and tuples are rebuilt with their values converted (tuples
+    come back as plain tuples). Both built-in floats and NumPy floating
+    scalars such as ``np.float32`` are recognised; anything else is returned
+    unchanged.
+    """
+    if isinstance(obj, dict):
+        return {k: nan_to_none(v) for k, v in obj.items()}
+    if isinstance(obj, list):
+        return [nan_to_none(v) for v in obj]
+    if isinstance(obj, tuple):
+        return tuple(nan_to_none(v) for v in obj)
+    # np.float64 subclasses float, but np.float32/np.float16 do not, so the
+    # NumPy floating base class has to be checked explicitly.
+    if isinstance(obj, (float, np.floating)) and math.isnan(obj):
+        return None
+    return obj

{network_core-0.2.2 → network_core-0.3.1}/network_core/conn.py RENAMED Viewed

@@ -47,7 +47,30 @@ class Connection:
         self.packet_stream: List[PacketInfo] = []
         self.other_info = other_info
         self.__sni = None
+        self.__alpn = None
         self.is_quic = False
+        self._stats_dirty = True
+        self._up_bytes_c = 0
+        self._down_bytes_c = 0
+        self._up_packets_c = 0
+        self._down_packets_c = 0
+    def _build_stats(self):
+        """Refresh the cached per-direction byte and packet totals when stale.
+        Does nothing unless ``_stats_dirty`` is set. Packets with direction 0
+        count as upload; every other direction counts as download. Clears
+        ``_stats_dirty`` once the totals are stored.
+        """
+        if not self._stats_dirty:
+            return
+        up_sizes = [pkt.length for pkt in self.packet_stream if pkt.direction == 0]
+        down_sizes = [pkt.length for pkt in self.packet_stream if not pkt.direction == 0]
+        self._up_bytes_c = sum(up_sizes)
+        self._down_bytes_c = sum(down_sizes)
+        self._up_packets_c = len(up_sizes)
+        self._down_packets_c = len(down_sizes)
+        self._stats_dirty = False
     @staticmethod
     def getTransPortType(packet):
@@ -91,6 +114,7 @@ class Connection:
             assert False, "Packet Type is Unknown"
         self.connection_type.add(packet.packet_type)
         self.packet_stream.append(packet)
+        self._stats_dirty = True
     def __len__(self):
         return len(self.packet_stream)
@@ -101,6 +125,12 @@ class Connection:
     def getSNI(self):
         return self.__sni
+    def setALPN(self, alpn: str):
+        """Store ``alpn`` as this connection's ALPN value (read back with getALPN)."""
+        self.__alpn = alpn
+    # NOTE(review): the `str | None` return annotation is evaluated at definition
+    # time on Python 3.9 unless this module has `from __future__ import annotations`
+    # (not visible in this hunk); PKG-INFO declares Requires-Python >=3.9 — confirm.
+    def getALPN(self) -> str | None:
+        """Return the ALPN value stored by setALPN, or None if none was set."""
+        return self.__alpn
     def getPacketsOfTypes(self, packet_types: set[PacketType]) -> List[PacketInfo]:
         return list(filter(lambda x: x.packet_type in packet_types, self.packet_stream))
@@ -149,35 +179,23 @@ class Connection:
     @property
     def down_bytes(self):
-        total = 0
-        for p in self.packet_stream:
-            if p.direction == 1:
-                total += p.length
-        return total
+        self._build_stats()
+        return self._down_bytes_c
     @property
     def up_bytes(self):
-        total = 0
-        for p in self.packet_stream:
-            if p.direction == 0:
-                total += p.length
-        return total
+        self._build_stats()
+        return self._up_bytes_c
     @property
     def down_packets(self):
-        total = 0
-        for p in self.packet_stream:
-            if p.direction == 1:
-                total += 1
-        return total
+        self._build_stats()
+        return self._down_packets_c
     @property
     def up_packets(self):
-        total = 0
-        for p in self.packet_stream:
-            if p.direction == 0:
-                total += 1
-        return total
+        self._build_stats()
+        return self._up_packets_c
     @property
     def start_timestamp(self):

network_core-0.3.1/network_core/utils/dt.py ADDED Viewed

@@ -0,0 +1,60 @@
+import calendar
+import datetime, pytz
+def convertUNIXToDT(timestamp: float, timezone="Australia/Sydney") -> datetime.datetime:
+    """Convert a UNIX timestamp to a timezone-aware datetime in ``timezone``.
+    ``timezone`` is a pytz zone name; the timestamp is read as UTC and then
+    converted to that zone.
+    """
+    target_tz = pytz.timezone(timezone)
+    return datetime.datetime.fromtimestamp(timestamp, tz=pytz.utc).astimezone(target_tz)
+def convertUNIXToHumanReadable(timestamp: float, timezone="Australia/Sydney") -> str:
+    """Format a UNIX timestamp as 'YYYY-MM-DD HH:MM:SS.mmm' in ``timezone``.
+    The fractional part is the millisecond count, always three digits wide.
+    """
+    dt = convertUNIXToDT(timestamp=timestamp, timezone=timezone)
+    ms = dt.microsecond // 1000  # truncate microseconds to whole milliseconds
+    # Zero-pad to three digits: without padding 5 ms would print as ".5",
+    # which reads as 500 ms.
+    return dt.strftime("%Y-%m-%d %H:%M:%S") + f".{ms:03d}"
+def getDatetimeFromTime(time: datetime.time) -> datetime.datetime:
+    """Combine ``time`` with today's local date into a datetime."""
+    today = datetime.date.today()
+    return datetime.datetime.combine(today, time)
+def addBufferToTime(time: datetime.time, buffer_in_seconds: float) -> datetime.time:
+    """Shift ``time`` by ``buffer_in_seconds`` seconds and return the new time of day.
+    The shift is applied to today's date combined with ``time`` and only the
+    time-of-day of the result is returned, so shifts past midnight wrap around.
+    """
+    anchored = datetime.datetime.combine(datetime.datetime.today(), time=time)
+    shifted = anchored + datetime.timedelta(seconds=buffer_in_seconds)
+    return shifted.time()
+def fast_strptime(s: str) -> float:
+    """Parse '2026-01-11 12:25:31.689768 +0530 IST' to a UNIX timestamp float.
+    Avoids datetime.strptime (slow format-string regex) by parsing integers
+    directly. ~5-10x faster when called per-packet on large CSV files.
+    Accepted layout: 'YYYY-MM-DD HH:MM:SS[.ffffff] [±HHMM | ±HH:MM | ±HH] [name]'.
+    A missing offset means UTC, fractional digits beyond microseconds are
+    dropped, and a trailing timezone name is ignored.
+    Raises ValueError or IndexError on input that does not follow this layout.
+    """
+    fields = s.split()
+    # fields[0]=date  fields[1]=time  fields[2]=offset  fields[3]=tz_name (optional)
+    date_part = fields[0]   # "2026-01-11"
+    time_part = fields[1]   # "12:25:31.689768" or "12:25:31"
+    offset = fields[2] if len(fields) > 2 else "+0000"
+    year = int(date_part[0:4])
+    month = int(date_part[5:7])
+    day = int(date_part[8:10])
+    hms, dot, frac = time_part.partition(".")
+    # Keep at most 6 fractional digits and right-pad, so ".5" means 500000 µs.
+    microseconds = int(frac[:6].ljust(6, "0")) if dot else 0
+    hour = int(hms[0:2])
+    minute = int(hms[3:5])
+    sec = int(hms[6:8])
+    sign = 1 if offset[0] == "+" else -1
+    # Drop an optional colon so "+05:30" and "+0530" parse alike; a bare "+05"
+    # has no minutes field and is read as zero minutes.
+    digits = offset[1:].replace(":", "")
+    offset_secs = sign * (int(digits[0:2]) * 3600 + int(digits[2:4] or 0) * 60)
+    # calendar.timegm treats the tuple as UTC; subtract offset to convert to UTC
+    utc_ts = calendar.timegm((year, month, day, hour, minute, sec, 0, 0, 0))
+    return utc_ts - offset_secs + microseconds / 1_000_000

network_core-0.3.1/network_core/utils/pcapIO.py ADDED Viewed

@@ -0,0 +1,162 @@
+from scapy.all import PcapReader
+from network_core.conn import Connection, getPacketInfoFromPacket
+from network_core.dataModels import TransPortType, FiveTuple, PacketInfo, PacketType
+from ..sni.clientHello import quic_ch, tls_ch
+from .csvIO import read_csv_to_dicts
+from .dt import fast_strptime
+def getConnMp(pcap_path: str, put_snis=True, remove_payload=True):
+    """Read a pcap and group its packets into bidirectional connections.
+    Returns a dict mapping each connection's FiveTuple (as seen on its first
+    packet) to its Connection. Packets whose five-tuple matches the key are
+    recorded with direction 0 (outgoing); packets matching the reversed
+    five-tuple are recorded with direction 1 (incoming).
+    put_snis       -- run TLS (TCP) / QUIC (UDP) ClientHello extraction and
+                      store the first SNI on the connection; QUIC connections
+                      are additionally flagged with is_quic.
+    remove_payload -- call removePayload() on every connection once SNI
+                      extraction is finished.
+    """
+    conn_mp: dict[FiveTuple, Connection] = {}
+    with PcapReader(pcap_path) as pcap_reader:
+        for packet in pcap_reader:
+            temp_conn = Connection.getConnFromPacket(packet=packet)
+            if temp_conn is None:
+                continue
+            key = temp_conn.five_tuple
+            rev_key = key.rev_ft()
+            if key in conn_mp:
+                conn, direction = conn_mp[key], 0  # outgoing
+            elif rev_key in conn_mp:
+                conn, direction = conn_mp[rev_key], 1  # incoming
+            else:
+                # First packet of a new connection: its sender defines "outgoing".
+                conn, direction = temp_conn, 0
+                conn_mp[key] = conn
+            # Exactly one addPacket per captured packet. The earlier if/if chain
+            # added the opening packet of every connection twice.
+            conn.addPacket(
+                getPacketInfoFromPacket(packet=packet, direction=direction)
+            )
+    for conn in conn_mp.values():
+        conn.sort()
+    if put_snis:
+        for conn in conn_mp.values():
+            transport = conn.five_tuple.transport_type
+            if transport == TransPortType.TCP:
+                ch_dict = tls_ch(connection=conn)
+            elif transport == TransPortType.UDP:
+                ch_dict = quic_ch(connection=conn)
+            else:
+                continue
+            if ch_dict is None:
+                continue
+            conn.setSNI(ch_dict["snis"][0])  # type: ignore
+            if transport == TransPortType.UDP:
+                conn.is_quic = True
+    if remove_payload:
+        # Runs after SNI extraction and independently of it, so payloads are
+        # dropped for every connection, including those without a ClientHello
+        # and when put_snis is False.
+        for conn in conn_mp.values():
+            conn.removePayload()
+    return conn_mp
+# ---------------------------------------------------------------------------
+# CSV loading path (faster than pcap for post-processed data)
+# ---------------------------------------------------------------------------
+def _ft_from_go_string(tuple_str: str) -> FiveTuple:
+    """Build a FiveTuple from the Go extractor's textual five-tuple.
+    Input looks like '192.168.31.246:64841->49.44.204.59:443->(Protocol: 6)':
+    source endpoint, destination endpoint and protocol number joined by '->'.
+    The port is whatever follows the last ':' of an endpoint; everything
+    before it is kept verbatim as the IP. Protocol '6' maps to TCP, '17' to
+    UDP and anything else to UNKNOWN.
+    """
+    fields = tuple_str.split("->")
+    src_endpoint = fields[0].strip()
+    dst_endpoint = fields[1].strip()
+    # rpartition splits on the LAST colon, so colons inside the address survive.
+    src_ip, _, src_port_text = src_endpoint.rpartition(":")
+    dst_ip, _, dst_port_text = dst_endpoint.rpartition(":")
+    src_port = int(src_port_text)
+    dst_port = int(dst_port_text)
+    proto_number = fields[2].strip().rpartition(":")[2].strip(")").strip()
+    transport_by_proto = {"6": TransPortType.TCP, "17": TransPortType.UDP}
+    return FiveTuple(
+        src_ip=src_ip,
+        dst_ip=dst_ip,
+        src_port=src_port,
+        dst_port=dst_port,
+        transport_type=transport_by_proto.get(proto_number, TransPortType.UNKNOWN),
+    )
+def _packet_from_csv_row(row: dict, transport_type: TransPortType) -> PacketInfo:
+    """Turn one packets-CSV row into a PacketInfo.
+    Reads the 'PacketLength', 'Timestamp' and 'Direction' columns; the packet
+    type mirrors *transport_type* (TCP, UDP, otherwise UNKNOWN) and other_info
+    is left empty.
+    """
+    packet_type = (
+        PacketType.TCP
+        if transport_type == TransPortType.TCP
+        else PacketType.UDP
+        if transport_type == TransPortType.UDP
+        else PacketType.UNKNOWN
+    )
+    length = int(row["PacketLength"])
+    timestamp = fast_strptime(row["Timestamp"])
+    direction = int(row["Direction"])
+    return PacketInfo(
+        length=length,
+        timestamp=timestamp,
+        direction=direction,
+        other_info={},
+        packet_type=packet_type,
+    )
+def get_connections_from_csv(
+    packets_csv_path: str,
+    flows_csv_path: str,
+) -> list[Connection]:
+    """Rebuild Connection objects from the Go flow extractor's CSV pair.
+    This path never touches Scapy and does not re-run TLS/QUIC ClientHello
+    parsing: the SNI (and ALPN, when the column exists) is copied from the
+    flows CSV.
+    packets_csv_path — the *Packets.csv (FlowId, PacketLength, Timestamp, Direction)
+    flows_csv_path   — the *Flows.csv   (FlowId, FiveTuple, StartTime, EndTime, SNI[, ALPN])
+    Connections are returned in order of first appearance in the packets CSV,
+    each one sorted via Connection.sort().
+    """
+    # FlowId -> (five_tuple, sni, alpn) taken from the flows CSV.
+    flow_meta: dict[str, tuple] = {}
+    for flow_row in read_csv_to_dicts(flows_csv_path):
+        meta = (
+            _ft_from_go_string(flow_row["FiveTuple"]),
+            flow_row["SNI"],
+            flow_row.get("ALPN", "nan"),
+        )
+        flow_meta[flow_row["FlowId"]] = meta
+    by_flow: dict[str, Connection] = {}
+    for pkt_row in read_csv_to_dicts(packets_csv_path):
+        flow_id = pkt_row["FlowId"]
+        if flow_id in by_flow:
+            conn = by_flow[flow_id]
+        else:
+            # First packet of this flow: create the connection lazily.
+            five_tuple, sni, alpn = flow_meta[flow_id]
+            conn = Connection(five_tuple=five_tuple)
+            conn.setSNI(sni)
+            # "nan" is the placeholder used when the flow has no ALPN value.
+            if alpn and alpn != "nan":
+                conn.setALPN(alpn)
+            by_flow[flow_id] = conn
+        conn.addPacket(
+            _packet_from_csv_row(pkt_row, conn.five_tuple.transport_type)
+        )
+    result = list(by_flow.values())
+    for conn in result:
+        conn.sort()
+    return result

{network_core-0.2.2 → network_core-0.3.1}/network_core.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: network_core
-Version: 0.2.2
+Version: 0.3.1
 Summary: Core networking utilities and data models
 Author: Your Name
 Requires-Python: >=3.9
@@ -13,3 +13,4 @@ Requires-Dist: scapy
 Requires-Dist: matplotlib
 Requires-Dist: brotli
 Requires-Dist: blackboxprotobuf
+Requires-Dist: cryptography

{network_core-0.2.2 → network_core-0.3.1}/network_core.egg-info/SOURCES.txt RENAMED Viewed

@@ -1,5 +1,6 @@
 pyproject.toml
 network_core/__init__.py
+network_core/analysis.py
 network_core/conn.py
 network_core/connOps.py
 network_core/dataModels.py

{network_core-0.2.2 → network_core-0.3.1}/network_core.egg-info/requires.txt RENAMED Viewed

@@ -6,3 +6,4 @@ scapy
 matplotlib
 brotli
 blackboxprotobuf
+cryptography

{network_core-0.2.2 → network_core-0.3.1}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "network_core"
-version = "0.2.2"
+version = "0.3.1"
 description = "Core networking utilities and data models"
 authors = [{ name = "Your Name" }]
 readme = "README.md"
@@ -14,7 +14,8 @@ dependencies = [
     "scapy",
     "matplotlib",
     "brotli",
-    "blackboxprotobuf"
+    "blackboxprotobuf",
+    "cryptography"
 ]
 [build-system]

network_core-0.2.2/network_core/utils/__init__.py DELETED Viewed

File without changes

network_core-0.2.2/network_core/utils/dt.py DELETED Viewed

@@ -1,47 +0,0 @@
-import datetime, pytz
-def convertUNIXToDT(timestamp: float, timezone="Australia/Sydney") -> datetime.datetime:
-    australian_timezone = pytz.timezone(timezone)
-    utc_dt = datetime.datetime.fromtimestamp(timestamp, tz=pytz.utc)
-    return utc_dt.astimezone(australian_timezone)
-def convertUNIXToHumanReadable(timestamp: float, timezone="Australia/Sydney") -> str:
-    dt = convertUNIXToDT(timestamp=timestamp, timezone=timezone)
-    ms = dt.microsecond // 1000  # get milliseconds, doing this so it isn't 0
-    return dt.strftime("%Y-%m-%d %H:%M:%S") + f".{ms}"
-def getDatetimeFromTime(time: datetime.time) -> datetime.datetime:
-    return datetime.datetime.combine(datetime.datetime.today(), time=time)
-def addBufferToTime(time: datetime.time, buffer_in_seconds: float) -> datetime.time:
-    delta = datetime.timedelta(seconds=buffer_in_seconds)
-    return (getDatetimeFromTime(time=time) + delta).time()
-def fast_strptime(s: str) -> float:
-    """
-    2026-01-11 12:25:31.689768 +0530 IST to UNIX
-    """
-    parts = s.split()
-    # Drop trailing timezone name (like 'UTC', 'AEDT', etc.)
-    if len(parts) > 3:
-        s = " ".join(parts[:3])
-    else:
-        s = " ".join(parts)
-    # Handle too many fractional digits (datetime only supports microseconds)
-    date, time_str, offset = s.split()
-    if "." in time_str:
-        main, frac = time_str.split(".")
-        frac = frac[:6]  # truncate nanoseconds to microseconds
-        time_str = f"{main}.{frac}"
-    s = f"{date} {time_str} {offset}"
-    fmt = "%Y-%m-%d %H:%M:%S.%f %z" if "." in time_str else "%Y-%m-%d %H:%M:%S %z"
-    return datetime.datetime.strptime(s, fmt).timestamp()

network_core-0.2.2/network_core/utils/pcapIO.py DELETED Viewed

@@ -1,68 +0,0 @@
-from scapy.all import PcapReader
-from network_core.conn import Connection, getPacketInfoFromPacket
-from network_core.dataModels import TransPortType, FiveTuple
-from ..sni.clientHello import quic_ch, tls_ch
-def getConnMp(pcap_path: str, put_snis=True, remove_payload=True):
-    """
-    If remove payload I will null the transport payload for connection
-    This will return a connection map mapping FiveTuple object to Connection
-    """
-    conn_mp: dict[FiveTuple, Connection] = {}
-    with PcapReader(pcap_path) as pcap_reader:
-        for packet in pcap_reader:
-            temp_conn = Connection.getConnFromPacket(packet=packet)
-            if temp_conn is None:
-                continue
-            key = temp_conn.five_tuple
-            rev_key = temp_conn.five_tuple.rev_ft()
-            direction = 0  # outgoing
-            if key not in conn_mp and rev_key not in conn_mp:
-                conn_mp[key] = temp_conn
-                temp_conn.addPacket(
-                    getPacketInfoFromPacket(packet=packet, direction=direction)
-                )
-            if key in conn_mp:
-                conn_mp[key].addPacket(
-                    getPacketInfoFromPacket(packet=packet, direction=direction)
-                )
-            elif rev_key in conn_mp:
-                direction = 1  # incoming
-                conn_mp[rev_key].addPacket(
-                    getPacketInfoFromPacket(packet=packet, direction=direction)
-                )
-    for _, conn in conn_mp.items():
-        conn.sort()
-    if put_snis:
-        for _, conn in conn_mp.items():
-            sni: str | None = None
-            if conn.five_tuple.transport_type == TransPortType.TCP:
-                ch_dict = tls_ch(connection=conn)
-                if ch_dict is None:
-                    continue
-                else:
-                    sni = ch_dict["snis"][0]
-                    conn.setSNI(sni)  # type: ignore
-            elif conn.five_tuple.transport_type == TransPortType.UDP:
-                ch_dict = quic_ch(connection=conn)
-                if ch_dict is None:
-                    continue
-                else:
-                    sni = ch_dict["snis"][0]
-                    conn.setSNI(sni)  # type: ignore
-                    conn.is_quic = True
-            if remove_payload:
-                # This removes payload
-                conn.removePayload()
-    return conn_mp

{network_core-0.2.2 → network_core-0.3.1}/network_core/connOps.py RENAMED Viewed

File without changes

{network_core-0.2.2 → network_core-0.3.1}/network_core/dataModels.py RENAMED Viewed

File without changes

{network_core-0.2.2/network_core → network_core-0.3.1/network_core/http}/__init__.py RENAMED Viewed

File without changes

{network_core-0.2.2/network_core/http → network_core-0.3.1/network_core/http/httpExtract}/__init__.py RENAMED Viewed

File without changes

{network_core-0.2.2 → network_core-0.3.1}/network_core/http/httpExtract/helpers.py RENAMED Viewed

File without changes

{network_core-0.2.2 → network_core-0.3.1}/network_core/http/httpExtract/parser.py RENAMED Viewed

File without changes

{network_core-0.2.2 → network_core-0.3.1}/network_core/http/httpExtract/pdh.py RENAMED Viewed

File without changes

{network_core-0.2.2 → network_core-0.3.1}/network_core/http/httpIO.py RENAMED Viewed

File without changes

{network_core-0.2.2/network_core/http/httpExtract → network_core-0.3.1/network_core/sni}/__init__.py RENAMED Viewed

File without changes

{network_core-0.2.2 → network_core-0.3.1}/network_core/sni/clientHello.py RENAMED Viewed

File without changes

{network_core-0.2.2 → network_core-0.3.1}/network_core/sni/constants.py RENAMED Viewed

File without changes

{network_core-0.2.2 → network_core-0.3.1}/network_core/sni/crypto.py RENAMED Viewed

File without changes

{network_core-0.2.2 → network_core-0.3.1}/network_core/sni/helpers.py RENAMED Viewed

File without changes

{network_core-0.2.2 → network_core-0.3.1}/network_core/sni/parsers.py RENAMED Viewed

File without changes

{network_core-0.2.2/network_core/sni → network_core-0.3.1/network_core/utils}/__init__.py RENAMED Viewed

File without changes

{network_core-0.2.2 → network_core-0.3.1}/network_core/utils/csvIO.py RENAMED Viewed

File without changes

{network_core-0.2.2 → network_core-0.3.1}/network_core/utils/jsonIO.py RENAMED Viewed

File without changes

{network_core-0.2.2 → network_core-0.3.1}/network_core.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{network_core-0.2.2 → network_core-0.3.1}/network_core.egg-info/top_level.txt RENAMED Viewed

File without changes

{network_core-0.2.2 → network_core-0.3.1}/setup.cfg RENAMED Viewed

File without changes

network-core 0.2.2__tar.gz → 0.3.1__tar.gz

network-core 0.2.2tar.gz → 0.3.1tar.gz