network-core 0.2.2__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {network_core-0.2.2 → network_core-0.3.1}/PKG-INFO +2 -1
- network_core-0.3.1/network_core/__init__.py +18 -0
- network_core-0.3.1/network_core/analysis.py +649 -0
- {network_core-0.2.2 → network_core-0.3.1}/network_core/conn.py +38 -20
- network_core-0.3.1/network_core/utils/dt.py +60 -0
- network_core-0.3.1/network_core/utils/pcapIO.py +162 -0
- {network_core-0.2.2 → network_core-0.3.1}/network_core.egg-info/PKG-INFO +2 -1
- {network_core-0.2.2 → network_core-0.3.1}/network_core.egg-info/SOURCES.txt +1 -0
- {network_core-0.2.2 → network_core-0.3.1}/network_core.egg-info/requires.txt +1 -0
- {network_core-0.2.2 → network_core-0.3.1}/pyproject.toml +3 -2
- network_core-0.2.2/network_core/utils/__init__.py +0 -0
- network_core-0.2.2/network_core/utils/dt.py +0 -47
- network_core-0.2.2/network_core/utils/pcapIO.py +0 -68
- {network_core-0.2.2 → network_core-0.3.1}/network_core/connOps.py +0 -0
- {network_core-0.2.2 → network_core-0.3.1}/network_core/dataModels.py +0 -0
- {network_core-0.2.2/network_core → network_core-0.3.1/network_core/http}/__init__.py +0 -0
- {network_core-0.2.2/network_core/http → network_core-0.3.1/network_core/http/httpExtract}/__init__.py +0 -0
- {network_core-0.2.2 → network_core-0.3.1}/network_core/http/httpExtract/helpers.py +0 -0
- {network_core-0.2.2 → network_core-0.3.1}/network_core/http/httpExtract/parser.py +0 -0
- {network_core-0.2.2 → network_core-0.3.1}/network_core/http/httpExtract/pdh.py +0 -0
- {network_core-0.2.2 → network_core-0.3.1}/network_core/http/httpIO.py +0 -0
- {network_core-0.2.2/network_core/http/httpExtract → network_core-0.3.1/network_core/sni}/__init__.py +0 -0
- {network_core-0.2.2 → network_core-0.3.1}/network_core/sni/clientHello.py +0 -0
- {network_core-0.2.2 → network_core-0.3.1}/network_core/sni/constants.py +0 -0
- {network_core-0.2.2 → network_core-0.3.1}/network_core/sni/crypto.py +0 -0
- {network_core-0.2.2 → network_core-0.3.1}/network_core/sni/helpers.py +0 -0
- {network_core-0.2.2 → network_core-0.3.1}/network_core/sni/parsers.py +0 -0
- {network_core-0.2.2/network_core/sni → network_core-0.3.1/network_core/utils}/__init__.py +0 -0
- {network_core-0.2.2 → network_core-0.3.1}/network_core/utils/csvIO.py +0 -0
- {network_core-0.2.2 → network_core-0.3.1}/network_core/utils/jsonIO.py +0 -0
- {network_core-0.2.2 → network_core-0.3.1}/network_core.egg-info/dependency_links.txt +0 -0
- {network_core-0.2.2 → network_core-0.3.1}/network_core.egg-info/top_level.txt +0 -0
- {network_core-0.2.2 → network_core-0.3.1}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: network_core
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: Core networking utilities and data models
|
|
5
5
|
Author: Your Name
|
|
6
6
|
Requires-Python: >=3.9
|
|
@@ -13,3 +13,4 @@ Requires-Dist: scapy
|
|
|
13
13
|
Requires-Dist: matplotlib
|
|
14
14
|
Requires-Dist: brotli
|
|
15
15
|
Requires-Dist: blackboxprotobuf
|
|
16
|
+
Requires-Dist: cryptography
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from .conn import Connection
|
|
2
|
+
from .connOps import (
|
|
3
|
+
ConnStats,
|
|
4
|
+
filterConnections,
|
|
5
|
+
combine_connections,
|
|
6
|
+
printConns,
|
|
7
|
+
getPacketsInInterval,
|
|
8
|
+
normalizePacketStream,
|
|
9
|
+
)
|
|
10
|
+
from .dataModels import PacketType, TransPortType, FiveTuple, PacketInfo, HttpUnit
|
|
11
|
+
from .analysis import (
|
|
12
|
+
classify_http_version,
|
|
13
|
+
http_version_debug,
|
|
14
|
+
detect_requests,
|
|
15
|
+
burst_analysis,
|
|
16
|
+
conn_features,
|
|
17
|
+
nan_to_none,
|
|
18
|
+
)
|
|
@@ -0,0 +1,649 @@
|
|
|
1
|
+
"""Connection and packet stream analysis functions."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import math
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
from .dataModels import PacketInfo
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# ---------------------------------------------------------------------------
|
|
12
|
+
# Internal statistical helpers
|
|
13
|
+
# ---------------------------------------------------------------------------
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _pct(arr: np.ndarray, p: float) -> float:
    """Return the ``p``-th percentile of ``arr`` as a float.

    An empty input has no percentile, so NaN is returned instead of letting
    ``np.percentile`` fail.
    """
    if len(arr) == 0:
        return float("nan")
    return float(np.percentile(arr, p))
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _safe_mean(arr: np.ndarray) -> float:
    """Return the arithmetic mean of ``arr`` as a float, or NaN when it is empty."""
    if len(arr) == 0:
        return float("nan")
    return float(arr.mean())
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _safe_median(arr: np.ndarray) -> float:
    """Return the median of ``arr`` as a float, or NaN when it is empty."""
    if len(arr) == 0:
        return float("nan")
    return float(np.median(arr))
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _safe_std(arr: np.ndarray) -> float:
    """Return the standard deviation of ``arr`` as a float, or NaN when it is empty.

    Uses ``ndarray.std()`` with its default arguments.
    """
    if len(arr) == 0:
        return float("nan")
    return float(arr.std())
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _entropy(arr: np.ndarray, bins: int = 50) -> float:
    """Return the Shannon entropy, in bits, of the histogram of ``arr``.

    The values are bucketed into ``bins`` histogram bins; empty bins are
    dropped before the probabilities are formed so ``log2(0)`` never occurs.
    Fewer than two samples yield NaN.
    """
    if len(arr) < 2:
        return float("nan")
    hist, _edges = np.histogram(arr, bins=bins)
    occupied = hist[hist > 0]
    probs = occupied / occupied.sum()
    weighted = probs * np.log2(probs)
    return float(-np.sum(weighted))
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _skewness(arr: np.ndarray) -> float:
    """Return the Fisher skewness of ``arr`` (0 = symmetric, >0 = right tail).

    Needs at least three samples (NaN otherwise). A zero-variance input is
    reported as 0.0 rather than dividing by zero.
    """
    if len(arr) < 3:
        return float("nan")
    sigma = arr.std()
    if sigma == 0:
        return 0.0
    centered = arr - arr.mean()
    third_moment = (centered ** 3).mean()
    return float(third_moment / sigma ** 3)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _kurtosis(arr: np.ndarray) -> float:
    """Return the excess kurtosis of ``arr`` (0 = normal, >0 = heavy tails).

    Needs at least four samples (NaN otherwise). A zero-variance input is
    reported as 0.0 rather than dividing by zero.
    """
    if len(arr) < 4:
        return float("nan")
    sigma = arr.std()
    if sigma == 0:
        return 0.0
    centered = arr - arr.mean()
    fourth_moment = (centered ** 4).mean()
    return float(fourth_moment / sigma ** 4 - 3)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _autocorr(arr: np.ndarray, lag: int = 1) -> float:
    """Return the Pearson autocorrelation of ``arr`` at the given lag.

    The series is correlated with a copy of itself shifted by ``lag`` samples.

    Args:
        arr: One-dimensional sequence of samples.
        lag: Shift in samples. ``0`` correlates the series with itself.
            Negative lags are treated as their absolute value because the
            correlation between the two shifted slices is symmetric.

    Returns:
        The correlation coefficient, or NaN when fewer than ``lag + 2``
        samples are available or when either slice has zero variance.
    """
    lag = abs(lag)
    n = len(arr)
    if n <= lag + 1:
        return float("nan")
    # Slice with an explicit end index: ``arr[:-lag]`` is empty when lag == 0.
    head = arr[: n - lag]
    tail = arr[lag:]
    # A constant slice makes corrcoef divide by a zero standard deviation;
    # the resulting NaN is the documented answer, so silence the warning.
    with np.errstate(divide="ignore", invalid="ignore"):
        return float(np.corrcoef(head, tail)[0, 1])
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _burst_stats(ps: list[PacketInfo], direction: int, gap_ms: float = 500) -> dict:
    """Burst statistics for one traffic direction.

    A burst is a run of data-carrying packets (>100 B) in ``direction`` in
    which no two consecutive packets are more than ``gap_ms`` apart. Packets
    are taken in the order given; they are not sorted here, so ``ps`` is
    presumably already in chronological order (confirm against callers).

    A direction with a single data packet counts as one burst of one packet
    (zero duration, undefined inter-burst gaps). Only a direction with no
    data packets at all reports ``n_bursts == 0``.

    Args:
        ps: Packets of the connection; each needs ``direction``, ``length``
            and ``timestamp`` (seconds) attributes.
        direction: Direction value to select.
        gap_ms: Idle gap, in milliseconds, that ends a burst.

    Returns:
        n_bursts           — number of distinct bursts
        burst_mean_bytes   — mean bytes per burst
        burst_mean_pkts    — mean packets per burst
        burst_mean_dur_ms  — mean burst duration (ms)
        interburst_mean_ms — mean gap between burst end and next burst start (ms)
        interburst_cv      — coefficient of variation of inter-burst gaps
        interburst_p95_ms  — 95th-percentile inter-burst gap (ms)

        Undefined fields are NaN.
    """
    nan = float("nan")
    pkts = [p for p in ps if p.direction == direction and p.length > 100]
    if not pkts:
        return {
            "n_bursts": 0,
            "burst_mean_bytes": nan,
            "burst_mean_pkts": nan,
            "burst_mean_dur_ms": nan,
            "interburst_mean_ms": nan,
            "interburst_cv": nan,
            "interburst_p95_ms": nan,
        }

    # Walk consecutive packet pairs; a gap above the threshold opens a new burst.
    bursts: list[list[PacketInfo]] = [[pkts[0]]]
    for prev, cur in zip(pkts, pkts[1:]):
        if (cur.timestamp - prev.timestamp) * 1000 > gap_ms:
            bursts.append([cur])
        else:
            bursts[-1].append(cur)

    burst_bytes = np.array([sum(p.length for p in b) for b in bursts], dtype=np.float64)
    burst_pkts = np.array([len(b) for b in bursts], dtype=np.float64)
    burst_durs = np.array(
        [(b[-1].timestamp - b[0].timestamp) * 1000 for b in bursts], dtype=np.float64
    )

    ib_mean = ib_cv = ib_p95 = nan
    if len(bursts) > 1:
        # Gap = end of one burst to the start of the next.
        gaps = np.array(
            [(nxt[0].timestamp - cur[-1].timestamp) * 1000
             for cur, nxt in zip(bursts, bursts[1:])],
            dtype=np.float64,
        )
        ib_mean = float(gaps.mean())
        if ib_mean > 0:
            ib_cv = float(gaps.std() / gaps.mean())
        ib_p95 = float(np.percentile(gaps, 95))

    return {
        "n_bursts": len(bursts),
        "burst_mean_bytes": float(burst_bytes.mean()),
        "burst_mean_pkts": float(burst_pkts.mean()),
        "burst_mean_dur_ms": float(burst_durs.mean()),
        "interburst_mean_ms": ib_mean,
        "interburst_cv": ib_cv,
        "interburst_p95_ms": ib_p95,
    }
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _classify_pattern(
    n_pkts: int,
    up_frac: float,
    bytes_per_sec: float,
    iat_cv: float,
    iat_autocorr_lag1: float,
    up_n_bursts: int,
    dn_pkt_p95: float,
    n_requests: int,
    http_ver: str,
) -> str:
    """Pick the dominant traffic-pattern label for a flow.

    The rules are evaluated in order and the first match wins:

        idle              — fewer than 10 packets
        bulk_upload       — more than 75% of bytes are upload
        bulk_download     — under 15% upload, download p95 above 1200 B and
                            throughput above 50 000 bytes/s
        streaming         — IAT coefficient of variation below 1.2, lag-1 IAT
                            autocorrelation above 0.25 and under 30% upload
                            (LLM tokens, video, audio)
        interactive       — at least two detected requests or upload bursts
                            (LLM chat, browsing, API calls)
        request_response  — exactly one upload burst or one detected request
                            (one-shot API call)
        mixed             — none of the above

    A metric that is ``None`` or NaN never satisfies its threshold.
    ``http_ver`` is accepted but not consulted by any rule.
    """
    def _exceeds(value: float, floor: float) -> bool:
        # Missing (None/NaN) metrics are treated as "threshold not met".
        return value is not None and not math.isnan(value) and value > floor

    def _under(value: float, ceiling: float) -> bool:
        return value is not None and not math.isnan(value) and value < ceiling

    if n_pkts < 10:
        return "idle"
    if up_frac > 0.75:
        return "bulk_upload"
    if up_frac < 0.15 and _exceeds(dn_pkt_p95, 1200) and _exceeds(bytes_per_sec, 50_000):
        return "bulk_download"
    if _under(iat_cv, 1.2) and _exceeds(iat_autocorr_lag1, 0.25) and up_frac < 0.30:
        return "streaming"
    if n_requests >= 2 or up_n_bursts >= 2:
        return "interactive"
    if up_n_bursts == 1 or n_requests == 1:
        return "request_response"
    return "mixed"
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
# ---------------------------------------------------------------------------
|
|
191
|
+
# Public functions
|
|
192
|
+
# ---------------------------------------------------------------------------
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
# ALPN protocol IDs that decide the verdict on their own. Any other value
# (including 'nan', '', None or an unlisted ID) falls back to the heuristic.
_ALPN_HTTP_VERSIONS = {
    "h2": "HTTP/2",
    "http/1.1": "HTTP/1.1",
    "http/1.0": "HTTP/1.1",
}

# Median small-upload packet size (bytes) separating the two verdicts.
_SMALL_UP_MEDIAN_THRESHOLD = 72


def classify_http_version(ps: list[PacketInfo], alpn: str = "nan") -> str:
    """Infer HTTP version, preferring ALPN ground truth over a packet heuristic.

    If ALPN is one of the recognised IDs it is authoritative:
        'h2'                     → 'HTTP/2'
        'http/1.1' / 'http/1.0'  → 'HTTP/1.1'
    Fallback heuristic (used when ALPN is 'nan', empty or unrecognised):
        Median of small (<200 B) upload packets ≤ 72 B → HTTP/1.1 (pure TCP ACKs)
        Median > 72 B → HTTP/2 (WINDOW_UPDATE/PING frames ~79-87 B)
        Fewer than three small upload packets → 'unknown'

    Returns 'HTTP/1.1', 'HTTP/2', or 'unknown'.
    """
    if alpn in _ALPN_HTTP_VERSIONS:
        return _ALPN_HTTP_VERSIONS[alpn]

    small = np.array(
        [p.length for p in ps if p.direction == 0 and p.length < 200],
        dtype=np.float32,
    )
    if len(small) < 3:
        return "unknown"
    if np.median(small) <= _SMALL_UP_MEDIAN_THRESHOLD:
        return "HTTP/1.1"
    return "HTTP/2"


def http_version_debug(ps: list[PacketInfo], alpn: str = "nan") -> dict:
    """Return a detailed breakdown of the HTTP version classification.

    Useful for auditing flows where the heuristic result seems wrong.

    ``source`` is 'ALPN' only when the ALPN value actually decided the
    verdict; an unrecognised ALPN is reported as 'heuristic' because the
    verdict then comes from the packet-size median. ``confidence`` is
    'unknown (...)' whenever fewer than three small upload packets exist,
    matching the 'unknown' verdict of ``classify_http_version``.

    The returned dict holds the verdict, its source and confidence, the
    ALPN value passed in, counts and summary statistics of the small (<200 B)
    upload packets, a size histogram, the threshold and an explanatory note.
    """
    small = [p.length for p in ps if p.direction == 0 and p.length < 200]
    all_up = [p.length for p in ps if p.direction == 0]

    verdict = classify_http_version(ps, alpn)
    source = "ALPN" if alpn in _ALPN_HTTP_VERSIONS else "heuristic"

    buckets = {
        "<54": sum(1 for size in small if size < 54),
        "54-66": sum(1 for size in small if 54 <= size <= 66),
        "67-80": sum(1 for size in small if 67 <= size <= 80),
        "81-100": sum(1 for size in small if 81 <= size <= 100),
        "101-199": sum(1 for size in small if 101 <= size <= 199),
    }

    med = float(np.median(small)) if small else None
    if source == "ALPN":
        confidence = "high (ALPN)"
    elif med is None:
        confidence = "unknown (no small packets)"
    elif len(small) < 3:
        # The classifier refuses to decide on fewer than three samples.
        confidence = "unknown (fewer than 3 small packets)"
    elif abs(med - _SMALL_UP_MEDIAN_THRESHOLD) > 12:
        confidence = "high (heuristic)"
    else:
        confidence = "low (heuristic, median near threshold)"

    return {
        "verdict": verdict,
        "source": source,
        "confidence": confidence,
        "alpn": alpn,
        "small_up_count": len(small),
        "total_up_count": len(all_up),
        "small_up_median": med,
        "small_up_p25": float(np.percentile(small, 25)) if small else None,
        "small_up_p75": float(np.percentile(small, 75)) if small else None,
        "small_up_min": float(min(small)) if small else None,
        "small_up_max": float(max(small)) if small else None,
        "histogram": buckets,
        "threshold": _SMALL_UP_MEDIAN_THRESHOLD,
        "note": "ACKs cluster at ~66 B; HTTP/2 WINDOW_UPDATE/PING at ~79-87 B",
    }
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def detect_requests(
    ps: list[PacketInfo],
    gap_ms: float = 800,
    min_bytes: int = 5000,
    min_pkts: int = 5,
) -> list[dict]:
    """Split the upload stream on idle gaps to find HTTP/1.1 request boundaries.

    Only data-carrying packets (>100 B) are used for burst detection to avoid
    pure TCP ACKs extending burst windows across server-sent heartbeats.
    A burst counts as a request when it carries at least ``min_bytes`` bytes
    in at least ``min_pkts`` packets. Packets are processed in the order
    given and download timestamps are binary-searched, so ``ps`` is
    presumably in chronological order (confirm against callers).

    Args:
        ps: Packets of the connection; each needs ``direction`` (0 = upload,
            1 = download), ``length`` and ``timestamp`` (seconds).
        gap_ms: Idle gap between upload data packets that ends a burst.
        min_bytes: Minimum total bytes for a burst to count as a request.
        min_pkts: Minimum packet count for a burst to count as a request.

    Returns one dict per detected request:
        req_kb, req_pkts, req_dur_ms — upload burst stats
        think_ms                     — server processing time (first dn packet after burst)
        res_kb, res_pkts, res_dur_ms — downstream response stats
        client_gap_ms                — gap before next request (None if last)
        t_start, t_end               — burst timestamps
        turn_idx                     — 0-based request index

    An empty list is returned when fewer than two upload data packets exist
    or no burst passes the size filters.
    """
    data_ups = [p for p in ps if p.direction == 0 and p.length > 100]
    if len(data_ups) < 2:
        return []

    # Walk consecutive packet pairs; a gap above the threshold opens a new burst.
    bursts: list[list[PacketInfo]] = [[data_ups[0]]]
    for prev, cur in zip(data_ups, data_ups[1:]):
        if (cur.timestamp - prev.timestamp) * 1000 > gap_ms:
            bursts.append([cur])
        else:
            bursts[-1].append(cur)

    real = [
        b for b in bursts
        if sum(p.length for p in b) >= min_bytes and len(b) >= min_pkts
    ]
    if not real:
        return []

    dns = [p for p in ps if p.direction == 1]
    dn_ts = np.array([p.timestamp for p in dns], dtype=np.float64)
    # float64 keeps byte sums exact; float32 cannot represent every integer
    # above 2**24, which large responses exceed.
    dn_lens = np.array([p.length for p in dns], dtype=np.float64)
    last_ts = ps[-1].timestamp

    rows = []
    for idx, b in enumerate(real):
        burst_start = b[0].timestamp
        burst_end = b[-1].timestamp
        has_next = idx + 1 < len(real)
        next_start = real[idx + 1][0].timestamp if has_next else None
        # Compare against None, not truthiness: a timestamp of 0.0 is valid.
        # The last request's window runs one second past the final packet.
        res_win_end = next_start if next_start is not None else last_ts + 1

        # Response window: download packets after the burst ends and before
        # the next request starts.
        dn_s = int(np.searchsorted(dn_ts, burst_end, side="right"))
        dn_e = int(np.searchsorted(dn_ts, res_win_end, side="left"))

        think_ms = float((dn_ts[dn_s] - burst_end) * 1000) if dn_s < len(dn_ts) else None
        res_kb = float(dn_lens[dn_s:dn_e].sum()) / 1024
        n_res = dn_e - dn_s

        res_dur_ms = client_gap_ms = None
        if n_res > 0:
            res_dur_ms = float((dn_ts[dn_e - 1] - dn_ts[dn_s]) * 1000)
            if next_start is not None:
                client_gap_ms = float((next_start - dn_ts[dn_e - 1]) * 1000)

        rows.append({
            "req_kb": sum(p.length for p in b) / 1024,
            "req_pkts": len(b),
            "req_dur_ms": (burst_end - burst_start) * 1000,
            "think_ms": think_ms,
            "res_kb": res_kb,
            "res_pkts": n_res,
            "res_dur_ms": res_dur_ms,
            "client_gap_ms": client_gap_ms,
            "t_start": burst_start,
            "t_end": burst_end,
            "turn_idx": idx,
        })
    return rows
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
def burst_analysis(
    ps: list[PacketInfo],
    gap_ms: float = 500,
) -> dict:
    """Compute general burst statistics for both traffic directions.

    Unlike detect_requests (which is HTTP/1.1-specific), this works on any
    protocol by splitting each direction's data stream on idle gaps of more
    than ``gap_ms`` milliseconds.

    Returns a dict with the upload stats under 'up' (direction 0) and the
    download stats under 'dn' (direction 1), each as produced by
    ``_burst_stats``.
    """
    direction_codes = (("up", 0), ("dn", 1))
    return {
        label: _burst_stats(ps, direction=code, gap_ms=gap_ms)
        for label, code in direction_codes
    }
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def conn_features(
    ps: list[PacketInfo],
    http_ver: str,
    requests: list[dict],
) -> dict | None:
    """Extract a comprehensive statistical feature set from a connection's packet stream.

    Returns None if the connection has fewer than 5 packets or carries no bytes.
    Fields that are undefined for a given flow appear as NaN (use nan_to_none before
    serialising to JSON).

    Args:
        ps: Packets of the connection; each needs ``direction``, ``length`` and
            ``timestamp`` (seconds). Direction 0 is upload; every other value is
            counted as download. Presumably in chronological order — the packets
            are not sorted here.
        http_ver: HTTP version label, copied into the output and passed to the
            pattern classifier.
        requests: Request dicts as produced by ``detect_requests``
            (``req_kb``, ``res_kb`` and optionally ``think_ms`` are read).

    Output is organised into sections (all in a flat dict):

    Flow summary
        http_ver        — 'HTTP/1.1', 'HTTP/2', or 'unknown'
        n_pkts          — total packet count
        flow_dur_s      — connection duration in seconds
        bytes_up/dn     — total bytes per direction
        up_frac         — fraction of bytes that are upload (0-1)

    Throughput (NaN when the flow duration is zero)
        bytes_per_sec, bytes_per_sec_up, bytes_per_sec_dn
        pps, pps_up, pps_dn

    Packet size — all packets
        pkt_mean/median/std/min/max
        pkt_p25/p75/p95/p99
        pkt_skew        — Fisher skewness (>0 = right tail, i.e. many small + few large)
        pkt_kurt        — excess kurtosis (>0 = heavier tails than normal)
        pkt_entropy     — Shannon entropy of the size histogram (bits); low = repetitive sizes
        frac_ack        — fraction of packets <100 B (pure TCP ACKs / control frames)
        frac_medium     — fraction 100-999 B
        frac_large      — fraction ≥1000 B
        frac_mtu        — fraction ≥1400 B (near MTU, data-carrying)

    Packet size — per direction
        up_pkt_mean/median/std/p95
        dn_pkt_mean/median/std/p95

    Inter-arrival time (IAT) — all packets (milliseconds)
        iat_mean/median/std/min/max_ms
        iat_p25/p75/p95/p99_ms
        iat_cv          — coefficient of variation (std/mean); high = bursty, low = smooth
        iat_entropy     — entropy of the IAT histogram; low = periodic, high = irregular
        iat_autocorr_lag1/lag2 — autocorrelation at lag 1 and 2; >0.3 suggests periodicity

    IAT — per direction
        up_iat_mean/median/std_ms
        dn_iat_mean/median/std_ms

    Upload burst analysis (gap_ms=500 threshold)
        up_n_bursts
        up_burst_mean_bytes/pkts/dur_ms
        up_interburst_mean_ms, up_interburst_cv, up_interburst_p95_ms

    Timing landmarks
        ttfb_ms         — time from first upload data packet to first download data packet (ms);
                          NaN unless the download data packet comes later
        idle_up_med     — median size of small (<200 B) upload packets; used for HTTP version
                          heuristic (~66 B = ACK → HTTP/1.1, ~82 B = H2 frame → HTTP/2);
                          NaN with fewer than 3 such packets

    HTTP/1.1 request-level features
        n_requests      — number of request dicts supplied
        mean_think_ms   — mean server processing time across requests (NaN when none known)
        mean_res_kb     — mean response size per request (KB; NaN without requests)
        The remaining fields are NaN when <2 requests were detected:
        req_slope       — linear slope of request sizes over turns (KB/turn)
        req_growth      — ratio of last to first request size
        req_cv          — coefficient of variation of request sizes
        mono_frac       — fraction of consecutive turns where request grew
        delta_res_corr  — correlation between Δrequest and previous response size
        delta_res_ratio — mean ratio of Δrequest to previous response

    Traffic pattern (derived label)
        traffic_pattern — one of:
            'idle'              <10 packets
            'bulk_upload'       >75% bytes are upload
            'bulk_download'     <15% bytes upload, large packets, high throughput
            'streaming'         sustained download with regular IAT (LLM tokens, video, audio)
            'interactive'       alternating request/response turns (LLM chat, API, browsing)
            'request_response'  single upload burst + download (one-shot API call)
            'mixed'             does not fit cleanly
    """
    if len(ps) < 5:
        return None

    nan = float("nan")
    lens = np.array([p.length for p in ps], dtype=np.float32)
    dirs = np.array([p.direction for p in ps], dtype=np.int8)
    ts = np.array([p.timestamp for p in ps], dtype=np.float64)
    iats = np.diff(ts) * 1000  # ms

    up_m = dirs == 0
    dn_m = ~up_m
    up_len = lens[up_m]
    dn_len = lens[dn_m]

    # Accumulate byte totals in float64: a float32 sum cannot represent every
    # integer above 2**24, so totals of large flows would be rounded.
    bytes_up = float(up_len.sum(dtype=np.float64))
    bytes_dn = float(dn_len.sum(dtype=np.float64))
    total = bytes_up + bytes_dn
    if total == 0 or len(iats) == 0:
        return None

    dur = float(ts[-1] - ts[0])
    n_pkts = len(ps)
    up_frac = bytes_up / total

    # --- throughput (undefined for a zero-length flow) ---
    if dur > 0:
        bps = total / dur
        bps_up = bytes_up / dur
        bps_dn = bytes_dn / dur
        pps = n_pkts / dur
        pps_up = int(up_m.sum()) / dur
        pps_dn = int(dn_m.sum()) / dur
    else:
        bps = bps_up = bps_dn = pps = pps_up = pps_dn = nan

    # --- directional IATs ---
    up_ts = ts[up_m]
    dn_ts_a = ts[dn_m]
    no_iats = np.array([], dtype=np.float64)
    up_iats = np.diff(up_ts) * 1000 if len(up_ts) > 1 else no_iats
    dn_iats = np.diff(dn_ts_a) * 1000 if len(dn_ts_a) > 1 else no_iats

    iat_mean = iats.mean()
    iat_cv = float(iats.std() / iat_mean) if iat_mean > 0 else nan
    ac1 = _autocorr(iats, 1)
    ac2 = _autocorr(iats, 2)

    # --- TTFB: first upload data packet to first download data packet ---
    up_data = [p for p in ps if p.direction == 0 and p.length > 100]
    dn_data = [p for p in ps if p.direction == 1 and p.length > 100]
    ttfb_ms = nan
    if up_data and dn_data and dn_data[0].timestamp > up_data[0].timestamp:
        ttfb_ms = float((dn_data[0].timestamp - up_data[0].timestamp) * 1000)

    # --- upload burst stats ---
    ub = _burst_stats(ps, direction=0, gap_ms=500)

    # --- HTTP/1.1 request features ---
    res_kbs = [r["res_kb"] for r in requests]
    think_times = [r["think_ms"] for r in requests if r.get("think_ms") is not None]
    trend = _request_trend_features(requests)

    small_ups = up_len[up_len < 200]
    idle_up_med = float(np.median(small_ups)) if len(small_ups) >= 3 else nan

    # --- traffic pattern ---
    dn_pkt_p95 = _pct(dn_len, 95)
    pattern = _classify_pattern(
        n_pkts=n_pkts,
        up_frac=up_frac,
        bytes_per_sec=bps,
        iat_cv=iat_cv,
        iat_autocorr_lag1=ac1,
        up_n_bursts=ub["n_bursts"],
        dn_pkt_p95=dn_pkt_p95,
        n_requests=len(requests),
        http_ver=http_ver,
    )

    return {
        # flow summary
        "http_ver": http_ver,
        "n_pkts": n_pkts,
        "flow_dur_s": dur,
        "bytes_up": bytes_up,
        "bytes_dn": bytes_dn,
        "up_frac": up_frac,

        # throughput
        "bytes_per_sec": bps,
        "bytes_per_sec_up": bps_up,
        "bytes_per_sec_dn": bps_dn,
        "pps": pps,
        "pps_up": pps_up,
        "pps_dn": pps_dn,

        # packet size — all
        "pkt_mean": float(lens.mean()),
        "pkt_median": float(np.median(lens)),
        "pkt_std": float(lens.std()),
        "pkt_min": float(lens.min()),
        "pkt_max": float(lens.max()),
        "pkt_p25": _pct(lens, 25),
        "pkt_p75": _pct(lens, 75),
        "pkt_p95": _pct(lens, 95),
        "pkt_p99": _pct(lens, 99),
        "pkt_skew": _skewness(lens),
        "pkt_kurt": _kurtosis(lens),
        "pkt_entropy": _entropy(lens),
        "frac_ack": float(np.mean(lens < 100)),
        "frac_medium": float(np.mean((lens >= 100) & (lens < 1000))),
        "frac_large": float(np.mean(lens >= 1000)),
        "frac_mtu": float(np.mean(lens >= 1400)),

        # packet size — upload
        "up_pkt_mean": _safe_mean(up_len),
        "up_pkt_median": _safe_median(up_len),
        "up_pkt_std": _safe_std(up_len),
        "up_pkt_p95": _pct(up_len, 95),

        # packet size — download
        "dn_pkt_mean": _safe_mean(dn_len),
        "dn_pkt_median": _safe_median(dn_len),
        "dn_pkt_std": _safe_std(dn_len),
        "dn_pkt_p95": dn_pkt_p95,

        # IAT — all (ms)
        "iat_mean_ms": float(iat_mean),
        "iat_median_ms": float(np.median(iats)),
        "iat_std_ms": float(iats.std()),
        "iat_min_ms": float(iats.min()),
        "iat_max_ms": float(iats.max()),
        "iat_p25_ms": _pct(iats, 25),
        "iat_p75_ms": _pct(iats, 75),
        "iat_p95_ms": _pct(iats, 95),
        "iat_p99_ms": _pct(iats, 99),
        "iat_cv": iat_cv,
        "iat_entropy": _entropy(iats),
        "iat_autocorr_lag1": ac1,
        "iat_autocorr_lag2": ac2,

        # IAT — upload
        "up_iat_mean_ms": _safe_mean(up_iats),
        "up_iat_median_ms": _safe_median(up_iats),
        "up_iat_std_ms": _safe_std(up_iats),

        # IAT — download
        "dn_iat_mean_ms": _safe_mean(dn_iats),
        "dn_iat_median_ms": _safe_median(dn_iats),
        "dn_iat_std_ms": _safe_std(dn_iats),

        # upload bursts
        "up_n_bursts": ub["n_bursts"],
        "up_burst_mean_bytes": ub["burst_mean_bytes"],
        "up_burst_mean_pkts": ub["burst_mean_pkts"],
        "up_burst_mean_dur_ms": ub["burst_mean_dur_ms"],
        "up_interburst_mean_ms": ub["interburst_mean_ms"],
        "up_interburst_cv": ub["interburst_cv"],
        "up_interburst_p95_ms": ub["interburst_p95_ms"],

        # timing landmarks
        "ttfb_ms": ttfb_ms,
        "idle_up_med": idle_up_med,

        # HTTP/1.1 request features
        "n_requests": len(requests),
        "mean_think_ms": float(np.mean(think_times)) if think_times else nan,
        "mean_res_kb": float(np.mean(res_kbs)) if res_kbs else nan,
        "req_slope": trend["req_slope"],
        "req_growth": trend["req_growth"],
        "req_cv": trend["req_cv"],
        "mono_frac": trend["mono_frac"],
        "delta_res_corr": trend["delta_res_corr"],
        "delta_res_ratio": trend["delta_res_ratio"],

        # derived pattern
        "traffic_pattern": pattern,
    }


def _request_trend_features(requests: list[dict]) -> dict:
    """Summarise how request sizes evolve across the detected turns.

    Reads ``req_kb`` and ``res_kb`` from each request dict. Every field is NaN
    when fewer than two requests are supplied; the two ``delta_res_*`` fields
    additionally need at least two turns whose preceding response was non-empty.
    """
    nan = float("nan")
    trend = {
        "req_slope": nan,
        "req_growth": nan,
        "req_cv": nan,
        "mono_frac": nan,
        "delta_res_corr": nan,
        "delta_res_ratio": nan,
    }
    req_kbs = [r["req_kb"] for r in requests]
    res_kbs = [r["res_kb"] for r in requests]
    if len(req_kbs) < 2:
        return trend

    # Consecutive (previous, next) request sizes, one pair per turn change.
    steps = list(zip(req_kbs, req_kbs[1:]))
    turns = np.arange(len(req_kbs), dtype=float)
    trend["req_slope"] = float(np.polyfit(turns, req_kbs, 1)[0])
    # Denominators are floored at 0.01 KB so near-empty requests stay finite.
    trend["req_growth"] = float(req_kbs[-1] / max(req_kbs[0], 0.01))
    trend["req_cv"] = float(np.std(req_kbs) / max(np.mean(req_kbs), 0.01))
    trend["mono_frac"] = float(sum(nxt > prev for prev, nxt in steps) / len(steps))

    # Pair each request-size change with the response that preceded it;
    # zip stops at the shorter sequence, so the last response is unused.
    pairs = [(nxt - prev, res) for (prev, nxt), res in zip(steps, res_kbs) if res > 0]
    if len(pairs) >= 2:
        deltas = np.array([delta for delta, _ in pairs])
        prev_res = np.array([res for _, res in pairs])
        # Constant inputs give a NaN correlation; that is the intended value.
        with np.errstate(divide="ignore", invalid="ignore"):
            trend["delta_res_corr"] = float(np.corrcoef(deltas, prev_res)[0, 1])
        trend["delta_res_ratio"] = float(np.mean(deltas / prev_res))
    return trend
|
|
639
|
+
|
|
640
|
+
|
|
641
|
+
def nan_to_none(obj: object) -> object:
    """Recursively replace float NaN with None for JSON serialisation.

    Dicts, lists and tuples are rebuilt with their values cleaned (dict keys
    are left untouched). Both built-in floats and NumPy floating scalars are
    recognised as NaN carriers. Anything else is returned unchanged.

    Args:
        obj: Arbitrary, possibly nested, value.

    Returns:
        A structure of the same shape in which every NaN is ``None``.
    """
    if isinstance(obj, dict):
        return {key: nan_to_none(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [nan_to_none(value) for value in obj]
    if isinstance(obj, tuple):
        cleaned = [nan_to_none(value) for value in obj]
        # namedtuples are rebuilt through _make so the record type survives.
        make = getattr(type(obj), "_make", None)
        return make(cleaned) if make is not None else tuple(cleaned)
    # np.float32 and friends are not subclasses of float, so test both.
    if isinstance(obj, (float, np.floating)) and math.isnan(obj):
        return None
    return obj
|
|
@@ -47,7 +47,30 @@ class Connection:
|
|
|
47
47
|
self.packet_stream: List[PacketInfo] = []
|
|
48
48
|
self.other_info = other_info
|
|
49
49
|
self.__sni = None
|
|
50
|
+
self.__alpn = None
|
|
50
51
|
self.is_quic = False
|
|
52
|
+
self._stats_dirty = True
|
|
53
|
+
self._up_bytes_c = 0
|
|
54
|
+
self._down_bytes_c = 0
|
|
55
|
+
self._up_packets_c = 0
|
|
56
|
+
self._down_packets_c = 0
|
|
57
|
+
|
|
58
|
+
def _build_stats(self):
    """Refresh the cached per-direction byte and packet totals when stale.

    Does nothing while ``_stats_dirty`` is false. Otherwise walks
    ``packet_stream`` once, counting direction 0 as upload and every other
    direction as download, stores the four totals and clears the dirty flag.
    """
    if self._stats_dirty:
        up_total, up_count = 0, 0
        down_total, down_count = 0, 0
        for pkt in self.packet_stream:
            if pkt.direction != 0:
                down_total += pkt.length
                down_count += 1
                continue
            up_total += pkt.length
            up_count += 1
        self._up_bytes_c, self._up_packets_c = up_total, up_count
        self._down_bytes_c, self._down_packets_c = down_total, down_count
        self._stats_dirty = False
|
|
51
74
|
|
|
52
75
|
@staticmethod
|
|
53
76
|
def getTransPortType(packet):
|
|
@@ -91,6 +114,7 @@ class Connection:
|
|
|
91
114
|
assert False, "Packet Type is Unknown"
|
|
92
115
|
self.connection_type.add(packet.packet_type)
|
|
93
116
|
self.packet_stream.append(packet)
|
|
117
|
+
self._stats_dirty = True
|
|
94
118
|
|
|
95
119
|
def __len__(self):
|
|
96
120
|
return len(self.packet_stream)
|
|
@@ -101,6 +125,12 @@ class Connection:
|
|
|
101
125
|
def getSNI(self):
|
|
102
126
|
return self.__sni
|
|
103
127
|
|
|
128
|
+
def setALPN(self, alpn: str):
|
|
129
|
+
self.__alpn = alpn
|
|
130
|
+
|
|
131
|
+
def getALPN(self) -> str | None:
|
|
132
|
+
return self.__alpn
|
|
133
|
+
|
|
104
134
|
def getPacketsOfTypes(self, packet_types: set[PacketType]) -> List[PacketInfo]:
|
|
105
135
|
return list(filter(lambda x: x.packet_type in packet_types, self.packet_stream))
|
|
106
136
|
|
|
@@ -149,35 +179,23 @@ class Connection:
|
|
|
149
179
|
|
|
150
180
|
@property
|
|
151
181
|
def down_bytes(self):
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
if p.direction == 1:
|
|
155
|
-
total += p.length
|
|
156
|
-
return total
|
|
182
|
+
self._build_stats()
|
|
183
|
+
return self._down_bytes_c
|
|
157
184
|
|
|
158
185
|
@property
|
|
159
186
|
def up_bytes(self):
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
if p.direction == 0:
|
|
163
|
-
total += p.length
|
|
164
|
-
return total
|
|
187
|
+
self._build_stats()
|
|
188
|
+
return self._up_bytes_c
|
|
165
189
|
|
|
166
190
|
@property
|
|
167
191
|
def down_packets(self):
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
if p.direction == 1:
|
|
171
|
-
total += 1
|
|
172
|
-
return total
|
|
192
|
+
self._build_stats()
|
|
193
|
+
return self._down_packets_c
|
|
173
194
|
|
|
174
195
|
@property
|
|
175
196
|
def up_packets(self):
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
if p.direction == 0:
|
|
179
|
-
total += 1
|
|
180
|
-
return total
|
|
197
|
+
self._build_stats()
|
|
198
|
+
return self._up_packets_c
|
|
181
199
|
|
|
182
200
|
@property
|
|
183
201
|
def start_timestamp(self):
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import calendar
|
|
2
|
+
import datetime, pytz
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def convertUNIXToDT(timestamp: float, timezone="Australia/Sydney") -> datetime.datetime:
    """Convert a UNIX timestamp into an aware datetime in the given zone.

    ``timezone`` is a zone name handed to ``pytz.timezone``. The timestamp
    is first interpreted as UTC and then shifted into that zone.
    """
    target_zone = pytz.timezone(timezone)
    return datetime.datetime.fromtimestamp(timestamp, tz=pytz.utc).astimezone(target_zone)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def convertUNIXToHumanReadable(timestamp: float, timezone="Australia/Sydney") -> str:
    """Format a UNIX timestamp as 'YYYY-MM-DD HH:MM:SS.mmm' in the given zone.

    ``timezone`` is forwarded to ``convertUNIXToDT``. The fractional part is
    truncated (not rounded) to whole milliseconds.
    """
    dt = convertUNIXToDT(timestamp=timestamp, timezone=timezone)
    millis = dt.microsecond // 1000
    # Zero-pad to three digits: 5 ms must render as ".005"; an unpadded ".5"
    # would read as half a second.
    return f"{dt.strftime('%Y-%m-%d %H:%M:%S')}.{millis:03d}"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def getDatetimeFromTime(time: datetime.time) -> datetime.datetime:
    """Return a datetime carrying today's local date and the given time of day."""
    today = datetime.datetime.today()
    # combine() only uses the date portion of its first argument.
    return datetime.datetime.combine(today, time)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def addBufferToTime(time: datetime.time, buffer_in_seconds: float) -> datetime.time:
    """Shift a time of day by ``buffer_in_seconds`` (which may be negative).

    The time is anchored on today's date for the arithmetic and only the
    time-of-day part of the result is returned, so crossing midnight wraps
    around.
    """
    shift = datetime.timedelta(seconds=buffer_in_seconds)
    anchored = datetime.datetime.combine(datetime.datetime.today(), time)
    return (anchored + shift).time()
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def fast_strptime(s: str) -> float:
    """Parse '2026-01-11 12:25:31.689768 +0530 IST' to a UNIX timestamp float.

    The string is split on whitespace into date, time, an optional
    ``+HHMM``/``-HHMM`` offset (``+0000`` when absent) and an optional zone
    name, which is ignored. Fields are read by fixed character position, so
    they must be zero-padded. Fractional seconds beyond six digits are
    truncated.

    Integers are parsed directly instead of going through
    ``datetime.strptime``, to keep per-packet parsing of large CSV files
    cheap.
    """
    fields = s.split()
    date_part = fields[0]  # "2026-01-11"
    clock_part = fields[1]  # "12:25:31.689768" or "12:25:31"
    offset_part = "+0000" if len(fields) <= 2 else fields[2]

    year = int(date_part[0:4])
    month = int(date_part[5:7])
    day = int(date_part[8:10])

    clock, dot, fraction = clock_part.partition(".")
    # Keep at most six fractional digits and right-pad so "5" means 500000 us.
    microseconds = int(fraction[:6].ljust(6, "0")) if dot else 0

    hour = int(clock[0:2])
    minute = int(clock[3:5])
    second = int(clock[6:8])

    direction = -1 if offset_part[0] != "+" else 1
    offset_secs = direction * 60 * (int(offset_part[1:3]) * 60 + int(offset_part[3:5]))

    # timegm() reads the tuple as UTC, so the zone offset is removed afterwards.
    utc_ts = calendar.timegm((year, month, day, hour, minute, second, 0, 0, 0))
    return utc_ts - offset_secs + microseconds / 1_000_000
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
from scapy.all import PcapReader
|
|
2
|
+
from network_core.conn import Connection, getPacketInfoFromPacket
|
|
3
|
+
from network_core.dataModels import TransPortType, FiveTuple, PacketInfo, PacketType
|
|
4
|
+
|
|
5
|
+
from ..sni.clientHello import quic_ch, tls_ch
|
|
6
|
+
from .csvIO import read_csv_to_dicts
|
|
7
|
+
from .dt import fast_strptime
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def getConnMp(pcap_path: str, put_snis=True, remove_payload=True):
    """Read a pcap and group its packets into bidirectional connections.

    Every packet is turned into a Connection with
    ``Connection.getConnFromPacket``; packets for which that returns None
    are skipped. The first packet seen for a five-tuple (in either
    orientation) creates the map entry and fixes the orientation: packets
    matching the stored key are added with direction 0, packets matching
    the reversed key with direction 1. Each packet is added exactly once.

    put_snis: when true, look for a client hello with ``tls_ch`` (TCP) or
        ``quic_ch`` (UDP) and store the first entry of its "snis" list as
        the SNI. UDP connections that yield one are flagged ``is_quic``.
    remove_payload: when true, ``removePayload()`` is called on every
        connection after the SNI step, whether or not a client hello was
        found and whether or not ``put_snis`` is set.

    Returns a dict mapping FiveTuple to Connection.
    """
    conn_mp: dict[FiveTuple, Connection] = {}

    with PcapReader(pcap_path) as pcap_reader:
        for packet in pcap_reader:
            temp_conn = Connection.getConnFromPacket(packet=packet)
            if temp_conn is None:
                continue
            key = temp_conn.five_tuple
            rev_key = key.rev_ft()

            if key in conn_mp:
                owner, direction = conn_mp[key], 0  # outgoing
            elif rev_key in conn_mp:
                owner, direction = conn_mp[rev_key], 1  # incoming
            else:
                # First packet of a new connection: register it and fall
                # through to the single addPacket below, so it is counted once.
                owner, direction = temp_conn, 0
                conn_mp[key] = owner

            owner.addPacket(
                getPacketInfoFromPacket(packet=packet, direction=direction)
            )

    for conn in conn_mp.values():
        conn.sort()

    for conn in conn_mp.values():
        if put_snis:
            transport = conn.five_tuple.transport_type
            if transport == TransPortType.TCP:
                ch_dict = tls_ch(connection=conn)
            elif transport == TransPortType.UDP:
                ch_dict = quic_ch(connection=conn)
            else:
                ch_dict = None

            if ch_dict is not None:
                conn.setSNI(ch_dict["snis"][0])  # type: ignore
                if transport == TransPortType.UDP:
                    conn.is_quic = True

        # Runs after the SNI step on purpose: extraction needs the payload.
        if remove_payload:
            conn.removePayload()

    return conn_mp
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# ---------------------------------------------------------------------------
|
|
74
|
+
# CSV loading path (faster than pcap for post-processed data)
|
|
75
|
+
# ---------------------------------------------------------------------------
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _ft_from_go_string(tuple_str: str) -> FiveTuple:
    """Build a FiveTuple from the Go extractor's textual five-tuple.

    Example: '192.168.31.246:64841->49.44.204.59:443->(Protocol: 6)'

    The string is split on '->' into source endpoint, destination endpoint
    and protocol. Each endpoint is divided at its last ':' into address and
    port. Protocol "6" maps to TCP, "17" to UDP, anything else to UNKNOWN.
    """
    segments = tuple_str.split("->")
    source = segments[0].strip()
    destination = segments[1].strip()

    # rpartition splits on the LAST colon, so addresses containing colons stay intact.
    src_host, _, src_port_text = source.rpartition(":")
    dst_host, _, dst_port_text = destination.rpartition(":")
    src_port_num = int(src_port_text)
    dst_port_num = int(dst_port_text)

    proto_number = segments[2].strip().split(":")[-1].strip(")").strip()
    if proto_number == "17":
        transport = TransPortType.UDP
    elif proto_number == "6":
        transport = TransPortType.TCP
    else:
        transport = TransPortType.UNKNOWN

    return FiveTuple(
        src_ip=src_host,
        dst_ip=dst_host,
        src_port=src_port_num,
        dst_port=dst_port_num,
        transport_type=transport,
    )
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _packet_from_csv_row(row: dict, transport_type: TransPortType) -> PacketInfo:
    """Create a PacketInfo from one row of the packets CSV.

    Reads the "PacketLength", "Timestamp" and "Direction" columns; the
    timestamp text is converted with ``fast_strptime``. The packet type
    follows ``transport_type`` (TCP, UDP, otherwise UNKNOWN) and
    ``other_info`` is left empty.
    """
    if transport_type == TransPortType.UDP:
        kind = PacketType.UDP
    elif transport_type == TransPortType.TCP:
        kind = PacketType.TCP
    else:
        kind = PacketType.UNKNOWN

    size = int(row["PacketLength"])
    seen_at = fast_strptime(row["Timestamp"])
    flow_direction = int(row["Direction"])
    return PacketInfo(
        length=size,
        timestamp=seen_at,
        direction=flow_direction,
        other_info={},
        packet_type=kind,
    )
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def get_connections_from_csv(
    packets_csv_path: str,
    flows_csv_path: str,
) -> list[Connection]:
    """Build Connection objects from the CSVs produced by the Go flow extractor.

    Skips Scapy packet parsing and TLS/QUIC client-hello inspection: the SNI
    (and ALPN, when the column exists) is taken straight from the flows CSV.

    packets_csv_path — path to the *Packets.csv (FlowId, PacketLength, Timestamp, Direction)
    flows_csv_path   — path to the *Flows.csv (FlowId, FiveTuple, StartTime, EndTime, SNI[, ALPN])

    Returns the connections in order of first appearance in the packets
    CSV, each one sorted via ``Connection.sort()``. Flows that have no
    packet rows produce no connection. A packet row whose FlowId is missing
    from the flows CSV raises KeyError.
    """
    # FlowId -> (five_tuple, sni, alpn), read once up front.
    flow_meta: dict[str, tuple] = {}
    for flow_row in read_csv_to_dicts(flows_csv_path):
        five_tuple = _ft_from_go_string(flow_row["FiveTuple"])
        sni = flow_row["SNI"]
        alpn = flow_row.get("ALPN", "nan")
        flow_meta[flow_row["FlowId"]] = (five_tuple, sni, alpn)

    by_flow: dict[str, Connection] = {}
    for packet_row in read_csv_to_dicts(packets_csv_path):
        flow_id = packet_row["FlowId"]
        conn = by_flow.get(flow_id)
        if conn is None:
            five_tuple, sni, alpn = flow_meta[flow_id]
            conn = Connection(five_tuple=five_tuple)
            # NOTE(review): the SNI is stored as-is, even when empty or "nan",
            # whereas ALPN is filtered below — confirm this asymmetry is intended.
            conn.setSNI(sni)
            if alpn and alpn != "nan":
                conn.setALPN(alpn)
            by_flow[flow_id] = conn
        conn.addPacket(
            _packet_from_csv_row(packet_row, conn.five_tuple.transport_type)
        )

    result = list(by_flow.values())
    for conn in result:
        conn.sort()
    return result
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: network_core
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: Core networking utilities and data models
|
|
5
5
|
Author: Your Name
|
|
6
6
|
Requires-Python: >=3.9
|
|
@@ -13,3 +13,4 @@ Requires-Dist: scapy
|
|
|
13
13
|
Requires-Dist: matplotlib
|
|
14
14
|
Requires-Dist: brotli
|
|
15
15
|
Requires-Dist: blackboxprotobuf
|
|
16
|
+
Requires-Dist: cryptography
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "network_core"
|
|
3
|
-
version = "0.2.2"
|
|
3
|
+
version = "0.3.1"
|
|
4
4
|
description = "Core networking utilities and data models"
|
|
5
5
|
authors = [{ name = "Your Name" }]
|
|
6
6
|
readme = "README.md"
|
|
@@ -14,7 +14,8 @@ dependencies = [
|
|
|
14
14
|
"scapy",
|
|
15
15
|
"matplotlib",
|
|
16
16
|
"brotli",
|
|
17
|
-
"blackboxprotobuf"
|
|
17
|
+
"blackboxprotobuf",
|
|
18
|
+
"cryptography"
|
|
18
19
|
]
|
|
19
20
|
|
|
20
21
|
[build-system]
|
|
File without changes
|
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
import datetime, pytz
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
def convertUNIXToDT(timestamp: float, timezone="Australia/Sydney") -> datetime.datetime:
|
|
5
|
-
australian_timezone = pytz.timezone(timezone)
|
|
6
|
-
utc_dt = datetime.datetime.fromtimestamp(timestamp, tz=pytz.utc)
|
|
7
|
-
return utc_dt.astimezone(australian_timezone)
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
def convertUNIXToHumanReadable(timestamp: float, timezone="Australia/Sydney") -> str:
|
|
11
|
-
dt = convertUNIXToDT(timestamp=timestamp, timezone=timezone)
|
|
12
|
-
ms = dt.microsecond // 1000 # get milliseconds, doing this so it isn't 0
|
|
13
|
-
return dt.strftime("%Y-%m-%d %H:%M:%S") + f".{ms}"
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def getDatetimeFromTime(time: datetime.time) -> datetime.datetime:
|
|
17
|
-
return datetime.datetime.combine(datetime.datetime.today(), time=time)
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
def addBufferToTime(time: datetime.time, buffer_in_seconds: float) -> datetime.time:
|
|
21
|
-
delta = datetime.timedelta(seconds=buffer_in_seconds)
|
|
22
|
-
return (getDatetimeFromTime(time=time) + delta).time()
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def fast_strptime(s: str) -> float:
|
|
26
|
-
"""
|
|
27
|
-
2026-01-11 12:25:31.689768 +0530 IST to UNIX
|
|
28
|
-
"""
|
|
29
|
-
parts = s.split()
|
|
30
|
-
|
|
31
|
-
# Drop trailing timezone name (like 'UTC', 'AEDT', etc.)
|
|
32
|
-
if len(parts) > 3:
|
|
33
|
-
s = " ".join(parts[:3])
|
|
34
|
-
else:
|
|
35
|
-
s = " ".join(parts)
|
|
36
|
-
|
|
37
|
-
# Handle too many fractional digits (datetime only supports microseconds)
|
|
38
|
-
date, time_str, offset = s.split()
|
|
39
|
-
if "." in time_str:
|
|
40
|
-
main, frac = time_str.split(".")
|
|
41
|
-
frac = frac[:6] # truncate nanoseconds to microseconds
|
|
42
|
-
time_str = f"{main}.{frac}"
|
|
43
|
-
s = f"{date} {time_str} {offset}"
|
|
44
|
-
|
|
45
|
-
fmt = "%Y-%m-%d %H:%M:%S.%f %z" if "." in time_str else "%Y-%m-%d %H:%M:%S %z"
|
|
46
|
-
|
|
47
|
-
return datetime.datetime.strptime(s, fmt).timestamp()
|
|
@@ -1,68 +0,0 @@
|
|
|
1
|
-
from scapy.all import PcapReader
|
|
2
|
-
from network_core.conn import Connection, getPacketInfoFromPacket
|
|
3
|
-
from network_core.dataModels import TransPortType, FiveTuple
|
|
4
|
-
|
|
5
|
-
from ..sni.clientHello import quic_ch, tls_ch
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
def getConnMp(pcap_path: str, put_snis=True, remove_payload=True):
|
|
9
|
-
"""
|
|
10
|
-
If remove payload I will null the transport payload for connection
|
|
11
|
-
|
|
12
|
-
This will return a connection map mapping FiveTuple object to Connection
|
|
13
|
-
"""
|
|
14
|
-
conn_mp: dict[FiveTuple, Connection] = {}
|
|
15
|
-
|
|
16
|
-
with PcapReader(pcap_path) as pcap_reader:
|
|
17
|
-
for packet in pcap_reader:
|
|
18
|
-
temp_conn = Connection.getConnFromPacket(packet=packet)
|
|
19
|
-
if temp_conn is None:
|
|
20
|
-
continue
|
|
21
|
-
key = temp_conn.five_tuple
|
|
22
|
-
rev_key = temp_conn.five_tuple.rev_ft()
|
|
23
|
-
|
|
24
|
-
direction = 0 # outgoing
|
|
25
|
-
|
|
26
|
-
if key not in conn_mp and rev_key not in conn_mp:
|
|
27
|
-
conn_mp[key] = temp_conn
|
|
28
|
-
temp_conn.addPacket(
|
|
29
|
-
getPacketInfoFromPacket(packet=packet, direction=direction)
|
|
30
|
-
)
|
|
31
|
-
|
|
32
|
-
if key in conn_mp:
|
|
33
|
-
conn_mp[key].addPacket(
|
|
34
|
-
getPacketInfoFromPacket(packet=packet, direction=direction)
|
|
35
|
-
)
|
|
36
|
-
elif rev_key in conn_mp:
|
|
37
|
-
direction = 1 # incoming
|
|
38
|
-
conn_mp[rev_key].addPacket(
|
|
39
|
-
getPacketInfoFromPacket(packet=packet, direction=direction)
|
|
40
|
-
)
|
|
41
|
-
|
|
42
|
-
for _, conn in conn_mp.items():
|
|
43
|
-
conn.sort()
|
|
44
|
-
|
|
45
|
-
if put_snis:
|
|
46
|
-
for _, conn in conn_mp.items():
|
|
47
|
-
sni: str | None = None
|
|
48
|
-
if conn.five_tuple.transport_type == TransPortType.TCP:
|
|
49
|
-
ch_dict = tls_ch(connection=conn)
|
|
50
|
-
if ch_dict is None:
|
|
51
|
-
continue
|
|
52
|
-
else:
|
|
53
|
-
sni = ch_dict["snis"][0]
|
|
54
|
-
conn.setSNI(sni) # type: ignore
|
|
55
|
-
elif conn.five_tuple.transport_type == TransPortType.UDP:
|
|
56
|
-
ch_dict = quic_ch(connection=conn)
|
|
57
|
-
if ch_dict is None:
|
|
58
|
-
continue
|
|
59
|
-
else:
|
|
60
|
-
sni = ch_dict["snis"][0]
|
|
61
|
-
conn.setSNI(sni) # type: ignore
|
|
62
|
-
conn.is_quic = True
|
|
63
|
-
|
|
64
|
-
if remove_payload:
|
|
65
|
-
# This removes payload
|
|
66
|
-
conn.removePayload()
|
|
67
|
-
|
|
68
|
-
return conn_mp
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{network_core-0.2.2/network_core/http/httpExtract → network_core-0.3.1/network_core/sni}/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|