network-core 0.2.2__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {network_core-0.2.2 → network_core-0.3.0}/PKG-INFO +2 -1
- network_core-0.3.0/network_core/__init__.py +17 -0
- network_core-0.3.0/network_core/analysis.py +248 -0
- {network_core-0.2.2 → network_core-0.3.0}/network_core/conn.py +38 -20
- network_core-0.3.0/network_core/utils/dt.py +60 -0
- network_core-0.3.0/network_core/utils/pcapIO.py +162 -0
- {network_core-0.2.2 → network_core-0.3.0}/network_core.egg-info/PKG-INFO +2 -1
- {network_core-0.2.2 → network_core-0.3.0}/network_core.egg-info/SOURCES.txt +1 -0
- {network_core-0.2.2 → network_core-0.3.0}/network_core.egg-info/requires.txt +1 -0
- {network_core-0.2.2 → network_core-0.3.0}/pyproject.toml +3 -2
- network_core-0.2.2/network_core/utils/__init__.py +0 -0
- network_core-0.2.2/network_core/utils/dt.py +0 -47
- network_core-0.2.2/network_core/utils/pcapIO.py +0 -68
- {network_core-0.2.2 → network_core-0.3.0}/network_core/connOps.py +0 -0
- {network_core-0.2.2 → network_core-0.3.0}/network_core/dataModels.py +0 -0
- {network_core-0.2.2/network_core → network_core-0.3.0/network_core/http}/__init__.py +0 -0
- {network_core-0.2.2/network_core/http → network_core-0.3.0/network_core/http/httpExtract}/__init__.py +0 -0
- {network_core-0.2.2 → network_core-0.3.0}/network_core/http/httpExtract/helpers.py +0 -0
- {network_core-0.2.2 → network_core-0.3.0}/network_core/http/httpExtract/parser.py +0 -0
- {network_core-0.2.2 → network_core-0.3.0}/network_core/http/httpExtract/pdh.py +0 -0
- {network_core-0.2.2 → network_core-0.3.0}/network_core/http/httpIO.py +0 -0
- {network_core-0.2.2/network_core/http/httpExtract → network_core-0.3.0/network_core/sni}/__init__.py +0 -0
- {network_core-0.2.2 → network_core-0.3.0}/network_core/sni/clientHello.py +0 -0
- {network_core-0.2.2 → network_core-0.3.0}/network_core/sni/constants.py +0 -0
- {network_core-0.2.2 → network_core-0.3.0}/network_core/sni/crypto.py +0 -0
- {network_core-0.2.2 → network_core-0.3.0}/network_core/sni/helpers.py +0 -0
- {network_core-0.2.2 → network_core-0.3.0}/network_core/sni/parsers.py +0 -0
- {network_core-0.2.2/network_core/sni → network_core-0.3.0/network_core/utils}/__init__.py +0 -0
- {network_core-0.2.2 → network_core-0.3.0}/network_core/utils/csvIO.py +0 -0
- {network_core-0.2.2 → network_core-0.3.0}/network_core/utils/jsonIO.py +0 -0
- {network_core-0.2.2 → network_core-0.3.0}/network_core.egg-info/dependency_links.txt +0 -0
- {network_core-0.2.2 → network_core-0.3.0}/network_core.egg-info/top_level.txt +0 -0
- {network_core-0.2.2 → network_core-0.3.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: network_core
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Core networking utilities and data models
|
|
5
5
|
Author: Your Name
|
|
6
6
|
Requires-Python: >=3.9
|
|
@@ -13,3 +13,4 @@ Requires-Dist: scapy
|
|
|
13
13
|
Requires-Dist: matplotlib
|
|
14
14
|
Requires-Dist: brotli
|
|
15
15
|
Requires-Dist: blackboxprotobuf
|
|
16
|
+
Requires-Dist: cryptography
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from .conn import Connection
|
|
2
|
+
from .connOps import (
|
|
3
|
+
ConnStats,
|
|
4
|
+
filterConnections,
|
|
5
|
+
combine_connections,
|
|
6
|
+
printConns,
|
|
7
|
+
getPacketsInInterval,
|
|
8
|
+
normalizePacketStream,
|
|
9
|
+
)
|
|
10
|
+
from .dataModels import PacketType, TransPortType, FiveTuple, PacketInfo, HttpUnit
|
|
11
|
+
from .analysis import (
|
|
12
|
+
classify_http_version,
|
|
13
|
+
http_version_debug,
|
|
14
|
+
detect_requests,
|
|
15
|
+
conn_features,
|
|
16
|
+
nan_to_none,
|
|
17
|
+
)
|
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
"""Connection and packet stream analysis functions."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import math
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
from .dataModels import PacketInfo
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Parameters of the packet-size heuristic, shared by classify_http_version and
# http_version_debug so the two can never disagree about what was decided.
_SMALL_UP_MAX_LEN = 200  # upload packets shorter than this count as "small"
_HTTP1_MEDIAN_MAX = 72  # small-upload median <= this -> HTTP/1.1, else HTTP/2
_MIN_SMALL_PKTS = 3  # fewer small uploads than this -> heuristic says "unknown"
_CONFIDENT_MARGIN = 12  # median further than this from the threshold -> "high"
_ALPN_VERSIONS = {
    "h2": "HTTP/2",
    "http/1.1": "HTTP/1.1",
    "http/1.0": "HTTP/1.1",
}


def _version_from_alpn(alpn: object) -> str | None:
    """Return the HTTP version a recognised ALPN id maps to, else None."""
    if isinstance(alpn, str):
        return _ALPN_VERSIONS.get(alpn)
    return None


def _small_upload_lengths(ps: list[PacketInfo]) -> list:
    """Return the lengths of upload (direction 0) packets below the size cap."""
    return [
        p.length
        for p in ps
        if p.direction == 0 and p.length < _SMALL_UP_MAX_LEN
    ]


def classify_http_version(ps: list[PacketInfo], alpn: str = "nan") -> str:
    """Infer the HTTP version, preferring a recognised ALPN over a heuristic.

    A recognised ALPN value is authoritative:
        'h2'                    -> 'HTTP/2'
        'http/1.1', 'http/1.0'  -> 'HTTP/1.1'
    Any other value ('nan', '', None, or an id not listed above) falls back
    to the packet-size heuristic over upload packets shorter than 200 B:
        fewer than 3 such packets -> 'unknown'
        median <= 72 B            -> 'HTTP/1.1'
        median >  72 B            -> 'HTTP/2'

    Args:
        ps: Packets of one connection; only ``direction`` and ``length`` of
            each packet are read.
        alpn: ALPN protocol id, or 'nan' when none is available.

    Returns:
        'HTTP/1.1', 'HTTP/2', or 'unknown'.
    """
    from_alpn = _version_from_alpn(alpn)
    if from_alpn is not None:
        return from_alpn

    small = _small_upload_lengths(ps)
    if len(small) < _MIN_SMALL_PKTS:
        return "unknown"
    return "HTTP/1.1" if float(np.median(small)) <= _HTTP1_MEDIAN_MAX else "HTTP/2"


def http_version_debug(ps: list[PacketInfo], alpn: str = "nan") -> dict:
    """Return a detailed breakdown of the HTTP version classification.

    Useful for auditing flows where the heuristic result seems wrong.

    The 'source' field is 'ALPN' only when the ALPN value is one that
    classify_http_version actually recognises; an unrecognised id is reported
    as 'heuristic' because that is where the verdict came from. With fewer
    than 3 small upload packets the verdict is 'unknown', and the confidence
    says so rather than grading a median the classifier never used.

    Args:
        ps: Packets of one connection; only ``direction`` and ``length`` of
            each packet are read.
        alpn: ALPN protocol id, or 'nan' when none is available. Echoed back
            unchanged under the 'alpn' key.

    Returns:
        A dict with the verdict, its source and confidence, summary
        statistics of the small upload packets (None when there are none),
        and a size histogram.
    """
    small = _small_upload_lengths(ps)
    total_up = sum(1 for p in ps if p.direction == 0)

    verdict = classify_http_version(ps, alpn)
    source = "ALPN" if _version_from_alpn(alpn) is not None else "heuristic"

    buckets = {
        "<54": sum(1 for n in small if n < 54),
        "54-66": sum(1 for n in small if 54 <= n <= 66),
        "67-80": sum(1 for n in small if 67 <= n <= 80),
        "81-100": sum(1 for n in small if 81 <= n <= 100),
        "101-199": sum(1 for n in small if 101 <= n <= 199),
    }

    med = float(np.median(small)) if small else None
    if source == "ALPN":
        confidence = "high (ALPN)"
    elif med is None:
        confidence = "unknown (no small packets)"
    elif len(small) < _MIN_SMALL_PKTS:
        # The classifier refuses to decide on this few samples.
        confidence = "unknown (too few small packets)"
    elif abs(med - _HTTP1_MEDIAN_MAX) > _CONFIDENT_MARGIN:
        confidence = "high (heuristic)"
    else:
        confidence = "low (heuristic, median near threshold)"

    return {
        "verdict": verdict,
        "source": source,
        "confidence": confidence,
        "alpn": alpn,
        "small_up_count": len(small),
        "total_up_count": total_up,
        "small_up_median": med,
        "small_up_p25": float(np.percentile(small, 25)) if small else None,
        "small_up_p75": float(np.percentile(small, 75)) if small else None,
        "small_up_min": float(min(small)) if small else None,
        "small_up_max": float(max(small)) if small else None,
        "histogram": buckets,
        "threshold": _HTTP1_MEDIAN_MAX,
        "note": "ACKs cluster at ~66 B; HTTP/2 WINDOW_UPDATE/PING at ~79-87 B",
    }
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def detect_requests(
    ps: list[PacketInfo],
    gap_ms: float = 800,
    min_bytes: int = 5000,
    min_pkts: int = 5,
) -> list[dict]:
    """Split the upload stream on idle gaps to find HTTP/1.1 request boundaries.

    Only data-carrying upload packets (>100 B) are used for burst detection,
    so pure TCP ACKs cannot extend a burst window across server-sent
    heartbeats. A burst counts as a request when it carries at least
    ``min_bytes`` bytes in at least ``min_pkts`` packets.

    ``ps`` is expected to be sorted by timestamp: the response lookup uses
    ``np.searchsorted`` and treats ``ps[-1]`` as the last packet of the flow.

    Args:
        ps: Packets of one connection (direction 0 = upload, 1 = download).
        gap_ms: Idle time between data uploads that starts a new burst.
        min_bytes: Minimum total bytes for a burst to count as a request.
        min_pkts: Minimum packet count for a burst to count as a request.

    Returns:
        One dict per detected request:
            req_kb, req_pkts, req_dur_ms  - upload burst stats
            think_ms       - time from burst end to the first download packet
                             inside this request's response window (None if
                             the window holds no download packet)
            res_kb, res_pkts, res_dur_ms  - downstream response stats
            client_gap_ms  - gap before the next request (None if last, or if
                             there was no response)
            t_start, t_end - burst timestamps
            turn_idx       - 0-based request index
    """
    data_ups = [p for p in ps if p.direction == 0 and p.length > 100]
    # min_bytes/min_pkts decide what is a request; no extra hard-coded floor.
    if not data_ups:
        return []

    bursts: list[list[PacketInfo]] = [[data_ups[0]]]
    for prev, cur in zip(data_ups, data_ups[1:]):
        if (cur.timestamp - prev.timestamp) * 1000 > gap_ms:
            bursts.append([cur])
        else:
            bursts[-1].append(cur)

    real = [
        b for b in bursts
        if sum(p.length for p in b) >= min_bytes and len(b) >= min_pkts
    ]
    if not real:
        return []

    dns = [p for p in ps if p.direction == 1]
    dn_ts = np.array([p.timestamp for p in dns], dtype=np.float64)
    # float64 keeps byte sums exact; float32 loses integers above 2**24.
    dn_lens = np.array([p.length for p in dns], dtype=np.float64)
    last_ts = ps[-1].timestamp

    rows = []
    for idx, burst in enumerate(real):
        burst_end = burst[-1].timestamp
        next_start = real[idx + 1][0].timestamp if idx + 1 < len(real) else None
        # Compare against None explicitly: a timestamp of 0.0 is a valid start.
        res_win_end = next_start if next_start is not None else last_ts + 1

        # Response window: downloads strictly after the burst and strictly
        # before the next request starts.
        dn_s = int(np.searchsorted(dn_ts, burst_end, side="right"))
        dn_e = int(np.searchsorted(dn_ts, res_win_end, side="left"))
        n_res = dn_e - dn_s
        res_kb = float(dn_lens[dn_s:dn_e].sum()) / 1024

        if n_res > 0:
            # Only a packet inside the window can be this request's response;
            # a later one belongs to a following request.
            think_ms = float((dn_ts[dn_s] - burst_end) * 1000)
            res_dur_ms = float((dn_ts[dn_e - 1] - dn_ts[dn_s]) * 1000)
            client_gap_ms = (
                float((next_start - dn_ts[dn_e - 1]) * 1000)
                if next_start is not None
                else None
            )
        else:
            think_ms = res_dur_ms = client_gap_ms = None

        rows.append({
            "req_kb": sum(p.length for p in burst) / 1024,
            "req_pkts": len(burst),
            "req_dur_ms": (burst_end - burst[0].timestamp) * 1000,
            "think_ms": think_ms,
            "res_kb": res_kb,
            "res_pkts": n_res,
            "res_dur_ms": res_dur_ms,
            "client_gap_ms": client_gap_ms,
            "t_start": burst[0].timestamp,
            "t_end": burst_end,
            "turn_idx": idx,
        })
    return rows
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def conn_features(
    ps: list[PacketInfo],
    http_ver: str,
    requests: list[dict],
) -> dict | None:
    """Extract connection-level features for ML or analysis.

    Args:
        ps: Packets of one connection; ``length``, ``direction`` (0 = upload)
            and ``timestamp`` of each packet are read.
        http_ver: HTTP version label, copied into the result unchanged.
        requests: Per-request dicts carrying at least 'req_kb' and 'res_kb'
            (the shape produced by detect_requests).

    Returns:
        A feature dict, or None if the connection has fewer than 5 packets or
        carries zero bytes. NaN fields indicate metrics that need at least
        2 requests (e.g. req_slope) or, for the delta/response metrics, at
        least 2 request steps with a non-empty preceding response.
    """
    if len(ps) < 5:
        return None

    # float64 keeps byte totals exact; float32 cannot represent every integer
    # above 2**24, which large flows exceed.
    lens = np.array([p.length for p in ps], dtype=np.float64)
    is_up = np.array([p.direction for p in ps]) == 0
    ts = np.array([p.timestamp for p in ps], dtype=np.float64)
    iats = np.diff(ts) * 1000

    bytes_up = float(lens[is_up].sum())
    bytes_dn = float(lens[~is_up].sum())
    total = bytes_up + bytes_dn
    if total == 0 or len(iats) == 0:
        return None

    req_kbs = [r["req_kb"] for r in requests]
    res_kbs = [r["res_kb"] for r in requests]

    nan = float("nan")
    req_slope = req_growth = req_cv = mono_frac = nan
    delta_res_corr = delta_res_ratio = nan

    if len(req_kbs) >= 2:
        steps = list(zip(req_kbs, req_kbs[1:]))
        turns = np.arange(len(req_kbs), dtype=float)
        req_slope = float(np.polyfit(turns, req_kbs, 1)[0])
        # The 0.01 floors avoid division by zero for empty first/mean requests.
        req_growth = float(req_kbs[-1] / max(req_kbs[0], 0.01))
        req_cv = float(np.std(req_kbs) / max(np.mean(req_kbs), 0.01))
        mono_frac = float(sum(nxt > cur for cur, nxt in steps) / len(steps))

        # Pair each request-size change with the response that preceded it,
        # keeping only steps whose preceding response was non-empty.
        valid = [
            (nxt - cur, res)
            for (cur, nxt), res in zip(steps, res_kbs)
            if res > 0
        ]
        if len(valid) >= 2:
            d_arr = np.array([v[0] for v in valid])
            r_arr = np.array([v[1] for v in valid])
            # Correlation is undefined for a constant series; return NaN
            # directly instead of letting corrcoef divide by zero and warn.
            if d_arr.max() > d_arr.min() and r_arr.max() > r_arr.min():
                delta_res_corr = float(np.corrcoef(d_arr, r_arr)[0, 1])
            delta_res_ratio = float(np.mean(d_arr / r_arr))

    small_ups = lens[is_up]
    small_ups = small_ups[small_ups < 200]
    idle_med = float(np.median(small_ups)) if len(small_ups) >= 3 else nan

    return {
        "http_ver": http_ver,
        "n_pkts": int(len(ps)),
        "bytes_up": bytes_up,
        "bytes_dn": bytes_dn,
        "up_frac": bytes_up / total,
        "pkt_std": float(lens.std()),
        "pkt_p95": float(np.percentile(lens, 95)),
        "pkt_median": float(np.median(lens)),
        "frac_ack": float(np.mean(lens < 100)),
        "idle_up_med": idle_med,
        "iat_max_ms": float(iats.max()),
        "flow_dur_s": float(ts[-1] - ts[0]),
        "n_requests": len(requests),
        "req_slope": req_slope,
        "req_growth": req_growth,
        "req_cv": req_cv,
        "mono_frac": mono_frac,
        "delta_res_corr": delta_res_corr,
        "delta_res_ratio": delta_res_ratio,
    }
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def nan_to_none(obj: object) -> object:
    """Recursively replace float NaN with None for JSON serialisation.

    Dicts, lists and tuples are walked recursively and rebuilt (tuples come
    back as plain tuples). Both built-in floats and NumPy floating scalars
    (e.g. ``np.float32``, which is not a ``float`` subclass) are checked for
    NaN. Every other value is returned unchanged.
    """
    if isinstance(obj, dict):
        return {key: nan_to_none(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [nan_to_none(value) for value in obj]
    if isinstance(obj, tuple):
        # json serialises tuples as arrays, so a NaN hidden in one would
        # otherwise still reach the encoder.
        return tuple(nan_to_none(value) for value in obj)
    if isinstance(obj, (float, np.floating)) and math.isnan(obj):
        return None
    return obj
|
|
@@ -47,7 +47,30 @@ class Connection:
|
|
|
47
47
|
self.packet_stream: List[PacketInfo] = []
|
|
48
48
|
self.other_info = other_info
|
|
49
49
|
self.__sni = None
|
|
50
|
+
self.__alpn = None
|
|
50
51
|
self.is_quic = False
|
|
52
|
+
self._stats_dirty = True
|
|
53
|
+
self._up_bytes_c = 0
|
|
54
|
+
self._down_bytes_c = 0
|
|
55
|
+
self._up_packets_c = 0
|
|
56
|
+
self._down_packets_c = 0
|
|
57
|
+
|
|
58
|
+
def _build_stats(self):
|
|
59
|
+
if not self._stats_dirty:
|
|
60
|
+
return
|
|
61
|
+
up_b = dn_b = up_p = dn_p = 0
|
|
62
|
+
for p in self.packet_stream:
|
|
63
|
+
if p.direction == 0:
|
|
64
|
+
up_b += p.length
|
|
65
|
+
up_p += 1
|
|
66
|
+
else:
|
|
67
|
+
dn_b += p.length
|
|
68
|
+
dn_p += 1
|
|
69
|
+
self._up_bytes_c = up_b
|
|
70
|
+
self._down_bytes_c = dn_b
|
|
71
|
+
self._up_packets_c = up_p
|
|
72
|
+
self._down_packets_c = dn_p
|
|
73
|
+
self._stats_dirty = False
|
|
51
74
|
|
|
52
75
|
@staticmethod
|
|
53
76
|
def getTransPortType(packet):
|
|
@@ -91,6 +114,7 @@ class Connection:
|
|
|
91
114
|
assert False, "Packet Type is Unknown"
|
|
92
115
|
self.connection_type.add(packet.packet_type)
|
|
93
116
|
self.packet_stream.append(packet)
|
|
117
|
+
self._stats_dirty = True
|
|
94
118
|
|
|
95
119
|
def __len__(self):
|
|
96
120
|
return len(self.packet_stream)
|
|
@@ -101,6 +125,12 @@ class Connection:
|
|
|
101
125
|
def getSNI(self):
|
|
102
126
|
return self.__sni
|
|
103
127
|
|
|
128
|
+
def setALPN(self, alpn: str):
    """Record *alpn* as this connection's ALPN value."""
    self.__alpn = alpn
|
|
130
|
+
|
|
131
|
+
def getALPN(self) -> str | None:
    """Return the stored ALPN value (None until setALPN has been called)."""
    return self.__alpn
|
|
133
|
+
|
|
104
134
|
def getPacketsOfTypes(self, packet_types: set[PacketType]) -> List[PacketInfo]:
|
|
105
135
|
return list(filter(lambda x: x.packet_type in packet_types, self.packet_stream))
|
|
106
136
|
|
|
@@ -149,35 +179,23 @@ class Connection:
|
|
|
149
179
|
|
|
150
180
|
@property
|
|
151
181
|
def down_bytes(self):
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
if p.direction == 1:
|
|
155
|
-
total += p.length
|
|
156
|
-
return total
|
|
182
|
+
self._build_stats()
|
|
183
|
+
return self._down_bytes_c
|
|
157
184
|
|
|
158
185
|
@property
|
|
159
186
|
def up_bytes(self):
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
if p.direction == 0:
|
|
163
|
-
total += p.length
|
|
164
|
-
return total
|
|
187
|
+
self._build_stats()
|
|
188
|
+
return self._up_bytes_c
|
|
165
189
|
|
|
166
190
|
@property
|
|
167
191
|
def down_packets(self):
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
if p.direction == 1:
|
|
171
|
-
total += 1
|
|
172
|
-
return total
|
|
192
|
+
self._build_stats()
|
|
193
|
+
return self._down_packets_c
|
|
173
194
|
|
|
174
195
|
@property
|
|
175
196
|
def up_packets(self):
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
if p.direction == 0:
|
|
179
|
-
total += 1
|
|
180
|
-
return total
|
|
197
|
+
self._build_stats()
|
|
198
|
+
return self._up_packets_c
|
|
181
199
|
|
|
182
200
|
@property
|
|
183
201
|
def start_timestamp(self):
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import calendar
|
|
2
|
+
import datetime, pytz
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def convertUNIXToDT(timestamp: float, timezone="Australia/Sydney") -> datetime.datetime:
    """Convert a UNIX timestamp to an aware datetime in *timezone*.

    The timestamp is first interpreted as UTC and then converted to the
    pytz zone named by *timezone* (default "Australia/Sydney").
    """
    target_tz = pytz.timezone(timezone)
    as_utc = datetime.datetime.fromtimestamp(timestamp, tz=pytz.utc)
    localized = as_utc.astimezone(target_tz)
    return localized
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def convertUNIXToHumanReadable(timestamp: float, timezone="Australia/Sydney") -> str:
    """Format a UNIX timestamp as 'YYYY-MM-DD HH:MM:SS.mmm' in *timezone*.

    The fractional part is milliseconds, always zero-padded to three digits,
    so 5 ms is written '.005' rather than '.5' (which reads as 500 ms).
    """
    dt = convertUNIXToDT(timestamp=timestamp, timezone=timezone)
    millis = dt.microsecond // 1000  # truncate microseconds to milliseconds
    return dt.strftime("%Y-%m-%d %H:%M:%S") + f".{millis:03d}"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def getDatetimeFromTime(time: datetime.time) -> datetime.datetime:
    """Return a datetime combining today's local date with *time*."""
    today = datetime.date.today()
    return datetime.datetime.combine(today, time)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def addBufferToTime(time: datetime.time, buffer_in_seconds: float) -> datetime.time:
    """Return *time* shifted by *buffer_in_seconds* seconds.

    The shift is done on a datetime anchored to today's date and only the
    time-of-day is returned, so a result past midnight wraps around.
    """
    anchored = datetime.datetime.combine(datetime.datetime.today(), time=time)
    shifted = anchored + datetime.timedelta(seconds=buffer_in_seconds)
    return shifted.time()
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def fast_strptime(s: str) -> float:
    """Parse '2026-01-11 12:25:31.689768 +0530 IST' to a UNIX timestamp float.

    Avoids datetime.strptime by slicing the fixed-width fields and converting
    them with int() directly, which is cheaper when called per-packet on
    large CSV files.

    Accepted layout, whitespace separated:
        date    'YYYY-MM-DD'
        time    'HH:MM:SS' with an optional fraction; digits beyond
                microseconds are truncated
        offset  optional; '+HHMM', '-HHMM', '+HH:MM', '-HH:MM' or 'Z'.
                UTC is assumed when it is missing.
    Anything after the offset (e.g. a zone name) is ignored.

    Raises:
        IndexError: if the date or time token is missing.
        ValueError: if a numeric field cannot be parsed.
    """
    parts = s.split()
    date_part, time_part = parts[0], parts[1]
    offset = parts[2] if len(parts) > 2 else "+0000"

    year = int(date_part[0:4])
    month = int(date_part[5:7])
    day = int(date_part[8:10])

    clock, _, fraction = time_part.partition(".")
    # Keep at most 6 fractional digits and right-pad, so '.5' means 500000 us.
    microseconds = int(fraction[:6].ljust(6, "0")) if fraction else 0

    hour = int(clock[0:2])
    minute = int(clock[3:5])
    second = int(clock[6:8])

    if offset in ("Z", "z"):
        offset_secs = 0
    else:
        sign = 1 if offset[0] == "+" else -1
        hhmm = offset[1:].replace(":", "")  # accept both +0530 and +05:30
        offset_secs = sign * (int(hhmm[0:2]) * 3600 + int(hhmm[2:4]) * 60)

    # calendar.timegm treats the tuple as UTC; subtracting the offset turns
    # the local wall-clock reading into real UTC seconds.
    wall_ts = calendar.timegm((year, month, day, hour, minute, second, 0, 0, 0))
    return wall_ts - offset_secs + microseconds / 1_000_000
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
from scapy.all import PcapReader
|
|
2
|
+
from network_core.conn import Connection, getPacketInfoFromPacket
|
|
3
|
+
from network_core.dataModels import TransPortType, FiveTuple, PacketInfo, PacketType
|
|
4
|
+
|
|
5
|
+
from ..sni.clientHello import quic_ch, tls_ch
|
|
6
|
+
from .csvIO import read_csv_to_dicts
|
|
7
|
+
from .dt import fast_strptime
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def getConnMp(pcap_path: str, put_snis=True, remove_payload=True):
    """Read a pcap file and group its packets into connections.

    Each packet is turned into a candidate Connection; packets whose
    five-tuple (or its reverse) is already known are appended to that
    connection, with direction 0 for the first-seen orientation and 1 for
    the reverse. Every packet is added exactly once.

    Args:
        pcap_path: Path of the capture file to read.
        put_snis: If True, look for a TLS (TCP) or QUIC (UDP) ClientHello on
            each connection and store its first SNI; connections where a
            QUIC ClientHello is found are flagged ``is_quic``.
        remove_payload: If True, call ``removePayload()`` on every
            connection after SNI extraction.

    Returns:
        A dict mapping each connection's FiveTuple to its Connection.
    """
    conn_mp: dict[FiveTuple, Connection] = {}

    with PcapReader(pcap_path) as pcap_reader:
        for packet in pcap_reader:
            temp_conn = Connection.getConnFromPacket(packet=packet)
            if temp_conn is None:
                continue
            key = temp_conn.five_tuple
            rev_key = key.rev_ft()

            if key in conn_mp:
                conn, direction = conn_mp[key], 0  # outgoing
            elif rev_key in conn_mp:
                conn, direction = conn_mp[rev_key], 1  # incoming
            else:
                # First packet of a new connection defines "outgoing".
                conn, direction = temp_conn, 0
                conn_mp[key] = conn

            # Single add per packet; a separate add in the "new connection"
            # branch would store the first packet twice.
            conn.addPacket(
                getPacketInfoFromPacket(packet=packet, direction=direction)
            )

    for conn in conn_mp.values():
        conn.sort()

    if put_snis:
        for conn in conn_mp.values():
            transport = conn.five_tuple.transport_type
            if transport == TransPortType.TCP:
                ch_dict = tls_ch(connection=conn)
            elif transport == TransPortType.UDP:
                ch_dict = quic_ch(connection=conn)
            else:
                continue
            if ch_dict is None:
                continue

            snis = ch_dict["snis"]
            if snis:  # a ClientHello may carry no server name
                conn.setSNI(snis[0])  # type: ignore
            if transport == TransPortType.UDP:
                conn.is_quic = True

    if remove_payload:
        # Own pass so payloads are dropped for every connection, including
        # those without a ClientHello and when put_snis is False.
        for conn in conn_mp.values():
            conn.removePayload()

    return conn_mp
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# ---------------------------------------------------------------------------
|
|
74
|
+
# CSV loading path (faster than pcap for post-processed data)
|
|
75
|
+
# ---------------------------------------------------------------------------
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _ft_from_go_string(tuple_str: str) -> FiveTuple:
    """Build a FiveTuple from the Go extractor's five-tuple string.

    Example: '192.168.31.246:64841->49.44.204.59:443->(Protocol: 6)'

    The string is split on '->' into source endpoint, destination endpoint
    and protocol. Each endpoint is split at its last ':' into address and
    port. Protocol '6' maps to TCP, '17' to UDP, anything else to UNKNOWN.
    """
    fields = [field.strip() for field in tuple_str.split("->")]
    src, dst = fields[0], fields[1]

    # Split at the LAST colon so addresses that contain colons stay intact.
    src_ip, _, src_port_txt = src.rpartition(":")
    dst_ip, _, dst_port_txt = dst.rpartition(":")
    src_port = int(src_port_txt)
    dst_port = int(dst_port_txt)

    proto = fields[2].rpartition(":")[2].strip(")").strip()
    transport_type = (
        TransPortType.TCP if proto == "6"
        else TransPortType.UDP if proto == "17"
        else TransPortType.UNKNOWN
    )

    return FiveTuple(
        src_ip=src_ip,
        dst_ip=dst_ip,
        src_port=src_port,
        dst_port=dst_port,
        transport_type=transport_type,
    )
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _packet_from_csv_row(row: dict, transport_type: TransPortType) -> PacketInfo:
    """Build a PacketInfo from one packets-CSV row.

    Reads the 'PacketLength', 'Timestamp' and 'Direction' columns. The packet
    type mirrors *transport_type* (TCP, UDP, otherwise UNKNOWN) and
    ``other_info`` starts empty.
    """
    packet_type = (
        PacketType.TCP if transport_type == TransPortType.TCP
        else PacketType.UDP if transport_type == TransPortType.UDP
        else PacketType.UNKNOWN
    )
    length = int(row["PacketLength"])
    timestamp = fast_strptime(row["Timestamp"])
    direction = int(row["Direction"])
    return PacketInfo(
        length=length,
        timestamp=timestamp,
        direction=direction,
        other_info={},
        packet_type=packet_type,
    )
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def get_connections_from_csv(
    packets_csv_path: str,
    flows_csv_path: str,
) -> list[Connection]:
    """Build Connection objects from the CSVs produced by the Go flow extractor.

    Skips Scapy packet parsing and ClientHello inspection: the SNI (and the
    ALPN, when that column exists) is taken straight from the flows CSV.

    packets_csv_path — path to the *Packets.csv (FlowId, PacketLength, Timestamp, Direction)
    flows_csv_path   — path to the *Flows.csv (FlowId, FiveTuple, StartTime, EndTime, SNI[, ALPN])

    Returns the connections in order of first appearance in the packets CSV,
    each sorted via ``Connection.sort()``. A packet row whose FlowId is
    missing from the flows CSV raises KeyError.
    """
    # Pass 1: per-flow metadata keyed by FlowId.
    flow_meta: dict[str, dict] = {}
    for flow_row in read_csv_to_dicts(flows_csv_path):
        meta = {
            "five_tuple": _ft_from_go_string(flow_row["FiveTuple"]),
            "sni": flow_row["SNI"],
            "alpn": flow_row.get("ALPN", "nan"),
        }
        flow_meta[flow_row["FlowId"]] = meta

    # Pass 2: attach packets, creating each Connection on first sight.
    by_flow: dict[str, Connection] = {}
    for pkt_row in read_csv_to_dicts(packets_csv_path):
        flow_id = pkt_row["FlowId"]
        if flow_id not in by_flow:
            meta = flow_meta[flow_id]
            fresh = Connection(five_tuple=meta["five_tuple"])
            fresh.setSNI(meta["sni"])
            alpn = meta["alpn"]
            # 'nan' and empty values mean "no ALPN recorded".
            if alpn and alpn != "nan":
                fresh.setALPN(alpn)
            by_flow[flow_id] = fresh
        target = by_flow[flow_id]
        target.addPacket(
            _packet_from_csv_row(pkt_row, target.five_tuple.transport_type)
        )

    ordered = list(by_flow.values())
    for connection in ordered:
        connection.sort()
    return ordered
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: network_core
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Core networking utilities and data models
|
|
5
5
|
Author: Your Name
|
|
6
6
|
Requires-Python: >=3.9
|
|
@@ -13,3 +13,4 @@ Requires-Dist: scapy
|
|
|
13
13
|
Requires-Dist: matplotlib
|
|
14
14
|
Requires-Dist: brotli
|
|
15
15
|
Requires-Dist: blackboxprotobuf
|
|
16
|
+
Requires-Dist: cryptography
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "network_core"
|
|
3
|
-
version = "0.2.2"
|
|
3
|
+
version = "0.3.0"
|
|
4
4
|
description = "Core networking utilities and data models"
|
|
5
5
|
authors = [{ name = "Your Name" }]
|
|
6
6
|
readme = "README.md"
|
|
@@ -14,7 +14,8 @@ dependencies = [
|
|
|
14
14
|
"scapy",
|
|
15
15
|
"matplotlib",
|
|
16
16
|
"brotli",
|
|
17
|
-
"blackboxprotobuf"
|
|
17
|
+
"blackboxprotobuf",
|
|
18
|
+
"cryptography"
|
|
18
19
|
]
|
|
19
20
|
|
|
20
21
|
[build-system]
|
|
File without changes
|
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
import datetime, pytz
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
def convertUNIXToDT(timestamp: float, timezone="Australia/Sydney") -> datetime.datetime:
|
|
5
|
-
australian_timezone = pytz.timezone(timezone)
|
|
6
|
-
utc_dt = datetime.datetime.fromtimestamp(timestamp, tz=pytz.utc)
|
|
7
|
-
return utc_dt.astimezone(australian_timezone)
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
def convertUNIXToHumanReadable(timestamp: float, timezone="Australia/Sydney") -> str:
|
|
11
|
-
dt = convertUNIXToDT(timestamp=timestamp, timezone=timezone)
|
|
12
|
-
ms = dt.microsecond // 1000 # get milliseconds, doing this so it isn't 0
|
|
13
|
-
return dt.strftime("%Y-%m-%d %H:%M:%S") + f".{ms}"
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def getDatetimeFromTime(time: datetime.time) -> datetime.datetime:
|
|
17
|
-
return datetime.datetime.combine(datetime.datetime.today(), time=time)
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
def addBufferToTime(time: datetime.time, buffer_in_seconds: float) -> datetime.time:
|
|
21
|
-
delta = datetime.timedelta(seconds=buffer_in_seconds)
|
|
22
|
-
return (getDatetimeFromTime(time=time) + delta).time()
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def fast_strptime(s: str) -> float:
|
|
26
|
-
"""
|
|
27
|
-
2026-01-11 12:25:31.689768 +0530 IST to UNIX
|
|
28
|
-
"""
|
|
29
|
-
parts = s.split()
|
|
30
|
-
|
|
31
|
-
# Drop trailing timezone name (like 'UTC', 'AEDT', etc.)
|
|
32
|
-
if len(parts) > 3:
|
|
33
|
-
s = " ".join(parts[:3])
|
|
34
|
-
else:
|
|
35
|
-
s = " ".join(parts)
|
|
36
|
-
|
|
37
|
-
# Handle too many fractional digits (datetime only supports microseconds)
|
|
38
|
-
date, time_str, offset = s.split()
|
|
39
|
-
if "." in time_str:
|
|
40
|
-
main, frac = time_str.split(".")
|
|
41
|
-
frac = frac[:6] # truncate nanoseconds to microseconds
|
|
42
|
-
time_str = f"{main}.{frac}"
|
|
43
|
-
s = f"{date} {time_str} {offset}"
|
|
44
|
-
|
|
45
|
-
fmt = "%Y-%m-%d %H:%M:%S.%f %z" if "." in time_str else "%Y-%m-%d %H:%M:%S %z"
|
|
46
|
-
|
|
47
|
-
return datetime.datetime.strptime(s, fmt).timestamp()
|
|
@@ -1,68 +0,0 @@
|
|
|
1
|
-
from scapy.all import PcapReader
|
|
2
|
-
from network_core.conn import Connection, getPacketInfoFromPacket
|
|
3
|
-
from network_core.dataModels import TransPortType, FiveTuple
|
|
4
|
-
|
|
5
|
-
from ..sni.clientHello import quic_ch, tls_ch
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
def getConnMp(pcap_path: str, put_snis=True, remove_payload=True):
|
|
9
|
-
"""
|
|
10
|
-
If remove payload I will null the transport payload for connection
|
|
11
|
-
|
|
12
|
-
This will return a connection map mapping FiveTuple object to Connection
|
|
13
|
-
"""
|
|
14
|
-
conn_mp: dict[FiveTuple, Connection] = {}
|
|
15
|
-
|
|
16
|
-
with PcapReader(pcap_path) as pcap_reader:
|
|
17
|
-
for packet in pcap_reader:
|
|
18
|
-
temp_conn = Connection.getConnFromPacket(packet=packet)
|
|
19
|
-
if temp_conn is None:
|
|
20
|
-
continue
|
|
21
|
-
key = temp_conn.five_tuple
|
|
22
|
-
rev_key = temp_conn.five_tuple.rev_ft()
|
|
23
|
-
|
|
24
|
-
direction = 0 # outgoing
|
|
25
|
-
|
|
26
|
-
if key not in conn_mp and rev_key not in conn_mp:
|
|
27
|
-
conn_mp[key] = temp_conn
|
|
28
|
-
temp_conn.addPacket(
|
|
29
|
-
getPacketInfoFromPacket(packet=packet, direction=direction)
|
|
30
|
-
)
|
|
31
|
-
|
|
32
|
-
if key in conn_mp:
|
|
33
|
-
conn_mp[key].addPacket(
|
|
34
|
-
getPacketInfoFromPacket(packet=packet, direction=direction)
|
|
35
|
-
)
|
|
36
|
-
elif rev_key in conn_mp:
|
|
37
|
-
direction = 1 # incoming
|
|
38
|
-
conn_mp[rev_key].addPacket(
|
|
39
|
-
getPacketInfoFromPacket(packet=packet, direction=direction)
|
|
40
|
-
)
|
|
41
|
-
|
|
42
|
-
for _, conn in conn_mp.items():
|
|
43
|
-
conn.sort()
|
|
44
|
-
|
|
45
|
-
if put_snis:
|
|
46
|
-
for _, conn in conn_mp.items():
|
|
47
|
-
sni: str | None = None
|
|
48
|
-
if conn.five_tuple.transport_type == TransPortType.TCP:
|
|
49
|
-
ch_dict = tls_ch(connection=conn)
|
|
50
|
-
if ch_dict is None:
|
|
51
|
-
continue
|
|
52
|
-
else:
|
|
53
|
-
sni = ch_dict["snis"][0]
|
|
54
|
-
conn.setSNI(sni) # type: ignore
|
|
55
|
-
elif conn.five_tuple.transport_type == TransPortType.UDP:
|
|
56
|
-
ch_dict = quic_ch(connection=conn)
|
|
57
|
-
if ch_dict is None:
|
|
58
|
-
continue
|
|
59
|
-
else:
|
|
60
|
-
sni = ch_dict["snis"][0]
|
|
61
|
-
conn.setSNI(sni) # type: ignore
|
|
62
|
-
conn.is_quic = True
|
|
63
|
-
|
|
64
|
-
if remove_payload:
|
|
65
|
-
# This removes payload
|
|
66
|
-
conn.removePayload()
|
|
67
|
-
|
|
68
|
-
return conn_mp
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{network_core-0.2.2/network_core/http/httpExtract → network_core-0.3.0/network_core/sni}/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|